From 1d5ae1026e831016fc29fd927877c86af904481f Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 23 Oct 2019 17:51:42 +0000 Subject: [PATCH 01/10] Vendor import of stripped llvm trunk r375505, the last commit before the upstream Subversion repository was made read-only, and the LLVM project migrated to GitHub: https://llvm.org/svn/llvm-project/llvm/trunk@375505 --- include/llvm-c/Core.h | 22 +- include/llvm-c/DebugInfo.h | 47 +- include/llvm-c/Remarks.h | 17 +- include/llvm-c/Transforms/IPO.h | 18 + include/llvm-c/Transforms/Scalar.h | 6 + include/llvm-c/lto.h | 94 +- include/llvm/ADT/APFloat.h | 5 + include/llvm/ADT/APInt.h | 9 + include/llvm/ADT/Any.h | 4 +- include/llvm/ADT/ArrayRef.h | 6 + include/llvm/ADT/DenseMap.h | 57 +- include/llvm/ADT/DenseMapInfo.h | 13 +- include/llvm/ADT/DirectedGraph.h | 270 + include/llvm/ADT/Hashing.h | 1 - include/llvm/ADT/IntervalMap.h | 4 +- include/llvm/ADT/PointerIntPair.h | 11 +- include/llvm/ADT/PointerUnion.h | 30 +- include/llvm/ADT/STLExtras.h | 168 +- include/llvm/ADT/SmallBitVector.h | 2 +- include/llvm/ADT/Statistic.h | 112 +- include/llvm/ADT/StringExtras.h | 2 +- include/llvm/ADT/StringMap.h | 59 +- include/llvm/ADT/StringRef.h | 18 +- include/llvm/ADT/StringSet.h | 8 +- include/llvm/ADT/TinyPtrVector.h | 38 +- include/llvm/ADT/VariadicFunction.h | 330 - include/llvm/ADT/iterator_range.h | 1 + include/llvm/Analysis/AliasAnalysis.h | 2 +- include/llvm/Analysis/AliasSetTracker.h | 5 +- include/llvm/Analysis/AssumptionCache.h | 4 +- include/llvm/Analysis/CFG.h | 2 + .../llvm/Analysis/CFLAndersAliasAnalysis.h | 5 +- .../llvm/Analysis/CFLSteensAliasAnalysis.h | 5 +- include/llvm/Analysis/CGSCCPassManager.h | 31 +- include/llvm/Analysis/CaptureTracking.h | 6 + include/llvm/Analysis/DDG.h | 430 ++ include/llvm/Analysis/DOTGraphTraitsPass.h | 4 +- .../llvm/Analysis/DependenceGraphBuilder.h | 119 + include/llvm/Analysis/DivergenceAnalysis.h | 16 +- include/llvm/Analysis/GlobalsModRef.h | 12 +- 
include/llvm/Analysis/InstructionSimplify.h | 36 +- include/llvm/Analysis/LazyCallGraph.h | 10 +- .../llvm/Analysis/LegacyDivergenceAnalysis.h | 16 +- include/llvm/Analysis/Loads.h | 22 +- include/llvm/Analysis/LoopAnalysisManager.h | 10 +- include/llvm/Analysis/LoopCacheAnalysis.h | 281 + include/llvm/Analysis/LoopInfo.h | 37 +- include/llvm/Analysis/LoopInfoImpl.h | 8 +- include/llvm/Analysis/MemoryBuiltins.h | 26 +- .../llvm/Analysis/MemoryDependenceAnalysis.h | 14 +- include/llvm/Analysis/MemorySSA.h | 4 +- include/llvm/Analysis/MemorySSAUpdater.h | 3 +- include/llvm/Analysis/MustExecute.h | 285 +- include/llvm/Analysis/Passes.h | 7 + include/llvm/Analysis/ProfileSummaryInfo.h | 23 + include/llvm/Analysis/RegionInfoImpl.h | 2 +- include/llvm/Analysis/ScalarEvolution.h | 6 +- .../llvm/Analysis/ScalarEvolutionExpander.h | 22 +- include/llvm/Analysis/TargetLibraryInfo.h | 17 +- include/llvm/Analysis/TargetTransformInfo.h | 180 +- .../llvm/Analysis/TargetTransformInfoImpl.h | 55 +- include/llvm/Analysis/TypeMetadataUtils.h | 2 + include/llvm/Analysis/Utils/Local.h | 22 +- include/llvm/Analysis/ValueTracking.h | 67 +- include/llvm/Analysis/VectorUtils.h | 144 +- include/llvm/BinaryFormat/Dwarf.def | 198 +- include/llvm/BinaryFormat/Dwarf.h | 125 +- include/llvm/BinaryFormat/ELF.h | 66 + .../llvm/BinaryFormat/ELFRelocs/AArch64.def | 7 +- include/llvm/BinaryFormat/MachO.h | 5 + include/llvm/BinaryFormat/Magic.h | 1 + include/llvm/BinaryFormat/Minidump.h | 68 + .../llvm/BinaryFormat/MinidumpConstants.def | 41 +- include/llvm/BinaryFormat/Wasm.h | 14 + include/llvm/BinaryFormat/XCOFF.h | 116 +- include/llvm/Bitcode/BitcodeAnalyzer.h | 1 + include/llvm/Bitcode/LLVMBitCodes.h | 2 +- include/llvm/Bitstream/BitCodes.h | 5 + include/llvm/Bitstream/BitstreamReader.h | 1 + include/llvm/CodeGen/AccelTable.h | 2 - include/llvm/CodeGen/AsmPrinter.h | 21 +- include/llvm/CodeGen/BasicTTIImpl.h | 73 +- include/llvm/CodeGen/CallingConvLower.h | 18 +- 
include/llvm/CodeGen/DFAPacketizer.h | 44 +- include/llvm/CodeGen/DIE.h | 12 + include/llvm/CodeGen/FastISel.h | 4 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 2 +- .../llvm/CodeGen/GlobalISel/CallLowering.h | 127 +- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 127 +- .../llvm/CodeGen/GlobalISel/CombinerInfo.h | 15 +- .../GlobalISel/ConstantFoldingMIRBuilder.h | 11 + .../llvm/CodeGen/GlobalISel/GISelKnownBits.h | 111 + .../llvm/CodeGen/GlobalISel/IRTranslator.h | 12 +- .../CodeGen/GlobalISel/InstructionSelector.h | 34 +- .../GlobalISel/InstructionSelectorImpl.h | 66 +- .../GlobalISel/LegalizationArtifactCombiner.h | 92 +- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 20 + .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 61 +- .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 20 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 93 +- include/llvm/CodeGen/GlobalISel/Utils.h | 22 +- include/llvm/CodeGen/ISDOpcodes.h | 41 +- include/llvm/CodeGen/LiveInterval.h | 6 +- include/llvm/CodeGen/LiveIntervals.h | 21 +- {lib => include/llvm}/CodeGen/LiveRangeCalc.h | 10 +- include/llvm/CodeGen/LiveRegUnits.h | 4 +- include/llvm/CodeGen/MIRYamlMapping.h | 1 + include/llvm/CodeGen/MachineBasicBlock.h | 36 +- include/llvm/CodeGen/MachineCombinerPattern.h | 21 + include/llvm/CodeGen/MachineDominators.h | 63 +- include/llvm/CodeGen/MachineFrameInfo.h | 62 +- include/llvm/CodeGen/MachineFunction.h | 61 +- include/llvm/CodeGen/MachineInstr.h | 99 +- include/llvm/CodeGen/MachineInstrBuilder.h | 62 +- include/llvm/CodeGen/MachineLoopUtils.h | 41 + include/llvm/CodeGen/MachineMemOperand.h | 7 - include/llvm/CodeGen/MachineModuleInfo.h | 52 +- include/llvm/CodeGen/MachineOperand.h | 49 +- include/llvm/CodeGen/MachinePipeliner.h | 80 +- include/llvm/CodeGen/MachinePostDominators.h | 46 +- include/llvm/CodeGen/MachineRegionInfo.h | 2 +- include/llvm/CodeGen/MachineRegisterInfo.h | 70 +- include/llvm/CodeGen/MachineScheduler.h | 1 + include/llvm/CodeGen/ModuloSchedule.h | 367 + 
include/llvm/CodeGen/PBQP/Math.h | 12 +- include/llvm/CodeGen/Passes.h | 4 + include/llvm/CodeGen/Register.h | 128 +- include/llvm/CodeGen/RegisterClassInfo.h | 2 +- include/llvm/CodeGen/RegisterPressure.h | 9 +- include/llvm/CodeGen/RegisterScavenging.h | 24 +- include/llvm/CodeGen/ScheduleDAGInstrs.h | 12 +- include/llvm/CodeGen/SelectionDAG.h | 95 +- include/llvm/CodeGen/SelectionDAGISel.h | 36 +- include/llvm/CodeGen/SelectionDAGNodes.h | 105 +- include/llvm/CodeGen/StackProtector.h | 6 + include/llvm/CodeGen/SwitchLoweringUtils.h | 5 +- include/llvm/CodeGen/TargetCallingConv.h | 23 +- include/llvm/CodeGen/TargetFrameLowering.h | 30 +- include/llvm/CodeGen/TargetInstrInfo.h | 102 +- include/llvm/CodeGen/TargetLowering.h | 399 +- .../CodeGen/TargetLoweringObjectFileImpl.h | 34 +- include/llvm/CodeGen/TargetPassConfig.h | 2 +- include/llvm/CodeGen/TargetRegisterInfo.h | 94 +- include/llvm/CodeGen/TargetSubtargetInfo.h | 10 +- include/llvm/CodeGen/ValueTypes.h | 4 +- include/llvm/CodeGen/ValueTypes.td | 221 +- include/llvm/CodeGen/VirtRegMap.h | 43 +- .../llvm/DebugInfo/CodeView/CVTypeVisitor.h | 4 - .../DebugInfo/CodeView/CodeViewRecordIO.h | 13 +- .../DebugInfo/CodeView/CodeViewRegisters.def | 128 + include/llvm/DebugInfo/CodeView/EnumTables.h | 11 + .../DebugInfo/CodeView/SymbolDeserializer.h | 2 +- .../llvm/DebugInfo/CodeView/SymbolRecord.h | 304 +- .../DebugInfo/CodeView/TypeDeserializer.h | 2 +- .../DebugInfo/CodeView/TypeRecordMapping.h | 1 + .../CodeView/TypeVisitorCallbackPipeline.h | 5 - include/llvm/DebugInfo/DIContext.h | 14 +- .../DWARF/DWARFAbbreviationDeclaration.h | 4 +- .../DebugInfo/DWARF/DWARFAcceleratorTable.h | 68 +- include/llvm/DebugInfo/DWARF/DWARFAttribute.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFContext.h | 8 +- .../llvm/DebugInfo/DWARF/DWARFDataExtractor.h | 13 +- .../llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h | 6 +- include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h | 6 +- .../DebugInfo/DWARF/DWARFDebugArangeSet.h | 4 +- 
.../llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 8 +- .../llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 2 +- .../DebugInfo/DWARF/DWARFDebugInfoEntry.h | 10 +- include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 27 +- include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h | 35 +- .../llvm/DebugInfo/DWARF/DWARFDebugPubTable.h | 4 +- .../DebugInfo/DWARF/DWARFDebugRangeList.h | 7 +- .../llvm/DebugInfo/DWARF/DWARFDebugRnglists.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFDie.h | 2 +- .../llvm/DebugInfo/DWARF/DWARFExpression.h | 14 +- include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 10 +- include/llvm/DebugInfo/DWARF/DWARFListTable.h | 77 +- include/llvm/DebugInfo/DWARF/DWARFObject.h | 30 +- include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFUnit.h | 51 +- include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 4 +- include/llvm/DebugInfo/GSYM/FileEntry.h | 7 +- include/llvm/DebugInfo/GSYM/FileWriter.h | 124 + include/llvm/DebugInfo/GSYM/FunctionInfo.h | 152 +- include/llvm/DebugInfo/GSYM/GsymCreator.h | 229 + include/llvm/DebugInfo/GSYM/GsymReader.h | 228 + include/llvm/DebugInfo/GSYM/Header.h | 129 + include/llvm/DebugInfo/GSYM/InlineInfo.h | 63 +- include/llvm/DebugInfo/GSYM/LineEntry.h | 7 +- include/llvm/DebugInfo/GSYM/LineTable.h | 198 + include/llvm/DebugInfo/GSYM/Range.h | 33 +- include/llvm/DebugInfo/GSYM/StringTable.h | 7 +- include/llvm/DebugInfo/PDB/GenericError.h | 2 +- .../llvm/DebugInfo/PDB/Native/SymbolCache.h | 2 +- include/llvm/DebugInfo/PDB/PDBSymbol.h | 2 +- include/llvm/DebugInfo/Symbolize/Symbolize.h | 1 + include/llvm/Demangle/Demangle.h | 9 +- include/llvm/Demangle/DemangleConfig.h | 7 - include/llvm/Demangle/ItaniumDemangle.h | 419 +- include/llvm/Demangle/MicrosoftDemangle.h | 1 + .../llvm/Demangle/MicrosoftDemangleNodes.h | 7 +- .../ExecutionEngine/JITLink/EHFrameSupport.h | 39 +- .../llvm/ExecutionEngine/JITLink/JITLink.h | 1362 ++-- .../JITLink/JITLinkMemoryManager.h | 17 +- 
.../ExecutionEngine/JITLink/MachO_arm64.h | 60 + .../ExecutionEngine/JITLink/MachO_x86_64.h | 1 + include/llvm/ExecutionEngine/JITSymbol.h | 5 +- .../Orc/CompileOnDemandLayer.h | 10 +- include/llvm/ExecutionEngine/Orc/Core.h | 137 +- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 46 +- .../ExecutionEngine/Orc/IRTransformLayer.h | 3 + include/llvm/ExecutionEngine/Orc/LLJIT.h | 4 +- .../llvm/ExecutionEngine/Orc/LambdaResolver.h | 5 +- .../ExecutionEngine/Orc/LazyEmittingLayer.h | 40 +- .../llvm/ExecutionEngine/Orc/LazyReexports.h | 13 +- include/llvm/ExecutionEngine/Orc/Legacy.h | 2 +- .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 23 +- .../Orc/OrcRemoteTargetClient.h | 4 +- .../ExecutionEngine/Orc/RPCSerialization.h | 12 +- include/llvm/ExecutionEngine/Orc/RPCUtils.h | 65 +- .../Orc/RTDyldObjectLinkingLayer.h | 6 +- .../ExecutionEngine/Orc/RemoteObjectLayer.h | 21 +- .../ExecutionEngine/Orc/SpeculateAnalyses.h | 84 + .../llvm/ExecutionEngine/Orc/Speculation.h | 207 + .../ExecutionEngine/Orc/ThreadSafeModule.h | 53 +- include/llvm/ExecutionEngine/RuntimeDyld.h | 23 +- include/llvm/IR/Attributes.h | 49 +- include/llvm/IR/AutoUpgrade.h | 10 +- include/llvm/IR/BasicBlock.h | 5 + include/llvm/IR/CallSite.h | 9 + include/llvm/IR/CallingConv.h | 13 + include/llvm/IR/Constant.h | 6 + include/llvm/IR/ConstantRange.h | 10 +- include/llvm/IR/DataLayout.h | 125 +- include/llvm/IR/DebugInfoFlags.def | 6 +- include/llvm/IR/DebugInfoMetadata.h | 4 +- include/llvm/IR/DerivedTypes.h | 77 +- include/llvm/IR/DiagnosticInfo.h | 25 +- include/llvm/IR/FixedMetadataKinds.def | 43 + include/llvm/IR/Function.h | 15 +- include/llvm/IR/GlobalAlias.h | 4 - include/llvm/IR/GlobalIFunc.h | 4 - include/llvm/IR/GlobalIndirectSymbol.h | 8 +- include/llvm/IR/GlobalObject.h | 26 +- include/llvm/IR/GlobalVariable.h | 1 + include/llvm/IR/IRBuilder.h | 90 +- include/llvm/IR/InlineAsm.h | 1 + include/llvm/IR/InstrTypes.h | 12 +- include/llvm/IR/Instruction.h | 10 + include/llvm/IR/Instructions.h | 99 
+- include/llvm/IR/IntrinsicInst.h | 23 +- include/llvm/IR/Intrinsics.h | 10 +- include/llvm/IR/Intrinsics.td | 291 +- include/llvm/IR/IntrinsicsAArch64.td | 125 +- include/llvm/IR/IntrinsicsAMDGPU.td | 121 +- include/llvm/IR/IntrinsicsARM.td | 9 + include/llvm/IR/IntrinsicsBPF.td | 3 + include/llvm/IR/IntrinsicsMips.td | 16 +- include/llvm/IR/IntrinsicsNVVM.td | 125 +- include/llvm/IR/IntrinsicsWebAssembly.td | 58 + include/llvm/IR/IntrinsicsX86.td | 12 +- include/llvm/IR/LLVMContext.h | 31 +- include/llvm/IR/MDBuilder.h | 5 + include/llvm/IR/Metadata.h | 4 +- include/llvm/IR/Module.h | 1 + include/llvm/IR/ModuleSummaryIndex.h | 18 +- include/llvm/IR/ModuleSummaryIndexYAML.h | 2 +- include/llvm/IR/Operator.h | 21 +- include/llvm/IR/PassManager.h | 5 +- include/llvm/IR/PassManagerInternal.h | 2 +- include/llvm/IR/PatternMatch.h | 155 +- include/llvm/IR/RemarkStreamer.h | 28 +- include/llvm/IR/Type.h | 15 +- include/llvm/IR/User.h | 2 +- include/llvm/IR/Value.h | 58 +- include/llvm/IR/ValueMap.h | 15 +- include/llvm/InitializePasses.h | 10 +- include/llvm/LTO/Config.h | 2 +- include/llvm/LTO/LTO.h | 10 +- include/llvm/LTO/legacy/LTOCodeGenerator.h | 2 +- include/llvm/LinkAllPasses.h | 2 + include/llvm/MC/MCAsmInfo.h | 18 + include/llvm/MC/MCAsmInfoXCOFF.h | 5 + include/llvm/MC/MCAsmMacro.h | 11 +- include/llvm/MC/MCContext.h | 23 +- include/llvm/MC/MCDirectives.h | 1 + include/llvm/MC/MCDwarf.h | 3 +- include/llvm/MC/MCExpr.h | 8 +- include/llvm/MC/MCFixup.h | 119 +- include/llvm/MC/MCFragment.h | 16 +- include/llvm/MC/MCInstPrinter.h | 2 - include/llvm/MC/MCInstrAnalysis.h | 6 + include/llvm/MC/MCInstrDesc.h | 23 +- include/llvm/MC/MCLinkerOptimizationHint.h | 2 + include/llvm/MC/MCRegister.h | 110 + include/llvm/MC/MCRegisterInfo.h | 98 +- include/llvm/MC/MCSection.h | 7 +- include/llvm/MC/MCSectionXCOFF.h | 22 +- include/llvm/MC/MCStreamer.h | 41 +- include/llvm/MC/MCSubtargetInfo.h | 46 + include/llvm/MC/MCSymbolWasm.h | 7 + include/llvm/MC/MCSymbolXCOFF.h | 32 
+ include/llvm/MC/MCWasmObjectWriter.h | 4 +- include/llvm/MC/MCXCOFFStreamer.h | 2 + include/llvm/MC/StringTableBuilder.h | 2 +- include/llvm/MC/SubtargetFeature.h | 139 +- include/llvm/MCA/CodeEmitter.h | 72 + include/llvm/MCA/Context.h | 5 +- include/llvm/MCA/HardwareUnits/LSUnit.h | 18 +- include/llvm/MCA/HardwareUnits/RegisterFile.h | 2 +- .../llvm/MCA/HardwareUnits/ResourceManager.h | 51 +- .../MCA/HardwareUnits/RetireControlUnit.h | 33 +- include/llvm/MCA/HardwareUnits/Scheduler.h | 13 +- include/llvm/MCA/Instruction.h | 51 +- include/llvm/MCA/SourceMgr.h | 5 +- include/llvm/MCA/Stages/RetireStage.h | 6 +- include/llvm/Object/Archive.h | 7 +- include/llvm/Object/Binary.h | 16 +- include/llvm/Object/COFF.h | 36 +- include/llvm/Object/ELF.h | 112 +- include/llvm/Object/ELFObjectFile.h | 31 +- include/llvm/Object/ELFTypes.h | 6 +- include/llvm/Object/MachO.h | 1 + include/llvm/Object/MachOUniversal.h | 14 +- include/llvm/Object/Minidump.h | 77 +- include/llvm/Object/ObjectFile.h | 21 +- include/llvm/Object/StackMapParser.h | 4 +- include/llvm/Object/TapiFile.h | 60 + include/llvm/Object/TapiUniversal.h | 109 + include/llvm/Object/WindowsResource.h | 55 +- include/llvm/Object/XCOFFObjectFile.h | 132 +- include/llvm/ObjectYAML/DWARFYAML.h | 2 +- include/llvm/ObjectYAML/ELFYAML.h | 116 +- include/llvm/ObjectYAML/MachOYAML.h | 3 + include/llvm/ObjectYAML/MinidumpYAML.h | 64 +- include/llvm/ObjectYAML/WasmYAML.h | 2 +- include/llvm/ObjectYAML/yaml2obj.h | 67 + include/llvm/Pass.h | 5 + include/llvm/Passes/PassBuilder.h | 7 +- .../ProfileData/Coverage/CoverageMapping.h | 16 +- .../Coverage/CoverageMappingWriter.h | 3 +- include/llvm/ProfileData/InstrProf.h | 18 +- include/llvm/ProfileData/InstrProfReader.h | 12 +- include/llvm/ProfileData/SampleProf.h | 178 +- include/llvm/ProfileData/SampleProfReader.h | 276 +- include/llvm/ProfileData/SampleProfWriter.h | 118 +- .../llvm/Remarks/BitstreamRemarkContainer.h | 106 + include/llvm/Remarks/BitstreamRemarkParser.h | 116 + 
.../llvm/Remarks/BitstreamRemarkSerializer.h | 196 + include/llvm/Remarks/Remark.h | 36 +- include/llvm/Remarks/RemarkFormat.h | 4 +- include/llvm/Remarks/RemarkParser.h | 38 +- include/llvm/Remarks/RemarkSerializer.h | 78 +- include/llvm/Remarks/RemarkStringTable.h | 24 +- include/llvm/Remarks/YAMLRemarkSerializer.h | 108 + include/llvm/Support/AArch64TargetParser.def | 72 +- include/llvm/Support/AArch64TargetParser.h | 3 +- include/llvm/Support/ARMTargetParser.def | 2 + include/llvm/Support/ARMTargetParser.h | 20 +- include/llvm/Support/AlignOf.h | 136 +- include/llvm/Support/Alignment.h | 403 ++ include/llvm/Support/Allocator.h | 22 +- include/llvm/Support/Automaton.h | 253 + include/llvm/Support/BinaryStreamArray.h | 2 +- include/llvm/Support/BinaryStreamReader.h | 2 +- include/llvm/Support/CRC.h | 45 +- include/llvm/Support/CommandLine.h | 3 + include/llvm/Support/Compiler.h | 81 +- include/llvm/Support/DataExtractor.h | 196 +- include/llvm/Support/Endian.h | 10 +- include/llvm/Support/Error.h | 42 +- include/llvm/Support/FileCheck.h | 604 +- include/llvm/Support/FileCollector.h | 79 + include/llvm/Support/FileSystem.h | 30 +- include/llvm/Support/FileUtilities.h | 38 + include/llvm/Support/Format.h | 5 +- include/llvm/Support/GenericDomTree.h | 6 +- .../llvm/Support/GenericDomTreeConstruction.h | 8 +- include/llvm/Support/GlobPattern.h | 2 +- include/llvm/Support/Host.h | 28 - include/llvm/Support/JamCRC.h | 48 - include/llvm/Support/MachineValueType.h | 387 +- include/llvm/Support/MathExtras.h | 187 +- include/llvm/Support/Mutex.h | 105 +- include/llvm/Support/MutexGuard.h | 40 - include/llvm/Support/OnDiskHashTable.h | 3 +- include/llvm/Support/Parallel.h | 27 - include/llvm/Support/RWMutex.h | 275 +- include/llvm/Support/Regex.h | 18 +- include/llvm/Support/Registry.h | 2 +- include/llvm/Support/SHA1.h | 2 +- include/llvm/Support/ScalableSize.h | 43 - include/llvm/Support/Signals.h | 11 + include/llvm/Support/SwapByteOrder.h | 38 +- 
include/llvm/Support/TargetOpcodes.def | 26 +- include/llvm/Support/TargetRegistry.h | 4 +- include/llvm/Support/TimeProfiler.h | 2 +- include/llvm/Support/TrailingObjects.h | 18 +- include/llvm/Support/TypeSize.h | 201 + include/llvm/Support/UnicodeCharRanges.h | 3 - include/llvm/Support/UniqueLock.h | 68 - include/llvm/Support/VirtualFileSystem.h | 16 +- include/llvm/Support/Win64EH.h | 4 +- include/llvm/Support/X86TargetParser.def | 4 +- include/llvm/Support/YAMLTraits.h | 11 +- include/llvm/Support/circular_raw_ostream.h | 4 + include/llvm/Support/raw_ostream.h | 27 +- include/llvm/Support/type_traits.h | 18 - include/llvm/TableGen/Automaton.td | 95 + include/llvm/TableGen/Error.h | 1 + include/llvm/TableGen/Record.h | 14 +- include/llvm/Target/GenericOpcodes.td | 87 +- include/llvm/Target/GlobalISel/Combine.td | 103 + .../Target/GlobalISel/SelectionDAGCompat.td | 25 + include/llvm/Target/Target.td | 33 +- include/llvm/Target/TargetCallingConv.td | 6 + include/llvm/Target/TargetItinerary.td | 11 + .../llvm/Target/TargetLoweringObjectFile.h | 3 +- include/llvm/Target/TargetMachine.h | 28 +- include/llvm/Target/TargetSchedule.td | 8 +- include/llvm/Target/TargetSelectionDAG.td | 144 +- include/llvm/TextAPI/MachO/Architecture.h | 4 + include/llvm/TextAPI/MachO/ArchitectureSet.h | 4 + include/llvm/TextAPI/MachO/InterfaceFile.h | 242 +- include/llvm/TextAPI/MachO/Platform.h | 45 + include/llvm/TextAPI/MachO/Symbol.h | 35 +- include/llvm/TextAPI/MachO/Target.h | 68 + include/llvm/TextAPI/MachO/TextAPIReader.h | 5 +- include/llvm/Transforms/IPO/Attributor.h | 1737 ++++- include/llvm/Transforms/IPO/GlobalDCE.h | 14 + .../llvm/Transforms/IPO/HotColdSplitting.h | 39 + include/llvm/Transforms/IPO/LowerTypeTests.h | 2 + .../llvm/Transforms/IPO/WholeProgramDevirt.h | 26 + include/llvm/Transforms/Instrumentation.h | 4 - .../Instrumentation/InstrProfiling.h | 5 +- .../Instrumentation/MemorySanitizer.h | 12 +- .../Instrumentation/SanitizerCoverage.h | 47 + 
.../Instrumentation/ThreadSanitizer.h | 2 + include/llvm/Transforms/Scalar.h | 9 +- .../Transforms/Scalar/CallSiteSplitting.h | 5 - .../llvm/Transforms/Scalar/ConstantHoisting.h | 10 +- include/llvm/Transforms/Scalar/Float2Int.h | 6 +- include/llvm/Transforms/Scalar/GVN.h | 7 +- .../llvm/Transforms/Scalar/GVNExpression.h | 9 +- .../llvm/Transforms/Scalar/LoopPassManager.h | 24 +- .../llvm/Transforms/Scalar/LoopUnrollPass.h | 14 + .../Scalar/LowerConstantIntrinsics.h | 41 + .../Transforms/Scalar/MergedLoadStoreMotion.h | 24 +- include/llvm/Transforms/Scalar/Reassociate.h | 4 +- include/llvm/Transforms/Scalar/SCCP.h | 3 +- .../llvm/Transforms/Utils/BasicBlockUtils.h | 11 +- include/llvm/Transforms/Utils/BuildLibCalls.h | 27 +- .../Transforms/Utils/BypassSlowDivision.h | 13 +- include/llvm/Transforms/Utils/CodeExtractor.h | 57 +- include/llvm/Transforms/Utils/Local.h | 16 +- include/llvm/Transforms/Utils/LoopUtils.h | 5 + include/llvm/Transforms/Utils/MisExpect.h | 43 + include/llvm/Transforms/Utils/PredicateInfo.h | 10 +- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 10 + include/llvm/Transforms/Utils/UnrollLoop.h | 8 +- include/llvm/Transforms/Utils/ValueMapper.h | 9 +- .../Vectorize/LoopVectorizationLegality.h | 48 +- .../llvm/Transforms/Vectorize/LoopVectorize.h | 8 + .../llvm/Transforms/Vectorize/SLPVectorizer.h | 9 +- include/llvm/XRay/FDRRecordProducer.h | 4 +- include/llvm/XRay/FDRRecords.h | 6 +- include/llvm/XRay/FileHeaderReader.h | 2 +- include/llvm/module.modulemap | 2 + lib/Analysis/AliasAnalysis.cpp | 4 +- lib/Analysis/AliasSetTracker.cpp | 12 +- lib/Analysis/Analysis.cpp | 1 + lib/Analysis/AssumptionCache.cpp | 12 +- lib/Analysis/BasicAliasAnalysis.cpp | 42 +- lib/Analysis/BranchProbabilityInfo.cpp | 19 +- lib/Analysis/CFG.cpp | 11 +- lib/Analysis/CFGPrinter.cpp | 2 +- lib/Analysis/CFLAndersAliasAnalysis.cpp | 19 +- lib/Analysis/CFLSteensAliasAnalysis.cpp | 20 +- lib/Analysis/CallGraph.cpp | 4 +- lib/Analysis/CaptureTracking.cpp | 46 +- 
lib/Analysis/ConstantFolding.cpp | 407 +- lib/Analysis/DDG.cpp | 203 + lib/Analysis/DependenceAnalysis.cpp | 8 +- lib/Analysis/DependenceGraphBuilder.cpp | 228 + lib/Analysis/DivergenceAnalysis.cpp | 10 + lib/Analysis/GlobalsModRef.cpp | 37 +- lib/Analysis/IVDescriptors.cpp | 3 +- .../IndirectCallPromotionAnalysis.cpp | 2 +- lib/Analysis/InlineCost.cpp | 23 +- lib/Analysis/InstructionSimplify.cpp | 320 +- lib/Analysis/LazyBranchProbabilityInfo.cpp | 5 +- lib/Analysis/LazyCallGraph.cpp | 13 +- lib/Analysis/LazyValueInfo.cpp | 37 +- lib/Analysis/LegacyDivergenceAnalysis.cpp | 36 +- lib/Analysis/Lint.cpp | 2 +- lib/Analysis/Loads.cpp | 240 +- lib/Analysis/LoopAccessAnalysis.cpp | 45 +- lib/Analysis/LoopAnalysisManager.cpp | 2 +- lib/Analysis/LoopCacheAnalysis.cpp | 625 ++ lib/Analysis/LoopInfo.cpp | 39 + lib/Analysis/LoopUnrollAnalyzer.cpp | 2 +- lib/Analysis/MemDerefPrinter.cpp | 4 +- lib/Analysis/MemoryBuiltins.cpp | 51 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 21 +- lib/Analysis/MemorySSA.cpp | 95 +- lib/Analysis/MemorySSAUpdater.cpp | 323 +- lib/Analysis/ModuleSummaryAnalysis.cpp | 16 +- lib/Analysis/MustExecute.cpp | 118 + lib/Analysis/OptimizationRemarkEmitter.cpp | 4 +- lib/Analysis/OrderedInstructions.cpp | 2 +- lib/Analysis/ProfileSummaryInfo.cpp | 67 + lib/Analysis/ScalarEvolution.cpp | 89 +- lib/Analysis/ScalarEvolutionExpander.cpp | 19 +- lib/Analysis/StackSafetyAnalysis.cpp | 4 +- lib/Analysis/SyncDependenceAnalysis.cpp | 61 +- lib/Analysis/TargetLibraryInfo.cpp | 44 +- lib/Analysis/TargetTransformInfo.cpp | 64 +- lib/Analysis/TypeMetadataUtils.cpp | 32 + lib/Analysis/VFABIDemangling.cpp | 418 ++ lib/Analysis/ValueTracking.cpp | 694 +- lib/Analysis/VectorUtils.cpp | 20 +- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 81 +- lib/AsmParser/LLParser.h | 4 +- lib/AsmParser/LLToken.h | 1 + lib/AsmParser/Parser.cpp | 8 +- lib/BinaryFormat/Dwarf.cpp | 22 +- lib/BinaryFormat/Magic.cpp | 5 + lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 10 + 
lib/Bitcode/Reader/BitcodeReader.cpp | 84 +- lib/Bitcode/Reader/MetadataLoader.cpp | 6 +- lib/Bitcode/Writer/BitWriter.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 9 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 16 +- lib/CodeGen/Analysis.cpp | 12 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 255 +- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 3 +- lib/CodeGen/AsmPrinter/ByteStreamer.h | 12 +- lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 116 +- lib/CodeGen/AsmPrinter/CodeViewDebug.h | 3 +- .../AsmPrinter/DbgEntityHistoryCalculator.cpp | 12 +- lib/CodeGen/AsmPrinter/DebugLocStream.h | 19 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 178 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 31 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 646 +- lib/CodeGen/AsmPrinter/DwarfDebug.h | 22 +- lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 95 +- lib/CodeGen/AsmPrinter/DwarfExpression.h | 95 +- lib/CodeGen/AsmPrinter/DwarfFile.h | 19 +- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 47 +- lib/CodeGen/AsmPrinter/DwarfUnit.h | 14 +- lib/CodeGen/AsmPrinter/EHStreamer.cpp | 6 +- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/WinException.cpp | 3 +- lib/CodeGen/AtomicExpandPass.cpp | 12 +- lib/CodeGen/BranchFolding.cpp | 34 +- lib/CodeGen/BranchRelaxation.cpp | 22 +- lib/CodeGen/BreakFalseDeps.cpp | 23 +- lib/CodeGen/CalcSpillWeights.cpp | 22 +- lib/CodeGen/CallingConvLower.cpp | 42 +- lib/CodeGen/CodeGen.cpp | 5 +- lib/CodeGen/CodeGenPrepare.cpp | 122 +- lib/CodeGen/CriticalAntiDepBreaker.cpp | 9 +- lib/CodeGen/DFAPacketizer.cpp | 81 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 12 +- lib/CodeGen/DetectDeadLanes.cpp | 56 +- lib/CodeGen/EarlyIfConversion.cpp | 345 +- lib/CodeGen/ExecutionDomainFix.cpp | 1 + lib/CodeGen/ExpandMemCmp.cpp | 2 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 10 +- lib/CodeGen/GCMetadata.cpp | 2 +- lib/CodeGen/GCRootLowering.cpp | 4 +- lib/CodeGen/GlobalISel/CSEInfo.cpp | 7 +- 
lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 11 + lib/CodeGen/GlobalISel/CallLowering.cpp | 284 +- lib/CodeGen/GlobalISel/Combiner.cpp | 14 +- lib/CodeGen/GlobalISel/CombinerHelper.cpp | 919 ++- lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 383 ++ lib/CodeGen/GlobalISel/IRTranslator.cpp | 392 +- lib/CodeGen/GlobalISel/InstructionSelect.cpp | 38 +- .../GlobalISel/InstructionSelector.cpp | 2 +- lib/CodeGen/GlobalISel/Legalizer.cpp | 35 +- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 978 ++- lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 42 +- lib/CodeGen/GlobalISel/Localizer.cpp | 11 +- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 93 +- lib/CodeGen/GlobalISel/RegBankSelect.cpp | 13 +- lib/CodeGen/GlobalISel/RegisterBank.cpp | 1 + lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 17 +- lib/CodeGen/GlobalISel/Utils.cpp | 98 +- lib/CodeGen/GlobalMerge.cpp | 8 +- lib/CodeGen/HardwareLoops.cpp | 2 +- lib/CodeGen/IfConversion.cpp | 200 +- lib/CodeGen/ImplicitNullChecks.cpp | 8 +- lib/CodeGen/InlineSpiller.cpp | 22 +- lib/CodeGen/InterleavedLoadCombinePass.cpp | 4 +- lib/CodeGen/LLVMTargetMachine.cpp | 34 +- lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 6 +- lib/CodeGen/LexicalScopes.cpp | 1 + lib/CodeGen/LiveDebugValues.cpp | 506 +- lib/CodeGen/LiveDebugVariables.cpp | 257 +- lib/CodeGen/LiveInterval.cpp | 7 +- lib/CodeGen/LiveIntervals.cpp | 59 +- lib/CodeGen/LivePhysRegs.cpp | 20 +- lib/CodeGen/LiveRangeCalc.cpp | 5 +- lib/CodeGen/LiveRangeEdit.cpp | 14 +- lib/CodeGen/LiveRangeShrink.cpp | 4 +- lib/CodeGen/LiveRegMatrix.cpp | 2 +- lib/CodeGen/LiveRegUnits.cpp | 12 +- lib/CodeGen/LiveStacks.cpp | 7 +- lib/CodeGen/LiveVariables.cpp | 29 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 10 +- lib/CodeGen/LowerEmuTLS.cpp | 7 +- lib/CodeGen/MIRCanonicalizerPass.cpp | 359 +- lib/CodeGen/MIRNamerPass.cpp | 77 + lib/CodeGen/MIRParser/MILexer.cpp | 1 + lib/CodeGen/MIRParser/MILexer.h | 2 + lib/CodeGen/MIRParser/MIParser.cpp | 60 +- lib/CodeGen/MIRParser/MIRParser.cpp | 18 +- 
lib/CodeGen/MIRPrinter.cpp | 16 +- lib/CodeGen/MIRVRegNamerUtils.cpp | 348 + lib/CodeGen/MIRVRegNamerUtils.h | 91 + lib/CodeGen/MachineBasicBlock.cpp | 64 +- lib/CodeGen/MachineBlockPlacement.cpp | 28 +- lib/CodeGen/MachineCSE.cpp | 75 +- lib/CodeGen/MachineCombiner.cpp | 6 +- lib/CodeGen/MachineCopyPropagation.cpp | 78 +- lib/CodeGen/MachineDominators.cpp | 23 +- lib/CodeGen/MachineFrameInfo.cpp | 38 +- lib/CodeGen/MachineFunction.cpp | 58 +- lib/CodeGen/MachineFunctionPass.cpp | 6 +- lib/CodeGen/MachineInstr.cpp | 116 +- lib/CodeGen/MachineInstrBundle.cpp | 14 +- lib/CodeGen/MachineLICM.cpp | 61 +- lib/CodeGen/MachineLoopUtils.cpp | 132 + lib/CodeGen/MachineModuleInfo.cpp | 87 +- lib/CodeGen/MachineOperand.cpp | 70 +- .../MachineOptimizationRemarkEmitter.cpp | 2 +- lib/CodeGen/MachineOutliner.cpp | 16 +- lib/CodeGen/MachinePipeliner.cpp | 1235 +--- lib/CodeGen/MachinePostDominators.cpp | 55 +- lib/CodeGen/MachineRegisterInfo.cpp | 12 +- lib/CodeGen/MachineSSAUpdater.cpp | 6 +- lib/CodeGen/MachineScheduler.cpp | 59 +- lib/CodeGen/MachineSink.cpp | 73 +- lib/CodeGen/MachineTraceMetrics.cpp | 24 +- lib/CodeGen/MachineVerifier.cpp | 163 +- lib/CodeGen/MacroFusion.cpp | 4 +- lib/CodeGen/ModuloSchedule.cpp | 2022 ++++++ lib/CodeGen/OptimizePHIs.cpp | 15 +- lib/CodeGen/PHIElimination.cpp | 43 +- lib/CodeGen/PatchableFunction.cpp | 2 +- lib/CodeGen/PeepholeOptimizer.cpp | 83 +- lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +- lib/CodeGen/ProcessImplicitDefs.cpp | 8 +- lib/CodeGen/PrologEpilogInserter.cpp | 2 +- lib/CodeGen/PseudoSourceValue.cpp | 6 +- lib/CodeGen/ReachingDefAnalysis.cpp | 1 + lib/CodeGen/RegAllocBase.cpp | 4 +- lib/CodeGen/RegAllocFast.cpp | 117 +- lib/CodeGen/RegAllocGreedy.cpp | 16 +- lib/CodeGen/RegAllocPBQP.cpp | 12 +- lib/CodeGen/RegUsageInfoCollector.cpp | 10 +- lib/CodeGen/RegUsageInfoPropagate.cpp | 6 +- lib/CodeGen/RegisterCoalescer.cpp | 71 +- lib/CodeGen/RegisterPressure.cpp | 36 +- lib/CodeGen/RegisterScavenging.cpp | 62 +- 
lib/CodeGen/RenameIndependentSubregs.cpp | 4 +- lib/CodeGen/SafeStack.cpp | 2 +- lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 167 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 57 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1762 ++--- lib/CodeGen/SelectionDAG/FastISel.cpp | 67 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 7 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 77 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 222 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 430 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 510 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 56 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 61 +- .../SelectionDAG/LegalizeTypesGeneric.cpp | 46 +- .../SelectionDAG/LegalizeVectorOps.cpp | 50 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 139 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 18 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 18 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 9 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 283 +- .../SelectionDAGAddressAnalysis.cpp | 1 + .../SelectionDAG/SelectionDAGBuilder.cpp | 495 +- .../SelectionDAG/SelectionDAGBuilder.h | 2 +- .../SelectionDAG/SelectionDAGDumper.cpp | 9 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 35 +- .../SelectionDAG/StatepointLowering.cpp | 34 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1402 +++- lib/CodeGen/ShrinkWrap.cpp | 5 +- lib/CodeGen/SjLjEHPrepare.cpp | 5 +- lib/CodeGen/SplitKit.cpp | 6 +- lib/CodeGen/SplitKit.h | 2 +- lib/CodeGen/StackMaps.cpp | 8 +- lib/CodeGen/StackProtector.cpp | 67 +- lib/CodeGen/StackSlotColoring.cpp | 8 +- lib/CodeGen/SwiftErrorValueTracking.cpp | 3 +- lib/CodeGen/TailDuplicator.cpp | 22 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 19 +- lib/CodeGen/TargetInstrInfo.cpp | 82 +- lib/CodeGen/TargetLoweringBase.cpp | 95 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 107 +- lib/CodeGen/TargetPassConfig.cpp | 24 +- 
lib/CodeGen/TargetRegisterInfo.cpp | 60 +- lib/CodeGen/TargetSchedule.cpp | 2 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 90 +- lib/CodeGen/UnreachableBlockElim.cpp | 15 +- lib/CodeGen/ValueTypes.cpp | 150 +- lib/CodeGen/VirtRegMap.cpp | 71 +- lib/CodeGen/XRayInstrumentation.cpp | 2 +- lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 15 - lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 8 +- lib/DebugInfo/CodeView/EnumTables.cpp | 166 + lib/DebugInfo/CodeView/SymbolDumper.cpp | 2 +- .../CodeView/SymbolRecordMapping.cpp | 2 +- lib/DebugInfo/CodeView/TypeRecordMapping.cpp | 238 +- .../DWARF/DWARFAbbreviationDeclaration.cpp | 8 +- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 105 +- lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 10 +- lib/DebugInfo/DWARF/DWARFContext.cpp | 337 +- lib/DebugInfo/DWARF/DWARFDataExtractor.cpp | 13 +- lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 10 +- lib/DebugInfo/DWARF/DWARFDebugAddr.cpp | 28 +- lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 4 +- lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 12 +- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 74 +- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 8 +- lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 159 +- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 259 +- lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 2 +- lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp | 6 +- lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp | 18 +- lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp | 30 +- lib/DebugInfo/DWARF/DWARFDie.cpp | 54 +- lib/DebugInfo/DWARF/DWARFExpression.cpp | 13 +- lib/DebugInfo/DWARF/DWARFFormValue.cpp | 9 +- lib/DebugInfo/DWARF/DWARFGdbIndex.cpp | 2 +- lib/DebugInfo/DWARF/DWARFListTable.cpp | 70 +- lib/DebugInfo/DWARF/DWARFTypeUnit.cpp | 14 +- lib/DebugInfo/DWARF/DWARFUnit.cpp | 235 +- lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 12 +- lib/DebugInfo/DWARF/DWARFVerifier.cpp | 120 +- lib/DebugInfo/GSYM/FileWriter.cpp | 78 + lib/DebugInfo/GSYM/FunctionInfo.cpp | 143 +- lib/DebugInfo/GSYM/GsymCreator.cpp | 275 + lib/DebugInfo/GSYM/GsymReader.cpp | 
265 + lib/DebugInfo/GSYM/Header.cpp | 109 + lib/DebugInfo/GSYM/InlineInfo.cpp | 100 + lib/DebugInfo/GSYM/LineTable.cpp | 287 + lib/DebugInfo/GSYM/Range.cpp | 47 + lib/DebugInfo/MSF/MappedBlockStream.cpp | 6 +- lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 28 +- lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp | 2 +- lib/DebugInfo/PDB/DIA/DIASession.cpp | 46 +- lib/DebugInfo/PDB/GenericError.cpp | 4 +- .../PDB/Native/DbiModuleDescriptorBuilder.cpp | 4 +- lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 2 +- lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp | 4 +- lib/DebugInfo/PDB/Native/Hash.cpp | 5 +- .../PDB/Native/NativeEnumInjectedSources.cpp | 29 +- lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 24 +- lib/DebugInfo/PDB/Native/NativeSession.cpp | 10 +- lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp | 4 +- .../PDB/Native/NativeTypeFunctionSig.cpp | 6 +- lib/DebugInfo/PDB/Native/PDBFile.cpp | 18 +- lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 17 +- lib/DebugInfo/PDB/Native/TpiHashing.cpp | 6 +- lib/DebugInfo/PDB/Native/TpiStream.cpp | 2 +- lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 2 +- .../PDB/PDBSymbolTypeFunctionSig.cpp | 2 +- lib/DebugInfo/PDB/UDTLayout.cpp | 14 +- lib/DebugInfo/Symbolize/DIPrinter.cpp | 17 +- .../Symbolize/SymbolizableObjectFile.cpp | 32 +- .../Symbolize/SymbolizableObjectFile.h | 7 +- lib/DebugInfo/Symbolize/Symbolize.cpp | 52 +- lib/Demangle/ItaniumDemangle.cpp | 10 + lib/Demangle/MicrosoftDemangle.cpp | 32 +- lib/Demangle/MicrosoftDemangleNodes.cpp | 51 +- lib/ExecutionEngine/ExecutionEngine.cpp | 34 +- .../GDBRegistrationListener.cpp | 8 +- .../Interpreter/ExternalFunctions.cpp | 4 +- .../JITLink/BasicGOTAndStubsBuilder.h | 35 +- .../JITLink/EHFrameSupport.cpp | 214 +- .../JITLink/EHFrameSupportImpl.h | 50 +- lib/ExecutionEngine/JITLink/JITLink.cpp | 156 +- .../JITLink/JITLinkGeneric.cpp | 385 +- lib/ExecutionEngine/JITLink/JITLinkGeneric.h | 175 +- .../JITLink/JITLinkMemoryManager.cpp | 63 +- 
lib/ExecutionEngine/JITLink/MachO.cpp | 3 + .../JITLink/MachOAtomGraphBuilder.cpp | 411 -- .../JITLink/MachOAtomGraphBuilder.h | 138 - .../JITLink/MachOLinkGraphBuilder.cpp | 535 ++ .../JITLink/MachOLinkGraphBuilder.h | 269 + lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 736 ++ lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 279 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 38 +- .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- .../OProfileJIT/OProfileWrapper.cpp | 4 +- .../Orc/CompileOnDemandLayer.cpp | 125 +- lib/ExecutionEngine/Orc/CompileUtils.cpp | 2 +- lib/ExecutionEngine/Orc/Core.cpp | 508 +- lib/ExecutionEngine/Orc/ExecutionUtils.cpp | 92 +- lib/ExecutionEngine/Orc/IRCompileLayer.cpp | 4 +- lib/ExecutionEngine/Orc/IRTransformLayer.cpp | 2 +- lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 27 +- .../Orc/JITTargetMachineBuilder.cpp | 17 +- lib/ExecutionEngine/Orc/LLJIT.cpp | 38 +- lib/ExecutionEngine/Orc/Layer.cpp | 26 +- lib/ExecutionEngine/Orc/LazyReexports.cpp | 18 +- lib/ExecutionEngine/Orc/Legacy.cpp | 5 +- .../Orc/ObjectLinkingLayer.cpp | 256 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 11 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 24 +- lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp | 307 + lib/ExecutionEngine/Orc/Speculation.cpp | 146 + lib/ExecutionEngine/Orc/ThreadSafeModule.cpp | 58 +- .../PerfJITEvents/PerfJITEventListener.cpp | 8 +- .../RuntimeDyld/RuntimeDyld.cpp | 64 +- .../RuntimeDyld/RuntimeDyldCOFF.cpp | 8 +- .../RuntimeDyld/RuntimeDyldChecker.cpp | 2 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 54 +- .../RuntimeDyld/RuntimeDyldImpl.h | 2 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 17 +- .../Targets/RuntimeDyldCOFFX86_64.h | 8 +- .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 5 +- .../Targets/RuntimeDyldMachOI386.h | 5 +- lib/FuzzMutate/FuzzerCLI.cpp | 2 +- lib/IR/AsmWriter.cpp | 11 +- lib/IR/AttributeImpl.h | 6 +- lib/IR/Attributes.cpp | 84 +- lib/IR/AutoUpgrade.cpp | 157 +- lib/IR/BasicBlock.cpp | 7 + lib/IR/ConstantFold.cpp | 25 +- 
lib/IR/ConstantRange.cpp | 76 +- lib/IR/Constants.cpp | 56 +- lib/IR/ConstantsContext.h | 12 +- lib/IR/Core.cpp | 114 +- lib/IR/DIBuilder.cpp | 2 +- lib/IR/DataLayout.cpp | 163 +- lib/IR/DebugInfo.cpp | 22 +- lib/IR/DebugInfoMetadata.cpp | 34 +- lib/IR/DiagnosticInfo.cpp | 11 + lib/IR/Function.cpp | 103 +- lib/IR/Globals.cpp | 51 +- lib/IR/IRBuilder.cpp | 8 +- lib/IR/IRPrintingPasses.cpp | 18 +- lib/IR/InlineAsm.cpp | 10 + lib/IR/Instruction.cpp | 2 +- lib/IR/Instructions.cpp | 167 +- lib/IR/IntrinsicInst.cpp | 15 +- lib/IR/LLVMContext.cpp | 31 +- lib/IR/LLVMContextImpl.cpp | 2 +- lib/IR/LegacyPassManager.cpp | 20 +- lib/IR/MDBuilder.cpp | 12 + lib/IR/Metadata.cpp | 18 + lib/IR/Module.cpp | 2 +- lib/IR/RemarkStreamer.cpp | 74 +- lib/IR/SafepointIRVerifier.cpp | 4 +- lib/IR/Type.cpp | 27 +- lib/IR/Value.cpp | 111 +- lib/IR/Verifier.cpp | 167 +- lib/LTO/Caching.cpp | 4 +- lib/LTO/LTO.cpp | 63 +- lib/LTO/LTOBackend.cpp | 18 +- lib/LTO/LTOCodeGenerator.cpp | 13 +- lib/LTO/LTOModule.cpp | 3 +- lib/LTO/SummaryBasedOptimizations.cpp | 2 +- lib/LTO/ThinLTOCodeGenerator.cpp | 139 +- lib/Linker/IRMover.cpp | 112 +- lib/Linker/LinkModules.cpp | 3 +- lib/MC/ELFObjectWriter.cpp | 86 +- lib/MC/MCAsmBackend.cpp | 5 +- lib/MC/MCAsmInfoXCOFF.cpp | 17 + lib/MC/MCAsmMacro.cpp | 2 + lib/MC/MCAsmStreamer.cpp | 140 +- lib/MC/MCAssembler.cpp | 38 +- lib/MC/MCContext.cpp | 29 +- lib/MC/MCDwarf.cpp | 44 +- lib/MC/MCELFStreamer.cpp | 7 +- lib/MC/MCExpr.cpp | 46 +- lib/MC/MCInstPrinter.cpp | 31 +- lib/MC/MCInstrAnalysis.cpp | 6 + lib/MC/MCMachOStreamer.cpp | 1 + lib/MC/MCObjectFileInfo.cpp | 14 +- lib/MC/MCObjectStreamer.cpp | 4 +- lib/MC/MCParser/AsmParser.cpp | 140 +- lib/MC/MCParser/COFFAsmParser.cpp | 155 +- lib/MC/MCParser/DarwinAsmParser.cpp | 4 +- lib/MC/MCParser/WasmAsmParser.cpp | 1 + lib/MC/MCRegisterInfo.cpp | 48 +- lib/MC/MCSectionXCOFF.cpp | 50 +- lib/MC/MCStreamer.cpp | 84 +- lib/MC/MCSubtargetInfo.cpp | 25 + lib/MC/MCWasmObjectTargetWriter.cpp | 5 +- lib/MC/MCWasmStreamer.cpp 
| 2 +- lib/MC/MCWinCOFFStreamer.cpp | 18 +- lib/MC/MCXCOFFStreamer.cpp | 54 +- lib/MC/MachObjectWriter.cpp | 14 +- lib/MC/StringTableBuilder.cpp | 10 +- lib/MC/WasmObjectWriter.cpp | 77 +- lib/MC/WinCOFFObjectWriter.cpp | 10 +- lib/MC/XCOFFObjectWriter.cpp | 533 +- lib/MCA/CodeEmitter.cpp | 37 + lib/MCA/Context.cpp | 23 +- lib/MCA/HardwareUnits/LSUnit.cpp | 28 +- lib/MCA/HardwareUnits/RegisterFile.cpp | 16 +- lib/MCA/HardwareUnits/ResourceManager.cpp | 59 +- lib/MCA/HardwareUnits/RetireControlUnit.cpp | 65 +- lib/MCA/HardwareUnits/Scheduler.cpp | 12 +- lib/MCA/InstrBuilder.cpp | 44 +- lib/MCA/Instruction.cpp | 4 +- lib/MCA/Stages/DispatchStage.cpp | 19 +- lib/MCA/Stages/EntryStage.cpp | 2 +- lib/MCA/Stages/ExecuteStage.cpp | 22 +- lib/MCA/Stages/RetireStage.cpp | 8 +- lib/Object/Archive.cpp | 6 +- lib/Object/ArchiveWriter.cpp | 35 +- lib/Object/Binary.cpp | 3 + lib/Object/COFFObjectFile.cpp | 198 +- lib/Object/Decompressor.cpp | 15 +- lib/Object/ELF.cpp | 2 + lib/Object/ELFObjectFile.cpp | 38 +- lib/Object/MachOObjectFile.cpp | 48 +- lib/Object/MachOUniversal.cpp | 38 +- lib/Object/Minidump.cpp | 48 +- lib/Object/Object.cpp | 10 +- lib/Object/ObjectFile.cpp | 11 +- lib/Object/RelocationResolver.cpp | 67 +- lib/Object/SymbolicFile.cpp | 1 + lib/Object/TapiFile.cpp | 104 + lib/Object/TapiUniversal.cpp | 54 + lib/Object/WasmObjectFile.cpp | 13 +- lib/Object/WindowsResource.cpp | 346 +- lib/Object/XCOFFObjectFile.cpp | 240 +- lib/ObjectYAML/COFFEmitter.cpp | 622 ++ lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 2 +- lib/ObjectYAML/ELFEmitter.cpp | 1152 ++++ lib/ObjectYAML/ELFYAML.cpp | 325 +- lib/ObjectYAML/MachOEmitter.cpp | 580 ++ lib/ObjectYAML/MachOYAML.cpp | 9 + lib/ObjectYAML/MinidumpEmitter.cpp | 247 + lib/ObjectYAML/MinidumpYAML.cpp | 331 +- lib/ObjectYAML/WasmEmitter.cpp | 633 ++ lib/ObjectYAML/WasmYAML.cpp | 4 +- lib/ObjectYAML/yaml2obj.cpp | 77 + lib/Option/ArgList.cpp | 8 +- lib/Passes/PassBuilder.cpp | 168 +- lib/Passes/PassRegistry.def | 14 +- 
lib/ProfileData/Coverage/CoverageMapping.cpp | 60 +- .../Coverage/CoverageMappingReader.cpp | 20 +- .../Coverage/CoverageMappingWriter.cpp | 10 + lib/ProfileData/GCOV.cpp | 12 +- lib/ProfileData/InstrProf.cpp | 18 +- lib/ProfileData/InstrProfReader.cpp | 44 +- lib/ProfileData/InstrProfWriter.cpp | 2 +- lib/ProfileData/ProfileSummaryBuilder.cpp | 4 +- lib/ProfileData/SampleProf.cpp | 56 +- lib/ProfileData/SampleProfReader.cpp | 447 +- lib/ProfileData/SampleProfWriter.cpp | 279 +- lib/Remarks/BitstreamRemarkParser.cpp | 597 ++ lib/Remarks/BitstreamRemarkParser.h | 83 + lib/Remarks/BitstreamRemarkSerializer.cpp | 386 ++ lib/Remarks/RemarkFormat.cpp | 4 +- lib/Remarks/RemarkParser.cpp | 74 +- lib/Remarks/RemarkSerializer.cpp | 54 + lib/Remarks/RemarkStringTable.cpp | 28 +- lib/Remarks/YAMLRemarkParser.cpp | 165 +- lib/Remarks/YAMLRemarkParser.h | 38 +- lib/Remarks/YAMLRemarkSerializer.cpp | 134 +- lib/Support/AArch64TargetParser.cpp | 4 +- lib/Support/ABIBreak.cpp | 24 + lib/Support/APInt.cpp | 52 + lib/Support/ARMTargetParser.cpp | 8 +- lib/Support/CRC.cpp | 113 +- lib/Support/CachePruning.cpp | 2 +- lib/Support/CodeGenCoverage.cpp | 4 +- lib/Support/CommandLine.cpp | 2 +- lib/Support/CrashRecoveryContext.cpp | 8 +- lib/Support/DataExtractor.cpp | 162 +- lib/Support/Error.cpp | 17 +- lib/Support/FileCheck.cpp | 368 +- lib/Support/FileCheckImpl.h | 624 ++ lib/Support/FileCollector.cpp | 268 + lib/Support/FileOutputBuffer.cpp | 6 +- lib/Support/FileUtilities.cpp | 66 + lib/Support/GlobPattern.cpp | 23 +- lib/Support/Host.cpp | 34 +- lib/Support/JSON.cpp | 2 +- lib/Support/JamCRC.cpp | 96 - lib/Support/ManagedStatic.cpp | 13 +- lib/Support/MemoryBuffer.cpp | 31 +- lib/Support/Mutex.cpp | 123 - lib/Support/Parallel.cpp | 31 +- lib/Support/Path.cpp | 6 +- lib/Support/PrettyStackTrace.cpp | 64 +- lib/Support/RWMutex.cpp | 58 +- lib/Support/Regex.cpp | 39 +- lib/Support/Signposts.cpp | 2 + lib/Support/SpecialCaseList.cpp | 4 +- lib/Support/Statistic.cpp | 27 +- 
lib/Support/StringExtras.cpp | 4 +- lib/Support/TimeProfiler.cpp | 63 +- lib/Support/Timer.cpp | 10 +- lib/Support/Unix/Memory.inc | 6 +- lib/Support/Unix/Mutex.inc | 42 - lib/Support/Unix/Path.inc | 73 +- lib/Support/Unix/Process.inc | 7 +- lib/Support/Unix/Program.inc | 4 +- lib/Support/Unix/RWMutex.inc | 50 - lib/Support/Unix/Signals.inc | 15 +- lib/Support/VirtualFileSystem.cpp | 102 +- lib/Support/Windows/Mutex.inc | 56 - lib/Support/Windows/Path.inc | 85 +- lib/Support/Windows/Program.inc | 2 +- lib/Support/Windows/RWMutex.inc | 128 - lib/Support/Windows/Signals.inc | 3 + lib/Support/Windows/WindowsSupport.h | 1 + lib/Support/Windows/explicit_symbols.inc | 6 - lib/Support/YAMLTraits.cpp | 16 +- lib/Support/Z3Solver.cpp | 2 +- lib/Support/raw_ostream.cpp | 35 +- lib/Support/regcomp.c | 7 +- lib/TableGen/Error.cpp | 2 + lib/TableGen/Main.cpp | 21 +- lib/TableGen/Record.cpp | 11 +- lib/TableGen/SetTheory.cpp | 22 +- lib/TableGen/TGLexer.cpp | 4 +- lib/TableGen/TGParser.cpp | 28 +- lib/Target/AArch64/AArch64.h | 6 +- lib/Target/AArch64/AArch64.td | 80 +- .../AArch64/AArch64A57FPLoadBalancing.cpp | 12 +- .../AArch64/AArch64AdvSIMDScalarPass.cpp | 16 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 270 +- lib/Target/AArch64/AArch64CallLowering.cpp | 634 +- lib/Target/AArch64/AArch64CallLowering.h | 29 +- .../AArch64/AArch64CallingConvention.cpp | 38 +- lib/Target/AArch64/AArch64CallingConvention.h | 3 + .../AArch64/AArch64CallingConvention.td | 88 +- lib/Target/AArch64/AArch64CollectLOH.cpp | 22 +- lib/Target/AArch64/AArch64Combine.td | 18 + lib/Target/AArch64/AArch64CondBrTuning.cpp | 4 +- .../AArch64/AArch64ConditionalCompares.cpp | 6 +- .../AArch64DeadRegisterDefinitionsPass.cpp | 4 +- .../AArch64/AArch64ExpandPseudoInsts.cpp | 76 +- lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 2 +- lib/Target/AArch64/AArch64FastISel.cpp | 75 +- lib/Target/AArch64/AArch64FrameLowering.cpp | 301 +- lib/Target/AArch64/AArch64FrameLowering.h | 28 +- 
lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 45 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 535 +- lib/Target/AArch64/AArch64ISelLowering.h | 37 +- lib/Target/AArch64/AArch64InstrAtomics.td | 65 +- lib/Target/AArch64/AArch64InstrFormats.td | 220 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 1058 +-- lib/Target/AArch64/AArch64InstrInfo.h | 12 +- lib/Target/AArch64/AArch64InstrInfo.td | 253 +- .../AArch64/AArch64InstructionSelector.cpp | 1094 ++- lib/Target/AArch64/AArch64LegalizerInfo.cpp | 117 +- lib/Target/AArch64/AArch64LegalizerInfo.h | 3 + .../AArch64/AArch64LoadStoreOptimizer.cpp | 160 +- lib/Target/AArch64/AArch64MCInstLower.cpp | 2 + .../AArch64/AArch64MachineFunctionInfo.h | 17 + lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 16 +- .../AArch64/AArch64PreLegalizerCombiner.cpp | 98 +- .../AArch64/AArch64RegisterBankInfo.cpp | 39 +- lib/Target/AArch64/AArch64RegisterInfo.cpp | 71 +- lib/Target/AArch64/AArch64SIMDInstrOpt.cpp | 8 +- lib/Target/AArch64/AArch64SVEInstrInfo.td | 256 +- .../AArch64/AArch64SelectionDAGInfo.cpp | 2 +- .../AArch64/AArch64SpeculationHardening.cpp | 13 +- lib/Target/AArch64/AArch64StackOffset.h | 138 + lib/Target/AArch64/AArch64StackTagging.cpp | 394 +- .../AArch64/AArch64StackTaggingPreRA.cpp | 209 + .../AArch64/AArch64StorePairSuppress.cpp | 2 +- lib/Target/AArch64/AArch64Subtarget.cpp | 50 +- lib/Target/AArch64/AArch64Subtarget.h | 48 +- lib/Target/AArch64/AArch64SystemOperands.td | 40 +- lib/Target/AArch64/AArch64TargetMachine.cpp | 35 +- .../AArch64/AArch64TargetObjectFile.cpp | 4 +- lib/Target/AArch64/AArch64TargetObjectFile.h | 3 +- .../AArch64/AArch64TargetTransformInfo.cpp | 29 +- .../AArch64/AArch64TargetTransformInfo.h | 14 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 131 +- .../MCTargetDesc/AArch64AsmBackend.cpp | 13 +- .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 22 +- .../MCTargetDesc/AArch64InstPrinter.cpp | 3 +- .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5 +- .../AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 2 +- 
.../AArch64/MCTargetDesc/AArch64MCExpr.cpp | 7 + .../AArch64/MCTargetDesc/AArch64MCExpr.h | 20 +- .../MCTargetDesc/AArch64MCTargetDesc.cpp | 2 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp | 4 +- .../AArch64WinCOFFObjectWriter.cpp | 2 +- lib/Target/AArch64/SVEInstrFormats.td | 366 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 2 +- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 25 +- lib/Target/AMDGPU/AMDGPU.h | 4 + lib/Target/AMDGPU/AMDGPU.td | 16 + .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 21 +- lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 4 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 78 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 10 +- lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 351 +- lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 695 +- lib/Target/AMDGPU/AMDGPUCallLowering.h | 29 +- lib/Target/AMDGPU/AMDGPUCallingConv.td | 27 +- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 12 +- lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 6 +- lib/Target/AMDGPU/AMDGPUFrameLowering.h | 4 +- lib/Target/AMDGPU/AMDGPUGISel.td | 78 +- .../AMDGPU/AMDGPUGenRegisterBankInfo.def | 76 +- .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 9 +- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 2 +- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 272 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 250 +- lib/Target/AMDGPU/AMDGPUISelLowering.h | 18 +- lib/Target/AMDGPU/AMDGPUInline.cpp | 2 +- lib/Target/AMDGPU/AMDGPUInstrInfo.td | 126 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 1146 +++- lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 48 +- lib/Target/AMDGPU/AMDGPUInstructions.td | 216 +- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1122 +++- lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 52 +- lib/Target/AMDGPU/AMDGPULibCalls.cpp | 37 +- lib/Target/AMDGPU/AMDGPULibFunc.cpp | 14 +- .../AMDGPU/AMDGPULowerKernelArguments.cpp | 20 +- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 4 + .../AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 38 +- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 1 - 
lib/Target/AMDGPU/AMDGPUMachineFunction.h | 6 +- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 592 ++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 2 +- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1242 +++- lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 54 +- lib/Target/AMDGPU/AMDGPURegisterBanks.td | 6 +- lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 64 +- lib/Target/AMDGPU/AMDGPURegisterInfo.h | 2 +- lib/Target/AMDGPU/AMDGPUSearchableTables.td | 4 + lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 64 +- lib/Target/AMDGPU/AMDGPUSubtarget.h | 67 +- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 29 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 90 +- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 20 +- lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 8 +- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 605 +- lib/Target/AMDGPU/BUFInstructions.td | 694 +- lib/Target/AMDGPU/DSInstructions.td | 92 +- .../Disassembler/AMDGPUDisassembler.cpp | 4 +- lib/Target/AMDGPU/EvergreenInstructions.td | 60 +- lib/Target/AMDGPU/FLATInstructions.td | 192 +- lib/Target/AMDGPU/GCNDPPCombine.cpp | 88 +- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 21 +- lib/Target/AMDGPU/GCNILPSched.cpp | 1 + lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +- lib/Target/AMDGPU/GCNNSAReassign.cpp | 8 +- lib/Target/AMDGPU/GCNRegBankReassign.cpp | 14 +- lib/Target/AMDGPU/GCNRegPressure.cpp | 26 +- lib/Target/AMDGPU/GCNRegPressure.h | 2 +- lib/Target/AMDGPU/GCNSchedStrategy.cpp | 31 +- lib/Target/AMDGPU/GCNSchedStrategy.h | 3 + .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 2 +- .../MCTargetDesc/AMDGPUELFObjectWriter.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 37 +- .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 6 +- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 +- lib/Target/AMDGPU/MIMGInstructions.td | 4 +- lib/Target/AMDGPU/R600AsmPrinter.cpp | 2 +- .../AMDGPU/R600ControlFlowFinalizer.cpp | 4 +- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 22 +- lib/Target/AMDGPU/R600FrameLowering.h | 6 +- 
lib/Target/AMDGPU/R600ISelLowering.cpp | 7 +- lib/Target/AMDGPU/R600InstrInfo.cpp | 22 +- lib/Target/AMDGPU/R600MachineScheduler.cpp | 8 +- .../AMDGPU/R600OptimizeVectorRegisters.cpp | 12 +- lib/Target/AMDGPU/R600Packetizer.cpp | 4 +- lib/Target/AMDGPU/R600RegisterInfo.cpp | 2 +- lib/Target/AMDGPU/SIAddIMGInit.cpp | 4 +- lib/Target/AMDGPU/SIDefines.h | 6 +- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 394 +- lib/Target/AMDGPU/SIFixupVectorISel.cpp | 3 +- lib/Target/AMDGPU/SIFoldOperands.cpp | 112 +- lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 22 +- lib/Target/AMDGPU/SIFrameLowering.cpp | 34 +- lib/Target/AMDGPU/SIFrameLowering.h | 6 +- lib/Target/AMDGPU/SIISelLowering.cpp | 1052 +-- lib/Target/AMDGPU/SIISelLowering.h | 54 +- lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 6 +- lib/Target/AMDGPU/SIInstrFormats.td | 5 + lib/Target/AMDGPU/SIInstrInfo.cpp | 558 +- lib/Target/AMDGPU/SIInstrInfo.h | 44 +- lib/Target/AMDGPU/SIInstrInfo.td | 320 +- lib/Target/AMDGPU/SIInstructions.td | 315 +- lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 1195 ++-- lib/Target/AMDGPU/SILowerControlFlow.cpp | 60 +- lib/Target/AMDGPU/SILowerI1Copies.cpp | 49 +- lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 4 +- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 9 +- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 11 +- lib/Target/AMDGPU/SIMachineScheduler.cpp | 16 +- lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 6 +- lib/Target/AMDGPU/SIModeRegister.cpp | 2 +- lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 2 +- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 32 +- lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 32 +- lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 12 +- lib/Target/AMDGPU/SIProgramInfo.h | 5 + lib/Target/AMDGPU/SIRegisterInfo.cpp | 439 +- lib/Target/AMDGPU/SIRegisterInfo.h | 15 +- lib/Target/AMDGPU/SIRegisterInfo.td | 456 +- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 42 +- lib/Target/AMDGPU/SIWholeQuadMode.cpp | 35 +- lib/Target/AMDGPU/SMInstructions.td | 15 +- lib/Target/AMDGPU/SOPInstructions.td | 42 +- 
lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 71 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 24 +- lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 2 + lib/Target/AMDGPU/VOP1Instructions.td | 65 +- lib/Target/AMDGPU/VOP2Instructions.td | 360 +- lib/Target/AMDGPU/VOP3Instructions.td | 69 +- lib/Target/AMDGPU/VOP3PInstructions.td | 2 + lib/Target/AMDGPU/VOPCInstructions.td | 30 +- lib/Target/AMDGPU/VOPInstructions.td | 12 +- lib/Target/ARC/ARCFrameLowering.h | 4 +- lib/Target/ARC/ARCISelLowering.cpp | 2 +- lib/Target/ARC/ARCMachineFunctionInfo.h | 4 +- lib/Target/ARC/ARCOptAddrMode.cpp | 16 +- lib/Target/ARC/ARCRegisterInfo.cpp | 2 +- lib/Target/ARC/ARCTargetMachine.cpp | 2 +- lib/Target/ARM/A15SDOptimizer.cpp | 54 +- lib/Target/ARM/ARM.h | 2 + lib/Target/ARM/ARM.td | 42 +- lib/Target/ARM/ARMAsmPrinter.cpp | 66 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 216 +- lib/Target/ARM/ARMBaseInstrInfo.h | 21 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 49 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 5 +- lib/Target/ARM/ARMBasicBlockInfo.cpp | 16 +- lib/Target/ARM/ARMBasicBlockInfo.h | 31 +- lib/Target/ARM/ARMCallLowering.cpp | 54 +- lib/Target/ARM/ARMCallLowering.h | 5 +- lib/Target/ARM/ARMCallingConv.cpp | 2 +- lib/Target/ARM/ARMCodeGenPrepare.cpp | 90 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 291 +- lib/Target/ARM/ARMConstantPoolValue.cpp | 1 + lib/Target/ARM/ARMExpandPseudoInsts.cpp | 96 +- lib/Target/ARM/ARMFastISel.cpp | 88 +- lib/Target/ARM/ARMFrameLowering.cpp | 65 +- lib/Target/ARM/ARMFrameLowering.h | 5 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 224 +- lib/Target/ARM/ARMISelLowering.cpp | 2073 ++++-- lib/Target/ARM/ARMISelLowering.h | 45 +- lib/Target/ARM/ARMInstrFormats.td | 23 +- lib/Target/ARM/ARMInstrInfo.cpp | 2 +- lib/Target/ARM/ARMInstrInfo.td | 127 +- lib/Target/ARM/ARMInstrMVE.td | 1420 +++- lib/Target/ARM/ARMInstrNEON.td | 191 +- lib/Target/ARM/ARMInstrThumb.td | 16 +- lib/Target/ARM/ARMInstrThumb2.td | 98 +- lib/Target/ARM/ARMInstrVFP.td | 96 +- 
lib/Target/ARM/ARMInstructionSelector.cpp | 41 +- lib/Target/ARM/ARMLegalizerInfo.cpp | 2 + lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 32 +- lib/Target/ARM/ARMLowOverheadLoops.cpp | 364 +- lib/Target/ARM/ARMMCInstLower.cpp | 4 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 8 + lib/Target/ARM/ARMParallelDSP.cpp | 671 +- lib/Target/ARM/ARMPredicates.td | 2 +- lib/Target/ARM/ARMRegisterInfo.td | 18 +- lib/Target/ARM/ARMScheduleA9.td | 4 +- lib/Target/ARM/ARMScheduleM4.td | 24 +- lib/Target/ARM/ARMSubtarget.cpp | 14 +- lib/Target/ARM/ARMSubtarget.h | 30 +- lib/Target/ARM/ARMTargetMachine.cpp | 13 +- lib/Target/ARM/ARMTargetTransformInfo.cpp | 362 +- lib/Target/ARM/ARMTargetTransformInfo.h | 24 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 251 +- .../ARM/Disassembler/ARMDisassembler.cpp | 31 +- .../ARM/MCTargetDesc/ARMAddressingModes.h | 20 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 14 +- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 3 + .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 6 +- .../ARM/MCTargetDesc/ARMInstPrinter.cpp | 12 +- lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h | 5 +- .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 21 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 6 +- .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 4 +- .../MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 2 +- .../ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 10 - lib/Target/ARM/MLxExpansionPass.cpp | 42 +- lib/Target/ARM/MVETailPredication.cpp | 519 ++ lib/Target/ARM/MVEVPTBlockPass.cpp | 278 + lib/Target/ARM/Thumb1FrameLowering.cpp | 8 +- lib/Target/ARM/Thumb1InstrInfo.cpp | 17 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 134 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 38 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 28 +- lib/Target/ARM/ThumbRegisterInfo.cpp | 11 +- lib/Target/AVR/AVRAsmPrinter.cpp | 2 +- lib/Target/AVR/AVRExpandPseudoInsts.cpp | 12 +- lib/Target/AVR/AVRFrameLowering.cpp | 5 +- lib/Target/AVR/AVRISelDAGToDAG.cpp | 2 +- lib/Target/AVR/AVRISelLowering.cpp | 27 +- 
lib/Target/AVR/AVRISelLowering.h | 4 +- lib/Target/AVR/AVRRegisterInfo.cpp | 2 +- lib/Target/AVR/AVRTargetMachine.cpp | 2 +- lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 8 +- .../AVR/MCTargetDesc/AVRELFObjectWriter.cpp | 2 +- lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 6 +- lib/Target/BPF/BPF.h | 4 +- lib/Target/BPF/BPFAbstractMemberAccess.cpp | 718 +- lib/Target/BPF/BPFAsmPrinter.cpp | 2 +- lib/Target/BPF/BPFCORE.h | 14 +- lib/Target/BPF/BPFFrameLowering.h | 2 +- lib/Target/BPF/BPFISelDAGToDAG.cpp | 172 +- lib/Target/BPF/BPFISelLowering.cpp | 21 +- lib/Target/BPF/BPFInstrInfo.cpp | 6 +- lib/Target/BPF/BPFInstrInfo.td | 2 +- lib/Target/BPF/BPFMIChecking.cpp | 1 + lib/Target/BPF/BPFMIPeephole.cpp | 206 +- lib/Target/BPF/BPFMISimplifyPatchable.cpp | 27 +- lib/Target/BPF/BPFRegisterInfo.cpp | 6 +- lib/Target/BPF/BPFTargetMachine.cpp | 16 +- lib/Target/BPF/BTF.h | 54 +- lib/Target/BPF/BTFDebug.cpp | 277 +- lib/Target/BPF/BTFDebug.h | 29 +- .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 4 +- .../Hexagon/AsmParser/HexagonAsmParser.cpp | 2 +- lib/Target/Hexagon/BitTracker.cpp | 21 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 2 +- lib/Target/Hexagon/HexagonBitSimplify.cpp | 71 +- lib/Target/Hexagon/HexagonBitTracker.cpp | 8 +- lib/Target/Hexagon/HexagonBlockRanges.cpp | 14 +- .../Hexagon/HexagonBranchRelaxation.cpp | 5 +- lib/Target/Hexagon/HexagonConstExtenders.cpp | 17 +- .../Hexagon/HexagonConstPropagation.cpp | 32 +- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 32 +- lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td | 696 +- lib/Target/Hexagon/HexagonDepOperands.td | 83 +- lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 24 +- lib/Target/Hexagon/HexagonExpandCondsets.cpp | 30 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 5 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 58 +- lib/Target/Hexagon/HexagonFrameLowering.h | 2 +- lib/Target/Hexagon/HexagonGenExtract.cpp | 2 +- lib/Target/Hexagon/HexagonGenInsert.cpp | 27 +- lib/Target/Hexagon/HexagonGenMux.cpp | 6 +- 
lib/Target/Hexagon/HexagonGenPredicate.cpp | 14 +- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 56 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 2 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 156 +- lib/Target/Hexagon/HexagonISelLowering.h | 15 +- lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 24 + lib/Target/Hexagon/HexagonInstrInfo.cpp | 273 +- lib/Target/Hexagon/HexagonInstrInfo.h | 22 +- lib/Target/Hexagon/HexagonIntrinsics.td | 46 +- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 15 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 10 +- lib/Target/Hexagon/HexagonOptAddrMode.cpp | 12 +- lib/Target/Hexagon/HexagonPatterns.td | 192 +- lib/Target/Hexagon/HexagonPatternsHVX.td | 38 +- lib/Target/Hexagon/HexagonPeephole.cpp | 38 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 6 +- .../Hexagon/HexagonSplitConst32AndConst64.cpp | 8 +- lib/Target/Hexagon/HexagonSplitDouble.cpp | 60 +- lib/Target/Hexagon/HexagonStoreWidening.cpp | 2 +- lib/Target/Hexagon/HexagonSubtarget.cpp | 19 +- lib/Target/Hexagon/HexagonSubtarget.h | 2 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 12 +- .../Hexagon/HexagonTargetTransformInfo.cpp | 2 + .../Hexagon/HexagonTargetTransformInfo.h | 4 +- lib/Target/Hexagon/HexagonVExtract.cpp | 12 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 37 +- lib/Target/Hexagon/HexagonVLIWPacketizer.h | 3 +- .../MCTargetDesc/HexagonAsmBackend.cpp | 6 +- .../MCTargetDesc/HexagonELFObjectWriter.cpp | 4 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 7 +- .../MCTargetDesc/HexagonMCELFStreamer.cpp | 4 +- .../MCTargetDesc/HexagonMCTargetDesc.cpp | 10 +- lib/Target/Hexagon/RDFCopy.cpp | 4 +- lib/Target/Hexagon/RDFDeadCode.cpp | 1 + lib/Target/Hexagon/RDFGraph.cpp | 16 +- lib/Target/Hexagon/RDFLiveness.cpp | 8 +- lib/Target/Hexagon/RDFRegisters.cpp | 8 +- lib/Target/Hexagon/RDFRegisters.h | 8 +- lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 23 +- lib/Target/Lanai/LanaiAsmPrinter.cpp | 2 +- lib/Target/Lanai/LanaiDelaySlotFiller.cpp | 2 +- 
lib/Target/Lanai/LanaiFrameLowering.cpp | 4 +- lib/Target/Lanai/LanaiFrameLowering.h | 2 +- lib/Target/Lanai/LanaiISelLowering.cpp | 15 +- lib/Target/Lanai/LanaiISelLowering.h | 4 +- lib/Target/Lanai/LanaiInstrInfo.cpp | 9 +- lib/Target/Lanai/LanaiInstrInfo.h | 3 +- lib/Target/Lanai/LanaiRegisterInfo.cpp | 2 +- .../MCTargetDesc/LanaiELFObjectWriter.cpp | 2 +- .../MSP430/AsmParser/MSP430AsmParser.cpp | 12 +- .../MCTargetDesc/MSP430ELFObjectWriter.cpp | 4 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 8 +- lib/Target/MSP430/MSP430BranchSelector.cpp | 1 + lib/Target/MSP430/MSP430FrameLowering.h | 3 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 27 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 + lib/Target/MSP430/MSP430RegisterInfo.cpp | 2 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 2 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 696 +- .../Mips/Disassembler/MipsDisassembler.cpp | 16 + .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 1 - lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 6 +- .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 4 +- .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 5 +- .../Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 11 +- .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 2 +- .../Mips/MCTargetDesc/MipsOptionRecord.cpp | 4 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 96 +- lib/Target/Mips/MicroMipsDSPInstrInfo.td | 4 +- lib/Target/Mips/MicroMipsInstrInfo.td | 9 +- lib/Target/Mips/MicroMipsSizeReduction.cpp | 18 +- lib/Target/Mips/Mips.td | 12 + lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +- lib/Target/Mips/Mips16ISelLowering.cpp | 16 +- lib/Target/Mips/Mips16InstrInfo.cpp | 2 +- lib/Target/Mips/Mips64InstrInfo.td | 36 + lib/Target/Mips/MipsAsmPrinter.cpp | 12 +- lib/Target/Mips/MipsCallLowering.cpp | 150 +- lib/Target/Mips/MipsCallLowering.h | 8 +- lib/Target/Mips/MipsConstantIslandPass.cpp | 63 +- lib/Target/Mips/MipsDSPInstrInfo.td | 19 +- lib/Target/Mips/MipsExpandPseudo.cpp | 54 +- 
lib/Target/Mips/MipsFastISel.cpp | 12 +- lib/Target/Mips/MipsFrameLowering.h | 5 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 53 +- lib/Target/Mips/MipsISelDAGToDAG.h | 5 + lib/Target/Mips/MipsISelLowering.cpp | 160 +- lib/Target/Mips/MipsISelLowering.h | 13 +- lib/Target/Mips/MipsInstrInfo.cpp | 3 +- lib/Target/Mips/MipsInstrInfo.h | 2 +- lib/Target/Mips/MipsInstrInfo.td | 30 +- lib/Target/Mips/MipsInstructionSelector.cpp | 206 +- lib/Target/Mips/MipsLegalizerInfo.cpp | 244 +- lib/Target/Mips/MipsLegalizerInfo.h | 3 + lib/Target/Mips/MipsMSAInstrInfo.td | 55 +- lib/Target/Mips/MipsOptimizePICCall.cpp | 5 +- lib/Target/Mips/MipsPfmCounters.td | 18 + lib/Target/Mips/MipsPreLegalizerCombiner.cpp | 3 +- lib/Target/Mips/MipsRegisterBankInfo.cpp | 328 +- lib/Target/Mips/MipsRegisterBankInfo.h | 9 + lib/Target/Mips/MipsRegisterBanks.td | 2 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 55 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 54 +- lib/Target/Mips/MipsSEISelDAGToDAG.h | 6 +- lib/Target/Mips/MipsSEISelLowering.cpp | 124 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 20 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 8 +- lib/Target/Mips/MipsSubtarget.cpp | 17 +- lib/Target/Mips/MipsSubtarget.h | 15 +- lib/Target/Mips/MipsTargetMachine.cpp | 18 +- lib/Target/Mips/MipsTargetStreamer.h | 14 +- lib/Target/NVPTX/NVPTX.h | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 34 +- lib/Target/NVPTX/NVPTXAsmPrinter.h | 2 +- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 58 +- lib/Target/NVPTX/NVPTXInstrInfo.td | 13 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 169 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 2 +- lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 97 +- lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +- lib/Target/NVPTX/NVPTXPeephole.cpp | 2 +- lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- lib/Target/NVPTX/NVPTXUtilities.cpp | 13 +- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 10 +- 
.../PowerPC/Disassembler/PPCDisassembler.cpp | 6 - .../MCTargetDesc/PPCELFObjectWriter.cpp | 6 +- .../PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 25 + .../PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1 + lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 4 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 14 +- .../MCTargetDesc/PPCMachObjectWriter.cpp | 4 +- .../MCTargetDesc/PPCXCOFFObjectWriter.cpp | 2 +- lib/Target/PowerPC/P9InstrResources.td | 8 +- lib/Target/PowerPC/PPC.h | 8 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 490 +- lib/Target/PowerPC/PPCBranchCoalescing.cpp | 13 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 29 +- lib/Target/PowerPC/PPCFastISel.cpp | 41 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 71 +- lib/Target/PowerPC/PPCFrameLowering.h | 11 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 105 +- lib/Target/PowerPC/PPCISelLowering.cpp | 544 +- lib/Target/PowerPC/PPCISelLowering.h | 49 +- lib/Target/PowerPC/PPCInstr64Bit.td | 4 +- lib/Target/PowerPC/PPCInstrAltivec.td | 12 +- lib/Target/PowerPC/PPCInstrFormats.td | 9 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 336 +- lib/Target/PowerPC/PPCInstrInfo.h | 40 +- lib/Target/PowerPC/PPCInstrInfo.td | 206 +- lib/Target/PowerPC/PPCInstrVSX.td | 180 +- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 676 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 23 +- lib/Target/PowerPC/PPCMIPeephole.cpp | 82 +- lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 106 +- lib/Target/PowerPC/PPCQPXLoadSplat.cpp | 6 +- lib/Target/PowerPC/PPCReduceCRLogicals.cpp | 15 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 39 +- lib/Target/PowerPC/PPCRegisterInfo.td | 22 +- lib/Target/PowerPC/PPCSubtarget.cpp | 18 +- lib/Target/PowerPC/PPCSubtarget.h | 24 +- lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 4 +- lib/Target/PowerPC/PPCTOCRegDeps.cpp | 9 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 38 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 68 +- lib/Target/PowerPC/PPCTargetTransformInfo.h | 12 +- lib/Target/PowerPC/PPCVSXCopy.cpp | 6 +- 
lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 18 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 32 +- lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 282 +- .../RISCV/Disassembler/RISCVDisassembler.cpp | 139 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 17 +- .../MCTargetDesc/RISCVELFObjectWriter.cpp | 13 +- .../RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 41 + .../RISCV/MCTargetDesc/RISCVInstPrinter.h | 8 +- .../RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp | 20 + .../RISCV/MCTargetDesc/RISCVMCAsmInfo.h | 3 + .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 4 +- lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h | 1 + .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 4 +- lib/Target/RISCV/RISCV.h | 7 + lib/Target/RISCV/RISCV.td | 11 +- lib/Target/RISCV/RISCVCallLowering.cpp | 50 + lib/Target/RISCV/RISCVCallLowering.h | 42 + lib/Target/RISCV/RISCVCallingConv.td | 28 +- lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 54 +- lib/Target/RISCV/RISCVFrameLowering.cpp | 164 +- lib/Target/RISCV/RISCVFrameLowering.h | 9 +- lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 5 +- lib/Target/RISCV/RISCVISelLowering.cpp | 323 +- lib/Target/RISCV/RISCVISelLowering.h | 6 + lib/Target/RISCV/RISCVInstrInfo.cpp | 114 +- lib/Target/RISCV/RISCVInstrInfo.h | 18 +- lib/Target/RISCV/RISCVInstrInfo.td | 22 + lib/Target/RISCV/RISCVInstrInfoA.td | 34 +- lib/Target/RISCV/RISCVInstrInfoC.td | 124 +- lib/Target/RISCV/RISCVInstrInfoF.td | 6 + lib/Target/RISCV/RISCVInstructionSelector.cpp | 103 + lib/Target/RISCV/RISCVLegalizerInfo.cpp | 23 + lib/Target/RISCV/RISCVLegalizerInfo.h | 28 + lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 16 +- lib/Target/RISCV/RISCVRegisterBankInfo.cpp | 26 + lib/Target/RISCV/RISCVRegisterBankInfo.h | 37 + lib/Target/RISCV/RISCVRegisterBanks.td | 13 + lib/Target/RISCV/RISCVRegisterInfo.cpp | 13 +- lib/Target/RISCV/RISCVRegisterInfo.h | 6 + lib/Target/RISCV/RISCVRegisterInfo.td | 100 +- lib/Target/RISCV/RISCVSubtarget.cpp | 30 +- lib/Target/RISCV/RISCVSubtarget.h | 20 + 
lib/Target/RISCV/RISCVTargetMachine.cpp | 31 +- lib/Target/RISCV/Utils/RISCVBaseInfo.h | 16 + lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 8 +- lib/Target/Sparc/DelaySlotFiller.cpp | 10 +- .../MCTargetDesc/SparcELFObjectWriter.cpp | 6 +- lib/Target/Sparc/SparcFrameLowering.cpp | 3 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 4 +- lib/Target/Sparc/SparcISelLowering.cpp | 28 +- lib/Target/Sparc/SparcISelLowering.h | 4 +- lib/Target/Sparc/SparcInstr64Bit.td | 2 +- lib/Target/Sparc/SparcInstrInfo.cpp | 4 +- lib/Target/Sparc/SparcInstrInfo.td | 8 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 12 +- lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- .../SystemZ/AsmParser/SystemZAsmParser.cpp | 12 +- .../MCTargetDesc/SystemZMCObjectWriter.cpp | 2 +- lib/Target/SystemZ/SystemZ.h | 1 - lib/Target/SystemZ/SystemZAsmPrinter.cpp | 20 + lib/Target/SystemZ/SystemZAsmPrinter.h | 1 + lib/Target/SystemZ/SystemZElimCompare.cpp | 9 +- lib/Target/SystemZ/SystemZExpandPseudo.cpp | 152 - lib/Target/SystemZ/SystemZFrameLowering.cpp | 6 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 11 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 244 +- lib/Target/SystemZ/SystemZInstrFP.td | 32 +- lib/Target/SystemZ/SystemZInstrFormats.td | 166 +- lib/Target/SystemZ/SystemZInstrInfo.cpp | 168 +- lib/Target/SystemZ/SystemZInstrInfo.h | 29 +- lib/Target/SystemZ/SystemZInstrInfo.td | 22 +- lib/Target/SystemZ/SystemZInstrVector.td | 26 +- lib/Target/SystemZ/SystemZLongBranch.cpp | 26 +- .../SystemZ/SystemZMachineScheduler.cpp | 5 +- lib/Target/SystemZ/SystemZOperands.td | 121 +- lib/Target/SystemZ/SystemZOperators.td | 6 +- lib/Target/SystemZ/SystemZPatterns.td | 4 +- lib/Target/SystemZ/SystemZPostRewrite.cpp | 164 +- lib/Target/SystemZ/SystemZProcessors.td | 3 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 19 +- lib/Target/SystemZ/SystemZRegisterInfo.h | 9 + lib/Target/SystemZ/SystemZSchedule.td | 2 +- ...cheduleArch13.td => SystemZScheduleZ15.td} | 64 +- .../SystemZ/SystemZSelectionDAGInfo.cpp | 8 +- 
lib/Target/SystemZ/SystemZShortenInst.cpp | 4 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 11 +- .../SystemZ/SystemZTargetTransformInfo.cpp | 5 +- .../SystemZ/SystemZTargetTransformInfo.h | 8 +- lib/Target/TargetLoweringObjectFile.cpp | 1 + lib/Target/TargetMachine.cpp | 20 +- lib/Target/TargetMachineC.cpp | 2 +- .../AsmParser/WebAssemblyAsmParser.cpp | 100 +- .../Disassembler/WebAssemblyDisassembler.cpp | 24 +- .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 10 +- .../MCTargetDesc/WebAssemblyInstPrinter.cpp | 57 +- .../MCTargetDesc/WebAssemblyInstPrinter.h | 3 + .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 1 + .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 74 +- .../WebAssemblyTargetStreamer.cpp | 33 +- .../MCTargetDesc/WebAssemblyTargetStreamer.h | 3 - .../WebAssemblyWasmObjectWriter.cpp | 11 +- .../WebAssembly/WebAssemblyAsmPrinter.cpp | 38 +- lib/Target/WebAssembly/WebAssemblyCFGSort.cpp | 5 +- .../WebAssembly/WebAssemblyCFGStackify.cpp | 151 +- .../WebAssembly/WebAssemblyExplicitLocals.cpp | 22 +- .../WebAssembly/WebAssemblyFastISel.cpp | 36 +- .../WebAssemblyFixFunctionBitcasts.cpp | 2 + .../WebAssemblyFixIrreducibleControlFlow.cpp | 3 +- .../WebAssembly/WebAssemblyFrameLowering.cpp | 8 +- .../WebAssembly/WebAssemblyFrameLowering.h | 4 +- lib/Target/WebAssembly/WebAssemblyISD.def | 2 + .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 131 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 306 +- .../WebAssembly/WebAssemblyISelLowering.h | 2 +- .../WebAssembly/WebAssemblyInstrAtomics.td | 100 +- .../WebAssembly/WebAssemblyInstrBulkMemory.td | 4 +- .../WebAssembly/WebAssemblyInstrControl.td | 44 +- .../WebAssembly/WebAssemblyInstrConv.td | 17 + .../WebAssembly/WebAssemblyInstrInfo.cpp | 4 +- lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 2 +- .../WebAssembly/WebAssemblyInstrInfo.td | 3 +- .../WebAssembly/WebAssemblyInstrMemory.td | 51 - .../WebAssembly/WebAssemblyInstrSIMD.td | 221 +- .../WebAssembly/WebAssemblyLateEHPrepare.cpp | 12 +- 
.../WebAssembly/WebAssemblyLowerBrUnless.cpp | 4 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 119 +- .../WebAssemblyLowerGlobalDtors.cpp | 2 +- .../WebAssembly/WebAssemblyMCInstLower.cpp | 55 +- .../WebAssembly/WebAssemblyMCInstLower.h | 3 + .../WebAssemblyMachineFunctionInfo.cpp | 12 +- .../WebAssemblyMachineFunctionInfo.h | 13 +- .../WebAssemblyMemIntrinsicResults.cpp | 7 +- .../WebAssemblyOptimizeLiveIntervals.cpp | 2 +- .../WebAssemblyOptimizeReturned.cpp | 7 +- .../WebAssembly/WebAssemblyPeephole.cpp | 107 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 2 +- .../WebAssembly/WebAssemblyRegColoring.cpp | 7 +- .../WebAssembly/WebAssemblyRegNumbering.cpp | 2 +- .../WebAssembly/WebAssemblyRegStackify.cpp | 24 +- .../WebAssembly/WebAssemblyRegisterInfo.cpp | 6 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 2 +- .../WebAssemblyTargetTransformInfo.cpp | 5 +- .../WebAssemblyTargetTransformInfo.h | 2 +- .../WebAssembly/WebAssemblyUtilities.cpp | 21 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 170 + lib/Target/X86/AsmParser/X86AsmParserCommon.h | 4 + lib/Target/X86/AsmParser/X86Operand.h | 25 +- .../Disassembler/X86DisassemblerDecoder.cpp | 5 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 +- .../X86/MCTargetDesc/X86ELFObjectWriter.cpp | 19 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 2 + .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 3 + .../X86/MCTargetDesc/X86MCTargetDesc.cpp | 61 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 11 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 7 +- .../MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2 +- .../X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 5 +- .../MCTargetDesc/X86WinCOFFTargetStreamer.cpp | 2 +- lib/Target/X86/X86.h | 10 +- lib/Target/X86/X86.td | 56 +- lib/Target/X86/X86AsmPrinter.cpp | 8 +- .../X86/X86AvoidStoreForwardingBlocks.cpp | 3 +- lib/Target/X86/X86AvoidTrailingCall.cpp | 108 + lib/Target/X86/X86CallFrameOptimization.cpp | 26 +- lib/Target/X86/X86CallLowering.cpp | 49 +- 
lib/Target/X86/X86CallLowering.h | 5 +- lib/Target/X86/X86CallingConv.td | 2 + lib/Target/X86/X86CmovConversion.cpp | 18 +- lib/Target/X86/X86CondBrFolding.cpp | 2 +- lib/Target/X86/X86DomainReassignment.cpp | 20 +- lib/Target/X86/X86EvexToVex.cpp | 2 +- lib/Target/X86/X86ExpandPseudo.cpp | 11 +- lib/Target/X86/X86FastISel.cpp | 15 +- lib/Target/X86/X86FixupBWInsts.cpp | 68 +- lib/Target/X86/X86FixupLEAs.cpp | 203 +- lib/Target/X86/X86FixupSetCC.cpp | 4 +- lib/Target/X86/X86FlagsCopyLowering.cpp | 13 +- lib/Target/X86/X86FloatingPoint.cpp | 6 +- lib/Target/X86/X86FrameLowering.cpp | 106 +- lib/Target/X86/X86FrameLowering.h | 4 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 304 +- lib/Target/X86/X86ISelLowering.cpp | 5914 +++++++++-------- lib/Target/X86/X86ISelLowering.h | 77 +- lib/Target/X86/X86IndirectBranchTracking.cpp | 2 +- lib/Target/X86/X86InsertPrefetch.cpp | 8 +- lib/Target/X86/X86InstrAVX512.td | 1467 ++-- lib/Target/X86/X86InstrArithmetic.td | 10 +- lib/Target/X86/X86InstrBuilder.h | 6 +- lib/Target/X86/X86InstrCMovSetCC.td | 33 +- lib/Target/X86/X86InstrCompiler.td | 135 +- lib/Target/X86/X86InstrControl.td | 85 +- lib/Target/X86/X86InstrExtension.td | 11 +- lib/Target/X86/X86InstrFoldTables.cpp | 287 + lib/Target/X86/X86InstrFoldTables.h | 39 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 26 + lib/Target/X86/X86InstrInfo.cpp | 582 +- lib/Target/X86/X86InstrInfo.h | 28 +- lib/Target/X86/X86InstrInfo.td | 57 +- lib/Target/X86/X86InstrMMX.td | 33 +- lib/Target/X86/X86InstrMPX.td | 32 +- lib/Target/X86/X86InstrSSE.td | 549 +- lib/Target/X86/X86InstrSystem.td | 2 +- lib/Target/X86/X86InstrTSX.td | 2 +- lib/Target/X86/X86InstrXOP.td | 26 +- lib/Target/X86/X86InstructionSelector.cpp | 135 +- lib/Target/X86/X86IntrinsicsInfo.h | 6 +- lib/Target/X86/X86LegalizerInfo.cpp | 20 + lib/Target/X86/X86LegalizerInfo.h | 3 + lib/Target/X86/X86MCInstLower.cpp | 313 +- lib/Target/X86/X86MachineFunctionInfo.h | 8 + lib/Target/X86/X86OptimizeLEAs.cpp | 60 +- 
lib/Target/X86/X86RegisterBankInfo.cpp | 4 +- lib/Target/X86/X86RegisterInfo.cpp | 31 +- lib/Target/X86/X86RetpolineThunks.cpp | 8 +- lib/Target/X86/X86SchedBroadwell.td | 8 +- lib/Target/X86/X86SchedHaswell.td | 8 +- lib/Target/X86/X86SchedPredicates.td | 57 + lib/Target/X86/X86SchedSandyBridge.td | 8 +- lib/Target/X86/X86SchedSkylakeClient.td | 8 +- lib/Target/X86/X86SchedSkylakeServer.td | 8 +- lib/Target/X86/X86Schedule.td | 24 +- lib/Target/X86/X86ScheduleAtom.td | 6 +- lib/Target/X86/X86ScheduleBdVer2.td | 6 +- lib/Target/X86/X86ScheduleBtVer2.td | 255 +- lib/Target/X86/X86ScheduleSLM.td | 8 +- lib/Target/X86/X86ScheduleZnver1.td | 8 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 2 +- .../X86/X86SpeculativeLoadHardening.cpp | 59 +- lib/Target/X86/X86Subtarget.cpp | 18 +- lib/Target/X86/X86Subtarget.h | 23 +- lib/Target/X86/X86TargetMachine.cpp | 49 +- lib/Target/X86/X86TargetMachine.h | 2 +- lib/Target/X86/X86TargetObjectFile.cpp | 4 +- lib/Target/X86/X86TargetObjectFile.h | 3 +- lib/Target/X86/X86TargetTransformInfo.cpp | 269 +- lib/Target/X86/X86TargetTransformInfo.h | 11 +- lib/Target/X86/X86VZeroUpper.cpp | 6 +- lib/Target/X86/X86WinAllocaExpander.cpp | 4 +- lib/Target/X86/X86WinEHState.cpp | 5 +- lib/Target/XCore/XCoreAsmPrinter.cpp | 4 +- lib/Target/XCore/XCoreFrameLowering.cpp | 6 +- .../XCore/XCoreFrameToArgsOffsetElim.cpp | 2 +- lib/Target/XCore/XCoreISelLowering.cpp | 21 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 2 +- lib/Target/XCore/XCoreTargetMachine.cpp | 2 +- lib/Target/XCore/XCoreTargetTransformInfo.h | 3 +- lib/TextAPI/MachO/Architecture.cpp | 4 + lib/TextAPI/MachO/InterfaceFile.cpp | 80 +- lib/TextAPI/MachO/Platform.cpp | 91 + lib/TextAPI/MachO/Symbol.cpp | 9 + lib/TextAPI/MachO/Target.cpp | 75 + lib/TextAPI/MachO/TextStub.cpp | 606 +- lib/TextAPI/MachO/TextStubCommon.cpp | 91 +- lib/TextAPI/MachO/TextStubCommon.h | 8 +- .../llvm-dlltool/DlltoolDriver.cpp | 26 +- lib/ToolDrivers/llvm-lib/LibDriver.cpp | 230 +- .../AggressiveInstCombine.cpp | 
78 +- lib/Transforms/Coroutines/CoroCleanup.cpp | 7 +- lib/Transforms/Coroutines/CoroEarly.cpp | 26 +- lib/Transforms/Coroutines/CoroElide.cpp | 2 +- lib/Transforms/Coroutines/CoroFrame.cpp | 650 +- lib/Transforms/Coroutines/CoroInstr.h | 205 +- lib/Transforms/Coroutines/CoroInternal.h | 160 +- lib/Transforms/Coroutines/CoroSplit.cpp | 1192 +++- lib/Transforms/Coroutines/Coroutines.cpp | 342 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +- lib/Transforms/IPO/Attributor.cpp | 5097 +++++++++++--- lib/Transforms/IPO/BlockExtractor.cpp | 5 +- lib/Transforms/IPO/ConstantMerge.cpp | 4 +- lib/Transforms/IPO/CrossDSOCFI.cpp | 10 +- lib/Transforms/IPO/FunctionAttrs.cpp | 38 +- lib/Transforms/IPO/FunctionImport.cpp | 43 +- lib/Transforms/IPO/GlobalDCE.cpp | 156 +- lib/Transforms/IPO/GlobalOpt.cpp | 176 +- lib/Transforms/IPO/HotColdSplitting.cpp | 61 +- lib/Transforms/IPO/IPO.cpp | 13 + lib/Transforms/IPO/InferFunctionAttrs.cpp | 20 +- lib/Transforms/IPO/Inliner.cpp | 21 +- lib/Transforms/IPO/LoopExtractor.cpp | 6 +- lib/Transforms/IPO/LowerTypeTests.cpp | 303 +- lib/Transforms/IPO/MergeFunctions.cpp | 4 +- lib/Transforms/IPO/PartialInlining.cpp | 20 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 1 + lib/Transforms/IPO/SCCP.cpp | 18 +- lib/Transforms/IPO/SampleProfile.cpp | 236 +- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 21 +- lib/Transforms/IPO/WholeProgramDevirt.cpp | 389 +- .../InstCombine/InstCombineAddSub.cpp | 268 +- .../InstCombine/InstCombineAndOrXor.cpp | 278 +- .../InstCombine/InstCombineAtomicRMW.cpp | 4 +- .../InstCombine/InstCombineCalls.cpp | 121 +- .../InstCombine/InstCombineCasts.cpp | 102 +- .../InstCombine/InstCombineCompares.cpp | 870 ++- .../InstCombine/InstCombineInternal.h | 116 +- .../InstCombineLoadStoreAlloca.cpp | 93 +- .../InstCombine/InstCombineMulDivRem.cpp | 77 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 6 +- .../InstCombine/InstCombineSelect.cpp | 455 +- .../InstCombine/InstCombineShifts.cpp | 378 +- 
.../InstCombineSimplifyDemanded.cpp | 48 +- .../InstCombine/InstCombineVectorOps.cpp | 171 +- .../InstCombine/InstructionCombining.cpp | 69 +- .../Instrumentation/AddressSanitizer.cpp | 98 +- .../Instrumentation/BoundsChecking.cpp | 2 +- lib/Transforms/Instrumentation/CFGMST.h | 4 +- .../ControlHeightReduction.cpp | 26 +- .../Instrumentation/DataFlowSanitizer.cpp | 2 +- .../Instrumentation/GCOVProfiling.cpp | 49 +- .../Instrumentation/HWAddressSanitizer.cpp | 376 +- .../Instrumentation/IndirectCallPromotion.cpp | 2 +- .../Instrumentation/InstrOrderFile.cpp | 3 +- .../Instrumentation/InstrProfiling.cpp | 65 +- .../Instrumentation/Instrumentation.cpp | 5 +- .../Instrumentation/MemorySanitizer.cpp | 89 +- .../Instrumentation/PGOInstrumentation.cpp | 220 +- .../Instrumentation/PGOMemOPSizeOpt.cpp | 6 +- .../Instrumentation/SanitizerCoverage.cpp | 166 +- .../Instrumentation/ThreadSanitizer.cpp | 54 +- .../Instrumentation/ValueProfileCollector.cpp | 78 + .../Instrumentation/ValueProfileCollector.h | 79 + .../Instrumentation/ValueProfilePlugins.inc | 75 + lib/Transforms/ObjCARC/PtrState.cpp | 4 + .../Scalar/AlignmentFromAssumptions.cpp | 8 +- lib/Transforms/Scalar/CallSiteSplitting.cpp | 2 +- lib/Transforms/Scalar/ConstantHoisting.cpp | 24 +- lib/Transforms/Scalar/ConstantProp.cpp | 2 +- .../Scalar/CorrelatedValuePropagation.cpp | 184 +- lib/Transforms/Scalar/DCE.cpp | 31 +- .../Scalar/DeadStoreElimination.cpp | 7 +- lib/Transforms/Scalar/DivRemPairs.cpp | 239 +- lib/Transforms/Scalar/EarlyCSE.cpp | 22 +- lib/Transforms/Scalar/FlattenCFGPass.cpp | 24 +- lib/Transforms/Scalar/Float2Int.cpp | 47 +- lib/Transforms/Scalar/GVN.cpp | 199 +- lib/Transforms/Scalar/GVNHoist.cpp | 17 +- lib/Transforms/Scalar/GuardWidening.cpp | 2 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 389 +- lib/Transforms/Scalar/InferAddressSpaces.cpp | 38 +- lib/Transforms/Scalar/InstSimplifyPass.cpp | 48 +- lib/Transforms/Scalar/JumpThreading.cpp | 18 +- lib/Transforms/Scalar/LICM.cpp | 55 +- 
lib/Transforms/Scalar/LoopDataPrefetch.cpp | 4 +- lib/Transforms/Scalar/LoopDeletion.cpp | 2 +- lib/Transforms/Scalar/LoopFuse.cpp | 642 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 890 ++- lib/Transforms/Scalar/LoopInstSimplify.cpp | 5 +- lib/Transforms/Scalar/LoopInterchange.cpp | 62 +- lib/Transforms/Scalar/LoopLoadElimination.cpp | 3 +- lib/Transforms/Scalar/LoopPredication.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 3 +- lib/Transforms/Scalar/LoopRotation.cpp | 10 +- lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 4 +- lib/Transforms/Scalar/LoopSink.cpp | 9 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 20 +- .../Scalar/LoopUnrollAndJamPass.cpp | 8 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 128 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 87 +- lib/Transforms/Scalar/LoopVersioningLICM.cpp | 31 - .../Scalar/LowerConstantIntrinsics.cpp | 170 + .../Scalar/LowerExpectIntrinsic.cpp | 33 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 110 +- lib/Transforms/Scalar/MergeICmps.cpp | 2 +- .../Scalar/MergedLoadStoreMotion.cpp | 159 +- lib/Transforms/Scalar/NaryReassociate.cpp | 2 +- lib/Transforms/Scalar/NewGVN.cpp | 25 +- .../Scalar/PartiallyInlineLibCalls.cpp | 2 +- lib/Transforms/Scalar/PlaceSafepoints.cpp | 6 +- lib/Transforms/Scalar/Reassociate.cpp | 188 +- .../Scalar/RewriteStatepointsForGC.cpp | 6 +- lib/Transforms/Scalar/SCCP.cpp | 75 +- lib/Transforms/Scalar/SROA.cpp | 40 +- lib/Transforms/Scalar/Scalar.cpp | 9 + .../Scalar/SeparateConstOffsetFromGEP.cpp | 2 +- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 25 +- lib/Transforms/Scalar/SpeculateAroundPHIs.cpp | 6 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +- .../Scalar/TailRecursionElimination.cpp | 2 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 64 +- lib/Transforms/Utils/BuildLibCalls.cpp | 94 +- lib/Transforms/Utils/BypassSlowDivision.cpp | 8 +- lib/Transforms/Utils/CanonicalizeAliases.cpp | 1 + lib/Transforms/Utils/CloneFunction.cpp | 15 + lib/Transforms/Utils/CloneModule.cpp | 18 +- 
lib/Transforms/Utils/CodeExtractor.cpp | 317 +- .../Utils/EntryExitInstrumenter.cpp | 2 +- lib/Transforms/Utils/Evaluator.cpp | 2 +- lib/Transforms/Utils/FlattenCFG.cpp | 20 +- lib/Transforms/Utils/FunctionImportUtils.cpp | 2 +- .../ImportedFunctionsInliningStatistics.cpp | 6 +- lib/Transforms/Utils/LibCallsShrinkWrap.cpp | 2 +- lib/Transforms/Utils/Local.cpp | 209 +- lib/Transforms/Utils/LoopRotationUtils.cpp | 27 +- lib/Transforms/Utils/LoopSimplify.cpp | 15 +- lib/Transforms/Utils/LoopUnroll.cpp | 12 +- lib/Transforms/Utils/LoopUnrollAndJam.cpp | 6 +- lib/Transforms/Utils/LoopUnrollPeel.cpp | 161 +- lib/Transforms/Utils/LoopUtils.cpp | 56 + lib/Transforms/Utils/LoopVersioning.cpp | 4 +- lib/Transforms/Utils/MetaRenamer.cpp | 5 +- lib/Transforms/Utils/MisExpect.cpp | 177 + lib/Transforms/Utils/ModuleUtils.cpp | 2 +- lib/Transforms/Utils/PredicateInfo.cpp | 80 +- lib/Transforms/Utils/SimplifyCFG.cpp | 250 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 690 +- lib/Transforms/Utils/SymbolRewriter.cpp | 12 +- lib/Transforms/Utils/VNCoercion.cpp | 2 +- lib/Transforms/Utils/ValueMapper.cpp | 60 +- .../Vectorize/LoadStoreVectorizer.cpp | 26 +- .../Vectorize/LoopVectorizationLegality.cpp | 186 +- .../Vectorize/LoopVectorizationPlanner.h | 4 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 740 ++- lib/Transforms/Vectorize/SLPVectorizer.cpp | 814 ++- lib/Transforms/Vectorize/VPlan.cpp | 19 +- lib/Transforms/Vectorize/VPlan.h | 4 + .../Vectorize/VPlanHCFGTransforms.cpp | 2 +- lib/Transforms/Vectorize/VPlanSLP.cpp | 13 +- lib/WindowsManifest/WindowsManifestMerger.cpp | 4 +- lib/XRay/FDRRecordProducer.cpp | 37 +- lib/XRay/FileHeaderReader.cpp | 14 +- lib/XRay/InstrumentationMap.cpp | 17 +- lib/XRay/Profile.cpp | 10 +- lib/XRay/RecordInitializer.cpp | 202 +- lib/XRay/Trace.cpp | 43 +- tools/bugpoint/BugDriver.h | 7 +- tools/bugpoint/ExtractFunction.cpp | 3 +- tools/bugpoint/OptimizerDriver.cpp | 12 +- tools/bugpoint/ToolRunner.cpp | 16 +- tools/bugpoint/bugpoint.cpp | 46 +- 
tools/llc/llc.cpp | 27 +- tools/lli/lli.cpp | 71 +- tools/llvm-ar/llvm-ar.cpp | 192 +- tools/llvm-as/llvm-as.cpp | 2 +- tools/llvm-cov/CodeCoverage.cpp | 24 +- tools/llvm-cov/SourceCoverageView.cpp | 8 +- tools/llvm-cov/TestingSupport.cpp | 10 +- tools/llvm-cxxdump/llvm-cxxdump.cpp | 6 +- tools/llvm-cxxmap/llvm-cxxmap.cpp | 2 +- tools/llvm-dis/llvm-dis.cpp | 4 +- tools/llvm-dwarfdump/Statistics.cpp | 263 +- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 2 +- tools/llvm-extract/llvm-extract.cpp | 16 +- tools/llvm-ifs/CMakeLists.txt | 10 + tools/llvm-ifs/LLVMBuild.txt | 21 + tools/llvm-ifs/llvm-ifs.cpp | 532 ++ tools/llvm-link/llvm-link.cpp | 6 +- tools/llvm-lto/llvm-lto.cpp | 20 +- tools/llvm-lto2/llvm-lto2.cpp | 12 +- tools/llvm-mc/Disassembler.cpp | 15 +- tools/llvm-mc/Disassembler.h | 10 +- tools/llvm-mc/llvm-mc.cpp | 27 +- tools/llvm-mca/CodeRegion.cpp | 6 +- tools/llvm-mca/CodeRegionGenerator.cpp | 2 + tools/llvm-mca/Views/BottleneckAnalysis.cpp | 40 +- tools/llvm-mca/Views/BottleneckAnalysis.h | 8 +- tools/llvm-mca/Views/InstructionInfoView.cpp | 31 +- tools/llvm-mca/Views/InstructionInfoView.h | 13 +- tools/llvm-mca/Views/TimelineView.cpp | 50 +- tools/llvm-mca/Views/TimelineView.h | 1 + tools/llvm-mca/llvm-mca.cpp | 113 +- tools/llvm-modextract/llvm-modextract.cpp | 2 +- tools/llvm-nm/llvm-nm.cpp | 51 +- tools/llvm-objcopy/COFF/COFFObjcopy.cpp | 88 +- tools/llvm-objcopy/COFF/Reader.cpp | 18 +- tools/llvm-objcopy/COFF/Writer.cpp | 4 +- tools/llvm-objcopy/CommonOpts.td | 123 + tools/llvm-objcopy/CopyConfig.cpp | 372 +- tools/llvm-objcopy/CopyConfig.h | 110 +- tools/llvm-objcopy/ELF/ELFConfig.cpp | 133 + tools/llvm-objcopy/ELF/ELFConfig.h | 44 + tools/llvm-objcopy/ELF/ELFObjcopy.cpp | 169 +- tools/llvm-objcopy/ELF/Object.cpp | 252 +- tools/llvm-objcopy/ELF/Object.h | 56 +- .../llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 350 + tools/llvm-objcopy/MachO/MachOLayoutBuilder.h | 50 + tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 30 +- 
tools/llvm-objcopy/MachO/MachOReader.cpp | 45 +- tools/llvm-objcopy/MachO/MachOReader.h | 3 + tools/llvm-objcopy/MachO/MachOWriter.cpp | 313 +- tools/llvm-objcopy/MachO/MachOWriter.h | 19 +- tools/llvm-objcopy/MachO/Object.h | 27 + tools/llvm-objcopy/ObjcopyOpts.td | 141 +- tools/llvm-objcopy/StripOpts.td | 103 +- tools/llvm-objcopy/llvm-objcopy.cpp | 53 +- tools/llvm-objdump/COFFDump.cpp | 77 +- tools/llvm-objdump/ELFDump.cpp | 2 +- tools/llvm-objdump/MachODump.cpp | 375 +- tools/llvm-objdump/llvm-objdump.cpp | 545 +- tools/llvm-objdump/llvm-objdump.h | 36 +- tools/llvm-pdbutil/BytesOutputStyle.cpp | 2 +- tools/llvm-pdbutil/DumpOutputStyle.cpp | 9 +- tools/llvm-pdbutil/ExplainOutputStyle.cpp | 2 +- tools/llvm-pdbutil/InputFile.cpp | 17 +- tools/llvm-pdbutil/MinimalSymbolDumper.cpp | 5 +- tools/llvm-pdbutil/PrettyTypeDumper.cpp | 4 +- tools/llvm-pdbutil/llvm-pdbutil.cpp | 10 +- tools/llvm-profdata/llvm-profdata.cpp | 287 +- tools/llvm-readobj/ARMEHABIPrinter.h | 19 +- tools/llvm-readobj/ARMWinEHPrinter.cpp | 9 +- tools/llvm-readobj/COFFDumper.cpp | 362 +- tools/llvm-readobj/DwarfCFIEHPrinter.h | 54 +- tools/llvm-readobj/ELFDumper.cpp | 1668 +++-- tools/llvm-readobj/MachODumper.cpp | 61 +- tools/llvm-readobj/ObjDumper.cpp | 32 +- tools/llvm-readobj/ObjDumper.h | 11 +- tools/llvm-readobj/WasmDumper.cpp | 7 +- tools/llvm-readobj/Win64EHDumper.cpp | 13 +- tools/llvm-readobj/WindowsResourceDumper.cpp | 8 +- tools/llvm-readobj/XCOFFDumper.cpp | 400 +- tools/llvm-readobj/llvm-readobj.cpp | 148 +- tools/llvm-readobj/llvm-readobj.h | 25 +- tools/llvm-reduce/CMakeLists.txt | 26 + tools/llvm-reduce/DeltaManager.h | 36 + tools/llvm-reduce/LLVMBuild.txt | 24 + tools/llvm-reduce/TestRunner.cpp | 42 + tools/llvm-reduce/TestRunner.h | 46 + tools/llvm-reduce/deltas/Delta.cpp | 162 + tools/llvm-reduce/deltas/Delta.h | 76 + tools/llvm-reduce/deltas/ReduceArguments.cpp | 125 + tools/llvm-reduce/deltas/ReduceArguments.h | 21 + .../llvm-reduce/deltas/ReduceBasicBlocks.cpp | 146 + 
tools/llvm-reduce/deltas/ReduceBasicBlocks.h | 20 + tools/llvm-reduce/deltas/ReduceFunctions.cpp | 77 + tools/llvm-reduce/deltas/ReduceFunctions.h | 20 + tools/llvm-reduce/deltas/ReduceGlobalVars.cpp | 74 + tools/llvm-reduce/deltas/ReduceGlobalVars.h | 20 + .../llvm-reduce/deltas/ReduceInstructions.cpp | 65 + tools/llvm-reduce/deltas/ReduceInstructions.h | 20 + tools/llvm-reduce/deltas/ReduceMetadata.cpp | 138 + tools/llvm-reduce/deltas/ReduceMetadata.h | 18 + tools/llvm-reduce/llvm-reduce.cpp | 114 + tools/llvm-rtdyld/llvm-rtdyld.cpp | 102 +- tools/llvm-stress/llvm-stress.cpp | 4 +- tools/llvm-symbolizer/llvm-symbolizer.cpp | 6 + tools/llvm-xray/func-id-helper.cpp | 2 +- tools/llvm-xray/xray-account.cpp | 2 +- tools/llvm-xray/xray-converter.cpp | 4 +- tools/llvm-xray/xray-extract.cpp | 2 +- tools/llvm-xray/xray-fdr-dump.cpp | 2 +- tools/llvm-xray/xray-graph-diff.cpp | 2 +- tools/llvm-xray/xray-graph.cpp | 2 +- tools/opt/opt.cpp | 13 +- tools/vfabi-demangle-fuzzer/CMakeLists.txt | 7 + .../vfabi-demangler-fuzzer.cpp | 26 + utils/TableGen/AsmMatcherEmitter.cpp | 19 +- utils/TableGen/AsmWriterEmitter.cpp | 3 +- utils/TableGen/CallingConvEmitter.cpp | 4 + utils/TableGen/CodeEmitterGen.cpp | 309 +- utils/TableGen/CodeGenDAGPatterns.cpp | 42 +- utils/TableGen/CodeGenDAGPatterns.h | 4 + utils/TableGen/CodeGenInstruction.cpp | 1 + utils/TableGen/CodeGenInstruction.h | 1 + utils/TableGen/CodeGenIntrinsics.h | 8 + utils/TableGen/CodeGenMapTable.cpp | 12 +- utils/TableGen/CodeGenRegisters.cpp | 52 +- utils/TableGen/CodeGenRegisters.h | 26 +- utils/TableGen/CodeGenSchedule.cpp | 24 +- utils/TableGen/CodeGenTarget.cpp | 70 +- utils/TableGen/CodeGenTarget.h | 6 + utils/TableGen/DAGISelEmitter.cpp | 2 +- utils/TableGen/DAGISelMatcher.h | 8 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 22 +- utils/TableGen/DAGISelMatcherGen.cpp | 10 +- utils/TableGen/DAGISelMatcherOpt.cpp | 9 +- utils/TableGen/DFAEmitter.cpp | 394 ++ utils/TableGen/DFAEmitter.h | 107 + 
utils/TableGen/DFAPacketizerEmitter.cpp | 653 +- utils/TableGen/DisassemblerEmitter.cpp | 2 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 93 +- utils/TableGen/GICombinerEmitter.cpp | 452 ++ utils/TableGen/GlobalISel/CMakeLists.txt | 7 + utils/TableGen/GlobalISel/CodeExpander.cpp | 93 + utils/TableGen/GlobalISel/CodeExpander.h | 55 + utils/TableGen/GlobalISel/CodeExpansions.h | 43 + utils/TableGen/GlobalISelEmitter.cpp | 775 ++- utils/TableGen/InfoByHwMode.cpp | 11 + utils/TableGen/InfoByHwMode.h | 5 + utils/TableGen/InstrDocsEmitter.cpp | 2 +- utils/TableGen/InstrInfoEmitter.cpp | 52 +- utils/TableGen/IntrinsicEmitter.cpp | 20 +- utils/TableGen/RISCVCompressInstEmitter.cpp | 13 +- utils/TableGen/RegisterInfoEmitter.cpp | 4 +- utils/TableGen/SearchableTableEmitter.cpp | 16 +- utils/TableGen/SubtargetEmitter.cpp | 8 +- utils/TableGen/SubtargetFeatureInfo.cpp | 12 +- utils/TableGen/TableGen.cpp | 157 +- utils/TableGen/TableGenBackends.h | 2 + .../WebAssemblyDisassemblerEmitter.cpp | 2 +- utils/TableGen/X86DisassemblerTables.cpp | 2 +- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp | 1 + utils/TableGen/X86RecognizableInstr.cpp | 14 +- utils/add_argument_names.py | 82 + utils/llvm-locstats/CMakeLists.txt | 12 + utils/llvm-locstats/llvm-locstats.py | 209 + 2118 files changed, 120930 insertions(+), 48978 deletions(-) create mode 100644 include/llvm/ADT/DirectedGraph.h delete mode 100644 include/llvm/ADT/VariadicFunction.h create mode 100644 include/llvm/Analysis/DDG.h create mode 100644 include/llvm/Analysis/DependenceGraphBuilder.h create mode 100644 include/llvm/Analysis/LoopCacheAnalysis.h create mode 100644 include/llvm/CodeGen/GlobalISel/GISelKnownBits.h rename {lib => include/llvm}/CodeGen/LiveRangeCalc.h (98%) create mode 100644 include/llvm/CodeGen/MachineLoopUtils.h create mode 100644 include/llvm/CodeGen/ModuloSchedule.h create mode 100644 include/llvm/DebugInfo/GSYM/FileWriter.h create mode 100644 include/llvm/DebugInfo/GSYM/GsymCreator.h create mode 100644 
include/llvm/DebugInfo/GSYM/GsymReader.h create mode 100644 include/llvm/DebugInfo/GSYM/Header.h create mode 100644 include/llvm/DebugInfo/GSYM/LineTable.h create mode 100644 include/llvm/ExecutionEngine/JITLink/MachO_arm64.h create mode 100644 include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h create mode 100644 include/llvm/ExecutionEngine/Orc/Speculation.h create mode 100644 include/llvm/IR/FixedMetadataKinds.def create mode 100644 include/llvm/MC/MCRegister.h create mode 100644 include/llvm/MCA/CodeEmitter.h create mode 100644 include/llvm/Object/TapiFile.h create mode 100644 include/llvm/Object/TapiUniversal.h create mode 100644 include/llvm/ObjectYAML/yaml2obj.h create mode 100644 include/llvm/Remarks/BitstreamRemarkContainer.h create mode 100644 include/llvm/Remarks/BitstreamRemarkParser.h create mode 100644 include/llvm/Remarks/BitstreamRemarkSerializer.h create mode 100644 include/llvm/Remarks/YAMLRemarkSerializer.h create mode 100644 include/llvm/Support/Alignment.h create mode 100644 include/llvm/Support/Automaton.h create mode 100644 include/llvm/Support/FileCollector.h delete mode 100644 include/llvm/Support/JamCRC.h delete mode 100644 include/llvm/Support/MutexGuard.h delete mode 100644 include/llvm/Support/ScalableSize.h create mode 100644 include/llvm/Support/TypeSize.h delete mode 100644 include/llvm/Support/UniqueLock.h create mode 100644 include/llvm/TableGen/Automaton.td create mode 100644 include/llvm/Target/GlobalISel/Combine.td create mode 100644 include/llvm/TextAPI/MachO/Platform.h create mode 100644 include/llvm/TextAPI/MachO/Target.h create mode 100644 include/llvm/Transforms/Instrumentation/SanitizerCoverage.h create mode 100644 include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h create mode 100644 include/llvm/Transforms/Utils/MisExpect.h create mode 100644 lib/Analysis/DDG.cpp create mode 100644 lib/Analysis/DependenceGraphBuilder.cpp create mode 100644 lib/Analysis/LoopCacheAnalysis.cpp create mode 100644 
lib/Analysis/VFABIDemangling.cpp create mode 100644 lib/CodeGen/GlobalISel/GISelKnownBits.cpp create mode 100644 lib/CodeGen/MIRNamerPass.cpp create mode 100644 lib/CodeGen/MIRVRegNamerUtils.cpp create mode 100644 lib/CodeGen/MIRVRegNamerUtils.h create mode 100644 lib/CodeGen/MachineLoopUtils.cpp create mode 100644 lib/CodeGen/ModuloSchedule.cpp create mode 100644 lib/DebugInfo/GSYM/FileWriter.cpp create mode 100644 lib/DebugInfo/GSYM/GsymCreator.cpp create mode 100644 lib/DebugInfo/GSYM/GsymReader.cpp create mode 100644 lib/DebugInfo/GSYM/Header.cpp create mode 100644 lib/DebugInfo/GSYM/LineTable.cpp delete mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp delete mode 100644 lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h create mode 100644 lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp create mode 100644 lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h create mode 100644 lib/ExecutionEngine/JITLink/MachO_arm64.cpp create mode 100644 lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp create mode 100644 lib/ExecutionEngine/Orc/Speculation.cpp create mode 100644 lib/MCA/CodeEmitter.cpp create mode 100644 lib/Object/TapiFile.cpp create mode 100644 lib/Object/TapiUniversal.cpp create mode 100644 lib/ObjectYAML/COFFEmitter.cpp create mode 100644 lib/ObjectYAML/ELFEmitter.cpp create mode 100644 lib/ObjectYAML/MachOEmitter.cpp create mode 100644 lib/ObjectYAML/MinidumpEmitter.cpp create mode 100644 lib/ObjectYAML/WasmEmitter.cpp create mode 100644 lib/ObjectYAML/yaml2obj.cpp create mode 100644 lib/Remarks/BitstreamRemarkParser.cpp create mode 100644 lib/Remarks/BitstreamRemarkParser.h create mode 100644 lib/Remarks/BitstreamRemarkSerializer.cpp create mode 100644 lib/Remarks/RemarkSerializer.cpp create mode 100644 lib/Support/ABIBreak.cpp create mode 100644 lib/Support/FileCheckImpl.h create mode 100644 lib/Support/FileCollector.cpp delete mode 100644 lib/Support/JamCRC.cpp delete mode 100644 lib/Support/Mutex.cpp delete mode 100644 
lib/Support/Unix/Mutex.inc delete mode 100644 lib/Support/Unix/RWMutex.inc delete mode 100644 lib/Support/Windows/Mutex.inc delete mode 100644 lib/Support/Windows/RWMutex.inc create mode 100644 lib/Target/AArch64/AArch64Combine.td create mode 100644 lib/Target/AArch64/AArch64StackOffset.h create mode 100644 lib/Target/AArch64/AArch64StackTaggingPreRA.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp create mode 100644 lib/Target/ARM/MVETailPredication.cpp create mode 100644 lib/Target/ARM/MVEVPTBlockPass.cpp create mode 100644 lib/Target/Mips/MipsPfmCounters.td create mode 100644 lib/Target/RISCV/RISCVCallLowering.cpp create mode 100644 lib/Target/RISCV/RISCVCallLowering.h create mode 100644 lib/Target/RISCV/RISCVInstructionSelector.cpp create mode 100644 lib/Target/RISCV/RISCVLegalizerInfo.cpp create mode 100644 lib/Target/RISCV/RISCVLegalizerInfo.h create mode 100644 lib/Target/RISCV/RISCVRegisterBankInfo.cpp create mode 100644 lib/Target/RISCV/RISCVRegisterBankInfo.h create mode 100644 lib/Target/RISCV/RISCVRegisterBanks.td delete mode 100644 lib/Target/SystemZ/SystemZExpandPseudo.cpp rename lib/Target/SystemZ/{SystemZScheduleArch13.td => SystemZScheduleZ15.td} (97%) create mode 100644 lib/Target/X86/X86AvoidTrailingCall.cpp create mode 100644 lib/TextAPI/MachO/Platform.cpp create mode 100644 lib/TextAPI/MachO/Target.cpp create mode 100644 lib/Transforms/Instrumentation/ValueProfileCollector.cpp create mode 100644 lib/Transforms/Instrumentation/ValueProfileCollector.h create mode 100644 lib/Transforms/Instrumentation/ValueProfilePlugins.inc create mode 100644 lib/Transforms/Scalar/LowerConstantIntrinsics.cpp create mode 100644 lib/Transforms/Utils/MisExpect.cpp create mode 100644 tools/llvm-ifs/CMakeLists.txt create mode 100644 tools/llvm-ifs/LLVMBuild.txt create mode 100644 tools/llvm-ifs/llvm-ifs.cpp create mode 100644 tools/llvm-objcopy/CommonOpts.td create mode 100644 tools/llvm-objcopy/ELF/ELFConfig.cpp create mode 100644 
tools/llvm-objcopy/ELF/ELFConfig.h create mode 100644 tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp create mode 100644 tools/llvm-objcopy/MachO/MachOLayoutBuilder.h create mode 100644 tools/llvm-reduce/CMakeLists.txt create mode 100644 tools/llvm-reduce/DeltaManager.h create mode 100644 tools/llvm-reduce/LLVMBuild.txt create mode 100644 tools/llvm-reduce/TestRunner.cpp create mode 100644 tools/llvm-reduce/TestRunner.h create mode 100644 tools/llvm-reduce/deltas/Delta.cpp create mode 100644 tools/llvm-reduce/deltas/Delta.h create mode 100644 tools/llvm-reduce/deltas/ReduceArguments.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceArguments.h create mode 100644 tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceBasicBlocks.h create mode 100644 tools/llvm-reduce/deltas/ReduceFunctions.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceFunctions.h create mode 100644 tools/llvm-reduce/deltas/ReduceGlobalVars.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceGlobalVars.h create mode 100644 tools/llvm-reduce/deltas/ReduceInstructions.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceInstructions.h create mode 100644 tools/llvm-reduce/deltas/ReduceMetadata.cpp create mode 100644 tools/llvm-reduce/deltas/ReduceMetadata.h create mode 100644 tools/llvm-reduce/llvm-reduce.cpp create mode 100644 tools/vfabi-demangle-fuzzer/CMakeLists.txt create mode 100644 tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp create mode 100644 utils/TableGen/DFAEmitter.cpp create mode 100644 utils/TableGen/DFAEmitter.h create mode 100644 utils/TableGen/GICombinerEmitter.cpp create mode 100644 utils/TableGen/GlobalISel/CMakeLists.txt create mode 100644 utils/TableGen/GlobalISel/CodeExpander.cpp create mode 100644 utils/TableGen/GlobalISel/CodeExpander.h create mode 100644 utils/TableGen/GlobalISel/CodeExpansions.h create mode 100755 utils/add_argument_names.py create mode 100644 utils/llvm-locstats/CMakeLists.txt create mode 
100755 utils/llvm-locstats/llvm-locstats.py diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index cac2f297056..b8497095666 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -370,9 +370,13 @@ typedef enum { LLVMAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the original using an unsigned comparison and return the old one */ - LLVMAtomicRMWBinOpUMin /**< Sets the value if it's greater than the - original using an unsigned comparison and return - the old one */ + LLVMAtomicRMWBinOpUMin, /**< Sets the value if it's greater than the + original using an unsigned comparison and return + the old one */ + LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the + old one */ + LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the + old one */ } LLVMAtomicRMWBinOp; typedef enum { @@ -1539,6 +1543,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(GlobalVariable) \ macro(UndefValue) \ macro(Instruction) \ + macro(UnaryOperator) \ macro(BinaryOperator) \ macro(CallInst) \ macro(IntrinsicInst) \ @@ -1571,6 +1576,8 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ResumeInst) \ macro(CleanupReturnInst) \ macro(CatchReturnInst) \ + macro(CatchSwitchInst) \ + macro(CallBrInst) \ macro(FuncletPadInst) \ macro(CatchPadInst) \ macro(CleanupPadInst) \ @@ -1592,7 +1599,10 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ZExtInst) \ macro(ExtractValueInst) \ macro(LoadInst) \ - macro(VAArgInst) + macro(VAArgInst) \ + macro(AtomicCmpXchgInst) \ + macro(AtomicRMWInst) \ + macro(FenceInst) /** * @defgroup LLVMCCoreValueGeneral General APIs @@ -3807,8 +3817,12 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); +LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst); +void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool IsWeak); LLVMAtomicOrdering 
LLVMGetOrdering(LLVMValueRef MemoryAccessInst); void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering); +LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst); +void LLVMSetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst, LLVMAtomicRMWBinOp BinOp); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h index 33c8110a863..41e9f96bbb9 100644 --- a/include/llvm-c/DebugInfo.h +++ b/include/llvm-c/DebugInfo.h @@ -32,7 +32,7 @@ typedef enum { LLVMDIFlagPublic = 3, LLVMDIFlagFwdDecl = 1 << 2, LLVMDIFlagAppleBlock = 1 << 3, - LLVMDIFlagBlockByrefStruct = 1 << 4, + LLVMDIFlagReservedBit4 = 1 << 4, LLVMDIFlagVirtual = 1 << 5, LLVMDIFlagArtificial = 1 << 6, LLVMDIFlagExplicit = 1 << 7, @@ -169,6 +169,19 @@ typedef unsigned LLVMMetadataKind; */ typedef unsigned LLVMDWARFTypeEncoding; +/** + * Describes the kind of macro declaration used for LLVMDIBuilderCreateMacro. + * @see llvm::dwarf::MacinfoRecordType + * @note Values are from DW_MACINFO_* constants in the DWARF specification. + */ +typedef enum { + LLVMDWARFMacinfoRecordTypeDefine = 0x01, + LLVMDWARFMacinfoRecordTypeMacro = 0x02, + LLVMDWARFMacinfoRecordTypeStartFile = 0x03, + LLVMDWARFMacinfoRecordTypeEndFile = 0x04, + LLVMDWARFMacinfoRecordTypeVendorExt = 0xff +} LLVMDWARFMacinfoRecordType; + /** * The current debug metadata version number. */ @@ -521,6 +534,38 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder, unsigned NumParameterTypes, LLVMDIFlags Flags); +/** + * Create debugging information entry for a macro. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro is defined. + * @param RecordType DW_MACINFO_define or DW_MACINFO_undef. + * @param Name Macro name. + * @param NameLen Macro name length. + * @param Value Macro value. + * @param ValueLen Macro value length. 
+ */ +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen); + +/** + * Create debugging information temporary entry for a macro file. + * List of macro node direct children will be calculated by DIBuilder, + * using the \p ParentMacroFile relationship. + * @param Builder The DIBuilder. + * @param ParentMacroFile Macro parent (could be NULL). + * @param Line Source line number where the macro file is included. + * @param File File descriptor containing the name of the macro file. + */ +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File); + /** * Create debugging information entry for an enumerator. * @param Builder The DIBuilder. diff --git a/include/llvm-c/Remarks.h b/include/llvm-c/Remarks.h index 88eb5120c57..5444aebddd6 100644 --- a/include/llvm-c/Remarks.h +++ b/include/llvm-c/Remarks.h @@ -30,7 +30,8 @@ extern "C" { * @{ */ -#define REMARKS_API_VERSION 0 +// 0 -> 1: Bitstream remarks support. +#define REMARKS_API_VERSION 1 /** * The type of the emitted remark. @@ -240,6 +241,20 @@ typedef struct LLVMRemarkOpaqueParser *LLVMRemarkParserRef; extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, uint64_t Size); +/** + * Creates a remark parser that can be used to parse the buffer located in \p + * Buf of size \p Size bytes. + * + * \p Buf cannot be `NULL`. + * + * This function should be paired with LLVMRemarkParserDispose() to avoid + * leaking resources. + * + * \since REMARKS_API_VERSION=1 + */ +extern LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf, + uint64_t Size); + /** * Returns the next remark in the file. 
* diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 7a82ed46414..51d00758128 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -34,6 +34,9 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); /** See llvm::createConstantMergePass function. */ void LLVMAddConstantMergePass(LLVMPassManagerRef PM); +/** See llvm::createMergeFunctionsPass function. */ +void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM); + /** See llvm::createCalledValuePropagationPass function. */ void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM); @@ -67,6 +70,21 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM); /** See llvm::createInternalizePass function. */ void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain); +/** + * Create and add the internalize pass to the given pass manager with the + * provided preservation callback. + * + * The context parameter is forwarded to the callback on each invocation. + * As such, it is the responsibility of the caller to extend its lifetime + * until execution of this pass has finished. + * + * @see llvm::createInternalizePass function. + */ +void LLVMAddInternalizePassWithMustPreservePredicate( + LLVMPassManagerRef PM, + void *Context, + LLVMBool (*MustPreserve)(LLVMValueRef, void *)); + /** See llvm::createStripDeadPrototypesPass function. */ void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM); diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 031cf98b2df..6f3a3d8b375 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -35,6 +35,9 @@ extern "C" { /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); +/** See llvm::createDeadCodeEliminationPass function. */ +void LLVMAddDCEPass(LLVMPassManagerRef PM); + /** See llvm::createBitTrackingDCEPass function. 
*/ void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM); @@ -144,6 +147,9 @@ void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM); /** See llvm::createLowerExpectIntrinsicPass function */ void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM); +/** See llvm::createLowerConstantIntrinsicsPass function */ +void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM); + /** See llvm::createTypeBasedAliasAnalysisPass function */ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM); diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 2467722b195..41e6067cf44 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -44,7 +44,7 @@ typedef bool lto_bool_t; * @{ */ -#define LTO_API_VERSION 24 +#define LTO_API_VERSION 25 /** * \since prior to LTO_API_VERSION=3 @@ -550,6 +550,56 @@ extern void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg, lto_bool_t ShouldEmbedUselists); +/** Opaque reference to an LTO input file */ +typedef struct LLVMOpaqueLTOInput *lto_input_t; + +/** + * Creates an LTO input file from a buffer. The path + * argument is used for diagnotics as this function + * otherwise does not know which file the given buffer + * is associated with. + * + * \since LTO_API_VERSION=24 + */ +extern lto_input_t lto_input_create(const void *buffer, + size_t buffer_size, + const char *path); + +/** + * Frees all memory internally allocated by the LTO input file. + * Upon return the lto_module_t is no longer valid. + * + * \since LTO_API_VERSION=24 + */ +extern void lto_input_dispose(lto_input_t input); + +/** + * Returns the number of dependent library specifiers + * for the given LTO input file. + * + * \since LTO_API_VERSION=24 + */ +extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input); + +/** + * Returns the ith dependent library specifier + * for the given LTO input file. The returned + * string is not null-terminated. 
+ * + * \since LTO_API_VERSION=24 + */ +extern const char * lto_input_get_dependent_library(lto_input_t input, + size_t index, + size_t *size); + +/** + * Returns the list of libcall symbols that can be generated by LTO + * that might not be visible from the symbol table of bitcode files. + * + * \since prior to LTO_API_VERSION=25 + */ +extern const char *const *lto_runtime_lib_symbols_list(size_t *size); + /** * @} // endgoup LLVMCLTO * @defgroup LLVMCTLTO ThinLTO @@ -846,48 +896,6 @@ thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg, extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg, unsigned max_size_files); -/** Opaque reference to an LTO input file */ -typedef struct LLVMOpaqueLTOInput *lto_input_t; - -/** - * Creates an LTO input file from a buffer. The path - * argument is used for diagnotics as this function - * otherwise does not know which file the given buffer - * is associated with. - * - * \since LTO_API_VERSION=24 - */ -extern lto_input_t lto_input_create(const void *buffer, - size_t buffer_size, - const char *path); - -/** - * Frees all memory internally allocated by the LTO input file. - * Upon return the lto_module_t is no longer valid. - * - * \since LTO_API_VERSION=24 - */ -extern void lto_input_dispose(lto_input_t input); - -/** - * Returns the number of dependent library specifiers - * for the given LTO input file. - * - * \since LTO_API_VERSION=24 - */ -extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input); - -/** - * Returns the ith dependent library specifier - * for the given LTO input file. The returned - * string is not null-terminated. 
- * - * \since LTO_API_VERSION=24 - */ -extern const char * lto_input_get_dependent_library(lto_input_t input, - size_t index, - size_t *size); - /** * @} // endgroup LLVMCTLTO_CACHING */ diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index a9648d35cf5..1c496973379 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -192,6 +192,11 @@ struct APFloatBase { /// IEEE-754R 7: Default exception handling. /// /// opUnderflow or opOverflow are always returned or-ed with opInexact. + /// + /// APFloat models this behavior specified by IEEE-754: + /// "For operations producing results in floating-point format, the default + /// result of an operation that signals the invalid operation exception + /// shall be a quiet NaN." enum opStatus { opOK = 0x00, opInvalidOp = 0x01, diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 2381b75e08b..8dce5a621bb 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -1467,6 +1467,13 @@ public: U.pVal[whichWord(BitPosition)] &= Mask; } + /// Set bottom loBits bits to 0. + void clearLowBits(unsigned loBits) { + assert(loBits <= BitWidth && "More bits than bitwidth"); + APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); + *this &= Keep; + } + /// Set the sign bit to 0. void clearSignBit() { clearBit(BitWidth - 1); @@ -1496,9 +1503,11 @@ public: /// Insert the bits from a smaller APInt starting at bitPosition. void insertBits(const APInt &SubBits, unsigned bitPosition); + void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). 
APInt extractBits(unsigned numBits, unsigned bitPosition) const; + uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; /// @} /// \name Value Characterization Functions diff --git a/include/llvm/ADT/Any.h b/include/llvm/ADT/Any.h index 5dcd6e73c54..49657e02a99 100644 --- a/include/llvm/ADT/Any.h +++ b/include/llvm/ADT/Any.h @@ -38,7 +38,7 @@ class Any { explicit StorageImpl(T &&Value) : Value(std::move(Value)) {} std::unique_ptr clone() const override { - return llvm::make_unique>(Value); + return std::make_unique>(Value); } const void *id() const override { return &TypeId::Id; } @@ -78,7 +78,7 @@ public: int>::type = 0> Any(T &&Value) { using U = typename std::decay::type; - Storage = llvm::make_unique>(std::forward(Value)); + Storage = std::make_unique>(std::forward(Value)); } Any(Any &&Other) : Storage(std::move(Other.Storage)) {} diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 773c88f7c9f..f6455d3fa41 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -481,6 +481,12 @@ namespace llvm { return Vec; } + /// Construct an ArrayRef from a std::array. + template + ArrayRef makeArrayRef(const std::array &Arr) { + return Arr; + } + /// Construct an ArrayRef from an ArrayRef (no-op) (const) template ArrayRef makeArrayRef(const ArrayRef &Vec) { return Vec; diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index a05cf8130d3..948a6e6bfb3 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -38,33 +38,7 @@ namespace detail { // implementation without requiring two members. template struct DenseMapPair : public std::pair { - - // FIXME: Switch to inheriting constructors when we drop support for older - // clang versions. - // NOTE: This default constructor is declared with '{}' rather than - // '= default' to work around a separate bug in clang-3.8. This can - // also go when we switch to inheriting constructors. 
- DenseMapPair() {} - - DenseMapPair(const KeyT &Key, const ValueT &Value) - : std::pair(Key, Value) {} - - DenseMapPair(KeyT &&Key, ValueT &&Value) - : std::pair(std::move(Key), std::move(Value)) {} - - template - DenseMapPair(AltKeyT &&AltKey, AltValueT &&AltValue, - typename std::enable_if< - std::is_convertible::value && - std::is_convertible::value>::type * = 0) - : std::pair(std::forward(AltKey), - std::forward(AltValue)) {} - - template - DenseMapPair(AltPairT &&AltPair, - typename std::enable_if>::value>::type * = nullptr) - : std::pair(std::forward(AltPair)) {} + using std::pair::pair; KeyT &getFirst() { return std::pair::first; } const KeyT &getFirst() const { return std::pair::first; } @@ -748,7 +722,7 @@ public: ~DenseMap() { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); } void swap(DenseMap& RHS) { @@ -768,7 +742,7 @@ public: DenseMap& operator=(DenseMap &&other) { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); init(0); swap(other); return *this; @@ -776,7 +750,7 @@ public: void copyFrom(const DenseMap& other) { this->destroyAll(); - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT)); if (allocateBuckets(other.NumBuckets)) { this->BaseT::copyFrom(other); } else { @@ -809,10 +783,12 @@ public: this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets); // Free the old table. 
- operator delete(OldBuckets); + deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets, + alignof(BucketT)); } void shrink_and_clear() { + unsigned OldNumBuckets = NumBuckets; unsigned OldNumEntries = NumEntries; this->destroyAll(); @@ -825,7 +801,8 @@ public: return; } - operator delete(Buckets); + deallocate_buffer(Buckets, sizeof(BucketT) * OldNumBuckets, + alignof(BucketT)); init(NewNumBuckets); } @@ -861,7 +838,8 @@ private: return false; } - Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets)); + Buckets = static_cast( + allocate_buffer(sizeof(BucketT) * NumBuckets, alignof(BucketT))); return true; } }; @@ -1076,7 +1054,8 @@ public: this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets); // Free the old table. - operator delete(OldRep.Buckets); + deallocate_buffer(OldRep.Buckets, sizeof(BucketT) * OldRep.NumBuckets, + alignof(BucketT)); } void shrink_and_clear() { @@ -1160,15 +1139,17 @@ private: if (Small) return; - operator delete(getLargeRep()->Buckets); + deallocate_buffer(getLargeRep()->Buckets, + sizeof(BucketT) * getLargeRep()->NumBuckets, + alignof(BucketT)); getLargeRep()->~LargeRep(); } LargeRep allocateBuckets(unsigned Num) { assert(Num > InlineBuckets && "Must allocate more buckets than are inline"); - LargeRep Rep = { - static_cast(operator new(sizeof(BucketT) * Num)), Num - }; + LargeRep Rep = {static_cast(allocate_buffer( + sizeof(BucketT) * Num, alignof(BucketT))), + Num}; return Rep; } }; diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index 5ef6f3ad1b0..bd4c60c8f13 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -17,7 +17,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -67,6 +67,17 @@ template<> struct DenseMapInfo { } }; +// Provide DenseMapInfo for unsigned 
chars. +template <> struct DenseMapInfo { + static inline unsigned char getEmptyKey() { return ~0; } + static inline unsigned char getTombstoneKey() { return ~0 - 1; } + static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; } + + static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) { + return LHS == RHS; + } +}; + // Provide DenseMapInfo for unsigned shorts. template <> struct DenseMapInfo { static inline unsigned short getEmptyKey() { return 0xFFFF; } diff --git a/include/llvm/ADT/DirectedGraph.h b/include/llvm/ADT/DirectedGraph.h new file mode 100644 index 00000000000..f6a358d99cd --- /dev/null +++ b/include/llvm/ADT/DirectedGraph.h @@ -0,0 +1,270 @@ +//===- llvm/ADT/DirectedGraph.h - Directed Graph ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface and a base class implementation for a +// directed graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DIRECTEDGRAPH_H +#define LLVM_ADT_DIRECTEDGRAPH_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// Represent an edge in the directed graph. +/// The edge contains the target node it connects to. +template class DGEdge { +public: + DGEdge() = delete; + /// Create an edge pointing to the given node \p N. 
+ explicit DGEdge(NodeType &N) : TargetNode(N) {} + explicit DGEdge(const DGEdge &E) + : TargetNode(E.TargetNode) {} + DGEdge &operator=(const DGEdge &E) { + TargetNode = E.TargetNode; + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. + bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); } + bool operator!=(const EdgeType &E) const { return !operator==(E); } + + /// Retrieve the target node this edge connects to. + const NodeType &getTargetNode() const { return TargetNode; } + NodeType &getTargetNode() { + return const_cast( + static_cast &>(*this).getTargetNode()); + } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const EdgeType &E) const { return this == &E; } + + // Cast the 'this' pointer to the derived type and return a reference. + EdgeType &getDerived() { return *static_cast(this); } + const EdgeType &getDerived() const { + return *static_cast(this); + } + + // The target node this edge connects to. + NodeType &TargetNode; +}; + +/// Represent a node in the directed graph. +/// The node has a (possibly empty) list of outgoing edges. +template class DGNode { +public: + using EdgeListTy = SetVector; + using iterator = typename EdgeListTy::iterator; + using const_iterator = typename EdgeListTy::const_iterator; + + /// Create a node with a single outgoing edge \p E. + explicit DGNode(EdgeType &E) : Edges() { Edges.insert(&E); } + DGNode() = default; + + explicit DGNode(const DGNode &N) : Edges(N.Edges) {} + DGNode(DGNode &&N) : Edges(std::move(N.Edges)) {} + + DGNode &operator=(const DGNode &N) { + Edges = N.Edges; + return *this; + } + DGNode &operator=(const DGNode &&N) { + Edges = std::move(N.Edges); + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. 
+ bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); } + bool operator!=(const NodeType &N) const { return !operator==(N); } + + const_iterator begin() const { return Edges.begin(); } + const_iterator end() const { return Edges.end(); } + iterator begin() { return Edges.begin(); } + iterator end() { return Edges.end(); } + const EdgeType &front() const { return *Edges.front(); } + EdgeType &front() { return *Edges.front(); } + const EdgeType &back() const { return *Edges.back(); } + EdgeType &back() { return *Edges.back(); } + + /// Collect in \p EL, all the edges from this node to \p N. + /// Return true if at least one edge was found, and false otherwise. + /// Note that this implementation allows more than one edge to connect + /// a given pair of nodes. + bool findEdgesTo(const NodeType &N, SmallVectorImpl &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + for (auto *E : Edges) + if (E->getTargetNode() == N) + EL.push_back(E); + return !EL.empty(); + } + + /// Add the given edge \p E to this node, if it doesn't exist already. Returns + /// true if the edge is added and false otherwise. + bool addEdge(EdgeType &E) { return Edges.insert(&E); } + + /// Remove the given edge \p E from this node, if it exists. + void removeEdge(EdgeType &E) { Edges.remove(&E); } + + /// Test whether there is an edge that goes from this node to \p N. + bool hasEdgeTo(const NodeType &N) const { + return (findEdgeTo(N) != Edges.end()); + } + + /// Retrieve the outgoing edges for the node. + const EdgeListTy &getEdges() const { return Edges; } + EdgeListTy &getEdges() { + return const_cast( + static_cast &>(*this).Edges); + } + + /// Clear the outgoing edges. + void clear() { Edges.clear(); } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const NodeType &N) const { return this == &N; } + + // Cast the 'this' pointer to the derived type and return a reference. 
+ NodeType &getDerived() { return *static_cast(this); } + const NodeType &getDerived() const { + return *static_cast(this); + } + + /// Find an edge to \p N. If more than one edge exists, this will return + /// the first one in the list of edges. + const_iterator findEdgeTo(const NodeType &N) const { + return llvm::find_if( + Edges, [&N](const EdgeType *E) { return E->getTargetNode() == N; }); + } + + // The list of outgoing edges. + EdgeListTy Edges; +}; + +/// Directed graph +/// +/// The graph is represented by a table of nodes. +/// Each node contains a (possibly empty) list of outgoing edges. +/// Each edge contains the target node it connects to. +template class DirectedGraph { +protected: + using NodeListTy = SmallVector; + using EdgeListTy = SmallVector; +public: + using iterator = typename NodeListTy::iterator; + using const_iterator = typename NodeListTy::const_iterator; + using DGraphType = DirectedGraph; + + DirectedGraph() = default; + explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); } + DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {} + DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {} + DGraphType &operator=(const DGraphType &G) { + Nodes = G.Nodes; + return *this; + } + DGraphType &operator=(const DGraphType &&G) { + Nodes = std::move(G.Nodes); + return *this; + } + + const_iterator begin() const { return Nodes.begin(); } + const_iterator end() const { return Nodes.end(); } + iterator begin() { return Nodes.begin(); } + iterator end() { return Nodes.end(); } + const NodeType &front() const { return *Nodes.front(); } + NodeType &front() { return *Nodes.front(); } + const NodeType &back() const { return *Nodes.back(); } + NodeType &back() { return *Nodes.back(); } + + size_t size() const { return Nodes.size(); } + + /// Find the given node \p N in the table. 
+ const_iterator findNode(const NodeType &N) const { + return llvm::find_if(Nodes, + [&N](const NodeType *Node) { return *Node == N; }); + } + iterator findNode(const NodeType &N) { + return const_cast( + static_cast(*this).findNode(N)); + } + + /// Add the given node \p N to the graph if it is not already present. + bool addNode(NodeType &N) { + if (findNode(N) != Nodes.end()) + return false; + Nodes.push_back(&N); + return true; + } + + /// Collect in \p EL all edges that are coming into node \p N. Return true + /// if at least one edge was found, and false otherwise. + bool findIncomingEdgesToNode(const NodeType &N, SmallVectorImpl &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + EdgeListTy TempList; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, TempList); + EL.insert(EL.end(), TempList.begin(), TempList.end()); + TempList.clear(); + } + return !EL.empty(); + } + + /// Remove the given node \p N from the graph. If the node has incoming or + /// outgoing edges, they are also removed. Return true if the node was found + /// and then removed, and false if the node was not found in the graph to + /// begin with. + bool removeNode(NodeType &N) { + iterator IT = findNode(N); + if (IT == Nodes.end()) + return false; + // Remove incoming edges. + EdgeListTy EL; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, EL); + for (auto *E : EL) + Node->removeEdge(*E); + EL.clear(); + } + N.clear(); + Nodes.erase(IT); + return true; + } + + /// Assuming nodes \p Src and \p Dst are already in the graph, connect node \p + /// Src to node \p Dst using the provided edge \p E. Return true if \p Src is + /// not already connected to \p Dst via \p E, and false otherwise. 
+ bool connect(NodeType &Src, NodeType &Dst, EdgeType &E) { + assert(findNode(Src) != Nodes.end() && "Src node should be present."); + assert(findNode(Dst) != Nodes.end() && "Dst node should be present."); + assert((E.getTargetNode() == Dst) && + "Target of the given edge does not match Dst."); + return Src.addEdge(E); + } + +protected: + // The list of nodes in the graph. + NodeListTy Nodes; +}; + +} // namespace llvm + +#endif // LLVM_ADT_DIRECTEDGRAPH_H diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h index 008188bfa21..b22606bdb51 100644 --- a/include/llvm/ADT/Hashing.h +++ b/include/llvm/ADT/Hashing.h @@ -45,7 +45,6 @@ #define LLVM_ADT_HASHING_H #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/type_traits.h" #include diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 12828c4cfda..a02876ee77f 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -963,8 +963,8 @@ public: private: // The root data is either a RootLeaf or a RootBranchData instance. - LLVM_ALIGNAS(RootLeaf) LLVM_ALIGNAS(RootBranchData) - AlignedCharArrayUnion data; + alignas(RootLeaf) alignas(RootBranchData) + AlignedCharArrayUnion data; // Tree height. // 0: Leaves in root. 
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 24a2bb67a36..fa6bf150446 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -13,6 +13,7 @@ #ifndef LLVM_ADT_POINTERINTPAIR_H #define LLVM_ADT_POINTERINTPAIR_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/type_traits.h" #include @@ -59,19 +60,19 @@ public: IntType getInt() const { return (IntType)Info::getInt(Value); } - void setPointer(PointerTy PtrVal) { + void setPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { + void setInt(IntType IntVal) LLVM_LVALUE_FUNCTION { Value = Info::updateInt(Value, static_cast(IntVal)); } - void initWithPointer(PointerTy PtrVal) { + void initWithPointer(PointerTy PtrVal) LLVM_LVALUE_FUNCTION { Value = Info::updatePointer(0, PtrVal); } - void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { + void setPointerAndInt(PointerTy PtrVal, IntType IntVal) LLVM_LVALUE_FUNCTION { Value = Info::updateInt(Info::updatePointer(0, PtrVal), static_cast(IntVal)); } @@ -89,7 +90,7 @@ public: void *getOpaqueValue() const { return reinterpret_cast(Value); } - void setFromOpaqueValue(void *Val) { + void setFromOpaqueValue(void *Val) LLVM_LVALUE_FUNCTION { Value = reinterpret_cast(Val); } diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 2bcdf546c6e..98c905775a7 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -54,21 +54,14 @@ struct PointerUnionTypeSelectorReturn< }; namespace pointer_union_detail { - constexpr int constexprMin(int a, int b) { return a < b ? a : b; } /// Determine the number of bits required to store integers with values < n. /// This is ceil(log2(n)). constexpr int bitsRequired(unsigned n) { return n > 1 ? 
1 + bitsRequired((n + 1) / 2) : 0; } - // FIXME: In C++14, replace this with - // std::min({PointerLikeTypeTraits::NumLowBitsAvailable...}) - template constexpr int lowBitsAvailable() { - return PointerLikeTypeTraits::NumLowBitsAvailable; - } - template - constexpr int lowBitsAvailable() { - return constexprMin(lowBitsAvailable(), lowBitsAvailable()); + template constexpr int lowBitsAvailable() { + return std::min({PointerLikeTypeTraits::NumLowBitsAvailable...}); } /// Find the index of a type in a list of types. TypeIndex::Index @@ -167,10 +160,11 @@ class PointerUnion void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int, pointer_union_detail::PointerUnionUIntTraits>, 0, PTs...> { - // The first type is special in some ways, but we don't want PointerUnion to - // be a 'template ' because it's much more - // convenient to have a name for the whole pack. So split off the first type - // here. + // The first type is special because we want to directly cast a pointer to a + // default-initialized union to a pointer to the first type. But we don't + // want PointerUnion to be a 'template ' + // because it's much more convenient to have a name for the whole pack. So + // split off the first type here. using First = typename pointer_union_detail::GetFirstType::type; using Base = typename PointerUnion::PointerUnionMembers; @@ -182,12 +176,7 @@ public: /// Test if the pointer held in the union is null, regardless of /// which type it is. - bool isNull() const { - // Convert from the void* to one of the pointer types, to make sure that - // we recursively strip off low bits if we have a nested PointerUnion. 
- return !PointerLikeTypeTraits::getFromVoidPointer( - this->Val.getPointer()); - } + bool isNull() const { return !this->Val.getPointer(); } explicit operator bool() const { return !isNull(); } @@ -226,7 +215,8 @@ public: First *getAddrOfPtr1() { assert(is() && "Val is not the first pointer"); assert( - get() == this->Val.getPointer() && + PointerLikeTypeTraits::getAsVoidPointer(get()) == + this->Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); return const_cast( reinterpret_cast(this->Val.getAddrOfPointer())); diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 81dce0168c7..274933bc520 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -95,18 +95,6 @@ template struct identity { } }; -template struct less_ptr { - bool operator()(const Ty* left, const Ty* right) const { - return *left < *right; - } -}; - -template struct greater_ptr { - bool operator()(const Ty* left, const Ty* right) const { - return *right < *left; - } -}; - /// An efficient, type-erasing, non-owning reference to a callable. This is /// intended for use as the type of a function parameter that is not used /// after the function in question returns. 
@@ -530,10 +518,6 @@ bool all_of(R &&range, UnaryPredicate P); template bool any_of(R &&range, UnaryPredicate P); -template struct index_sequence; - -template struct index_sequence_for; - namespace detail { using std::declval; @@ -568,38 +552,38 @@ struct zip_common : public zip_traits { std::tuple iterators; protected: - template value_type deref(index_sequence) const { + template value_type deref(std::index_sequence) const { return value_type(*std::get(iterators)...); } template - decltype(iterators) tup_inc(index_sequence) const { + decltype(iterators) tup_inc(std::index_sequence) const { return std::tuple(std::next(std::get(iterators))...); } template - decltype(iterators) tup_dec(index_sequence) const { + decltype(iterators) tup_dec(std::index_sequence) const { return std::tuple(std::prev(std::get(iterators))...); } public: zip_common(Iters &&... ts) : iterators(std::forward(ts)...) {} - value_type operator*() { return deref(index_sequence_for{}); } + value_type operator*() { return deref(std::index_sequence_for{}); } const value_type operator*() const { - return deref(index_sequence_for{}); + return deref(std::index_sequence_for{}); } ZipType &operator++() { - iterators = tup_inc(index_sequence_for{}); + iterators = tup_inc(std::index_sequence_for{}); return *reinterpret_cast(this); } ZipType &operator--() { static_assert(Base::IsBidirectional, "All inner iterators must be at least bidirectional."); - iterators = tup_dec(index_sequence_for{}); + iterators = tup_dec(std::index_sequence_for{}); return *reinterpret_cast(this); } }; @@ -618,7 +602,8 @@ struct zip_first : public zip_common, Iters...> { template class zip_shortest : public zip_common, Iters...> { template - bool test(const zip_shortest &other, index_sequence) const { + bool test(const zip_shortest &other, + std::index_sequence) const { return all_of(std::initializer_list{std::get(this->iterators) != std::get(other.iterators)...}, identity{}); @@ -630,7 +615,7 @@ public: zip_shortest(Iters &&... 
ts) : Base(std::forward(ts)...) {} bool operator==(const zip_shortest &other) const { - return !test(other, index_sequence_for{}); + return !test(other, std::index_sequence_for{}); } }; @@ -646,18 +631,21 @@ public: private: std::tuple ts; - template iterator begin_impl(index_sequence) const { + template + iterator begin_impl(std::index_sequence) const { return iterator(std::begin(std::get(ts))...); } - template iterator end_impl(index_sequence) const { + template iterator end_impl(std::index_sequence) const { return iterator(std::end(std::get(ts))...); } public: zippy(Args &&... ts_) : ts(std::forward(ts_)...) {} - iterator begin() const { return begin_impl(index_sequence_for{}); } - iterator end() const { return end_impl(index_sequence_for{}); } + iterator begin() const { + return begin_impl(std::index_sequence_for{}); + } + iterator end() const { return end_impl(std::index_sequence_for{}); } }; } // end namespace detail @@ -727,20 +715,20 @@ private: template bool test(const zip_longest_iterator &other, - index_sequence) const { + std::index_sequence) const { return llvm::any_of( std::initializer_list{std::get(this->iterators) != std::get(other.iterators)...}, identity{}); } - template value_type deref(index_sequence) const { + template value_type deref(std::index_sequence) const { return value_type( deref_or_none(std::get(iterators), std::get(end_iterators))...); } template - decltype(iterators) tup_inc(index_sequence) const { + decltype(iterators) tup_inc(std::index_sequence) const { return std::tuple( next_or_end(std::get(iterators), std::get(end_iterators))...); } @@ -750,17 +738,19 @@ public: : iterators(std::forward(ts.first)...), end_iterators(std::forward(ts.second)...) 
{} - value_type operator*() { return deref(index_sequence_for{}); } + value_type operator*() { return deref(std::index_sequence_for{}); } - value_type operator*() const { return deref(index_sequence_for{}); } + value_type operator*() const { + return deref(std::index_sequence_for{}); + } zip_longest_iterator &operator++() { - iterators = tup_inc(index_sequence_for{}); + iterators = tup_inc(std::index_sequence_for{}); return *this; } bool operator==(const zip_longest_iterator &other) const { - return !test(other, index_sequence_for{}); + return !test(other, std::index_sequence_for{}); } }; @@ -777,12 +767,13 @@ public: private: std::tuple ts; - template iterator begin_impl(index_sequence) const { + template + iterator begin_impl(std::index_sequence) const { return iterator(std::make_pair(adl_begin(std::get(ts)), adl_end(std::get(ts)))...); } - template iterator end_impl(index_sequence) const { + template iterator end_impl(std::index_sequence) const { return iterator(std::make_pair(adl_end(std::get(ts)), adl_end(std::get(ts)))...); } @@ -790,8 +781,10 @@ private: public: zip_longest_range(Args &&... ts_) : ts(std::forward(ts_)...) {} - iterator begin() const { return begin_impl(index_sequence_for{}); } - iterator end() const { return end_impl(index_sequence_for{}); } + iterator begin() const { + return begin_impl(std::index_sequence_for{}); + } + iterator end() const { return end_impl(std::index_sequence_for{}); } }; } // namespace detail @@ -847,7 +840,7 @@ class concat_iterator /// Increments the first non-end iterator. /// /// It is an error to call this with all iterators at the end. - template void increment(index_sequence) { + template void increment(std::index_sequence) { // Build a sequence of functions to increment each iterator if possible. bool (concat_iterator::*IncrementHelperFns[])() = { &concat_iterator::incrementHelper...}; @@ -876,7 +869,7 @@ class concat_iterator /// reference. /// /// It is an error to call this with all iterators at the end. 
- template ValueT &get(index_sequence) const { + template ValueT &get(std::index_sequence) const { // Build a sequence of functions to get from iterator if possible. ValueT *(concat_iterator::*GetHelperFns[])() const = { &concat_iterator::getHelper...}; @@ -901,11 +894,13 @@ public: using BaseT::operator++; concat_iterator &operator++() { - increment(index_sequence_for()); + increment(std::index_sequence_for()); return *this; } - ValueT &operator*() const { return get(index_sequence_for()); } + ValueT &operator*() const { + return get(std::index_sequence_for()); + } bool operator==(const concat_iterator &RHS) const { return Begins == RHS.Begins && Ends == RHS.Ends; @@ -928,10 +923,10 @@ public: private: std::tuple Ranges; - template iterator begin_impl(index_sequence) { + template iterator begin_impl(std::index_sequence) { return iterator(std::get(Ranges)...); } - template iterator end_impl(index_sequence) { + template iterator end_impl(std::index_sequence) { return iterator(make_range(std::end(std::get(Ranges)), std::end(std::get(Ranges)))...); } @@ -940,8 +935,8 @@ public: concat_range(RangeTs &&... Ranges) : Ranges(std::forward(Ranges)...) {} - iterator begin() { return begin_impl(index_sequence_for{}); } - iterator end() { return end_impl(index_sequence_for{}); } + iterator begin() { return begin_impl(std::index_sequence_for{}); } + iterator end() { return end_impl(std::index_sequence_for{}); } }; } // end namespace detail @@ -990,28 +985,6 @@ struct on_first { } }; -// A subset of N3658. More stuff can be added as-needed. - -/// Represents a compile-time sequence of integers. -template struct integer_sequence { - using value_type = T; - - static constexpr size_t size() { return sizeof...(I); } -}; - -/// Alias for the common case of a sequence of size_ts. 
-template -struct index_sequence : integer_sequence {}; - -template -struct build_index_impl : build_index_impl {}; -template -struct build_index_impl<0, I...> : index_sequence {}; - -/// Creates a compile-time integer sequence for a parameter pack. -template -struct index_sequence_for : build_index_impl {}; - /// Utility type to build an inheritance chain that makes it easy to rank /// overload candidates. template struct rank : rank {}; @@ -1391,41 +1364,6 @@ void replace(Container &Cont, typename Container::iterator ContIt, // Extra additions to //===----------------------------------------------------------------------===// -// Implement make_unique according to N3656. - -/// Constructs a `new T()` with the given args and returns a -/// `unique_ptr` which owns the object. -/// -/// Example: -/// -/// auto p = make_unique(); -/// auto p = make_unique>(0, 1); -template -typename std::enable_if::value, std::unique_ptr>::type -make_unique(Args &&... args) { - return std::unique_ptr(new T(std::forward(args)...)); -} - -/// Constructs a `new T[n]` with the given args and returns a -/// `unique_ptr` which owns the object. -/// -/// \param n size of the new array. -/// -/// Example: -/// -/// auto p = make_unique(2); // value-initializes the array with 0's. -template -typename std::enable_if::value && std::extent::value == 0, - std::unique_ptr>::type -make_unique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); -} - -/// This function isn't used and is only here to provide better compile errors. -template -typename std::enable_if::value != 0>::type -make_unique(Args &&...) = delete; - struct FreeDeleter { void operator()(void* v) { ::free(v); @@ -1439,20 +1377,6 @@ struct pair_hash { } }; -/// A functor like C++14's std::less in its absence. -struct less { - template bool operator()(A &&a, B &&b) const { - return std::forward(a) < std::forward(b); - } -}; - -/// A functor like C++14's std::equal in its absence. 
-struct equal { - template bool operator()(A &&a, B &&b) const { - return std::forward(a) == std::forward(b); - } -}; - /// Binary functor that adapts to any other binary functor after dereferencing /// operands. template struct deref { @@ -1580,7 +1504,7 @@ template detail::enumerator enumerate(R &&TheRange) { namespace detail { template -auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence) +auto apply_tuple_impl(F &&f, Tuple &&t, std::index_sequence) -> decltype(std::forward(f)(std::get(std::forward(t))...)) { return std::forward(f)(std::get(std::forward(t))...); } @@ -1593,9 +1517,9 @@ auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence) template auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl( std::forward(f), std::forward(t), - build_index_impl< + std::make_index_sequence< std::tuple_size::type>::value>{})) { - using Indices = build_index_impl< + using Indices = std::make_index_sequence< std::tuple_size::type>::value>; return detail::apply_tuple_impl(std::forward(f), std::forward(t), diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index 742450e6a95..61375c00802 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -290,7 +290,7 @@ public: ++Prev; uintptr_t Bits = getSmallBits(); // Mask in previous bits. 
- uintptr_t Mask = (1 << Prev) - 1; + uintptr_t Mask = (uintptr_t(1) << Prev) - 1; Bits |= Mask; if (Bits == ~uintptr_t(0) || Prev + 1 >= getSmallSize()) diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index 2ac59da596e..b7387ddcf1c 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -44,38 +44,39 @@ class raw_ostream; class raw_fd_ostream; class StringRef; -class Statistic { +class StatisticBase { public: const char *DebugType; const char *Name; const char *Desc; - std::atomic Value; - std::atomic Initialized; - unsigned getValue() const { return Value.load(std::memory_order_relaxed); } + StatisticBase(const char *DebugType, const char *Name, const char *Desc) + : DebugType(DebugType), Name(Name), Desc(Desc) {} + const char *getDebugType() const { return DebugType; } const char *getName() const { return Name; } const char *getDesc() const { return Desc; } +}; - /// construct - This should only be called for non-global statistics. - void construct(const char *debugtype, const char *name, const char *desc) { - DebugType = debugtype; - Name = name; - Desc = desc; - Value = 0; - Initialized = false; - } +class TrackingStatistic : public StatisticBase { +public: + std::atomic Value; + std::atomic Initialized; + + TrackingStatistic(const char *DebugType, const char *Name, const char *Desc) + : StatisticBase(DebugType, Name, Desc), Value(0), Initialized(false) {} + + unsigned getValue() const { return Value.load(std::memory_order_relaxed); } // Allow use of this class as the value itself. 
operator unsigned() const { return getValue(); } -#if LLVM_ENABLE_STATS - const Statistic &operator=(unsigned Val) { + const TrackingStatistic &operator=(unsigned Val) { Value.store(Val, std::memory_order_relaxed); return init(); } - const Statistic &operator++() { + const TrackingStatistic &operator++() { Value.fetch_add(1, std::memory_order_relaxed); return init(); } @@ -85,7 +86,7 @@ public: return Value.fetch_add(1, std::memory_order_relaxed); } - const Statistic &operator--() { + const TrackingStatistic &operator--() { Value.fetch_sub(1, std::memory_order_relaxed); return init(); } @@ -95,14 +96,14 @@ public: return Value.fetch_sub(1, std::memory_order_relaxed); } - const Statistic &operator+=(unsigned V) { + const TrackingStatistic &operator+=(unsigned V) { if (V == 0) return *this; Value.fetch_add(V, std::memory_order_relaxed); return init(); } - const Statistic &operator-=(unsigned V) { + const TrackingStatistic &operator-=(unsigned V) { if (V == 0) return *this; Value.fetch_sub(V, std::memory_order_relaxed); @@ -119,42 +120,8 @@ public: init(); } -#else // Statistics are disabled in release builds. 
- - const Statistic &operator=(unsigned Val) { - return *this; - } - - const Statistic &operator++() { - return *this; - } - - unsigned operator++(int) { - return 0; - } - - const Statistic &operator--() { - return *this; - } - - unsigned operator--(int) { - return 0; - } - - const Statistic &operator+=(const unsigned &V) { - return *this; - } - - const Statistic &operator-=(const unsigned &V) { - return *this; - } - - void updateMax(unsigned V) {} - -#endif // LLVM_ENABLE_STATS - protected: - Statistic &init() { + TrackingStatistic &init() { if (!Initialized.load(std::memory_order_acquire)) RegisterStatistic(); return *this; @@ -163,10 +130,47 @@ protected: void RegisterStatistic(); }; +class NoopStatistic : public StatisticBase { +public: + using StatisticBase::StatisticBase; + + unsigned getValue() const { return 0; } + + // Allow use of this class as the value itself. + operator unsigned() const { return 0; } + + const NoopStatistic &operator=(unsigned Val) { return *this; } + + const NoopStatistic &operator++() { return *this; } + + unsigned operator++(int) { return 0; } + + const NoopStatistic &operator--() { return *this; } + + unsigned operator--(int) { return 0; } + + const NoopStatistic &operator+=(const unsigned &V) { return *this; } + + const NoopStatistic &operator-=(const unsigned &V) { return *this; } + + void updateMax(unsigned V) {} +}; + +#if LLVM_ENABLE_STATS +using Statistic = TrackingStatistic; +#else +using Statistic = NoopStatistic; +#endif + // STATISTIC - A macro to make definition of statistics really simple. This // automatically passes the DEBUG_TYPE of the file into the statistic. #define STATISTIC(VARNAME, DESC) \ - static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, {false}} + static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} + +// ALWAYS_ENABLED_STATISTIC - A macro to define a statistic like STATISTIC but +// it is enabled even if LLVM_ENABLE_STATS is off. 
+#define ALWAYS_ENABLED_STATISTIC(VARNAME, DESC) \ + static llvm::TrackingStatistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC} /// Enable the collection and printing of statistics. void EnableStatistics(bool PrintOnExit = true); diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 16ac90bd6c8..ef1a11e0619 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -345,7 +345,7 @@ inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1, join_items_impl(Result, Separator, std::forward(Items)...); } -inline size_t join_one_item_size(char C) { return 1; } +inline size_t join_one_item_size(char) { return 1; } inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; } template inline size_t join_one_item_size(const T &Str) { diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 8a586fc2670..108185bd07b 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -118,36 +118,59 @@ public: } }; -/// StringMapEntry - This is used to represent one value that is inserted into -/// a StringMap. It contains the Value itself and the key: the string length -/// and data. +/// StringMapEntryStorage - Holds the value in a StringMapEntry. +/// +/// Factored out into a separate base class to make it easier to specialize. +/// This is primarily intended to support StringSet, which doesn't need a value +/// stored at all. template -class StringMapEntry : public StringMapEntryBase { +class StringMapEntryStorage : public StringMapEntryBase { public: ValueTy second; - explicit StringMapEntry(size_t strLen) + explicit StringMapEntryStorage(size_t strLen) : StringMapEntryBase(strLen), second() {} template - StringMapEntry(size_t strLen, InitTy &&... InitVals) + StringMapEntryStorage(size_t strLen, InitTy &&... InitVals) : StringMapEntryBase(strLen), second(std::forward(InitVals)...) 
{} - StringMapEntry(StringMapEntry &E) = delete; - - StringRef getKey() const { - return StringRef(getKeyData(), getKeyLength()); - } + StringMapEntryStorage(StringMapEntryStorage &E) = delete; const ValueTy &getValue() const { return second; } ValueTy &getValue() { return second; } void setValue(const ValueTy &V) { second = V; } +}; + +template<> +class StringMapEntryStorage : public StringMapEntryBase { +public: + explicit StringMapEntryStorage(size_t strLen, NoneType none = None) + : StringMapEntryBase(strLen) {} + StringMapEntryStorage(StringMapEntryStorage &E) = delete; + + NoneType getValue() const { return None; } +}; + +/// StringMapEntry - This is used to represent one value that is inserted into +/// a StringMap. It contains the Value itself and the key: the string length +/// and data. +template +class StringMapEntry final : public StringMapEntryStorage { +public: + using StringMapEntryStorage::StringMapEntryStorage; + + StringRef getKey() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// getKeyData - Return the start of the string data that is the key for this /// value. The string data is always stored immediately after the /// StringMapEntry object. const char *getKeyData() const {return reinterpret_cast(this+1);} - StringRef first() const { return StringRef(getKeyData(), getKeyLength()); } + StringRef first() const { + return StringRef(getKeyData(), this->getKeyLength()); + } /// Create a StringMapEntry for the specified key construct the value using /// \p InitiVals. @@ -199,7 +222,7 @@ public: template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. 
- size_t AllocSize = sizeof(StringMapEntry) + getKeyLength() + 1; + size_t AllocSize = sizeof(StringMapEntry) + this->getKeyLength() + 1; this->~StringMapEntry(); Allocator.Deallocate(static_cast(this), AllocSize); } @@ -391,6 +414,16 @@ public: return try_emplace(KV.first, std::move(KV.second)); } + /// Inserts an element or assigns to the current element if the key already + /// exists. The return type is the same as try_emplace. + template + std::pair insert_or_assign(StringRef Key, V &&Val) { + auto Ret = try_emplace(Key, std::forward(Val)); + if (!Ret.second) + Ret.first->second = std::forward(Val); + return Ret; + } + /// Emplace a new element for the specified key into the map if the key isn't /// already in the map. The bool component of the returned pair is true /// if and only if the insertion takes place, and the iterator component of diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 4661b1e68b2..52baab17bed 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -67,6 +67,20 @@ namespace llvm { return ::memcmp(Lhs,Rhs,Length); } + // Constexpr version of std::strlen. + static constexpr size_t strLen(const char *Str) { +#if __cplusplus > 201402L + return std::char_traits::length(Str); +#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) + return __builtin_strlen(Str); +#else + const char *Begin = Str; + while (*Str != '\0') + ++Str; + return Str - Begin; +#endif + } + public: /// @name Constructors /// @{ @@ -79,8 +93,8 @@ namespace llvm { StringRef(std::nullptr_t) = delete; /// Construct a string ref from a cstring. - /*implicit*/ StringRef(const char *Str) - : Data(Str), Length(Str ? ::strlen(Str) : 0) {} + /*implicit*/ constexpr StringRef(const char *Str) + : Data(Str), Length(Str ? strLen(Str) : 0) {} /// Construct a string ref from a pointer and length. 
/*implicit*/ constexpr StringRef(const char *data, size_t length) diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index af3a44a7b32..60be09d3c32 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -24,8 +24,8 @@ namespace llvm { /// StringSet - A wrapper for StringMap that provides set-like functionality. template - class StringSet : public StringMap { - using base = StringMap; + class StringSet : public StringMap { + using base = StringMap; public: StringSet() = default; @@ -37,13 +37,13 @@ namespace llvm { std::pair insert(StringRef Key) { assert(!Key.empty()); - return base::insert(std::make_pair(Key, '\0')); + return base::insert(std::make_pair(Key, None)); } template void insert(const InputIt &Begin, const InputIt &End) { for (auto It = Begin; It != End; ++It) - base::insert(std::make_pair(*It, '\0')); + base::insert(std::make_pair(*It, None)); } template diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index ac82451a9b2..6b76d35d4e9 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -31,6 +31,10 @@ class TinyPtrVector { public: using VecTy = SmallVector; using value_type = typename VecTy::value_type; + // EltTy must be the first pointer type so that is is true for the + // default-constructed PtrUnion. This allows an empty TinyPtrVector to + // naturally vend a begin/end iterator of type EltTy* without an additional + // check for the empty state. 
using PtrUnion = PointerUnion; private: @@ -96,14 +100,14 @@ public: if (RHS.Val.template is()) { V->clear(); V->push_back(RHS.front()); - RHS.Val = (EltTy)nullptr; + RHS.Val = EltTy(); return *this; } delete V; } Val = RHS.Val; - RHS.Val = (EltTy)nullptr; + RHS.Val = EltTy(); return *this; } @@ -213,9 +217,9 @@ public: EltTy operator[](unsigned i) const { assert(!Val.isNull() && "can't index into an empty vector"); - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { assert(i == 0 && "tinyvector index out of range"); - return V; + return Val.template get(); } assert(i < Val.template get()->size() && @@ -225,29 +229,29 @@ public: EltTy front() const { assert(!empty() && "vector empty"); - if (EltTy V = Val.template dyn_cast()) - return V; + if (Val.template is()) + return Val.template get(); return Val.template get()->front(); } EltTy back() const { assert(!empty() && "vector empty"); - if (EltTy V = Val.template dyn_cast()) - return V; + if (Val.template is()) + return Val.template get(); return Val.template get()->back(); } void push_back(EltTy NewVal) { - assert(NewVal && "Can't add a null value"); - // If we have nothing, add something. if (Val.isNull()) { Val = NewVal; + assert(!Val.isNull() && "Can't add a null value"); return; } // If we have a single value, convert to a vector. - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { + EltTy V = Val.template get(); Val = new VecTy(); Val.template get()->push_back(V); } @@ -267,7 +271,7 @@ public: void clear() { // If we have a single value, convert to empty. if (Val.template is()) { - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { // If we have a vector form, just clear it. Vec->clear(); @@ -282,7 +286,7 @@ public: // If we have a single value, convert to empty. 
if (Val.template is()) { if (I == begin()) - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { // multiple items in a vector; just do the erase, there is no // benefit to collapsing back to a pointer @@ -298,7 +302,7 @@ public: if (Val.template is()) { if (S == begin() && S != E) - Val = (EltTy)nullptr; + Val = EltTy(); } else if (VecTy *Vec = Val.template dyn_cast()) { return Vec->erase(S, E); } @@ -313,7 +317,8 @@ public: return std::prev(end()); } assert(!Val.isNull() && "Null value with non-end insert iterator."); - if (EltTy V = Val.template dyn_cast()) { + if (Val.template is()) { + EltTy V = Val.template get(); assert(I == begin()); Val = Elt; push_back(V); @@ -339,7 +344,8 @@ public: } Val = new VecTy(); - } else if (EltTy V = Val.template dyn_cast()) { + } else if (Val.template is()) { + EltTy V = Val.template get(); Val = new VecTy(); Val.template get()->push_back(V); } diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h deleted file mode 100644 index 5aefb05ecdd..00000000000 --- a/include/llvm/ADT/VariadicFunction.h +++ /dev/null @@ -1,330 +0,0 @@ -//===- VariadicFunction.h - Variadic Functions ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements compile-time type-safe variadic functions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_VARIADICFUNCTION_H -#define LLVM_ADT_VARIADICFUNCTION_H - -#include "llvm/ADT/ArrayRef.h" - -namespace llvm { - -// Define macros to aid in expanding a comma separated series with the index of -// the series pasted onto the last token. 
-#define LLVM_COMMA_JOIN1(x) x ## 0 -#define LLVM_COMMA_JOIN2(x) LLVM_COMMA_JOIN1(x), x ## 1 -#define LLVM_COMMA_JOIN3(x) LLVM_COMMA_JOIN2(x), x ## 2 -#define LLVM_COMMA_JOIN4(x) LLVM_COMMA_JOIN3(x), x ## 3 -#define LLVM_COMMA_JOIN5(x) LLVM_COMMA_JOIN4(x), x ## 4 -#define LLVM_COMMA_JOIN6(x) LLVM_COMMA_JOIN5(x), x ## 5 -#define LLVM_COMMA_JOIN7(x) LLVM_COMMA_JOIN6(x), x ## 6 -#define LLVM_COMMA_JOIN8(x) LLVM_COMMA_JOIN7(x), x ## 7 -#define LLVM_COMMA_JOIN9(x) LLVM_COMMA_JOIN8(x), x ## 8 -#define LLVM_COMMA_JOIN10(x) LLVM_COMMA_JOIN9(x), x ## 9 -#define LLVM_COMMA_JOIN11(x) LLVM_COMMA_JOIN10(x), x ## 10 -#define LLVM_COMMA_JOIN12(x) LLVM_COMMA_JOIN11(x), x ## 11 -#define LLVM_COMMA_JOIN13(x) LLVM_COMMA_JOIN12(x), x ## 12 -#define LLVM_COMMA_JOIN14(x) LLVM_COMMA_JOIN13(x), x ## 13 -#define LLVM_COMMA_JOIN15(x) LLVM_COMMA_JOIN14(x), x ## 14 -#define LLVM_COMMA_JOIN16(x) LLVM_COMMA_JOIN15(x), x ## 15 -#define LLVM_COMMA_JOIN17(x) LLVM_COMMA_JOIN16(x), x ## 16 -#define LLVM_COMMA_JOIN18(x) LLVM_COMMA_JOIN17(x), x ## 17 -#define LLVM_COMMA_JOIN19(x) LLVM_COMMA_JOIN18(x), x ## 18 -#define LLVM_COMMA_JOIN20(x) LLVM_COMMA_JOIN19(x), x ## 19 -#define LLVM_COMMA_JOIN21(x) LLVM_COMMA_JOIN20(x), x ## 20 -#define LLVM_COMMA_JOIN22(x) LLVM_COMMA_JOIN21(x), x ## 21 -#define LLVM_COMMA_JOIN23(x) LLVM_COMMA_JOIN22(x), x ## 22 -#define LLVM_COMMA_JOIN24(x) LLVM_COMMA_JOIN23(x), x ## 23 -#define LLVM_COMMA_JOIN25(x) LLVM_COMMA_JOIN24(x), x ## 24 -#define LLVM_COMMA_JOIN26(x) LLVM_COMMA_JOIN25(x), x ## 25 -#define LLVM_COMMA_JOIN27(x) LLVM_COMMA_JOIN26(x), x ## 26 -#define LLVM_COMMA_JOIN28(x) LLVM_COMMA_JOIN27(x), x ## 27 -#define LLVM_COMMA_JOIN29(x) LLVM_COMMA_JOIN28(x), x ## 28 -#define LLVM_COMMA_JOIN30(x) LLVM_COMMA_JOIN29(x), x ## 29 -#define LLVM_COMMA_JOIN31(x) LLVM_COMMA_JOIN30(x), x ## 30 -#define LLVM_COMMA_JOIN32(x) LLVM_COMMA_JOIN31(x), x ## 31 - -/// Class which can simulate a type-safe variadic function. 
-/// -/// The VariadicFunction class template makes it easy to define -/// type-safe variadic functions where all arguments have the same -/// type. -/// -/// Suppose we need a variadic function like this: -/// -/// ResultT Foo(const ArgT &A_0, const ArgT &A_1, ..., const ArgT &A_N); -/// -/// Instead of many overloads of Foo(), we only need to define a helper -/// function that takes an array of arguments: -/// -/// ResultT FooImpl(ArrayRef Args) { -/// // 'Args[i]' is a pointer to the i-th argument passed to Foo(). -/// ... -/// } -/// -/// and then define Foo() like this: -/// -/// const VariadicFunction Foo; -/// -/// VariadicFunction takes care of defining the overloads of Foo(). -/// -/// Actually, Foo is a function object (i.e. functor) instead of a plain -/// function. This object is stateless and its constructor/destructor -/// does nothing, so it's safe to create global objects and call Foo(...) at -/// any time. -/// -/// Sometimes we need a variadic function to have some fixed leading -/// arguments whose types may be different from that of the optional -/// arguments. For example: -/// -/// bool FullMatch(const StringRef &S, const RE &Regex, -/// const ArgT &A_0, ..., const ArgT &A_N); -/// -/// VariadicFunctionN is for such cases, where N is the number of fixed -/// arguments. It is like VariadicFunction, except that it takes N more -/// template arguments for the types of the fixed arguments: -/// -/// bool FullMatchImpl(const StringRef &S, const RE &Regex, -/// ArrayRef Args) { ... } -/// const VariadicFunction2 -/// FullMatch; -/// -/// Currently VariadicFunction and friends support up-to 3 -/// fixed leading arguments and up-to 32 optional arguments. 
-template )> -struct VariadicFunction { - ResultT operator()() const { - return Func(None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction1 { - ResultT operator()(Param0T P0) const { - return Func(P0, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - 
LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction2 { - ResultT operator()(Param0T P0, Param1T P1) const { - return Func(P0, P1, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, Param1T P1, \ - LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, P1, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -template )> -struct VariadicFunction3 { - ResultT operator()(Param0T P0, Param1T P1, Param2T P2) const { - return Func(P0, P1, P2, None); - } - -#define LLVM_DEFINE_OVERLOAD(N) \ - ResultT operator()(Param0T P0, Param1T P1, Param2T P2, \ - LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ - const ArgT 
*const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, P1, P2, makeArrayRef(Args)); \ - } - LLVM_DEFINE_OVERLOAD(1) - LLVM_DEFINE_OVERLOAD(2) - LLVM_DEFINE_OVERLOAD(3) - LLVM_DEFINE_OVERLOAD(4) - LLVM_DEFINE_OVERLOAD(5) - LLVM_DEFINE_OVERLOAD(6) - LLVM_DEFINE_OVERLOAD(7) - LLVM_DEFINE_OVERLOAD(8) - LLVM_DEFINE_OVERLOAD(9) - LLVM_DEFINE_OVERLOAD(10) - LLVM_DEFINE_OVERLOAD(11) - LLVM_DEFINE_OVERLOAD(12) - LLVM_DEFINE_OVERLOAD(13) - LLVM_DEFINE_OVERLOAD(14) - LLVM_DEFINE_OVERLOAD(15) - LLVM_DEFINE_OVERLOAD(16) - LLVM_DEFINE_OVERLOAD(17) - LLVM_DEFINE_OVERLOAD(18) - LLVM_DEFINE_OVERLOAD(19) - LLVM_DEFINE_OVERLOAD(20) - LLVM_DEFINE_OVERLOAD(21) - LLVM_DEFINE_OVERLOAD(22) - LLVM_DEFINE_OVERLOAD(23) - LLVM_DEFINE_OVERLOAD(24) - LLVM_DEFINE_OVERLOAD(25) - LLVM_DEFINE_OVERLOAD(26) - LLVM_DEFINE_OVERLOAD(27) - LLVM_DEFINE_OVERLOAD(28) - LLVM_DEFINE_OVERLOAD(29) - LLVM_DEFINE_OVERLOAD(30) - LLVM_DEFINE_OVERLOAD(31) - LLVM_DEFINE_OVERLOAD(32) -#undef LLVM_DEFINE_OVERLOAD -}; - -// Cleanup the macro namespace. 
-#undef LLVM_COMMA_JOIN1 -#undef LLVM_COMMA_JOIN2 -#undef LLVM_COMMA_JOIN3 -#undef LLVM_COMMA_JOIN4 -#undef LLVM_COMMA_JOIN5 -#undef LLVM_COMMA_JOIN6 -#undef LLVM_COMMA_JOIN7 -#undef LLVM_COMMA_JOIN8 -#undef LLVM_COMMA_JOIN9 -#undef LLVM_COMMA_JOIN10 -#undef LLVM_COMMA_JOIN11 -#undef LLVM_COMMA_JOIN12 -#undef LLVM_COMMA_JOIN13 -#undef LLVM_COMMA_JOIN14 -#undef LLVM_COMMA_JOIN15 -#undef LLVM_COMMA_JOIN16 -#undef LLVM_COMMA_JOIN17 -#undef LLVM_COMMA_JOIN18 -#undef LLVM_COMMA_JOIN19 -#undef LLVM_COMMA_JOIN20 -#undef LLVM_COMMA_JOIN21 -#undef LLVM_COMMA_JOIN22 -#undef LLVM_COMMA_JOIN23 -#undef LLVM_COMMA_JOIN24 -#undef LLVM_COMMA_JOIN25 -#undef LLVM_COMMA_JOIN26 -#undef LLVM_COMMA_JOIN27 -#undef LLVM_COMMA_JOIN28 -#undef LLVM_COMMA_JOIN29 -#undef LLVM_COMMA_JOIN30 -#undef LLVM_COMMA_JOIN31 -#undef LLVM_COMMA_JOIN32 - -} // end namespace llvm - -#endif // LLVM_ADT_VARIADICFUNCTION_H diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h index 774c7c4e336..aa8830943ca 100644 --- a/include/llvm/ADT/iterator_range.h +++ b/include/llvm/ADT/iterator_range.h @@ -44,6 +44,7 @@ public: IteratorT begin() const { return begin_iterator; } IteratorT end() const { return end_iterator; } + bool empty() const { return begin_iterator == end_iterator; } }; /// Convenience function for iterating over sub-ranges. diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 948341554f2..282142f51bb 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -949,7 +949,7 @@ template class AAResultBase { /// A pointer to the AAResults object that this AAResult is /// aggregated within. May be null if not aggregated. - AAResults *AAR; + AAResults *AAR = nullptr; /// Helper to dispatch calls back through the derived type. 
DerivedT &derived() { return static_cast(*this); } diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 34a509b7f4b..187317e3831 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -87,10 +87,11 @@ class AliasSet : public ilist_node { AAInfo = NewAAInfo; else { AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); - if (!Intersection) { + if (!Intersection.TBAA || !Intersection.Scope || + !Intersection.NoAlias) { // NewAAInfo conflicts with AAInfo. AAInfo = DenseMapInfo::getTombstoneKey(); - return SizeChanged; + SizeChanged = true; } AAInfo = Intersection; } diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index b42846472f2..0efbd59023d 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -73,8 +73,8 @@ class AssumptionCache { /// Get the vector of assumptions which affect a value from the cache. SmallVector &getOrInsertAffectedValues(Value *V); - /// Copy affected values in the cache for OV to be affected values for NV. - void copyAffectedValuesInCache(Value *OV, Value *NV); + /// Move affected values in the cache for OV to be affected values for NV. + void transferAffectedValuesInCache(Value *OV, Value *NV); /// Flag tracking whether we have scanned the function yet. 
/// diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index bb55e76ac86..68f137ba622 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges = false); +bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ, + bool AllowIdenticalEdges = false); /// Determine whether instruction 'To' is reachable from 'From', without passing /// through any blocks in ExclusionSet, returning true if uncertain. diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h index 7c8b42b1d8d..5f5e52af3d8 100644 --- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase { class FunctionInfo; public: - explicit CFLAndersAAResult(const TargetLibraryInfo &TLI); + explicit CFLAndersAAResult( + std::function GetTLI); CFLAndersAAResult(CFLAndersAAResult &&RHS); ~CFLAndersAAResult(); @@ -74,7 +75,7 @@ private: /// Build summary for a given function FunctionInfo buildInfoFrom(const Function &); - const TargetLibraryInfo &TLI; + std::function GetTLI; /// Cached mapping of Functions to their StratifiedSets. 
/// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h index cc7a47cd9a5..135321616b7 100644 --- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase { class FunctionInfo; public: - explicit CFLSteensAAResult(const TargetLibraryInfo &TLI); + explicit CFLSteensAAResult( + std::function GetTLI); CFLSteensAAResult(CFLSteensAAResult &&Arg); ~CFLSteensAAResult(); @@ -90,7 +91,7 @@ public: } private: - const TargetLibraryInfo &TLI; + std::function GetTLI; /// Cached mapping of Functions to their StratifiedSets. /// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 8af5fb86995..933f2210daf 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -88,6 +88,7 @@ #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" @@ -583,10 +584,12 @@ public: SmallVectorImpl &CallHandles) { assert(CallHandles.empty() && "Must start with a clear set of handles."); - SmallVector CallCounts; + SmallDenseMap CallCounts; + CallCount CountLocal = {0, 0}; for (LazyCallGraph::Node &N : C) { - CallCounts.push_back({0, 0}); - CallCount &Count = CallCounts.back(); + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; for (Instruction &I : instructions(N.getFunction())) if (auto CS = CallSite(&I)) { if (CS.getCalledFunction()) { @@ -626,8 +629,6 @@ public: // Check that we didn't miss any update scenario. 
assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert((int)CallCounts.size() == C->size() && - "Cannot have changed the size of the SCC!"); // Check whether any of the handles were devirtualized. auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { @@ -642,7 +643,7 @@ public: if (!F) return false; - LLVM_DEBUG(dbgs() << "Found devirutalized call from " + LLVM_DEBUG(dbgs() << "Found devirtualized call from " << CS.getParent()->getParent()->getName() << " to " << F->getName() << "\n"); @@ -664,12 +665,20 @@ public: // manner of transformations such as DCE and other things, but seems to // work well in practice. if (!Devirt) - for (int i = 0, Size = C->size(); i < Size; ++i) - if (CallCounts[i].Indirect > NewCallCounts[i].Indirect && - CallCounts[i].Direct < NewCallCounts[i].Direct) { - Devirt = true; - break; + // Iterate over the keys in NewCallCounts, if Function also exists in + // CallCounts, make the check below. + for (auto &Pair : NewCallCounts) { + auto &CallCountNew = Pair.second; + auto CountIt = CallCounts.find(Pair.first); + if (CountIt != CallCounts.end()) { + const auto &CallCountOld = CountIt->second; + if (CallCountOld.Indirect > CallCountNew.Indirect && + CallCountOld.Direct < CallCountNew.Direct) { + Devirt = true; + break; + } } + } if (!Devirt) { PA.intersect(std::move(PassPA)); diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index ca7abd34fea..29921a51d5b 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -17,6 +17,7 @@ namespace llvm { class Value; class Use; + class DataLayout; class Instruction; class DominatorTree; class OrderedBasicBlock; @@ -83,6 +84,11 @@ namespace llvm { /// use U. Return true to stop the traversal or false to continue looking /// for more capturing instructions. 
virtual bool captured(const Use *U) = 0; + + /// isDereferenceableOrNull - Overload to allow clients with additional + /// knowledge about pointer dereferenceability to provide it and thereby + /// avoid conservative responses when a pointer is compared to null. + virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL); }; /// PointerMayBeCaptured - Visit the value and the values derived from it and diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h new file mode 100644 index 00000000000..0e1eb9d2cda --- /dev/null +++ b/include/llvm/Analysis/DDG.h @@ -0,0 +1,430 @@ +//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDG_H +#define LLVM_ANALYSIS_DDG_H + +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/Instructions.h" +#include + +namespace llvm { +class DDGNode; +class DDGEdge; +using DDGNodeBase = DGNode; +using DDGEdgeBase = DGEdge; +using DDGBase = DirectedGraph; +class LPMUpdater; + +/// Data Dependence Graph Node +/// The graph can represent the following types of nodes: +/// 1. Single instruction node containing just one instruction. +/// 2. Multiple instruction node where two or more instructions from +/// the same basic block are merged into one node. +/// 3. Root node is a special node that connects to all components such that +/// there is always a path from it to any node in the graph. 
+class DDGNode : public DDGNodeBase { +public: + using InstructionListType = SmallVectorImpl; + + enum class NodeKind { + Unknown, + SingleInstruction, + MultiInstruction, + Root, + }; + + DDGNode() = delete; + DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} + DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} + DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} + virtual ~DDGNode() = 0; + + DDGNode &operator=(const DDGNode &N) { + DGNode::operator=(N); + Kind = N.Kind; + return *this; + } + + DDGNode &operator=(DDGNode &&N) { + DGNode::operator=(std::move(N)); + Kind = N.Kind; + return *this; + } + + /// Getter for the kind of this node. + NodeKind getKind() const { return Kind; } + + /// Collect a list of instructions, in \p IList, for which predicate \p Pred + /// evaluates to true when iterating over instructions of this node. Return + /// true if at least one instruction was collected, and false otherwise. + bool collectInstructions(llvm::function_ref const &Pred, + InstructionListType &IList) const; + +protected: + /// Setter for the kind of this node. + void setKind(NodeKind K) { Kind = K; } + +private: + NodeKind Kind; +}; + +/// Subclass of DDGNode representing the root node of the graph. +/// There should only be one such node in a given graph. +class RootDDGNode : public DDGNode { +public: + RootDDGNode() : DDGNode(NodeKind::Root) {} + RootDDGNode(const RootDDGNode &N) = delete; + RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {} + ~RootDDGNode() {} + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::Root; + } + static bool classof(const RootDDGNode *N) { return true; } +}; + +/// Subclass of DDGNode representing single or multi-instruction nodes. 
+class SimpleDDGNode : public DDGNode { +public: + SimpleDDGNode() = delete; + SimpleDDGNode(Instruction &I); + SimpleDDGNode(const SimpleDDGNode &N); + SimpleDDGNode(SimpleDDGNode &&N); + ~SimpleDDGNode(); + + SimpleDDGNode &operator=(const SimpleDDGNode &N) { + DDGNode::operator=(N); + InstList = N.InstList; + return *this; + } + + SimpleDDGNode &operator=(SimpleDDGNode &&N) { + DDGNode::operator=(std::move(N)); + InstList = std::move(N.InstList); + return *this; + } + + /// Get the list of instructions in this node. + const InstructionListType &getInstructions() const { + assert(!InstList.empty() && "Instruction List is empty."); + return InstList; + } + InstructionListType &getInstructions() { + return const_cast( + static_cast(this)->getInstructions()); + } + + /// Get the first/last instruction in the node. + Instruction *getFirstInstruction() const { return getInstructions().front(); } + Instruction *getLastInstruction() const { return getInstructions().back(); } + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::SingleInstruction || + N->getKind() == NodeKind::MultiInstruction; + } + static bool classof(const SimpleDDGNode *N) { return true; } + +private: + /// Append the list of instructions in \p Input to this node. + void appendInstructions(const InstructionListType &Input) { + setKind((InstList.size() == 0 && Input.size() == 1) + ? NodeKind::SingleInstruction + : NodeKind::MultiInstruction); + InstList.insert(InstList.end(), Input.begin(), Input.end()); + } + void appendInstructions(const SimpleDDGNode &Input) { + appendInstructions(Input.getInstructions()); + } + + /// List of instructions associated with a single or multi-instruction node. + SmallVector InstList; +}; + +/// Data Dependency Graph Edge. +/// An edge in the DDG can represent a def-use relationship or +/// a memory dependence based on the result of DependenceAnalysis. 
+/// A rooted edge connects the root node to one of the components +/// of the graph. +class DDGEdge : public DDGEdgeBase { +public: + /// The kind of edge in the DDG + enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted }; + + explicit DDGEdge(DDGNode &N) = delete; + DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {} + DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {} + DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {} + DDGEdge &operator=(const DDGEdge &E) { + DDGEdgeBase::operator=(E); + Kind = E.Kind; + return *this; + } + + DDGEdge &operator=(DDGEdge &&E) { + DDGEdgeBase::operator=(std::move(E)); + Kind = E.Kind; + return *this; + } + + /// Get the edge kind + EdgeKind getKind() const { return Kind; }; + + /// Return true if this is a def-use edge, and false otherwise. + bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; } + + /// Return true if this is a memory dependence edge, and false otherwise. + bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; } + + /// Return true if this is an edge stemming from the root node, and false + /// otherwise. + bool isRooted() const { return Kind == EdgeKind::Rooted; } + +private: + EdgeKind Kind; +}; + +/// Encapsulate some common data and functionality needed for different +/// variations of data dependence graphs. +template class DependenceGraphInfo { +public: + using DependenceList = SmallVector, 1>; + + DependenceGraphInfo() = delete; + DependenceGraphInfo(const DependenceGraphInfo &G) = delete; + DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo) + : Name(N), DI(DepInfo), Root(nullptr) {} + DependenceGraphInfo(DependenceGraphInfo &&G) + : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {} + virtual ~DependenceGraphInfo() {} + + /// Return the label that is used to name this graph. + const StringRef getName() const { return Name; } + + /// Return the root node of the graph. 
+ NodeType &getRoot() const { + assert(Root && "Root node is not available yet. Graph construction may " + "still be in progress\n"); + return *Root; + } + +protected: + // Name of the graph. + std::string Name; + + // Store a copy of DependenceInfo in the graph, so that individual memory + // dependencies don't need to be stored. Instead when the dependence is + // queried it is recomputed using @DI. + const DependenceInfo DI; + + // A special node in the graph that has an edge to every connected component of + // the graph, to ensure all nodes are reachable in a graph walk. + NodeType *Root = nullptr; +}; + +using DDGInfo = DependenceGraphInfo; + +/// Data Dependency Graph +class DataDependenceGraph : public DDGBase, public DDGInfo { + friend class DDGBuilder; + +public: + using NodeType = DDGNode; + using EdgeType = DDGEdge; + + DataDependenceGraph() = delete; + DataDependenceGraph(const DataDependenceGraph &G) = delete; + DataDependenceGraph(DataDependenceGraph &&G) + : DDGBase(std::move(G)), DDGInfo(std::move(G)) {} + DataDependenceGraph(Function &F, DependenceInfo &DI); + DataDependenceGraph(const Loop &L, DependenceInfo &DI); + ~DataDependenceGraph(); + +protected: + /// Add node \p N to the graph, if it's not added yet, and keep track of + /// the root node. Return true if node is successfully added. + bool addNode(NodeType &N); + +}; + +/// Concrete implementation of a pure data dependence graph builder. This class +/// provides custom implementation for the pure-virtual functions used in the +/// generic dependence graph build algorithm. +/// +/// For information about time complexity of the build algorithm see the +/// comments near the declaration of AbstractDependenceGraphBuilder. 
+class DDGBuilder : public AbstractDependenceGraphBuilder { +public: + DDGBuilder(DataDependenceGraph &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : AbstractDependenceGraphBuilder(G, D, BBs) {} + DDGNode &createRootNode() final override { + auto *RN = new RootDDGNode(); + assert(RN && "Failed to allocate memory for DDG root node."); + Graph.addNode(*RN); + return *RN; + } + DDGNode &createFineGrainedNode(Instruction &I) final override { + auto *SN = new SimpleDDGNode(I); + assert(SN && "Failed to allocate memory for simple DDG node."); + Graph.addNode(*SN); + return *SN; + } + DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted); + assert(E && "Failed to allocate memory for edge"); + assert(isa(Src) && "Expected root node"); + Graph.connect(Src, Tgt, *E); + return *E; + } + +}; + +raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N); +raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G); + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// Analysis pass that builds the DDG for a loop. 
+class DDGAnalysis : public AnalysisInfoMixin { +public: + using Result = std::unique_ptr; + Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR); + +private: + friend AnalysisInfoMixin; + static AnalysisKey Key; +}; + +/// Textual printer pass for the DDG of a loop. +class DDGAnalysisPrinterPass : public PassInfoMixin { +public: + explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + +private: + raw_ostream &OS; +}; + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for the DDG +//===--------------------------------------------------------------------===// + +/// non-const versions of the grapth trait specializations for DDG +template <> struct GraphTraits { + using NodeRef = DDGNode *; + + static DDGNode *DDGGetTargetNode(DGEdge *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. 
+ using ChildIteratorType = + mapped_iterator; + using ChildEdgeIteratorType = DDGNode::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits : public GraphTraits { + using nodes_iterator = DataDependenceGraph::iterator; + static NodeRef getEntryNode(DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); } +}; + +/// const versions of the grapth trait specializations for DDG +template <> struct GraphTraits { + using NodeRef = const DDGNode *; + + static const DDGNode *DDGGetTargetNode(const DGEdge *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. 
+ using ChildIteratorType = + mapped_iterator; + using ChildEdgeIteratorType = DDGNode::const_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits + : public GraphTraits { + using nodes_iterator = DataDependenceGraph::const_iterator; + static NodeRef getEntryNode(const DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(const DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(const DataDependenceGraph *DG) { + return DG->end(); + } +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DDG_H diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index 0410a331465..c9e8df5db1c 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -99,7 +99,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); std::string GraphName = DOTGraphTraits::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -162,7 +162,7 @@ public: errs() << "Writing '" << Filename << "'..."; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); std::string Title = DOTGraphTraits::getGraphName(Graph); if (!EC) diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h new file mode 100644 index 00000000000..5f4bdb47043 --- /dev/null +++ 
b/include/llvm/Analysis/DependenceGraphBuilder.h @@ -0,0 +1,119 @@ +//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a builder interface that can be used to populate dependence +// graphs such as DDG and PDG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H + +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { + +/// This abstract builder class defines a set of high-level steps for creating +/// DDG-like graphs. The client code is expected to inherit from this class and +/// define concrete implementation for each of the pure virtual functions used +/// in the high-level algorithm. +template class AbstractDependenceGraphBuilder { +protected: + using BasicBlockListType = SmallVectorImpl; + +private: + using NodeType = typename GraphType::NodeType; + using EdgeType = typename GraphType::EdgeType; + +public: + using ClassesType = EquivalenceClasses; + using NodeListType = SmallVector; + + AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : Graph(G), DI(D), BBList(BBs) {} + virtual ~AbstractDependenceGraphBuilder() {} + + /// The main entry to the graph construction algorithm. It starts by + /// creating nodes in increasing order of granularity and then + /// adds def-use and memory edges. 
+ /// + /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V + /// is the number of vertecies (nodes) and I is the number of instructions in + /// each node. The total number of instructions, N, is equal to V * I, + /// therefore the worst-case time complexity is O(N^2). The average time + /// complexity is O((N^2)/2). + void populate() { + createFineGrainedNodes(); + createDefUseEdges(); + createMemoryDependencyEdges(); + createAndConnectRootNode(); + } + + /// Create fine grained nodes. These are typically atomic nodes that + /// consist of a single instruction. + void createFineGrainedNodes(); + + /// Analyze the def-use chains and create edges from the nodes containing + /// definitions to the nodes containing the uses. + void createDefUseEdges(); + + /// Analyze data dependencies that exist between memory loads or stores, + /// in the graph nodes and create edges between them. + void createMemoryDependencyEdges(); + + /// Create a root node and add edges such that each node in the graph is + /// reachable from the root. + void createAndConnectRootNode(); + +protected: + /// Create the root node of the graph. + virtual NodeType &createRootNode() = 0; + + /// Create an atomic node in the graph given a single instruction. + virtual NodeType &createFineGrainedNode(Instruction &I) = 0; + + /// Create a def-use edge going from \p Src to \p Tgt. + virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a memory dependence edge going from \p Src to \p Tgt. + virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a rooted edge going from \p Src to \p Tgt . + virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Deallocate memory of edge \p E. + virtual void destroyEdge(EdgeType &E) { delete &E; } + + /// Deallocate memory of node \p N. 
+ virtual void destroyNode(NodeType &N) { delete &N; } + + /// Map types to map instructions to nodes used when populating the graph. + using InstToNodeMap = DenseMap; + + /// Reference to the graph that gets built by a concrete implementation of + /// this builder. + GraphType &Graph; + + /// Dependence information used to create memory dependence edges in the + /// graph. + DependenceInfo &DI; + + /// The list of basic blocks to consider when building the graph. + const BasicBlockListType &BBList; + + /// A mapping from instructions to the corresponding nodes in the graph. + InstToNodeMap IMap; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h index 3cfb9d13df9..2fac9c8b4b3 100644 --- a/include/llvm/Analysis/DivergenceAnalysis.h +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -73,9 +73,12 @@ public: /// operands bool isAlwaysUniform(const Value &Val) const; - /// \brief Whether \p Val is a divergent value + /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; + /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + void print(raw_ostream &OS, const Module *) const; private: @@ -189,12 +192,19 @@ public: /// The GPU kernel this analysis result is for const Function &getFunction() const { return DA.getFunction(); } - /// Whether \p V is divergent. + /// Whether \p V is divergent at its definition. bool isDivergent(const Value &V) const; - /// Whether \p V is uniform/non-divergent + /// Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + + /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } + /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be + /// divergent. 
+ bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } + /// Print all divergent values in the kernel. void print(raw_ostream &OS, const Module *) const; }; diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h index d3fcfc2d41a..5d1c5a05206 100644 --- a/include/llvm/Analysis/GlobalsModRef.h +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase { class FunctionInfo; const DataLayout &DL; - const TargetLibraryInfo &TLI; + std::function GetTLI; /// The globals that do not have their addresses taken. SmallPtrSet NonAddressTakenGlobals; @@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase { /// could perform to the memory utilization here if this becomes a problem. std::list Handles; - explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + explicit GlobalsAAResult( + const DataLayout &DL, + std::function GetTLI); public: GlobalsAAResult(GlobalsAAResult &&Arg); ~GlobalsAAResult(); - static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG); + static GlobalsAAResult + analyzeModule(Module &M, + std::function GetTLI, + CallGraph &CG); //------------------------------------------------ // Implement the AliasAnalysis API diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 054ffca7215..a5ffca13046 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -31,6 +31,7 @@ #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" @@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for the multiplication 
of a FMA, fold the result or return +/// null. In contrast to SimplifyFMulInst, this function will not perform +/// simplifications whose unrounded results differ when rounded to the argument +/// type. +Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// Given operand for a UnaryOperator, fold the result or return null. Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q); -/// Given operand for an FP UnaryOperator, fold the result or return null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. -Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operand for a UnaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. +Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q); /// Given operands for a BinaryOperator, fold the result or return null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q); -/// Given operands for an FP BinaryOperator, fold the result or return null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for a BinaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. 
+Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); @@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, /// This first performs a normal RAUW of I with SimpleV. It then recursively /// attempts to simplify those users updated by the operation. The 'I' /// instruction must not be equal to the simplified value 'SimpleV'. +/// If UnsimplifiedUsers is provided, instructions that could not be simplified +/// are added to it. /// /// The function returns true if any simplifications were performed. -bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); +bool replaceAndRecursivelySimplify( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, + SmallSetVector *UnsimplifiedUsers = nullptr); /// Recursively attempt to simplify an instruction. /// diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index 2d83929211e..20a35bef189 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -931,7 +931,8 @@ public: /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are /// requested during traversal. - LazyCallGraph(Module &M, TargetLibraryInfo &TLI); + LazyCallGraph(Module &M, + function_ref GetTLI); LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); @@ -1267,7 +1268,12 @@ public: /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. 
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) { - return LazyCallGraph(M, AM.getResult(M)); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + return LazyCallGraph(M, GetTLI); } }; diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h index 0a338b81664..e33b8f4129f 100644 --- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -39,17 +39,18 @@ public: void print(raw_ostream &OS, const Module *) const override; // Returns true if V is divergent at its definition. - // - // Even if this function returns false, V may still be divergent when used - // in a different basic block. bool isDivergent(const Value *V) const; + // Returns true if U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use *U) const; + // Returns true if V is uniform/non-divergent. - // - // Even if this function returns true, V may still be divergent when used - // in a different basic block. bool isUniform(const Value *V) const { return !isDivergent(V); } + // Returns true if U is uniform/non-divergent. Uses of a uniform value can be + // divergent. + bool isUniformUse(const Use *U) const { return !isDivergentUse(U); } + // Keep the analysis results uptodate by removing an erased value. void removeValue(const Value *V) { DivergentValues.erase(V); } @@ -62,6 +63,9 @@ private: // Stores all divergent values. DenseSet DivergentValues; + + // Stores divergent uses of possibly uniform values. 
+ DenseSet DivergentUses; }; } // End llvm namespace diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 5df6bb02308..9604b2521e8 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -20,7 +20,9 @@ namespace llvm { class DataLayout; +class Loop; class MDNode; +class ScalarEvolution; /// Return true if this is always a dereferenceable pointer. If the context /// instruction is specified perform context-sensitive analysis and return true @@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty, /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, const DataLayout &DL, + MaybeAlign Alignment, + const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, /// greater or equal than requested. If the context instruction is specified /// performs context-sensitive analysis and returns true if the pointer is /// dereferenceable at the specified instruction. -bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. 
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); +/// Return true if we can prove that the given load (which is assumed to be +/// within the specified loop) would access only dereferenceable memory, and +/// be properly aligned on every iteration of the specified loop regardless of +/// its placement within the loop. (i.e. does not require predication beyond +/// that required by the header itself and could be hoisted into the header +/// if desired.) This is more powerful than the variants above when the +/// address loaded from is analyzable by SCEV. +bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT); + /// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, /// If it is not obviously safe to load from the specified pointer, we do a /// quick local scan of the basic block containing ScanFrom, to determine if /// the address is already accessed. 
-bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h index 368a810cfa6..a2e65a7310a 100644 --- a/include/llvm/Analysis/LoopAnalysisManager.h +++ b/include/llvm/Analysis/LoopAnalysisManager.h @@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy template <> class LoopAnalysisManagerFunctionProxy::Result { public: explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI) - : InnerAM(&InnerAM), LI(&LI) {} - Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) { + : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {} + Result(Result &&Arg) + : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) { // We have to null out the analysis manager in the moved-from state // because we are taking ownership of the responsibilty to clear the // analysis state. @@ -96,6 +97,7 @@ public: Result &operator=(Result &&RHS) { InnerAM = RHS.InnerAM; LI = RHS.LI; + MSSAUsed = RHS.MSSAUsed; // We have to null out the analysis manager in the moved-from state // because we are taking ownership of the responsibilty to clear the // analysis state. @@ -112,6 +114,9 @@ public: InnerAM->clear(); } + /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated. + void markMSSAUsed() { MSSAUsed = true; } + /// Accessor for the analysis manager. 
LoopAnalysisManager &getManager() { return *InnerAM; } @@ -130,6 +135,7 @@ public: private: LoopAnalysisManager *InnerAM; LoopInfo *LI; + bool MSSAUsed; }; /// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h new file mode 100644 index 00000000000..ffec78b6db2 --- /dev/null +++ b/include/llvm/Analysis/LoopCacheAnalysis.h @@ -0,0 +1,281 @@ +//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the interface for the loop cache analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H +#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class LPMUpdater; +using CacheCostTy = int64_t; +using LoopVectorTy = SmallVector; + +/// Represents a memory reference as a base pointer and a set of indexing +/// operations. 
For example given the array reference A[i][2j+1][3k+2] in a +/// 3-dim loop nest: +/// for(i=0;i<n;++i) +/// for(j=0;j<m;++j) +/// for(k=0;k<o;++k) +/// ... A[i][2j+1][3k+2] ... +/// We expect: +/// BasePointer -> A +/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>] +/// Sizes -> [m][o][4] +class IndexedReference { + friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); + +public: + /// Construct an indexed reference given a \p StoreOrLoadInst instruction. + IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI, + ScalarEvolution &SE); + + bool isValid() const { return IsValid; } + const SCEV *getBasePointer() const { return BasePointer; } + size_t getNumSubscripts() const { return Subscripts.size(); } + const SCEV *getSubscript(unsigned SubNum) const { + assert(SubNum < getNumSubscripts() && "Invalid subscript number"); + return Subscripts[SubNum]; + } + const SCEV *getFirstSubscript() const { + assert(!Subscripts.empty() && "Expecting non-empty container"); + return Subscripts.front(); + } + const SCEV *getLastSubscript() const { + assert(!Subscripts.empty() && "Expecting non-empty container"); + return Subscripts.back(); + } + + /// Return true/false if the current object and the indexed reference \p Other + /// are/aren't in the same cache line of size \p CLS. Two references are in + /// the same cache line iff the distance between them in the innermost + /// dimension is less than the cache line size. Return None if unsure. + Optional hasSpacialReuse(const IndexedReference &Other, unsigned CLS, + AliasAnalysis &AA) const; + + /// Return true if the current object and the indexed reference \p Other + /// have distance smaller than \p MaxDistance in the dimension associated with + /// the given loop \p L. Return false if the distance is not smaller than \p + /// MaxDistance and None if unsure. + Optional hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, const Loop &L, + DependenceInfo &DI, AliasAnalysis &AA) const; + + /// Compute the cost of the reference w.r.t. 
the given loop \p L when it is + /// considered in the innermost position in the loop nest. + /// The cost is defined as: + /// - equal to one if the reference is loop invariant, or + /// - equal to '(TripCount * stride) / cache_line_size' if: + /// + the reference stride is less than the cache line size, and + /// + the coefficient of this loop's index variable used in all other + /// subscripts is zero + /// - or otherwise equal to 'TripCount'. + CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const; + +private: + /// Attempt to delinearize the indexed reference. + bool delinearize(const LoopInfo &LI); + + /// Return true if the indexed reference is invariant with respect to loop \p L. + bool isLoopInvariant(const Loop &L) const; + + /// Return true if the indexed reference is 'consecutive' in loop \p L. + /// An indexed reference is 'consecutive' if the only coefficient that uses + /// the loop induction variable is the rightmost one, and the access stride is + /// smaller than the cache line size \p CLS. + bool isConsecutive(const Loop &L, unsigned CLS) const; + + /// Return the coefficient used in the rightmost dimension. + const SCEV *getLastCoefficient() const; + + /// Return true if the coefficient corresponding to induction variable of + /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L. + bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const; + + /// Verify that the given \p Subscript is 'well formed' (must be a simple add + /// recurrence). + bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const; + + /// Return true if the given reference \p Other is definitely aliased with + /// the indexed reference represented by this class. + bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const; + +private: + /// True if the reference can be delinearized, false otherwise. + bool IsValid = false; + + /// Represent the memory reference instruction. 
+ Instruction &StoreOrLoadInst; + + /// The base pointer of the memory reference. + const SCEV *BasePointer = nullptr; + + /// The subscript (indexes) of the memory reference. + SmallVector Subscripts; + + /// The dimensions of the memory reference. + SmallVector Sizes; + + ScalarEvolution &SE; +}; + +/// A reference group represents a set of memory references that exhibit +/// temporal or spacial reuse. Two references belong to the same +/// reference group with respect to an inner loop L iff: +/// 1. they have a loop independent dependency, or +/// 2. they have a loop carried dependence with a small dependence distance +/// (e.g. less than 2) carried by the inner loop, or +/// 3. they refer to the same array, and the subscript in their innermost +/// dimension is less than or equal to 'd' (where 'd' is less than the cache +/// line size) +/// +/// Intuitively a reference group represents memory references that access +/// the same cache line. Conditions 1,2 above account for temporal reuse, while +/// condition 3 accounts for spacial reuse. +using ReferenceGroupTy = SmallVector, 8>; +using ReferenceGroupsTy = SmallVector; + +/// \c CacheCost represents the estimated cost of an inner loop as the number of +/// cache lines used by the memory references it contains. +/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of +/// the cache costs of all of its reference groups when the loop is considered +/// to be in the innermost position in the nest. +/// A reference group represents memory references that fall into the same cache +/// line. Each reference group is analysed with respect to the innermost loop in +/// a loop nest. 
The cost of a reference is defined as follow: +/// - one if it is loop invariant w.r.t the innermost loop, +/// - equal to the loop trip count divided by the cache line times the +/// reference stride if the reference stride is less than the cache line +/// size (CLS), and the coefficient of this loop's index variable used in all +/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride)) +/// - equal to the innermost loop trip count if the reference stride is greater +/// or equal to the cache line size CLS. +class CacheCost { + friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); + using LoopTripCountTy = std::pair; + using LoopCacheCostTy = std::pair; + +public: + static CacheCostTy constexpr InvalidCost = -1; + + /// Construct a CacheCost object for the loop nest described by \p Loops. + /// The optional parameter \p TRT can be used to specify the max. distance + /// between array elements accessed in a loop so that the elements are + /// classified to have temporal reuse. + CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, + TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI, + Optional TRT = None); + + /// Create a CacheCost for the loop nest rooted by \p Root. + /// The optional parameter \p TRT can be used to specify the max. distance + /// between array elements accessed in a loop so that the elements are + /// classified to have temporal reuse. + static std::unique_ptr + getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, + Optional TRT = None); + + /// Return the estimated cost of loop \p L if the given loop is part of the + /// loop nest associated with this object. Return -1 otherwise. + CacheCostTy getLoopCost(const Loop &L) const { + auto IT = std::find_if( + LoopCosts.begin(), LoopCosts.end(), + [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; }); + return (IT != LoopCosts.end()) ? 
(*IT).second : -1; + } + + /// Return the estimated ordered loop costs. + const ArrayRef getLoopCosts() const { return LoopCosts; } + +private: + /// Calculate the cache footprint of each loop in the nest (when it is + /// considered to be in the innermost position). + void calculateCacheFootprint(); + + /// Partition store/load instructions in the loop nest into reference groups. + /// Two or more memory accesses belong in the same reference group if they + /// share the same cache line. + bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const; + + /// Calculate the cost of the given loop \p L assuming it is the innermost + /// loop in nest. + CacheCostTy computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const; + + /// Compute the cost of a representative reference in reference group \p RG + /// when the given loop \p L is considered as the innermost loop in the nest. + /// The computed cost is an estimate for the number of cache lines used by the + /// reference group. The representative reference cost is defined as: + /// - equal to one if the reference is loop invariant, or + /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's + /// induction variable is used only in the reference subscript associated + /// with loop \p L, and (b) the reference stride is less than the cache + /// line size, or + /// - TripCount otherwise + CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const; + + /// Sort the LoopCosts vector by decreasing cache cost. + void sortLoopCosts() { + sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { + return A.second > B.second; + }); + } + +private: + /// Loops in the loop nest associated with this object. + LoopVectorTy Loops; + + /// Trip counts for the loops in the loop nest associated with this object. + SmallVector TripCounts; + + /// Cache costs for the loops in the loop nest associated with this object. 
+ SmallVector LoopCosts; + + /// The max. distance between array elements accessed in a loop so that the + /// elements are classified to have temporal reuse. + Optional TRT; + + const LoopInfo &LI; + ScalarEvolution &SE; + TargetTransformInfo &TTI; + AliasAnalysis &AA; + DependenceInfo &DI; +}; + +raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); +raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); + +/// Printer pass for the \c CacheCost results. +class LoopCachePrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 584eb3a8c85..abf3863b060 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -30,6 +30,9 @@ // instance. In particular, a Loop might be inside such a non-loop SCC, or a // non-loop SCC might contain a sub-SCC which is a Loop. // +// For an overview of terminology used in this API (and thus all of our loop +// analyses or transforms), see docs/LoopTerminology.rst. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_LOOPINFO_H @@ -570,9 +573,9 @@ public: bool getIncomingAndBackEdge(BasicBlock *&Incoming, BasicBlock *&Backedge) const; - /// Below are some utilities to get loop bounds and induction variable, and - /// check if a given phinode is an auxiliary induction variable, as well as - /// checking if the loop is canonical. + /// Below are some utilities to get the loop guard, loop bounds and induction + /// variable, and to check if a given phinode is an auxiliary induction + /// variable, if the loop is guarded, and if the loop is canonical. 
/// /// Here is an example: /// \code @@ -604,6 +607,9 @@ public: /// /// - getInductionVariable --> i_1 /// - isAuxiliaryInductionVariable(x) --> true if x == i_1 + /// - getLoopGuardBranch() + /// --> `if (guardcmp) goto preheader; else goto afterloop` + /// - isGuarded() --> true /// - isCanonical --> false struct LoopBounds { /// Return the LoopBounds object if @@ -725,6 +731,31 @@ public: bool isAuxiliaryInductionVariable(PHINode &AuxIndVar, ScalarEvolution &SE) const; + /// Return the loop guard branch, if it exists. + /// + /// This currently only works on simplified loop, as it requires a preheader + /// and a latch to identify the guard. It will work on loops of the form: + /// \code + /// GuardBB: + /// br cond1, Preheader, ExitSucc <== GuardBranch + /// Preheader: + /// br Header + /// Header: + /// ... + /// br Latch + /// Latch: + /// br cond2, Header, ExitBlock + /// ExitBlock: + /// br ExitSucc + /// ExitSucc: + /// \endcode + BranchInst *getLoopGuardBranch() const; + + /// Return true iff the loop is + /// - in simplify rotated form, and + /// - guarded by a loop guard branch. + bool isGuarded() const { return (getLoopGuardBranch() != nullptr); } + /// Return true if the loop induction variable starts at zero and increments /// by one each time through the loop. bool isCanonical(ScalarEvolution &SE) const; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 4c33dac9e21..8b11e848a19 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -85,9 +85,9 @@ template bool LoopBase::hasDedicatedExits() const { // Each predecessor of each exit block of a normal loop is contained // within the loop. 
- SmallVector ExitBlocks; - getExitBlocks(ExitBlocks); - for (BlockT *EB : ExitBlocks) + SmallVector UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + for (BlockT *EB : UniqueExitBlocks) for (BlockT *Predecessor : children>(EB)) if (!contains(Predecessor)) return false; @@ -200,8 +200,6 @@ BlockT *LoopBase::getLoopPredecessor() const { } } - // Make sure there is only one exit out of the preheader. - assert(Out && "Header of loop has no predecessors from outside loop?"); return Out; } diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 49f9e58ffad..a89d76b9e5b 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -58,6 +58,9 @@ class Value; /// like). bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isAllocationFn(const Value *V, + function_ref GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, /// allocates uninitialized memory (such as malloc). bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isMallocLikeFn(const Value *V, + function_ref GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// reallocates memory (e.g., realloc). bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). 
+bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + //===----------------------------------------------------------------------===// // malloc Call Utility Functions. // @@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); -inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { - return const_cast(extractMallocCall((const Value*)I, TLI)); +const CallInst * +extractMallocCall(const Value *I, + function_ref GetTLI); +inline CallInst * +extractMallocCall(Value *I, + function_ref GetTLI) { + return const_cast(extractMallocCall((const Value *)I, GetTLI)); } /// getMallocType - Returns the PointerType resulting from the malloc call. 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index e2669c2fa60..e89e5690fad 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -362,11 +362,14 @@ private: PhiValues &PV; PredIteratorCache PredCache; + unsigned DefaultBlockScanLimit; + public: MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, - const TargetLibraryInfo &TLI, - DominatorTree &DT, PhiValues &PV) - : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {} + const TargetLibraryInfo &TLI, DominatorTree &DT, + PhiValues &PV, unsigned DefaultBlockScanLimit) + : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV), + DefaultBlockScanLimit(DefaultBlockScanLimit) {} /// Handle invalidation in the new PM. bool invalidate(Function &F, const PreservedAnalyses &PA, @@ -511,9 +514,14 @@ class MemoryDependenceAnalysis static AnalysisKey Key; + unsigned DefaultBlockScanLimit; + public: using Result = MemoryDependenceResults; + MemoryDependenceAnalysis(); + MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit) : DefaultBlockScanLimit(DefaultBlockScanLimit) { } + MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h index b7730be7535..e89bf26a723 100644 --- a/include/llvm/Analysis/MemorySSA.h +++ b/include/llvm/Analysis/MemorySSA.h @@ -793,6 +793,7 @@ protected: friend class MemorySSAPrinterLegacyPass; friend class MemorySSAUpdater; + void verifyPrevDefInPhis(Function &F) const; void verifyDefUses(Function &F) const; void verifyDomination(Function &F) const; void verifyOrdering(Function &F) const; @@ -830,7 +831,8 @@ protected: void insertIntoListsBefore(MemoryAccess *, const BasicBlock *, AccessList::iterator); MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *, - const MemoryUseOrDef *Template = nullptr); + const MemoryUseOrDef *Template = nullptr, + bool 
CreationMustSucceed = true); private: template class ClobberWalkerBase; diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h index d4d8040c1ff..1d34663721e 100644 --- a/include/llvm/Analysis/MemorySSAUpdater.h +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -99,7 +99,7 @@ public: /// load a /// Where a mayalias b, *does* require RenameUses be set to true. void insertDef(MemoryDef *Def, bool RenameUses = false); - void insertUse(MemoryUse *Use); + void insertUse(MemoryUse *Use, bool RenameUses = false); /// Update the MemoryPhi in `To` following an edge deletion between `From` and /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made. void removeEdge(BasicBlock *From, BasicBlock *To); @@ -275,6 +275,7 @@ private: getPreviousDefRecursive(BasicBlock *, DenseMap> &); MemoryAccess *recursePhi(MemoryAccess *Phi); + MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi); template MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); void tryRemoveTrivialPhis(ArrayRef UpdatedPHIs); diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h index 3ef539c89d9..87cf9f85c7f 100644 --- a/include/llvm/Analysis/MustExecute.h +++ b/include/llvm/Analysis/MustExecute.h @@ -7,10 +7,17 @@ //===----------------------------------------------------------------------===// /// \file /// Contains a collection of routines for determining if a given instruction is -/// guaranteed to execute if a given point in control flow is reached. The most +/// guaranteed to execute if a given point in control flow is reached. The most /// common example is an instruction within a loop being provably executed if we /// branch to the header of it's containing loop. /// +/// There are two interfaces available to determine if an instruction is +/// executed once a given point in the control flow is reached: +/// 1) A loop-centric one derived from LoopSafetyInfo. 
+/// 2) A "must be executed context"-based one implemented in the +/// MustBeExecutedContextExplorer. +/// Please refer to the class comments for more information. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H @@ -164,6 +171,280 @@ public: virtual ~ICFLoopSafetyInfo() {}; }; -} +struct MustBeExecutedContextExplorer; + +/// Must be executed iterators visit stretches of instructions that are +/// guaranteed to be executed together, potentially with other instructions +/// executed in-between. +/// +/// Given the following code, and assuming all statements are single +/// instructions which transfer execution to the successor (see +/// isGuaranteedToTransferExecutionToSuccessor), there are two possible +/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B, +/// and E. If we start at C or D, we will visit all instructions A-E. +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// \endcode +/// +/// +/// Below is the example extended with instructions F and G. Now we assume F +/// might not transfer execution to its successor G. As a result we get the +/// following visit sets: +/// +/// Start Instruction | Visit Set +/// A | A, B, E, F +/// B | A, B, E, F +/// C | A, B, C, D, E, F +/// D | A, B, C, D, E, F +/// E | A, B, E, F +/// F | A, B, E, F +/// G | A, B, E, F, G +/// +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// F; // Might not transfer execution to its successor G. +/// G; +/// \endcode +/// +/// +/// A more complex example involving conditionals, loops, break, and continue +/// is shown below. We again assume all instructions will transmit control to +/// the successor and we assume we can prove the inner loop to be finite. We +/// omit non-trivial branch conditions as the exploration is oblivious to them. +/// Constant branches are assumed to be unconditional in the CFG. 
The resulting +/// visist sets are shown in the table below. +/// +/// \code +/// A; +/// while (true) { +/// B; +/// if (...) +/// C; +/// if (...) +/// continue; +/// D; +/// if (...) +/// break; +/// do { +/// if (...) +/// continue; +/// E; +/// } while (...); +/// F; +/// } +/// G; +/// \endcode +/// +/// Start Instruction | Visit Set +/// A | A, B +/// B | A, B +/// C | A, B, C +/// D | A, B, D +/// E | A, B, D, E, F +/// F | A, B, D, F +/// G | A, B, D, G +/// +/// +/// Note that the examples show optimal visist sets but not necessarily the ones +/// derived by the explorer depending on the available CFG analyses (see +/// MustBeExecutedContextExplorer). Also note that we, depending on the options, +/// the visit set can contain instructions from other functions. +struct MustBeExecutedIterator { + /// Type declarations that make his class an input iterator. + ///{ + typedef const Instruction *value_type; + typedef std::ptrdiff_t difference_type; + typedef const Instruction **pointer; + typedef const Instruction *&reference; + typedef std::input_iterator_tag iterator_category; + ///} + + using ExplorerTy = MustBeExecutedContextExplorer; + + MustBeExecutedIterator(const MustBeExecutedIterator &Other) + : Visited(Other.Visited), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator(MustBeExecutedIterator &&Other) + : Visited(std::move(Other.Visited)), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) { + if (this != &Other) { + std::swap(Visited, Other.Visited); + std::swap(CurInst, Other.CurInst); + } + return *this; + } + + ~MustBeExecutedIterator() {} + + /// Pre- and post-increment operators. + ///{ + MustBeExecutedIterator &operator++() { + CurInst = advance(); + return *this; + } + + MustBeExecutedIterator operator++(int) { + MustBeExecutedIterator tmp(*this); + operator++(); + return tmp; + } + ///} + + /// Equality and inequality operators. 
Note that we ignore the history here. + ///{ + bool operator==(const MustBeExecutedIterator &Other) const { + return CurInst == Other.CurInst; + } + + bool operator!=(const MustBeExecutedIterator &Other) const { + return !(*this == Other); + } + ///} + + /// Return the underlying instruction. + const Instruction *&operator*() { return CurInst; } + const Instruction *getCurrentInst() const { return CurInst; } + + /// Return true if \p I was encountered by this iterator already. + bool count(const Instruction *I) const { return Visited.count(I); } + +private: + using VisitedSetTy = DenseSet; + + /// Private constructors. + MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I); + + /// Reset the iterator to its initial state pointing at \p I. + void reset(const Instruction *I); + + /// Try to advance one of the underlying positions (Head or Tail). + /// + /// \return The next instruction in the must be executed context, or nullptr + /// if none was found. + const Instruction *advance(); + + /// A set to track the visited instructions in order to deal with endless + /// loops and recursion. + VisitedSetTy Visited; + + /// A reference to the explorer that created this iterator. + ExplorerTy &Explorer; + + /// The instruction we are currently exposing to the user. There is always an + /// instruction that we know is executed with the given program point, + /// initially the program point itself. + const Instruction *CurInst; + + friend struct MustBeExecutedContextExplorer; +}; + +/// A "must be executed context" for a given program point PP is the set of +/// instructions, potentially before and after PP, that are executed always when +/// PP is reached. The MustBeExecutedContextExplorer an interface to explore +/// "must be executed contexts" in a module through the use of +/// MustBeExecutedIterator. +/// +/// The explorer exposes "must be executed iterators" that traverse the must be +/// executed context. 
There is little information sharing between iterators as +/// the expected use case involves few iterators for "far apart" instructions. +/// If that changes, we should consider caching more intermediate results. +struct MustBeExecutedContextExplorer { + + /// In the description of the parameters we use PP to denote a program point + /// for which the must be executed context is explored, or put differently, + /// for which the MustBeExecutedIterator is created. + /// + /// \param ExploreInterBlock Flag to indicate if instructions in blocks + /// other than the parent of PP should be + /// explored. + MustBeExecutedContextExplorer(bool ExploreInterBlock) + : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {} + + /// Clean up the dynamically allocated iterators. + ~MustBeExecutedContextExplorer() { + DeleteContainerSeconds(InstructionIteratorMap); + } + + /// Iterator-based interface. \see MustBeExecutedIterator. + ///{ + using iterator = MustBeExecutedIterator; + using const_iterator = const MustBeExecutedIterator; + + /// Return an iterator to explore the context around \p PP. + iterator &begin(const Instruction *PP) { + auto *&It = InstructionIteratorMap[PP]; + if (!It) + It = new iterator(*this, PP); + return *It; + } + + /// Return an iterator to explore the cached context around \p PP. + const_iterator &begin(const Instruction *PP) const { + return *InstructionIteratorMap.lookup(PP); + } + + /// Return an universal end iterator. + ///{ + iterator &end() { return EndIterator; } + iterator &end(const Instruction *) { return EndIterator; } + + const_iterator &end() const { return EndIterator; } + const_iterator &end(const Instruction *) const { return EndIterator; } + ///} + + /// Return an iterator range to explore the context around \p PP. + llvm::iterator_range range(const Instruction *PP) { + return llvm::make_range(begin(PP), end(PP)); + } + + /// Return an iterator range to explore the cached context around \p PP. 
+ llvm::iterator_range range(const Instruction *PP) const { + return llvm::make_range(begin(PP), end(PP)); + } + ///} + + /// Return the next instruction that is guaranteed to be executed after \p PP. + /// + /// \param It The iterator that is used to traverse the must be + /// executed context. + /// \param PP The program point for which the next instruction + /// that is guaranteed to execute is determined. + const Instruction * + getMustBeExecutedNextInstruction(MustBeExecutedIterator &It, + const Instruction *PP); + + /// Parameter that limit the performed exploration. See the constructor for + /// their meaning. + ///{ + const bool ExploreInterBlock; + ///} + +private: + /// Map from instructions to associated must be executed iterators. + DenseMap + InstructionIteratorMap; + + /// A unique end iterator. + MustBeExecutedIterator EndIterator; +}; + +} // namespace llvm #endif diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index d9c97dff8c6..8562519fa7b 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -103,6 +103,13 @@ namespace llvm { // FunctionPass *createMustExecutePrinter(); + //===--------------------------------------------------------------------===// + // + // createMustBeExecutedContextPrinter - This pass prints information about which + // instructions are guaranteed to execute together (run with -analyze). + // + ModulePass *createMustBeExecutedContextPrinter(); + } #endif diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index f309d344b8d..6693e40ccf2 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -52,6 +52,15 @@ private: // because the number of profile counts required to reach the hot // percentile is above a huge threshold. 
Optional HasHugeWorkingSetSize; + // True if the working set size of the code is considered large, + // because the number of profile counts required to reach the hot + // percentile is above a large threshold. + Optional HasLargeWorkingSetSize; + // Compute the threshold for a given cutoff. + Optional computeThreshold(int PercentileCutoff); + // The map that caches the threshold values. The keys are the percentile + // cutoff values and the values are the corresponding threshold values. + DenseMap ThresholdCache; public: ProfileSummaryInfo(Module &M) : M(M) {} @@ -96,6 +105,8 @@ public: bool AllowSynthetic = false); /// Returns true if the working set size of the code is considered huge. bool hasHugeWorkingSetSize(); + /// Returns true if the working set size of the code is considered large. + bool hasLargeWorkingSetSize(); /// Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); /// Returns true if \p F contains hot code. @@ -104,14 +115,26 @@ public: bool isFunctionEntryCold(const Function *F); /// Returns true if \p F contains only cold code. bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); + /// Returns true if \p F contains hot code with regard to a given hot + /// percentile cutoff value. + bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, + const Function *F, + BlockFrequencyInfo &BFI); /// Returns true if count \p C is considered hot. bool isHotCount(uint64_t C); /// Returns true if count \p C is considered cold. bool isColdCount(uint64_t C); + /// Returns true if count \p C is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C); /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if BasicBlock \p BB is considered cold. 
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); + /// Returns true if BasicBlock \p BB is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if CallSite \p CS is considered hot. bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// Returns true if Callsite \p CS is considered cold. diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index c59c09dd209..6b5936680c3 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase::getBBNode(BlockT *BB) const { auto Deconst = const_cast *>(this); typename BBNodeMapT::value_type V = { BB, - llvm::make_unique(static_cast(Deconst), BB)}; + std::make_unique(static_cast(Deconst), BB)}; at = BBNodeMap.insert(std::move(V)).first; } return at->second.get(); diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 0bd98ef37e7..9c55f7a5090 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -468,6 +468,8 @@ template <> struct DenseMapInfo { /// can't do much with the SCEV objects directly, they must ask this class /// for services. class ScalarEvolution { + friend class ScalarEvolutionsTest; + public: /// An enum describing the relationship between a SCEV and a loop. enum LoopDisposition { @@ -777,10 +779,10 @@ public: /// to (i.e. a "conservative over-approximation") of the value returend by /// getBackedgeTakenCount. If such a value cannot be computed, it returns the /// SCEVCouldNotCompute object. - const SCEV *getMaxBackedgeTakenCount(const Loop *L); + const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L); /// Return true if the backedge taken count is either the value returned by - /// getMaxBackedgeTakenCount or zero. 
+ /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); /// Return true if the specified loop has an analyzable loop-invariant diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index a519f93216b..b4d727449fb 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -77,9 +77,13 @@ namespace llvm { /// Phis that complete an IV chain. Reuse DenseSet> ChainedPhis; - /// When true, expressions are expanded in "canonical" form. In particular, - /// addrecs are expanded as arithmetic based on a canonical induction - /// variable. When false, expression are expanded in a more literal form. + /// When true, SCEVExpander tries to expand expressions in "canonical" form. + /// When false, expressions are expanded in a more literal form. + /// + /// In "canonical" form addrecs are expanded as arithmetic based on a + /// canonical induction variable. Note that CanonicalMode doesn't guarantee + /// that all expressions are expanded in "canonical" form. For some + /// expressions literal mode can be preferred. bool CanonicalMode; /// When invoked from LSR, the expander is in "strength reduction" mode. The @@ -275,8 +279,16 @@ namespace llvm { /// Clear the current insertion point. This is useful if the instruction /// that had been serving as the insertion point may have been deleted. - void clearInsertPoint() { - Builder.ClearInsertionPoint(); + void clearInsertPoint() { Builder.ClearInsertionPoint(); } + + /// Set location information used by debugging information. + void SetCurrentDebugLocation(DebugLoc L) { + Builder.SetCurrentDebugLocation(std::move(L)); + } + + /// Get location information used by debugging information. 
+ const DebugLoc &getCurrentDebugLocation() const { + return Builder.getCurrentDebugLocation(); } /// Return true if the specified instruction was inserted by the code diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 4b5200f5a83..d4b223863c5 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -30,11 +30,12 @@ struct VecDesc { unsigned VectorizationFactor; }; - enum LibFunc { + enum LibFunc : unsigned { #define TLI_DEFINE_ENUM #include "llvm/Analysis/TargetLibraryInfo.def" - NumLibFuncs + NumLibFuncs, + NotLibFunc }; /// Implementation of the target library information. @@ -48,7 +49,7 @@ class TargetLibraryInfoImpl { unsigned char AvailableArray[(NumLibFuncs+3)/4]; llvm::DenseMap CustomNames; - static StringRef const StandardNames[NumLibFuncs]; + static StringLiteral const StandardNames[NumLibFuncs]; bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param; enum AvailabilityState { @@ -359,7 +360,6 @@ public: TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl) : PresetInfoImpl(std::move(PresetInfoImpl)) {} - TargetLibraryInfo run(Module &M, ModuleAnalysisManager &); TargetLibraryInfo run(Function &F, FunctionAnalysisManager &); private: @@ -385,8 +385,13 @@ public: explicit TargetLibraryInfoWrapperPass(const Triple &T); explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI); - TargetLibraryInfo &getTLI() { return TLI; } - const TargetLibraryInfo &getTLI() const { return TLI; } + TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) { + return TLI; + } + const TargetLibraryInfo & + getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const { + return TLI; + } }; } // end namespace llvm diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 7574b811bc1..d6fa8841165 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ 
b/include/llvm/Analysis/TargetTransformInfo.h @@ -368,6 +368,20 @@ public: /// optimize away. unsigned getFlatAddressSpace() const; + /// Return any intrinsic address operand indexes which may be rewritten if + /// they use a flat address space pointer. + /// + /// \returns true if the intrinsic was handled. + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const; + + /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p + /// NewV, which has a different address space. This should happen for every + /// operand index that collectFlatAddressOperands returned for the intrinsic. + /// \returns true if the intrinsic /// was handled. + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const; + /// Test whether calls to a function lower to actual program function /// calls. /// @@ -469,12 +483,17 @@ public: bool Force; /// Allow using trip count upper bound to unroll loops. bool UpperBound; - /// Allow peeling off loop iterations for loops with low dynamic tripcount. + /// Allow peeling off loop iterations. bool AllowPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. bool UnrollAndJam; + /// Allow peeling basing on profile. Uses to enable peeling off all + /// iterations basing on provided profile. + /// If the value is true the peeling cost model can decide to peel only + /// some iterations and in this case it will set this to false. + bool PeelProfiledIterations; /// Threshold for unroll and jam, for inner loop size. The 'Threshold' /// value above is used during unroll and jam for the outer loop size. /// This value is used in the same manner to limit the size of the inner @@ -555,15 +574,15 @@ public: /// modes that operate across loop iterations. bool shouldFavorBackedgeIndex(const Loop *L) const; - /// Return true if the target supports masked load. 
- bool isLegalMaskedStore(Type *DataType) const; /// Return true if the target supports masked store. - bool isLegalMaskedLoad(Type *DataType) const; + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const; + /// Return true if the target supports masked load. + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const; /// Return true if the target supports nontemporal store. - bool isLegalNTStore(Type *DataType, unsigned Alignment) const; + bool isLegalNTStore(Type *DataType, Align Alignment) const; /// Return true if the target supports nontemporal load. - bool isLegalNTLoad(Type *DataType, unsigned Alignment) const; + bool isLegalNTLoad(Type *DataType, Align Alignment) const; /// Return true if the target supports masked scatter. bool isLegalMaskedScatter(Type *DataType) const; @@ -622,12 +641,6 @@ public: /// Return true if this type is legal. bool isTypeLegal(Type *Ty) const; - /// Returns the target's jmp_buf alignment in bytes. - unsigned getJumpBufAlignment() const; - - /// Returns the target's jmp_buf size in bytes. - unsigned getJumpBufSize() const; - /// Return true if switches should be turned into lookup tables for the /// target. bool shouldBuildLookupTables() const; @@ -775,10 +788,23 @@ public: /// Additional properties of an operand's values. enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; - /// \return The number of scalar or vector registers that the target has. - /// If 'Vectors' is true, it returns the number of vector registers. If it is - /// set to false, it returns the number of scalar registers. - unsigned getNumberOfRegisters(bool Vector) const; + /// \return the number of registers in the target-provided register class. + unsigned getNumberOfRegisters(unsigned ClassID) const; + + /// \return the target-provided register class ID for the provided type, + /// accounting for type promotion and other type-legalization techniques that the target might apply. 
+ /// However, it specifically does not account for the scalarization or splitting of vector types. + /// Should a vector type require scalarization or splitting into multiple underlying vector registers, + /// that type should be mapped to a register class containing no registers. + /// Specifically, this is designed to provide a simple, high-level view of the register allocation + /// later performed by the backend. These register classes don't necessarily map onto the + /// register classes used by the backend. + /// FIXME: It's not currently possible to determine how many registers + /// are used by the provided type. + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + + /// \return the target-provided register class name + const char* getRegisterClassName(unsigned ClassID) const; /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; @@ -824,18 +850,20 @@ public: /// \return The associativity of the cache level, if available. llvm::Optional getCacheAssociativity(CacheLevel Level) const; - /// \return How much before a load we should place the prefetch instruction. - /// This is currently measured in number of instructions. + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. unsigned getPrefetchDistance() const; - /// \return Some HW prefetchers can handle accesses up to a certain constant - /// stride. This is the minimum stride in bytes where it makes sense to start - /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. unsigned getMinPrefetchStride() const; - /// \return The maximum number of iterations to prefetch ahead. 
If the - /// required number of iterations is more than this number, no prefetching is - /// performed. + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. unsigned getMaxPrefetchIterationsAhead() const; /// \return The maximum interleave factor that any transform should try to @@ -1155,6 +1183,10 @@ public: virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; + virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const = 0; + virtual bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) = 0; @@ -1177,10 +1209,10 @@ public: TargetLibraryInfo *LibInfo) = 0; virtual bool shouldFavorPostInc() const = 0; virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; - virtual bool isLegalMaskedStore(Type *DataType) = 0; - virtual bool isLegalMaskedLoad(Type *DataType) = 0; - virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0; - virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0; + virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; + virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; @@ -1196,8 +1228,6 @@ public: virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual unsigned 
getJumpBufAlignment() = 0; - virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; @@ -1228,19 +1258,35 @@ public: Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual unsigned getNumberOfRegisters(bool Vector) = 0; + virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; + virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; + virtual const char* getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; - virtual unsigned getCacheLineSize() = 0; - virtual llvm::Optional getCacheSize(CacheLevel Level) = 0; - virtual llvm::Optional getCacheAssociativity(CacheLevel Level) = 0; - virtual unsigned getPrefetchDistance() = 0; - virtual unsigned getMinPrefetchStride() = 0; - virtual unsigned getMaxPrefetchIterationsAhead() = 0; + virtual unsigned getCacheLineSize() const = 0; + virtual llvm::Optional getCacheSize(CacheLevel Level) const = 0; + virtual llvm::Optional getCacheAssociativity(CacheLevel Level) const = 0; + + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. + virtual unsigned getPrefetchDistance() const = 0; + + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. 
+ virtual unsigned getMinPrefetchStride() const = 0; + + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. + virtual unsigned getMaxPrefetchIterationsAhead() const = 0; + virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -1395,6 +1441,16 @@ public: return Impl.getFlatAddressSpace(); } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const override { + return Impl.collectFlatAddressOperands(OpIndexes, IID); + } + + bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const override { + return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); + } + bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } @@ -1440,16 +1496,16 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const override { return Impl.shouldFavorBackedgeIndex(L); } - bool isLegalMaskedStore(Type *DataType) override { - return Impl.isLegalMaskedStore(DataType); + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedStore(DataType, Alignment); } - bool isLegalMaskedLoad(Type *DataType) override { - return Impl.isLegalMaskedLoad(DataType); + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedLoad(DataType, Alignment); } - bool isLegalNTStore(Type *DataType, unsigned Alignment) override { + bool isLegalNTStore(Type *DataType, Align Alignment) override { return Impl.isLegalNTStore(DataType, Alignment); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) override { + bool isLegalNTLoad(Type *DataType, Align Alignment) override { return Impl.isLegalNTLoad(DataType, Alignment); } bool isLegalMaskedScatter(Type *DataType) override { @@ -1490,8 +1546,6 @@ public: } bool useAA() override 
{ return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } - unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } bool shouldBuildLookupTables() override { return Impl.shouldBuildLookupTables(); } @@ -1563,8 +1617,14 @@ public: Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - unsigned getNumberOfRegisters(bool Vector) override { - return Impl.getNumberOfRegisters(Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const override { + return Impl.getNumberOfRegisters(ClassID); + } + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override { + return Impl.getRegisterClassForType(Vector, Ty); + } + const char* getRegisterClassName(unsigned ClassID) const override { + return Impl.getRegisterClassName(ClassID); } unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); @@ -1583,22 +1643,36 @@ public: return Impl.shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } - unsigned getCacheLineSize() override { + unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } - llvm::Optional getCacheSize(CacheLevel Level) override { + llvm::Optional getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); } - llvm::Optional getCacheAssociativity(CacheLevel Level) override { + llvm::Optional getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); } - unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } - unsigned getMinPrefetchStride() override { + + /// Return the preferred prefetch distance in terms of instructions. + /// + unsigned getPrefetchDistance() const override { + return Impl.getPrefetchDistance(); + } + + /// Return the minimum stride necessary to trigger software + /// prefetching. 
+ /// + unsigned getMinPrefetchStride() const override { return Impl.getMinPrefetchStride(); } - unsigned getMaxPrefetchIterationsAhead() override { + + /// Return the maximum prefetch distance in terms of loop + /// iterations. + /// + unsigned getMaxPrefetchIterationsAhead() const override { return Impl.getMaxPrefetchIterationsAhead(); } + unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index b99e1eb9adf..a431fa0d458 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -156,6 +156,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLoweredToCall(const Function *F) { assert(F && "A concrete function must be provided to this routine."); @@ -233,18 +243,18 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } - bool isLegalMaskedStore(Type *DataType) { return false; } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalMaskedLoad(Type *DataType) { return false; } + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalNTStore(Type *DataType, unsigned Alignment) { + bool isLegalNTStore(Type *DataType, Align Alignment) { // By default, assume nontemporal memory stores are available for stores // that are aligned and have a size that is a power of 2. 
unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) { + bool isLegalNTLoad(Type *DataType, Align Alignment) { // By default, assume nontemporal memory loads are available for loads that // are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); @@ -284,10 +294,6 @@ public: bool isTypeLegal(Type *Ty) { return false; } - unsigned getJumpBufAlignment() { return 0; } - - unsigned getJumpBufSize() { return 0; } - bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } @@ -348,7 +354,20 @@ public: return TTI::TCC_Free; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } + unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } + + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { + return Vector ? 1 : 0; + }; + + const char* getRegisterClassName(unsigned ClassID) const { + switch (ClassID) { + default: + return "Generic::Unknown Register Class"; + case 0: return "Generic::ScalarRC"; + case 1: return "Generic::VectorRC"; + } + } unsigned getRegisterBitWidth(bool Vector) const { return 32; } @@ -365,21 +384,20 @@ public: return false; } - unsigned getCacheLineSize() { return 0; } + unsigned getCacheLineSize() const { return 0; } - llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) { + llvm::Optional getCacheSize(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; case TargetTransformInfo::CacheLevel::L2D: return llvm::Optional(); } - llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } llvm::Optional getCacheAssociativity( - TargetTransformInfo::CacheLevel Level) { + TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; @@ -390,11 +408,9 @@ 
public: llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } - unsigned getPrefetchDistance() { return 0; } - - unsigned getMinPrefetchStride() { return 1; } - - unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + unsigned getPrefetchDistance() const { return 0; } + unsigned getMinPrefetchStride() const { return 1; } + unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } @@ -830,6 +846,9 @@ public: if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. + if (isa(U)) + return TTI::TCC_Free; // Model all ExtractValue nodes as free. + // Static alloca doesn't generate target instructions. if (auto *A = dyn_cast(U)) if (A->isStaticAlloca()) diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h index 82cf8efeea5..43ce26147c2 100644 --- a/include/llvm/Analysis/TypeMetadataUtils.h +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl &LoadedPtrs, SmallVectorImpl &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); + +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); } #endif diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h index acbdf5dca32..a63bcec9bc4 100644 --- a/include/llvm/Analysis/Utils/Local.h +++ b/include/llvm/Analysis/Utils/Local.h @@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. + // overflow in a signed sense. bool isInBounds = GEPOp->isInBounds() && !NoAssumptions; // Build a mask for high order bits. 
@@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = GTI.getStructTypeOrNull()) { - if (OpC->getType()->isVectorTy()) - OpC = OpC->getSplatValue(); - - uint64_t OpValue = cast(OpC)->getZExtValue(); + uint64_t OpValue = OpC->getUniqueInteger().getZExtValue(); Size = DL.getStructLayout(STy)->getElementOffset(OpValue); if (Size) @@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, continue; } + // Splat the constant if needed. + if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy()) + OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); + Scale = + ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); // Emit an add instruction. Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } + + // Splat the index if needed. + if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy()) + Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op); + // Convert to correct type. if (Op->getType() != IntPtrTy) Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { // We'll let instcombine(mul) convert this to a shl if possible. Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".idx", isInBounds /*NUW*/); + GEP->getName() + ".idx", false /*NUW*/, + isInBounds /*NSW*/); } // Emit an add instruction. 
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index fa7e0e0eef7..33b064fcf9d 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -242,19 +242,21 @@ class Value; /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that /// creates and later unpacks the required APInt. inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &DL) { + const DataLayout &DL, + bool AllowNonInbounds = true) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); Value *Base = - Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, - /* AllowNonInbounds */ true); + Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds); + Offset = OffsetAPInt.getSExtValue(); return Base; } - inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr, - int64_t &Offset, - const DataLayout &DL) { - return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset, - DL); + inline const Value * + GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, + const DataLayout &DL, + bool AllowNonInbounds = true) { + return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset, DL, + AllowNonInbounds); } /// Returns true if the GEP is based on a pointer to a string (array of @@ -307,20 +309,26 @@ class Value; uint64_t GetStringLength(const Value *V, unsigned CharSize = 8); /// This function returns call pointer argument that is considered the same by - /// aliasing rules. You CAN'T use it to replace one value with another. - const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call); - inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) { + /// aliasing rules. You CAN'T use it to replace one value with another. If + /// \p MustPreserveNullness is true, the call must preserve the nullness of + /// the pointer. 
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness); + inline Value * + getArgumentAliasingToReturnedPointer(CallBase *Call, + bool MustPreserveNullness) { return const_cast(getArgumentAliasingToReturnedPointer( - const_cast(Call))); + const_cast(Call), MustPreserveNullness)); } - // {launder,strip}.invariant.group returns pointer that aliases its argument, - // and it only captures pointer by returning it. - // These intrinsics are not marked as nocapture, because returning is - // considered as capture. The arguments are not marked as returned neither, - // because it would make it useless. + /// {launder,strip}.invariant.group returns pointer that aliases its argument, + /// and it only captures pointer by returning it. + /// These intrinsics are not marked as nocapture, because returning is + /// considered as capture. The arguments are not marked as returned neither, + /// because it would make it useless. If \p MustPreserveNullness is true, + /// the intrinsic must preserve the nullness of the pointer. bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call); + const CallBase *Call, bool MustPreserveNullness); /// This method strips off any GEP address adjustments and pointer casts from /// the specified value, returning the original object being addressed. Note @@ -376,6 +384,13 @@ class Value; /// Return true if the only users of this pointer are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); + /// Return true if speculation of the given load must be suppressed to avoid + /// ordering or interfering with an active sanitizer. If not suppressed, + /// dereferenceability and alignment must be proven separately. Note: This + /// is only needed for raw reasoning; if you use the interface below + /// (isSafeToSpeculativelyExecute), this is handled internally. 
+ bool mustSuppressSpeculation(const LoadInst &LI); + /// Return true if the instruction does not have any effects besides /// calculating the result and does not have undefined behavior. /// @@ -605,12 +620,12 @@ class Value; SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0); + inline SelectPatternResult - matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS, - Instruction::CastOps *CastOp = nullptr) { - Value *L = const_cast(LHS); - Value *R = const_cast(RHS); - auto Result = matchSelectPattern(const_cast(V), L, R); + matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) { + Value *L = const_cast(LHS); + Value *R = const_cast(RHS); + auto Result = matchSelectPattern(const_cast(V), L, R); LHS = L; RHS = R; return Result; @@ -654,6 +669,12 @@ class Value; Optional isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL); + + /// If Ptr1 is provably equal to Ptr2 plus a constant offset, return that + /// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]. In + /// this case offset would be -8. + Optional isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL); } // end namespace llvm #endif // LLVM_ANALYSIS_VALUETRACKING_H diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index d93d2bc4570..4a61c2bc35c 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -15,18 +15,129 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CheckedArithmetic.h" namespace llvm { +/// Describes the type of Parameters +enum class VFParamKind { + Vector, // No semantic information. 
+ OMP_Linear, // declare simd linear(i) + OMP_LinearRef, // declare simd linear(ref(i)) + OMP_LinearVal, // declare simd linear(val(i)) + OMP_LinearUVal, // declare simd linear(uval(i)) + OMP_LinearPos, // declare simd linear(i:c) uniform(c) + OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c) + OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c) + OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c) + OMP_Uniform, // declare simd uniform(i) + GlobalPredicate, // Global logical predicate that acts on all lanes + // of the input and output mask concurrently. For + // example, it is implied by the `M` token in the + // Vector Function ABI mangled name. + Unknown +}; + +/// Describes the type of Instruction Set Architecture +enum class VFISAKind { + AdvancedSIMD, // AArch64 Advanced SIMD (NEON) + SVE, // AArch64 Scalable Vector Extension + SSE, // x86 SSE + AVX, // x86 AVX + AVX2, // x86 AVX2 + AVX512, // x86 AVX512 + Unknown // Unknown ISA +}; + +/// Encapsulates information needed to describe a parameter. +/// +/// The description of the parameter is not linked directly to +/// OpenMP or any other vector function description. This structure +/// is extendible to handle other paradigms that describe vector +/// functions and their parameters. +struct VFParameter { + unsigned ParamPos; // Parameter Position in Scalar Function. + VFParamKind ParamKind; // Kind of Parameter. + int LinearStepOrPos = 0; // Step or Position of the Parameter. + Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1. + + // Comparison operator. + bool operator==(const VFParameter &Other) const { + return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) == + std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos, + Other.Alignment); + } +}; + +/// Contains the information about the kind of vectorization +/// available. +/// +/// This object is independent of the paradigm used to +/// represent vector functions.
In particular, it is not attached to +/// any target-specific ABI. +struct VFShape { + unsigned VF; // Vectorization factor. + bool IsScalable; // True if the function is a scalable function. + VFISAKind ISA; // Instruction Set Architecture. + SmallVector Parameters; // List of parameter information. + // Comparison operator. + bool operator==(const VFShape &Other) const { + return std::tie(VF, IsScalable, ISA, Parameters) == + std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters); + } +}; + +/// Holds the VFShape for a specific scalar to vector function mapping. +struct VFInfo { + VFShape Shape; // Classification of the vector function. + StringRef ScalarName; // Scalar Function Name. + StringRef VectorName; // Vector Function Name associated to this VFInfo. + + // Comparison operator. + bool operator==(const VFInfo &Other) const { + return std::tie(Shape, ScalarName, VectorName) == + std::tie(Other.Shape, Other.ScalarName, Other.VectorName); + } +}; + +namespace VFABI { +/// Function to construct a VFInfo out of a mangled name in the +/// following format: +/// +/// <VFABI_name>{(<redirection>)} +/// +/// where <VFABI_name> is the name of the vector function, mangled according +/// to the rules described in the Vector Function ABI of the target vector +/// extension (or <isa> from now on). The <VFABI_name> is in the following +/// format: +/// +/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] +/// +/// This method supports demangling rules for the following <isa>: +/// +/// * AArch64: https://developer.arm.com/docs/101129/latest +/// +/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and +/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt +/// +/// +/// +/// \param MangledName -> input string in the format +/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]. +Optional tryDemangleForVFABI(StringRef MangledName); + +/// Retrieve the `VFParamKind` from a string token.
+VFParamKind getVFParamKindFromString(const StringRef Token); +} // end namespace VFABI + template class ArrayRef; class DemandedBits; class GetElementPtrInst; template class InterleaveGroup; class Loop; class ScalarEvolution; +class TargetLibraryInfo; class TargetTransformInfo; class Type; class Value; @@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask); /// the interleaved store group doesn't allow gaps. template class InterleaveGroup { public: - InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align) - : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} - - InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align) - : Align(Align), InsertPos(Instr) { - assert(Align && "The alignment should be non-zero"); + InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment) + : Factor(Factor), Reverse(Reverse), Alignment(Alignment), + InsertPos(nullptr) {} + InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment) + : Alignment(Alignment), InsertPos(Instr) { Factor = std::abs(Stride); assert(Factor > 1 && "Invalid interleave factor"); @@ -286,7 +396,7 @@ public: bool isReverse() const { return Reverse; } uint32_t getFactor() const { return Factor; } - uint32_t getAlignment() const { return Align; } + uint32_t getAlignment() const { return Alignment.value(); } uint32_t getNumMembers() const { return Members.size(); } /// Try to insert a new member \p Instr with index \p Index and @@ -294,9 +404,7 @@ public: /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. - bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) { - assert(NewAlign && "The new member's alignment should be non-zero"); - + bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) { // Make sure the key fits in an int32_t. Optional MaybeKey = checkedAdd(Index, SmallestKey); if (!MaybeKey) @@ -328,7 +436,7 @@ public: } // It's always safe to select the minimum alignment. 
- Align = std::min(Align, NewAlign); + Alignment = std::min(Alignment, NewAlign); Members[Key] = Instr; return true; } @@ -387,7 +495,7 @@ public: private: uint32_t Factor; // Interleave Factor. bool Reverse; - uint32_t Align; + Align Alignment; DenseMap Members; int32_t SmallestKey = 0; int32_t LargestKey = 0; @@ -504,8 +612,8 @@ private: struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, - unsigned Align) - : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + Align Alignment) + : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; @@ -517,7 +625,7 @@ private: uint64_t Size = 0; // The alignment of this access. - unsigned Align = 0; + Align Alignment; }; /// A type for holding instructions and their stride descriptors. @@ -528,11 +636,11 @@ private: /// /// \returns the newly created interleave group. InterleaveGroup * - createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = - new InterleaveGroup(Instr, Stride, Align); + new InterleaveGroup(Instr, Stride, Alignment); InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def index b0f78d0fd61..34a7410f747 100644 --- a/include/llvm/BinaryFormat/Dwarf.def +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -17,7 +17,7 @@ defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \ defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \ - defined HANDLE_DW_RLE || \ + defined HANDLE_DW_RLE || defined HANDLE_DW_LLE || \ (defined HANDLE_DW_CFA && defined 
HANDLE_DW_CFA_PRED) || \ defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \ defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX || \ @@ -26,7 +26,17 @@ #endif #ifndef HANDLE_DW_TAG -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) +#endif + +// Note that DW_KIND is not a DWARF concept, but rather a way for us to +// generate a list of tags that belong together. +#ifndef DW_KIND_NONE +#define DW_KIND_NONE 0 +#endif + +#ifndef DW_KIND_TYPE +#define DW_KIND_TYPE 1 #endif #ifndef HANDLE_DW_AT @@ -81,6 +91,10 @@ #define HANDLE_DW_RLE(ID, NAME) #endif +#ifndef HANDLE_DW_LLE +#define HANDLE_DW_LLE(ID, NAME) +#endif + #ifndef HANDLE_DW_CFA #define HANDLE_DW_CFA(ID, NAME) #endif @@ -109,94 +123,94 @@ #define HANDLE_DW_END(ID, NAME) #endif -HANDLE_DW_TAG(0x0000, null, 2, DWARF) -HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) -HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) -HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) -HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) -HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) -HANDLE_DW_TAG(0x000a, label, 2, DWARF) -HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) -HANDLE_DW_TAG(0x000d, member, 2, DWARF) -HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) -HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) -HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) -HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) -HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) -HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) -HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) -HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) -HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) -HANDLE_DW_TAG(0x0019, variant, 2, DWARF) -HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) -HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) -HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) -HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) -HANDLE_DW_TAG(0x001e, module, 2, DWARF) 
-HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) -HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) -HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) -HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) -HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) -HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) -HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) -HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) -HANDLE_DW_TAG(0x0027, constant, 2, DWARF) -HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) -HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) -HANDLE_DW_TAG(0x002a, friend, 2, DWARF) -HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) -HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) -HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) -HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) -HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) -HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) -HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) -HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) -HANDLE_DW_TAG(0x0034, variable, 2, DWARF) -HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) +HANDLE_DW_TAG(0x0000, null, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000a, label, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000d, member, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF, DW_KIND_TYPE) 
+HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0016, typedef, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001e, module, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002b, namelist, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF, 
DW_KIND_TYPE) // New in DWARF v3: -HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) -HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF) -HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) -HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) -HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) -HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) -HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) -HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) -HANDLE_DW_TAG(0x003f, condition, 3, DWARF) -HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF, DW_KIND_TYPE) // New in DWARF v4: -HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) -HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) -HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF, DW_KIND_NONE) // New in DWARF v5: -HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) -HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) -HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) -HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) -HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) -HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) -HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) -HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, 
DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF, DW_KIND_TYPE) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF, DW_KIND_NONE) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF, DW_KIND_TYPE) // Vendor extensions: -HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) -HANDLE_DW_TAG(0x4101, format_label, 0, GNU) -HANDLE_DW_TAG(0x4102, function_template, 0, GNU) -HANDLE_DW_TAG(0x4103, class_template, 0, GNU) -HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) -HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) -HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) -HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU) -HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU) -HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) -HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) -HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) -HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) -HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) -HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS, DW_KIND_NONE) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU, DW_KIND_NONE) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE, DW_KIND_NONE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND, DW_KIND_NONE) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND, 
DW_KIND_TYPE) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND, DW_KIND_TYPE) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND, DW_KIND_TYPE) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND, DW_KIND_TYPE) // Attributes. HANDLE_DW_AT(0x01, sibling, 2, DWARF) @@ -815,6 +829,17 @@ HANDLE_DW_RLE(0x05, base_address) HANDLE_DW_RLE(0x06, start_end) HANDLE_DW_RLE(0x07, start_length) +// DWARF v5 Loc List Entry encoding values. +HANDLE_DW_LLE(0x00, end_of_list) +HANDLE_DW_LLE(0x01, base_addressx) +HANDLE_DW_LLE(0x02, startx_endx) +HANDLE_DW_LLE(0x03, startx_length) +HANDLE_DW_LLE(0x04, offset_pair) +HANDLE_DW_LLE(0x05, default_location) +HANDLE_DW_LLE(0x06, base_address) +HANDLE_DW_LLE(0x07, start_end) +HANDLE_DW_LLE(0x08, start_length) + // Call frame instruction encodings. HANDLE_DW_CFA(0x00, nop) HANDLE_DW_CFA(0x40, advance_loc) @@ -929,6 +954,7 @@ HANDLE_DW_IDX(0x05, type_hash) #undef HANDLE_DW_LNCT #undef HANDLE_DW_MACRO #undef HANDLE_DW_RLE +#undef HANDLE_DW_LLE #undef HANDLE_DW_CFA #undef HANDLE_DW_CFA_PRED #undef HANDLE_DW_APPLE_PROPERTY diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h index 76d9c365c0a..1c6aee48661 100644 --- a/include/llvm/BinaryFormat/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -46,6 +46,11 @@ enum LLVMConstants : uint32_t { DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. + // Special values for an initial length field. + DW_LENGTH_lo_reserved = 0xfffffff0, // Lower bound of the reserved range. + DW_LENGTH_DWARF64 = 0xffffffff, // Indicator of 64-bit DWARF format. + DW_LENGTH_hi_reserved = 0xffffffff, // Upper bound of the reserved range. + // Other constants. DWARF_VERSION = 4, // Default dwarf version we output. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. 
@@ -75,7 +80,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX; const uint32_t DW_INVALID_OFFSET = UINT32_MAX; enum Tag : uint16_t { -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) DW_TAG_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, @@ -84,29 +89,12 @@ enum Tag : uint16_t { inline bool isType(Tag T) { switch (T) { - case DW_TAG_array_type: - case DW_TAG_class_type: - case DW_TAG_interface_type: - case DW_TAG_enumeration_type: - case DW_TAG_pointer_type: - case DW_TAG_reference_type: - case DW_TAG_rvalue_reference_type: - case DW_TAG_string_type: - case DW_TAG_structure_type: - case DW_TAG_subroutine_type: - case DW_TAG_union_type: - case DW_TAG_ptr_to_member_type: - case DW_TAG_set_type: - case DW_TAG_subrange_type: - case DW_TAG_base_type: - case DW_TAG_const_type: - case DW_TAG_file_type: - case DW_TAG_packed_type: - case DW_TAG_volatile_type: - case DW_TAG_typedef: - return true; default: return false; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ + case DW_TAG_##NAME: \ + return (KIND == DW_KIND_TYPE); +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -129,9 +117,10 @@ enum LocationAtom { #include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, - DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. - DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. - DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. + DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. + DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. + DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. + DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata. 
}; enum TypeKind : uint8_t { @@ -192,6 +181,59 @@ enum SourceLanguage { DW_LANG_hi_user = 0xffff }; +inline bool isCPlusPlus(SourceLanguage S) { + // Deliberately enumerate all the language options so we get a warning when + // new language options are added (-Wswitch) that'll hopefully help keep this + // switch up-to-date when new C++ versions are added. + switch (S) { + case DW_LANG_C_plus_plus: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_C_plus_plus_14: + return true; + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_Ada83: + case DW_LANG_Cobol74: + case DW_LANG_Cobol85: + case DW_LANG_Fortran77: + case DW_LANG_Fortran90: + case DW_LANG_Pascal83: + case DW_LANG_Modula2: + case DW_LANG_Java: + case DW_LANG_C99: + case DW_LANG_Ada95: + case DW_LANG_Fortran95: + case DW_LANG_PLI: + case DW_LANG_ObjC: + case DW_LANG_ObjC_plus_plus: + case DW_LANG_UPC: + case DW_LANG_D: + case DW_LANG_Python: + case DW_LANG_OpenCL: + case DW_LANG_Go: + case DW_LANG_Modula3: + case DW_LANG_Haskell: + case DW_LANG_OCaml: + case DW_LANG_Rust: + case DW_LANG_C11: + case DW_LANG_Swift: + case DW_LANG_Julia: + case DW_LANG_Dylan: + case DW_LANG_Fortran03: + case DW_LANG_Fortran08: + case DW_LANG_RenderScript: + case DW_LANG_BLISS: + case DW_LANG_Mips_Assembler: + case DW_LANG_GOOGLE_RenderScript: + case DW_LANG_BORLAND_Delphi: + case DW_LANG_lo_user: + case DW_LANG_hi_user: + return false; + } + llvm_unreachable("Invalid source language"); +} + enum CaseSensitivity { // Identifier case codes DW_ID_case_sensitive = 0x00, @@ -267,11 +309,17 @@ enum MacroEntryType { }; /// DWARF v5 range list entry encoding values. -enum RangeListEntries { +enum RnglistEntries { #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" }; +/// DWARF v5 loc list entry encoding values. +enum LoclistEntries { +#define HANDLE_DW_LLE(ID, NAME) DW_LLE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" +}; + /// Call frame instruction encodings. 
enum CallFrameInfo { #define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID, @@ -307,19 +355,6 @@ enum Constants { DW_EH_PE_indirect = 0x80 }; -/// Constants for location lists in DWARF v5. -enum LocationListEntry : unsigned char { - DW_LLE_end_of_list = 0x00, - DW_LLE_base_addressx = 0x01, - DW_LLE_startx_endx = 0x02, - DW_LLE_startx_length = 0x03, - DW_LLE_offset_pair = 0x04, - DW_LLE_default_location = 0x05, - DW_LLE_base_address = 0x06, - DW_LLE_start_end = 0x07, - DW_LLE_start_length = 0x08 -}; - /// Constants for the DW_APPLE_PROPERTY_attributes attribute. /// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! enum ApplePropertyAttributes { @@ -434,6 +469,7 @@ StringRef LNStandardString(unsigned Standard); StringRef LNExtendedString(unsigned Encoding); StringRef MacinfoString(unsigned Encoding); StringRef RangeListEncodingString(unsigned Encoding); +StringRef LocListEncodingString(unsigned Encoding); StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch); StringRef ApplePropertyString(unsigned); StringRef UnitTypeString(unsigned); @@ -525,6 +561,17 @@ struct FormParams { explicit operator bool() const { return Version && AddrSize; } }; +/// Get the byte size of the unit length field depending on the DWARF format. +inline uint8_t getUnitLengthFieldByteSize(DwarfFormat Format) { + switch (Format) { + case DwarfFormat::DWARF32: + return 4; + case DwarfFormat::DWARF64: + return 12; + } + llvm_unreachable("Invalid Format value"); +} + /// Get the fixed byte size for a given form. 
/// /// If the form has a fixed byte size, then an Optional with a value will be diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index 2bd71113784..46edfb6260b 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -1356,6 +1356,72 @@ enum : unsigned { NT_GNU_BUILD_ATTRIBUTE_FUNC = 0x101, }; +// Core note types +enum : unsigned { + NT_PRSTATUS = 1, + NT_FPREGSET = 2, + NT_PRPSINFO = 3, + NT_TASKSTRUCT = 4, + NT_AUXV = 6, + NT_PSTATUS = 10, + NT_FPREGS = 12, + NT_PSINFO = 13, + NT_LWPSTATUS = 16, + NT_LWPSINFO = 17, + NT_WIN32PSTATUS = 18, + + NT_PPC_VMX = 0x100, + NT_PPC_VSX = 0x102, + NT_PPC_TAR = 0x103, + NT_PPC_PPR = 0x104, + NT_PPC_DSCR = 0x105, + NT_PPC_EBB = 0x106, + NT_PPC_PMU = 0x107, + NT_PPC_TM_CGPR = 0x108, + NT_PPC_TM_CFPR = 0x109, + NT_PPC_TM_CVMX = 0x10a, + NT_PPC_TM_CVSX = 0x10b, + NT_PPC_TM_SPR = 0x10c, + NT_PPC_TM_CTAR = 0x10d, + NT_PPC_TM_CPPR = 0x10e, + NT_PPC_TM_CDSCR = 0x10f, + + NT_386_TLS = 0x200, + NT_386_IOPERM = 0x201, + NT_X86_XSTATE = 0x202, + + NT_S390_HIGH_GPRS = 0x300, + NT_S390_TIMER = 0x301, + NT_S390_TODCMP = 0x302, + NT_S390_TODPREG = 0x303, + NT_S390_CTRS = 0x304, + NT_S390_PREFIX = 0x305, + NT_S390_LAST_BREAK = 0x306, + NT_S390_SYSTEM_CALL = 0x307, + NT_S390_TDB = 0x308, + NT_S390_VXRS_LOW = 0x309, + NT_S390_VXRS_HIGH = 0x30a, + NT_S390_GS_CB = 0x30b, + NT_S390_GS_BC = 0x30c, + + NT_ARM_VFP = 0x400, + NT_ARM_TLS = 0x401, + NT_ARM_HW_BREAK = 0x402, + NT_ARM_HW_WATCH = 0x403, + NT_ARM_SVE = 0x405, + NT_ARM_PAC_MASK = 0x406, + + NT_FILE = 0x46494c45, + NT_PRXFPREG = 0x46e62b7f, + NT_SIGINFO = 0x53494749, +}; + +// LLVM-specific notes. 
+enum { + NT_LLVM_HWASAN_GLOBALS = 3, +}; + +// GNU note types enum { NT_GNU_ABI_TAG = 1, NT_GNU_HWCAP = 2, diff --git a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 4afcd7d1f09..c8364133e31 100644 --- a/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -124,8 +124,11 @@ ELF_RELOC(R_AARCH64_COPY, 0x400) ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401) ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402) ELF_RELOC(R_AARCH64_RELATIVE, 0x403) -ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404) -ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405) +// 0x404 and 0x405 are now R_AARCH64_TLS_IMPDEF1 and R_AARCH64_TLS_IMPDEF2 +// We follow GNU and define TLS_IMPDEF1 as TLS_DTPMOD64 and TLS_IMPDEF2 as +// TLS_DTPREL64 +ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x404) +ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x405) ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406) ELF_RELOC(R_AARCH64_TLSDESC, 0x407) ELF_RELOC(R_AARCH64_IRELATIVE, 0x408) diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h index a01393a3b30..fb50e549cb9 100644 --- a/include/llvm/BinaryFormat/MachO.h +++ b/include/llvm/BinaryFormat/MachO.h @@ -581,6 +581,11 @@ struct section_64 { uint32_t reserved3; }; +inline bool isVirtualSection(uint8_t type) { + return (type == MachO::S_ZEROFILL || type == MachO::S_GB_ZEROFILL || + type == MachO::S_THREAD_LOCAL_ZEROFILL); +} + struct fvmlib { uint32_t name; uint32_t minor_version; diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h index cd9833ec4d2..64c687262f4 100644 --- a/include/llvm/BinaryFormat/Magic.h +++ b/include/llvm/BinaryFormat/Magic.h @@ -49,6 +49,7 @@ struct file_magic { xcoff_object_64, ///< 64-bit XCOFF object file wasm_object, ///< WebAssembly Object file pdb, ///< Windows PDB debug info file + tapi_file, ///< Text-based Dynamic Library Stub file }; bool is_object() const { return V != unknown; } diff --git a/include/llvm/BinaryFormat/Minidump.h 
b/include/llvm/BinaryFormat/Minidump.h index 65c17d1eb00..89cd779951c 100644 --- a/include/llvm/BinaryFormat/Minidump.h +++ b/include/llvm/BinaryFormat/Minidump.h @@ -18,12 +18,15 @@ #ifndef LLVM_BINARYFORMAT_MINIDUMP_H #define LLVM_BINARYFORMAT_MINIDUMP_H +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" namespace llvm { namespace minidump { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + /// The minidump header is the first part of a minidump file. It identifies the /// file as a minidump file, and gives the location of the stream directory. struct Header { @@ -67,6 +70,50 @@ struct MemoryDescriptor { }; static_assert(sizeof(MemoryDescriptor) == 16, ""); +struct MemoryInfoListHeader { + support::ulittle32_t SizeOfHeader; + support::ulittle32_t SizeOfEntry; + support::ulittle64_t NumberOfEntries; + + MemoryInfoListHeader() = default; + MemoryInfoListHeader(uint32_t SizeOfHeader, uint32_t SizeOfEntry, + uint64_t NumberOfEntries) + : SizeOfHeader(SizeOfHeader), SizeOfEntry(SizeOfEntry), + NumberOfEntries(NumberOfEntries) {} +}; +static_assert(sizeof(MemoryInfoListHeader) == 16, ""); + +enum class MemoryProtection : uint32_t { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +enum class MemoryState : uint32_t { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +enum class MemoryType : uint32_t { +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) NAME = CODE, +#include "llvm/BinaryFormat/MinidumpConstants.def" + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/0xffffffffu), +}; + +struct MemoryInfo { + support::ulittle64_t BaseAddress; + support::ulittle64_t AllocationBase; + support::little_t AllocationProtect; + support::ulittle32_t Reserved0; + 
support::ulittle64_t RegionSize; + support::little_t State; + support::little_t Protect; + support::little_t Type; + support::ulittle32_t Reserved1; +}; +static_assert(sizeof(MemoryInfo) == 48, ""); + /// Specifies the location and type of a single stream in the minidump file. The /// minidump stream directory is an array of entries of this type, with its size /// given by Header.NumberOfStreams. @@ -180,6 +227,27 @@ struct Thread { }; static_assert(sizeof(Thread) == 48, ""); +struct Exception { + static constexpr size_t MaxParameters = 15; + + support::ulittle32_t ExceptionCode; + support::ulittle32_t ExceptionFlags; + support::ulittle64_t ExceptionRecord; + support::ulittle64_t ExceptionAddress; + support::ulittle32_t NumberParameters; + support::ulittle32_t UnusedAlignment; + support::ulittle64_t ExceptionInformation[MaxParameters]; +}; +static_assert(sizeof(Exception) == 152, ""); + +struct ExceptionStream { + support::ulittle32_t ThreadId; + support::ulittle32_t UnusedAlignment; + Exception ExceptionRecord; + LocationDescriptor ThreadContext; +}; +static_assert(sizeof(ExceptionStream) == 168, ""); + } // namespace minidump template <> struct DenseMapInfo { diff --git a/include/llvm/BinaryFormat/MinidumpConstants.def b/include/llvm/BinaryFormat/MinidumpConstants.def index d4f13dd9921..aeef399af7a 100644 --- a/include/llvm/BinaryFormat/MinidumpConstants.def +++ b/include/llvm/BinaryFormat/MinidumpConstants.def @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH || \ - defined HANDLE_MDMP_PLATFORM) +#if !(defined(HANDLE_MDMP_STREAM_TYPE) || defined(HANDLE_MDMP_ARCH) || \ + defined(HANDLE_MDMP_PLATFORM) || defined(HANDLE_MDMP_PROTECT) || \ + defined(HANDLE_MDMP_MEMSTATE) || defined(HANDLE_MDMP_MEMTYPE)) #error "Missing HANDLE_MDMP definition" #endif @@ -23,6 +24,18 @@ #define HANDLE_MDMP_PLATFORM(CODE, NAME) #endif +#ifndef HANDLE_MDMP_PROTECT 
+#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMSTATE +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) +#endif + +#ifndef HANDLE_MDMP_MEMTYPE +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) +#endif + HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList) HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList) HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList) @@ -102,6 +115,30 @@ HANDLE_MDMP_PLATFORM(0x8203, Android) // Android HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3 HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl) +HANDLE_MDMP_PROTECT(0x01, NoAccess, PAGE_NO_ACCESS) +HANDLE_MDMP_PROTECT(0x02, ReadOnly, PAGE_READ_ONLY) +HANDLE_MDMP_PROTECT(0x04, ReadWrite, PAGE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x08, WriteCopy, PAGE_WRITE_COPY) +HANDLE_MDMP_PROTECT(0x10, Execute, PAGE_EXECUTE) +HANDLE_MDMP_PROTECT(0x20, ExecuteRead, PAGE_EXECUTE_READ) +HANDLE_MDMP_PROTECT(0x40, ExecuteReadWrite, PAGE_EXECUTE_READ_WRITE) +HANDLE_MDMP_PROTECT(0x80, ExeciteWriteCopy, PAGE_EXECUTE_WRITE_COPY) +HANDLE_MDMP_PROTECT(0x100, Guard, PAGE_GUARD) +HANDLE_MDMP_PROTECT(0x200, NoCache, PAGE_NOCACHE) +HANDLE_MDMP_PROTECT(0x400, WriteCombine, PAGE_WRITECOMBINE) +HANDLE_MDMP_PROTECT(0x40000000, TargetsInvalid, PAGE_TARGETS_INVALID) + +HANDLE_MDMP_MEMSTATE(0x01000, Commit, MEM_COMMIT) +HANDLE_MDMP_MEMSTATE(0x02000, Reserve, MEM_RESERVE) +HANDLE_MDMP_MEMSTATE(0x10000, Free, MEM_FREE) + +HANDLE_MDMP_MEMTYPE(0x0020000, Private, MEM_PRIVATE) +HANDLE_MDMP_MEMTYPE(0x0040000, Mapped, MEM_MAPPED) +HANDLE_MDMP_MEMTYPE(0x1000000, Image, MEM_IMAGE) + #undef HANDLE_MDMP_STREAM_TYPE #undef HANDLE_MDMP_ARCH #undef HANDLE_MDMP_PLATFORM +#undef HANDLE_MDMP_PROTECT +#undef HANDLE_MDMP_MEMSTATE +#undef HANDLE_MDMP_MEMTYPE diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index 0f22bfe610c..f550d880f68 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include 
"llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" namespace llvm { namespace wasm { @@ -251,9 +252,21 @@ enum : unsigned { WASM_OPCODE_F32_CONST = 0x43, WASM_OPCODE_F64_CONST = 0x44, WASM_OPCODE_I32_ADD = 0x6a, +}; + +// Opcodes used in synthetic functions. +enum : unsigned { + WASM_OPCODE_IF = 0x04, + WASM_OPCODE_ELSE = 0x05, + WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, WASM_OPCODE_DATA_DROP = 0x09, + WASM_OPCODE_ATOMICS_PREFIX = 0xfe, + WASM_OPCODE_ATOMIC_NOTIFY = 0x00, + WASM_OPCODE_I32_ATOMIC_WAIT = 0x01, + WASM_OPCODE_I32_ATOMIC_STORE = 0x17, + WASM_OPCODE_I32_RMW_CMPXCHG = 0x48, }; enum : unsigned { @@ -318,6 +331,7 @@ const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; const unsigned WASM_SYMBOL_UNDEFINED = 0x10; const unsigned WASM_SYMBOL_EXPORTED = 0x20; const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; +const unsigned WASM_SYMBOL_NO_STRIP = 0x80; #define WASM_RELOC(name, value) name = value, diff --git a/include/llvm/BinaryFormat/XCOFF.h b/include/llvm/BinaryFormat/XCOFF.h index 7774ab3ed24..20a0f446272 100644 --- a/include/llvm/BinaryFormat/XCOFF.h +++ b/include/llvm/BinaryFormat/XCOFF.h @@ -19,12 +19,13 @@ namespace llvm { namespace XCOFF { // Constants used in the XCOFF definition. -enum { SectionNameSize = 8, SymbolNameSize = 8 }; +enum { FileNamePadSize = 6, NameSize = 8, SymbolTableEntrySize = 18 }; + enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. -enum StorageMappingClass { +enum StorageMappingClass : uint8_t { // READ ONLY CLASSES XMC_PR = 0, ///< Program Code XMC_RO = 1, ///< Read Only Constant @@ -139,6 +140,117 @@ enum StorageClass : uint8_t { C_TCSYM = 134 // Reserved }; +enum SymbolType { + XTY_ER = 0, ///< External reference. + XTY_SD = 1, ///< Csect definition for initialized storage. + XTY_LD = 2, ///< Label definition. 
+ ///< Defines an entry point to an initialized csect. + XTY_CM = 3 ///< Common csect definition. For uninitialized storage. +}; + +// Relocation types, defined in `/usr/include/reloc.h`. +enum RelocationType : uint8_t { + R_POS = 0x00, ///< Positive relocation. Provides the address of the referenced + ///< symbol. + R_RL = 0x0c, ///< Positive indirect load relocation. Modifiable instruction. + R_RLA = 0x0d, ///< Positive load address relocation. Modifiable instruction. + + R_NEG = 0x01, ///< Negative relocation. Provides the negative of the address + ///< of the referenced symbol. + R_REL = 0x02, ///< Relative to self relocation. Provides a displacement value + ///< between the address of the referenced symbol and the + ///< address being relocated. + + R_TOC = 0x03, ///< Relative to the TOC relocation. Provides a displacement + ///< that is the difference between the address of the + ///< referenced symbol and the TOC anchor csect. + R_TRL = 0x12, ///< TOC relative indirect load relocation. Similar to R_TOC, + ///< but not modifiable instruction. + + R_TRLA = + 0x13, ///< Relative to the TOC or to the thread-local storage base + ///< relocation. Compilers are not permitted to generate this + ///< relocation type. It is the result of a reversible + ///< transformation by the linker of an R_TOC relation that turned a + ///< load instruction into an add-immediate instruction. + + R_GL = 0x05, ///< Global linkage-external TOC address relocation. Provides the + ///< address of the external TOC associated with a defined + ///< external symbol. + R_TCL = 0x06, ///< Local object TOC address relocation. Provides the address + ///< of the local TOC entry of a defined external symbol. + + R_REF = 0x0f, ///< A non-relocating relocation. Used to prevent the binder + ///< from garbage collecting a csect (such as code used for + ///< dynamic initialization of non-local statics) for which + ///< another csect has an implicit dependency. 
+ + R_BA = 0x08, ///< Branch absolute relocation. Provides the address of the + ///< referenced symbol. References a non-modifiable instruction. + R_BR = 0x0a, ///< Branch relative to self relocation. Provides the + ///< displacement that is the difference between the address of + ///< the referenced symbol and the address of the referenced + ///< branch instruction. References a non-modifiable instruction. + R_RBA = 0x18, ///< Branch absolute relocation. Similar to R_BA but + ///< references a modifiable instruction. + R_RBR = 0x1a, ///< Branch relative to self relocation. Similar to the R_BR + ///< relocation type, but references a modifiable instruction. + + R_TLS = 0x20, ///< General-dynamic reference to TLS symbol. + R_TLS_IE = 0x21, ///< Initial-exec reference to TLS symbol. + R_TLS_LD = 0x22, ///< Local-dynamic reference to TLS symbol. + R_TLS_LE = 0x23, ///< Local-exec reference to TLS symbol. + R_TLSM = 0x24, ///< Module reference to TLS. Provides a handle for the module + ///< containing the referenced symbol. + R_TLSML = 0x25, ///< Module reference to the local TLS storage. + + R_TOCU = 0x30, ///< Relative to TOC upper. Specifies the high-order 16 bits of + ///< a large code model TOC-relative relocation. + R_TOCL = 0x31 ///< Relative to TOC lower. Specifies the low-order 16 bits of a + ///< large code model TOC-relative relocation. +}; + +struct FileHeader32 { + uint16_t Magic; + uint16_t NumberOfSections; + int32_t TimeStamp; + uint32_t SymbolTableFileOffset; + int32_t NumberOfSymbolTableEntries; + uint16_t AuxiliaryHeaderSize; + uint16_t Flags; +}; + +struct SectionHeader32 { + char Name[XCOFF::NameSize]; + uint32_t PhysicalAddress; + uint32_t VirtualAddress; + uint32_t Size; + uint32_t FileOffsetToData; + uint32_t FileOffsetToRelocations; + uint32_t FileOffsetToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + int32_t Flags; +}; + +enum CFileStringType : uint8_t { + XFT_FN = 0, ///< Specifies the source-file name. 
+ XFT_CT = 1, ///< Specifies the compiler time stamp. + XFT_CV = 2, ///< Specifies the compiler version number. + XFT_CD = 128 ///< Specifies compiler-defined information. +}; + +enum CFileLangId : uint8_t { + TB_C = 0, ///< C language. + TB_CPLUSPLUS = 9 ///< C++ language. +}; + +enum CFileCpuId : uint8_t { + TCPU_PPC64 = 2, ///< PowerPC common architecture 64-bit mode. + TCPU_COM = 3, ///< POWER and PowerPC architecture common. + TCPU_970 = 19 ///< PPC970 - PowerPC 64-bit architecture. +}; + } // end namespace XCOFF } // end namespace llvm diff --git a/include/llvm/Bitcode/BitcodeAnalyzer.h b/include/llvm/Bitcode/BitcodeAnalyzer.h index cfdebd6fe6c..5fb8bb26f25 100644 --- a/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -30,6 +30,7 @@ enum CurStreamTypeType { LLVMIRBitstream, ClangSerializedASTBitstream, ClangSerializedDiagnosticsBitstream, + LLVMBitstreamRemarks }; struct BCDumpOptions { diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index decd4dd3a96..1a397068caf 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -391,7 +391,7 @@ enum CastOpcodes { /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. 
enum UnaryOpcodes { - UNOP_NEG = 0 + UNOP_FNEG = 0 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which diff --git a/include/llvm/Bitstream/BitCodes.h b/include/llvm/Bitstream/BitCodes.h index adf54ba9639..41a3de3b20e 100644 --- a/include/llvm/Bitstream/BitCodes.h +++ b/include/llvm/Bitstream/BitCodes.h @@ -168,6 +168,11 @@ class BitCodeAbbrev { SmallVector OperandList; public: + BitCodeAbbrev() = default; + + explicit BitCodeAbbrev(std::initializer_list OperandList) + : OperandList(OperandList) {} + unsigned getNumOperandInfos() const { return static_cast(OperandList.size()); } diff --git a/include/llvm/Bitstream/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h index ee82e7ec1ba..b49a969a2d8 100644 --- a/include/llvm/Bitstream/BitstreamReader.h +++ b/include/llvm/Bitstream/BitstreamReader.h @@ -379,6 +379,7 @@ public: using SimpleBitstreamCursor::ReadVBR; using SimpleBitstreamCursor::ReadVBR64; using SimpleBitstreamCursor::SizeInBytes; + using SimpleBitstreamCursor::skipToEnd; /// Return the number of bits used to encode an abbrev #. unsigned getAbbrevIDWidth() const { return CurCodeSize; } diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h index 734531a65d5..f8f6b5448f3 100644 --- a/include/llvm/CodeGen/AccelTable.h +++ b/include/llvm/CodeGen/AccelTable.h @@ -101,8 +101,6 @@ /// /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable /// function. -/// -/// TODO: Add DWARF v5 emission code. namespace llvm { diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index d110f8b01cb..a4580da5aec 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -111,6 +111,10 @@ public: /// of each call to runOnMachineFunction(). MCSymbol *CurrentFnSym = nullptr; + /// The symbol for the current function descriptor on AIX. This is created + /// at the beginning of each call to SetupMachineFunction(). 
+ MCSymbol *CurrentFnDescSym = nullptr; + /// The symbol used to represent the start of the current function for the /// purpose of calculating its size (e.g. using the .size directive). By /// default, this is equal to CurrentFnSym. @@ -304,7 +308,7 @@ public: /// This should be called when a new MachineFunction is being processed from /// runOnMachineFunction. - void SetupMachineFunction(MachineFunction &MF); + virtual void SetupMachineFunction(MachineFunction &MF); /// This method emits the body and trailer for a function. void EmitFunctionBody(); @@ -342,12 +346,11 @@ public: /// so, emit it and return true, otherwise do nothing and return false. bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - /// Emit an alignment directive to the specified power of two boundary. For - /// example, if you pass in 3 here, you will get an 8 byte alignment. If a + /// Emit an alignment directive to the specified power of two boundary. If a /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const; + void EmitAlignment(Align Alignment, const GlobalObject *GV = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. virtual const MCExpr *lowerConstant(const Constant *CV); @@ -400,7 +403,7 @@ public: /// By default, this method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing it /// if appropriate. - virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; + virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB); /// Targets can override this to emit stuff at the end of a basic block. 
virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); @@ -415,6 +418,10 @@ public: virtual void EmitFunctionEntryLabel(); + virtual void EmitFunctionDescriptor() { + llvm_unreachable("Function descriptor is target-specific."); + } + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); /// Targets can override this to change how global constants that are part of @@ -635,6 +642,10 @@ public: /// supported by the target. void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const; + /// Return the alignment for the specified \p GV. + static Align getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign = Align::None()); + private: /// Private state for PrintSpecial() // Assign a unique ID to this machine instruction. diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 70bf670fdf0..2e57b4c9d33 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -190,6 +190,7 @@ private: protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} + virtual ~BasicTTIImplBase() = default; using TargetTransformInfoImplBase::DL; @@ -215,6 +216,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLegalAddImmediate(int64_t imm) { return getTLI()->isLegalAddImmediate(imm); } @@ -317,7 +328,7 @@ public: unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize) { /// Try to find the estimated number of clusters. Note that the number of - /// clusters identified in this function could be different from the actural + /// clusters identified in this function could be different from the actual /// numbers found in lowering. 
This function ignore switches that are /// lowered with a mix of jump table / bit test / BTree. This function was /// initially intended to be used when estimating the cost of switch in @@ -371,10 +382,6 @@ public: return N; } - unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } - - unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } - bool shouldBuildLookupTables() { const TargetLoweringBase *TLI = getTLI(); return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || @@ -508,13 +515,44 @@ public: return BaseT::getInstructionLatency(I); } + virtual Optional + getCacheSize(TargetTransformInfo::CacheLevel Level) const { + return Optional( + getST()->getCacheSize(static_cast(Level))); + } + + virtual Optional + getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { + Optional TargetResult = + getST()->getCacheAssociativity(static_cast(Level)); + + if (TargetResult) + return TargetResult; + + return BaseT::getCacheAssociativity(Level); + } + + virtual unsigned getCacheLineSize() const { + return getST()->getCacheLineSize(); + } + + virtual unsigned getPrefetchDistance() const { + return getST()->getPrefetchDistance(); + } + + virtual unsigned getMinPrefetchStride() const { + return getST()->getMinPrefetchStride(); + } + + virtual unsigned getMaxPrefetchIterationsAhead() const { + return getST()->getMaxPrefetchIterationsAhead(); + } + /// @} /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract @@ -1111,9 +1149,7 @@ public: OpPropsBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
if (X != Y) { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, nullptr); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, @@ -1131,7 +1167,6 @@ public: unsigned getIntrinsicInstrCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = std::numeric_limits::max()) { - unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); auto *ConcreteTTI = static_cast(this); SmallVector ISDs; @@ -1288,9 +1323,7 @@ public: /*IsUnsigned=*/false); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat @@ -1310,9 +1343,7 @@ public: } case Intrinsic::uadd_sat: case Intrinsic::usub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat @@ -1329,9 +1360,7 @@ public: case Intrinsic::smul_fix: case Intrinsic::umul_fix: { unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (RetVF > 1) - ExtTy = VectorType::get(ExtTy, RetVF); + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; @@ -1395,9 +1424,7 @@ public: Type *MulTy = RetTy->getContainedType(0); Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (MulTy->isVectorTy()) - ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index aa339e1cc91..a30ca638ee6 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" namespace llvm { @@ -43,6 +44,7 @@ public: AExtUpper, // The value is in the upper bits of the location and should be // extended with undefined upper bits when retrieved. BCvt, // The value is bit-converted in the location. + Trunc, // The value is truncated in the location. VExt, // The value is vector-widened in the location. // FIXME: Not implemented yet. Code that uses AExt to mean // vector-widen should be fixed to use VExt instead. @@ -197,7 +199,7 @@ private: LLVMContext &Context; unsigned StackOffset; - unsigned MaxStackArgAlign; + Align MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; SmallVector PendingArgFlags; @@ -421,19 +423,19 @@ public: /// AllocateStack - Allocate a chunk of stack space with the specified size /// and alignment. - unsigned AllocateStack(unsigned Size, unsigned Align) { - assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. 
- StackOffset = alignTo(StackOffset, Align); + unsigned AllocateStack(unsigned Size, unsigned Alignment) { + const Align CheckedAlignment(Alignment); + StackOffset = alignTo(StackOffset, CheckedAlignment); unsigned Result = StackOffset; StackOffset += Size; - MaxStackArgAlign = std::max(Align, MaxStackArgAlign); - ensureMaxAlignment(Align); + MaxStackArgAlign = std::max(CheckedAlignment, MaxStackArgAlign); + ensureMaxAlignment(CheckedAlignment); return Result; } - void ensureMaxAlignment(unsigned Align) { + void ensureMaxAlignment(Align Alignment) { if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Align); + MF.getFrameInfo().ensureMaxAlignment(Alignment.value()); } /// Version of AllocateStack with extra register to be shadowed. diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index cf58ee0cabe..705465b15c4 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -28,6 +28,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/Support/Automaton.h" #include #include #include @@ -76,26 +77,26 @@ using DFAStateInput = int64_t; class DFAPacketizer { private: - using UnsignPair = std::pair; - const InstrItineraryData *InstrItins; - int CurrentState = 0; - const DFAStateInput (*DFAStateInputTable)[2]; - const unsigned *DFAStateEntryTable; - - // CachedTable is a map from to ToState. - DenseMap CachedTable; - - // Read the DFA transition table and update CachedTable. - void ReadTable(unsigned state); + Automaton A; public: - DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], - const unsigned *SET); + DFAPacketizer(const InstrItineraryData *InstrItins, Automaton a) : + InstrItins(InstrItins), A(std::move(a)) { + // Start off with resource tracking disabled. + A.enableTranscription(false); + } // Reset the current state to make all resources available. 
void clearResources() { - CurrentState = 0; + A.reset(); + } + + // Set whether this packetizer should track not just whether instructions + // can be packetized, but also which functional units each instruction ends up + // using after packetization. + void setTrackResources(bool Track) { + A.enableTranscription(Track); } // Return the DFAInput for an instruction class. @@ -120,6 +121,15 @@ public: // current state to reflect that change. void reserveResources(MachineInstr &MI); + // Return the resources used by the InstIdx'th instruction added to this + // packet. The resources are returned as a bitvector of functional units. + // + // Note that a bundle may be packed in multiple valid ways. This function + // returns one arbitrary valid packing. + // + // Requires setTrackResources(true) to have been called. + unsigned getUsedResources(unsigned InstIdx); + const InstrItineraryData *getInstrItins() const { return InstrItins; } }; @@ -134,7 +144,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; - AliasAnalysis *AA; + AAResults *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -146,9 +156,9 @@ protected: std::map MIToSUnit; public: - // The AliasAnalysis parameter can be nullptr. + // The AAResults parameter can be nullptr. VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); virtual ~VLIWPacketizerList(); diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 684f9e40ca5..e8e7504a6cd 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -550,6 +550,14 @@ public: return *static_cast(Last ? 
Last->Next.getPointer() : nullptr); } + void takeNodes(IntrusiveBackList &Other) { + for (auto &N : Other) { + N.Next.setPointerAndInt(&N, true); + push_back(N); + } + Other.Last = nullptr; + } + class const_iterator; class iterator : public iterator_facade_base { @@ -685,6 +693,10 @@ public: return addValue(Alloc, DIEValue(Attribute, Form, std::forward(Value))); } + /// Take ownership of the nodes in \p Other, and append them to the back of + /// the list. + void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); } + value_range values() { return make_range(value_iterator(List.begin()), value_iterator(List.end())); } diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index f09b59daf4d..03d681feb7a 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -93,9 +93,9 @@ public: SmallVector OutVals; SmallVector OutFlags; - SmallVector OutRegs; + SmallVector OutRegs; SmallVector Ins; - SmallVector InRegs; + SmallVector InRegs; CallLoweringInfo() : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index fb60191abd3..f812a2f6c58 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -37,6 +36,7 @@ namespace llvm { class Argument; class BasicBlock; class BranchProbabilityInfo; +class LegacyDivergenceAnalysis; class Function; class Instruction; class MachineFunction; diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index d717121ad78..4901a3748e4 100644 --- 
a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -45,18 +45,62 @@ class CallLowering { public: struct ArgInfo { SmallVector Regs; + // If the argument had to be split into multiple parts according to the + // target calling convention, then this contains the original vregs + // if the argument was an incoming arg. + SmallVector OrigRegs; Type *Ty; - ISD::ArgFlagsTy Flags; + SmallVector Flags; bool IsFixed; ArgInfo(ArrayRef Regs, Type *Ty, - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true) - : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags), - IsFixed(IsFixed) { + ArrayRef Flags = ArrayRef(), + bool IsFixed = true) + : Regs(Regs.begin(), Regs.end()), Ty(Ty), + Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) { + if (!Regs.empty() && Flags.empty()) + this->Flags.push_back(ISD::ArgFlagsTy()); // FIXME: We should have just one way of saying "no register". assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) && "only void types should have no register"); } + + ArgInfo() : Ty(nullptr), IsFixed(false) {} + }; + + struct CallLoweringInfo { + /// Calling convention to be used for the call. + CallingConv::ID CallConv = CallingConv::C; + + /// Destination of the call. It should be either a register, globaladdress, + /// or externalsymbol. + MachineOperand Callee = MachineOperand::CreateImm(0); + + /// Descriptor for the return type of the function. + ArgInfo OrigRet; + + /// List of descriptors of the arguments passed to the function. + SmallVector OrigArgs; + + /// Valid if the call has a swifterror inout parameter, and contains the + /// vreg that the swifterror should be copied into after the call. + Register SwiftErrorVReg = 0; + + MDNode *KnownCallees = nullptr; + + /// True if the call must be tail call optimized. + bool IsMustTailCall = false; + + /// True if the call passes all target-independent checks for tail call + /// optimization. 
+ bool IsTailCall = false; + + /// True if the call was lowered as a tail call. This is consumed by the + /// legalizer. This allows the legalizer to lower libcalls as tail calls. + bool LoweredTailCall = false; + + /// True if the call is to a vararg function. + bool IsVarArg = false; }; /// Argument handling is mostly uniform between the four places that @@ -72,9 +116,9 @@ public: virtual ~ValueHandler() = default; - /// Returns true if the handler is dealing with formal arguments, - /// not with return values etc. - virtual bool isArgumentHandler() const { return false; } + /// Returns true if the handler is dealing with incoming arguments, + /// i.e. those that move values from some physical location to vregs. + virtual bool isIncomingArgumentHandler() const = 0; /// Materialize a VReg containing the address of the specified /// stack-based object. This is either based on a FrameIndex or @@ -112,8 +156,8 @@ public: virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const ArgInfo &Info, - CCState &State) { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + ISD::ArgFlagsTy Flags, CCState &State) { + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); } MachineIRBuilder &MIRBuilder; @@ -162,12 +206,42 @@ protected: /// \p Callback to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, + bool handleAssignments(MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; bool handleAssignments(CCState &CCState, SmallVectorImpl &ArgLocs, - MachineIRBuilder &MIRBuilder, ArrayRef Args, + MachineIRBuilder &MIRBuilder, + SmallVectorImpl &Args, ValueHandler &Handler) const; + + /// Analyze passed or returned values from a call, supplied in \p ArgInfo, + /// incorporating info about the passed values into \p CCState. 
+ /// + /// Used to check if arguments are suitable for tail call lowering. + bool analyzeArgInfo(CCState &CCState, SmallVectorImpl &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const; + + /// \returns True if the calling convention for a callee and its caller pass + /// results in the same way. Typically used for tail call eligibility checks. + /// + /// \p Info is the CallLoweringInfo for the call. + /// \p MF is the MachineFunction for the caller. + /// \p InArgs contains the results of the call. + /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for + /// fixed arguments. + /// \p CalleeAssignFnVarArg is similar, but for varargs. + /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for + /// fixed arguments. + /// \p CallerAssignFnVarArg is similar, but for varargs. + bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const; + public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} virtual ~CallLowering() = default; @@ -223,37 +297,10 @@ public: /// This hook must be implemented to lower the given call instruction, /// including argument and return value marshalling. /// - /// \p CallConv is the calling convention to be used for the call. - /// - /// \p Callee is the destination of the call. It should be either a register, - /// globaladdress, or externalsymbol. - /// - /// \p OrigRet is a descriptor for the return type of the function. - /// - /// \p OrigArgs is a list of descriptors of the arguments passed to the - /// function. - /// - /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout - /// parameter, and contains the vreg that the swifterror should be copied into - /// after the call. /// /// \return true if the lowering succeeded, false otherwise. 
- virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const { - if (!supportSwiftError()) { - assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror"); - return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs); - } - return false; - } - - /// This hook behaves as the extended lowerCall function, but for targets that - /// do not support swifterror value promotion. - virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + virtual bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { return false; } diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 0c50c9c5e0c..4c04dc52547 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -27,6 +27,8 @@ class MachineIRBuilder; class MachineRegisterInfo; class MachineInstr; class MachineOperand; +class GISelKnownBits; +class MachineDominatorTree; struct PreferredTuple { LLT Ty; // The result type of the extend. 
@@ -35,12 +37,17 @@ struct PreferredTuple { }; class CombinerHelper { +protected: MachineIRBuilder &Builder; MachineRegisterInfo &MRI; GISelChangeObserver &Observer; + GISelKnownBits *KB; + MachineDominatorTree *MDT; public: - CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B); + CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + GISelKnownBits *KB = nullptr, + MachineDominatorTree *MDT = nullptr); /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; @@ -56,18 +63,132 @@ public: bool matchCombineCopy(MachineInstr &MI); void applyCombineCopy(MachineInstr &MI); + /// Returns true if \p DefMI precedes \p UseMI or they are the same + /// instruction. Both must be in the same basic block. + bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI); + + /// Returns true if \p DefMI dominates \p UseMI. By definition an + /// instruction dominates itself. + /// + /// If we haven't been provided with a MachineDominatorTree during + /// construction, this function returns a conservative result that tracks just + /// a single basic block. + bool dominates(MachineInstr &DefMI, MachineInstr &UseMI); + /// If \p MI is extend that consumes the result of a load, try to combine it. /// Returns true if MI changed. bool tryCombineExtendingLoads(MachineInstr &MI); bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); - bool matchCombineBr(MachineInstr &MI); - bool tryCombineBr(MachineInstr &MI); + /// Combine \p MI into a pre-indexed or post-indexed load/store operation if + /// legal and the surrounding code makes it useful. 
+ bool tryCombineIndexedLoadStore(MachineInstr &MI); + + bool matchElideBrByInvertingCond(MachineInstr &MI); + void applyElideBrByInvertingCond(MachineInstr &MI); + bool tryElideBrByInvertingCond(MachineInstr &MI); + + /// If \p MI is G_CONCAT_VECTORS, try to combine it. + /// Returns true if MI changed. + /// Right now, we support: + /// - concat_vector(undef, undef) => undef + /// - concat_vector(build_vector(A, B), build_vector(C, D)) => + /// build_vector(A, B, C, D) + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool tryCombineConcatVectors(MachineInstr &MI); + /// Check if the G_CONCAT_VECTORS \p MI is undef or if it + /// can be flattened into a build_vector. + /// In the first case \p IsUndef will be true. + /// In the second case \p Ops will contain the operands needed + /// to produce the flattened build_vector. + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl &Ops); + /// Replace \p MI with a flattened build_vector with \p Ops or an + /// implicit_def if IsUndef is true. + void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef, + const ArrayRef Ops); + + /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS. + /// Returns true if MI changed. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool tryCombineShuffleVector(MachineInstr &MI); + /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a + /// concat_vectors. + /// \p Ops will contain the operands needed to produce the flattened + /// concat_vectors. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl &Ops); + /// Replace \p MI with a concat_vectors with \p Ops. + void applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef Ops); + + /// Optimize memcpy intrinsics et al, e.g. constant len calls. + /// \p MaxLen if non-zero specifies the max length of a mem libcall to inline. 
+ /// + /// For example (pre-indexed): + /// + /// $addr = G_GEP $base, $offset + /// [...] + /// $val = G_LOAD $addr + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre) + /// [...] + /// $whatever = COPY $addr + /// + /// or (post-indexed): + /// + /// G_STORE $val, $base + /// [...] + /// $addr = G_GEP $base, $offset + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $addr = G_INDEXED_STORE $val, $base, $offset + /// [...] + /// $whatever = COPY $addr + bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); + +private: + // Memcpy family optimization helpers. + bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned DstAlign, bool IsVolatile); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a post-indexing operation. + /// + /// \returns true if a candidate is found. + bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a pre-indexing operation. + /// + /// \returns true if a candidate is found. 
+ bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); }; } // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 3b09a8e2b47..ad645a46bbe 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -27,9 +27,11 @@ class MachineRegisterInfo; class CombinerInfo { public: CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal, - LegalizerInfo *LInfo) + LegalizerInfo *LInfo, bool OptEnabled, bool OptSize, + bool MinSize) : IllegalOpsAllowed(AllowIllegalOps), - LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) { + LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo), + EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) { assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && "Expecting legalizerInfo when illegalops not allowed"); } @@ -43,6 +45,15 @@ public: bool LegalizeIllegalOps; // TODO: Make use of this. const LegalizerInfo *LInfo; + /// Whether optimizations should be enabled. This is to distinguish between + /// uses of the combiner unconditionally and only when optimizations are + /// specifically enabled. + bool EnableOpt; + /// Whether we're optimizing for size. + bool EnableOptSize; + /// Whether we're optimizing for minsize (-Oz). + bool EnableMinSize; + /// Attempt to combine instructions using MI as the root. 
/// /// Use Observer to report the creation, modification, and erasure of diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h index e817d9b4550..df196bfbd43 100644 --- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h @@ -54,6 +54,17 @@ public: return buildConstant(Dst, MaybeCst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps); } diff --git a/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h new file mode 100644 index 00000000000..dfe5a7f3177 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -0,0 +1,111 @@ +//===- llvm/CodeGen/GlobalISel/GISelKnownBits.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H +#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H + +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/KnownBits.h" + +namespace llvm { + +class TargetLowering; +class DataLayout; + +class GISelKnownBits : public GISelChangeObserver { + MachineFunction &MF; + MachineRegisterInfo &MRI; + const TargetLowering &TL; + const DataLayout &DL; + +public: + GISelKnownBits(MachineFunction &MF); + virtual ~GISelKnownBits() = default; + void setMF(MachineFunction &MF); + virtual void computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + + // KnownBitsAPI + KnownBits getKnownBits(Register R); + // Calls getKnownBits for first operand def of MI. + KnownBits getKnownBits(MachineInstr &MI); + APInt getKnownZeroes(Register R); + APInt getKnownOnes(Register R); + + /// \return true if 'V & Mask' is known to be zero in DemandedElts. We use + /// this predicate to simplify operations downstream. + /// Mask is known to be zero for bits that V cannot have. + bool maskedValueIsZero(Register Val, const APInt &Mask) { + return Mask.isSubsetOf(getKnownBits(Val).Zero); + } + + /// \return true if the sign bit of Op is known to be zero. We use this + /// predicate to simplify operations downstream. + bool signBitIsZero(Register Op); + + // FIXME: Is this the right place for G_FRAME_INDEX? Should it be in + // TargetLowering? 
+ void computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF); + static void computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment); + + // Try to infer alignment for MI. + static MaybeAlign inferPtrAlignment(const MachineInstr &MI); + + // Observer API. No-op for non-caching implementation. + void erasingInstr(MachineInstr &MI) override{}; + void createdInstr(MachineInstr &MI) override{}; + void changingInstr(MachineInstr &MI) override{}; + void changedInstr(MachineInstr &MI) override{}; + +protected: + unsigned getMaxDepth() const { return 6; } +}; + +/// To use KnownBitsInfo analysis in a pass, +/// KnownBitsInfo &Info = getAnalysis().get(MF); +/// Add to observer if the Info is caching. +/// WrapperObserver.addObserver(Info); + +/// Eventually add other features such as caching/ser/deserializing +/// to MIR etc. Those implementations can derive from GISelKnownBits +/// and override computeKnownBitsImpl. 
+class GISelKnownBitsAnalysis : public MachineFunctionPass { + std::unique_ptr Info; + +public: + static char ID; + GISelKnownBitsAnalysis() : MachineFunctionPass(ID) { + initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry()); + } + GISelKnownBits &get(MachineFunction &MF) { + if (!Info) + Info = std::make_unique(MF); + return *Info.get(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + void releaseMemory() override { Info.reset(); } +}; +} // namespace llvm + +#endif // ifdef diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 8654ba83f08..bdb92aa4689 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -213,8 +213,8 @@ private: bool translateStore(const User &U, MachineIRBuilder &MIRBuilder); /// Translate an LLVM string intrinsic (memcpy, memset, ...). - bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID); + bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, + Intrinsic::ID ID); void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder); @@ -243,6 +243,10 @@ private: bool valueIsSplit(const Value &V, SmallVectorImpl *Offsets = nullptr); + /// Common code for translating normal calls or invokes. + bool translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder); + /// Translate call instruction. /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); @@ -514,6 +518,10 @@ private: // function has the optnone attribute. bool EnableOpts = false; + /// True when the block contains a tail call. This allows the IRTranslator to + /// stop translating such blocks early. + bool HasTailCall = false; + /// Switch analysis and optimization. 
class GISelSwitchLowering : public SwitchCG::SwitchLowering { public: diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e9b93be7675..fd3dc743000 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -31,6 +31,7 @@ namespace llvm { class APInt; class APFloat; +class GISelKnownBits; class MachineInstr; class MachineInstrBuilder; class MachineFunction; @@ -148,6 +149,13 @@ enum { /// - AddrSpaceN+1 ... GIM_CheckMemoryAddressSpace, + /// Check the minimum alignment of the memory access for the given machine + /// memory operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - MinAlign - Minimum acceptable alignment + GIM_CheckMemoryAlignment, + /// Check the size of the memory access for the given machine memory operand /// against the size of an operand. /// - InsnID - Instruction ID @@ -201,11 +209,22 @@ enum { /// - Expected Intrinsic ID GIM_CheckIntrinsicID, + /// Check the operand is a specific predicate + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected predicate + GIM_CheckCmpPredicate, + /// Check the specified operand is an MBB /// - InsnID - Instruction ID /// - OpIdx - Operand index GIM_CheckIsMBB, + /// Check the specified operand is an Imm + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsImm, + /// Check if the specified operand is safe to fold into the current /// instruction. /// - InsnID - Instruction ID @@ -365,7 +384,20 @@ public: /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; + virtual bool select(MachineInstr &I) = 0; + + CodeGenCoverage *CoverageInfo = nullptr; + GISelKnownBits *KnownBits = nullptr; + MachineFunction *MF = nullptr; + + /// Setup per-MF selector state. 
+ virtual void setupMF(MachineFunction &mf, + GISelKnownBits &KB, + CodeGenCoverage &covinfo) { + CoverageInfo = &covinfo; + KnownBits = &KB; + MF = &mf; + } protected: using ComplexRendererFns = diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index e8ee4af0cb0..08f2f54bcf9 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -98,7 +98,7 @@ bool InstructionSelector::executeMatchTable( return false; break; } - if (TRI.isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Is a physical register\n"); if (handleReject() == RejectAndGiveUp) @@ -409,6 +409,30 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemoryAlignment: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + unsigned MinAlign = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO + = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment" + << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ")->getAlignment() >= " << MinAlign << ")\n"); + if (MMO->getAlignment() < MinAlign && handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckMemorySizeEqualTo: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t MMOIdx = MatchTable[CurrentIdx++]; @@ -638,7 +662,21 @@ bool InstructionSelector::executeMatchTable( return false; break; } - + case GIM_CheckCmpPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = 
MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isPredicate() || MO.getPredicate() != Value) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckIsMBB: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -652,7 +690,19 @@ bool InstructionSelector::executeMatchTable( } break; } - + case GIM_CheckIsImm: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "))\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckIsSafeToFold: { int64_t InsnID = MatchTable[CurrentIdx++]; DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), @@ -792,11 +842,13 @@ bool InstructionSelector::executeMatchTable( case GIR_AddRegister: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; + uint64_t RegFlags = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - OutMIs[InsnID].addReg(RegNum); - DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" - << InsnID << "], " << RegNum << ")\n"); + OutMIs[InsnID].addReg(RegNum, RegFlags); + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" + << InsnID << "], " << RegNum << ", " << RegFlags << ")\n"); break; } 
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index a22778b8848..7f960e72784 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -47,8 +47,7 @@ public: bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - if (MI.getOpcode() != TargetOpcode::G_ANYEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -93,9 +92,7 @@ public: bool tryCombineZExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_ZEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -136,32 +133,24 @@ public: bool tryCombineSExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_SEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_SEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); - // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c + // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c) Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); - // Guess on the RHS shift amount type, which should be re-legalized if - // applicable. - if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) || - isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) || - isConstantUnsupported(DstTy)) + if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); - auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt); - auto MIBShl = Builder.buildInstr( - TargetOpcode::G_SHL, {DstTy}, - {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt}); - Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt}); + uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); + Builder.buildInstr( + TargetOpcode::G_SEXT_INREG, {DstReg}, + {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits}); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -172,9 +161,8 @@ public: bool tryFoldImplicitDef(MachineInstr &MI, SmallVectorImpl &DeadInsts) { unsigned Opcode = MI.getOpcode(); - if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT && - Opcode != TargetOpcode::G_SEXT) - return false; + assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || + Opcode == TargetOpcode::G_SEXT); if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), MRI)) { @@ -203,21 +191,38 @@ public: return false; } - static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) { + static unsigned canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, + LLT OpTy, LLT DestTy) { if (OpTy.isVector() && DestTy.isVector()) - return TargetOpcode::G_CONCAT_VECTORS; + return MergeOp == TargetOpcode::G_CONCAT_VECTORS; - if (OpTy.isVector() && !DestTy.isVector()) - return TargetOpcode::G_BUILD_VECTOR; + if (OpTy.isVector() && !DestTy.isVector()) { + if (MergeOp == TargetOpcode::G_BUILD_VECTOR) + return true; - return TargetOpcode::G_MERGE_VALUES; + if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) { + if (ConvertOp == 0) + return true; + + const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); + + // Don't handle scalarization with a cast that isn't in the same + // direction as the vector cast. 
This could be handled, but it would + // require more intermediate unmerges. + if (ConvertOp == TargetOpcode::G_TRUNC) + return DestTy.getSizeInBits() <= OpEltSize; + return DestTy.getSizeInBits() >= OpEltSize; + } + + return false; + } + + return MergeOp == TargetOpcode::G_MERGE_VALUES; } bool tryCombineMerges(MachineInstr &MI, SmallVectorImpl &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) - return false; + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); unsigned NumDefs = MI.getNumOperands() - 1; MachineInstr *SrcDef = @@ -237,16 +242,14 @@ public: MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); } - // FIXME: Handle scalarizing concat_vectors (scalar result type with vector - // source) - unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy); - if (!MergeI || MergeI->getOpcode() != MergingOpcode) + if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), + ConvertOp, OpTy, DestTy)) return false; const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; if (NumMergeRegs < NumDefs) { - if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0) + if (NumDefs % NumMergeRegs != 0) return false; Builder.setInstr(MI); @@ -264,7 +267,22 @@ public: ++j, ++DefIdx) DstRegs.push_back(MI.getOperand(DefIdx).getReg()); - Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + if (ConvertOp) { + SmallVector TmpRegs; + // This is a vector that is being scalarized and casted. Extract to + // the element type, and do the conversion on the scalars. 
+ LLT MergeEltTy + = MRI.getType(MergeI->getOperand(0).getReg()).getElementType(); + for (unsigned j = 0; j < NumMergeRegs; ++j) + TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy)); + + Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg()); + + for (unsigned j = 0; j < NumMergeRegs; ++j) + Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]}); + } else { + Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + } } } else if (NumMergeRegs > NumDefs) { diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index a0f21e8b19d..fbfe71255a3 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -200,6 +200,13 @@ public: LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); + LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -219,9 +226,17 @@ public: LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); + LegalizeResult lowerFMad(MachineInstr &MI); + LegalizeResult lowerUnmergeValues(MachineInstr &MI); + LegalizeResult lowerShuffleVector(MachineInstr &MI); + LegalizeResult lowerDynStackAlloc(MachineInstr &MI); + LegalizeResult lowerExtract(MachineInstr &MI); + LegalizeResult lowerInsert(MachineInstr &MI); + 
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); private: MachineRegisterInfo &MRI; @@ -236,6 +251,11 @@ createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef Args); +/// Create a libcall to memcpy et al. +LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, + MachineInstr &MI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 513c98f2d23..1cf62d1fde5 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -331,6 +331,8 @@ class LegalizeRuleSet { /// individually handled. SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC - MCOI::OPERAND_FIRST_GENERIC + 2}; + SmallBitVector ImmIdxsCovered{MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM + 2}; #endif unsigned typeIdx(unsigned TypeIdx) { @@ -342,9 +344,21 @@ class LegalizeRuleSet { #endif return TypeIdx; } - void markAllTypeIdxsAsCovered() { + + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + + void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); + ImmIdxsCovered.set(); #endif } @@ -403,6 +417,15 @@ class LegalizeRuleSet { return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types), Mutation); } + /// Use the given action when type index 0 is any type in the given list and + /// imm index 0 is anything. Action should not be an action that requires + /// mutation. + LegalizeRuleSet &actionForTypeWithAnyImm(LegalizeAction Action, + std::initializer_list Types) { + using namespace LegalityPredicates; + immIdx(0); // Inform verifier imm idx 0 is handled. 
+ return actionIf(Action, typeInSet(typeIdx(0), Types)); + } /// Use the given action when type indexes 0 and 1 are both in the given list. /// That is, the type pair is in the cartesian product of the list. /// Action should not be an action that requires mutation. @@ -454,7 +477,7 @@ public: LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form // user-provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, Predicate); } /// The instruction is legal when type index 0 is any type in the given list. @@ -466,6 +489,12 @@ public: LegalizeRuleSet &legalFor(std::initializer_list> Types) { return actionFor(LegalizeAction::Legal, Types); } + /// The instruction is legal when type index 0 is any type in the given list + /// and imm index 0 is anything. + LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list Types) { + markAllIdxsAsCovered(); + return actionForTypeWithAnyImm(LegalizeAction::Legal, Types); + } /// The instruction is legal when type indexes 0 and 1 along with the memory /// size and minimum alignment is any type and size tuple in the given list. LegalizeRuleSet &legalForTypesWithMemDesc( @@ -497,7 +526,7 @@ public: LegalizeRuleSet &alwaysLegal() { using namespace LegalizeMutations; - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, always); } @@ -506,7 +535,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that predicate-less lowering // properly handles all type indices by design: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, always); } /// The instruction is lowered if predicate is true. 
Keep type index 0 as the @@ -515,7 +544,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate); } /// The instruction is lowered if predicate is true. @@ -523,7 +552,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate, Mutation); } /// The instruction is lowered when type index 0 is any type in the given @@ -571,7 +600,7 @@ public: LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a libcall with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Libcall, Predicate); } LegalizeRuleSet &libcallFor(std::initializer_list Types) { @@ -597,7 +626,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation); } /// Narrow the scalar to the one selected by the mutation if the predicate is @@ -606,7 +635,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); } @@ -616,7 +645,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively 
assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::MoreElements, Predicate, Mutation); } /// Remove elements to reach the type selected by the mutation if the @@ -625,7 +654,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::FewerElements, Predicate, Mutation); } @@ -640,11 +669,15 @@ public: return actionIf(LegalizeAction::Unsupported, LegalityPredicates::memSizeInBytesNotPow2(0)); } + LegalizeRuleSet &lowerIfMemSizeNotPow2() { + return actionIf(LegalizeAction::Lower, + LegalityPredicates::memSizeInBytesNotPow2(0)); + } LegalizeRuleSet &customIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a custom action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Custom, Predicate); } LegalizeRuleSet &customFor(std::initializer_list Types) { @@ -882,6 +915,10 @@ public: /// LegalizeRuleSet in any way at all. /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const; + /// Check if there is no imm index which is obviously not handled by the + /// LegalizeRuleSet in any way at all. + /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. + bool verifyImmIdxsCoverage(unsigned NumImmIdxs) const; /// Apply the ruleset to the given LegalityQuery. 
LegalizeActionStep apply(const LegalityQuery &Query) const; diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 13eddd9539f..be12341f576 100644 --- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -21,7 +21,7 @@ namespace llvm { namespace MIPatternMatch { template -bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) { +bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) { return P.match(MRI, R); } @@ -30,7 +30,7 @@ template struct OneUse_match { SubPatternT SubPat; OneUse_match(const SubPatternT &SP) : SubPat(SP) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg); } }; @@ -71,7 +71,7 @@ inline operand_type_match m_Reg() { return operand_type_match(); } /// Matching combinators. template struct And { template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return true; } }; @@ -83,14 +83,14 @@ struct And : And { : And(std::forward(preds)...), P(std::forward(p)) { } template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) && And::match(MRI, src); } }; template struct Or { template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return false; } }; @@ -101,7 +101,7 @@ struct Or : Or { Or(Pred &&p, Preds &&... 
preds) : Or(std::forward(preds)...), P(std::forward(p)) {} template - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) || Or::match(MRI, src); } }; @@ -175,7 +175,8 @@ struct BinaryOp_match { RHS_P R; BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {} - template bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { @@ -242,7 +243,8 @@ template struct UnaryOp_match { SrcTy L; UnaryOp_match(const SrcTy &LHS) : L(LHS) {} - template bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) { @@ -323,7 +325,7 @@ struct CheckType { LLT Ty; CheckType(const LLT &Ty) : Ty(Ty) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.getType(Reg) == Ty; } }; diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 10d712176b1..416f9c19f79 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -122,14 +122,22 @@ class SrcOp { MachineInstrBuilder SrcMIB; Register Reg; CmpInst::Predicate Pred; + int64_t Imm; }; public: - enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate }; + enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate, Ty_Imm }; SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {} SrcOp(const CmpInst::Predicate P) : Pred(P), 
Ty(SrcType::Ty_Predicate) {} + /// Use of registers held in unsigned integer variables (or more rarely signed + /// integers) is no longer permitted to avoid ambiguity with upcoming support + /// for immediates. + SrcOp(unsigned) = delete; + SrcOp(int) = delete; + SrcOp(uint64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} + SrcOp(int64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} void addSrcToMIB(MachineInstrBuilder &MIB) const { switch (Ty) { @@ -142,12 +150,16 @@ public: case SrcType::Ty_MIB: MIB.addUse(SrcMIB->getOperand(0).getReg()); break; + case SrcType::Ty_Imm: + MIB.addImm(Imm); + break; } } LLT getLLTTy(const MachineRegisterInfo &MRI) const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return MRI.getType(Reg); @@ -160,6 +172,7 @@ public: Register getReg() const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return Reg; @@ -178,6 +191,15 @@ public: } } + int64_t getImm() const { + switch (Ty) { + case SrcType::Ty_Imm: + return Imm; + default: + llvm_unreachable("Not an immediate"); + } + } + SrcType getSrcOpKind() const { return Ty; } private: @@ -348,6 +370,17 @@ public: /// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label". MachineInstrBuilder buildDbgLabel(const MDNode *Label); + /// Build and insert \p Res = G_DYN_STACKALLOC \p Size, \p Align + /// + /// G_DYN_STACKALLOC does a dynamic stack allocation and writes the address of + /// the allocated memory into \p Res. + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with pointer type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildDynStackAlloc(const DstOp &Res, const SrcOp &Size, + unsigned Align); + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other @@ -489,11 +522,21 @@ public: return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); } + /// Build and insert a G_INTTOPTR instruction. + MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_INTTOPTR, {Dst}, {Src}); + } + /// Build and insert \p Dst = G_BITCAST \p Src MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src}); } + /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src + MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src}); + } + /// \return The opcode of the extension the target wants to use for boolean /// values. unsigned getBoolExtOp(bool IsVec, bool IsFP) const; @@ -867,7 +910,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional Flags = None); /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1 /// @@ -880,7 +924,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional Flags = None); /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx @@ -961,8 +1006,8 @@ public: /// same type. /// /// \return a MachineInstrBuilder for the newly created instruction. 
- MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes, - Register Addr, Register Val, + MachineInstrBuilder buildAtomicRMW(unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, MachineMemOperand &MMO); /// Build and insert `OldValRes = G_ATOMICRMW_XCHG Addr, Val, MMO`. @@ -1135,6 +1180,16 @@ public: MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr, Register Val, MachineMemOperand &MMO); + /// Build and insert `OldValRes = G_ATOMICRMW_FADD Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes = G_ATOMICRMW_FSUB Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFSub( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); @@ -1210,6 +1265,12 @@ public: return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); } + MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags); + } + MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional Flags = None) { @@ -1300,8 +1361,9 @@ public: /// Build and insert \p Res = G_FADD \p Op0, \p Op1 MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1) { - return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}); + const SrcOp &Src1, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FSUB \p Op0, \p Op1 @@ -1316,14 +1378,23 @@ public: return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}); } + /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2 + MachineInstrBuilder buildFMAD(const 
DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, const SrcOp &Src2, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags); + } + /// Build and insert \p Res = G_FNEG \p Op0 - MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}); + MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 - MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}); + MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, + Optional Flags = None) { + return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FCANONICALIZE \p Src0 diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 4cdaa48fb68..8af2853473c 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,6 +16,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -117,14 +119,16 @@ struct ValueAndVReg { unsigned VReg; }; /// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as -/// the virtual register defined by this G_CONSTANT. -/// When \p LookThroughInstrs == false, this function behaves like +/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) +/// and that constant fits in int64_t, returns its value as well as the +/// virtual register defined by this G_F/CONSTANT. +/// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. 
+/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. Optional getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true); + bool LookThroughInstrs = true, + bool HandleFConstants = true); const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); @@ -151,6 +155,9 @@ Optional ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, const unsigned Op2, const MachineRegisterInfo &MRI); +Optional ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, const MachineRegisterInfo &MRI); + /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true, /// this returns if \p Val can be assumed to never be a signaling NaN. bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -161,5 +168,10 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { return isKnownNeverNaN(Val, MRI, true); } +/// Get a rough equivalent of an MVT for a given LLT. +MVT getMVTForLLT(LLT Ty); +/// Get a rough equivalent of an LLT for a given MVT. +LLT getLLTForMVT(MVT Ty); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index acf27dcc5fa..658ad31fa2a 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -281,7 +281,7 @@ namespace ISD { /// Same as the corresponding unsaturated fixed point instructions, but the /// result is clamped between the min and max values representable by the /// bits of the first 2 operands. - SMULFIXSAT, + SMULFIXSAT, UMULFIXSAT, /// Simple binary floating point operators. 
FADD, FSUB, FMUL, FDIV, FREM, @@ -301,6 +301,14 @@ namespace ISD { STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT, + + /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or + /// unsigned integer. These have the same semantics as fptosi and fptoui + /// in IR. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FP_TO_SINT, + STRICT_FP_TO_UINT, /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating /// point type down to the precision of the destination VT. TRUNC is a @@ -398,6 +406,13 @@ namespace ISD { /// than the vector element type, and is implicitly truncated to it. SCALAR_TO_VECTOR, + /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL + /// duplicated in all lanes. The type of the operand must match the vector + /// element type, except when they are integer types. In this case the + /// operand is allowed to be wider than the vector element type, and is + /// implicitly truncated to it. + SPLAT_VECTOR, + /// MULHU/MULHS - Multiply high - Multiply two integers of type iN, /// producing an unsigned/signed value of type i[2*N], then return the top /// part. @@ -569,13 +584,6 @@ namespace ISD { /// 3 Round to -inf FLT_ROUNDS_, - /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and - /// rounds it to a floating point value. It then promotes it and returns it - /// in a register of the same size. This operation effectively just - /// discards excess precision. The type to round down to is specified by - /// the VT operand, a VTSDNode. - FP_ROUND_INREG, - /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. 
FP_EXTEND, @@ -957,6 +965,23 @@ namespace ISD { static const int LAST_INDEXED_MODE = POST_DEC + 1; + //===--------------------------------------------------------------------===// + /// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's + /// index parameter when calculating addresses. + /// + /// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element)) + /// SIGNED_UNSCALED Addr = Base + (signed)Index + /// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element)) + /// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index + enum MemIndexType { + SIGNED_SCALED = 0, + SIGNED_UNSCALED, + UNSIGNED_SCALED, + UNSIGNED_UNSCALED + }; + + static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1; + //===--------------------------------------------------------------------===// /// LoadExtType enum - This enum defines the three variants of LOADEXT /// (load with extension). diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 8bb88165d3e..290a2381d9c 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -189,6 +189,10 @@ namespace llvm { return start == Other.start && end == Other.end; } + bool operator!=(const Segment &Other) const { + return !(*this == Other); + } + void dump() const; }; @@ -224,7 +228,7 @@ namespace llvm { /// Constructs a new LiveRange object. LiveRange(bool UseSegmentSet = false) - : segmentSet(UseSegmentSet ? llvm::make_unique() + : segmentSet(UseSegmentSet ? 
std::make_unique() : nullptr) {} /// Constructs a new LiveRange object by copying segments and valnos from diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 588b0f9cf39..888d72b87bd 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -111,30 +111,31 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineBasicBlock *MBB); - LiveInterval &getInterval(unsigned Reg) { + LiveInterval &getInterval(Register Reg) { if (hasInterval(Reg)) - return *VirtRegIntervals[Reg]; + return *VirtRegIntervals[Reg.id()]; else return createAndComputeVirtRegInterval(Reg); } - const LiveInterval &getInterval(unsigned Reg) const { + const LiveInterval &getInterval(Register Reg) const { return const_cast(this)->getInterval(Reg); } - bool hasInterval(unsigned Reg) const { - return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg]; + bool hasInterval(Register Reg) const { + return VirtRegIntervals.inBounds(Reg.id()) && + VirtRegIntervals[Reg.id()]; } /// Interval creation. 
- LiveInterval &createEmptyInterval(unsigned Reg) { + LiveInterval &createEmptyInterval(Register Reg) { assert(!hasInterval(Reg) && "Interval already exists!"); - VirtRegIntervals.grow(Reg); - VirtRegIntervals[Reg] = createInterval(Reg); - return *VirtRegIntervals[Reg]; + VirtRegIntervals.grow(Reg.id()); + VirtRegIntervals[Reg.id()] = createInterval(Reg); + return *VirtRegIntervals[Reg.id()]; } - LiveInterval &createAndComputeVirtRegInterval(unsigned Reg) { + LiveInterval &createAndComputeVirtRegInterval(Register Reg) { LiveInterval &LI = createEmptyInterval(Reg); computeVirtRegInterval(LI); return LI; diff --git a/lib/CodeGen/LiveRangeCalc.h b/include/llvm/CodeGen/LiveRangeCalc.h similarity index 98% rename from lib/CodeGen/LiveRangeCalc.h rename to include/llvm/CodeGen/LiveRangeCalc.h index 11aea5a3b01..08026c05733 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/include/llvm/CodeGen/LiveRangeCalc.h @@ -114,7 +114,7 @@ class LiveRangeCalc { VNInfo *Value = nullptr; LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) - : LR(LR), DomNode(node), Kill(kill) {} + : LR(LR), DomNode(node), Kill(kill) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -145,9 +145,8 @@ class LiveRangeCalc { /// @p Undef, the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Use, unsigned PhysReg, - ArrayRef Undefs); + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Use, + unsigned PhysReg, ArrayRef Undefs); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -267,8 +266,7 @@ public: /// @param Kill Index in block where LI is killed. If the value is /// live-through, set Kill = SLotIndex() and also call /// setLiveOutValue(MBB, 0). 
- void addLiveInBlock(LiveRange &LR, - MachineDomTreeNode *DomNode, + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, SlotIndex Kill = SlotIndex()) { LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); } diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 7dbb2feab8b..314afad9297 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -53,8 +53,8 @@ public: ModifiedRegUnits.addRegsInMask(O->getRegMask()); if (!O->isReg()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Reg.isPhysical()) continue; if (O->isDef()) { // Some architectures (e.g. AArch64 XZR/WZR) have registers that are diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 94e76a75e8d..069d0aa4509 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -314,6 +314,7 @@ struct ScalarEnumerationTraits { static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) { IO.enumCase(ID, "default", TargetStackID::Default); IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill); + IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector); IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 333d0a78618..ccdde78a0b2 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -103,9 +103,9 @@ private: using LiveInVector = std::vector; LiveInVector LiveIns; - /// Alignment of the basic block. Zero if the basic block does not need to be - /// aligned. The alignment is specified as log2(bytes). - unsigned Alignment = 0; + /// Alignment of the basic block. One if the basic block does not need to be + /// aligned. + Align Alignment; /// Indicate that this basic block is entered via an exception handler. 
bool IsEHPad = false; @@ -312,7 +312,7 @@ public: /// Adds the specified register as a live in. Note that it is an error to add /// the same register to the same set more than once unless the intention is /// to call sortUniqueLiveIns after all registers are added. - void addLiveIn(MCPhysReg PhysReg, + void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask = LaneBitmask::getAll()) { LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask)); } @@ -331,7 +331,7 @@ public: /// Add PhysReg as live in to this block, and ensure that there is a copy of /// PhysReg to a virtual register of class RC. Return the virtual register /// that is a copy of the live in PhysReg. - unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC); + unsigned addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC); /// Remove the specified register from the live in set. void removeLiveIn(MCPhysReg Reg, @@ -372,13 +372,11 @@ public: /// \see getBeginClobberMask() const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const; - /// Return alignment of the basic block. The alignment is specified as - /// log2(bytes). - unsigned getAlignment() const { return Alignment; } + /// Return alignment of the basic block. + Align getAlignment() const { return Alignment; } - /// Set alignment of the basic block. The alignment is specified as - /// log2(bytes). - void setAlignment(unsigned Align) { Alignment = Align; } + /// Set alignment of the basic block. + void setAlignment(Align A) { Alignment = A; } /// Returns true if the block is a landing pad. That is this basic block is /// entered via an exception handler. @@ -636,6 +634,18 @@ public: return Insts.insertAfter(I.getInstrIterator(), MI); } + /// If I is bundled then insert MI into the instruction list after the end of + /// the bundle, otherwise insert MI immediately after I. 
+ instr_iterator insertAfterBundle(instr_iterator I, MachineInstr *MI) { + assert((I == instr_end() || I->getParent() == this) && + "iterator points outside of basic block"); + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + while (I->isBundledWithSucc()) + ++I; + return Insts.insertAfter(I, MI); + } + /// Remove an instruction from the instruction list and delete it. /// /// If the instruction is part of a bundle, the other instructions in the @@ -723,6 +733,10 @@ public: /// CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Update all phi nodes in this basic block to refer to basic block \p New + /// instead of basic block \p Old. + void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Various pieces of code can cause excess edges in the CFG to be inserted. /// If we have proven that MBB can only branch to DestA and DestB, remove any /// other MBB successors from the CFG. DestA and DestB can be null. 
Besides diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 4f4034baf80..50322722220 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -39,6 +39,10 @@ enum class MachineCombinerPattern { MULADDXI_OP1, MULSUBXI_OP1, // Floating Point + FMULADDH_OP1, + FMULADDH_OP2, + FMULSUBH_OP1, + FMULSUBH_OP2, FMULADDS_OP1, FMULADDS_OP2, FMULSUBS_OP1, @@ -47,16 +51,25 @@ enum class MachineCombinerPattern { FMULADDD_OP2, FMULSUBD_OP1, FMULSUBD_OP2, + FNMULSUBH_OP1, FNMULSUBS_OP1, FNMULSUBD_OP1, FMLAv1i32_indexed_OP1, FMLAv1i32_indexed_OP2, FMLAv1i64_indexed_OP1, FMLAv1i64_indexed_OP2, + FMLAv4f16_OP1, + FMLAv4f16_OP2, + FMLAv8f16_OP1, + FMLAv8f16_OP2, FMLAv2f32_OP2, FMLAv2f32_OP1, FMLAv2f64_OP1, FMLAv2f64_OP2, + FMLAv4i16_indexed_OP1, + FMLAv4i16_indexed_OP2, + FMLAv8i16_indexed_OP1, + FMLAv8i16_indexed_OP2, FMLAv2i32_indexed_OP1, FMLAv2i32_indexed_OP2, FMLAv2i64_indexed_OP1, @@ -67,10 +80,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, + FMLSv4f16_OP1, + FMLSv4f16_OP2, + FMLSv8f16_OP1, + FMLSv8f16_OP2, FMLSv2f32_OP1, FMLSv2f32_OP2, FMLSv2f64_OP1, FMLSv2f64_OP2, + FMLSv4i16_indexed_OP1, + FMLSv4i16_indexed_OP2, + FMLSv8i16_indexed_OP1, + FMLSv8i16_indexed_OP2, FMLSv2i32_indexed_OP1, FMLSv2i32_indexed_OP2, FMLSv2i64_indexed_OP1, diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index d2200080b89..e4d7a02f8c4 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -44,6 +44,8 @@ using MachineDomTreeNode = DomTreeNodeBase; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { + using DomTreeT = DomTreeBase; + /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. 
struct CriticalEdge { @@ -65,8 +67,8 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet NewBBs; - /// The DominatorTreeBase that is used to compute a normal dominator tree - std::unique_ptr> DT; + /// The DominatorTreeBase that is used to compute a normal dominator tree. + std::unique_ptr DT; /// Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses @@ -80,8 +82,8 @@ public: MachineDominatorTree(); - DomTreeBase &getBase() { - if (!DT) DT.reset(new DomTreeBase()); + DomTreeT &getBase() { + if (!DT) DT.reset(new DomTreeT()); applySplitCriticalEdges(); return *DT; } @@ -92,31 +94,30 @@ public: /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). /// - inline const SmallVectorImpl &getRoots() const { + const SmallVectorImpl &getRoots() const { applySplitCriticalEdges(); return DT->getRoots(); } - inline MachineBasicBlock *getRoot() const { + MachineBasicBlock *getRoot() const { applySplitCriticalEdges(); return DT->getRoot(); } - inline MachineDomTreeNode *getRootNode() const { + MachineDomTreeNode *getRootNode() const { applySplitCriticalEdges(); return DT->getRootNode(); } bool runOnMachineFunction(MachineFunction &F) override; - inline bool dominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool dominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } - inline bool dominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } @@ -133,36 +134,30 @@ public: for (; &*I != A && &*I != B; ++I) /*empty*/ ; - //if(!DT.IsPostDominators) { - // A dominates B if it is found first in the basic block. 
- return &*I == A; - //} else { - // // A post-dominates B if B is found first in the basic block. - // return &*I == B; - //} + return &*I == A; } - inline bool properlyDominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool properlyDominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } - inline bool properlyDominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool properlyDominates(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. - inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { + MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, + MachineBasicBlock *B) { applySplitCriticalEdges(); return DT->findNearestCommonDominator(A, B); } - inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { + MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -170,7 +165,7 @@ public: /// getNode - return the (Post)DominatorTree node for the specified basic /// block. This is the same as using operator[] on this class. /// - inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { + MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -178,8 +173,8 @@ public: /// addNewBlock - Add a new node to the dominator tree information. This /// creates a new node as a child of DomBB dominator node,linking it into /// the children list of the immediate dominator. 
- inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, - MachineBasicBlock *DomBB) { + MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB) { applySplitCriticalEdges(); return DT->addNewBlock(BB, DomBB); } @@ -187,14 +182,14 @@ public: /// changeImmediateDominator - This method is used to update the dominator /// tree information when a node's immediate dominator changes. /// - inline void changeImmediateDominator(MachineBasicBlock *N, - MachineBasicBlock* NewIDom) { + void changeImmediateDominator(MachineBasicBlock *N, + MachineBasicBlock *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } - inline void changeImmediateDominator(MachineDomTreeNode *N, - MachineDomTreeNode* NewIDom) { + void changeImmediateDominator(MachineDomTreeNode *N, + MachineDomTreeNode *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } @@ -202,14 +197,14 @@ public: /// eraseNode - Removes a node from the dominator tree. Block must not /// dominate any other blocks. Removes node from its immediate dominator's /// children list. Deletes dominator node associated with basic block BB. - inline void eraseNode(MachineBasicBlock *BB) { + void eraseNode(MachineBasicBlock *BB) { applySplitCriticalEdges(); DT->eraseNode(BB); } /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. 
- inline void splitBlock(MachineBasicBlock* NewBB) { + void splitBlock(MachineBasicBlock* NewBB) { applySplitCriticalEdges(); DT->splitBlock(NewBB); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 761735120a6..01fc50d14a7 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include #include @@ -129,7 +130,7 @@ private: uint64_t Size; // The required alignment of this stack slot. - unsigned Alignment; + Align Alignment; // If true, the value of the stack object is set before // entering the function and is not modified inside the function. By @@ -180,17 +181,16 @@ private: uint8_t SSPLayout; - StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + StackObject(uint64_t Size, Align Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) - : SPOffset(SPOffset), Size(Size), Alignment(Alignment), - isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), - StackID(StackID), Alloca(Alloca), isAliased(IsAliased), - SSPLayout(SSPLK_None) {} + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID), + Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {} }; /// The alignment of the stack. - unsigned StackAlignment; + Align StackAlignment; /// Can the stack be realigned. This can be false if the target does not /// support stack realignment, or if the user asks us not to realign the @@ -260,7 +260,7 @@ private: /// native alignment maintained by the compiler, dynamic alignment code will /// be needed. /// - unsigned MaxAlignment = 0; + Align MaxAlignment; /// Set to true if this function adjusts the stack -- e.g., /// when calling another function. 
This is only valid during and after @@ -304,7 +304,7 @@ private: /// Required alignment of the local object blob, which is the strictest /// alignment of any object in it. - unsigned LocalFrameMaxAlign = 0; + Align LocalFrameMaxAlign; /// Whether the local object blob needs to be allocated together. If not, /// PEI should ignore the isPreAllocated flags on the stack objects and @@ -338,8 +338,8 @@ private: public: explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable, bool ForcedRealign) - : StackAlignment(StackAlignment), StackRealignable(StackRealignable), - ForcedRealign(ForcedRealign) {} + : StackAlignment(assumeAligned(StackAlignment)), + StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {} /// Return true if there are any stack objects in this function. bool hasStackObjects() const { return !Objects.empty(); } @@ -419,10 +419,12 @@ public: /// Required alignment of the local object blob, /// which is the strictest alignment of any object in it. - void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; } + void setLocalFrameMaxAlign(Align Alignment) { + LocalFrameMaxAlign = Alignment; + } /// Return the required alignment of the local object blob. - unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } + Align getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } /// Get whether the local allocation blob should be allocated together or /// let PEI allocate the locals in it directly. @@ -462,14 +464,14 @@ public: unsigned getObjectAlignment(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - return Objects[ObjectIdx+NumFixedObjects].Alignment; + return Objects[ObjectIdx + NumFixedObjects].Alignment.value(); } /// setObjectAlignment - Change the alignment of the specified stack object. 
void setObjectAlignment(int ObjectIdx, unsigned Align) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].Alignment = Align; + Objects[ObjectIdx + NumFixedObjects].Alignment = assumeAligned(Align); // Only ensure max alignment for the default stack. if (getStackID(ObjectIdx) == 0) @@ -561,10 +563,14 @@ public: /// Return the alignment in bytes that this function must be aligned to, /// which is greater than the default stack alignment provided by the target. - unsigned getMaxAlignment() const { return MaxAlignment; } + unsigned getMaxAlignment() const { return MaxAlignment.value(); } /// Make sure the function is at least Align bytes aligned. - void ensureMaxAlignment(unsigned Align); + void ensureMaxAlignment(Align Alignment); + /// FIXME: Remove this once transition to Align is over. + inline void ensureMaxAlignment(unsigned Align) { + ensureMaxAlignment(assumeAligned(Align)); + } /// Return true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -728,12 +734,24 @@ public: /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. - int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, + int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca = nullptr, uint8_t ID = 0); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSpillSlot, + const AllocaInst *Alloca = nullptr, + uint8_t ID = 0) { + return CreateStackObject(Size, assumeAligned(Alignment), isSpillSlot, + Alloca, ID); + } /// Create a new statically sized stack object that represents a spill slot, /// returning a nonnegative identifier to represent it. 
- int CreateSpillStackObject(uint64_t Size, unsigned Alignment); + int CreateSpillStackObject(uint64_t Size, Align Alignment); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateSpillStackObject(uint64_t Size, unsigned Alignment) { + return CreateSpillStackObject(Size, assumeAligned(Alignment)); + } /// Remove or mark dead a statically sized stack object. void RemoveStackObject(int ObjectIdx) { @@ -744,7 +762,11 @@ public: /// Notify the MachineFrameInfo object that a variable sized object has been /// created. This must be created whenever a variable sized object is /// created, whether or not the index returned is actually used. - int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); + int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca); + /// FIXME: Remove this function when transition to Align is over. + int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { + return CreateVariableSizedObject(assumeAligned(Alignment), Alloca); + } /// Returns a reference to call saved info vector for the current function. const std::vector &getCalleeSavedInfo() const { diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 201c126ee52..3a3176e51c5 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -36,6 +36,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Recycler.h" +#include "llvm/Target/TargetMachine.h" #include #include #include @@ -277,7 +278,7 @@ class MachineFunction { unsigned FunctionNumber; /// Alignment - The alignment of the function. - unsigned Alignment; + Align Alignment; /// ExposesReturnsTwice - True if the function calls setjmp or related /// functions with attribute "returns twice", but doesn't have @@ -322,7 +323,7 @@ class MachineFunction { std::vector> CodeViewAnnotations; /// CodeView heapallocsites. 
- std::vector> + std::vector> CodeViewHeapAllocSites; bool CallsEHReturn = false; @@ -400,6 +401,17 @@ private: /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; + /// A helper function that returns call site info for a give call + /// instruction if debug entry value support is enabled. + CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI) { + assert(MI->isCall() && + "Call site info refers only to call instructions!"); + + if (!Target.Options.EnableDebugEntryValues) + return CallSitesInfo.end(); + return CallSitesInfo.find(MI); + } + // Callbacks for insertion and removal. void handleInsertion(MachineInstr &MI); void handleRemoval(MachineInstr &MI); @@ -508,15 +520,16 @@ public: const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; } WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } - /// getAlignment - Return the alignment (log2, not bytes) of the function. - unsigned getAlignment() const { return Alignment; } + /// getAlignment - Return the alignment of the function. + Align getAlignment() const { return Alignment; } - /// setAlignment - Set the alignment (log2, not bytes) of the function. - void setAlignment(unsigned A) { Alignment = A; } + /// setAlignment - Set the alignment of the function. + void setAlignment(Align A) { Alignment = A; } - /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned. - void ensureAlignment(unsigned A) { - if (Alignment < A) Alignment = A; + /// ensureAlignment - Make sure the function is at least A bytes aligned. 
+ void ensureAlignment(Align A) { + if (Alignment < A) + Alignment = A; } /// exposesReturnsTwice - Returns true if the function calls setjmp or @@ -935,10 +948,10 @@ public: } /// Record heapallocsites - void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD); + void addCodeViewHeapAllocSite(MachineInstr *I, const MDNode *MD); - ArrayRef> - getCodeViewHeapAllocSites() const { + ArrayRef> + getCodeViewHeapAllocSites() const { return CodeViewHeapAllocSites; } @@ -976,12 +989,24 @@ public: return CallSitesInfo; } - /// Update call sites info by deleting entry for \p Old call instruction. - /// If \p New is present then transfer \p Old call info to it. This function - /// should be called before removing call instruction or before replacing - /// call instruction with new one. - void updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New = nullptr); + /// Following functions update call site info. They should be called before + /// removing, replacing or copying call instruction. + + /// Move the call site info from \p Old to \New call site info. This function + /// is used when we are replacing one call instruction with another one to + /// the same callee. + void moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); + + /// Erase the call site info for \p MI. It is used to remove a call + /// instruction from the instruction stream. + void eraseCallSiteInfo(const MachineInstr *MI); + + /// Copy the call site info from \p Old to \ New. Its usage is when we are + /// making a copy of the instruction that will be inserted at different point + /// of the instruction stream. 
+ void copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); }; //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index c82c5b13750..c94ad292ec9 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -20,11 +20,9 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" @@ -38,6 +36,7 @@ namespace llvm { +class AAResults; template class ArrayRef; class DIExpression; class DILocalVariable; @@ -427,6 +426,22 @@ public: return getNumExplicitDefs() + MCID->getNumImplicitDefs(); } + /// Returns true if the instruction has implicit definition. + bool hasImplicitDef() const { + for (unsigned I = getNumExplicitOperands(), E = getNumOperands(); + I != E; ++I) { + const MachineOperand &MO = getOperand(I); + if (MO.isDef() && MO.isImplicit()) + return true; + } + return false; + } + + /// Returns the implicit operands number. + unsigned getNumImplicitOperands() const { + return getNumOperands() - getNumExplicitOperands(); + } + /// Return true if operand \p OpIdx is a subregister index. bool isOperandSubregIdx(unsigned OpIdx) const { assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && @@ -602,6 +617,12 @@ public: return hasPropertyInBundle(1ULL << MCFlag, Type); } + /// Return true if this is an instruction that should go through the usual + /// legalization steps. 
+ bool isPreISelOpcode(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::PreISelOpcode, Type); + } + /// Return true if this instruction can have a variable number of operands. /// In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are @@ -1020,15 +1041,13 @@ public: } /// A DBG_VALUE is an entry value iff its debug expression contains the - /// DW_OP_entry_value DWARF operation. - bool isDebugEntryValue() const { - return isDebugValue() && getDebugExpression()->isEntryValue(); - } + /// DW_OP_LLVM_entry_value operation. + bool isDebugEntryValue() const; /// Return true if the instruction is a debug value which describes a part of /// a variable as unavailable. bool isUndefDebugValue() const { - return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg(); + return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg().isValid(); } bool isPHI() const { @@ -1140,7 +1159,7 @@ public: /// is a read of a super-register. /// This does not count partial redefines of virtual registers as reads: /// %reg1024:6 = OP. - bool readsRegister(unsigned Reg, + bool readsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, false, TRI) != -1; } @@ -1148,20 +1167,20 @@ public: /// Return true if the MachineInstr reads the specified virtual register. /// Take into account that a partial define is a /// read-modify-write operation. - bool readsVirtualRegister(unsigned Reg) const { + bool readsVirtualRegister(Register Reg) const { return readsWritesVirtualRegister(Reg).first; } /// Return a pair of bools (reads, writes) indicating if this instruction /// reads or writes Reg. This also considers partial defines. /// If Ops is not null, all operand indices for Reg are added. 
- std::pair readsWritesVirtualRegister(unsigned Reg, + std::pair readsWritesVirtualRegister(Register Reg, SmallVectorImpl *Ops = nullptr) const; /// Return true if the MachineInstr kills the specified register. /// If TargetRegisterInfo is passed, then it also checks if there is /// a kill of a super-register. - bool killsRegister(unsigned Reg, + bool killsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, true, TRI) != -1; } @@ -1170,7 +1189,7 @@ public: /// If TargetRegisterInfo is passed, then it also checks /// if there is a def of a super-register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool definesRegister(unsigned Reg, + bool definesRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1; } @@ -1178,38 +1197,38 @@ public: /// Return true if the MachineInstr modifies (fully define or partially /// define) the specified register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const { + bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const { return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1; } /// Returns true if the register is dead in this machine instruction. /// If TargetRegisterInfo is passed, then it also checks /// if there is a dead def of a super-register. - bool registerDefIsDead(unsigned Reg, + bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1; } /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). 
- bool hasRegisterImplicitUseOperand(unsigned Reg) const; + bool hasRegisterImplicitUseOperand(Register Reg) const; /// Returns the operand index that is a use of the specific register or -1 /// if it is not found. It further tightens the search criteria to a use /// that kills the register if isKill is true. - int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false, + int findRegisterUseOperandIdx(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterUseOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. - MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false, + MachineOperand *findRegisterUseOperand(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI); return (Idx == -1) ? nullptr : &getOperand(Idx); } const MachineOperand *findRegisterUseOperand( - unsigned Reg, bool isKill = false, + Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast(this)-> findRegisterUseOperand(Reg, isKill, TRI); @@ -1221,14 +1240,14 @@ public: /// overlap the specified register. If TargetRegisterInfo is non-null, /// then it also checks if there is a def of a super-register. /// This may also return a register mask operand when Overlap is true. - int findRegisterDefOperandIdx(unsigned Reg, + int findRegisterDefOperandIdx(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterDefOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. 
MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI); @@ -1236,7 +1255,7 @@ public: } const MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast(this)->findRegisterDefOperand( @@ -1283,7 +1302,7 @@ public: /// /// \pre CurRC must not be NULL. const TargetRegisterClass *getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle = false) const; @@ -1346,39 +1365,39 @@ public: /// Replace all occurrences of FromReg with ToReg:SubIdx, /// properly composing subreg indices where necessary. - void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, + void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo); /// We have determined MI kills a register. Look for the /// operand that uses it and mark it as IsKill. If AddIfNotFound is true, /// add a implicit operand if it's not found. Returns true if the operand /// exists / is added. - bool addRegisterKilled(unsigned IncomingReg, + bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all kill flags affecting Reg. If RegInfo is provided, this includes /// all aliasing registers. - void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); + void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo); /// We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. 
If /// AddIfNotFound is true, add a implicit operand if it's not found. Returns /// true if the operand exists / is added. - bool addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, + bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all dead flags on operands defining register @p Reg. - void clearRegisterDeads(unsigned Reg); + void clearRegisterDeads(Register Reg); /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); + void setRegisterDefReadUndef(Register Reg, bool IsUndef = true); /// We have determined MI defines a register. Make sure there is an operand /// defining Reg. - void addRegisterDefined(unsigned Reg, + void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo = nullptr); /// Mark every physreg used by this instruction as @@ -1386,13 +1405,13 @@ public: /// /// On instructions with register mask operands, also add implicit-def /// operands for all registers in UsedRegs. - void setPhysRegsDeadExcept(ArrayRef UsedRegs, + void setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI); /// Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. - bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const; + bool isSafeToMove(AAResults *AA, bool &SawStore) const; /// Returns true if this instruction's memory access aliases the memory /// access of Other. @@ -1404,7 +1423,7 @@ public: /// @param AA Optional alias analysis, used to compare memory operands. /// @param Other MachineInstr to check aliasing against. /// @param UseTBAA Whether to pass TBAA information to alias analysis. 
- bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const; + bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const; /// Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory @@ -1419,7 +1438,7 @@ public: /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const; + bool isDereferenceableInvariantLoad(AAResults *AA) const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. @@ -1603,9 +1622,15 @@ public: /// Scan instructions following MI and collect any matching DBG_VALUEs. void collectDebugValues(SmallVectorImpl &DbgValues); - /// Find all DBG_VALUEs immediately following this instruction that point - /// to a register def in this instruction and point them to \p Reg instead. - void changeDebugValuesDefReg(unsigned Reg); + /// Find all DBG_VALUEs that point to the register def in this instruction + /// and point them to \p Reg instead. + void changeDebugValuesDefReg(Register Reg); + + /// Returns the Intrinsic::ID for this instruction. + /// \pre Must have an intrinsic ID operand. + unsigned getIntrinsicID() const { + return getOperand(getNumExplicitDefs()).getIntrinsicID(); + } private: /// If this instruction is embedded into a MachineFunction, return the @@ -1630,7 +1655,7 @@ private: /// this MI and the given operand index \p OpIdx. /// If the related operand does not constrained Reg, this returns CurRC. 
const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 6d7fb72b6bd..880d4829ac7 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -85,7 +85,7 @@ public: Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); } /// Add a new virtual register operand. - const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, + const MachineInstrBuilder &addReg(Register RegNo, unsigned flags = 0, unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); @@ -104,14 +104,14 @@ public: } /// Add a virtual register definition operand. - const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addDef(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { return addReg(RegNo, Flags | RegState::Define, SubReg); } /// Add a virtual register use operand. It is an error for Flags to contain /// `RegState::Define` when calling this function. 
- const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addUse(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { assert(!(Flags & RegState::Define) && "Misleading addUse defines register, use addReg instead."); @@ -135,7 +135,7 @@ public: } const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags)); return *this; } @@ -145,42 +145,42 @@ public: return *this; } - const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx, - int Offset = 0, - unsigned char TargetFlags = 0) const { + const MachineInstrBuilder & + addConstantPoolIndex(unsigned Idx, int Offset = 0, + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addJumpTableIndex(unsigned Idx, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags)); return *this; } const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addExternalSymbol(const char *FnName, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags)); return *this; } const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) 
const { MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags)); return *this; } @@ -250,6 +250,11 @@ public: return *this; } + const MachineInstrBuilder &addShuffleMask(const Constant *Val) const { + MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val)); + return *this; + } + const MachineInstrBuilder &addSym(MCSymbol *Sym, unsigned char TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags)); @@ -316,7 +321,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, /// This version of the builder sets up the first operand as a /// destination virtual register. inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) .addReg(DestReg, RegState::Define); } @@ -327,7 +332,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -343,7 +348,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -352,7 +357,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { // Calling the overload for instr_iterator is always correct. 
However, the // definition is not available in headers, so inline the check. if (I.isInsideBundle()) @@ -362,7 +367,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { return BuildMI(BB, *I, DL, MCID, DestReg); } @@ -416,7 +421,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// end of the given MachineBasicBlock, and sets up the first operand as a /// destination virtual register. inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } @@ -426,7 +431,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// second operand is an immediate. MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -442,7 +447,7 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -490,16 +495,13 @@ inline unsigned getRenamableRegState(bool B) { /// Get all register state flags from machine operand \p RegOp. 
inline unsigned getRegState(const MachineOperand &RegOp) { assert(RegOp.isReg() && "Not a register operand"); - return getDefRegState(RegOp.isDef()) | - getImplRegState(RegOp.isImplicit()) | - getKillRegState(RegOp.isKill()) | - getDeadRegState(RegOp.isDead()) | - getUndefRegState(RegOp.isUndef()) | - getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()) | - getRenamableRegState( - TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && - RegOp.isRenamable()); + return getDefRegState(RegOp.isDef()) | getImplRegState(RegOp.isImplicit()) | + getKillRegState(RegOp.isKill()) | getDeadRegState(RegOp.isDead()) | + getUndefRegState(RegOp.isUndef()) | + getInternalReadRegState(RegOp.isInternalRead()) | + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineLoopUtils.h b/include/llvm/CodeGen/MachineLoopUtils.h new file mode 100644 index 00000000000..41379b75d00 --- /dev/null +++ b/include/llvm/CodeGen/MachineLoopUtils.h @@ -0,0 +1,41 @@ +//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H +#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H + +namespace llvm { +class MachineBasicBlock; +class MachineRegisterInfo; +class TargetInstrInfo; + +enum LoopPeelDirection { + LPD_Front, ///< Peel the first iteration of the loop. + LPD_Back ///< Peel the last iteration of the loop. +}; + +/// Peels a single block loop. Loop must have two successors, one of which +/// must be itself. 
Similarly it must have two predecessors, one of which must +/// be itself. +/// +/// The loop block is copied and inserted into the CFG such that two copies of +/// the loop follow on from each other. The copy is inserted either before or +/// after the loop based on Direction. +/// +/// Phis are updated and an unconditional branch inserted at the end of the +/// clone so as to execute a single iteration. +/// +/// The trip count of Loop is not updated. +MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII); + +} // namespace llvm + +#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 65f706302bc..33a48a235e1 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -293,8 +293,6 @@ public: /// Support for operator<<. /// @{ - void print(raw_ostream &OS) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl &SSNs, const LLVMContext &Context, const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; @@ -319,11 +317,6 @@ public: } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) { - MRO.print(OS); - return OS; -} - } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 4ff5c7fd013..6902dada242 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/IR/PassManager.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" @@ -74,7 +75,10 @@ protected: /// made by different debugging and exception handling schemes and reformated /// 
for specific use. /// -class MachineModuleInfo : public ImmutablePass { +class MachineModuleInfo { + friend class MachineModuleInfoWrapperPass; + friend class MachineModuleAnalysis; + const LLVMTargetMachine &TM; /// This is the MCContext used for the entire code generator. @@ -140,15 +144,17 @@ class MachineModuleInfo : public ImmutablePass { const Function *LastRequest = nullptr; ///< Used for shortcut/cache. MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache. + MachineModuleInfo &operator=(MachineModuleInfo &&MMII) = delete; + public: - static char ID; // Pass identification, replacement for typeid - explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr); - ~MachineModuleInfo() override; - // Initialization and Finalization - bool doInitialization(Module &) override; - bool doFinalization(Module &) override; + MachineModuleInfo(MachineModuleInfo &&MMII); + + ~MachineModuleInfo(); + + void initialize(); + void finalize(); const LLVMTargetMachine &getTarget() const { return TM; } @@ -254,6 +260,38 @@ public: /// \} }; // End class MachineModuleInfo +class MachineModuleInfoWrapperPass : public ImmutablePass { + MachineModuleInfo MMI; + +public: + static char ID; // Pass identification, replacement for typeid + explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr); + + // Initialization and Finalization + bool doInitialization(Module &) override; + bool doFinalization(Module &) override; + + MachineModuleInfo &getMMI() { return MMI; } + const MachineModuleInfo &getMMI() const { return MMI; } +}; + +/// An analysis that produces \c MachineModuleInfo for a module. +class MachineModuleAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + + const LLVMTargetMachine *TM; + +public: + /// Provide the result type for this analysis pass.
+ using Result = MachineModuleInfo; + + MachineModuleAnalysis(const LLVMTargetMachine *TM) : TM(TM) {} + + /// Run the analysis pass and produce machine module information. + MachineModuleInfo run(Module &M, ModuleAnalysisManager &); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINEMODULEINFO_H diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 2152c7582e5..df914dc2d85 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -23,6 +23,7 @@ namespace llvm { class BlockAddress; +class Constant; class ConstantFP; class ConstantInt; class GlobalValue; @@ -68,7 +69,8 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel - MO_Last = MO_Predicate, + MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks) + MO_Last = MO_ShuffleMask }; private: @@ -172,6 +174,7 @@ private: unsigned CFIIndex; // For MO_CFI. Intrinsic::ID IntrinsicID; // For MO_IntrinsicID. unsigned Pred; // For MO_Predicate + const Constant *ShuffleMask; // For MO_ShuffleMask struct { // For MO_Register. // Register number is in SmallContents.RegNo. @@ -341,6 +344,7 @@ public: bool isCFIIndex() const { return OpKind == MO_CFIIndex; } bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; } bool isPredicate() const { return OpKind == MO_Predicate; } + bool isShuffleMask() const { return OpKind == MO_ShuffleMask; } //===--------------------------------------------------------------------===// // Accessors for Register Operands //===--------------------------------------------------------------------===// @@ -455,7 +459,7 @@ public: /// Change the register this operand corresponds to. 
/// - void setReg(unsigned Reg); + void setReg(Register Reg); void setSubReg(unsigned subReg) { assert(isReg() && "Wrong MachineOperand mutator"); @@ -468,13 +472,13 @@ public: /// using TargetRegisterInfo to compose the subreg indices if necessary. /// Reg must be a virtual register, SubIdx can be 0. /// - void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&); + void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&); /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// - void substPhysReg(unsigned Reg, const TargetRegisterInfo&); + void substPhysReg(MCRegister Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } @@ -579,6 +583,11 @@ public: return Contents.Pred; } + const Constant *getShuffleMask() const { + assert(isShuffleMask() && "Wrong MachineOperand accessor"); + return Contents.ShuffleMask; + } + /// Return the offset from the symbol in this operand. This always returns 0 /// for ExternalSymbol operands. int64_t getOffset() const { @@ -717,11 +726,11 @@ public: void ChangeToFPImmediate(const ConstantFP *FPImm); /// ChangeToES - Replace this operand with a new external symbol operand. - void ChangeToES(const char *SymName, unsigned char TargetFlags = 0); + void ChangeToES(const char *SymName, unsigned TargetFlags = 0); /// ChangeToGA - Replace this operand with a new global address operand. void ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. void ChangeToMCSymbol(MCSymbol *Sym); @@ -731,12 +740,12 @@ public: /// Replace this operand with a target index. 
void ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. - void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false, + void ChangeToRegister(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); @@ -762,7 +771,7 @@ public: return Op; } - static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false, + static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, @@ -788,7 +797,7 @@ public: return Op; } static MachineOperand CreateMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MachineBasicBlock); Op.setMBB(MBB); Op.setTargetFlags(TargetFlags); @@ -800,7 +809,7 @@ public: return Op; } static MachineOperand CreateCPI(unsigned Idx, int Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ConstantPoolIndex); Op.setIndex(Idx); Op.setOffset(Offset); @@ -808,21 +817,21 @@ public: return Op; } static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_TargetIndex); Op.setIndex(Idx); Op.setOffset(Offset); Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); return Op; } static MachineOperand CreateGA(const GlobalValue *GV, 
int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_GlobalAddress); Op.Contents.OffsetedInfo.Val.GV = GV; Op.setOffset(Offset); @@ -830,7 +839,7 @@ public: return Op; } static MachineOperand CreateES(const char *SymName, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ExternalSymbol); Op.Contents.OffsetedInfo.Val.SymbolName = SymName; Op.setOffset(0); // Offset is always 0. @@ -838,7 +847,7 @@ public: return Op; } static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_BlockAddress); Op.Contents.OffsetedInfo.Val.BA = BA; Op.setOffset(Offset); @@ -876,7 +885,7 @@ public: } static MachineOperand CreateMCSymbol(MCSymbol *Sym, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MCSymbol); Op.Contents.Sym = Sym; Op.setOffset(0); @@ -902,6 +911,12 @@ public: return Op; } + static MachineOperand CreateShuffleMask(const Constant *C) { + MachineOperand Op(MachineOperand::MO_ShuffleMask); + Op.Contents.ShuffleMask = C; + return Op; + } + friend class MachineInstr; friend class MachineRegisterInfo; diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h index 03ca5307268..e9cf7e115bf 100644 --- a/include/llvm/CodeGen/MachinePipeliner.h +++ b/include/llvm/CodeGen/MachinePipeliner.h @@ -40,6 +40,8 @@ #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H +#include "llvm/Analysis/AliasAnalysis.h" + #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" @@ -148,7 +150,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// We may create a new instruction, so remember it because it /// must be deleted when the pass is finished. 
- SmallPtrSet NewMIs; + DenseMap NewMIs; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; @@ -200,7 +202,7 @@ public: RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) { P.MF->getSubtarget().getSMSMutations(Mutations); if (SwpEnableCopyToPhi) - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); } void schedule() override; @@ -297,53 +299,8 @@ private: void computeNodeOrder(NodeSetType &NodeSets); void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); - void generatePipelinedLoop(SMSchedule &Schedule); - void generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &PrologBBs); - void generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); - void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs); - void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule); - void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap); bool computeDelta(MachineInstr &MI, unsigned &Delta); - void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, - unsigned Num); - MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned 
CurStageNum, - unsigned InstStageNum); - MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule); - void updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, unsigned InstrStageNum, - SMSchedule &Schedule, ValueMapTy *VRMap); MachineInstr *findDefInLoop(unsigned Reg); - unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, - unsigned LoopStage, ValueMapTy *VRMap, - MachineBasicBlock *BB); - void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap); - void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, - InstrMapTy &InstrMap, unsigned CurStageNum, - unsigned PhiNum, MachineInstr *Phi, - unsigned OldReg, unsigned NewReg, - unsigned PrevReg = 0); bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos, unsigned &NewBase, int64_t &NewOffset); @@ -529,12 +486,6 @@ private: /// Map from instruction to execution cycle. std::map InstrToCycle; - /// Map for each register and the max difference between its uses and def. - /// The first element in the pair is the max difference in stages. The - /// second is true if the register defines a Phi value and loop value is - /// scheduled before the Phi. - std::map> RegToStageDiff; - /// Keep track of the first cycle value in the schedule. It starts /// as zero, but the algorithm allows negative values. int FirstCycle = 0; @@ -560,7 +511,6 @@ public: void reset() { ScheduledInstrs.clear(); InstrToCycle.clear(); - RegToStageDiff.clear(); FirstCycle = 0; LastCycle = 0; InitiationInterval = 0; @@ -620,28 +570,6 @@ public: return (LastCycle - FirstCycle) / InitiationInterval; } - /// Return the max. number of stages/iterations that can occur between a - /// register definition and its uses. 
- unsigned getStagesForReg(int Reg, unsigned CurStage) { - std::pair Stages = RegToStageDiff[Reg]; - if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) - return 1; - return Stages.first; - } - - /// The number of stages for a Phi is a little different than other - /// instructions. The minimum value computed in RegToStageDiff is 1 - /// because we assume the Phi is needed for at least 1 iteration. - /// This is not the case if the loop value is scheduled prior to the - /// Phi in the same stage. This function returns the number of stages - /// or iterations needed between the Phi definition and any uses. - unsigned getStagesForPhi(int Reg) { - std::pair Stages = RegToStageDiff[Reg]; - if (Stages.second) - return Stages.first; - return Stages.first - 1; - } - /// Return the instructions that are scheduled at the specified cycle. std::deque &getInstructions(int cycle) { return ScheduledInstrs[cycle]; diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index b67e6b52ac8..cb258b5e7b2 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -16,68 +16,76 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include namespace llvm { /// -/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used -/// to compute the post-dominator tree. +/// MachinePostDominatorTree - an analysis pass wrapper for DominatorTree +/// used to compute the post-dominator tree for MachineFunctions. 
/// -struct MachinePostDominatorTree : public MachineFunctionPass { -private: - PostDomTreeBase *DT; +class MachinePostDominatorTree : public MachineFunctionPass { + using PostDomTreeT = PostDomTreeBase; + std::unique_ptr PDT; public: static char ID; MachinePostDominatorTree(); - ~MachinePostDominatorTree() override; - FunctionPass *createMachinePostDominatorTreePass(); const SmallVectorImpl &getRoots() const { - return DT->getRoots(); + return PDT->getRoots(); } - MachineDomTreeNode *getRootNode() const { - return DT->getRootNode(); - } + MachineDomTreeNode *getRootNode() const { return PDT->getRootNode(); } MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); } bool properlyDominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); + } + + bool isVirtualRoot(const MachineDomTreeNode *Node) const { + return PDT->isVirtualRoot(Node); } MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { - return DT->findNearestCommonDominator(A, B); + MachineBasicBlock *B) const { + return PDT->findNearestCommonDominator(A, B); } + /// Returns the nearest common dominator of the given blocks. + /// If that tree node is a virtual root, a nullptr will be returned. 
+ MachineBasicBlock * + findNearestCommonDominator(ArrayRef Blocks) const; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override { PDT.reset(nullptr); } + void verifyAnalysis() const override; void print(llvm::raw_ostream &OS, const Module *M = nullptr) const override; }; } //end of namespace llvm diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h index 6d9fb9b9100..eeb69fef2c6 100644 --- a/include/llvm/CodeGen/MachineRegionInfo.h +++ b/include/llvm/CodeGen/MachineRegionInfo.h @@ -22,7 +22,7 @@ namespace llvm { -struct MachinePostDominatorTree; +class MachinePostDominatorTree; class MachineRegion; class MachineRegionNode; class MachineRegionInfo; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index b5deed1f501..488a5a55a16 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -107,16 +107,16 @@ private: /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. - MachineOperand *&getRegUseDefListHead(unsigned RegNo) { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *&getRegUseDefListHead(Register RegNo) { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } - MachineOperand *getRegUseDefListHead(unsigned RegNo) const { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *getRegUseDefListHead(Register RegNo) const { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } /// Get the next element in the use-def chain. 
@@ -214,8 +214,8 @@ public: bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const { return subRegLivenessEnabled() && RC.HasDisjunctSubRegs; } - bool shouldTrackSubRegLiveness(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Must pass a VReg"); + bool shouldTrackSubRegLiveness(Register VReg) const { + assert(VReg.isVirtual() && "Must pass a VReg"); return shouldTrackSubRegLiveness(*getRegClass(VReg)); } bool subRegLivenessEnabled() const { @@ -326,7 +326,7 @@ public: /// of the specified register, skipping those marked as Debug. using reg_nodbg_iterator = defusechain_iterator; - reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { + reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const { return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); } static reg_nodbg_iterator reg_nodbg_end() { @@ -374,7 +374,7 @@ public: /// reg_nodbg_empty - Return true if the only instructions using or defining /// Reg are Debug instructions. - bool reg_nodbg_empty(unsigned RegNo) const { + bool reg_nodbg_empty(Register RegNo) const { return reg_nodbg_begin(RegNo) == reg_nodbg_end(); } @@ -628,10 +628,10 @@ public: /// Return the register class of the specified virtual register. /// This shouldn't be used directly unless \p Reg has a register class. /// \see getRegClassOrNull when this might happen. - const TargetRegisterClass *getRegClass(unsigned Reg) const { - assert(VRegInfo[Reg].first.is() && + const TargetRegisterClass *getRegClass(Register Reg) const { + assert(VRegInfo[Reg.id()].first.is() && "Register class not set, wrong accessor"); - return VRegInfo[Reg].first.get(); + return VRegInfo[Reg.id()].first.get(); } /// Return the register class of \p Reg, or null if Reg has not been assigned @@ -727,7 +727,7 @@ public: /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. 
LLT getType(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) return VRegToType[Reg]; return LLT{}; } @@ -760,7 +760,7 @@ public: /// specified virtual register. This is typically used by target, and in case /// of an earlier hint it will be overwritten. void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; RegAllocHints[VReg].second.clear(); RegAllocHints[VReg].second.push_back(PrefReg); @@ -769,7 +769,7 @@ public: /// addRegAllocationHint - Add a register allocation hint to the hints /// vector for VReg. void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].second.push_back(PrefReg); } @@ -789,17 +789,18 @@ public: /// specified virtual register. If there are many hints, this returns the /// one with the greatest weight. std::pair - getRegAllocationHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); - unsigned BestHint = (RegAllocHints[VReg].second.size() ? - RegAllocHints[VReg].second[0] : 0); - return std::pair(RegAllocHints[VReg].first, BestHint); + getRegAllocationHint(Register VReg) const { + assert(VReg.isVirtual()); + unsigned BestHint = (RegAllocHints[VReg.id()].second.size() ? + RegAllocHints[VReg.id()].second[0] : 0); + return std::pair(RegAllocHints[VReg.id()].first, + BestHint); } /// getSimpleHint - same as getRegAllocationHint except it will only return /// a target independent hint. - unsigned getSimpleHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + Register getSimpleHint(Register VReg) const { + assert(VReg.isVirtual()); std::pair Hint = getRegAllocationHint(VReg); return Hint.first ? 
0 : Hint.second; } @@ -808,7 +809,7 @@ public: /// register allocation hints for VReg. const std::pair> &getRegAllocationHints(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); return RegAllocHints[VReg]; } @@ -817,6 +818,17 @@ public: /// deleted during LiveDebugVariables analysis. void markUsesInDebugValueAsUndef(unsigned Reg) const; + /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions + /// to refer to the designated register. + void updateDbgUsersToReg(unsigned Reg, + ArrayRef Users) const { + for (MachineInstr *MI : Users) { + assert(MI->isDebugInstr()); + assert(MI->getOperand(0).isReg()); + MI->getOperand(0).setReg(Reg); + } + } + /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are @@ -882,8 +894,8 @@ public: /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. - bool isReserved(unsigned PhysReg) const { - return getReservedRegs().test(PhysReg); + bool isReserved(Register PhysReg) const { + return getReservedRegs().test(PhysReg.id()); } /// Returns true when the given register unit is considered reserved. 
@@ -1164,7 +1176,7 @@ public: PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const TargetRegisterClass *RC = MRI->getRegClass(RegUnit); PSet = TRI->getRegClassPressureSets(RC); Weight = TRI->getRegClassWeight(RC).RegWeight; diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 75a334f61ad..333367943ac 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -100,6 +100,7 @@ namespace llvm { extern cl::opt ForceTopDown; extern cl::opt ForceBottomUp; +extern cl::opt VerifyScheduling; class LiveIntervals; class MachineDominatorTree; diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h new file mode 100644 index 00000000000..81a9b63b64c --- /dev/null +++ b/include/llvm/CodeGen/ModuloSchedule.h @@ -0,0 +1,367 @@ +//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Software pipelining (SWP) is an instruction scheduling technique for loops +// that overlaps loop iterations and exploits ILP via compiler transformations. +// +// There are multiple methods for analyzing a loop and creating a schedule. +// An example algorithm is Swing Modulo Scheduling (implemented by the +// MachinePipeliner). The details of how a schedule is arrived at are irrelevant +// for the task of actually rewriting a loop to adhere to the schedule, which +// is what this file does. +// +// A schedule is, for every instruction in a block, a Cycle and a Stage. 
Note +// that we only support single-block loops, so "block" and "loop" can be used +// interchangeably. +// +// The Cycle of an instruction defines a partial order of the instructions in +// the remapped loop. Instructions within a cycle must not consume the output +// of any instruction in the same cycle. Cycle information is assumed to have +// been calculated such that the processor will execute instructions in +// lock-step (for example in a VLIW ISA). +// +// The Stage of an instruction defines the mapping between logical loop +// iterations and pipelined loop iterations. An example (unrolled) pipeline +// may look something like: +// +// I0[0] Execute instruction I0 of iteration 0 +// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 0 +// I1[1], I0[2] +// I1[2], I0[3] +// +// In the schedule for this unrolled sequence we would say that I0 was scheduled +// in stage 0 and I1 in stage 1: +// +// loop: +// [stage 0] x = I0 +// [stage 1] I1 x (from stage 0) +// +// And to actually generate valid code we must insert a phi: +// +// loop: +// x' = phi(x) +// x = I0 +// I1 x' +// +// This is a simple example; the rules for how to generate correct code given +// an arbitrary schedule containing loop-carried values are complex. +// +// Note that these examples only mention the steady-state kernel of the +// generated loop; prologs and epilogs must be generated also that prime and +// flush the pipeline. Doing so is nontrivial. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H +#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include +#include + +namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class LiveIntervals; + +/// Represents a schedule for a single-block loop. For every instruction we +/// maintain a Cycle and Stage. +class ModuloSchedule { +private: + /// The block containing the loop instructions. + MachineLoop *Loop; + + /// The instructions to be generated, in total order. Cycle provides a partial + /// order; the total order within cycles has been decided by the schedule + /// producer. + std::vector ScheduledInstrs; + + /// The cycle for each instruction. + DenseMap Cycle; + + /// The stage for each instruction. + DenseMap Stage; + + /// The number of stages in this schedule (Max(Stage) + 1). + int NumStages; + +public: + /// Create a new ModuloSchedule. + /// \arg ScheduledInstrs The new loop instructions, in total resequenced + /// order. + /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does + /// not need to start at zero. ScheduledInstrs must be partially ordered by + /// Cycle. + /// \arg Stage Stage index for all instructions in ScheduleInstrs. + ModuloSchedule(MachineFunction &MF, MachineLoop *Loop, + std::vector ScheduledInstrs, + DenseMap Cycle, + DenseMap Stage) + : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)), + Stage(std::move(Stage)) { + NumStages = 0; + for (auto &KV : this->Stage) + NumStages = std::max(NumStages, KV.second); + ++NumStages; + } + + /// Return the single-block loop being scheduled. 
+ MachineLoop *getLoop() const { return Loop; } + + /// Return the number of stages contained in this schedule, which is the + /// largest stage index + 1. + int getNumStages() const { return NumStages; } + + /// Return the first cycle in the schedule, which is the cycle index of the + /// first instruction. + int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; } + + /// Return the final cycle in the schedule, which is the cycle index of the + /// last instruction. + int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; } + + /// Return the stage that MI is scheduled in, or -1. + int getStage(MachineInstr *MI) { + auto I = Stage.find(MI); + return I == Stage.end() ? -1 : I->second; + } + + /// Return the cycle that MI is scheduled at, or -1. + int getCycle(MachineInstr *MI) { + auto I = Cycle.find(MI); + return I == Cycle.end() ? -1 : I->second; + } + + /// Return the rescheduled instructions in order. + ArrayRef getInstructions() { return ScheduledInstrs; } + + void dump() { print(dbgs()); } + void print(raw_ostream &OS); +}; + +/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, +/// rewriting the old loop and inserting prologs and epilogs as required. +class ModuloScheduleExpander { +public: + using InstrChangesTy = DenseMap>; + +private: + using ValueMapTy = DenseMap; + using MBBVectorTy = SmallVectorImpl; + using InstrMapTy = DenseMap; + + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals &LIS; + + MachineBasicBlock *BB; + MachineBasicBlock *Preheader; + MachineBasicBlock *NewKernel = nullptr; + std::unique_ptr LoopInfo; + + /// Map for each register and the max difference between its uses and def. + /// The first element in the pair is the max difference in stages. The + /// second is true if the register defines a Phi value and loop value is + /// scheduled before the Phi. 
+ std::map> RegToStageDiff; + + /// Instructions to change when emitting the final schedule. + InstrChangesTy InstrChanges; + + void generatePipelinedLoop(); + void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &PrologBBs); + void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs); + void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, + bool IsLast); + void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast); + void removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs); + void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); + void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, + ValueMapTy *VRMap); + bool computeDelta(MachineInstr &MI, unsigned &Delta); + void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, + unsigned Num); + MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + void updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, unsigned InstrStageNum, + ValueMapTy *VRMap); + MachineInstr *findDefInLoop(unsigned Reg); + unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, + unsigned LoopStage, ValueMapTy *VRMap, + MachineBasicBlock *BB); + void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, + ValueMapTy *VRMap, InstrMapTy &InstrMap); + void 
rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap, + unsigned CurStageNum, unsigned PhiNum, + MachineInstr *Phi, unsigned OldReg, + unsigned NewReg, unsigned PrevReg = 0); + bool isLoopCarried(MachineInstr &Phi); + + /// Return the max. number of stages/iterations that can occur between a + /// register definition and its uses. + unsigned getStagesForReg(int Reg, unsigned CurStage) { + std::pair Stages = RegToStageDiff[Reg]; + if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 && + Stages.second) + return 1; + return Stages.first; + } + + /// The number of stages for a Phi is a little different than other + /// instructions. The minimum value computed in RegToStageDiff is 1 + /// because we assume the Phi is needed for at least 1 iteration. + /// This is not the case if the loop value is scheduled prior to the + /// Phi in the same stage. This function returns the number of stages + /// or iterations needed between the Phi definition and any uses. + unsigned getStagesForPhi(int Reg) { + std::pair Stages = RegToStageDiff[Reg]; + if (Stages.second) + return Stages.first; + return Stages.first - 1; + } + +public: + /// Create a new ModuloScheduleExpander. + /// \arg InstrChanges Modifications to make to instructions with memory + /// operands. + /// FIXME: InstrChanges is opaque and is an implementation detail of an + /// optimization in MachinePipeliner that crosses abstraction boundaries. + ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals &LIS, InstrChangesTy InstrChanges) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS), + InstrChanges(std::move(InstrChanges)) {} + + /// Performs the actual expansion. + void expand(); + /// Performs final cleanup after expansion. + void cleanup(); + + /// Returns the newly rewritten kernel block, or nullptr if this was + /// optimized away. 
+ MachineBasicBlock *getRewrittenKernel() { return NewKernel; } +}; + +/// A reimplementation of ModuloScheduleExpander. It works by generating a +/// standalone kernel loop and peeling out the prologs and epilogs. +class PeelingModuloScheduleExpander { + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals *LIS; + + /// The original loop block that gets rewritten in-place. + MachineBasicBlock *BB; + /// The original loop preheader. + MachineBasicBlock *Preheader; + /// All prolog and epilog blocks. + SmallVector Prologs, Epilogs; + /// For every block, the stages that are produced. + DenseMap LiveStages; + /// For every block, the stages that are available. A stage can be available + /// but not produced (in the epilog) or produced but not available (in the + /// prolog). + DenseMap AvailableStages; + + /// CanonicalMIs and BlockMIs form a bidirectional map between any of the + /// loop kernel clones. + DenseMap CanonicalMIs; + DenseMap, MachineInstr *> + BlockMIs; + + /// State passed from peelKernel to peelPrologAndEpilogs(). + std::deque PeeledFront, PeeledBack; + +public: + PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals *LIS) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS) {} + + void expand(); + + /// Runs ModuloScheduleExpander and treats it as a golden input to validate + /// aspects of the code generated by PeelingModuloScheduleExpander. + void validateAgainstModuloScheduleExpander(); + +protected: + /// Converts BB from the original loop body to the rewritten, pipelined + /// steady-state. + void rewriteKernel(); + +private: + /// Peels one iteration of the rewritten kernel (BB) in the specified + /// direction. 
+ MachineBasicBlock *peelKernel(LoopPeelDirection LPD); + /// Peel the kernel forwards and backwards to produce prologs and epilogs, + /// and stitch them together. + void peelPrologAndEpilogs(); + /// All prolog and epilog blocks are clones of the kernel, so any produced + /// register in one block has a corollary in all other blocks. + Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB); + /// Change all users of MI, if MI is predicated out + /// (LiveStages[MI->getParent()] == false). + void rewriteUsesOf(MachineInstr *MI); + /// Insert branches between prologs, kernel and epilogs. + void fixupBranches(); + /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block + /// to a block dominated by all prologs and epilogs. This allows us to treat + /// the loop exiting block as any other kernel clone. + MachineBasicBlock *CreateLCSSAExitingBlock(); + /// Helper to get the stage of an instruction in the schedule. + unsigned getStage(MachineInstr *MI) { + if (CanonicalMIs.count(MI)) + MI = CanonicalMIs[MI]; + return Schedule.getStage(MI); + } +}; + +/// Expander that simply annotates each scheduled instruction with a post-instr +/// symbol that can be consumed by the ModuloScheduleTest pass. +/// +/// The post-instr symbol is a way of annotating an instruction that can be +/// roundtripped in MIR. The syntax is: +/// MYINST %0, post-instr-symbol +class ModuloScheduleTestAnnotater { + MachineFunction &MF; + ModuloSchedule &S; + +public: + ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S) + : MF(MF), S(S) {} + + /// Performs the annotation. 
+ void annotate(); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index 8b014ccbb07..099ba788e9a 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -28,17 +28,17 @@ class Vector { public: /// Construct a PBQP vector of the given size. explicit Vector(unsigned Length) - : Length(Length), Data(llvm::make_unique(Length)) {} + : Length(Length), Data(std::make_unique(Length)) {} /// Construct a PBQP vector with initializer. Vector(unsigned Length, PBQPNum InitVal) - : Length(Length), Data(llvm::make_unique(Length)) { + : Length(Length), Data(std::make_unique(Length)) { std::fill(Data.get(), Data.get() + Length, InitVal); } /// Copy construct a PBQP vector. Vector(const Vector &V) - : Length(V.Length), Data(llvm::make_unique(Length)) { + : Length(V.Length), Data(std::make_unique(Length)) { std::copy(V.Data.get(), V.Data.get() + Length, Data.get()); } @@ -125,21 +125,21 @@ private: public: /// Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : - Rows(Rows), Cols(Cols), Data(llvm::make_unique(Rows * Cols)) { + Rows(Rows), Cols(Cols), Data(std::make_unique(Rows * Cols)) { } /// Construct a PBQP Matrix with the given dimensions and initial /// value. Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal) : Rows(Rows), Cols(Cols), - Data(llvm::make_unique(Rows * Cols)) { + Data(std::make_unique(Rows * Cols)) { std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal); } /// Copy construct a PBQP matrix. 
Matrix(const Matrix &M) : Rows(M.Rows), Cols(M.Cols), - Data(llvm::make_unique(Rows * Cols)) { + Data(std::make_unique(Rows * Cols)) { std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get()); } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index d92ee93268e..1e765ce51e4 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -226,6 +226,10 @@ namespace llvm { /// inserting cmov instructions. extern char &EarlyIfConverterID; + /// EarlyIfPredicator - This pass performs if-conversion on SSA form by + /// predicating if/else block and insert select at the join point. + extern char &EarlyIfPredicatorID; + /// This pass performs instruction combining using trace metrics to estimate /// critical-path and resource depth. extern char &MachineCombinerID; diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h index 907c1a99e56..aa5173684e2 100644 --- a/include/llvm/CodeGen/Register.h +++ b/include/llvm/CodeGen/Register.h @@ -9,6 +9,7 @@ #ifndef LLVM_CODEGEN_REGISTER_H #define LLVM_CODEGEN_REGISTER_H +#include "llvm/MC/MCRegister.h" #include namespace llvm { @@ -20,41 +21,136 @@ class Register { public: Register(unsigned Val = 0): Reg(Val) {} + Register(MCRegister Val): Reg(Val) {} - /// Return true if the specified register number is in the virtual register - /// namespace. - bool isVirtual() const { + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. (Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo uses -1u and -2u. 
+ + /// isStackSlot - Sometimes it is useful to be able to store a non-negative + /// frame index in a variable that normally holds a register. isStackSlot() + /// returns true if Reg is in the range used for stack slots. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contain a stack slot, always check + /// isStackSlot() first. + /// + static bool isStackSlot(unsigned Reg) { + return MCRegister::isStackSlot(Reg); + } + + /// Compute the frame index from a register value representing a stack slot. + static int stackSlot2Index(unsigned Reg) { + assert(isStackSlot(Reg) && "Not a stack slot"); + return int(Reg - (1u << 30)); + } + + /// Convert a non-negative frame index to a stack slot register value. + static unsigned index2StackSlot(int FI) { + assert(FI >= 0 && "Cannot hold a negative frame index."); + return FI + (1u << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + return MCRegister::isPhysicalRegister(Reg); + } + + /// Return true if the specified register number is in + /// the virtual register namespace. + static bool isVirtualRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); return int(Reg) < 0; } - /// Return true if the specified register number is in the physical register - /// namespace. - bool isPhysical() const { - return int(Reg) > 0; - } - - /// Convert a virtual register number to a 0-based index. The first virtual - /// register in a function will get the index 0. - unsigned virtRegIndex() const { - assert(isVirtual() && "Not a virtual register"); + /// Convert a virtual register number to a 0-based index. + /// The first virtual register in a function will get the index 0. 
+ static unsigned virtReg2Index(unsigned Reg) { + assert(isVirtualRegister(Reg) && "Not a virtual register"); return Reg & ~(1u << 31); } /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. - static Register index2VirtReg(unsigned Index) { - return Register(Index | (1u << 31)); + static unsigned index2VirtReg(unsigned Index) { + return Index | (1u << 31); + } + + /// Return true if the specified register number is in the virtual register + /// namespace. + bool isVirtual() const { + return isVirtualRegister(Reg); + } + + /// Return true if the specified register number is in the physical register + /// namespace. + bool isPhysical() const { + return isPhysicalRegister(Reg); + } + + /// Convert a virtual register number to a 0-based index. The first virtual + /// register in a function will get the index 0. + unsigned virtRegIndex() const { + return virtReg2Index(Reg); } operator unsigned() const { return Reg; } + unsigned id() const { return Reg; } + + operator MCRegister() const { + return MCRegister(Reg); + } + bool isValid() const { return Reg != 0; } + + /// Comparisons between register objects + bool operator==(const Register &Other) const { return Reg == Other.Reg; } + bool operator!=(const Register &Other) const { return Reg != Other.Reg; } + bool operator==(const MCRegister &Other) const { return Reg == Other.id(); } + bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); } + + /// Comparisons against register constants. E.g. + /// * R == AArch64::WZR + /// * R == 0 + /// * R == VirtRegMap::NO_PHYS_REG + bool operator==(unsigned Other) const { return Reg == Other; } + bool operator!=(unsigned Other) const { return Reg != Other; } + bool operator==(int Other) const { return Reg == unsigned(Other); } + bool operator!=(int Other) const { return Reg != unsigned(Other); } + // MSVC requires that we explicitly declare these two as well. 
+ bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); } + bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); } +}; + +// Provide DenseMapInfo for Register +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { + return DenseMapInfo::getEmptyKey(); + } + static inline unsigned getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); + } + static unsigned getHashValue(const Register &Val) { + return DenseMapInfo::getHashValue(Val.id()); + } + static bool isEqual(const Register &LHS, const Register &RHS) { + return DenseMapInfo::isEqual(LHS.id(), RHS.id()); + } }; } -#endif +#endif // ifndef LLVM_CODEGEN_REGISTER_H diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 14af5c4d090..25b310c4762 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -110,7 +110,7 @@ public: /// getLastCalleeSavedAlias - Returns the last callee saved register that /// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases. unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); if (PhysReg < CalleeSavedAliases.size()) return CalleeSavedAliases[PhysReg]; return 0; diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 5bbaa03fd75..92333b859f1 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -129,6 +129,8 @@ public: bool operator==(const PressureChange &RHS) const { return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc; } + + void dump() const; }; /// List of PressureChanges in order of increasing, unique PSetID. 
@@ -248,6 +250,7 @@ struct RegPressureDelta { bool operator!=(const RegPressureDelta &RHS) const { return !operator==(RHS); } + void dump() const; }; /// A set of live virtual registers and physical register units. @@ -273,15 +276,15 @@ private: unsigned NumRegUnits; unsigned getSparseIndexFromReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits; + if (Register::isVirtualRegister(Reg)) + return Register::virtReg2Index(Reg) + NumRegUnits; assert(Reg < NumRegUnits); return Reg; } unsigned getRegFromSparseIndex(unsigned SparseIndex) const { if (SparseIndex >= NumRegUnits) - return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits); + return Register::index2VirtReg(SparseIndex-NumRegUnits); return SparseIndex; } diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 9c48df82f07..5b5a80a67e7 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -51,7 +51,7 @@ class RegScavenger { /// If non-zero, the specific register is currently being /// scavenged. That is, it is spilled to this scavenging stack slot. - unsigned Reg = 0; + Register Reg; /// The instruction that restores the scavenged register from stack. const MachineInstr *Restore = nullptr; @@ -119,14 +119,14 @@ public: MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; } /// Return if a specific register is currently used. - bool isRegUsed(unsigned Reg, bool includeReserved = true) const; + bool isRegUsed(Register Reg, bool includeReserved = true) const; /// Return all available registers in the register class in Mask. BitVector getRegsAvailable(const TargetRegisterClass *RC); /// Find an unused register of the specified register class. /// Return 0 if none is found. 
- unsigned FindUnusedReg(const TargetRegisterClass *RC) const; + Register FindUnusedReg(const TargetRegisterClass *RC) const; /// Add a scavenging frame index. void addScavengingFrameIndex(int FI) { @@ -160,10 +160,10 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. - unsigned scavengeRegister(const TargetRegisterClass *RC, + Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill = true); - unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, + Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, bool AllowSpill = true) { return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill); } @@ -177,17 +177,17 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. - unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, + Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill = true); /// Tell the scavenger a register is used. - void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); + void setRegUsed(Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); private: /// Returns true if a register is reserved. It is never "unused". - bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); } + bool isReserved(Register Reg) const { return MRI->isReserved(Reg); } /// setUsed / setUnused - Mark the state of one or a number of register units. /// @@ -203,16 +203,16 @@ private: void determineKillsAndDefs(); /// Add all Reg Units that Reg contains to BV. - void addRegUnits(BitVector &BV, unsigned Reg); + void addRegUnits(BitVector &BV, Register Reg); /// Remove all Reg Units that \p Reg contains from \p BV. 
- void removeRegUnits(BitVector &BV, unsigned Reg); + void removeRegUnits(BitVector &BV, Register Reg); /// Return the candidate register that is unused for the longest after /// StartMI. UseMI is set to the instruction where the search stopped. /// /// No more than InstrLimit instructions are inspected. - unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI, + Register findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI); @@ -225,7 +225,7 @@ private: /// Spill a register after position \p After and reload it before position /// \p UseMI. - ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI); }; diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 3e3b604acba..1eb9b9f322b 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -34,6 +34,7 @@ namespace llvm { + class AAResults; class LiveIntervals; class MachineFrameInfo; class MachineFunction; @@ -57,7 +58,7 @@ namespace llvm { : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -173,7 +174,7 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep = nullptr; + AAResults *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special @@ -201,7 +202,7 @@ namespace llvm { Value2SUsMap &loads, unsigned N); /// Adds a chain edge between SUa and SUb, but only if both - /// AliasAnalysis and Target fail to deny the dependency. 
+ /// AAResults and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -306,7 +307,7 @@ namespace llvm { /// If \p RPTracker is non-null, compute register pressure as a side effect. /// The DAG builder is an efficient place to do it because it already visits /// operands. - void buildSchedGraph(AliasAnalysis *AA, + void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, LiveIntervals *LIS = nullptr, @@ -374,6 +375,9 @@ namespace llvm { /// Returns a mask for which lanes get read/written by the given (register) /// machine operand. LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + /// Returns true if the def register in \p MO has no uses. + bool deadDefHasNoUse(const MachineOperand &MO); }; /// Creates a new SUnit and return a ptr to it. diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 12a97084702..6b8e2dd803b 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -26,8 +26,6 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -58,6 +56,7 @@ namespace llvm { +class AAResults; class BlockAddress; class Constant; class ConstantFP; @@ -66,6 +65,7 @@ class DataLayout; struct fltSemantics; class GlobalValue; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineConstantPoolValue; @@ -269,7 +269,13 @@ class SelectionDAG { using CallSiteInfo = MachineFunction::CallSiteInfo; using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl; - DenseMap SDCallSiteInfo; + + struct CallSiteDbgInfo { + CallSiteInfo CSInfo; + MDNode *HeapAllocSite = nullptr; + }; + 
+ DenseMap SDCallSiteDbgInfo; uint16_t NextPersistentId = 0; @@ -382,7 +388,11 @@ private: Node->OperandList = nullptr; } void CreateTopologicalOrder(std::vector& Order); + public: + // Maximum depth for recursive analysis such as computeKnownBits, etc. + static constexpr unsigned MaxRecursionDepth = 6; + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); SelectionDAG(const SelectionDAG &) = delete; SelectionDAG &operator=(const SelectionDAG &) = delete; @@ -489,7 +499,7 @@ public: /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and /// types that are illegal on the target. - void Combine(CombineLevel Level, AliasAnalysis *AA, + void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel); /// This transforms the SelectionDAG into a SelectionDAG that @@ -628,10 +638,9 @@ public: SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset = 0, bool isTargetGA = false, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, - int64_t offset = 0, - unsigned char TargetFlags = 0) { + int64_t offset = 0, unsigned TargetFlags = 0) { return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); } SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); @@ -639,28 +648,27 @@ public: return getFrameIndex(FI, VT, true); } SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false, - unsigned char TargetFlags = 0); - SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0); + SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(const Constant *C, EVT VT, - unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(const Constant 
*C, EVT VT, - unsigned Align = 0, int Offset = 0, - unsigned char TargetFlags = 0) { + SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offs = 0, bool isT = false, + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(MachineConstantPoolValue *C, - EVT VT, unsigned Align = 0, - int Offset = 0, unsigned char TargetFlags=0) { + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Align = 0, int Offset = 0, + unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); // When generating a branch to a BB, we don't in general know enough // to provide debug info for the BB at that time, so keep this one around. 
SDValue getBasicBlock(MachineBasicBlock *MBB); @@ -668,7 +676,7 @@ public: SDValue getExternalSymbol(const char *Sym, EVT VT); SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT); SDValue getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getMCSymbol(MCSymbol *Sym, EVT VT); SDValue getValueType(EVT); @@ -677,12 +685,10 @@ public: SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, MCSymbol *Label); - SDValue getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, bool isTarget = false, - unsigned char TargetFlags = 0); + SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, + bool isTarget = false, unsigned TargetFlags = 0); SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, - unsigned char TargetFlags = 0) { + int64_t Offset = 0, unsigned TargetFlags = 0) { return getBlockAddress(BA, VT, Offset, true, TargetFlags); } @@ -1035,7 +1041,7 @@ public: unsigned Align = 0, MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, - unsigned Size = 0, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, @@ -1117,9 +1123,11 @@ public: MachineMemOperand *MMO, bool IsTruncating = false, bool IsCompressing = false); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef Ops, MachineMemOperand *MMO); + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef Ops, MachineMemOperand *MMO); + ArrayRef Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); /// Return (create a new or find existing) a target-specific node. /// TargetMemSDNode should be derived class from MemSDNode. 
@@ -1588,9 +1596,12 @@ public: /// Extract. The reduction must use one of the opcodes listed in /p /// CandidateBinOps and on success /p BinOp will contain the matching opcode. /// Returns the vector that is being reduced on, or SDValue() if a reduction - /// was not matched. + /// was not matched. If \p AllowPartials is set then in the case of a + /// reduction pattern that only matches the first few stages, the extracted + /// subvector of the start of the reduction is returned. SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef CandidateBinOps); + ArrayRef CandidateBinOps, + bool AllowPartials = false); /// Utility function used by legalize and lowering to /// "unroll" a vector operation by splitting out the scalars and operating @@ -1664,16 +1675,28 @@ public: } void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) { - SDCallSiteInfo[CallNode] = std::move(CallInfo); + SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo); } CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) { - auto I = SDCallSiteInfo.find(CallNode); - if (I != SDCallSiteInfo.end()) - return std::move(I->second); + auto I = SDCallSiteDbgInfo.find(CallNode); + if (I != SDCallSiteDbgInfo.end()) + return std::move(I->second).CSInfo; return CallSiteInfo(); } + void addHeapAllocSite(const SDNode *Node, MDNode *MD) { + SDCallSiteDbgInfo[Node].HeapAllocSite = MD; + } + + /// Return the HeapAllocSite type associated with the SDNode, if it exists. 
+ MDNode *getHeapAllocSite(const SDNode *Node) { + auto It = SDCallSiteDbgInfo.find(Node); + if (It == SDCallSiteDbgInfo.end()) + return nullptr; + return It->second.HeapAllocSite; + } + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -1712,7 +1735,7 @@ private: std::map ExtendedValueTypeNodes; StringMap ExternalSymbols; - std::map,SDNode*> TargetExternalSymbols; + std::map, SDNode *> TargetExternalSymbols; DenseMap MCSymbols; }; diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 147c325342f..de71a21d467 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -22,22 +22,23 @@ #include namespace llvm { - class FastISel; - class SelectionDAGBuilder; - class SDValue; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class OptimizationRemarkEmitter; - class TargetLowering; - class TargetLibraryInfo; - class FunctionLoweringInfo; - class ScheduleHazardRecognizer; - class SwiftErrorValueTracking; - class GCFunctionInfo; - class ScheduleDAGSDNodes; - class LoadInst; +class AAResults; +class FastISel; +class SelectionDAGBuilder; +class SDValue; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class OptimizationRemarkEmitter; +class TargetLowering; +class TargetLibraryInfo; +class FunctionLoweringInfo; +class ScheduleHazardRecognizer; +class SwiftErrorValueTracking; +class GCFunctionInfo; +class ScheduleDAGSDNodes; +class LoadInst; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. 
@@ -51,7 +52,7 @@ public: MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; SelectionDAGBuilder *SDB; - AliasAnalysis *AA; + AAResults *AA; GCFunctionInfo *GFI; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; @@ -162,6 +163,7 @@ public: OPC_EmitMergeInputChains1_1, OPC_EmitMergeInputChains1_2, OPC_EmitCopyToReg, + OPC_EmitCopyToReg2, OPC_EmitNodeXForm, OPC_EmitNode, // Space-optimized forms that implicitly encode number of result VTs. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 5aab9643e09..ceb8b72635a 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -548,10 +548,15 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class MaskedGatherScatterSDNode; uint16_t : NumMemSDNodeBits; - uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode + // This storage is shared between disparate class hierarchies to hold an + // enumeration specific to the class hierarchy in use. + // LSBaseSDNode => enum ISD::MemIndexedMode + // MaskedGatherScatterSDNode => enum ISD::MemIndexType + uint16_t AddressingMode : 3; }; enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 }; @@ -696,14 +701,20 @@ public: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: case ISD::STRICT_FMINNUM: case ISD::STRICT_FCEIL: case ISD::STRICT_FFLOOR: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: return true; @@ -1346,6 +1357,17 @@ public: /// store occurs. AtomicOrdering getOrdering() const { return MMO->getOrdering(); } + /// Return true if the memory operation ordering is Unordered or higher. 
+ bool isAtomic() const { return MMO->isAtomic(); } + + /// Returns true if the memory operation doesn't imply any ordering + /// constraints on surrounding memory operations beyond the normal memory + /// aliasing rules. + bool isUnordered() const { return MMO->isUnordered(); } + + /// Returns true if the memory operation is neither atomic nor volatile. + bool isSimple() const { return !isAtomic() && !isVolatile(); } + /// Return the type of the in-memory value. EVT getMemoryVT() const { return MemoryVT; } @@ -1702,16 +1724,16 @@ class GlobalAddressSDNode : public SDNode { const GlobalValue *TheGlobal; int64_t Offset; - unsigned char TargetFlags; + unsigned TargetFlags; GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, - unsigned char TF); + unsigned TF); public: const GlobalValue *getGlobal() const { return TheGlobal; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } // Return the address space this GlobalAddress belongs to. unsigned getAddressSpace() const; @@ -1778,16 +1800,16 @@ class JumpTableSDNode : public SDNode { friend class SelectionDAG; int JTI; - unsigned char TargetFlags; + unsigned TargetFlags; - JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF) + JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { } public: int getIndex() const { return JTI; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::JumpTable || @@ -1804,10 +1826,10 @@ class ConstantPoolSDNode : public SDNode { } Val; int Offset; // It's a MachineConstantPoolValue if top bit is set. 
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). - unsigned char TargetFlags; + unsigned TargetFlags; ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, - unsigned Align, unsigned char TF) + unsigned Align, unsigned TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1816,7 +1838,7 @@ class ConstantPoolSDNode : public SDNode { } ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, - EVT VT, int o, unsigned Align, unsigned char TF) + EVT VT, int o, unsigned Align, unsigned TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1847,7 +1869,7 @@ public: // Return the alignment of this constant pool object, which is either 0 (for // default alignment) or the desired value. unsigned getAlignment() const { return Alignment; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } Type *getType() const; @@ -1861,16 +1883,16 @@ public: class TargetIndexSDNode : public SDNode { friend class SelectionDAG; - unsigned char TargetFlags; + unsigned TargetFlags; int Index; int64_t Offset; public: - TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF) - : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), - TargetFlags(TF), Index(Idx), Offset(Ofs) {} + TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) + : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), + TargetFlags(TF), Index(Idx), Offset(Ofs) {} - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } int getIndex() const { return Index; } int64_t getOffset() const { return Offset; } @@ -2063,17 +2085,17 @@ class BlockAddressSDNode : public SDNode { const BlockAddress *BA; int64_t Offset; - unsigned char 
TargetFlags; + unsigned TargetFlags; BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, - int64_t o, unsigned char Flags) + int64_t o, unsigned Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BlockAddress || @@ -2104,15 +2126,16 @@ class ExternalSymbolSDNode : public SDNode { friend class SelectionDAG; const char *Symbol; - unsigned char TargetFlags; + unsigned TargetFlags; - ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT) - : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, - 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {} + ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) + : SDNode(isTarget ? 
ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, + DebugLoc(), getSDVTList(VT)), + Symbol(Sym), TargetFlags(TF) {} public: const char *getSymbol() const { return Symbol; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::ExternalSymbol || @@ -2181,8 +2204,6 @@ public: : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { LSBaseSDNodeBits.AddressingMode = AM; assert(getAddressingMode() == AM && "Value truncated"); - assert((!MMO->isAtomic() || MMO->isVolatile()) && - "use an AtomicSDNode instead for non-volatile atomics"); } const SDValue &getOffset() const { @@ -2362,8 +2383,24 @@ public: MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = IndexType; + assert(getIndexType() == IndexType && "Value truncated"); + } + + /// How is Index applied to BasePtr when computing addresses. 
+ ISD::MemIndexType getIndexType() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + bool isIndexScaled() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::UNSIGNED_SCALED); + } + bool isIndexSigned() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::SIGNED_UNSCALED); + } // In the both nodes address is Op1, mask is Op2: // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) @@ -2387,8 +2424,10 @@ public: friend class SelectionDAG; MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getPassThru() const { return getOperand(1); } @@ -2404,8 +2443,10 @@ public: friend class SelectionDAG; MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getValue() const { return getOperand(1); } diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 2bdf4425e24..ed52db3e626 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -61,6 +61,12 @@ private: /// protection when -fstack-protection is used. unsigned SSPBufferSize = 0; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet VisitedPHIs; + // A prologue is generated. 
bool HasPrologue = false; diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h index 62134dc792f..b8adcf759b1 100644 --- a/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -212,16 +212,17 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; + bool OmitRangeCheck; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr) {} + Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {} }; -/// Return the range of value within a range. +/// Return the range of values within a range. uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, unsigned Last); diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h index aebeeecbe50..db3d1175afe 100644 --- a/include/llvm/CodeGen/TargetCallingConv.h +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_TARGETCALLINGCONV_H #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include @@ -120,16 +121,22 @@ namespace ISD { bool isPointer() const { return IsPointer; } void setPointer() { IsPointer = 1; } - unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } - void setByValAlign(unsigned A) { - ByValAlign = Log2_32(A) + 1; - assert(getByValAlign() == A && "bitfield overflow"); + unsigned getByValAlign() const { + MaybeAlign A = decodeMaybeAlign(ByValAlign); + return A ? 
A->value() : 0; + } + void setByValAlign(Align A) { + ByValAlign = encode(A); + assert(getByValAlign() == A.value() && "bitfield overflow"); } - unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; } - void setOrigAlign(unsigned A) { - OrigAlign = Log2_32(A) + 1; - assert(getOrigAlign() == A && "bitfield overflow"); + unsigned getOrigAlign() const { + MaybeAlign A = decodeMaybeAlign(OrigAlign); + return A ? A->value() : 0; + } + void setOrigAlign(Align A) { + OrigAlign = encode(A); + assert(getOrigAlign() == A.value() && "bitfield overflow"); } unsigned getByValSize() const { return ByValSize; } diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 878c9ffd2b5..72edb27964c 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -28,6 +28,7 @@ namespace TargetStackID { enum Value { Default = 0, SGPRSpill = 1, + SVEVector = 2, NoAlloc = 255 }; } @@ -53,15 +54,15 @@ public: }; private: StackDirection StackDir; - unsigned StackAlignment; - unsigned TransientStackAlignment; + Align StackAlignment; + Align TransientStackAlignment; int LocalAreaOffset; bool StackRealignable; public: - TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1, bool StackReal = true) - : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), - LocalAreaOffset(LAO), StackRealignable(StackReal) {} + TargetFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None(), bool StackReal = true) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO), StackRealignable(StackReal) {} virtual ~TargetFrameLowering(); @@ -76,7 +77,7 @@ public: /// stack pointer must be aligned on entry to a function. Typically, this /// is the largest alignment for any data object in the target. 
/// - unsigned getStackAlignment() const { return StackAlignment; } + unsigned getStackAlignment() const { return StackAlignment.value(); } /// alignSPAdjust - This method aligns the stack adjustment to the correct /// alignment. @@ -95,7 +96,7 @@ public: /// calls. /// unsigned getTransientStackAlignment() const { - return TransientStackAlignment; + return TransientStackAlignment.value(); } /// isStackRealignable - This method returns whether the stack can be @@ -366,15 +367,10 @@ public: /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. - static bool isSafeForNoCSROpt(const Function &F) { - if (!F.hasLocalLinkage() || F.hasAddressTaken() || - !F.hasFnAttribute(Attribute::NoRecurse)) - return false; - // Function should not be optimized as tail call. - for (const User *U : F.users()) - if (auto CS = ImmutableCallSite(U)) - if (CS.isTailCall()) - return false; + static bool isSafeForNoCSROpt(const Function &F); + + /// Check if the no-CSR optimisation is profitable for the given function. 
+ virtual bool isProfitableForNoCSROpt(const Function &F) const { return true; } diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 25b04f8c019..5011cf34c0e 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -38,10 +38,12 @@ namespace llvm { +class AAResults; class DFAPacketizer; class InstrItineraryData; class LiveIntervals; class LiveVariables; +class MachineLoop; class MachineMemOperand; class MachineRegisterInfo; class MCAsmInfo; @@ -60,6 +62,8 @@ class TargetSubtargetInfo; template class SmallVectorImpl; +using ParamLoadedValue = std::pair; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -92,7 +96,7 @@ public: /// registers so that the instructions result is independent of the place /// in the function. bool isTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA = nullptr) const { + AAResults *AA = nullptr) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && (isReallyTriviallyReMaterializable(MI, AA) || @@ -108,7 +112,7 @@ protected: /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { return false; } @@ -151,7 +155,7 @@ private: /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. 
bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AliasAnalysis *AA) const; + AAResults *AA) const; public: /// These methods return the opcode of the frame setup/destroy instructions @@ -419,7 +423,8 @@ public: /// findCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. - virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + virtual bool findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; /// A pair composed of a register and a sub-register index. @@ -659,6 +664,50 @@ public: BytesAdded); } + /// Object returned by analyzeLoopForPipelining. Allows software pipelining + /// implementations to query attributes of the loop being pipelined and to + /// apply target-specific updates to the loop once pipelining is complete. + class PipelinerLoopInfo { + public: + virtual ~PipelinerLoopInfo(); + /// Return true if the given instruction should not be pipelined and should + /// be ignored. An example could be a loop comparison, or induction variable + /// update with no users being pipelined. + virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; + + /// Create a condition to determine if the trip count of the loop is greater + /// than TC. + /// + /// If the trip count is statically known to be greater than TC, return + /// true. If the trip count is statically known to be not greater than TC, + /// return false. Otherwise return nullopt and fill out Cond with the test + /// condition. + virtual Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) = 0; + + /// Modify the loop such that the trip count is + /// OriginalTC + TripCountAdjust. + virtual void adjustTripCount(int TripCountAdjust) = 0; + + /// Called when the loop's preheader has been modified to NewPreheader. 
+ virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0; + + /// Called when the loop is being removed. Any instructions in the preheader + /// should be removed. + /// + /// Once this function is called, no other functions on this object are + /// valid; the loop has been removed. + virtual void disposed() = 0; + }; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + virtual std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + return nullptr; + } + /// Analyze the loop code, return true if it cannot be understoo. Upon /// success, this function returns false and returns information about the /// induction variable and compare instruction used at the end. @@ -730,6 +779,19 @@ public: return false; } + /// Return the increase in code size needed to predicate a contiguous run of + /// NumInsts instructions. + virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + return 0; + } + + /// Return an estimate for the code size reduction (in bytes) which will be + /// caused by removing the given branch instruction during if-conversion. + virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const { + return getInstSizeInBytes(MI); + } + /// Return true if it's profitable to unpredicate /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually /// exclusive predicates. @@ -1558,8 +1620,7 @@ public: /// function. 
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const { + const MachineInstr &MIb) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && @@ -1636,6 +1697,28 @@ public: return false; } + /// During PHI elimination, lets the target make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. + virtual MachineInstr *createPHIDestinationCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src); + } + + /// During PHI elimination, lets the target make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. + virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register SrcSubReg, + Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src, 0, SrcSubReg); + } + /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. virtual outliner::OutlinedFunction getOutliningCandidateInfo( @@ -1691,6 +1774,11 @@ public: return false; } + /// Produce the expression describing the \p MI loading a value into + /// the parameter's forwarding register. 
+ virtual Optional + describeLoadedValue(const MachineInstr &MI) const; + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index d5cca60bb1b..a58fca7e73f 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -48,6 +47,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -72,8 +72,10 @@ class Constant; class FastISel; class FunctionLoweringInfo; class GlobalValue; +class GISelKnownBits; class IntrinsicInst; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineFunction; @@ -122,8 +124,7 @@ public: TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type, - // if an operation is not supported in target HW. + TypeSoftenFloat, // Convert this float to a same size integer type. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -284,7 +285,7 @@ public: /// a constant pool load whose address depends on the select condition. The /// parameter may be used to differentiate a select with FP compare from /// integer compare. 
- virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { + virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { return true; } @@ -539,6 +540,12 @@ public: return hasAndNotCompare(X); } + /// Return true if the target has a bit-test instruction: + /// (X & (1 << Y)) ==/!= 0 + /// This knowledge can be used to prevent breaking the pattern, + /// or creating it if it could be recognized. + virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } + /// There are two ways to clear extreme bits (either low or high): /// Mask: x & (-1 << y) (the instcombine canonical form) /// Shifts: x >> y << y @@ -571,6 +578,38 @@ public: return false; } + /// Given the pattern + /// (X & (C l>>/<< Y)) ==/!= 0 + /// return true if it should be transformed into: + /// ((X <> Y) & C) ==/!= 0 + /// WARNING: if 'X' is a constant, the fold may deadlock! + /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() + /// here because it can end up being not linked in. + virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + if (hasBitTest(X, Y)) { + // One interesting pattern that we'd want to form is 'bit test': + // ((1 << Y) & C) ==/!= 0 + // But we also need to be careful not to try to reverse that fold. + + // Is this '1 << Y' ? + if (OldShiftOpcode == ISD::SHL && CC->isOne()) + return false; // Keep the 'bit test' pattern. + + // Will it be '1 << Y' after the transform ? + if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) + return true; // Do form the 'bit test' pattern. + } + + // If 'X' is a constant, and we transform, then we will immediately + // try to undo the fold, thus causing endless combine loop. + // So by default, let's assume everyone prefers the fold + // iff 'X' is not a constant. 
+ return !XC; + } + /// These two forms are equivalent: /// sub %y, (xor %x, -1) /// add (add %x, 1), %y @@ -798,9 +837,9 @@ public: PointerUnion ptrVal; int offset = 0; // offset off of ptrVal - unsigned size = 0; // the size of the memory location + uint64_t size = 0; // the size of the memory location // (taken from memVT if zero) - unsigned align = 1; // alignment + MaybeAlign align = Align::None(); // alignment MachineMemOperand::Flags flags = MachineMemOperand::MONone; IntrinsicInfo() = default; @@ -884,6 +923,7 @@ public: case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Supported = isSupportedFixedPointOperation(Op, VT, Scale); break; } @@ -891,6 +931,8 @@ public: return Supported ? Action : Expand; } + // If Op is a strict floating-point operation, return the result + // of getOperationAction for the equivalent non-strict operation. LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { unsigned EqOpc; switch (Op) { @@ -911,26 +953,25 @@ public: case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; 
break; case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } - auto Action = getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. - if (Action != Legal) - Action = Expand; - - return Action; + return getOperationAction(EqOpc, VT); } /// Return true if the specified operation is legal on this target or can be @@ -1206,7 +1247,7 @@ public: EltTy = PointerTy.getTypeForEVT(Ty->getContext()); } return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), - VTy->getNumElements()); + VTy->getElementCount()); } return EVT::getEVT(Ty, AllowUnknown); @@ -1316,9 +1357,9 @@ public: /// Certain targets have context senstive alignment requirements, where one /// type has the alignment requirement of another type. - virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { - return DL.getABITypeAlignment(ArgTy); + virtual Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return Align(DL.getABITypeAlignment(ArgTy)); } /// If true, then instruction selection should seek to shrink the FP constant @@ -1426,11 +1467,38 @@ public: return false; } + /// LLT handling variant. + virtual bool allowsMisalignedMemoryAccesses( + LLT, unsigned AddrSpace = 0, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool * /*Fast*/ = nullptr) const { + return false; + } + + /// This function returns true if the memory access is aligned or if the + /// target allows this specific unaligned memory access. If the access is + /// allowed, the optional final parameter returns if the access is also fast + /// (as defined by the target). 
+ bool allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace = 0, unsigned Alignment = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const; + + /// Return true if the memory access of this type is aligned or if the target + /// allows this specific unaligned access for the given MachineMemOperand. + /// If the access is allowed, the optional final parameter returns if the + /// access is also fast (as defined by the target). + bool allowsMemoryAccessForAlignment(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast = nullptr) const; + /// Return true if the target supports a memory access of this type for the /// given address space and alignment. If the access is allowed, the optional /// final parameter returns if the access is also fast (as defined by the /// target). - bool + virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, unsigned Alignment = 1, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, @@ -1463,6 +1531,16 @@ public: return MVT::Other; } + + /// LLT returning variant. + virtual LLT + getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/, + unsigned /*SrcAlign*/, bool /*IsMemset*/, + bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, + const AttributeList & /*FuncAttributes*/) const { + return LLT(); + } + /// Returns true if it's safe to use load / store of the specified type to /// expand memcpy / memset inline. 
/// @@ -1522,35 +1600,19 @@ public: report_fatal_error("Funclet EH is not implemented for this target"); } - /// Returns the target's jmp_buf size in bytes (if never set, the default is - /// 200) - unsigned getJumpBufSize() const { - return JumpBufSize; - } - - /// Returns the target's jmp_buf alignment in bytes (if never set, the default - /// is 0) - unsigned getJumpBufAlignment() const { - return JumpBufAlignment; - } - /// Return the minimum stack alignment of an argument. - unsigned getMinStackArgumentAlignment() const { + Align getMinStackArgumentAlignment() const { return MinStackArgumentAlignment; } /// Return the minimum function alignment. - unsigned getMinFunctionAlignment() const { - return MinFunctionAlignment; - } + Align getMinFunctionAlignment() const { return MinFunctionAlignment; } /// Return the preferred function alignment. - unsigned getPrefFunctionAlignment() const { - return PrefFunctionAlignment; - } + Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } /// Return the preferred loop alignment. - virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { + virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { return PrefLoopAlignment; } @@ -1772,6 +1834,11 @@ public: return IsSigned; } + /// Returns true if arguments should be extended in lib calls. + virtual bool shouldExtendTypeInLibCall(EVT Type) const { + return true; + } + /// Returns how the given (atomic) load should be expanded by the /// IR-level AtomicExpand pass. virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { @@ -1848,7 +1915,8 @@ public: /// This may be true if the target does not directly support the /// multiplication operation for the specified type or the sequence of simpler /// ops is faster than the multiply. 
- virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { + virtual bool decomposeMulByConstant(LLVMContext &Context, + EVT VT, SDValue C) const { return false; } @@ -2056,40 +2124,25 @@ protected: TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); } - /// Set the target's required jmp_buf buffer size (in bytes); default is 200 - void setJumpBufSize(unsigned Size) { - JumpBufSize = Size; - } - - /// Set the target's required jmp_buf buffer alignment (in bytes); default is - /// 0 - void setJumpBufAlignment(unsigned Align) { - JumpBufAlignment = Align; - } - - /// Set the target's minimum function alignment (in log2(bytes)) - void setMinFunctionAlignment(unsigned Align) { - MinFunctionAlignment = Align; + /// Set the target's minimum function alignment. + void setMinFunctionAlignment(Align Alignment) { + MinFunctionAlignment = Alignment; } /// Set the target's preferred function alignment. This should be set if - /// there is a performance benefit to higher-than-minimum alignment (in - /// log2(bytes)) - void setPrefFunctionAlignment(unsigned Align) { - PrefFunctionAlignment = Align; + /// there is a performance benefit to higher-than-minimum alignment + void setPrefFunctionAlignment(Align Alignment) { + PrefFunctionAlignment = Alignment; } - /// Set the target's preferred loop alignment. Default alignment is zero, it - /// means the target does not care about loop alignment. The alignment is - /// specified in log2(bytes). The target may also override - /// getPrefLoopAlignment to provide per-loop values. - void setPrefLoopAlignment(unsigned Align) { - PrefLoopAlignment = Align; - } + /// Set the target's preferred loop alignment. Default alignment is one, it + /// means the target does not care about loop alignment. The target may also + /// override getPrefLoopAlignment to provide per-loop values. + void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } - /// Set the minimum stack alignment of an argument (in log2(bytes)). 
- void setMinStackArgumentAlignment(unsigned Align) { - MinStackArgumentAlignment = Align; + /// Set the minimum stack alignment of an argument. + void setMinStackArgumentAlignment(Align Alignment) { + MinStackArgumentAlignment = Alignment; } /// Set the maximum atomic operation size supported by the @@ -2555,6 +2608,12 @@ public: // same blocks of its users. virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + // Return the shift amount threshold for profitable transforms into shifts. + // Transforms creating shifts above the returned value will be avoided. + virtual unsigned getShiftAmountThreshold(EVT VT) const { + return VT.getScalarSizeInBits(); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -2650,25 +2709,19 @@ private: /// register usage. Sched::Preference SchedPreferenceInfo; - /// The size, in bytes, of the target's jmp_buf buffers - unsigned JumpBufSize; - - /// The alignment, in bytes, of the target's jmp_buf buffers - unsigned JumpBufAlignment; - /// The minimum alignment that any argument on the stack needs to have. - unsigned MinStackArgumentAlignment; + Align MinStackArgumentAlignment; /// The minimum function alignment (used when optimizing for size, and to /// prevent explicitly provided alignment from leading to incorrect code). - unsigned MinFunctionAlignment; + Align MinFunctionAlignment; /// The preferred function alignment (used when alignment unspecified and /// optimizing for speed). - unsigned PrefFunctionAlignment; + Align PrefFunctionAlignment; - /// The preferred loop alignment. - unsigned PrefLoopAlignment; + /// The preferred loop alignment (in bytes, not log2). + Align PrefLoopAlignment; /// Size in bits of the maximum atomics size the backend supports. /// Accesses larger than this will be expanded by AtomicExpandPass. @@ -2744,7 +2797,6 @@ private: /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; -protected: ValueTypeActionImpl ValueTypeActions; private: @@ -2790,7 +2842,7 @@ protected: /// expected to be merged. unsigned GatherAllAliasesMaxDepth; - /// Specify maximum number of store instructions per memset call. + /// \brief Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -2801,12 +2853,10 @@ protected: /// with 16-bit alignment would result in four 2-byte stores and one 1-byte /// store. This only applies to setting a constant array of a constant size. unsigned MaxStoresPerMemset; - - /// Maximum number of stores operations that may be substituted for the call - /// to memset, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute. unsigned MaxStoresPerMemsetOptSize; - /// Specify maximum bytes of store instructions per memcpy call. + /// \brief Specify maximum number of store instructions per memcpy call. /// /// When lowering \@llvm.memcpy this field specifies the maximum number of /// store operations that may be substituted for a call to memcpy. Targets @@ -2818,8 +2868,8 @@ protected: /// and one 1-byte store. This only applies to copying a constant array of /// constant size. unsigned MaxStoresPerMemcpy; - - + /// Likewise for functions with the OptSize attribute. + unsigned MaxStoresPerMemcpyOptSize; /// \brief Specify max number of store instructions to glue in inlined memcpy. /// /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number @@ -2827,13 +2877,22 @@ protected: // vectorization later on. unsigned MaxGluedStoresPerMemcpy = 0; - /// Maximum number of store operations that may be substituted for a call to - /// memcpy, used for functions with OptSize attribute. 
- unsigned MaxStoresPerMemcpyOptSize; + /// \brief Specify maximum number of load instructions per memcmp call. + /// + /// When lowering \@llvm.memcmp this field specifies the maximum number of + /// pairs of load operations that may be substituted for a call to memcmp. + /// Targets must set this value based on the cost threshold for that target. + /// Targets should assume that the memcmp will be done using as many of the + /// largest load operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine + /// with 32-bit alignment would result in one 4-byte load, one 2-byte load + /// and one 1-byte load. This only applies to comparing a constant array of + /// constant size. unsigned MaxLoadsPerMemcmp; + /// Likewise for functions with the OptSize attribute. unsigned MaxLoadsPerMemcmpOptSize; - /// Specify maximum bytes of store instructions per memmove call. + /// \brief Specify maximum number of store instructions per memmove call. /// /// When lowering \@llvm.memmove this field specifies the maximum number of /// store instructions that may be substituted for a call to memmove. Targets @@ -2844,9 +2903,7 @@ protected: /// with 8-bit alignment would result in nine 1-byte stores. This only /// applies to copying a constant array of constant size. unsigned MaxStoresPerMemmove; - - /// Maximum number of store instructions that may be substituted for a call to - /// memmove, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute.
unsigned MaxStoresPerMemmoveOptSize; /// Tells the code generator that select is more expensive than a branch if @@ -2885,6 +2942,7 @@ protected: class TargetLowering : public TargetLoweringBase { public: struct DAGCombinerInfo; + struct MakeLibCallOptions; TargetLowering(const TargetLowering &) = delete; TargetLowering &operator=(const TargetLowering &) = delete; @@ -2925,6 +2983,14 @@ public: return false; } + /// Returns true if the specified base+offset is a legal indexed addressing + /// mode for this target. \p MI is the load or store instruction that is being + /// considered for transformation. + virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, + bool IsPre, MachineRegisterInfo &MRI) const { + return false; + } + /// Return the entry encoding for a jump table in the current function. The /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. virtual unsigned getJumpTableEncoding() const; @@ -2955,14 +3021,15 @@ public: void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &DL) const; + const SDLoc &DL, const SDValue OldLHS, + const SDValue OldRHS) const; /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. - std::pair makeLibCall( - SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef Ops, - bool isSigned, const SDLoc &dl, bool doesNotReturn = false, - bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; + std::pair makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + EVT RetVT, ArrayRef Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. 
This needs to be @@ -3065,6 +3132,14 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + SelectionDAG &DAG, + unsigned Depth) const; + /// Look at Vector Op. At this point, we know that only the DemandedElts /// elements of the result of Op are ever used downstream. If we can use /// this information to simplify Op, create a new simplified DAG node and @@ -3099,6 +3174,15 @@ public: const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const; + /// Determine which of the bits specified in Mask are known to be either zero + /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts + /// argument allows us to only collect the known bits that are shared by the + /// requested vector elements. This is for GISel. + virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, + Register R, KnownBits &Known, + const APInt &DemandedElts, + const MachineRegisterInfo &MRI, + unsigned Depth = 0) const; /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. /// Default implementation computes low bits based on alignment @@ -3139,6 +3223,21 @@ public: TargetLoweringOpt &TLO, unsigned Depth = 0) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const; + + /// Tries to build a legal vector shuffle using the provided parameters + /// or equivalent variations. 
The Mask argument may be modified as the + /// function tries different variations. + /// Returns an empty SDValue if the operation fails. + SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef Mask, + SelectionDAG &DAG) const; + /// This method returns the constant pool value that will be loaded by LD. /// NOTE: You must check for implicit extensions of the constant by LD. virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; @@ -3174,6 +3273,8 @@ public: SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); + bool recursivelyDeleteUnusedNodes(SDNode *N); + void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; @@ -3297,6 +3398,18 @@ public: llvm_unreachable("Not Implemented"); } + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. Else return 0. + virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + + /// If isNegatibleForFree returns true, return the newly negated expression. + virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -3468,6 +3581,51 @@ public: } }; + /// This structure is used to pass arguments to makeLibCall function. + struct MakeLibCallOptions { + // By passing type list before soften to makeLibCall, the target hook + // shouldExtendTypeInLibCall can get the original type before soften.
+ ArrayRef OpsVTBeforeSoften; + EVT RetVTBeforeSoften; + bool IsSExt : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + bool IsPostTypeLegalization : 1; + bool IsSoften : 1; + + MakeLibCallOptions() + : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), + IsPostTypeLegalization(false), IsSoften(false) {} + + MakeLibCallOptions &setSExt(bool Value = true) { + IsSExt = Value; + return *this; + } + + MakeLibCallOptions &setNoReturn(bool Value = true) { + DoesNotReturn = Value; + return *this; + } + + MakeLibCallOptions &setDiscardResult(bool Value = true) { + IsReturnValueUsed = !Value; + return *this; + } + + MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { + IsPostTypeLegalization = Value; + return *this; + } + + MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef OpsVT, EVT RetVT, + bool Value = true) { + OpsVTBeforeSoften = OpsVT; + RetVTBeforeSoften = RetVT; + IsSoften = Value; + return *this; + } + }; + /// This function lowers an abstract call to a function into an actual call. /// This returns a pair of operands. The first element is the return value /// for the function (if RetTy is not VoidTy). The second element is the @@ -3537,8 +3695,8 @@ public: /// Return the register ID of the name passed in. Used by named register /// global variables extension. There is no target-independent behaviour /// so the default action is to bail. - virtual unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { + virtual Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { report_fatal_error("Named registers not implemented for this target"); } @@ -3597,6 +3755,25 @@ public: return MachineMemOperand::MONone; } + /// Should SelectionDAG lower an atomic store of the given kind as a normal + /// StoreSDNode (as opposed to an AtomicSDNode)? 
NOTE: The intention is to + /// eventually migrate all targets to using StoreSDNodes, but porting is + /// being done one target at a time. + virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { + assert(SI.isAtomic() && "violated precondition"); + return false; + } + + /// Should SelectionDAG lower an atomic load of the given kind as a normal + /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to + /// eventually migrate all targets to using LoadSDNodes, but porting is + /// being done one target at a time. + virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { + assert(LI.isAtomic() && "violated precondition"); + return false; + } + + /// This callback is invoked by the type legalizer to legalize nodes with an /// illegal operand type but legal result types. It replaces the /// LowerOperation callback in the type Legalizer. The reason we can not do @@ -3665,6 +3842,7 @@ public: C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. C_Memory, // Memory constraint. + C_Immediate, // Requires an immediate. C_Other, // Something else. C_Unknown // Unsupported constraint. }; @@ -3905,7 +4083,7 @@ public: /// \param N Node to expand /// \param Result output after conversion /// \returns True, if the expansion was successful, false otherwise - bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const; /// Expand UINT(i64) to double(f64) conversion /// \param N Node to expand @@ -3986,8 +4164,8 @@ public: /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; - /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts - /// integers as its arguments. + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
This + /// method accepts integers as its arguments. SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion @@ -4070,6 +4248,11 @@ private: DAGCombinerInfo &DCI, const SDLoc &DL) const; + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const; + SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL, @@ -4077,6 +4260,14 @@ private: SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl &Created) const; + SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index a1fb81cb009..59f5ddbd9da 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -35,7 +36,7 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; - const TargetMachine *TM; + const TargetMachine *TM
= nullptr; public: TargetLoweringObjectFileELF() = default; @@ -126,7 +127,8 @@ public: MachineModuleInfo *MMI) const override; /// Get MachO PC relative GOT entry relocation - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; @@ -206,6 +208,34 @@ public: const TargetMachine &TM) const override; }; +class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile { +public: + TargetLoweringObjectFileXCOFF() = default; + ~TargetLoweringObjectFileXCOFF() override = default; + + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const override; + + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const override; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 0bd82aafac3..d48fc664c1c 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -280,7 +280,7 @@ public: /// /// This can also be used to plug a new MachineSchedStrategy into an instance /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, 
make_unique(C), /*RemoveKillFlags=*/false) + /// return new ScheduleDAGMI(C, std::make_unique(C), /*RemoveKillFlags=*/false) /// /// Return NULL to select the default (generic) machine scheduler. virtual ScheduleDAGInstrs * diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index ddbd677b3ea..c42ca3ad6eb 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -87,11 +87,20 @@ public: /// Return true if the specified register is included in this register class. /// This does not include virtual registers. bool contains(unsigned Reg) const { + /// FIXME: Historically this function has returned false when given vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg)) + return false; return MC->contains(Reg); } /// Return true if both registers are in this class. bool contains(unsigned Reg1, unsigned Reg2) const { + /// FIXME: Historically this function has returned false when given vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg1) || + !Register::isPhysicalRegister(Reg2)) + return false; return MC->contains(Reg1, Reg2); } @@ -258,57 +267,6 @@ public: // Further sentinels can be allocated from the small negative integers. // DenseMapInfo uses -1u and -2u. - /// isStackSlot - Sometimes it is useful the be able to store a non-negative - /// frame index in a variable that normally holds a register. isStackSlot() - /// returns true if Reg is in the range used for stack slots. - /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. - /// - static bool isStackSlot(unsigned Reg) { - return int(Reg) >= (1 << 30); - } - - /// Compute the frame index from a register value representing a stack slot.
- static int stackSlot2Index(unsigned Reg) { - assert(isStackSlot(Reg) && "Not a stack slot"); - return int(Reg - (1u << 30)); - } - - /// Convert a non-negative frame index to a stack slot register value. - static unsigned index2StackSlot(int FI) { - assert(FI >= 0 && "Cannot hold a negative frame index."); - return FI + (1u << 30); - } - - /// Return true if the specified register number is in - /// the physical register namespace. - static bool isPhysicalRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) > 0; - } - - /// Return true if the specified register number is in - /// the virtual register namespace. - static bool isVirtualRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) < 0; - } - - /// Convert a virtual register number to a 0-based index. - /// The first virtual register in a function will get the index 0. - static unsigned virtReg2Index(unsigned Reg) { - assert(isVirtualRegister(Reg) && "Not a virtual register"); - return Reg & ~(1u << 31); - } - - /// Convert a 0-based index to a virtual register number. - /// This is the inverse operation of VirtReg2IndexFunctor below. - static unsigned index2VirtReg(unsigned Index) { - return Index | (1u << 31); - } - /// Return the size in bits of a register from class RC. unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { return getRegClassInfo(RC).RegSize; @@ -419,9 +377,9 @@ public: /// Returns true if the two registers are equal or alias each other. /// The registers may be virtual registers. - bool regsOverlap(unsigned regA, unsigned regB) const { + bool regsOverlap(Register regA, Register regB) const { if (regA == regB) return true; - if (isVirtualRegister(regA) || isVirtualRegister(regB)) + if (regA.isVirtual() || regB.isVirtual()) return false; // Regunits are numerically ordered. Find a common unit. 
@@ -489,6 +447,14 @@ public: llvm_unreachable("target does not provide no preserved mask"); } + /// Return a list of all of the registers which are clobbered "inside" a call + /// to the given function. For example, these might be needed for PLT + /// sequences of long-branch veneers. + virtual ArrayRef + getIntraCallClobberedRegs(const MachineFunction *MF) const { + return {}; + } + /// Return true if all bits that are set in mask \p mask0 are also set in /// \p mask1. bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; @@ -535,6 +501,11 @@ public: return false; } + /// This is a wrapper around getCallPreservedMask(). + /// Return true if the register is preserved after the call. + virtual bool isCalleeSavedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const; + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). @@ -709,13 +680,9 @@ public: /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. - /// The common subclass should contain - /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) const; + const TargetRegisterClass *B) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, @@ -1005,6 +972,13 @@ public: const MachineRegisterInfo &MRI) const { return nullptr; } + + /// Returns the physical register number of sub-register "Index" + /// for physical register RegNo. Return zero if the sub-register does not + /// exist. 
+ inline Register getSubReg(MCRegister Reg, unsigned Idx) const { + return static_cast(this)->getSubReg(Reg, Idx); + } }; //===----------------------------------------------------------------------===// @@ -1156,7 +1130,7 @@ public: struct VirtReg2IndexFunctor { using argument_type = unsigned; unsigned operator()(unsigned Reg) const { - return TargetRegisterInfo::virtReg2Index(Reg); + return Register::virtReg2Index(Reg); } }; @@ -1170,7 +1144,7 @@ struct VirtReg2IndexFunctor { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI = nullptr, unsigned SubIdx = 0, const MachineRegisterInfo *MRI = nullptr); diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 037fc3ed324..56018eca8c2 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -106,12 +106,10 @@ public: // us do things like a dedicated avx512 selector). However, we might want // to also specialize selectors by MachineFunction, which would let us be // aware of optsize/optnone and such. - virtual const InstructionSelector *getInstructionSelector() const { + virtual InstructionSelector *getInstructionSelector() const { return nullptr; } - virtual unsigned getHwMode() const { return 0; } - /// Target can subclass this hook to select a different DAG scheduler. virtual RegisterScheduler::FunctionPassCtor getDAGScheduler(CodeGenOpt::Level) const { @@ -274,6 +272,12 @@ public: /// scheduling, DAGCombine, etc.). virtual bool useAA() const; + /// \brief Sink addresses into blocks using GEP instructions rather than + /// pointer casts and arithmetic. + virtual bool addrSinkUsingGEPs() const { + return useAA(); + } + /// Enable the use of the early if conversion pass. 
virtual bool enableEarlyIfConversion() const { return false; } diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index c540c94f79d..cd4c4ca6408 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -81,7 +81,7 @@ namespace llvm { /// Returns the EVT that represents a vector EC.Min elements in length, /// where each element is of type VT. - static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { MVT M = MVT::getVectorVT(VT.V, EC); if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; @@ -277,7 +277,7 @@ namespace llvm { } // Given a (possibly scalable) vector type, return the ElementCount - MVT::ElementCount getVectorElementCount() const { + ElementCount getVectorElementCount() const { assert((isVector()) && "Invalid vector type!"); if (isSimple()) return V.getVectorElementCount(); diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 5818ac183fc..16df565bc8b 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,127 +40,132 @@ def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value def v128i1 : ValueType<128, 21>; // 128 x i1 vector value -def v512i1 : ValueType<512, 22>; // 512 x i1 vector value -def v1024i1: ValueType<1024,23>; //1024 x i1 vector value +def v256i1 : ValueType<256, 22>; // 256 x i1 vector value +def v512i1 : ValueType<512, 23>; // 512 x i1 vector value +def v1024i1: ValueType<1024,24>; //1024 x i1 vector value -def v1i8 : ValueType<8, 24>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value -def v16i8 : ValueType<128, 28>; // 16 x i8 vector value -def v32i8 : 
ValueType<256, 29>; // 32 x i8 vector value -def v64i8 : ValueType<512, 30>; // 64 x i8 vector value -def v128i8 : ValueType<1024,31>; //128 x i8 vector value -def v256i8 : ValueType<2048,32>; //256 x i8 vector value +def v1i8 : ValueType<8, 25>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 26>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 27>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 28>; // 8 x i8 vector value +def v16i8 : ValueType<128, 29>; // 16 x i8 vector value +def v32i8 : ValueType<256, 30>; // 32 x i8 vector value +def v64i8 : ValueType<512, 31>; // 64 x i8 vector value +def v128i8 : ValueType<1024,32>; //128 x i8 vector value +def v256i8 : ValueType<2048,33>; //256 x i8 vector value -def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value -def v8i16 : ValueType<128, 36>; // 8 x i16 vector value -def v16i16 : ValueType<256, 37>; // 16 x i16 vector value -def v32i16 : ValueType<512, 38>; // 32 x i16 vector value -def v64i16 : ValueType<1024,39>; // 64 x i16 vector value -def v128i16: ValueType<2048,40>; //128 x i16 vector value +def v1i16 : ValueType<16 , 34>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 35>; // 2 x i16 vector value +def v3i16 : ValueType<48 , 36>; // 3 x i16 vector value +def v4i16 : ValueType<64 , 37>; // 4 x i16 vector value +def v8i16 : ValueType<128, 38>; // 8 x i16 vector value +def v16i16 : ValueType<256, 39>; // 16 x i16 vector value +def v32i16 : ValueType<512, 40>; // 32 x i16 vector value +def v64i16 : ValueType<1024,41>; // 64 x i16 vector value +def v128i16: ValueType<2048,42>; //128 x i16 vector value -def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value -def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value -def v4i32 : ValueType<128, 44>; // 4 x i32 vector value -def v5i32 : ValueType<160, 45>; // 5 x i32 vector value -def 
v8i32 : ValueType<256, 46>; // 8 x i32 vector value -def v16i32 : ValueType<512, 47>; // 16 x i32 vector value -def v32i32 : ValueType<1024,48>; // 32 x i32 vector value -def v64i32 : ValueType<2048,49>; // 64 x i32 vector value -def v128i32 : ValueType<4096,50>; // 128 x i32 vector value -def v256i32 : ValueType<8182,51>; // 256 x i32 vector value -def v512i32 : ValueType<16384,52>; // 512 x i32 vector value -def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value +def v1i32 : ValueType<32 , 43>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 44>; // 2 x i32 vector value +def v3i32 : ValueType<96 , 45>; // 3 x i32 vector value +def v4i32 : ValueType<128, 46>; // 4 x i32 vector value +def v5i32 : ValueType<160, 47>; // 5 x i32 vector value +def v8i32 : ValueType<256, 48>; // 8 x i32 vector value +def v16i32 : ValueType<512, 49>; // 16 x i32 vector value +def v32i32 : ValueType<1024,50>; // 32 x i32 vector value +def v64i32 : ValueType<2048,51>; // 64 x i32 vector value +def v128i32 : ValueType<4096,52>; // 128 x i32 vector value +def v256i32 : ValueType<8182,53>; // 256 x i32 vector value +def v512i32 : ValueType<16384,54>; // 512 x i32 vector value +def v1024i32 : ValueType<32768,55>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536,56>; // 2048 x i32 vector value -def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value -def v2i64 : ValueType<128, 56>; // 2 x i64 vector value -def v4i64 : ValueType<256, 57>; // 4 x i64 vector value -def v8i64 : ValueType<512, 58>; // 8 x i64 vector value -def v16i64 : ValueType<1024,59>; // 16 x i64 vector value -def v32i64 : ValueType<2048,60>; // 32 x i64 vector value +def v1i64 : ValueType<64 , 57>; // 1 x i64 vector value +def v2i64 : ValueType<128, 58>; // 2 x i64 vector value +def v4i64 : ValueType<256, 59>; // 4 x i64 vector value +def v8i64 : ValueType<512, 60>; // 8 x i64 vector value +def v16i64 : ValueType<1024,61>; // 16 x i64 vector 
value +def v32i64 : ValueType<2048,62>; // 32 x i64 vector value -def v1i128 : ValueType<128, 61>; // 1 x i128 vector value +def v1i128 : ValueType<128, 63>; // 1 x i128 vector value -def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value +def v2f16 : ValueType<32 , 64>; // 2 x f16 vector value +def v3f16 : ValueType<48 , 65>; // 3 x f16 vector value +def v4f16 : ValueType<64 , 66>; // 4 x f16 vector value +def v8f16 : ValueType<128, 67>; // 8 x f16 vector value +def v16f16 : ValueType<256, 68>; // 8 x f16 vector value +def v32f16 : ValueType<512, 69>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 70>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 71>; // 2 x f32 vector value +def v3f32 : ValueType<96 , 72>; // 3 x f32 vector value +def v4f32 : ValueType<128, 73>; // 4 x f32 vector value +def v5f32 : ValueType<160, 74>; // 5 x f32 vector value +def v8f32 : ValueType<256, 75>; // 8 x f32 vector value +def v16f32 : ValueType<512, 76>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 77>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 78>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 79>; // 128 x f32 vector value +def v256f32 : ValueType<8182, 80>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 81>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 82>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 83>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 84>; // 1 x f64 vector value +def v2f64 : ValueType<128, 85>; // 2 x f64 vector value +def v4f64 : ValueType<256, 86>; // 4 x f64 vector value +def v8f64 : ValueType<512, 87>; // 8 x f64 vector value -def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 
vector value -def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value +def nxv1i1 : ValueType<1, 88>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 89>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 90>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 91>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 92>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 93>; // n x 32 x i1 vector value -def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value +def nxv1i8 : ValueType<8, 94>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 95>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 96>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 97>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 98>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 99>; // n x 32 x i8 vector value -def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value +def nxv1i16 : ValueType<16, 100>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 101>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 102>; // n x 4 x i16 vector value +def 
nxv8i16 : ValueType<128, 103>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 104>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 105>; // n x 32 x i16 vector value -def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value +def nxv1i32 : ValueType<32, 106>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 107>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 108>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 109>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 110>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,111>; // n x 32 x i32 vector value -def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value -def v8f16 : ValueType<128, 94>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value -def v4f32 : ValueType<128, 98>; // 4 x f32 vector value -def v5f32 : ValueType<160, 99>; // 5 x f32 vector value -def v8f32 : ValueType<256, 100>; // 8 x f32 vector value -def v16f32 : ValueType<512, 101>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 
109>; // 1 x f64 vector value -def v2f64 : ValueType<128, 110>; // 2 x f64 vector value -def v4f64 : ValueType<256, 111>; // 4 x f64 vector value -def v8f64 : ValueType<512, 112>; // 8 x f64 vector value +def nxv1i64 : ValueType<64, 112>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 113>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 114>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 115>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,116>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,117>; // n x 32 x i64 vector value -def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value +def nxv2f16 : ValueType<32 , 118>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 119>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 120>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 121>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 122>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 123>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 124>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 125>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 126>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 127>; // n x 2 x f64 vector value 
+def nxv4f64 : ValueType<256, 128>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 129>; // n x 8 x f64 vector value -def x86mmx : ValueType<64 , 125>; // X86 MMX value -def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue -def isVoid : ValueType<0 , 127>; // Produces no value -def untyped: ValueType<8 , 128>; // Produces an untyped value -def exnref: ValueType<0, 129>; // WebAssembly's exnref type +def x86mmx : ValueType<64 , 130>; // X86 MMX value +def FlagVT : ValueType<0 , 131>; // Pre-RA sched glue +def isVoid : ValueType<0 , 132>; // Produces no value +def untyped: ValueType<8 , 133>; // Produces an untyped value +def exnref: ValueType<0, 134>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index 70eb048f05e..db25ed5c511 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -49,7 +49,7 @@ class TargetInstrInfo; /// it; even spilled virtual registers (the register mapped to a /// spilled register is the temporary used to load it from the /// stack). - IndexedMap Virt2PhysMap; + IndexedMap Virt2PhysMap; /// Virt2StackSlotMap - This is virtual register to stack slot /// mapping. 
Each spilled virtual register has an entry in it @@ -93,7 +93,7 @@ class TargetInstrInfo; /// returns true if the specified virtual register is /// mapped to a physical register - bool hasPhys(unsigned virtReg) const { + bool hasPhys(Register virtReg) const { return getPhys(virtReg) != NO_PHYS_REG; } @@ -101,20 +101,20 @@ class TargetInstrInfo; /// virtual register Register getPhys(Register virtReg) const { assert(virtReg.isVirtual()); - return Virt2PhysMap[virtReg]; + return Virt2PhysMap[virtReg.id()]; } /// creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); + void assignVirt2Phys(Register virtReg, MCPhysReg physReg); /// clears the specified virtual register's, physical /// register mapping - void clearVirt(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && + void clearVirt(Register virtReg) { + assert(virtReg.isVirtual()); + assert(Virt2PhysMap[virtReg.id()] != NO_PHYS_REG && "attempt to clear a not assigned virtual register"); - Virt2PhysMap[virtReg] = NO_PHYS_REG; + Virt2PhysMap[virtReg.id()] = NO_PHYS_REG; } /// clears all virtual to physical register mappings @@ -124,21 +124,21 @@ class TargetInstrInfo; } /// returns true if VirtReg is assigned to its preferred physreg. - bool hasPreferredPhys(unsigned VirtReg); + bool hasPreferredPhys(Register VirtReg); /// returns true if VirtReg has a known preferred register. /// This returns false if VirtReg has a preference that is a virtual /// register that hasn't been assigned yet. - bool hasKnownPreference(unsigned VirtReg); + bool hasKnownPreference(Register VirtReg); /// records virtReg is a split live interval from SReg. 
- void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { - Virt2SplitMap[virtReg] = SReg; + void setIsSplitFromReg(Register virtReg, unsigned SReg) { + Virt2SplitMap[virtReg.id()] = SReg; } /// returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) const { - return Virt2SplitMap[virtReg]; + unsigned getPreSplitReg(Register virtReg) const { + return Virt2SplitMap[virtReg.id()]; } /// getOriginal - Return the original virtual register that VirtReg descends @@ -152,28 +152,29 @@ class TargetInstrInfo; /// returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. - bool isAssignedReg(unsigned virtReg) const { + bool isAssignedReg(Register virtReg) const { if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. - return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG); + return (Virt2SplitMap[virtReg.id()] && + Virt2PhysMap[virtReg.id()] != NO_PHYS_REG); } /// returns the stack slot mapped to the specified virtual /// register - int getStackSlot(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2StackSlotMap[virtReg]; + int getStackSlot(Register virtReg) const { + assert(virtReg.isVirtual()); + return Virt2StackSlotMap[virtReg.id()]; } /// create a mapping for the specifed virtual register to /// the next available stack slot - int assignVirt2StackSlot(unsigned virtReg); + int assignVirt2StackSlot(Register virtReg); /// create a mapping for the specified virtual register to /// the specified stack slot - void assignVirt2StackSlot(unsigned virtReg, int SS); + void assignVirt2StackSlot(Register virtReg, int SS); void print(raw_ostream &OS, const Module* M = nullptr) const override; void dump() const; diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index 
7d20bb0a7bd..7538cb2c254 100644 --- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -11,7 +11,6 @@ #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/Error.h" namespace llvm { @@ -31,9 +30,6 @@ enum VisitorDataSource { Error visitTypeRecord(CVType &Record, TypeIndex Index, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source = VDS_BytesPresent); -Error visitTypeRecord(CVType &Record, TypeIndex Index, - TypeVisitorCallbackPipeline &Callbacks, - VisitorDataSource Source = VDS_BytesPresent); Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source = VDS_BytesPresent); diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index 00fb0cf4cc9..60829a51dc2 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -33,6 +33,9 @@ public: virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0; virtual void EmitBinaryData(StringRef Data) = 0; virtual void AddComment(const Twine &T) = 0; + virtual void AddRawComment(const Twine &T) = 0; + virtual bool isVerboseAsm() = 0; + virtual std::string getTypeName(TypeIndex TI) = 0; virtual ~CodeViewRecordStreamer() = default; }; @@ -206,6 +209,11 @@ public: return 0; } + void emitRawComment(const Twine &T) { + if (isStreaming() && Streamer->isVerboseAsm()) + Streamer->AddRawComment(T); + } + private: void emitEncodedSignedInteger(const int64_t &Value, const Twine &Comment = ""); @@ -225,9 +233,10 @@ private: } void emitComment(const Twine &Comment) { - if (isStreaming()) { + if (isStreaming() && Streamer->isVerboseAsm()) { Twine TComment(Comment); - Streamer->AddComment(TComment); + if (!TComment.isTriviallyEmpty()) + Streamer->AddComment(TComment); } } diff --git 
a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 9767e49c44f..ed5c143818e 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -366,8 +366,134 @@ CV_REGISTER(AMD64_K7, 765) #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86) +#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM) + +// ARM registers + +CV_REGISTER(ARM_NOREG, 0) + +// General purpose 32-bit integer regisers + +CV_REGISTER(ARM_R0, 10) +CV_REGISTER(ARM_R1, 11) +CV_REGISTER(ARM_R2, 12) +CV_REGISTER(ARM_R3, 13) +CV_REGISTER(ARM_R4, 14) +CV_REGISTER(ARM_R5, 15) +CV_REGISTER(ARM_R6, 16) +CV_REGISTER(ARM_R7, 17) +CV_REGISTER(ARM_R8, 18) +CV_REGISTER(ARM_R9, 19) +CV_REGISTER(ARM_R10, 20) +CV_REGISTER(ARM_R11, 21) +CV_REGISTER(ARM_R12, 22) +CV_REGISTER(ARM_SP, 23) +CV_REGISTER(ARM_LR, 24) +CV_REGISTER(ARM_PC, 25) + +// Status register + +CV_REGISTER(ARM_CPSR, 25) + +// ARM VFPv1 registers + +CV_REGISTER(ARM_FPSCR, 40) +CV_REGISTER(ARM_FPEXC, 41) + +// ARM VFPv3/NEON registers + +CV_REGISTER(ARM_FS32, 200) +CV_REGISTER(ARM_FS33, 201) +CV_REGISTER(ARM_FS34, 202) +CV_REGISTER(ARM_FS35, 203) +CV_REGISTER(ARM_FS36, 204) +CV_REGISTER(ARM_FS37, 205) +CV_REGISTER(ARM_FS38, 206) +CV_REGISTER(ARM_FS39, 207) +CV_REGISTER(ARM_FS40, 208) +CV_REGISTER(ARM_FS41, 209) +CV_REGISTER(ARM_FS42, 210) +CV_REGISTER(ARM_FS43, 211) +CV_REGISTER(ARM_FS44, 212) +CV_REGISTER(ARM_FS45, 213) +CV_REGISTER(ARM_FS46, 214) +CV_REGISTER(ARM_FS47, 215) +CV_REGISTER(ARM_FS48, 216) +CV_REGISTER(ARM_FS49, 217) +CV_REGISTER(ARM_FS50, 218) +CV_REGISTER(ARM_FS51, 219) +CV_REGISTER(ARM_FS52, 220) +CV_REGISTER(ARM_FS53, 221) +CV_REGISTER(ARM_FS54, 222) +CV_REGISTER(ARM_FS55, 223) +CV_REGISTER(ARM_FS56, 224) +CV_REGISTER(ARM_FS57, 225) +CV_REGISTER(ARM_FS58, 226) +CV_REGISTER(ARM_FS59, 227) +CV_REGISTER(ARM_FS60, 228) +CV_REGISTER(ARM_FS61, 229) +CV_REGISTER(ARM_FS62, 230) 
+CV_REGISTER(ARM_FS63, 231) + +CV_REGISTER(ARM_ND0, 300) +CV_REGISTER(ARM_ND1, 301) +CV_REGISTER(ARM_ND2, 302) +CV_REGISTER(ARM_ND3, 303) +CV_REGISTER(ARM_ND4, 304) +CV_REGISTER(ARM_ND5, 305) +CV_REGISTER(ARM_ND6, 306) +CV_REGISTER(ARM_ND7, 307) +CV_REGISTER(ARM_ND8, 308) +CV_REGISTER(ARM_ND9, 309) +CV_REGISTER(ARM_ND10, 310) +CV_REGISTER(ARM_ND11, 311) +CV_REGISTER(ARM_ND12, 312) +CV_REGISTER(ARM_ND13, 313) +CV_REGISTER(ARM_ND14, 314) +CV_REGISTER(ARM_ND15, 315) +CV_REGISTER(ARM_ND16, 316) +CV_REGISTER(ARM_ND17, 317) +CV_REGISTER(ARM_ND18, 318) +CV_REGISTER(ARM_ND19, 319) +CV_REGISTER(ARM_ND20, 320) +CV_REGISTER(ARM_ND21, 321) +CV_REGISTER(ARM_ND22, 322) +CV_REGISTER(ARM_ND23, 323) +CV_REGISTER(ARM_ND24, 324) +CV_REGISTER(ARM_ND25, 325) +CV_REGISTER(ARM_ND26, 326) +CV_REGISTER(ARM_ND27, 327) +CV_REGISTER(ARM_ND28, 328) +CV_REGISTER(ARM_ND29, 329) +CV_REGISTER(ARM_ND30, 330) +CV_REGISTER(ARM_ND31, 331) + +CV_REGISTER(ARM_NQ0, 400) +CV_REGISTER(ARM_NQ1, 401) +CV_REGISTER(ARM_NQ2, 402) +CV_REGISTER(ARM_NQ3, 403) +CV_REGISTER(ARM_NQ4, 404) +CV_REGISTER(ARM_NQ5, 405) +CV_REGISTER(ARM_NQ6, 406) +CV_REGISTER(ARM_NQ7, 407) +CV_REGISTER(ARM_NQ8, 408) +CV_REGISTER(ARM_NQ9, 409) +CV_REGISTER(ARM_NQ10, 410) +CV_REGISTER(ARM_NQ11, 411) +CV_REGISTER(ARM_NQ12, 412) +CV_REGISTER(ARM_NQ13, 413) +CV_REGISTER(ARM_NQ14, 414) +CV_REGISTER(ARM_NQ15, 415) + +#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM) + #if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) +// arm64intr.h from MSVC defines ARM64_FPSR, which conflicts with +// these declarations. 
+#pragma push_macro("ARM64_FPSR") +#undef ARM64_FPSR + // ARM64 registers CV_REGISTER(ARM64_NOREG, 0) @@ -556,4 +682,6 @@ CV_REGISTER(ARM64_Q31, 211) CV_REGISTER(ARM64_FPSR, 220) +#pragma pop_macro("ARM64_FPSR") + #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h index ed126ed9e2f..270cd4b8330 100644 --- a/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -37,6 +37,17 @@ ArrayRef> getThunkOrdinalNames(); ArrayRef> getTrampolineNames(); ArrayRef> getImageSectionCharacteristicNames(); +ArrayRef> getClassOptionNames(); +ArrayRef> getMemberAccessNames(); +ArrayRef> getMethodOptionNames(); +ArrayRef> getMemberKindNames(); +ArrayRef> getPtrKindNames(); +ArrayRef> getPtrModeNames(); +ArrayRef> getPtrMemberRepNames(); +ArrayRef> getTypeModifierNames(); +ArrayRef> getCallingConventions(); +ArrayRef> getFunctionOptionEnum(); +ArrayRef> getLabelTypeEnum(); } // end namespace codeview } // end namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index 62761cb87c8..108abb29149 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -62,7 +62,7 @@ public: Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique(Record.content(), Container); + Mapping = std::make_unique(Record.content(), Container); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 5e9a7432b9b..1aafa3ca9f1 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -73,17 +73,17 @@ public: 
Thunk32Sym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t Parent; - uint32_t End; - uint32_t Next; - uint32_t Offset; - uint16_t Segment; - uint16_t Length; + uint32_t Parent = 0; + uint32_t End = 0; + uint32_t Next = 0; + uint32_t Offset = 0; + uint16_t Segment = 0; + uint16_t Length = 0; ThunkOrdinal Thunk; StringRef Name; ArrayRef VariantData; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_TRAMPOLINE @@ -94,13 +94,13 @@ public: : SymbolRecord(Kind), RecordOffset(RecordOffset) {} TrampolineType Type; - uint16_t Size; - uint32_t ThunkOffset; - uint32_t TargetOffset; - uint16_t ThunkSection; - uint16_t TargetSection; + uint16_t Size = 0; + uint32_t ThunkOffset = 0; + uint32_t TargetOffset = 0; + uint16_t ThunkSection = 0; + uint16_t TargetSection = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_SECTION @@ -110,14 +110,14 @@ public: SectionSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint16_t SectionNumber; - uint8_t Alignment; - uint32_t Rva; - uint32_t Length; - uint32_t Characteristics; + uint16_t SectionNumber = 0; + uint8_t Alignment = 0; + uint32_t Rva = 0; + uint32_t Length = 0; + uint32_t Characteristics = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COFFGROUP @@ -127,13 +127,13 @@ public: CoffGroupSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t Size; - uint32_t Characteristics; - uint32_t Offset; - uint16_t Segment; + uint32_t Size = 0; + uint32_t Characteristics = 0; + uint32_t Offset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; class ScopeEndSym : public SymbolRecord { @@ -142,7 +142,7 @@ public: ScopeEndSym(SymbolRecordKind Kind, uint32_t RecordOffset) : SymbolRecord(Kind), RecordOffset(RecordOffset) {} - uint32_t RecordOffset; + uint32_t 
RecordOffset = 0; }; class CallerSym : public SymbolRecord { @@ -153,7 +153,7 @@ public: std::vector Indices; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; struct DecodedAnnotation { @@ -333,7 +333,7 @@ private: class InlineSiteSym : public SymbolRecord { public: explicit InlineSiteSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - InlineSiteSym(uint32_t RecordOffset) + explicit InlineSiteSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::InlineSiteSym), RecordOffset(RecordOffset) {} @@ -342,12 +342,12 @@ public: BinaryAnnotationIterator()); } - uint32_t Parent; - uint32_t End; + uint32_t Parent = 0; + uint32_t End = 0; TypeIndex Inlinee; std::vector AnnotationData; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_PUB32 @@ -371,7 +371,7 @@ public: class RegisterSym : public SymbolRecord { public: explicit RegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - RegisterSym(uint32_t RecordOffset) + explicit RegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::RegisterSym), RecordOffset(RecordOffset) {} @@ -379,7 +379,7 @@ public: RegisterId Register; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_PROCREF, S_LPROCREF @@ -390,13 +390,13 @@ public: : SymbolRecord(SymbolRecordKind::ProcRefSym), RecordOffset(RecordOffset) { } - uint32_t SumName; - uint32_t SymOffset; - uint16_t Module; + uint32_t SumName = 0; + uint32_t SymOffset = 0; + uint16_t Module = 0; StringRef Name; uint16_t modi() const { return Module - 1; } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LOCAL @@ -410,7 +410,7 @@ public: LocalSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; struct LocalVariableAddrRange { @@ -440,11 +440,11 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Program; + uint32_t Program = 0; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_DEFRANGE_SUBFIELD @@ -453,7 
+453,7 @@ class DefRangeSubfieldSym : public SymbolRecord { public: explicit DefRangeSubfieldSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeSubfieldSym(uint32_t RecordOffset) + explicit DefRangeSubfieldSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeSubfieldSym), RecordOffset(RecordOffset) {} @@ -461,58 +461,62 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Program; - uint16_t OffsetInParent; + uint32_t Program = 0; + uint16_t OffsetInParent = 0; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; }; // S_DEFRANGE_REGISTER class DefRangeRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - }; - explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeRegisterSym(uint32_t RecordOffset) + explicit DefRangeRegisterSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterSym), RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeRegisterHeader); } - Header Hdr; + DefRangeRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeSubfieldRegisterHeader { + ulittle16_t Register; + ulittle16_t MayHaveNoName; + ulittle32_t OffsetInParent; }; // S_DEFRANGE_SUBFIELD_REGISTER class DefRangeSubfieldRegisterSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t MayHaveNoName; - ulittle32_t OffsetInParent; - }; - explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeSubfieldRegisterSym(uint32_t RecordOffset) + explicit DefRangeSubfieldRegisterSym(uint32_t RecordOffset) : 
SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym), RecordOffset(RecordOffset) {} - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeSubfieldRegisterHeader); } - Header Hdr; + DefRangeSubfieldRegisterHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeFramePointerRelHeader { + little32_t Offset; }; // S_DEFRANGE_FRAMEPOINTER_REL @@ -522,7 +526,7 @@ class DefRangeFramePointerRelSym : public SymbolRecord { public: explicit DefRangeFramePointerRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DefRangeFramePointerRelSym(uint32_t RecordOffset) + explicit DefRangeFramePointerRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelSym), RecordOffset(RecordOffset) {} @@ -530,22 +534,22 @@ public: return RecordOffset + RelocationOffset; } - int32_t Offset; + DefRangeFramePointerRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; +}; + +struct DefRangeRegisterRelHeader { + ulittle16_t Register; + ulittle16_t Flags; + little32_t BasePointerOffset; }; // S_DEFRANGE_REGISTER_REL class DefRangeRegisterRelSym : public SymbolRecord { public: - struct Header { - ulittle16_t Register; - ulittle16_t Flags; - little32_t BasePointerOffset; - }; - explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} explicit DefRangeRegisterRelSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym), @@ -563,13 +567,13 @@ public: bool hasSpilledUDTMember() const { return Hdr.Flags & IsSubfieldFlag; } uint16_t offsetInParent() const { return Hdr.Flags >> OffsetInParentShift; } - uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); } + uint32_t getRelocationOffset() const { return RecordOffset + sizeof(DefRangeRegisterRelHeader); } - 
Header Hdr; + DefRangeRegisterRelHeader Hdr; LocalVariableAddrRange Range; std::vector Gaps; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE @@ -581,9 +585,9 @@ public: : SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelFullScopeSym), RecordOffset(RecordOffset) {} - int32_t Offset; + int32_t Offset = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BLOCK32 @@ -599,14 +603,14 @@ public: return RecordOffset + RelocationOffset; } - uint32_t Parent; - uint32_t End; - uint32_t CodeSize; - uint32_t CodeOffset; - uint16_t Segment; + uint32_t Parent = 0; + uint32_t End = 0; + uint32_t CodeSize = 0; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LABEL32 @@ -622,12 +626,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; ProcSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_OBJNAME @@ -635,82 +639,82 @@ class ObjNameSym : public SymbolRecord { public: explicit ObjNameSym() : SymbolRecord(SymbolRecordKind::ObjNameSym) {} explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ObjNameSym(uint32_t RecordOffset) + explicit ObjNameSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) { } - uint32_t Signature; + uint32_t Signature = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_ENVBLOCK class EnvBlockSym : public SymbolRecord { public: explicit EnvBlockSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - EnvBlockSym(uint32_t RecordOffset) + explicit EnvBlockSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::EnvBlockSym), RecordOffset(RecordOffset) {} std::vector Fields; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_EXPORT class ExportSym : public SymbolRecord { public: 
explicit ExportSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ExportSym(uint32_t RecordOffset) + explicit ExportSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset) {} - uint16_t Ordinal; + uint16_t Ordinal = 0; ExportFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FILESTATIC class FileStaticSym : public SymbolRecord { public: explicit FileStaticSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - FileStaticSym(uint32_t RecordOffset) + explicit FileStaticSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::FileStaticSym), RecordOffset(RecordOffset) {} TypeIndex Index; - uint32_t ModFilenameOffset; + uint32_t ModFilenameOffset = 0; LocalSymFlags Flags; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COMPILE2 class Compile2Sym : public SymbolRecord { public: explicit Compile2Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - Compile2Sym(uint32_t RecordOffset) + explicit Compile2Sym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::Compile2Sym), RecordOffset(RecordOffset) {} CompileSym2Flags Flags; CPUType Machine; - uint16_t VersionFrontendMajor; - uint16_t VersionFrontendMinor; - uint16_t VersionFrontendBuild; - uint16_t VersionBackendMajor; - uint16_t VersionBackendMinor; - uint16_t VersionBackendBuild; + uint16_t VersionFrontendMajor = 0; + uint16_t VersionFrontendMinor = 0; + uint16_t VersionFrontendBuild = 0; + uint16_t VersionBackendMajor = 0; + uint16_t VersionBackendMinor = 0; + uint16_t VersionBackendBuild = 0; StringRef Version; std::vector ExtraStrings; uint8_t getLanguage() const { return static_cast(Flags) & 0xFF; } uint32_t getFlags() const { return static_cast(Flags) & ~0xFF; } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_COMPILE3 @@ -718,20 +722,20 @@ class Compile3Sym : public SymbolRecord { public: Compile3Sym() : SymbolRecord(SymbolRecordKind::Compile3Sym) {} explicit 
Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - Compile3Sym(uint32_t RecordOffset) + explicit Compile3Sym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::Compile3Sym), RecordOffset(RecordOffset) {} CompileSym3Flags Flags; CPUType Machine; - uint16_t VersionFrontendMajor; - uint16_t VersionFrontendMinor; - uint16_t VersionFrontendBuild; - uint16_t VersionFrontendQFE; - uint16_t VersionBackendMajor; - uint16_t VersionBackendMinor; - uint16_t VersionBackendBuild; - uint16_t VersionBackendQFE; + uint16_t VersionFrontendMajor = 0; + uint16_t VersionFrontendMinor = 0; + uint16_t VersionFrontendBuild = 0; + uint16_t VersionFrontendQFE = 0; + uint16_t VersionBackendMajor = 0; + uint16_t VersionBackendMinor = 0; + uint16_t VersionBackendBuild = 0; + uint16_t VersionBackendQFE = 0; StringRef Version; void setLanguage(SourceLanguage Lang) { @@ -750,7 +754,7 @@ public: (getFlags() & (CompileSym3Flags::PGO | CompileSym3Flags::LTCG)); } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FRAMEPROC @@ -761,12 +765,12 @@ public: : SymbolRecord(SymbolRecordKind::FrameProcSym), RecordOffset(RecordOffset) {} - uint32_t TotalFrameBytes; - uint32_t PaddingFrameBytes; - uint32_t OffsetToPadding; - uint32_t BytesOfCalleeSavedRegisters; - uint32_t OffsetOfExceptionHandler; - uint16_t SectionIdOfExceptionHandler; + uint32_t TotalFrameBytes = 0; + uint32_t PaddingFrameBytes = 0; + uint32_t OffsetToPadding = 0; + uint32_t BytesOfCalleeSavedRegisters = 0; + uint32_t OffsetOfExceptionHandler = 0; + uint16_t SectionIdOfExceptionHandler = 0; FrameProcedureOptions Flags; /// Extract the register this frame uses to refer to local variables. 
@@ -781,7 +785,7 @@ public: EncodedFramePtrReg((uint32_t(Flags) >> 16U) & 0x3U), CPU); } - uint32_t RecordOffset; + uint32_t RecordOffset = 0; private: }; @@ -799,11 +803,11 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; TypeIndex Type; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_HEAPALLOCSITE @@ -820,12 +824,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Segment; - uint16_t CallInstructionSize; + uint32_t CodeOffset = 0; + uint16_t Segment = 0; + uint16_t CallInstructionSize = 0; TypeIndex Type; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_FRAMECOOKIE @@ -841,12 +845,12 @@ public: return RecordOffset + RelocationOffset; } - uint32_t CodeOffset; - uint16_t Register; + uint32_t CodeOffset = 0; + uint16_t Register = 0; FrameCookieKind CookieKind; - uint8_t Flags; + uint8_t Flags = 0; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_UDT, S_COBOLUDT @@ -859,20 +863,20 @@ public: TypeIndex Type; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BUILDINFO class BuildInfoSym : public SymbolRecord { public: explicit BuildInfoSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - BuildInfoSym(uint32_t RecordOffset) + explicit BuildInfoSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::BuildInfoSym), RecordOffset(RecordOffset) {} TypeIndex BuildId; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_BPREL32 @@ -883,11 +887,11 @@ public: : SymbolRecord(SymbolRecordKind::BPRelativeSym), RecordOffset(RecordOffset) {} - int32_t Offset; + int32_t Offset = 0; TypeIndex Type; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_REGREL32 @@ -898,19 +902,19 @@ public: : SymbolRecord(SymbolRecordKind::RegRelativeSym), RecordOffset(RecordOffset) {} - uint32_t Offset; + uint32_t Offset = 0; TypeIndex Type; RegisterId Register; 
StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_CONSTANT, S_MANCONSTANT class ConstantSym : public SymbolRecord { public: explicit ConstantSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - ConstantSym(uint32_t RecordOffset) + explicit ConstantSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::ConstantSym), RecordOffset(RecordOffset) {} @@ -918,7 +922,7 @@ public: APSInt Value; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LDATA32, S_GDATA32, S_LMANDATA, S_GMANDATA @@ -927,7 +931,7 @@ class DataSym : public SymbolRecord { public: explicit DataSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {} - DataSym(uint32_t RecordOffset) + explicit DataSym(uint32_t RecordOffset) : SymbolRecord(SymbolRecordKind::DataSym), RecordOffset(RecordOffset) {} uint32_t getRelocationOffset() const { @@ -935,11 +939,11 @@ public: } TypeIndex Type; - uint32_t DataOffset; - uint16_t Segment; + uint32_t DataOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_LTHREAD32, S_GTHREAD32 @@ -957,11 +961,11 @@ public: } TypeIndex Type; - uint32_t DataOffset; - uint16_t Segment; + uint32_t DataOffset = 0; + uint16_t Segment = 0; StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_UNAMESPACE @@ -974,7 +978,7 @@ public: StringRef Name; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; // S_ANNOTATION @@ -989,7 +993,7 @@ public: uint16_t Segment = 0; std::vector Strings; - uint32_t RecordOffset; + uint32_t RecordOffset = 0; }; using CVSymbol = CVRecord; diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h index 081de32dd02..2b17f5ccb13 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h @@ -66,7 +66,7 @@ public: Error visitTypeBegin(CVType &Record) override { assert(!Mapping && "Already in a type mapping!"); 
- Mapping = llvm::make_unique(Record.content()); + Mapping = std::make_unique(Record.content()); return Mapping->Mapping.visitTypeBegin(Record); } diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h index 4c309c10ff0..c6044d5138a 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h @@ -10,6 +10,7 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h index 169715be2d5..fb0b579d6a0 100644 --- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h +++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h @@ -82,11 +82,6 @@ public: Pipeline.push_back(&Callbacks); } - void addCallbackToPipelineFront(TypeVisitorCallbacks &Callbacks) { - auto CallBackItr = Pipeline.begin(); - Pipeline.insert(CallBackItr, &Callbacks); - } - #define TYPE_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \ return visitKnownRecordImpl(CVR, Record); \ diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index d2a5318179e..fbebfe634b6 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -28,6 +28,10 @@ namespace llvm { /// A format-neutral container for source line information. struct DILineInfo { + // DILineInfo contains "" for function/filename it cannot fetch. + static constexpr const char *const BadString = ""; + // Use "??" instead of "" to make our output closer to addr2line. 
+ static constexpr const char *const Addr2LineBadString = "??"; std::string FileName; std::string FunctionName; Optional Source; @@ -38,7 +42,7 @@ struct DILineInfo { // DWARF-specific. uint32_t Discriminator = 0; - DILineInfo() : FileName(""), FunctionName("") {} + DILineInfo() : FileName(BadString), FunctionName(BadString) {} bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && @@ -61,9 +65,9 @@ struct DILineInfo { void dump(raw_ostream &OS) { OS << "Line info: "; - if (FileName != "") + if (FileName != BadString) OS << "file '" << FileName << "', "; - if (FunctionName != "") + if (FunctionName != BadString) OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; @@ -109,7 +113,7 @@ struct DIGlobal { uint64_t Start = 0; uint64_t Size = 0; - DIGlobal() : Name("") {} + DIGlobal() : Name(DILineInfo::BadString) {} }; struct DILocal { @@ -289,7 +293,7 @@ public: LoadedObjectInfoHelper(Ts &&... Args) : Base(std::forward(Args)...) {} std::unique_ptr clone() const override { - return llvm::make_unique(static_cast(*this)); + return std::make_unique(static_cast(*this)); } }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index ccf2891c2e2..39ae53c4e7f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -130,11 +130,11 @@ public: /// \param Attr DWARF attribute to search for. /// \param U the DWARFUnit the contains the DIE. /// \returns Optional DWARF form value if the attribute was extracted. 
- Optional getAttributeValue(const uint32_t DIEOffset, + Optional getAttributeValue(const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const; - bool extract(DataExtractor Data, uint32_t* OffsetPtr); + bool extract(DataExtractor Data, uint64_t* OffsetPtr); void dump(raw_ostream &OS) const; // Return an optional byte size of all attribute data in this abbreviation diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index 303375703d2..c9042e59326 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -96,7 +96,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable { using AtomType = uint16_t; using Form = dwarf::Form; - uint32_t DIEOffsetBase; + uint64_t DIEOffsetBase; SmallVector, 3> Atoms; Optional extractOffset(Optional Value) const; @@ -109,7 +109,7 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable { /// Returns true if we should continue scanning for entries or false if we've /// reached the last (sentinel) entry of encountered a parsing error. bool dumpName(ScopedPrinter &W, SmallVectorImpl &AtomForms, - uint32_t *DataOffset) const; + uint64_t *DataOffset) const; public: /// Apple-specific implementation of an Accelerator Entry. @@ -119,7 +119,7 @@ public: Entry(const HeaderData &Data); Entry() = default; - void extract(const AppleAcceleratorTable &AccelTable, uint32_t *Offset); + void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset); public: Optional getCUOffset() const override; @@ -143,7 +143,7 @@ public: class ValueIterator : public std::iterator { const AppleAcceleratorTable *AccelTable = nullptr; Entry Current; ///< The current entry. - unsigned DataOffset = 0; ///< Offset into the section. + uint64_t DataOffset = 0; ///< Offset into the section. unsigned Data = 0; ///< Current data entry. unsigned NumData = 0; ///< Number of data entries. 
@@ -151,7 +151,7 @@ public: void Next(); public: /// Construct a new iterator for the entries at \p DataOffset. - ValueIterator(const AppleAcceleratorTable &AccelTable, unsigned DataOffset); + ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset); /// End marker. ValueIterator() = default; @@ -193,7 +193,7 @@ public: /// DieOffset is the offset into the .debug_info section for the DIE /// related to the input hash data offset. /// DieTag is the tag of the DIE - std::pair readAtoms(uint32_t &HashDataOffset); + std::pair readAtoms(uint64_t *HashDataOffset); void dump(raw_ostream &OS) const override; /// Look up all entries in the accelerator table matching \c Key. @@ -245,7 +245,7 @@ public: struct Header : public HeaderPOD { SmallString<8> AugmentationString; - Error extract(const DWARFDataExtractor &AS, uint32_t *Offset); + Error extract(const DWARFDataExtractor &AS, uint64_t *Offset); void dump(ScopedPrinter &W) const; }; @@ -354,12 +354,12 @@ public: DataExtractor StrData; uint32_t Index; - uint32_t StringOffset; - uint32_t EntryOffset; + uint64_t StringOffset; + uint64_t EntryOffset; public: NameTableEntry(const DataExtractor &StrData, uint32_t Index, - uint32_t StringOffset, uint32_t EntryOffset) + uint64_t StringOffset, uint64_t EntryOffset) : StrData(StrData), Index(Index), StringOffset(StringOffset), EntryOffset(EntryOffset) {} @@ -367,17 +367,17 @@ public: uint32_t getIndex() const { return Index; } /// Returns the offset of the name of the described entities. - uint32_t getStringOffset() const { return StringOffset; } + uint64_t getStringOffset() const { return StringOffset; } /// Return the string referenced by this name table entry or nullptr if the /// string offset is not valid. const char *getString() const { - uint32_t Off = StringOffset; + uint64_t Off = StringOffset; return StrData.getCStr(&Off); } /// Returns the offset of the first Entry in the list. 
- uint32_t getEntryOffset() const { return EntryOffset; } + uint64_t getEntryOffset() const { return EntryOffset; } }; /// Represents a single accelerator table within the DWARF v5 .debug_names @@ -389,40 +389,40 @@ public: // Base of the whole unit and of various important tables, as offsets from // the start of the section. - uint32_t Base; - uint32_t CUsBase; - uint32_t BucketsBase; - uint32_t HashesBase; - uint32_t StringOffsetsBase; - uint32_t EntryOffsetsBase; - uint32_t EntriesBase; + uint64_t Base; + uint64_t CUsBase; + uint64_t BucketsBase; + uint64_t HashesBase; + uint64_t StringOffsetsBase; + uint64_t EntryOffsetsBase; + uint64_t EntriesBase; void dumpCUs(ScopedPrinter &W) const; void dumpLocalTUs(ScopedPrinter &W) const; void dumpForeignTUs(ScopedPrinter &W) const; void dumpAbbreviations(ScopedPrinter &W) const; - bool dumpEntry(ScopedPrinter &W, uint32_t *Offset) const; + bool dumpEntry(ScopedPrinter &W, uint64_t *Offset) const; void dumpName(ScopedPrinter &W, const NameTableEntry &NTE, Optional Hash) const; void dumpBucket(ScopedPrinter &W, uint32_t Bucket) const; - Expected extractAttributeEncoding(uint32_t *Offset); + Expected extractAttributeEncoding(uint64_t *Offset); Expected> - extractAttributeEncodings(uint32_t *Offset); + extractAttributeEncodings(uint64_t *Offset); - Expected extractAbbrev(uint32_t *Offset); + Expected extractAbbrev(uint64_t *Offset); public: - NameIndex(const DWARFDebugNames &Section, uint32_t Base) + NameIndex(const DWARFDebugNames &Section, uint64_t Base) : Section(Section), Base(Base) {} /// Reads offset of compilation unit CU. CU is 0-based. - uint32_t getCUOffset(uint32_t CU) const; + uint64_t getCUOffset(uint32_t CU) const; uint32_t getCUCount() const { return Hdr.CompUnitCount; } /// Reads offset of local type unit TU, TU is 0-based. 
- uint32_t getLocalTUOffset(uint32_t TU) const; + uint64_t getLocalTUOffset(uint32_t TU) const; uint32_t getLocalTUCount() const { return Hdr.LocalTypeUnitCount; } /// Reads signature of foreign type unit TU. TU is 0-based. @@ -451,7 +451,7 @@ public: return Abbrevs; } - Expected getEntry(uint32_t *Offset) const; + Expected getEntry(uint64_t *Offset) const; /// Look up all entries in this Name Index matching \c Key. iterator_range equal_range(StringRef Key) const; @@ -460,8 +460,8 @@ public: NameIterator end() const { return NameIterator(this, getNameCount() + 1); } Error extract(); - uint32_t getUnitOffset() const { return Base; } - uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; } + uint64_t getUnitOffset() const { return Base; } + uint64_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; } void dump(ScopedPrinter &W) const; friend class DWARFDebugNames; @@ -479,12 +479,12 @@ public: bool IsLocal; Optional CurrentEntry; - unsigned DataOffset = 0; ///< Offset into the section. + uint64_t DataOffset = 0; ///< Offset into the section. std::string Key; ///< The Key we are searching for. Optional Hash; ///< Hash of Key, if it has been computed. bool getEntryAtCurrentOffset(); - Optional findEntryOffsetInCurrentIndex(); + Optional findEntryOffsetInCurrentIndex(); bool findInCurrentIndex(); void searchFromStartOfCurrentIndex(); void next(); @@ -572,7 +572,7 @@ public: private: SmallVector NameIndices; - DenseMap CUToNameIndex; + DenseMap CUToNameIndex; public: DWARFDebugNames(const DWARFDataExtractor &AccelSection, @@ -591,7 +591,7 @@ public: /// Return the Name Index covering the compile unit at CUOffset, or nullptr if /// there is no Name Index covering that unit. 
- const NameIndex *getCUNameIndex(uint32_t CUOffset); + const NameIndex *getCUNameIndex(uint64_t CUOffset); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index c8ad19ad6bf..dfc778346db 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -23,7 +23,7 @@ namespace llvm { /// attributes in a DWARFDie. struct DWARFAttribute { /// The debug info/types offset for this attribute. - uint32_t Offset = 0; + uint64_t Offset = 0; /// The debug info/types section byte size of the data for this attribute. uint32_t ByteSize = 0; /// The attribute enumeration of this attribute. diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 23cf21c3523..fae163622ed 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -225,10 +225,10 @@ public: DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); /// Return the compile unit that includes an offset (relative to .debug_info). - DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); + DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset); /// Get a DIE given an exact offset. - DWARFDie getDIEForOffset(uint32_t Offset); + DWARFDie getDIEForOffset(uint64_t Offset); unsigned getMaxVersion() { // Ensure info units have been parsed to discover MaxVersion @@ -301,10 +301,10 @@ public: std::function RecoverableErrorCallback); DataExtractor getStringExtractor() const { - return DataExtractor(DObj->getStringSection(), false, 0); + return DataExtractor(DObj->getStrSection(), false, 0); } DataExtractor getLineStringExtractor() const { - return DataExtractor(DObj->getLineStringSection(), false, 0); + return DataExtractor(DObj->getLineStrSection(), false, 0); } /// Wraps the returned DIEs for a given address. 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h index 7c2a159b71f..980724c525d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h @@ -35,20 +35,25 @@ public: /// Extracts a value and applies a relocation to the result if /// one exists for the given offset. - uint64_t getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SectionIndex = nullptr) const; + uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off, + uint64_t *SectionIndex = nullptr, + Error *Err = nullptr) const; /// Extracts an address-sized value and applies a relocation to the result if /// one exists for the given offset. - uint64_t getRelocatedAddress(uint32_t *Off, uint64_t *SecIx = nullptr) const { + uint64_t getRelocatedAddress(uint64_t *Off, uint64_t *SecIx = nullptr) const { return getRelocatedValue(getAddressSize(), Off, SecIx); } + uint64_t getRelocatedAddress(Cursor &C, uint64_t *SecIx = nullptr) const { + return getRelocatedValue(getAddressSize(), &getOffset(C), SecIx, + &getError(C)); + } /// Extracts a DWARF-encoded pointer in \p Offset using \p Encoding. /// There is a DWARF encoding that uses a PC-relative adjustment. /// For these values, \p AbsPosOffset is used to fix them, which should /// reflect the absolute address of this pointer. - Optional getEncodedPointer(uint32_t *Offset, uint8_t Encoding, + Optional getEncodedPointer(uint64_t *Offset, uint8_t Encoding, uint64_t AbsPosOffset = 0) const; size_t size() const { return Section == nullptr ? 
0 : Section->Data.size(); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h index 28fd8484b4a..1398e16252a 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h @@ -20,7 +20,7 @@ namespace llvm { class raw_ostream; class DWARFAbbreviationDeclarationSet { - uint32_t Offset; + uint64_t Offset; /// Code of the first abbreviation, if all abbreviations in the set have /// consecutive codes. UINT32_MAX otherwise. uint32_t FirstAbbrCode; @@ -32,9 +32,9 @@ class DWARFAbbreviationDeclarationSet { public: DWARFAbbreviationDeclarationSet(); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } void dump(raw_ostream &OS) const; - bool extract(DataExtractor Data, uint32_t *OffsetPtr); + bool extract(DataExtractor Data, uint64_t *OffsetPtr); const DWARFAbbreviationDeclaration * getAbbreviationDeclaration(uint32_t AbbrCode) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h index a98bf282fe7..4539b9c9d58 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h @@ -45,7 +45,7 @@ public: private: dwarf::DwarfFormat Format; - uint32_t HeaderOffset; + uint64_t HeaderOffset; Header HeaderData; uint32_t DataSize = 0; std::vector Addrs; @@ -54,11 +54,11 @@ public: void clear(); /// Extract an entire table, including all addresses. 
- Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr, + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, std::function WarnCallback); - uint32_t getHeaderOffset() const { return HeaderOffset; } + uint64_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 5b6c578bc3b..ebe4ad6e24d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -49,7 +49,7 @@ private: using DescriptorColl = std::vector; using desc_iterator_range = iterator_range; - uint32_t Offset; + uint64_t Offset; Header HeaderData; DescriptorColl ArangeDescriptors; @@ -57,7 +57,7 @@ public: DWARFDebugArangeSet() { clear(); } void clear(); - bool extract(DataExtractor data, uint32_t *offset_ptr); + bool extract(DataExtractor data, uint64_t *offset_ptr); void dump(raw_ostream &OS) const; uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index 03223fbc80a..172f1d2c9db 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -28,7 +28,7 @@ private: void extract(DataExtractor DebugArangesData); /// Call appendRange multiple times and then call construct. 
- void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); + void appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC); void construct(); struct Range { @@ -60,10 +60,10 @@ private: struct RangeEndpoint { uint64_t Address; - uint32_t CUOffset; + uint64_t CUOffset; bool IsRangeStart; - RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart) + RangeEndpoint(uint64_t Address, uint64_t CUOffset, bool IsRangeStart) : Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {} bool operator<(const RangeEndpoint &Other) const { @@ -76,7 +76,7 @@ private: std::vector Endpoints; RangeColl Aranges; - DenseSet ParsedCUOffsets; + DenseSet ParsedCUOffsets; }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index d960f4bc9b1..c6539df0d75 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -69,7 +69,7 @@ public: /// starting at *Offset and ending at EndOffset. *Offset is updated /// to EndOffset upon successful parsing, or indicates the offset /// where a problem occurred in case an error is returned. - Error parse(DataExtractor Data, uint32_t *Offset, uint32_t EndOffset); + Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset); void dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, unsigned IndentLevel = 1) const; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index f50063b2437..ded960337ec 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -22,7 +22,7 @@ class DWARFUnit; /// DWARFDebugInfoEntry - A DIE with only the minimum required data. class DWARFDebugInfoEntry { /// Offset within the .debug_info of the start of this entry. 
- uint32_t Offset = 0; + uint64_t Offset = 0; /// The integer depth of this DIE within the compile unit DIEs where the /// compile/type unit DIE has a depth of zero. @@ -36,14 +36,14 @@ public: /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. - bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr); + bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr); /// High performance extraction should use this call. - bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, - const DWARFDataExtractor &DebugInfoData, uint32_t UEndOffset, + bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, + const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset, uint32_t Depth); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } uint32_t getDepth() const { return Depth; } dwarf::Tag getTag() const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index e7425c19237..c2be8304ad8 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/Path.h" #include #include #include @@ -128,13 +129,15 @@ public: bool hasFileAtIndex(uint64_t FileIndex) const; - bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, - DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const; + bool + getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, + DILineInfoSpecifier::FileLineInfoKind Kind, + std::string &Result, + sys::path::Style Style = sys::path::Style::native) const; void clear(); void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; - Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + 
Error parse(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U = nullptr); }; @@ -278,7 +281,7 @@ public: /// Parse prologue and all rows. Error parse( - DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback, raw_ostream *OS = nullptr); @@ -305,9 +308,9 @@ public: std::vector &Result) const; }; - const LineTable *getLineTable(uint32_t Offset) const; + const LineTable *getLineTable(uint64_t Offset) const; Expected getOrParseLineTable( - DWARFDataExtractor &DebugLineData, uint32_t Offset, + DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback); @@ -350,17 +353,17 @@ public: bool done() const { return Done; } /// Get the offset the parser has reached. - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } private: - DWARFUnit *prepareToParse(uint32_t Offset); - void moveToNextTable(uint32_t OldOffset, const Prologue &P); + DWARFUnit *prepareToParse(uint64_t Offset); + void moveToNextTable(uint64_t OldOffset, const Prologue &P); LineToUnitMap LineToUnit; DWARFDataExtractor &DebugLineData; const DWARFContext &Context; - uint32_t Offset = 0; + uint64_t Offset = 0; bool Done = false; }; @@ -377,7 +380,7 @@ private: struct Sequence Sequence; }; - using LineTableMapTy = std::map; + using LineTableMapTy = std::map; using LineTableIter = LineTableMapTy::iterator; using LineTableConstIter = LineTableMapTy::const_iterator; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index cced6048e81..c79d98e34f6 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -11,6 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include 
"llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include @@ -29,19 +30,20 @@ public: /// The ending address of the instruction range. uint64_t End; /// The location of the variable within the specified range. - SmallVector Loc; + SmallVector Loc; }; /// A list of locations that contain one variable. struct LocationList { /// The beginning offset where this location list is stored in the debug_loc /// section. - unsigned Offset; + uint64_t Offset; /// All the locations in which the variable is stored. SmallVector Entries; /// Dump this list on OS. - void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize, - const MCRegisterInfo *MRI, DWARFUnit *U, uint64_t BaseAddress, + void dump(raw_ostream &OS, uint64_t BaseAddress, bool IsLittleEndian, + unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const; }; @@ -58,7 +60,7 @@ private: public: /// Print the location lists found within the debug_loc section. - void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, + void dump(raw_ostream &OS, const MCRegisterInfo *RegInfo, DIDumpOptions DumpOpts, Optional Offset) const; /// Parse the debug_loc section accessible via the 'data' parameter using the @@ -68,25 +70,29 @@ public: /// Return the location list at the given offset or nullptr. 
LocationList const *getLocationListAtOffset(uint64_t Offset) const; - Optional parseOneLocationList(DWARFDataExtractor Data, - uint32_t *Offset); + Expected + parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset); }; class DWARFDebugLoclists { public: struct Entry { uint8_t Kind; + uint64_t Offset; uint64_t Value0; uint64_t Value1; - SmallVector Loc; + SmallVector Loc; + void dump(raw_ostream &OS, uint64_t &BaseAddr, bool IsLittleEndian, + unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent, size_t MaxEncodingStringLength) const; }; struct LocationList { - unsigned Offset; + uint64_t Offset; SmallVector Entries; void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *RegInfo, - DWARFUnit *U, unsigned Indent) const; + DWARFUnit *U, DIDumpOptions DumpOpts, unsigned Indent) const; }; private: @@ -99,15 +105,16 @@ private: bool IsLittleEndian; public: - void parse(DataExtractor data, unsigned Version); + void parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version); void dump(raw_ostream &OS, uint64_t BaseAddr, const MCRegisterInfo *RegInfo, - Optional Offset) const; + DIDumpOptions DumpOpts, Optional Offset) const; /// Return the location list at the given offset or nullptr. 
LocationList const *getLocationListAtOffset(uint64_t Offset) const; - static Optional - parseOneLocationList(DataExtractor Data, unsigned *Offset, unsigned Version); + static Expected parseOneLocationList(const DataExtractor &Data, + uint64_t *Offset, + unsigned Version); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index 99e91ca9031..ae57306b90e 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -25,7 +25,7 @@ class DWARFDebugPubTable { public: struct Entry { /// Section offset from the beginning of the compilation unit. - uint32_t SecOffset; + uint64_t SecOffset; /// An entry of the various gnu_pub* debug sections. dwarf::PubIndexEntryDescriptor Descriptor; @@ -50,7 +50,7 @@ public: /// The offset from the beginning of the .debug_info section of the /// compilation unit header referenced by the set. - uint32_t Offset; + uint64_t Offset; /// The size in bytes of the contents of the .debug_info section generated /// to represent that compilation unit. diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index a66f6029234..2f72c642a2d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -53,14 +53,13 @@ public: assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) return StartAddress == -1U; - else - return StartAddress == -1ULL; + return StartAddress == -1ULL; } }; private: /// Offset in .debug_ranges section. 
- uint32_t Offset; + uint64_t Offset; uint8_t AddressSize; std::vector Entries; @@ -69,7 +68,7 @@ public: void clear(); void dump(raw_ostream &OS) const; - Error extract(const DWARFDataExtractor &data, uint32_t *offset_ptr); + Error extract(const DWARFDataExtractor &data, uint64_t *offset_ptr); const std::vector &getEntries() { return Entries; } /// getAbsoluteRanges - Returns absolute address ranges defined by this range diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 167ddde3ec3..952c41e188c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -34,7 +34,7 @@ struct RangeListEntry : public DWARFListEntryBase { uint64_t Value0; uint64_t Value1; - Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t End, uint64_t *OffsetPtr); void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength, uint64_t &CurrentBase, DIDumpOptions DumpOpts, llvm::function_ref(uint32_t)> diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index 21e68f983bb..f7f08b4a499 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -63,7 +63,7 @@ public: /// Get the absolute offset into the debug info or types section. /// /// \returns the DIE offset or -1U if invalid. - uint32_t getOffset() const { + uint64_t getOffset() const { assert(isValid() && "must check validity prior to calling"); return Die->getOffset(); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/include/llvm/DebugInfo/DWARF/DWARFExpression.h index f066dd58d60..456d9df957a 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -77,18 +77,18 @@ public: uint8_t Opcode; ///< The Op Opcode, DW_OP_. 
Description Desc; bool Error; - uint32_t EndOffset; + uint64_t EndOffset; uint64_t Operands[2]; - uint32_t OperandEndOffsets[2]; + uint64_t OperandEndOffsets[2]; public: Description &getDescription() { return Desc; } uint8_t getCode() { return Opcode; } uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; } - uint32_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } - uint32_t getEndOffset() { return EndOffset; } + uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } + uint64_t getEndOffset() { return EndOffset; } bool extract(DataExtractor Data, uint16_t Version, uint8_t AddressSize, - uint32_t Offset); + uint64_t Offset); bool isError() { return Error; } bool print(raw_ostream &OS, const DWARFExpression *Expr, const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH); @@ -101,9 +101,9 @@ public: Operation> { friend class DWARFExpression; const DWARFExpression *Expr; - uint32_t Offset; + uint64_t Offset; Operation Op; - iterator(const DWARFExpression *Expr, uint32_t Offset) + iterator(const DWARFExpression *Expr, uint64_t Offset) : Expr(Expr), Offset(Offset) { Op.Error = Offset >= Expr->Data.getData().size() || diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 731e71ed9ea..6fec6fcb6b3 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -70,7 +70,7 @@ public: static DWARFFormValue createFromBlockValue(dwarf::Form F, ArrayRef D); static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit, - uint32_t *OffsetPtr); + uint64_t *OffsetPtr); dwarf::Form getForm() const { return Form; } uint64_t getRawUValue() const { return Value.uval; } @@ -87,12 +87,12 @@ public: /// in \p FormParams is needed to interpret some forms. The optional /// \p Context and \p Unit allows extracting information if the form refers /// to other sections (e.g., .debug_str). 
- bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, + bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr, dwarf::FormParams FormParams, const DWARFContext *Context = nullptr, const DWARFUnit *Unit = nullptr); - bool extractValue(const DWARFDataExtractor &Data, uint32_t *OffsetPtr, + bool extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr, dwarf::FormParams FormParams, const DWARFUnit *U) { return extractValue(Data, OffsetPtr, FormParams, nullptr, U); } @@ -128,7 +128,7 @@ public: /// \param OffsetPtr A reference to the offset that will be updated. /// \param Params DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. - bool skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, + bool skipValue(DataExtractor DebugInfoData, uint64_t *OffsetPtr, const dwarf::FormParams Params) const { return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, Params); } @@ -144,7 +144,7 @@ public: /// \param FormParams DWARF parameters to help interpret forms. /// \returns true on success, false if the form was not skipped. static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const dwarf::FormParams FormParams); private: diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h index a1ea69b040f..496fdb2477f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -26,7 +26,7 @@ namespace llvm { /// entries. struct DWARFListEntryBase { /// The offset at which the entry is located in the section. - uint32_t Offset; + uint64_t Offset; /// The DWARF encoding (DW_RLE_* or DW_LLE_*). uint8_t EntryKind; /// The index of the section this entry belongs to. 
@@ -46,8 +46,8 @@ public: const ListEntries &getEntries() const { return Entries; } bool empty() const { return Entries.empty(); } void clear() { Entries.clear(); } - Error extract(DWARFDataExtractor Data, uint32_t HeaderOffset, uint32_t End, - uint32_t *OffsetPtr, StringRef SectionName, + Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset, uint64_t End, + uint64_t *OffsetPtr, StringRef SectionName, StringRef ListStringName); }; @@ -57,7 +57,7 @@ class DWARFListTableHeader { struct Header { /// The total length of the entries for this table, not including the length /// field itself. - uint32_t Length = 0; + uint64_t Length = 0; /// The DWARF version number. uint16_t Version; /// The size in bytes of an address on the target architecture. For @@ -75,12 +75,12 @@ class DWARFListTableHeader { /// The offset table, which contains offsets to the individual list entries. /// It is used by forms such as DW_FORM_rnglistx. /// FIXME: Generate the table and use the appropriate forms. - std::vector Offsets; + std::vector Offsets; /// The table's format, either DWARF32 or DWARF64. dwarf::DwarfFormat Format; /// The offset at which the header (and hence the table) is located within /// its section. - uint32_t HeaderOffset; + uint64_t HeaderOffset; /// The name of the section the list is located in. StringRef SectionName; /// A characterization of the list for dumping purposes, e.g. 
"range" or @@ -95,28 +95,40 @@ public: HeaderData = {}; Offsets.clear(); } - uint32_t getHeaderOffset() const { return HeaderOffset; } + uint64_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } - uint32_t getLength() const { return HeaderData.Length; } + uint64_t getLength() const { return HeaderData.Length; } uint16_t getVersion() const { return HeaderData.Version; } StringRef getSectionName() const { return SectionName; } StringRef getListTypeString() const { return ListTypeString; } dwarf::DwarfFormat getFormat() const { return Format; } + /// Return the size of the table header including the length but not including + /// the offsets. + static uint8_t getHeaderSize(dwarf::DwarfFormat Format) { + switch (Format) { + case dwarf::DwarfFormat::DWARF32: + return 12; + case dwarf::DwarfFormat::DWARF64: + return 20; + } + llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64"); + } + void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; - Optional getOffsetEntry(uint32_t Index) const { + Optional getOffsetEntry(uint32_t Index) const { if (Index < Offsets.size()) return Offsets[Index]; return None; } /// Extract the table header and the array of offsets. - Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr); /// Returns the length of the table, including the length field, or 0 if the /// length has not been determined (e.g. because the table has not yet been /// parsed, or there was a problem in parsing). - uint32_t length() const; + uint64_t length() const; }; /// A class representing a table of lists as specified in the DWARF v5 @@ -128,7 +140,7 @@ template class DWARFListTableBase { DWARFListTableHeader Header; /// A mapping between file offsets and lists. It is used to find a particular /// list based on an offset (obtained from DW_AT_ranges, for example). 
- std::map ListMap; + std::map ListMap; /// This string is displayed as a heading before the list is dumped /// (e.g. "ranges:"). StringRef HeaderString; @@ -144,17 +156,18 @@ public: ListMap.clear(); } /// Extract the table header and the array of offsets. - Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint32_t *OffsetPtr) { + Error extractHeaderAndOffsets(DWARFDataExtractor Data, uint64_t *OffsetPtr) { return Header.extract(Data, OffsetPtr); } /// Extract an entire table, including all list entries. - Error extract(DWARFDataExtractor Data, uint32_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr); /// Look up a list based on a given offset. Extract it and enter it into the /// list map if necessary. - Expected findList(DWARFDataExtractor Data, uint32_t Offset); + Expected findList(DWARFDataExtractor Data, uint64_t Offset); - uint32_t getHeaderOffset() const { return Header.getHeaderOffset(); } + uint64_t getHeaderOffset() const { return Header.getHeaderOffset(); } uint8_t getAddrSize() const { return Header.getAddrSize(); } + dwarf::DwarfFormat getFormat() const { return Header.getFormat(); } void dump(raw_ostream &OS, llvm::function_ref(uint32_t)> @@ -162,37 +175,31 @@ public: DIDumpOptions DumpOpts = {}) const; /// Return the contents of the offset entry designated by a given index. - Optional getOffsetEntry(uint32_t Index) const { + Optional getOffsetEntry(uint32_t Index) const { return Header.getOffsetEntry(Index); } /// Return the size of the table header including the length but not including /// the offsets. This is dependent on the table format, which is unambiguously /// derived from parsing the table. 
uint8_t getHeaderSize() const { - switch (Header.getFormat()) { - case dwarf::DwarfFormat::DWARF32: - return 12; - case dwarf::DwarfFormat::DWARF64: - return 20; - } - llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64"); + return DWARFListTableHeader::getHeaderSize(getFormat()); } - uint32_t length() { return Header.length(); } + uint64_t length() { return Header.length(); } }; template Error DWARFListTableBase::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { clear(); if (Error E = extractHeaderAndOffsets(Data, OffsetPtr)) return E; Data.setAddressSize(Header.getAddrSize()); - uint32_t End = getHeaderOffset() + Header.length(); + uint64_t End = getHeaderOffset() + Header.length(); while (*OffsetPtr < End) { DWARFListType CurrentList; - uint32_t Off = *OffsetPtr; + uint64_t Off = *OffsetPtr; if (Error E = CurrentList.extract(Data, getHeaderOffset(), End, OffsetPtr, Header.getSectionName(), Header.getListTypeString())) @@ -208,13 +215,13 @@ Error DWARFListTableBase::extract(DWARFDataExtractor Data, template Error DWARFListType::extract(DWARFDataExtractor Data, - uint32_t HeaderOffset, uint32_t End, - uint32_t *OffsetPtr, + uint64_t HeaderOffset, uint64_t End, + uint64_t *OffsetPtr, StringRef SectionName, StringRef ListTypeString) { if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End) return createStringError(errc::invalid_argument, - "invalid %s list offset 0x%" PRIx32, + "invalid %s list offset 0x%" PRIx64, ListTypeString.data(), *OffsetPtr); Entries.clear(); while (*OffsetPtr < End) { @@ -227,7 +234,7 @@ Error DWARFListType::extract(DWARFDataExtractor Data, } return createStringError(errc::illegal_byte_sequence, "no end of list marker detected at end of %s table " - "starting at offset 0x%" PRIx32, + "starting at offset 0x%" PRIx64, SectionName.data(), HeaderOffset); } @@ -261,15 +268,15 @@ void DWARFListTableBase::dump( template Expected DWARFListTableBase::findList(DWARFDataExtractor Data, - uint32_t Offset) { + 
uint64_t Offset) { auto Entry = ListMap.find(Offset); if (Entry != ListMap.end()) return Entry->second; // Extract the list from the section and enter it into the list map. DWARFListType List; - uint32_t End = getHeaderOffset() + Header.length(); - uint32_t StartingOffset = Offset; + uint64_t End = getHeaderOffset() + Header.length(); + uint64_t StartingOffset = Offset; if (Error E = List.extract(Data, getHeaderOffset(), End, &Offset, Header.getSectionName(), Header.getListTypeString())) diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h index 1bba74a25d0..88fe3f434ed 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFObject.h +++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h @@ -39,20 +39,20 @@ public: virtual StringRef getAbbrevSection() const { return ""; } virtual const DWARFSection &getLocSection() const { return Dummy; } virtual const DWARFSection &getLoclistsSection() const { return Dummy; } - virtual StringRef getARangeSection() const { return ""; } - virtual StringRef getDebugFrameSection() const { return ""; } - virtual StringRef getEHFrameSection() const { return ""; } + virtual StringRef getArangesSection() const { return ""; } + virtual const DWARFSection &getFrameSection() const { return Dummy; } + virtual const DWARFSection &getEHFrameSection() const { return Dummy; } virtual const DWARFSection &getLineSection() const { return Dummy; } - virtual StringRef getLineStringSection() const { return ""; } - virtual StringRef getStringSection() const { return ""; } - virtual const DWARFSection &getRangeSection() const { return Dummy; } + virtual StringRef getLineStrSection() const { return ""; } + virtual StringRef getStrSection() const { return ""; } + virtual const DWARFSection &getRangesSection() const { return Dummy; } virtual const DWARFSection &getRnglistsSection() const { return Dummy; } virtual StringRef getMacinfoSection() const { return ""; } - virtual const DWARFSection &getPubNamesSection() const { 
return Dummy; } - virtual const DWARFSection &getPubTypesSection() const { return Dummy; } - virtual const DWARFSection &getGnuPubNamesSection() const { return Dummy; } - virtual const DWARFSection &getGnuPubTypesSection() const { return Dummy; } - virtual const DWARFSection &getStringOffsetSection() const { return Dummy; } + virtual const DWARFSection &getPubnamesSection() const { return Dummy; } + virtual const DWARFSection &getPubtypesSection() const { return Dummy; } + virtual const DWARFSection &getGnuPubnamesSection() const { return Dummy; } + virtual const DWARFSection &getGnuPubtypesSection() const { return Dummy; } + virtual const DWARFSection &getStrOffsetsSection() const { return Dummy; } virtual void forEachInfoDWOSections(function_ref F) const {} virtual void @@ -60,11 +60,11 @@ public: virtual StringRef getAbbrevDWOSection() const { return ""; } virtual const DWARFSection &getLineDWOSection() const { return Dummy; } virtual const DWARFSection &getLocDWOSection() const { return Dummy; } - virtual StringRef getStringDWOSection() const { return ""; } - virtual const DWARFSection &getStringOffsetDWOSection() const { + virtual StringRef getStrDWOSection() const { return ""; } + virtual const DWARFSection &getStrOffsetsDWOSection() const { return Dummy; } - virtual const DWARFSection &getRangeDWOSection() const { return Dummy; } + virtual const DWARFSection &getRangesDWOSection() const { return Dummy; } virtual const DWARFSection &getRnglistsDWOSection() const { return Dummy; } virtual const DWARFSection &getAddrSection() const { return Dummy; } virtual const DWARFSection &getAppleNamesSection() const { return Dummy; } @@ -72,7 +72,7 @@ public: virtual const DWARFSection &getAppleNamespacesSection() const { return Dummy; } - virtual const DWARFSection &getDebugNamesSection() const { return Dummy; } + virtual const DWARFSection &getNamesSection() const { return Dummy; } virtual const DWARFSection &getAppleObjCSection() const { return Dummy; } virtual 
StringRef getCUIndexSection() const { return ""; } virtual StringRef getGdbIndexSection() const { return ""; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index 90d89375fd3..c95bdcbd8a4 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -34,7 +34,7 @@ public: LS, LE, IsDWO, UnitVector) {} uint64_t getTypeHash() const { return getHeader().getTypeHash(); } - uint32_t getTypeOffset() const { return getHeader().getTypeOffset(); } + uint64_t getTypeOffset() const { return getHeader().getTypeOffset(); } void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override; // Enable LLVM-style RTTI. diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index f9f90db3189..51de114a350 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -45,7 +45,7 @@ class DWARFUnit; /// parse the header before deciding what specific kind of unit to construct. class DWARFUnitHeader { // Offset within section. - uint32_t Offset = 0; + uint64_t Offset = 0; // Version, address size, and DWARF format. dwarf::FormParams FormParams; uint64_t Length = 0; @@ -56,7 +56,7 @@ class DWARFUnitHeader { // For type units only. uint64_t TypeHash = 0; - uint32_t TypeOffset = 0; + uint64_t TypeOffset = 0; // For v5 split or skeleton compile units only. Optional DWOId; @@ -70,10 +70,10 @@ class DWARFUnitHeader { public: /// Parse a unit header from \p debug_info starting at \p offset_ptr. 
bool extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, - uint32_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO, + uint64_t *offset_ptr, DWARFSectionKind Kind = DW_SECT_INFO, const DWARFUnitIndex *Index = nullptr, const DWARFUnitIndex::Entry *Entry = nullptr); - uint32_t getOffset() const { return Offset; } + uint64_t getOffset() const { return Offset; } const dwarf::FormParams &getFormParams() const { return FormParams; } uint16_t getVersion() const { return FormParams.Version; } dwarf::DwarfFormat getFormat() const { return FormParams.Format; } @@ -91,16 +91,17 @@ public: } const DWARFUnitIndex::Entry *getIndexEntry() const { return IndexEntry; } uint64_t getTypeHash() const { return TypeHash; } - uint32_t getTypeOffset() const { return TypeOffset; } + uint64_t getTypeOffset() const { return TypeOffset; } uint8_t getUnitType() const { return UnitType; } bool isTypeUnit() const { return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type; } uint8_t getSize() const { return Size; } - uint32_t getNextUnitOffset() const { - return Offset + Length + - (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) + - FormParams.getDwarfOffsetByteSize(); + uint8_t getUnitLengthFieldByteSize() const { + return dwarf::getUnitLengthFieldByteSize(FormParams.Format); + } + uint64_t getNextUnitOffset() const { + return Offset + Length + getUnitLengthFieldByteSize(); } }; @@ -110,7 +111,7 @@ const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, /// Describe a collection of units. Intended to hold all units either from /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo. 
class DWARFUnitVector final : public SmallVector, 1> { - std::function(uint32_t, DWARFSectionKind, + std::function(uint64_t, DWARFSectionKind, const DWARFSection *, const DWARFUnitIndex::Entry *)> Parser; @@ -121,7 +122,7 @@ public: using iterator = typename UnitVector::iterator; using iterator_range = llvm::iterator_range; - DWARFUnit *getUnitForOffset(uint32_t Offset) const; + DWARFUnit *getUnitForOffset(uint64_t Offset) const; DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E); /// Read units from a .debug_info or .debug_types section. Calls made @@ -197,7 +198,7 @@ class DWARFUnit { DWARFUnitHeader Header; const DWARFDebugAbbrev *Abbrev; const DWARFSection *RangeSection; - uint32_t RangeSectionBase; + uint64_t RangeSectionBase; /// We either keep track of the location list section or its data, depending /// on whether we are handling a split DWARF section or not. union { @@ -275,7 +276,7 @@ public: const DWARFSection &getInfoSection() const { return InfoSection; } const DWARFSection *getLocSection() const { return LocSection; } StringRef getLocSectionData() const { return LocSectionData; } - uint32_t getOffset() const { return Header.getOffset(); } + uint64_t getOffset() const { return Header.getOffset(); } const dwarf::FormParams &getFormParams() const { return Header.getFormParams(); } @@ -285,10 +286,10 @@ public: uint8_t getDwarfOffsetByteSize() const { return Header.getDwarfOffsetByteSize(); } - uint32_t getLength() const { return Header.getLength(); } + uint64_t getLength() const { return Header.getLength(); } uint8_t getUnitType() const { return Header.getUnitType(); } bool isTypeUnit() const { return Header.isTypeUnit(); } - uint32_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } + uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } const DWARFSection &getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } const DWARFSection 
&getStringOffsetSection() const { @@ -303,7 +304,7 @@ public: /// Recursively update address to Die map. void updateAddressDieMap(DWARFDie Die); - void setRangesSection(const DWARFSection *RS, uint32_t Base) { + void setRangesSection(const DWARFSection *RS, uint64_t Base) { RangeSection = RS; RangeSectionBase = Base; } @@ -322,7 +323,7 @@ public: /// .debug_ranges section. If the extraction is unsuccessful, an error /// is returned. Successful extraction requires that the compile unit /// has already been extracted. - Error extractRangeList(uint32_t RangeListOffset, + Error extractRangeList(uint64_t RangeListOffset, DWARFDebugRangeList &RangeList) const; void clear(); @@ -405,7 +406,7 @@ public: /// Return a vector of address ranges resulting from a (possibly encoded) /// range list starting at a given offset in the appropriate ranges section. - Expected findRnglistFromOffset(uint32_t Offset); + Expected findRnglistFromOffset(uint64_t Offset); /// Return a vector of address ranges retrieved from an encoded range /// list whose offset is found via a table lookup given an index (DWARF v5 @@ -415,7 +416,7 @@ public: /// Return a rangelist's offset based on an index. The index designates /// an entry in the rangelist table's offset array and is supplied by /// DW_FORM_rnglistx. - Optional getRnglistOffset(uint32_t Index) { + Optional getRnglistOffset(uint32_t Index) { if (RngListTable) return RngListTable->getOffsetEntry(Index); return None; @@ -470,7 +471,7 @@ public: /// unit's DIE vector. /// /// The unit needs to have its DIEs extracted for this method to work. - DWARFDie getDIEForOffset(uint32_t Offset) { + DWARFDie getDIEForOffset(uint64_t Offset) { extractDIEsIfNeeded(false); assert(!DieArray.empty()); auto It = @@ -495,15 +496,19 @@ public: } virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; + + Error tryExtractDIEsIfNeeded(bool CUDieOnly); + private: /// Size in bytes of the .debug_info data associated with this compile unit. 
size_t getDebugInfoSize() const { - return Header.getLength() + 4 - getHeaderSize(); + return Header.getLength() + Header.getUnitLengthFieldByteSize() - + getHeaderSize(); } /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it - /// hasn't already been done. Returns the number of DIEs parsed at this call. - size_t extractDIEsIfNeeded(bool CUDieOnly); + /// hasn't already been done + void extractDIEsIfNeeded(bool CUDieOnly); /// extractDIEsToVector - Appends all parsed DIEs to a vector. void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h index fc8c707c512..684103aac2f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h @@ -37,7 +37,7 @@ class DWARFUnitIndex { uint32_t NumUnits; uint32_t NumBuckets = 0; - bool parse(DataExtractor IndexData, uint32_t *OffsetPtr); + bool parse(DataExtractor IndexData, uint64_t *OffsetPtr); void dump(raw_ostream &OS) const; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index f1268f22027..a4a3a11d441 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -94,7 +94,7 @@ private: /// A map that tracks all references (converted absolute references) so we /// can verify each reference points to a valid DIE and not an offset that /// lies between to valid DIEs. - std::map> ReferenceToDIEOffsets; + std::map> ReferenceToDIEOffsets; uint32_t NumDebugLineErrors = 0; // Used to relax some checks that do not currently work portably bool IsObjectFile; @@ -138,7 +138,7 @@ private: /// /// \returns true if the header is verified successfully, false otherwise. 
bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, - uint32_t *Offset, unsigned UnitIndex, uint8_t &UnitType, + uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64); /// Verifies the header of a unit in a .debug_info or .debug_types section. diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h index 228b4efa065..49e7fc9c429 100644 --- a/include/llvm/DebugInfo/GSYM/FileEntry.h +++ b/include/llvm/DebugInfo/GSYM/FileEntry.h @@ -1,9 +1,8 @@ //===- FileEntry.h ----------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/GSYM/FileWriter.h b/include/llvm/DebugInfo/GSYM/FileWriter.h new file mode 100644 index 00000000000..cd568765a4f --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -0,0 +1,124 @@ +//===- FileWriter.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H +#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +#include +#include +#include + +namespace llvm { +class raw_pwrite_stream; + +namespace gsym { + +/// A simplified binary data writer class that doesn't require targets, target +/// definitions, architectures, or require any other optional compile time +/// libraries to be enabled via the build process. This class needs the ability +/// to seek to different spots in the binary stream that it produces to fix up +/// offsets and sizes. +class FileWriter { + llvm::raw_pwrite_stream &OS; + llvm::support::endianness ByteOrder; +public: + FileWriter(llvm::raw_pwrite_stream &S, llvm::support::endianness B) + : OS(S), ByteOrder(B) {} + ~FileWriter(); + /// Write a single uint8_t value into the stream at the current file + /// position. + /// + /// \param Value The value to write into the stream. + void writeU8(uint8_t Value); + + /// Write a single uint16_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. + void writeU16(uint16_t Value); + + /// Write a single uint32_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. + void writeU32(uint32_t Value); + + /// Write a single uint64_t value into the stream at the current file + /// position. The value will be byte swapped if needed to match the byte + /// order specified during construction. + /// + /// \param Value The value to write into the stream. 
+ void writeU64(uint64_t Value); + + /// Write the value into the stream encoded using signed LEB128 at the + /// current file position. + /// + /// \param Value The value to write into the stream. + void writeSLEB(int64_t Value); + + /// Write the value into the stream encoded using unsigned LEB128 at the + /// current file position. + /// + /// \param Value The value to write into the stream. + void writeULEB(uint64_t Value); + + /// Write an array of uint8_t values into the stream at the current file + /// position. + /// + /// \param Data An array of values to write into the stream. + void writeData(llvm::ArrayRef Data); + + /// Write a NULL terminated C string into the stream at the current file + /// position. The entire contents of Str will be written into the stream at + /// the current file position and then an extra NULL termination byte will be + /// written. It is up to the user to ensure that Str doesn't contain any NULL + /// characters unless the additional NULL characters are desired. + /// + /// \param Str The value to write into the stream. + void writeNullTerminated(llvm::StringRef Str); + + /// Fixup a uint32_t value at the specified offset in the stream. This + /// function will save the current file position, seek to the specified + /// offset, overwrite the data using Value, and then restore the file + /// position to the previous file position. + /// + /// \param Value The value to write into the stream. + /// \param Offset The offset at which to write the Value within the stream. + void fixup32(uint32_t Value, uint64_t Offset); + + /// Pad with zeroes at the current file position until the current file + /// position matches the specified alignment. + /// + /// \param Align An integer specifying the desired alignment. This does not + /// need to be a power of two. + void alignTo(size_t Align); + + /// Return the current offset within the file. + /// + /// \return The unsigned offset from the start of the file of the current + /// file position. 
+ uint64_t tell(); + + llvm::raw_pwrite_stream &get_stream() { + return OS; + } + +private: + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h index eedb1e638fd..63e18bb2ecd 100644 --- a/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -1,17 +1,17 @@ //===- FunctionInfo.h -------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H +#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" -#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" #include @@ -21,41 +21,125 @@ namespace llvm { class raw_ostream; namespace gsym { -/// Function information in GSYM files encodes information for one -/// contiguous address range. The name of the function is encoded as -/// a string table offset and allows multiple functions with the same -/// name to share the name string in the string table. Line tables are -/// stored in a sorted vector of gsym::LineEntry objects and are split -/// into line tables for each function. If a function has a discontiguous -/// range, it will be split into two gsym::FunctionInfo objects. 
If the -/// function has inline functions, the information will be encoded in -/// the "Inline" member, see gsym::InlineInfo for more information. +/// Function information in GSYM files encodes information for one contiguous +/// address range. If a function has discontiguous address ranges, they will +/// need to be encoded using multiple FunctionInfo objects. +/// +/// ENCODING +/// +/// The function information gets the function start address as an argument +/// to the FunctionInfo::decode(...) function. This information is calculated +/// from the GSYM header and an address offset from the GSYM address offsets +/// table. The encoded FunctionInfo information must be aligned to a 4 byte +/// boundary. +/// +/// The encoded data for a FunctionInfo starts with fixed data that all +/// function info objects have: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t Size The size in bytes of this function. +/// uint32_t Name The string table offset of the function name. +/// +/// The optional data in a FunctionInfo object follows this fixed information +/// and consists of a stream of tuples that consist of: +/// +/// ENCODING NAME DESCRIPTION +/// ========= =========== ==================================================== +/// uint32_t InfoType An "InfoType" enumeration that describes the type +/// of optional data that is encoded. +/// uint32_t InfoLength The size in bytes of the encoded data that +/// immediately follows this length if this value is +/// greater than zero. +/// uint8_t[] InfoData Encoded bytes that represent the data for the +/// "InfoType". These bytes are only present if +/// "InfoLength" is greater than zero. 
+/// +/// The "InfoType" is an enumeration: +/// +/// enum InfoType { +/// EndOfList = 0u, +/// LineTableInfo = 1u, +/// InlineInfo = 2u +/// }; +/// +/// This stream of tuples is terminated by an "InfoType" whose value is +/// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of +/// the optional information list. This format allows us to add new optional +/// information data to a FunctionInfo object over time and allows older +/// clients to still parse the format and skip over any data that they don't +/// understand or want to parse. +/// +/// So the function information encoding essentially looks like: +/// +/// struct { +/// uint32_t Size; +/// uint32_t Name; +/// struct { +/// uint32_t InfoType; +/// uint32_t InfoLength; +/// uint8_t InfoData[InfoLength]; +/// }[N]; +/// } +/// +/// Where "N" is the number of tuples. struct FunctionInfo { AddressRange Range; uint32_t Name; ///< String table offset in the string table. - std::vector Lines; - InlineInfo Inline; + llvm::Optional OptLineTable; + llvm::Optional Inline; FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) : Range(Addr, Addr + Size), Name(N) {} + /// Query if a FunctionInfo has rich debug info. + /// + /// \returns A bool that indicates if this object has something else than + /// range and name. When converting information from a symbol table and from + /// debug info, we might end up with multiple FunctionInfo objects for the + /// same range and we need to be able to tell which one is the better object + /// to use. bool hasRichInfo() const { - /// Returns whether we have something else than range and name. When - /// converting information from a symbol table and from debug info, we - /// might end up with multiple FunctionInfo objects for the same range - /// and we need to be able to tell which one is the better object to use. 
- return !Lines.empty() || Inline.isValid(); + return OptLineTable.hasValue() || Inline.hasValue(); } + /// Query if a FunctionInfo object is valid. + /// + /// Address and size can be zero and there can be no line entries for a + /// symbol so the only indication this entry is valid is if the name is + /// not zero. This can happen when extracting information from symbol + /// tables that do not encode symbol sizes. In that case only the + /// address and name will be filled in. + /// + /// \returns A boolean indicating if this FunctionInfo is valid. bool isValid() const { - /// Address and size can be zero and there can be no line entries for a - /// symbol so the only indication this entry is valid is if the name is - /// not zero. This can happen when extracting information from symbol - /// tables that do not encode symbol sizes. In that case only the - /// address and name will be filled in. return Name != 0; } + /// Decode an object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The FunctionInfo's start address and will be used as the + /// base address when decoding any contained information like the line table + /// and the inline info. + /// + /// \returns An FunctionInfo or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates failure or the offset of the + /// function info that was successfully written into the stream. 
+ llvm::Expected encode(FileWriter &O) const; + uint64_t startAddress() const { return Range.Start; } uint64_t endAddress() const { return Range.End; } uint64_t size() const { return Range.size(); } @@ -66,14 +150,14 @@ struct FunctionInfo { void clear() { Range = {0, 0}; Name = 0; - Lines.clear(); - Inline.clear(); + OptLineTable = None; + Inline = None; } }; inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) { return LHS.Range == RHS.Range && LHS.Name == RHS.Name && - LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline; + LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline; } inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) { return !(LHS == RHS); @@ -89,14 +173,10 @@ inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) { return LHS.Range < RHS.Range; // Then sort by inline - if (LHS.Inline.isValid() != RHS.Inline.isValid()) - return RHS.Inline.isValid(); + if (LHS.Inline.hasValue() != RHS.Inline.hasValue()) + return RHS.Inline.hasValue(); - // If the number of lines is the same, then compare line table entries - if (LHS.Lines.size() == RHS.Lines.size()) - return LHS.Lines < RHS.Lines; - // Then sort by number of line table entries (more is better) - return LHS.Lines.size() < RHS.Lines.size(); + return LHS.OptLineTable < RHS.OptLineTable; } raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R); diff --git a/include/llvm/DebugInfo/GSYM/GsymCreator.h b/include/llvm/DebugInfo/GSYM/GsymCreator.h new file mode 100644 index 00000000000..12c8187132b --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -0,0 +1,229 @@ +//===- GsymCreator.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H +#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H + +#include +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace gsym { +class FileWriter; + +/// GsymCreator is used to emit GSYM data to a stand alone file or section +/// within a file. +/// +/// The GsymCreator is designed to be used in 3 stages: +/// - Create FunctionInfo objects and add them +/// - Finalize the GsymCreator object +/// - Save to file or section +/// +/// The first stage involves creating FunctionInfo objects from another source +/// of information like compiler debug info metadata, DWARF or Breakpad files. +/// Any strings in the FunctionInfo or contained information, like InlineInfo +/// or LineTable objects, should get the string table offsets by calling +/// GsymCreator::insertString(...). Any file indexes that are needed should be +/// obtained by calling GsymCreator::insertFile(...). All of the function calls +/// in GsymCreator are thread safe. This allows multiple threads to create and +/// add FunctionInfo objects while parsing debug information. +/// +/// Once all of the FunctionInfo objects have been added, the +/// GsymCreator::finalize(...) must be called prior to saving. This function +/// will sort the FunctionInfo objects, finalize the string table, and do any +/// other passes on the information needed to prepare the information to be +/// saved. +/// +/// Once the object has been finalized, it can be saved to a file or section. 
+/// +/// ENCODING +/// +/// GSYM files are designed to be memory mapped into a process as shared, read +/// only data, and used as is. +/// +/// The GSYM file format when in a stand alone file consists of: +/// - Header +/// - Address Table +/// - Function Info Offsets +/// - File Table +/// - String Table +/// - Function Info Data +/// +/// HEADER +/// +/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h". +/// +/// ADDRESS TABLE +/// +/// The address table immediately follows the header in the file and consists +/// of Header.NumAddresses address offsets. These offsets are sorted and can be +/// binary searched for efficient lookups. Addresses in the address table are +/// stored as offsets from a 64 bit base address found in Header.BaseAddress. +/// This allows the address table to contain 8, 16, or 32 bit offsets. This allows +/// the address table to not require full 64 bit addresses for each address. +/// The resulting GSYM size is smaller and causes fewer pages to be touched +/// during address lookups when the address table is smaller. The size of the +/// address offsets in the address table is specified in the header in +/// Header.AddrOffSize. The first offset in the address table is aligned to +/// Header.AddrOffSize alignment to ensure efficient access when loaded into +/// memory. +/// +/// FUNCTION INFO OFFSETS TABLE +/// +/// The function info offsets table immediately follows the address table and +/// consists of Header.NumAddresses 32 bit file offsets: one for each address +/// in the address table. This data is aligned to a 4 byte boundary. The +/// offsets in this table are the relative offsets from the start offset of the +/// GSYM header and point to the function info data for each address in the +/// address table. Keeping this data separate from the address table helps to +/// reduce the number of pages that are touched when address lookups occur on a +/// GSYM file. 
+/// +/// FILE TABLE +/// +/// The file table immediately follows the function info offsets table. The +/// encoding of the FileTable is: +/// +/// struct FileTable { +/// uint32_t Count; +/// FileEntry Files[]; +/// }; +/// +/// The file table starts with a 32 bit count of the number of files that are +/// used in all of the function info, followed by that number of FileEntry +/// structures. The file table is aligned to a 4 byte boundary, Each file in +/// the file table is represented with a FileEntry structure. +/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details. +/// +/// STRING TABLE +/// +/// The string table follows the file table in stand alone GSYM files and +/// contains all strings for everything contained in the GSYM file. Any string +/// data should be added to the string table and any references to strings +/// inside GSYM information must be stored as 32 bit string table offsets into +/// this string table. The string table always starts with an empty string at +/// offset zero and is followed by any strings needed by the GSYM information. +/// The start of the string table is not aligned to any boundary. +/// +/// FUNCTION INFO DATA +/// +/// The function info data is the payload that contains information about the +/// address that is being looked up. It contains all of the encoded +/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an +/// entry in the Function Info Offsets Table. For details on the exact encoding +/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h". +class GsymCreator { + // Private member variables require Mutex protections + mutable std::recursive_mutex Mutex; + std::vector Funcs; + StringTableBuilder StrTab; + DenseMap FileEntryToIndex; + std::vector Files; + std::vector UUID; + bool Finalized = false; + +public: + + GsymCreator(); + + /// Save a GSYM file to a stand alone file. + /// + /// \param Path The file path to save the GSYM file to. 
+ /// \param ByteOrder The endianness to use when saving the file. + /// \returns An error object that indicates success or failure of the save. + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + + /// Encode a GSYM into the file writer stream at the current position. + /// + /// \param O The stream to save the binary data to + /// \returns An error object that indicates success or failure of the save. + llvm::Error encode(FileWriter &O) const; + + /// Insert a string into the GSYM string table. + /// + /// All strings used by GSYM files must be uniqued by adding them to this + /// string pool and using the returned offset for any string values. + /// + /// \param S The string to insert into the string table. + /// \returns The unique 32 bit offset into the string table. + uint32_t insertString(StringRef S); + + /// Insert a file into this GSYM creator. + /// + /// Inserts a file by adding a FileEntry into the "Files" member variable if + /// the file has not already been added. The file path is split into + /// directory and filename which are both added to the string table. This + /// allows paths to be stored efficiently by reusing the directories that are + /// common between multiple files. + /// + /// \param Path The path to the file to insert. + /// \param Style The path style for the "Path" parameter. + /// \returns The unique file index for the inserted file. + uint32_t insertFile(StringRef Path, + sys::path::Style Style = sys::path::Style::native); + + /// Add a function info to this GSYM creator. + /// + /// All information in the FunctionInfo object must use the + /// GsymCreator::insertString(...) function when creating string table + /// offsets for names and other strings. + /// + /// \param FI The function info object to emplace into our functions list. + void addFunctionInfo(FunctionInfo &&FI); + + /// Finalize the data in the GSYM creator prior to saving the data out. 
+ /// + /// Finalize must be called after all FunctionInfo objects have been added + /// and before GsymCreator::save() is called. + /// + /// \param OS Output stream to report duplicate function infos, overlapping + /// function infos, and function infos that were merged or removed. + /// \returns An error object that indicates success or failure of the + /// finalize. + llvm::Error finalize(llvm::raw_ostream &OS); + + /// Set the UUID value. + /// + /// \param UUIDBytes The new UUID bytes. + void setUUID(llvm::ArrayRef UUIDBytes) { + UUID.assign(UUIDBytes.begin(), UUIDBytes.end()); + } + + /// Thread safe iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback); + + /// Thread safe const iteration over all function infos. + /// + /// \param Callback A callback function that will get called with each + /// FunctionInfo. If the callback returns false, stop iterating. + void forEachFunctionInfo( + std::function const &Callback) const; + +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H diff --git a/include/llvm/DebugInfo/GSYM/GsymReader.h b/include/llvm/DebugInfo/GSYM/GsymReader.h new file mode 100644 index 00000000000..113bcee9c9a --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/GsymReader.h @@ -0,0 +1,228 @@ +//===- GsymReader.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H +#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H + + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorOr.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class MemoryBuffer; +class raw_ostream; + +namespace gsym { + +/// GsymReader is used to read GSYM data from a file or buffer. +/// +/// This class is optimized for very quick lookups when the endianness matches +/// the host system. The Header, address table, address info offsets, and file +/// table is designed to be mmap'ed as read only into memory and used without +/// any parsing needed. If the endianness doesn't match, we swap these objects +/// and tables into GsymReader::SwappedData and then point our header and +/// ArrayRefs to this swapped internal data. +/// +/// GsymReader objects must use one of the static functions to create an +/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). + +class GsymReader { + GsymReader(std::unique_ptr Buffer); + llvm::Error parse(); + + std::unique_ptr MemBuffer; + StringRef GsymBytes; + llvm::support::endianness Endian; + const Header *Hdr = nullptr; + ArrayRef AddrOffsets; + ArrayRef AddrInfoOffsets; + ArrayRef Files; + StringTable StrTab; + /// When the GSYM file's endianness doesn't match the host system then + /// we must decode all data structures that need to be swapped into + /// local storage and set point the ArrayRef objects above to these swapped + /// copies. 
+ struct SwappedData { + Header Hdr; + std::vector AddrOffsets; + std::vector AddrInfoOffsets; + std::vector Files; + }; + std::unique_ptr Swap; + +public: + GsymReader(GsymReader &&RHS); + ~GsymReader(); + + /// Construct a GsymReader from a file on disk. + /// + /// \param Path The file path of the GSYM file to read. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected openFile(StringRef Path); + + /// Construct a GsymReader from a buffer. + /// + /// \param Bytes A set of bytes that will be copied and owned by the + /// returned object on success. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected copyBuffer(StringRef Bytes); + + /// Access the GSYM header. + /// \returns A native endian version of the GSYM header. + const Header &getHeader() const; + + /// Get the full function info for an address. + /// + /// \param Addr A virtual address from the original object file to lookup. + /// \returns An expected FunctionInfo that contains the function info object + /// or an error object that indicates reason for failing to lookup the + /// address. + llvm::Expected getFunctionInfo(uint64_t Addr) const; + + /// Get a string from the string table. + /// + /// \param Offset The string table offset for the string to retrieve. + /// \returns The string from the string table. + StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } + +protected: + /// Gets an address from the address table. + /// + /// Addresses are stored as offsets from the gsym::Header::BaseAddress. + /// + /// \param Index An index into the address table. + /// \returns A resolved virtual address for the address in the address table + /// or llvm::None if Index is out of bounds. 
+ Optional getAddress(size_t Index) const; + + /// Get the file entry for the supplied file index. + /// + /// Used to convert any file indexes in the FunctionInfo data back into + /// files. This function can be used for iteration, but is more commonly used + /// for random access when doing lookups. + /// + /// \param Index An index into the file table. + /// \returns An optional FileInfo that will be valid if the file index is + /// valid, or llvm::None if the file index is out of bounds. + Optional getFile(uint32_t Index) const { + if (Index < Files.size()) + return Files[Index]; + return llvm::None; + } + + /// Get an appropriate address offsets array. + /// + /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8 + /// byte offsets from the gsym::Header::BaseAddress. The table is stored + /// internally as an array of bytes that are in the correct endianness. When + /// we access this table we must get an array that matches those sizes. This + /// templatized helper function is used when accessing address offsets in the + /// AddrOffsets member variable. + /// + /// \returns An ArrayRef of an appropriate address offset size. + template ArrayRef + getAddrOffsets() const { + return ArrayRef(reinterpret_cast(AddrOffsets.data()), + AddrOffsets.size()/sizeof(T)); + } + + /// Get an appropriate address from the address table. + /// + /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8 + /// byte address offsets from the gsym::Header::BaseAddress. The table is + /// stored internally as an array of bytes that are in the correct endianness. + /// In order to extract an address from the address table we must access the + /// address offset using the correct size and then add it to the BaseAddress + /// in the header. + /// + /// \param Index An index into the AddrOffsets array. 
+ /// \returns A virtual address that matches the original object file for the + /// address at the specified index, or llvm::None if Index is out of bounds. + template Optional + addressForIndex(size_t Index) const { + ArrayRef AIO = getAddrOffsets(); + if (Index < AIO.size()) + return AIO[Index] + Hdr->BaseAddress; + return llvm::None; + } + /// Lookup an address offset in the AddrOffsets table. + /// + /// Given an address offset, look it up using a binary search of the + /// AddrOffsets table. + /// + /// \param AddrOffset An address offset, that has already been computed by + /// subtracting the gsym::Header::BaseAddress. + /// \returns The matching address offset index. This index will be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. + template + uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { + ArrayRef AIO = getAddrOffsets(); + const auto Begin = AIO.begin(); + const auto End = AIO.end(); + auto Iter = std::lower_bound(Begin, End, AddrOffset); + if (Iter == End || AddrOffset < *Iter) + --Iter; + return std::distance(Begin, Iter); + } + + /// Create a GSYM from a memory buffer. + /// + /// Called by both openFile() and copyBuffer(), this function does all of the + /// work of parsing the GSYM file and returning an error. + /// + /// \param MemBuffer A memory buffer that will transfer ownership into the + /// GsymReader. + /// \returns An expected GsymReader that contains the object or an error + /// object that indicates reason for failing to read the GSYM. + static llvm::Expected + create(std::unique_ptr &MemBuffer); + + + /// Given an address, find the address index. + /// + /// Binary search the address table and find the matching address index. + /// + /// \param Addr A virtual address that matches the original object file + /// to lookup. + /// \returns An index into the address table. This index can be used to + /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 
+ /// Returns an error if the address isn't in the GSYM with details of why. + Expected getAddressIndex(const uint64_t Addr) const; + + /// Given an address index, get the offset for the FunctionInfo. + /// + /// Looking up an address is done by finding the corresponding address + /// index for the address. This index is then used to get the offset of the + /// FunctionInfo data that we will decode using this function. + /// + /// \param Index An index into the address table. + /// \returns An optional GSYM data offset for the offset of the FunctionInfo + /// that needs to be decoded. + Optional getAddressInfoOffset(size_t Index) const; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H diff --git a/include/llvm/DebugInfo/GSYM/Header.h b/include/llvm/DebugInfo/GSYM/Header.h new file mode 100644 index 00000000000..6652c59c97a --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/Header.h @@ -0,0 +1,129 @@ +//===- Header.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_HEADER_H +#define LLVM_DEBUGINFO_GSYM_HEADER_H + +#include "llvm/Support/Error.h" + +#include +#include + +namespace llvm { +class raw_ostream; +class DataExtractor; + +namespace gsym { +class FileWriter; + +constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM' +constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG' +constexpr uint32_t GSYM_VERSION = 1; +constexpr size_t GSYM_MAX_UUID_SIZE = 20; + +/// The GSYM header. +/// +/// The GSYM header is found at the start of a stand alone GSYM file, or as +/// the first bytes in a section when GSYM is contained in a section of an +/// executable file (ELF, mach-o, COFF). 
+/// +/// The structure is encoded exactly as it appears in the structure definition +/// with no gaps between members. Alignment should not change from system to +/// system as the members were laid out so that they shouldn't align +/// differently on different architectures. +/// +/// When endianness of the system loading a GSYM file matches, the file can +/// be mmap'ed in and a pointer to the header can be cast to the first bytes +/// of the file (stand alone GSYM file) or section data (GSYM in a section). +/// When endianness is swapped, the Header::decode() function should be used to +/// decode the header. +struct Header { + /// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file + /// is a GSYM file by scanning the first 4 bytes of a file or section. + /// This value might appear byte swapped. + uint32_t Magic; + /// The version number determines how the header is decoded and how each + /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase, + /// "Magic" and "Version" members should always appear at offset zero and 4 + /// respectively to ensure clients figure out if they can parse the format. + uint16_t Version; + /// The size in bytes of each address offset in the address offsets table. + uint8_t AddrOffSize; + /// The size in bytes of the UUID encoded in the "UUID" member. + uint8_t UUIDSize; + /// The 64 bit base address that all address offsets in the address offsets + /// table are relative to. Storing a full 64 bit address allows our address + /// offsets table to be smaller on disk. + uint64_t BaseAddress; + /// The number of addresses stored in the address offsets table. + uint32_t NumAddresses; + /// The file relative offset of the start of the string table for strings + /// contained in the GSYM file. If the GSYM is contained in a stand alone + /// file this will be the file offset of the start of the string table. 
If + /// the GSYM is contained in a section within an executable file, this can + /// be the offset of the first string used in the GSYM file and can possibly + /// span one or more executable string tables. This allows the strings to + /// share string tables in an ELF or mach-o file. + uint32_t StrtabOffset; + /// The size in bytes of the string table. For a stand alone GSYM file, this + /// will be the exact size in bytes of the string table. When the GSYM data + /// is in a section within an executable file, this size can span one or more + /// sections that contains strings. This allows any strings that are already + /// stored in the executable file to be re-used, and any extra strings could + /// be added to another string table and the string table offset and size + /// can be set to span all needed string tables. + uint32_t StrtabSize; + /// The UUID of the original executable file. This is stored to allow + /// matching a GSYM file to an executable file when symbolication is + /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any + /// bytes in the UUID value that appear after the first UUIDSize bytes should + /// be set to zero. + uint8_t UUID[GSYM_MAX_UUID_SIZE]; + + /// Check if a header is valid and return an error if anything is wrong. + /// + /// This function can be used prior to encoding a header to ensure it is + /// valid, or after decoding a header to ensure it is valid and supported. + /// + /// Check a correctly byte swapped header for errors: + /// - check magic value + /// - check that version number is supported + /// - check that the address offset size is supported + /// - check that the UUID size is valid + /// + /// \returns An error if anything is wrong in the header, or Error::success() + /// if there are no errors. + llvm::Error checkForError() const; + + /// Decode an object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. 
This object must + /// have the data for the object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \returns A Header or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected
decode(DataExtractor &Data); + + /// Encode this object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \returns An error object that indicates success or failure of the + /// encoding process. + llvm::Error encode(FileWriter &O) const; +}; + +bool operator==(const Header &LHS, const Header &RHS); +raw_ostream &operator<<(raw_ostream &OS, const llvm::gsym::Header &H); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_HEADER_H diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h index 22243062293..48fd9a7c130 100644 --- a/include/llvm/DebugInfo/GSYM/InlineInfo.h +++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h @@ -1,9 +1,8 @@ //===- InlineInfo.h ---------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -12,6 +11,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/Support/Error.h" #include #include @@ -31,6 +31,30 @@ namespace gsym { /// Any clients that encode information will need to ensure the ranges are /// all contined correctly or lookups could fail. Add ranges in these objects /// must be contained in the top level FunctionInfo address ranges as well. +/// +/// ENCODING +/// +/// When saved to disk, the inline info encodes all ranges to be relative to +/// a parent address range. 
This will be the FunctionInfo's start address if +/// the InlineInfo is directly contained in a FunctionInfo, or the start +/// address of the containing parent InlineInfo's first "Ranges" member. This +/// allows address ranges to be efficiently encoded using ULEB128 encodings as +/// we encode the offset and size of each range instead of full addresses. This +/// also makes any encoded addresses easy to relocate as we just need to +/// relocate the FunctionInfo's start address. +/// +/// - The AddressRanges member "Ranges" is encoded using an appropriate base +/// address as described above. +/// - UINT8 boolean value that specifies if the InlineInfo object has children. +/// - UINT32 string table offset that points to the name of the inline +/// function. +/// - ULEB128 integer that specifies the file of the call site that called +/// this function. +/// - ULEB128 integer that specifies the source line of the call site that +/// called this function. +/// - if this object has children, encode each child InlineInfo using the +/// first address range's start address as the base address. +/// struct InlineInfo { uint32_t Name; ///< String table offset in the string table. @@ -62,6 +86,37 @@ struct InlineInfo { /// \returns optional vector of InlineInfo objects that describe the /// inline call stack for a given address, false otherwise. llvm::Optional getInlineStack(uint64_t Addr) const; + + /// Decode an InlineInfo object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. This object must + /// have the data for the InlineInfo object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding all address ranges. 
+ /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object of this object is a child of + /// another InlineInfo object. + /// \returns An InlineInfo or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + + /// Encode this InlineInfo object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \param BaseAddr The base address to use when encoding all address ranges. + /// This will be the FunctionInfo's start address if this object is directly + /// contained in a FunctionInfo object, or the start address of the first + /// address range in an InlineInfo object of this object is a child of + /// another InlineInfo object. + /// + /// \returns An error object that indicates success or failure or the + /// encoding process. + llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; }; inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) { diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h index 6b9380940bd..aac7c48e067 100644 --- a/include/llvm/DebugInfo/GSYM/LineEntry.h +++ b/include/llvm/DebugInfo/GSYM/LineEntry.h @@ -1,9 +1,8 @@ //===- LineEntry.h ----------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/GSYM/LineTable.h b/include/llvm/DebugInfo/GSYM/LineTable.h new file mode 100644 index 00000000000..3cdbccb08ce --- /dev/null +++ b/include/llvm/DebugInfo/GSYM/LineTable.h @@ -0,0 +1,198 @@ +//===- LineTable.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H +#define LLVM_DEBUGINFO_GSYM_LINETABLE_H + +#include "llvm/DebugInfo/GSYM/LineEntry.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace gsym { + +struct FunctionInfo; +class FileWriter; + +/// LineTable class contains deserialized versions of line tables for each +/// function's address ranges. +/// +/// When saved to disk, the line table is encoded using a modified version of +/// the DWARF line tables that only tracks address to source file and line. +/// +/// ENCODING +/// +/// The line table starts with a small prolog that contains the following +/// values: +/// +/// ENCODING NAME DESCRIPTION +/// ======== =========== ==================================================== +/// SLEB MinDelta The min line delta for special opcodes that advance +/// the address and line number. +/// SLEB MaxDelta The max line delta for single byte opcodes that +/// advance the address and line number. +/// ULEB FirstLine The value of the first source line number to +/// initialize the LineEntry with. 
+/// +/// Once these prolog items are read, we initialize a LineEntry struct with +/// the start address of the function from the FunctionInfo's address range, +/// a default file index of 1, and the line number set to "FirstLine" from +/// the prolog above: +/// +/// LineEntry Row(BaseAddr, 1, FirstLine); +/// +/// The line table state machine is now initialized and ready to be parsed. +/// The stream that follows this encodes the line entries in a compact +/// form. Some opcodes cause "Row" to be modified and some opcodes may also +/// push "Row" onto the end of the "LineTable.Lines" vector. The end result +/// is a vector of LineEntry structs that is sorted in ascending address +/// order. +/// +/// NORMAL OPCODES +/// +/// The opcodes 0 through 3 are normal opcodes. Their encoding and +/// descriptions are listed below: +/// +/// ENCODING ENUMERATION VALUE DESCRIPTION +/// ======== ================ ===== ======================================== +/// LTOC_EndSequence 0x00 Parsing is done. +/// ULEB LTOC_SetFile 0x01 Row.File = ULEB +/// ULEB LTOC_AdvancePC 0x02 Row.Addr += ULEB, push "Row". +/// SLEB LTOC_AdvanceLine 0x03 Row.Line += SLEB +/// LTOC_FirstSpecial 0x04 First special opcode (see SPECIAL +/// OPCODES below). +/// +/// SPECIAL OPCODES +/// +/// Opcodes LTOC_FirstSpecial through 255 are special opcodes that always +/// increment both the Row.Addr and Row.Line and push "Row" onto the +/// LineTable.Lines array. They do this by using some of the bits to +/// increment/decrement the source line number, and some of the bits to +/// increment the address. Line numbers can go up or down when making line +/// tables, where addresses always only increase since line tables are sorted +/// by address. 
+/// +/// In order to calculate the amount to increment the line and address for +/// these special opcodes, we calculate the number of values reserved for the +/// line increment/decrement using the "MinDelta" and "MaxDelta" from the +/// prolog: +/// +/// const int64_t LineRange = MaxDelta - MinDelta + 1; +/// +/// Then we can adjust the opcode to not include any of the normal opcodes: +/// +/// const uint8_t AdjustedOp = Opcode - LTOC_FirstSpecial; +/// +/// And we can calculate the line offset, and address offset: +/// +/// const int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); +/// const uint64_t AddrDelta = (AdjustedOp / LineRange); +/// +/// And use these to modify our "Row": +/// +/// Row.Line += LineDelta; +/// Row.Addr += AddrDelta; +/// +/// And push a row onto the line table: +/// +/// Lines.push_back(Row); +/// +/// This is very similar to the way that DWARF encodes its line tables. The +/// only difference is the DWARF line tables have more normal opcodes and the +/// "Row" contains more members, like source column number, bools for end of +/// prologue, beginning of epilogue, is statement and many others. There are +/// also more complex rules that happen for the extra normal opcodes. By +/// leaving these extra opcodes out, we leave more bits for the special +/// opcodes that allows us to encode line tables in fewer bytes than standard +/// DWARF encodings. +/// +/// Opcodes that will push "Row" onto the LineTable.Lines include the +/// LTOC_AdvancePC opcode and all special opcodes. All other opcodes +/// only modify the current "Row", or cause the line table to end. +class LineTable { + typedef std::vector Collection; + Collection Lines; ///< All line entries in the line table. +public: + static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr, + uint64_t Addr); + + /// Decode a LineTable object from a binary data stream. + /// + /// \param Data The binary stream to read the data from. 
This object must + /// have the data for the LineTable object starting at offset zero. The data + /// can contain more data than needed. + /// + /// \param BaseAddr The base address to use when decoding the line table. + /// This will be the FunctionInfo's start address and will be used to + /// initialize the line table row prior to parsing any opcodes. + /// + /// \returns A LineTable or an error describing the issue that was + /// encountered during decoding. + static llvm::Expected decode(DataExtractor &Data, + uint64_t BaseAddr); + /// Encode this LineTable object into FileWriter stream. + /// + /// \param O The binary stream to write the data to at the current file + /// position. + /// + /// \param BaseAddr The base address to use when encoding the line table. + /// This will be the FunctionInfo's start address. + /// + /// \returns An error object that indicates success or failure of the + /// encoding process. + llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const; + bool empty() const { return Lines.empty(); } + void clear() { Lines.clear(); } + void push(const LineEntry &LE) { + Lines.push_back(LE); + } + size_t isValid() const { + return !Lines.empty(); + } + size_t size() const { + return Lines.size(); + } + LineEntry &get(size_t i) { + assert(i < Lines.size()); + return Lines[i]; + } + const LineEntry &get(size_t i) const { + assert(i < Lines.size()); + return Lines[i]; + } + LineEntry &operator[](size_t i) { + return get(i); + } + const LineEntry &operator[](size_t i) const { + return get(i); + } + bool operator==(const LineTable &RHS) const { + return Lines == RHS.Lines; + } + bool operator!=(const LineTable &RHS) const { + return Lines != RHS.Lines; + } + bool operator<(const LineTable &RHS) const { + const auto LHSSize = Lines.size(); + const auto RHSSize = RHS.Lines.size(); + if (LHSSize == RHSSize) + return Lines < RHS.Lines; + return LHSSize < RHSSize; + } + Collection::const_iterator begin() const { return Lines.begin(); } + 
Collection::const_iterator end() const { return Lines.end(); } + +}; + +raw_ostream &operator<<(raw_ostream &OS, const gsym::LineTable <); + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINETABLE_H diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h index 772ff244c5b..37cfec713f2 100644 --- a/include/llvm/DebugInfo/GSYM/Range.h +++ b/include/llvm/DebugInfo/GSYM/Range.h @@ -1,9 +1,8 @@ -//===- AddressRange.h -------------------------------------------*- C++ -*-===// +//===- Range.h --------------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -21,10 +20,13 @@ #define HEX64(v) llvm::format_hex(v, 18) namespace llvm { +class DataExtractor; class raw_ostream; namespace gsym { +class FileWriter; + /// A class that represents an address range. The range is specified using /// a start and an end address. struct AddressRange { @@ -47,6 +49,18 @@ struct AddressRange { bool operator<(const AddressRange &R) const { return std::make_pair(Start, End) < std::make_pair(R.Start, R.End); } + /// AddressRange objects are encoded and decoded to be relative to a base + /// address. This will be the FunctionInfo's start address if the AddressRange + /// is directly contained in a FunctionInfo, or a base address of the + /// containing parent AddressRange or AddressRanges. This allows address + /// ranges to be efficiently encoded using ULEB128 encodings as we encode the + /// offset and size of each range instead of full addresses. 
This also makes + /// encoded addresses easy to relocate as we just need to relocate one base + /// address. + /// @{ + void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; + /// @} }; raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R); @@ -66,6 +80,7 @@ public: void clear() { Ranges.clear(); } bool empty() const { return Ranges.empty(); } bool contains(uint64_t Addr) const; + bool contains(AddressRange Range) const; void insert(AddressRange Range); size_t size() const { return Ranges.size(); } bool operator==(const AddressRanges &RHS) const { @@ -77,6 +92,14 @@ public: } Collection::const_iterator begin() const { return Ranges.begin(); } Collection::const_iterator end() const { return Ranges.end(); } + + /// Address ranges are decoded and encoded to be relative to a base address. + /// See the AddressRange comment for the encode and decode methods for full + /// details. + /// @{ + void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; + /// @} }; raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h index 0001b8b8274..a96ae5899da 100644 --- a/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/include/llvm/DebugInfo/GSYM/StringTable.h @@ -1,9 +1,8 @@ //===- StringTable.h --------------------------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h index ec85d92d2a9..af93be931b8 100644 --- a/include/llvm/DebugInfo/PDB/GenericError.h +++ b/include/llvm/DebugInfo/PDB/GenericError.h @@ -20,7 +20,7 @@ enum class pdb_error_code { dia_sdk_not_present, dia_failed_loading, signature_out_of_date, - external_cmdline_ref, + no_matching_pch, unspecified, }; } // namespace pdb diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h index 0b15ab474f7..4adf3b394c2 100644 --- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h +++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h @@ -87,7 +87,7 @@ public: // Initial construction must not access the cache, since it must be done // atomically. - auto Result = llvm::make_unique( + auto Result = std::make_unique( Session, Id, std::forward(ConstructorArgs)...); Result->SymbolId = Id; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h index d9004a8894d..0d95a246755 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -131,7 +131,7 @@ public: auto BaseIter = RawSymbol->findChildren(T::Tag); if (!BaseIter) return nullptr; - return llvm::make_unique>(std::move(BaseIter)); + return std::make_unique>(std::move(BaseIter)); } std::unique_ptr findAllChildren(PDB_SymType Type) const; std::unique_ptr findAllChildren() const; diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h index d3da28ca0b7..11599fc1797 100644 --- a/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -39,6 +39,7 @@ public: bool UseSymbolTable = true; bool Demangle = true; bool RelativeAddresses = false; + bool UntagAddresses = false; std::string 
DefaultArch; std::vector DsymHints; std::string FallbackDebugPath; diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h index 6fea7ef13f1..7b85b9a9ccf 100644 --- a/include/llvm/Demangle/Demangle.h +++ b/include/llvm/Demangle/Demangle.h @@ -32,7 +32,14 @@ char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n, int *status); -enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 }; +enum MSDemangleFlags { + MSDF_None = 0, + MSDF_DumpBackrefs = 1 << 0, + MSDF_NoAccessSpecifier = 1 << 1, + MSDF_NoCallingConvention = 1 << 2, + MSDF_NoReturnType = 1 << 3, + MSDF_NoMemberType = 1 << 4, +}; char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n, int *status, MSDemangleFlags Flags = MSDF_None); diff --git a/include/llvm/Demangle/DemangleConfig.h b/include/llvm/Demangle/DemangleConfig.h index 73f89d357c8..b7b7dbd24c7 100644 --- a/include/llvm/Demangle/DemangleConfig.h +++ b/include/llvm/Demangle/DemangleConfig.h @@ -15,13 +15,6 @@ #ifndef LLVM_DEMANGLE_COMPILER_H #define LLVM_DEMANGLE_COMPILER_H -#ifdef _MSC_VER -// snprintf is implemented in VS 2015 -#if _MSC_VER < 1900 -#define snprintf _snprintf_s -#endif -#endif - #ifndef __has_feature #define __has_feature(x) 0 #endif diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h index aaccb27e17a..7784e842bfe 100644 --- a/include/llvm/Demangle/ItaniumDemangle.h +++ b/include/llvm/Demangle/ItaniumDemangle.h @@ -57,6 +57,11 @@ X(LocalName) \ X(VectorType) \ X(PixelVectorType) \ + X(SyntheticTemplateParamName) \ + X(TypeTemplateParamDecl) \ + X(NonTypeTemplateParamDecl) \ + X(TemplateTemplateParamDecl) \ + X(TemplateParamPackDecl) \ X(ParameterPack) \ X(TemplateArgumentPack) \ X(ParameterPackExpansion) \ @@ -91,6 +96,8 @@ X(ThrowExpr) \ X(UUIDOfExpr) \ X(BoolExpr) \ + X(StringLiteral) \ + X(LambdaExpr) \ X(IntegerCastExpr) \ X(IntegerLiteral) \ X(FloatLiteral) \ @@ -303,7 +310,7 @@ inline Qualifiers operator|=(Qualifiers 
&Q1, Qualifiers Q2) { return Q1 = static_cast(Q1 | Q2); } -class QualType : public Node { +class QualType final : public Node { protected: const Qualifiers Quals; const Node *Child; @@ -964,6 +971,127 @@ public: } }; +enum class TemplateParamKind { Type, NonType, Template }; + +/// An invented name for a template parameter for which we don't have a +/// corresponding template argument. +/// +/// This node is created when parsing the for a lambda with +/// explicit template arguments, which might be referenced in the parameter +/// types appearing later in the . +class SyntheticTemplateParamName final : public Node { + TemplateParamKind Kind; + unsigned Index; + +public: + SyntheticTemplateParamName(TemplateParamKind Kind_, unsigned Index_) + : Node(KSyntheticTemplateParamName), Kind(Kind_), Index(Index_) {} + + template void match(Fn F) const { F(Kind, Index); } + + void printLeft(OutputStream &S) const override { + switch (Kind) { + case TemplateParamKind::Type: + S += "$T"; + break; + case TemplateParamKind::NonType: + S += "$N"; + break; + case TemplateParamKind::Template: + S += "$TT"; + break; + } + if (Index > 0) + S << Index - 1; + } +}; + +/// A template type parameter declaration, 'typename T'. +class TypeTemplateParamDecl final : public Node { + Node *Name; + +public: + TypeTemplateParamDecl(Node *Name_) + : Node(KTypeTemplateParamDecl, Cache::Yes), Name(Name_) {} + + template void match(Fn F) const { F(Name); } + + void printLeft(OutputStream &S) const override { + S += "typename "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + } +}; + +/// A non-type template parameter declaration, 'int N'. 
+class NonTypeTemplateParamDecl final : public Node { + Node *Name; + Node *Type; + +public: + NonTypeTemplateParamDecl(Node *Name_, Node *Type_) + : Node(KNonTypeTemplateParamDecl, Cache::Yes), Name(Name_), Type(Type_) {} + + template void match(Fn F) const { F(Name, Type); } + + void printLeft(OutputStream &S) const override { + Type->printLeft(S); + if (!Type->hasRHSComponent(S)) + S += " "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + Type->printRight(S); + } +}; + +/// A template template parameter declaration, +/// 'template typename N'. +class TemplateTemplateParamDecl final : public Node { + Node *Name; + NodeArray Params; + +public: + TemplateTemplateParamDecl(Node *Name_, NodeArray Params_) + : Node(KTemplateTemplateParamDecl, Cache::Yes), Name(Name_), + Params(Params_) {} + + template void match(Fn F) const { F(Name, Params); } + + void printLeft(OutputStream &S) const override { + S += "template<"; + Params.printWithComma(S); + S += "> typename "; + } + + void printRight(OutputStream &S) const override { + Name->print(S); + } +}; + +/// A template parameter pack declaration, 'typename ...T'. +class TemplateParamPackDecl final : public Node { + Node *Param; + +public: + TemplateParamPackDecl(Node *Param_) + : Node(KTemplateParamPackDecl, Cache::Yes), Param(Param_) {} + + template void match(Fn F) const { F(Param); } + + void printLeft(OutputStream &S) const override { + Param->printLeft(S); + S += "..."; + } + + void printRight(OutputStream &S) const override { + Param->printRight(S); + } +}; + /// An unexpanded parameter pack (either in the expression or type context). If /// this AST is correct, this node will have a ParameterPackExpansion node above /// it. 
@@ -1410,21 +1538,36 @@ public: }; class ClosureTypeName : public Node { + NodeArray TemplateParams; NodeArray Params; StringView Count; public: - ClosureTypeName(NodeArray Params_, StringView Count_) - : Node(KClosureTypeName), Params(Params_), Count(Count_) {} + ClosureTypeName(NodeArray TemplateParams_, NodeArray Params_, + StringView Count_) + : Node(KClosureTypeName), TemplateParams(TemplateParams_), + Params(Params_), Count(Count_) {} - template void match(Fn F) const { F(Params, Count); } + template void match(Fn F) const { + F(TemplateParams, Params, Count); + } + + void printDeclarator(OutputStream &S) const { + if (!TemplateParams.empty()) { + S += "<"; + TemplateParams.printWithComma(S); + S += ">"; + } + S += "("; + Params.printWithComma(S); + S += ")"; + } void printLeft(OutputStream &S) const override { S += "\'lambda"; S += Count; - S += "\'("; - Params.printWithComma(S); - S += ")"; + S += "\'"; + printDeclarator(S); } }; @@ -1902,6 +2045,37 @@ public: } }; +class StringLiteral : public Node { + const Node *Type; + +public: + StringLiteral(const Node *Type_) : Node(KStringLiteral), Type(Type_) {} + + template void match(Fn F) const { F(Type); } + + void printLeft(OutputStream &S) const override { + S += "\"<"; + Type->print(S); + S += ">\""; + } +}; + +class LambdaExpr : public Node { + const Node *Type; + +public: + LambdaExpr(const Node *Type_) : Node(KLambdaExpr), Type(Type_) {} + + template void match(Fn F) const { F(Type); } + + void printLeft(OutputStream &S) const override { + S += "[]"; + if (Type->getKind() == KClosureTypeName) + static_cast(Type)->printDeclarator(S); + S += "{...}"; + } +}; + class IntegerCastExpr : public Node { // ty(integer) const Node *Ty; @@ -2167,10 +2341,36 @@ template struct AbstractManglingParser { // table. 
PODSmallVector Subs; + using TemplateParamList = PODSmallVector; + + class ScopedTemplateParamList { + AbstractManglingParser *Parser; + size_t OldNumTemplateParamLists; + TemplateParamList Params; + + public: + ScopedTemplateParamList(AbstractManglingParser *Parser) + : Parser(Parser), + OldNumTemplateParamLists(Parser->TemplateParams.size()) { + Parser->TemplateParams.push_back(&Params); + } + ~ScopedTemplateParamList() { + assert(Parser->TemplateParams.size() >= OldNumTemplateParamLists); + Parser->TemplateParams.dropBack(OldNumTemplateParamLists); + } + }; + // Template parameter table. Like the above, but referenced like "T42_". // This has a smaller size compared to Subs and Names because it can be // stored on the stack. - PODSmallVector TemplateParams; + TemplateParamList OuterTemplateParams; + + // Lists of template parameters indexed by template parameter depth, + // referenced like "TL2_4_". If nonempty, element 0 is always + // OuterTemplateParams; inner elements are always template parameter lists of + // lambda expressions. For a generic lambda with no explicit template + // parameter list, the corresponding parameter list pointer will be null. + PODSmallVector TemplateParams; // Set of unresolved forward references. These can occur in a // conversion operator's type, and are resolved in the enclosing . 
@@ -2178,7 +2378,9 @@ template struct AbstractManglingParser { bool TryToParseTemplateArgs = true; bool PermitForwardTemplateReferences = false; - bool ParsingLambdaParams = false; + size_t ParsingLambdaParamsAtLevel = (size_t)-1; + + unsigned NumSyntheticTemplateParameters[3] = {}; Alloc ASTAllocator; @@ -2193,9 +2395,11 @@ template struct AbstractManglingParser { Names.clear(); Subs.clear(); TemplateParams.clear(); - ParsingLambdaParams = false; + ParsingLambdaParamsAtLevel = (size_t)-1; TryToParseTemplateArgs = true; PermitForwardTemplateReferences = false; + for (int I = 0; I != 3; ++I) + NumSyntheticTemplateParameters[I] = 0; ASTAllocator.reset(); } @@ -2253,6 +2457,7 @@ template struct AbstractManglingParser { bool parseSeqId(size_t *Out); Node *parseSubstitution(); Node *parseTemplateParam(); + Node *parseTemplateParamDecl(); Node *parseTemplateArgs(bool TagTemplates = false); Node *parseTemplateArg(); @@ -2301,9 +2506,10 @@ template struct AbstractManglingParser { size_t E = ForwardTemplateRefs.size(); for (; I < E; ++I) { size_t Idx = ForwardTemplateRefs[I]->Index; - if (Idx >= TemplateParams.size()) + if (TemplateParams.empty() || !TemplateParams[0] || + Idx >= TemplateParams[0]->size()) return true; - ForwardTemplateRefs[I]->Ref = TemplateParams[Idx]; + ForwardTemplateRefs[I]->Ref = (*TemplateParams[0])[Idx]; } ForwardTemplateRefs.dropBack(State.ForwardTemplateRefsBegin); return false; @@ -2470,7 +2676,12 @@ AbstractManglingParser::parseUnqualifiedName(NameState *State) { // ::= + # Parameter types or "v" if the lambda has no parameters template Node * -AbstractManglingParser::parseUnnamedTypeName(NameState *) { +AbstractManglingParser::parseUnnamedTypeName(NameState *State) { + // refer to the innermost . Clear out any + // outer args that we may have inserted into TemplateParams. 
+ if (State != nullptr) + TemplateParams.clear(); + if (consumeIf("Ut")) { StringView Count = parseNumber(); if (!consumeIf('_')) @@ -2478,22 +2689,59 @@ AbstractManglingParser::parseUnnamedTypeName(NameState *) { return make(Count); } if (consumeIf("Ul")) { - NodeArray Params; - SwapAndRestore SwapParams(ParsingLambdaParams, true); + SwapAndRestore SwapParams(ParsingLambdaParamsAtLevel, + TemplateParams.size()); + ScopedTemplateParamList LambdaTemplateParams(this); + + size_t ParamsBegin = Names.size(); + while (look() == 'T' && + StringView("yptn").find(look(1)) != StringView::npos) { + Node *T = parseTemplateParamDecl(); + if (!T) + return nullptr; + Names.push_back(T); + } + NodeArray TempParams = popTrailingNodeArray(ParamsBegin); + + // FIXME: If TempParams is empty and none of the function parameters + // includes 'auto', we should remove LambdaTemplateParams from the + // TemplateParams list. Unfortunately, we don't find out whether there are + // any 'auto' parameters until too late in an example such as: + // + // template void f( + // decltype([](decltype([](T v) {}), + // auto) {})) {} + // template void f( + // decltype([](decltype([](T w) {}), + // int) {})) {} + // + // Here, the type of v is at level 2 but the type of w is at level 1. We + // don't find this out until we encounter the type of the next parameter. + // + // However, compilers can't actually cope with the former example in + // practice, and it's likely to be made ill-formed in future, so we don't + // need to support it here. + // + // If we encounter an 'auto' in the function parameter types, we will + // recreate a template parameter scope for it, but any intervening lambdas + // will be parsed in the 'wrong' template parameter depth. 
+ if (TempParams.empty()) + TemplateParams.pop_back(); + if (!consumeIf("vE")) { - size_t ParamsBegin = Names.size(); do { Node *P = getDerived().parseType(); if (P == nullptr) return nullptr; Names.push_back(P); } while (!consumeIf('E')); - Params = popTrailingNodeArray(ParamsBegin); } + NodeArray Params = popTrailingNodeArray(ParamsBegin); + StringView Count = parseNumber(); if (!consumeIf('_')) return nullptr; - return make(Params, Count); + return make(TempParams, Params, Count); } if (consumeIf("Ub")) { (void)parseNumber(); @@ -3949,6 +4197,7 @@ Node *AbstractManglingParser::parseConversionExpr() { // ::= L E # floating literal // ::= L E # string literal // ::= L E # nullptr literal (i.e., "LDnE") +// ::= L E # lambda expression // FIXME: ::= L _ E # complex floating point literal (C 2000) // ::= L E # external name template @@ -4020,24 +4269,43 @@ Node *AbstractManglingParser::parseExprPrimary() { return R; } return nullptr; + case 'A': { + Node *T = getDerived().parseType(); + if (T == nullptr) + return nullptr; + // FIXME: We need to include the string contents in the mangling. + if (consumeIf('E')) + return make(T); + return nullptr; + } + case 'D': + if (consumeIf("DnE")) + return make("nullptr"); + return nullptr; case 'T': // Invalid mangled name per // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html return nullptr; + case 'U': { + // FIXME: Should we support LUb... for block literals? 
+ if (look(1) != 'l') + return nullptr; + Node *T = parseUnnamedTypeName(nullptr); + if (!T || !consumeIf('E')) + return nullptr; + return make(T); + } default: { // might be named type Node *T = getDerived().parseType(); if (T == nullptr) return nullptr; StringView N = parseNumber(); - if (!N.empty()) { - if (!consumeIf('E')) - return nullptr; - return make(T, N); - } - if (consumeIf('E')) - return T; - return nullptr; + if (N.empty()) + return nullptr; + if (!consumeIf('E')) + return nullptr; + return make(T, N); } } } @@ -5062,11 +5330,22 @@ Node *AbstractManglingParser::parseSubstitution() { // ::= T_ # first template parameter // ::= T _ +// ::= TL __ +// ::= TL _ _ template Node *AbstractManglingParser::parseTemplateParam() { if (!consumeIf('T')) return nullptr; + size_t Level = 0; + if (consumeIf('L')) { + if (parsePositiveInteger(&Level)) + return nullptr; + ++Level; + if (!consumeIf('_')) + return nullptr; + } + size_t Index = 0; if (!consumeIf('_')) { if (parsePositiveInteger(&Index)) @@ -5076,15 +5355,11 @@ Node *AbstractManglingParser::parseTemplateParam() { return nullptr; } - // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter list - // are mangled as the corresponding artificial template type parameter. - if (ParsingLambdaParams) - return make("auto"); - // If we're in a context where this refers to a // further ahead in the mangled name (currently just conversion // operator types), then we should only look it up in the right context. - if (PermitForwardTemplateReferences) { + // This can only happen at the outermost level. 
+ if (PermitForwardTemplateReferences && Level == 0) { Node *ForwardRef = make(Index); if (!ForwardRef) return nullptr; @@ -5094,9 +5369,78 @@ Node *AbstractManglingParser::parseTemplateParam() { return ForwardRef; } - if (Index >= TemplateParams.size()) + if (Level >= TemplateParams.size() || !TemplateParams[Level] || + Index >= TemplateParams[Level]->size()) { + // Itanium ABI 5.1.8: In a generic lambda, uses of auto in the parameter + // list are mangled as the corresponding artificial template type parameter. + if (ParsingLambdaParamsAtLevel == Level && Level <= TemplateParams.size()) { + // This will be popped by the ScopedTemplateParamList in + // parseUnnamedTypeName. + if (Level == TemplateParams.size()) + TemplateParams.push_back(nullptr); + return make("auto"); + } + return nullptr; - return TemplateParams[Index]; + } + + return (*TemplateParams[Level])[Index]; +} + +// ::= Ty # type parameter +// ::= Tn # non-type parameter +// ::= Tt * E # template parameter +// ::= Tp # parameter pack +template +Node *AbstractManglingParser::parseTemplateParamDecl() { + auto InventTemplateParamName = [&](TemplateParamKind Kind) { + unsigned Index = NumSyntheticTemplateParameters[(int)Kind]++; + Node *N = make(Kind, Index); + if (N) TemplateParams.back()->push_back(N); + return N; + }; + + if (consumeIf("Ty")) { + Node *Name = InventTemplateParamName(TemplateParamKind::Type); + if (!Name) + return nullptr; + return make(Name); + } + + if (consumeIf("Tn")) { + Node *Name = InventTemplateParamName(TemplateParamKind::NonType); + if (!Name) + return nullptr; + Node *Type = parseType(); + if (!Type) + return nullptr; + return make(Name, Type); + } + + if (consumeIf("Tt")) { + Node *Name = InventTemplateParamName(TemplateParamKind::Template); + if (!Name) + return nullptr; + size_t ParamsBegin = Names.size(); + ScopedTemplateParamList TemplateTemplateParamParams(this); + while (!consumeIf("E")) { + Node *P = parseTemplateParamDecl(); + if (!P) + return nullptr; + 
Names.push_back(P); + } + NodeArray Params = popTrailingNodeArray(ParamsBegin); + return make(Name, Params); + } + + if (consumeIf("Tp")) { + Node *P = parseTemplateParamDecl(); + if (!P) + return nullptr; + return make(P); + } + + return nullptr; } // ::= # type or template @@ -5153,8 +5497,11 @@ AbstractManglingParser::parseTemplateArgs(bool TagTemplates) { // refer to the innermost . Clear out any // outer args that we may have inserted into TemplateParams. - if (TagTemplates) + if (TagTemplates) { TemplateParams.clear(); + TemplateParams.push_back(&OuterTemplateParams); + OuterTemplateParams.clear(); + } size_t ArgsBegin = Names.size(); while (!consumeIf('E')) { @@ -5172,7 +5519,7 @@ AbstractManglingParser::parseTemplateArgs(bool TagTemplates) { if (!TableEntry) return nullptr; } - TemplateParams.push_back(TableEntry); + TemplateParams.back()->push_back(TableEntry); } else { Node *Arg = getDerived().parseTemplateArg(); if (Arg == nullptr) diff --git a/include/llvm/Demangle/MicrosoftDemangle.h b/include/llvm/Demangle/MicrosoftDemangle.h index 382e79401c4..c6f26061bed 100644 --- a/include/llvm/Demangle/MicrosoftDemangle.h +++ b/include/llvm/Demangle/MicrosoftDemangle.h @@ -158,6 +158,7 @@ private: QualifiedNameNode *QN); SymbolNode *demangleDeclarator(StringView &MangledName); SymbolNode *demangleMD5Name(StringView &MangledName); + SymbolNode *demangleTypeinfoName(StringView &MangledName); VariableSymbolNode *demangleVariableEncoding(StringView &MangledName, StorageClass SC); diff --git a/include/llvm/Demangle/MicrosoftDemangleNodes.h b/include/llvm/Demangle/MicrosoftDemangleNodes.h index da9d9d5bfdc..81b279fe237 100644 --- a/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -16,6 +16,8 @@ #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/StringView.h" #include +#include +#include namespace llvm { namespace itanium_demangle { @@ -73,6 +75,9 @@ enum OutputFlags { OF_Default = 0, 
OF_NoCallingConvention = 1, OF_NoTagSpecifier = 2, + OF_NoAccessSpecifier = 4, + OF_NoMemberType = 8, + OF_NoReturnType = 16, }; // Types @@ -301,8 +306,6 @@ struct TypeNode : public Node { outputPost(OS, Flags); } - void outputQuals(bool SpaceBefore, bool SpaceAfter) const; - Qualifiers Quals = Q_None; }; diff --git a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index 8d2f641254b..72687682f60 100644 --- a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -22,17 +22,21 @@ namespace llvm { namespace jitlink { /// Registers all FDEs in the given eh-frame section with the current process. -Error registerEHFrameSection(const void *EHFrameSectionAddr); +Error registerEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); /// Deregisters all FDEs in the given eh-frame section with the current process. -Error deregisterEHFrameSection(const void *EHFrameSectionAddr); +Error deregisterEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); /// Supports registration/deregistration of EH-frames in a target process. class EHFrameRegistrar { public: virtual ~EHFrameRegistrar(); - virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) = 0; - virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) = 0; + virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) = 0; + virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) = 0; }; /// Registers / Deregisters EH-frames in the current process. 
@@ -48,31 +52,38 @@ public: InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete; InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete; - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) override { + Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override { return registerEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr)); + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } - Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) override { + Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override { return deregisterEHFrameSection( - jitTargetAddressToPointer(EHFrameSectionAddr)); + jitTargetAddressToPointer(EHFrameSectionAddr), + EHFrameSectionSize); } private: InProcessEHFrameRegistrar(); }; -using StoreFrameAddressFunction = std::function; +using StoreFrameRangeFunction = + std::function; -/// Creates a pass that records the address of the EH frame section. If no -/// eh-frame section is found, it will set EHFrameAddr to zero. +/// Creates a pass that records the address and size of the EH frame section. +/// If no eh-frame section is found then the address and size will both be given +/// as zero. /// /// Authors of JITLinkContexts can use this function to register a post-fixup -/// pass that records the address of the eh-frame section. This address can +/// pass that records the range of the eh-frame section. This range can /// be used after finalization to register and deregister the frame. 
-AtomGraphPassFunction +LinkGraphPassFunction createEHFrameRecorderPass(const Triple &TT, - StoreFrameAddressFunction StoreFrameAddress); + StoreFrameRangeFunction StoreFrameRange); } // end namespace jitlink } // end namespace llvm diff --git a/include/llvm/ExecutionEngine/JITLink/JITLink.h b/include/llvm/ExecutionEngine/JITLink/JITLink.h index be80d44ccf5..b531127cf89 100644 --- a/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -34,6 +34,9 @@ namespace llvm { namespace jitlink { +class Symbol; +class Section; + /// Base class for errors originating in JIT linker, e.g. missing relocation /// support. class JITLinkError : public ErrorInfo { @@ -50,27 +53,22 @@ private: std::string ErrMsg; }; -// Forward declare the Atom class. -class Atom; - -/// Edge class. Represents both object file relocations, as well as layout and -/// keep-alive constraints. +/// Represents fixups and constraints in the LinkGraph. class Edge { public: using Kind = uint8_t; - using GenericEdgeKind = enum : Kind { + enum GenericEdgeKind : Kind { Invalid, // Invalid edge value. FirstKeepAlive, // Keeps target alive. Offset/addend zero. KeepAlive = FirstKeepAlive, // Tag first edge kind that preserves liveness. - LayoutNext, // Layout constraint. Offset/Addend zero. FirstRelocation // First architecture specific relocation. 
}; using OffsetT = uint32_t; using AddendT = int64_t; - Edge(Kind K, OffsetT Offset, Atom &Target, AddendT Addend) + Edge(Kind K, OffsetT Offset, Symbol &Target, AddendT Addend) : Target(&Target), Offset(Offset), Addend(Addend), K(K) {} OffsetT getOffset() const { return Offset; } @@ -82,169 +80,516 @@ public: return K - FirstRelocation; } bool isKeepAlive() const { return K >= FirstKeepAlive; } - Atom &getTarget() const { return *Target; } - void setTarget(Atom &Target) { this->Target = &Target; } + Symbol &getTarget() const { return *Target; } + void setTarget(Symbol &Target) { this->Target = &Target; } AddendT getAddend() const { return Addend; } void setAddend(AddendT Addend) { this->Addend = Addend; } private: - Atom *Target; - OffsetT Offset; - AddendT Addend; + Symbol *Target = nullptr; + OffsetT Offset = 0; + AddendT Addend = 0; Kind K = 0; }; -using EdgeVector = std::vector; +/// Returns the string name of the given generic edge kind, or "unknown" +/// otherwise. Useful for debugging. +const char *getGenericEdgeKindName(Edge::Kind K); -const StringRef getGenericEdgeKindName(Edge::Kind K); - -/// Base Atom class. Used by absolute and undefined atoms. -class Atom { - friend class AtomGraph; +/// Base class for Addressable entities (externals, absolutes, blocks). +class Addressable { + friend class LinkGraph; protected: - /// Create a named (as yet unresolved) atom. - Atom(StringRef Name) - : Name(Name), IsDefined(false), IsLive(false), ShouldDiscard(false), - IsGlobal(false), IsAbsolute(false), IsCallable(false), - IsExported(false), IsWeak(false), HasLayoutNext(false), - IsCommon(false) {} + Addressable(JITTargetAddress Address, bool IsDefined) + : Address(Address), IsDefined(IsDefined), IsAbsolute(false) {} - /// Create an absolute symbol atom. 
- Atom(StringRef Name, JITTargetAddress Address) - : Name(Name), Address(Address), IsDefined(true), IsLive(false), - ShouldDiscard(false), IsGlobal(false), IsAbsolute(false), - IsCallable(false), IsExported(false), IsWeak(false), - HasLayoutNext(false), IsCommon(false) {} + Addressable(JITTargetAddress Address) + : Address(Address), IsDefined(false), IsAbsolute(true) { + assert(!(IsDefined && IsAbsolute) && + "Block cannot be both defined and absolute"); + } public: - /// Returns true if this atom has a name. - bool hasName() const { return Name != StringRef(); } + Addressable(const Addressable &) = delete; + Addressable &operator=(const Addressable &) = default; + Addressable(Addressable &&) = delete; + Addressable &operator=(Addressable &&) = default; - /// Returns the name of this atom. - StringRef getName() const { return Name; } - - /// Returns the current target address of this atom. - /// The initial target address (for atoms that have one) will be taken from - /// the input object file's virtual address space. During the layout phase - /// of JIT linking the atom's address will be updated to point to its final - /// address in the JIT'd process. JITTargetAddress getAddress() const { return Address; } - - /// Set the current target address of this atom. void setAddress(JITTargetAddress Address) { this->Address = Address; } - /// Returns true if this is a defined atom. - bool isDefined() const { return IsDefined; } - - /// Returns true if this atom is marked as live. - bool isLive() const { return IsLive; } - - /// Mark this atom as live. - /// - /// Note: Only defined and absolute atoms can be marked live. - void setLive(bool IsLive) { - assert((IsDefined || IsAbsolute || !IsLive) && - "Only defined and absolute atoms can be marked live"); - this->IsLive = IsLive; - } - - /// Returns true if this atom should be discarded during pruning. - bool shouldDiscard() const { return ShouldDiscard; } - - /// Mark this atom to be discarded. 
- /// - /// Note: Only defined and absolute atoms can be marked live. - void setShouldDiscard(bool ShouldDiscard) { - assert((IsDefined || IsAbsolute || !ShouldDiscard) && - "Only defined and absolute atoms can be marked live"); - this->ShouldDiscard = ShouldDiscard; - } - - /// Returns true if this definition is global (i.e. visible outside this - /// linkage unit). - /// - /// Note: This is distict from Exported, which means visibile outside the - /// JITDylib that this graph is being linked in to. - bool isGlobal() const { return IsGlobal; } - - /// Mark this atom as global. - void setGlobal(bool IsGlobal) { this->IsGlobal = IsGlobal; } - - /// Returns true if this atom represents an absolute symbol. - bool isAbsolute() const { return IsAbsolute; } - - /// Returns true if this atom is known to be callable. - /// - /// Primarily provided for easy interoperability with ORC, which uses the - /// JITSymbolFlags::Common flag to identify symbols that can be interposed - /// with stubs. - bool isCallable() const { return IsCallable; } - - /// Mark this atom as callable. - void setCallable(bool IsCallable) { - assert((IsDefined || IsAbsolute || !IsCallable) && - "Callable atoms must be defined or absolute"); - this->IsCallable = IsCallable; - } - - /// Returns true if this atom should appear in the symbol table of a final - /// linked image. - bool isExported() const { return IsExported; } - - /// Mark this atom as exported. - void setExported(bool IsExported) { - assert((!IsExported || ((IsDefined || IsAbsolute) && hasName())) && - "Exported atoms must have names"); - this->IsExported = IsExported; - } - - /// Returns true if this is a weak symbol. - bool isWeak() const { return IsWeak; } - - /// Mark this atom as weak. - void setWeak(bool IsWeak) { this->IsWeak = IsWeak; } + /// Returns true if this is a defined addressable, in which case you + /// can downcast this to a . 
+ bool isDefined() const { return static_cast(IsDefined); } + bool isAbsolute() const { return static_cast(IsAbsolute); } private: - StringRef Name; JITTargetAddress Address = 0; - - bool IsDefined : 1; - bool IsLive : 1; - bool ShouldDiscard : 1; - - bool IsGlobal : 1; - bool IsAbsolute : 1; - bool IsCallable : 1; - bool IsExported : 1; - bool IsWeak : 1; - -protected: - // These flags only make sense for DefinedAtom, but we can minimize the size - // of DefinedAtom by defining them here. - bool HasLayoutNext : 1; - bool IsCommon : 1; + uint64_t IsDefined : 1; + uint64_t IsAbsolute : 1; }; -// Forward declare DefinedAtom. -class DefinedAtom; +using BlockOrdinal = unsigned; +using SectionOrdinal = unsigned; -raw_ostream &operator<<(raw_ostream &OS, const Atom &A); -void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E, +/// An Addressable with content and edges. +class Block : public Addressable { + friend class LinkGraph; + +private: + /// Create a zero-fill defined addressable. + Block(Section &Parent, BlockOrdinal Ordinal, JITTargetAddress Size, + JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + : Addressable(Address, true), Parent(Parent), Size(Size), + Ordinal(Ordinal) { + assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); + assert(AlignmentOffset < Alignment && + "Alignment offset cannot exceed alignment"); + assert(AlignmentOffset <= MaxAlignmentOffset && + "Alignment offset exceeds maximum"); + P2Align = Alignment ? countTrailingZeros(Alignment) : 0; + this->AlignmentOffset = AlignmentOffset; + } + + /// Create a defined addressable for the given content. 
+ Block(Section &Parent, BlockOrdinal Ordinal, StringRef Content, + JITTargetAddress Address, uint64_t Alignment, uint64_t AlignmentOffset) + : Addressable(Address, true), Parent(Parent), Data(Content.data()), + Size(Content.size()), Ordinal(Ordinal) { + assert(isPowerOf2_64(Alignment) && "Alignment must be power of 2"); + assert(AlignmentOffset < Alignment && + "Alignment offset cannot exceed alignment"); + assert(AlignmentOffset <= MaxAlignmentOffset && + "Alignment offset exceeds maximum"); + P2Align = Alignment ? countTrailingZeros(Alignment) : 0; + this->AlignmentOffset = AlignmentOffset; + } + +public: + using EdgeVector = std::vector; + using edge_iterator = EdgeVector::iterator; + using const_edge_iterator = EdgeVector::const_iterator; + + Block(const Block &) = delete; + Block &operator=(const Block &) = delete; + Block(Block &&) = delete; + Block &operator=(Block &&) = delete; + + /// Return the parent section for this block. + Section &getSection() const { return Parent; } + + /// Return the ordinal for this block. + BlockOrdinal getOrdinal() const { return Ordinal; } + + /// Returns true if this is a zero-fill block. + /// + /// If true, getSize is callable but getContent is not (the content is + /// defined to be a sequence of zero bytes of length Size). + bool isZeroFill() const { return !Data; } + + /// Returns the size of this defined addressable. + size_t getSize() const { return Size; } + + /// Get the content for this block. Block must not be a zero-fill block. + StringRef getContent() const { + assert(Data && "Section does not contain content"); + return StringRef(Data, Size); + } + + /// Set the content for this block. + /// Caller is responsible for ensuring the underlying bytes are not + /// deallocated while pointed to by this block. + void setContent(StringRef Content) { + Data = Content.data(); + Size = Content.size(); + } + + /// Get the alignment for this content. 
+ uint64_t getAlignment() const { return 1ull << P2Align; } + + /// Get the alignment offset for this content. + uint64_t getAlignmentOffset() const { return AlignmentOffset; } + + /// Add an edge to this block. + void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target, + Edge::AddendT Addend) { + Edges.push_back(Edge(K, Offset, Target, Addend)); + } + + /// Return the list of edges attached to this content. + iterator_range edges() { + return make_range(Edges.begin(), Edges.end()); + } + + /// Returns the list of edges attached to this content. + iterator_range edges() const { + return make_range(Edges.begin(), Edges.end()); + } + + /// Return the size of the edges list. + size_t edges_size() const { return Edges.size(); } + + /// Returns true if the list of edges is empty. + bool edges_empty() const { return Edges.empty(); } + +private: + static constexpr uint64_t MaxAlignmentOffset = (1ULL << 57) - 1; + + uint64_t P2Align : 5; + uint64_t AlignmentOffset : 57; + Section &Parent; + const char *Data = nullptr; + size_t Size = 0; + BlockOrdinal Ordinal = 0; + std::vector Edges; +}; + +/// Describes symbol linkage. This can be used to make resolve definition +/// clashes. +enum class Linkage : uint8_t { + Strong, + Weak, +}; + +/// For errors and debugging output. +const char *getLinkageName(Linkage L); + +/// Defines the scope in which this symbol should be visible: +/// Default -- Visible in the public interface of the linkage unit. +/// Hidden -- Visible within the linkage unit, but not exported from it. +/// Local -- Visible only within the LinkGraph. +enum class Scope : uint8_t { Default, Hidden, Local }; + +/// For debugging output. +const char *getScopeName(Scope S); + +raw_ostream &operator<<(raw_ostream &OS, const Block &B); + +/// Symbol representation. +/// +/// Symbols represent locations within Addressable objects. +/// They can be either Named or Anonymous. 
+/// Anonymous symbols have neither linkage nor visibility, and must point at +/// ContentBlocks. +/// Named symbols may be in one of four states: +/// - Null: Default initialized. Assignable, but otherwise unusable. +/// - Defined: Has both linkage and visibility and points to a ContentBlock +/// - Common: Has both linkage and visibility, points to a null Addressable. +/// - External: Has neither linkage nor visibility, points to an external +/// Addressable. +/// +class Symbol { + friend class LinkGraph; + +private: + Symbol(Addressable &Base, JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, bool IsLive, + bool IsCallable) + : Name(Name), Base(&Base), Offset(Offset), Size(Size) { + setLinkage(L); + setScope(S); + setLive(IsLive); + setCallable(IsCallable); + } + + static Symbol &constructCommon(void *SymStorage, Block &Base, StringRef Name, + JITTargetAddress Size, Scope S, bool IsLive) { + assert(SymStorage && "Storage cannot be null"); + assert(!Name.empty() && "Common symbol name cannot be empty"); + assert(Base.isDefined() && + "Cannot create common symbol from undefined block"); + assert(static_cast(Base).getSize() == Size && + "Common symbol size should match underlying block size"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, Linkage::Weak, S, IsLive, false); + return *Sym; + } + + static Symbol &constructExternal(void *SymStorage, Addressable &Base, + StringRef Name, JITTargetAddress Size) { + assert(SymStorage && "Storage cannot be null"); + assert(!Base.isDefined() && + "Cannot create external symbol from defined block"); + assert(!Name.empty() && "External symbol name cannot be empty"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default, + false, false); + return *Sym; + } + + static Symbol &constructAbsolute(void *SymStorage, Addressable &Base, + StringRef Name, JITTargetAddress Size, + Linkage L, Scope S, bool 
IsLive) { + assert(SymStorage && "Storage cannot be null"); + assert(!Base.isDefined() && + "Cannot create absolute symbol from a defined block"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, 0, Name, Size, L, S, IsLive, false); + return *Sym; + } + + static Symbol &constructAnonDef(void *SymStorage, Block &Base, + JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, + bool IsLive) { + assert(SymStorage && "Storage cannot be null"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, Offset, StringRef(), Size, Linkage::Strong, + Scope::Local, IsLive, IsCallable); + return *Sym; + } + + static Symbol &constructNamedDef(void *SymStorage, Block &Base, + JITTargetAddress Offset, StringRef Name, + JITTargetAddress Size, Linkage L, Scope S, + bool IsLive, bool IsCallable) { + assert(SymStorage && "Storage cannot be null"); + assert(!Name.empty() && "Name cannot be empty"); + auto *Sym = reinterpret_cast(SymStorage); + new (Sym) Symbol(Base, Offset, Name, Size, L, S, IsLive, IsCallable); + return *Sym; + } + +public: + /// Create a null Symbol. This allows Symbols to be default initialized for + /// use in containers (e.g. as map values). Null symbols are only useful for + /// assigning to. + Symbol() = default; + + // Symbols are not movable or copyable. + Symbol(const Symbol &) = delete; + Symbol &operator=(const Symbol &) = delete; + Symbol(Symbol &&) = delete; + Symbol &operator=(Symbol &&) = delete; + + /// Returns true if this symbol has a name. + bool hasName() const { return !Name.empty(); } + + /// Returns the name of this symbol (empty if the symbol is anonymous). + StringRef getName() const { + assert((!Name.empty() || getScope() == Scope::Local) && + "Anonymous symbol has non-local scope"); + return Name; + } + + /// Returns true if this Symbol has content (potentially) defined within this + /// object file (i.e. is anything but an external or absolute symbol). 
+ bool isDefined() const { + assert(Base && "Attempt to access null symbol"); + return Base->isDefined(); + } + + /// Returns true if this symbol is live (i.e. should be treated as a root for + /// dead stripping). + bool isLive() const { + assert(Base && "Attempting to access null symbol"); + return IsLive; + } + + /// Set this symbol's live bit. + void setLive(bool IsLive) { this->IsLive = IsLive; } + + /// Returns true is this symbol is callable. + bool isCallable() const { return IsCallable; } + + /// Set this symbol's callable bit. + void setCallable(bool IsCallable) { this->IsCallable = IsCallable; } + + /// Returns true if the underlying addressable is an unresolved external. + bool isExternal() const { + assert(Base && "Attempt to access null symbol"); + return !Base->isDefined() && !Base->isAbsolute(); + } + + /// Returns true if the underlying addressable is an absolute symbol. + bool isAbsolute() const { + assert(Base && "Attempt to access null symbol"); + return !Base->isDefined() && Base->isAbsolute(); + } + + /// Return the addressable that this symbol points to. + Addressable &getAddressable() { + assert(Base && "Cannot get underlying addressable for null symbol"); + return *Base; + } + + /// Return the addressable that thsi symbol points to. + const Addressable &getAddressable() const { + assert(Base && "Cannot get underlying addressable for null symbol"); + return *Base; + } + + /// Return the Block for this Symbol (Symbol must be defined). + Block &getBlock() { + assert(Base && "Cannot get block for null symbol"); + assert(Base->isDefined() && "Not a defined symbol"); + return static_cast(*Base); + } + + /// Return the Block for this Symbol (Symbol must be defined). + const Block &getBlock() const { + assert(Base && "Cannot get block for null symbol"); + assert(Base->isDefined() && "Not a defined symbol"); + return static_cast(*Base); + } + + /// Returns the offset for this symbol within the underlying addressable. 
+ JITTargetAddress getOffset() const { return Offset; } + + /// Returns the address of this symbol. + JITTargetAddress getAddress() const { return Base->getAddress() + Offset; } + + /// Returns the size of this symbol. + JITTargetAddress getSize() const { return Size; } + + /// Returns true if this symbol is backed by a zero-fill block. + /// This method may only be called on defined symbols. + bool isSymbolZeroFill() const { return getBlock().isZeroFill(); } + + /// Returns the content in the underlying block covered by this symbol. + /// This method may only be called on defined non-zero-fill symbols. + StringRef getSymbolContent() const { + return getBlock().getContent().substr(Offset, Size); + } + + /// Get the linkage for this Symbol. + Linkage getLinkage() const { return static_cast(L); } + + /// Set the linkage for this Symbol. + void setLinkage(Linkage L) { + assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) && + "Linkage can only be applied to defined named symbols"); + this->L = static_cast(L); + } + + /// Get the visibility for this Symbol. + Scope getScope() const { return static_cast(S); } + + /// Set the visibility for this Symbol. + void setScope(Scope S) { + assert((S == Scope::Default || Base->isDefined() || Base->isAbsolute()) && + "Invalid visibility for symbol type"); + this->S = static_cast(S); + } + +private: + void makeExternal(Addressable &A) { + assert(!A.isDefined() && "Attempting to make external with defined block"); + Base = &A; + Offset = 0; + setLinkage(Linkage::Strong); + setScope(Scope::Default); + IsLive = 0; + // note: Size and IsCallable fields left unchanged. + } + + static constexpr uint64_t MaxOffset = (1ULL << 59) - 1; + + // FIXME: A char* or SymbolStringPtr may pack better. 
+ StringRef Name; + Addressable *Base = nullptr; + uint64_t Offset : 59; + uint64_t L : 1; + uint64_t S : 2; + uint64_t IsLive : 1; + uint64_t IsCallable : 1; + JITTargetAddress Size = 0; +}; + +raw_ostream &operator<<(raw_ostream &OS, const Symbol &A); + +void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName); -/// Represents a section address range via a pair of DefinedAtom pointers to -/// the first and last atoms in the section. +/// Represents an object file section. +class Section { + friend class LinkGraph; + +private: + Section(StringRef Name, sys::Memory::ProtectionFlags Prot, + SectionOrdinal SecOrdinal) + : Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {} + + using SymbolSet = DenseSet; + using BlockSet = DenseSet; + +public: + using symbol_iterator = SymbolSet::iterator; + using const_symbol_iterator = SymbolSet::const_iterator; + + using block_iterator = BlockSet::iterator; + using const_block_iterator = BlockSet::const_iterator; + + ~Section(); + + /// Returns the name of this section. + StringRef getName() const { return Name; } + + /// Returns the protection flags for this section. + sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } + + /// Returns the ordinal for this section. + SectionOrdinal getOrdinal() const { return SecOrdinal; } + + /// Returns an iterator over the symbols defined in this section. + iterator_range symbols() { + return make_range(Symbols.begin(), Symbols.end()); + } + + /// Returns an iterator over the symbols defined in this section. + iterator_range symbols() const { + return make_range(Symbols.begin(), Symbols.end()); + } + + /// Return the number of symbols in this section. + SymbolSet::size_type symbols_size() { return Symbols.size(); } + + /// Return true if this section contains no symbols. + bool symbols_empty() const { return Symbols.empty(); } + + /// Returns the ordinal for the next block. 
+ BlockOrdinal getNextBlockOrdinal() { return NextBlockOrdinal++; } + +private: + void addSymbol(Symbol &Sym) { + assert(!Symbols.count(&Sym) && "Symbol is already in this section"); + Symbols.insert(&Sym); + } + + void removeSymbol(Symbol &Sym) { + assert(Symbols.count(&Sym) && "symbol is not in this section"); + Symbols.erase(&Sym); + } + + StringRef Name; + sys::Memory::ProtectionFlags Prot; + SectionOrdinal SecOrdinal = 0; + BlockOrdinal NextBlockOrdinal = 0; + SymbolSet Symbols; +}; + +/// Represents a section address range via a pair of Block pointers +/// to the first and last Blocks in the section. class SectionRange { public: SectionRange() = default; - SectionRange(DefinedAtom *First, DefinedAtom *Last) - : First(First), Last(Last) {} - DefinedAtom *getFirstAtom() const { + SectionRange(const Section &Sec) { + if (Sec.symbols_empty()) + return; + First = Last = *Sec.symbols().begin(); + for (auto *Sym : Sec.symbols()) { + if (Sym->getAddress() < First->getAddress()) + First = Sym; + if (Sym->getAddress() > Last->getAddress()) + Last = Sym; + } + } + Symbol *getFirstSymbol() const { assert((!Last || First) && "First can not be null if end is non-null"); return First; } - DefinedAtom *getLastAtom() const { + Symbol *getLastSymbol() const { assert((First || !Last) && "Last can not be null if start is non-null"); return Last; } @@ -252,291 +597,120 @@ public: assert((First || !Last) && "Last can not be null if start is non-null"); return !First; } - JITTargetAddress getStart() const; - JITTargetAddress getEnd() const; - uint64_t getSize() const; + JITTargetAddress getStart() const { + return First ? First->getBlock().getAddress() : 0; + } + JITTargetAddress getEnd() const { + return Last ? 
Last->getBlock().getAddress() + Last->getBlock().getSize() + : 0; + } + uint64_t getSize() const { return getEnd() - getStart(); } private: - DefinedAtom *First = nullptr; - DefinedAtom *Last = nullptr; + Symbol *First = nullptr; + Symbol *Last = nullptr; }; -/// Represents an object file section. -class Section { - friend class AtomGraph; - -private: - Section(StringRef Name, uint32_t Alignment, sys::Memory::ProtectionFlags Prot, - unsigned Ordinal, bool IsZeroFill) - : Name(Name), Alignment(Alignment), Prot(Prot), Ordinal(Ordinal), - IsZeroFill(IsZeroFill) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of 2"); - } - - using DefinedAtomSet = DenseSet; - -public: - using atom_iterator = DefinedAtomSet::iterator; - using const_atom_iterator = DefinedAtomSet::const_iterator; - - ~Section(); - StringRef getName() const { return Name; } - uint32_t getAlignment() const { return Alignment; } - sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } - unsigned getSectionOrdinal() const { return Ordinal; } - size_t getNextAtomOrdinal() { return ++NextAtomOrdinal; } - - bool isZeroFill() const { return IsZeroFill; } - - /// Returns an iterator over the atoms in the section (in no particular - /// order). - iterator_range atoms() { - return make_range(DefinedAtoms.begin(), DefinedAtoms.end()); - } - - /// Returns an iterator over the atoms in the section (in no particular - /// order). - iterator_range atoms() const { - return make_range(DefinedAtoms.begin(), DefinedAtoms.end()); - } - - /// Return the number of atoms in this section. - DefinedAtomSet::size_type atoms_size() { return DefinedAtoms.size(); } - - /// Return true if this section contains no atoms. - bool atoms_empty() const { return DefinedAtoms.empty(); } - - /// Returns the range of this section as the pair of atoms with the lowest - /// and highest target address. This operation is expensive, as it - /// must traverse all atoms in the section. 
- /// - /// Note: If the section is empty, both values will be null. The section - /// address will evaluate to null, and the size to zero. If the section - /// contains a single atom both values will point to it, the address will - /// evaluate to the address of that atom, and the size will be the size of - /// that atom. - SectionRange getRange() const; - -private: - void addAtom(DefinedAtom &DA) { - assert(!DefinedAtoms.count(&DA) && "Atom is already in this section"); - DefinedAtoms.insert(&DA); - } - - void removeAtom(DefinedAtom &DA) { - assert(DefinedAtoms.count(&DA) && "Atom is not in this section"); - DefinedAtoms.erase(&DA); - } - - StringRef Name; - uint32_t Alignment = 0; - sys::Memory::ProtectionFlags Prot; - unsigned Ordinal = 0; - unsigned NextAtomOrdinal = 0; - bool IsZeroFill = false; - DefinedAtomSet DefinedAtoms; -}; - -/// Defined atom class. Suitable for use by defined named and anonymous -/// atoms. -class DefinedAtom : public Atom { - friend class AtomGraph; - -private: - DefinedAtom(Section &Parent, JITTargetAddress Address, uint32_t Alignment) - : Atom("", Address), Parent(Parent), Ordinal(Parent.getNextAtomOrdinal()), - Alignment(Alignment) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two"); - } - - DefinedAtom(Section &Parent, StringRef Name, JITTargetAddress Address, - uint32_t Alignment) - : Atom(Name, Address), Parent(Parent), - Ordinal(Parent.getNextAtomOrdinal()), Alignment(Alignment) { - assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two"); - } - -public: - using edge_iterator = EdgeVector::iterator; - - Section &getSection() const { return Parent; } - - uint64_t getSize() const { return Size; } - - StringRef getContent() const { - assert(!Parent.isZeroFill() && "Trying to get content for zero-fill atom"); - assert(Size <= std::numeric_limits::max() && - "Content size too large"); - return {ContentPtr, static_cast(Size)}; - } - void setContent(StringRef Content) { - 
assert(!Parent.isZeroFill() && "Calling setContent on zero-fill atom?"); - ContentPtr = Content.data(); - Size = Content.size(); - } - - bool isZeroFill() const { return Parent.isZeroFill(); } - - void setZeroFill(uint64_t Size) { - assert(Parent.isZeroFill() && !ContentPtr && - "Can't set zero-fill length of a non zero-fill atom"); - this->Size = Size; - } - - uint64_t getZeroFillSize() const { - assert(Parent.isZeroFill() && - "Can't get zero-fill length of a non zero-fill atom"); - return Size; - } - - uint32_t getAlignment() const { return Alignment; } - - bool hasLayoutNext() const { return HasLayoutNext; } - void setLayoutNext(DefinedAtom &Next) { - assert(!HasLayoutNext && "Atom already has layout-next constraint"); - HasLayoutNext = true; - Edges.push_back(Edge(Edge::LayoutNext, 0, Next, 0)); - } - DefinedAtom &getLayoutNext() { - assert(HasLayoutNext && "Atom does not have a layout-next constraint"); - DefinedAtom *Next = nullptr; - for (auto &E : edges()) - if (E.getKind() == Edge::LayoutNext) { - assert(E.getTarget().isDefined() && - "layout-next target atom must be a defined atom"); - Next = static_cast(&E.getTarget()); - break; - } - assert(Next && "Missing LayoutNext edge"); - return *Next; - } - - bool isCommon() const { return IsCommon; } - - void addEdge(Edge::Kind K, Edge::OffsetT Offset, Atom &Target, - Edge::AddendT Addend) { - assert(K != Edge::LayoutNext && - "Layout edges should be added via setLayoutNext"); - Edges.push_back(Edge(K, Offset, Target, Addend)); - } - - iterator_range edges() { - return make_range(Edges.begin(), Edges.end()); - } - size_t edges_size() const { return Edges.size(); } - bool edges_empty() const { return Edges.empty(); } - - unsigned getOrdinal() const { return Ordinal; } - -private: - void setCommon(uint64_t Size) { - assert(ContentPtr == 0 && "Atom already has content?"); - IsCommon = true; - setZeroFill(Size); - } - - EdgeVector Edges; - uint64_t Size = 0; - Section &Parent; - const char *ContentPtr = nullptr; - 
unsigned Ordinal = 0; - uint32_t Alignment = 0; -}; - -inline JITTargetAddress SectionRange::getStart() const { - return First ? First->getAddress() : 0; -} - -inline JITTargetAddress SectionRange::getEnd() const { - return Last ? Last->getAddress() + Last->getSize() : 0; -} - -inline uint64_t SectionRange::getSize() const { return getEnd() - getStart(); } - -inline SectionRange Section::getRange() const { - if (atoms_empty()) - return SectionRange(); - DefinedAtom *First = *DefinedAtoms.begin(), *Last = *DefinedAtoms.begin(); - for (auto *DA : atoms()) { - if (DA->getAddress() < First->getAddress()) - First = DA; - if (DA->getAddress() > Last->getAddress()) - Last = DA; - } - return SectionRange(First, Last); -} - -class AtomGraph { +class LinkGraph { private: using SectionList = std::vector>; - using AddressToAtomMap = std::map; - using NamedAtomMap = DenseMap; - using ExternalAtomSet = DenseSet; + using ExternalSymbolSet = DenseSet; + using BlockSet = DenseSet; + + template + Addressable &createAddressable(ArgTs &&... Args) { + Addressable *A = + reinterpret_cast(Allocator.Allocate()); + new (A) Addressable(std::forward(Args)...); + return *A; + } + + void destroyAddressable(Addressable &A) { + A.~Addressable(); + Allocator.Deallocate(&A); + } + + template Block &createBlock(ArgTs &&... 
Args) { + Block *B = reinterpret_cast(Allocator.Allocate()); + new (B) Block(std::forward(Args)...); + Blocks.insert(B); + return *B; + } + + void destroyBlock(Block &B) { + Blocks.erase(&B); + B.~Block(); + Allocator.Deallocate(&B); + } + + void destroySymbol(Symbol &S) { + S.~Symbol(); + Allocator.Deallocate(&S); + } public: - using external_atom_iterator = ExternalAtomSet::iterator; + using external_symbol_iterator = ExternalSymbolSet::iterator; + + using block_iterator = BlockSet::iterator; using section_iterator = pointee_iterator; using const_section_iterator = pointee_iterator; - template - class defined_atom_iterator_impl + template + class defined_symbol_iterator_impl : public iterator_facade_base< - defined_atom_iterator_impl, + defined_symbol_iterator_impl, std::forward_iterator_tag, T> { public: - defined_atom_iterator_impl() = default; + defined_symbol_iterator_impl() = default; - defined_atom_iterator_impl(SecItrT SI, SecItrT SE) - : SI(SI), SE(SE), - AI(SI != SE ? SI->atoms().begin() : Section::atom_iterator()) { - moveToNextAtomOrEnd(); + defined_symbol_iterator_impl(SectionItrT SecI, SectionItrT SecE) + : SecI(SecI), SecE(SecE), + SymI(SecI != SecE ? 
SecI->symbols().begin() : SymbolItrT()) { + moveToNextSymbolOrEnd(); } - bool operator==(const defined_atom_iterator_impl &RHS) const { - return (SI == RHS.SI) && (AI == RHS.AI); + bool operator==(const defined_symbol_iterator_impl &RHS) const { + return (SecI == RHS.SecI) && (SymI == RHS.SymI); } T operator*() const { - assert(AI != SI->atoms().end() && "Dereferencing end?"); - return *AI; + assert(SymI != SecI->symbols().end() && "Dereferencing end?"); + return *SymI; } - defined_atom_iterator_impl operator++() { - ++AI; - moveToNextAtomOrEnd(); + defined_symbol_iterator_impl operator++() { + ++SymI; + moveToNextSymbolOrEnd(); return *this; } private: - void moveToNextAtomOrEnd() { - while (SI != SE && AI == SI->atoms().end()) { - ++SI; - if (SI == SE) - AI = Section::atom_iterator(); - else - AI = SI->atoms().begin(); + void moveToNextSymbolOrEnd() { + while (SecI != SecE && SymI == SecI->symbols().end()) { + ++SecI; + SymI = SecI == SecE ? SymbolItrT() : SecI->symbols().begin(); } } - SecItrT SI, SE; - AtomItrT AI; + SectionItrT SecI, SecE; + SymbolItrT SymI; }; - using defined_atom_iterator = - defined_atom_iterator_impl; + using defined_symbol_iterator = + defined_symbol_iterator_impl; - using const_defined_atom_iterator = - defined_atom_iterator_impl; + using const_defined_symbol_iterator = defined_symbol_iterator_impl< + const_section_iterator, Section::const_symbol_iterator, const Symbol *>; - AtomGraph(std::string Name, unsigned PointerSize, + LinkGraph(std::string Name, unsigned PointerSize, support::endianness Endianness) : Name(std::move(Name)), PointerSize(PointerSize), Endianness(Endianness) {} + ~LinkGraph(); + /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). const std::string &getName() { return Name; } @@ -544,84 +718,83 @@ public: /// Returns the pointer size for use in this graph. unsigned getPointerSize() const { return PointerSize; } - /// Returns the endianness of atom-content in this graph. 
+ /// Returns the endianness of content in this graph. support::endianness getEndianness() const { return Endianness; } /// Create a section with the given name, protection flags, and alignment. - Section &createSection(StringRef Name, uint32_t Alignment, - sys::Memory::ProtectionFlags Prot, bool IsZeroFill) { - std::unique_ptr
Sec( - new Section(Name, Alignment, Prot, Sections.size(), IsZeroFill)); + Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) { + std::unique_ptr
Sec(new Section(Name, Prot, Sections.size())); Sections.push_back(std::move(Sec)); return *Sections.back(); } - /// Add an external atom representing an undefined symbol in this graph. - Atom &addExternalAtom(StringRef Name) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - Atom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(Atom), alignof(Atom))); - new (A) Atom(Name); - ExternalAtoms.insert(A); - NamedAtoms[Name] = A; - return *A; + /// Create a content block. + Block &createContentBlock(Section &Parent, StringRef Content, + uint64_t Address, uint64_t Alignment, + uint64_t AlignmentOffset) { + return createBlock(Parent, Parent.getNextBlockOrdinal(), Content, Address, + Alignment, AlignmentOffset); } - /// Add an external atom representing an absolute symbol. - Atom &addAbsoluteAtom(StringRef Name, JITTargetAddress Addr) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - Atom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(Atom), alignof(Atom))); - new (A) Atom(Name, Addr); - AbsoluteAtoms.insert(A); - NamedAtoms[Name] = A; - return *A; + /// Create a zero-fill block. + Block &createZeroFillBlock(Section &Parent, uint64_t Size, uint64_t Address, + uint64_t Alignment, uint64_t AlignmentOffset) { + return createBlock(Parent, Parent.getNextBlockOrdinal(), Size, Address, + Alignment, AlignmentOffset); } - /// Add an anonymous defined atom to the graph. - /// - /// Anonymous atoms have content but no name. They must have an address. - DefinedAtom &addAnonymousAtom(Section &Parent, JITTargetAddress Address, - uint32_t Alignment) { - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Address, Alignment); - Parent.addAtom(*A); - getAddrToAtomMap()[A->getAddress()] = A; - return *A; + /// Add an external symbol. + /// Some formats (e.g. ELF) allow Symbols to have sizes. 
For Symbols whose + /// size is not known, you should substitute '0'. + Symbol &addExternalSymbol(StringRef Name, uint64_t Size) { + auto &Sym = Symbol::constructExternal( + Allocator.Allocate(), createAddressable(0, false), Name, Size); + ExternalSymbols.insert(&Sym); + return Sym; } - /// Add a defined atom to the graph. - /// - /// Allocates and constructs a DefinedAtom instance with the given parent, - /// name, address, and alignment. - DefinedAtom &addDefinedAtom(Section &Parent, StringRef Name, - JITTargetAddress Address, uint32_t Alignment) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Name, Address, Alignment); - Parent.addAtom(*A); - getAddrToAtomMap()[A->getAddress()] = A; - NamedAtoms[Name] = A; - return *A; + /// Add an absolute symbol. + Symbol &addAbsoluteSymbol(StringRef Name, JITTargetAddress Address, + uint64_t Size, Linkage L, Scope S, bool IsLive) { + auto &Sym = Symbol::constructAbsolute(Allocator.Allocate(), + createAddressable(Address), Name, + Size, L, S, IsLive); + AbsoluteSymbols.insert(&Sym); + return Sym; } - /// Add a common symbol atom to the graph. - /// - /// Adds a common-symbol atom to the graph with the given parent, name, - /// address, alignment and size. - DefinedAtom &addCommonAtom(Section &Parent, StringRef Name, - JITTargetAddress Address, uint32_t Alignment, - uint64_t Size) { - assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted"); - DefinedAtom *A = reinterpret_cast( - AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom))); - new (A) DefinedAtom(Parent, Name, Address, Alignment); - A->setCommon(Size); - Parent.addAtom(*A); - NamedAtoms[Name] = A; - return *A; + /// Convenience method for adding a weak zero-fill symbol. 
+ Symbol &addCommonSymbol(StringRef Name, Scope S, Section &Section, + JITTargetAddress Address, uint64_t Size, + uint64_t Alignment, bool IsLive) { + auto &Sym = Symbol::constructCommon( + Allocator.Allocate(), + createBlock(Section, Section.getNextBlockOrdinal(), Address, Size, + Alignment, 0), + Name, Size, S, IsLive); + Section.addSymbol(Sym); + return Sym; + } + + /// Add an anonymous symbol. + Symbol &addAnonymousSymbol(Block &Content, JITTargetAddress Offset, + JITTargetAddress Size, bool IsCallable, + bool IsLive) { + auto &Sym = Symbol::constructAnonDef(Allocator.Allocate(), Content, + Offset, Size, IsCallable, IsLive); + Content.getSection().addSymbol(Sym); + return Sym; + } + + /// Add a named symbol. + Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, + StringRef Name, JITTargetAddress Size, Linkage L, + Scope S, bool IsCallable, bool IsLive) { + auto &Sym = + Symbol::constructNamedDef(Allocator.Allocate(), Content, Offset, + Name, Size, L, S, IsLive, IsCallable); + Content.getSection().addSymbol(Sym); + return Sym; } iterator_range sections() { @@ -638,135 +811,79 @@ public: return nullptr; } - iterator_range external_atoms() { - return make_range(ExternalAtoms.begin(), ExternalAtoms.end()); + iterator_range external_symbols() { + return make_range(ExternalSymbols.begin(), ExternalSymbols.end()); } - iterator_range absolute_atoms() { - return make_range(AbsoluteAtoms.begin(), AbsoluteAtoms.end()); + iterator_range absolute_symbols() { + return make_range(AbsoluteSymbols.begin(), AbsoluteSymbols.end()); } - iterator_range defined_atoms() { - return make_range(defined_atom_iterator(Sections.begin(), Sections.end()), - defined_atom_iterator(Sections.end(), Sections.end())); + iterator_range defined_symbols() { + return make_range(defined_symbol_iterator(Sections.begin(), Sections.end()), + defined_symbol_iterator(Sections.end(), Sections.end())); } - iterator_range defined_atoms() const { + iterator_range defined_symbols() const { return 
make_range( - const_defined_atom_iterator(Sections.begin(), Sections.end()), - const_defined_atom_iterator(Sections.end(), Sections.end())); + const_defined_symbol_iterator(Sections.begin(), Sections.end()), + const_defined_symbol_iterator(Sections.end(), Sections.end())); } - /// Returns the atom with the given name, which must exist in this graph. - Atom &getAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - assert(I != NamedAtoms.end() && "Name not in NamedAtoms map"); - return *I->second; + iterator_range blocks() { + return make_range(Blocks.begin(), Blocks.end()); } - /// Returns the atom with the given name, which must exist in this graph and - /// be a DefinedAtom. - DefinedAtom &getDefinedAtomByName(StringRef Name) { - auto &A = getAtomByName(Name); - assert(A.isDefined() && "Atom is not a defined atom"); - return static_cast(A); - } - - /// Search for the given atom by name. - /// Returns the atom (if found) or an error (if no atom with this name - /// exists). - Expected findAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - if (I == NamedAtoms.end()) - return make_error("No atom named " + Name); - return *I->second; - } - - /// Search for the given defined atom by name. - /// Returns the defined atom (if found) or an error (if no atom with this - /// name exists, or if one exists but is not a defined atom). - Expected findDefinedAtomByName(StringRef Name) { - auto I = NamedAtoms.find(Name); - if (I == NamedAtoms.end()) - return make_error("No atom named " + Name); - if (!I->second->isDefined()) - return make_error("Atom " + Name + - " exists but is not a " - "defined atom"); - return static_cast(*I->second); - } - - /// Returns the atom covering the given address, or an error if no such atom - /// exists. - /// - /// Returns null if no atom exists at the given address. - DefinedAtom *getAtomByAddress(JITTargetAddress Address) { - refreshAddrToAtomCache(); - - // If there are no defined atoms, bail out early. 
- if (AddrToAtomCache->empty()) - return nullptr; - - // Find the atom *after* the given address. - auto I = AddrToAtomCache->upper_bound(Address); - - // If this address falls before any known atom, bail out. - if (I == AddrToAtomCache->begin()) - return nullptr; - - // The atom we're looking for is the one before the atom we found. - --I; - - // Otherwise range check the atom that was found. - assert(!I->second->getContent().empty() && "Atom content not set"); - if (Address >= I->second->getAddress() + I->second->getContent().size()) - return nullptr; - - return I->second; - } - - /// Like getAtomByAddress, but returns an Error if the given address is not - /// covered by an atom, rather than a null pointer. - Expected findAtomByAddress(JITTargetAddress Address) { - if (auto *DA = getAtomByAddress(Address)) - return *DA; - return make_error("No atom at address " + - formatv("{0:x16}", Address)); - } - - // Remove the given external atom from the graph. - void removeExternalAtom(Atom &A) { - assert(!A.isDefined() && !A.isAbsolute() && "A is not an external atom"); - assert(ExternalAtoms.count(&A) && "A is not in the external atoms set"); - ExternalAtoms.erase(&A); - A.~Atom(); - } - - /// Remove the given absolute atom from the graph. - void removeAbsoluteAtom(Atom &A) { - assert(A.isAbsolute() && "A is not an absolute atom"); - assert(AbsoluteAtoms.count(&A) && "A is not in the absolute atoms set"); - AbsoluteAtoms.erase(&A); - A.~Atom(); - } - - /// Remove the given defined atom from the graph. - void removeDefinedAtom(DefinedAtom &DA) { - if (AddrToAtomCache) { - assert(AddrToAtomCache->count(DA.getAddress()) && - "Cache exists, but does not contain atom"); - AddrToAtomCache->erase(DA.getAddress()); + /// Turn a defined symbol into an external one. 
+ void makeExternal(Symbol &Sym) { + if (Sym.getAddressable().isAbsolute()) { + assert(AbsoluteSymbols.count(&Sym) && + "Sym is not in the absolute symbols set"); + AbsoluteSymbols.erase(&Sym); + } else { + assert(Sym.isDefined() && "Sym is not a defined symbol"); + Section &Sec = Sym.getBlock().getSection(); + Sec.removeSymbol(Sym); } - if (DA.hasName()) { - assert(NamedAtoms.count(DA.getName()) && "Named atom not in map"); - NamedAtoms.erase(DA.getName()); - } - DA.getSection().removeAtom(DA); - DA.~DefinedAtom(); + Sym.makeExternal(createAddressable(false)); + ExternalSymbols.insert(&Sym); } - /// Invalidate the atom-to-address map. - void invalidateAddrToAtomMap() { AddrToAtomCache = None; } + /// Removes an external symbol. Also removes the underlying Addressable. + void removeExternalSymbol(Symbol &Sym) { + assert(!Sym.isDefined() && !Sym.isAbsolute() && + "Sym is not an external symbol"); + assert(ExternalSymbols.count(&Sym) && "Symbol is not in the externals set"); + ExternalSymbols.erase(&Sym); + Addressable &Base = *Sym.Base; + destroySymbol(Sym); + destroyAddressable(Base); + } + + /// Remove an absolute symbol. Also removes the underlying Addressable. + void removeAbsoluteSymbol(Symbol &Sym) { + assert(!Sym.isDefined() && Sym.isAbsolute() && + "Sym is not an absolute symbol"); + assert(AbsoluteSymbols.count(&Sym) && + "Symbol is not in the absolute symbols set"); + AbsoluteSymbols.erase(&Sym); + Addressable &Base = *Sym.Base; + destroySymbol(Sym); + destroyAddressable(Base); + } + + /// Removes defined symbols. Does not remove the underlying block. + void removeDefinedSymbol(Symbol &Sym) { + assert(Sym.isDefined() && "Sym is not a defined symbol"); + Sym.getBlock().getSection().removeSymbol(Sym); + destroySymbol(Sym); + } + + /// Remove a block. + void removeBlock(Block &B) { + Blocks.erase(&B); + destroyBlock(B); + } /// Dump the graph. 
/// @@ -778,87 +895,84 @@ public: std::function()); private: - AddressToAtomMap &getAddrToAtomMap() { - refreshAddrToAtomCache(); - return *AddrToAtomCache; - } - - const AddressToAtomMap &getAddrToAtomMap() const { - refreshAddrToAtomCache(); - return *AddrToAtomCache; - } - - void refreshAddrToAtomCache() const { - if (!AddrToAtomCache) { - AddrToAtomCache = AddressToAtomMap(); - for (auto *DA : defined_atoms()) - (*AddrToAtomCache)[DA->getAddress()] = const_cast(DA); - } - } - - // Put the BumpPtrAllocator first so that we don't free any of the atoms in - // it until all of their destructors have been run. - BumpPtrAllocator AtomAllocator; + // Put the BumpPtrAllocator first so that we don't free any of the underlying + // memory until the Symbol/Addressable destructors have been run. + BumpPtrAllocator Allocator; std::string Name; unsigned PointerSize; support::endianness Endianness; + BlockSet Blocks; SectionList Sections; - NamedAtomMap NamedAtoms; - ExternalAtomSet ExternalAtoms; - ExternalAtomSet AbsoluteAtoms; - mutable Optional AddrToAtomCache; + ExternalSymbolSet ExternalSymbols; + ExternalSymbolSet AbsoluteSymbols; }; -/// A function for mutating AtomGraphs. -using AtomGraphPassFunction = std::function; +/// A function for mutating LinkGraphs. +using LinkGraphPassFunction = std::function; -/// A list of atom graph passes. -using AtomGraphPassList = std::vector; +/// A list of LinkGraph passes. +using LinkGraphPassList = std::vector; -/// An atom graph pass configuration, consisting of a list of pre-prune, +/// A LinkGraph pass configuration, consisting of a list of pre-prune, /// post-prune, and post-fixup passes. struct PassConfiguration { /// Pre-prune passes. /// /// These passes are called on the graph after it is built, and before any - /// atoms have been pruned. + /// symbols have been pruned. /// - /// Notable use cases: Marking atoms live or should-discard.
- AtomGraphPassList PrePrunePasses; + /// Notable use cases: Marking symbols live or should-discard. + LinkGraphPassList PrePrunePasses; /// Post-prune passes. /// - /// These passes are called on the graph after dead and should-discard atoms - /// have been removed, but before fixups are applied. + /// These passes are called on the graph after dead stripping, but before + /// fixups are applied. /// - /// Notable use cases: Building GOT, stub, and TLV atoms. - AtomGraphPassList PostPrunePasses; + /// Notable use cases: Building GOT, stub, and TLV symbols. + LinkGraphPassList PostPrunePasses; /// Post-fixup passes. /// - /// These passes are called on the graph after atom contents has been copied + /// These passes are called on the graph after block contents has been copied /// to working memory, and fixups applied. /// /// Notable use cases: Testing and validation. - AtomGraphPassList PostFixupPasses; + LinkGraphPassList PostFixupPasses; }; /// A map of symbol names to resolved addresses. using AsyncLookupResult = DenseMap; -/// A function to call with a resolved symbol map (See AsyncLookupResult) or an -/// error if resolution failed. -using JITLinkAsyncLookupContinuation = - std::function LR)>; +/// A function object to call with a resolved symbol map (See AsyncLookupResult) +/// or an error if resolution failed. +class JITLinkAsyncLookupContinuation { +public: + virtual ~JITLinkAsyncLookupContinuation() {} + virtual void run(Expected LR) = 0; -/// An asynchronous symbol lookup. Performs a search (possibly asynchronously) -/// for the given symbols, calling the given continuation with either the result -/// (if the lookup succeeds), or an error (if the lookup fails). -using JITLinkAsyncLookupFunction = - std::function &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation)>; +private: + virtual void anchor(); +}; + +/// Create a lookup continuation from a function object. 
+template +std::unique_ptr +createLookupContinuation(Continuation Cont) { + + class Impl final : public JITLinkAsyncLookupContinuation { + public: + Impl(Continuation C) : C(std::move(C)) {} + void run(Expected LR) override { C(std::move(LR)); } + + private: + Continuation C; + }; + + return std::make_unique(std::move(Cont)); +} /// Holds context for a single jitLink invocation. class JITLinkContext { @@ -881,13 +995,13 @@ public: /// lookup continutation which it must call with a result to continue the /// linking process. virtual void lookup(const DenseSet &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation) = 0; + std::unique_ptr LC) = 0; - /// Called by JITLink once all defined atoms in the graph have been assigned - /// their final memory locations in the target process. At this point he - /// atom graph can be, inspected to build a symbol table however the atom + /// Called by JITLink once all defined symbols in the graph have been assigned + /// their final memory locations in the target process. At this point the + /// LinkGraph can be inspected to build a symbol table, however the block /// content will not generally have been copied to the target location yet. - virtual void notifyResolved(AtomGraph &G) = 0; + virtual void notifyResolved(LinkGraph &G) = 0; /// Called by JITLink to notify the context that the object has been /// finalized (i.e. emitted to memory and memory permissions set). If all of @@ -904,20 +1018,20 @@ public: /// Returns the mark-live pass to be used for this link. If no pass is /// returned (the default) then the target-specific linker implementation will - /// choose a conservative default (usually marking all atoms live). + /// choose a conservative default (usually marking all symbols live). /// This function is only called if shouldAddDefaultTargetPasses returns true, /// otherwise the JITContext is responsible for adding a mark-live pass in /// modifyPassConfig. 
- virtual AtomGraphPassFunction getMarkLivePass(const Triple &TT) const; + virtual LinkGraphPassFunction getMarkLivePass(const Triple &TT) const; /// Called by JITLink to modify the pass pipeline prior to linking. /// The default version performs no modification. virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config); }; -/// Marks all atoms in a graph live. This can be used as a default, conservative -/// mark-live implementation. -Error markAllAtomsLive(AtomGraph &G); +/// Marks all symbols in a graph live. This can be used as a default, +/// conservative mark-live implementation. +Error markAllSymbolsLive(LinkGraph &G); /// Basic JITLink implementation. /// diff --git a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 9d0b37fe4a4..ac5a593bb77 100644 --- a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -33,20 +33,19 @@ public: class SegmentRequest { public: SegmentRequest() = default; - SegmentRequest(size_t ContentSize, unsigned ContentAlign, - uint64_t ZeroFillSize, unsigned ZeroFillAlign) - : ContentSize(ContentSize), ZeroFillSize(ZeroFillSize), - ContentAlign(ContentAlign), ZeroFillAlign(ZeroFillAlign) {} + SegmentRequest(uint64_t Alignment, size_t ContentSize, + uint64_t ZeroFillSize) + : Alignment(Alignment), ContentSize(ContentSize), + ZeroFillSize(ZeroFillSize) { + assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2"); + } + uint64_t getAlignment() const { return Alignment; } size_t getContentSize() const { return ContentSize; } - unsigned getContentAlignment() const { return ContentAlign; } uint64_t getZeroFillSize() const { return ZeroFillSize; } - unsigned getZeroFillAlignment() const { return ZeroFillAlign; } - private: + uint64_t Alignment = 0; size_t ContentSize = 0; uint64_t ZeroFillSize = 0; - unsigned ContentAlign = 0; - unsigned ZeroFillAlign = 0; }; 
using SegmentsRequestMap = DenseMap; diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h new file mode 100644 index 00000000000..d70b545fff8 --- /dev/null +++ b/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -0,0 +1,60 @@ +//===---- MachO_arm64.h - JIT link functions for MachO/arm64 ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for MachO/arm64. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H +#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +namespace MachO_arm64_Edges { + +enum MachOARM64RelocationKind : Edge::Kind { + Branch26 = Edge::FirstRelocation, + Pointer32, + Pointer64, + Pointer64Anon, + Page21, + PageOffset12, + GOTPage21, + GOTPageOffset12, + PointerToGOT, + PairedAddend, + LDRLiteral19, + Delta32, + Delta64, + NegDelta32, + NegDelta64, +}; + +} // namespace MachO_arm64_Edges + +/// jit-link the given object buffer, which must be a MachO arm64 object file. +/// +/// If PrePrunePasses is empty then a default mark-live pass will be inserted +/// that will mark all exported atoms live. If PrePrunePasses is not empty, the +/// caller is responsible for including a pass to mark atoms as live. +/// +/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will +/// be inserted. If PostPrunePasses is not empty then the caller is responsible +/// for including a pass to insert GOT and stub edges. 
+void jitLink_MachO_arm64(std::unique_ptr Ctx); + +/// Return the string name of the given MachO arm64 edge kind. +StringRef getMachOARM64RelocationKindName(Edge::Kind R); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_ARM64_H diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h index 1d5b586afc3..00a7feb86e8 100644 --- a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h +++ b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h @@ -22,6 +22,7 @@ namespace MachO_x86_64_Edges { enum MachOX86RelocationKind : Edge::Kind { Branch32 = Edge::FirstRelocation, + Pointer32, Pointer64, Pointer64Anon, PCRel32, diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h index b14154c5b5e..c0f1ca4b987 100644 --- a/include/llvm/ExecutionEngine/JITSymbol.h +++ b/include/llvm/ExecutionEngine/JITSymbol.h @@ -23,6 +23,7 @@ #include #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -217,7 +218,7 @@ private: /// Represents a symbol in the JIT. class JITSymbol { public: - using GetAddressFtor = std::function()>; + using GetAddressFtor = unique_function()>; /// Create a 'null' symbol, used to represent a "symbol not found" /// result from a successful (non-erroneous) lookup. 
@@ -325,7 +326,7 @@ class JITSymbolResolver { public: using LookupSet = std::set; using LookupResult = std::map; - using OnResolvedFunction = std::function)>; + using OnResolvedFunction = unique_function)>; virtual ~JITSymbolResolver() = default; diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 5f593a27cad..7946b5b7b20 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -26,6 +26,7 @@ #include "llvm/ExecutionEngine/Orc/LazyReexports.h" #include "llvm/ExecutionEngine/Orc/Legacy.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -91,6 +92,8 @@ public: /// Sets the partition function. void setPartitionFunction(PartitionFunction Partition); + /// Sets the ImplSymbolMap + void setImplMap(ImplSymbolMap *Imp); /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; @@ -128,6 +131,7 @@ private: PerDylibResourcesMap DylibResources; PartitionFunction Partition = compileRequested; SymbolLinkagePromoter PromoteSymbols; + ImplSymbolMap *AliaseeImpls = nullptr; }; /// Compile-on-demand layer. @@ -187,7 +191,7 @@ private: std::unique_ptr> wrapOwnership(ResourcePtrT ResourcePtr) { using RO = ResourceOwnerImpl; - return llvm::make_unique(std::move(ResourcePtr)); + return std::make_unique(std::move(ResourcePtr)); } struct LogicalDylib { @@ -440,7 +444,7 @@ private: return Error::success(); // Create the GlobalValues module. 
- auto GVsM = llvm::make_unique((SrcM.getName() + ".globals").str(), + auto GVsM = std::make_unique((SrcM.getName() + ".globals").str(), SrcM.getContext()); GVsM->setDataLayout(DL); @@ -633,7 +637,7 @@ private: NewName += F->getName(); } - auto M = llvm::make_unique(NewName, SrcM.getContext()); + auto M = std::make_unique(NewName, SrcM.getContext()); M->setDataLayout(SrcM.getDataLayout()); ValueToValueMapTy VMap; diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h index 94a5618233e..4f22a4c3879 100644 --- a/include/llvm/ExecutionEngine/Orc/Core.h +++ b/include/llvm/ExecutionEngine/Orc/Core.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_CORE_H #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/ExecutionEngine/OrcV1Deprecation.h" @@ -51,8 +52,7 @@ using SymbolMap = DenseMap; /// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. using SymbolFlagsMap = DenseMap; -/// A base class for materialization failures that allows the failing -/// symbols to be obtained for logging. +/// A map from JITDylibs to sets of symbols. using SymbolDependenceMap = DenseMap; /// A list of (JITDylib*, bool) pairs. @@ -108,7 +108,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S); /// Callback to notify client that symbols have been resolved. -using SymbolsResolvedCallback = std::function)>; +using SymbolsResolvedCallback = unique_function)>; /// Callback to register the dependencies for a given query. 
using RegisterDependenciesFunction = @@ -124,13 +124,13 @@ class FailedToMaterialize : public ErrorInfo { public: static char ID; - FailedToMaterialize(SymbolNameSet Symbols); + FailedToMaterialize(std::shared_ptr Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; - const SymbolNameSet &getSymbols() const { return Symbols; } + const SymbolDependenceMap &getSymbols() const { return *Symbols; } private: - SymbolNameSet Symbols; + std::shared_ptr Symbols; }; /// Used to notify clients when symbols can not be found during a lookup. @@ -205,12 +205,26 @@ public: /// symbols must be ones covered by this MaterializationResponsibility /// instance. Individual calls to this method may resolve a subset of the /// symbols, but all symbols must have been resolved prior to calling emit. - void notifyResolved(const SymbolMap &Symbols); + /// + /// This method will return an error if any symbols being resolved have been + /// moved to the error state due to the failure of a dependency. If this + /// method returns an error then clients should log it and call + /// failMaterialize. If no dependencies have been registered for the + /// symbols covered by this MaterializationResponsibility then this method + /// is guaranteed to return Error::success() and can be wrapped with cantFail. + Error notifyResolved(const SymbolMap &Symbols); /// Notifies the target JITDylib (and any pending queries on that JITDylib) /// that all symbols covered by this MaterializationResponsibility instance /// have been emitted. - void notifyEmitted(); + /// + /// This method will return an error if any symbols being resolved have been + /// moved to the error state due to the failure of a dependency. If this + /// method returns an error then clients should log it and call + /// failMaterialize.
If no dependencies have been registered for the + /// symbols covered by this MaterializationResponsibility then this method + /// is guaranteed to return Error::success() and can be wrapped with cantFail. + Error notifyEmitted(); /// Adds new symbols to the JITDylib and this responsibility instance. /// JITDylib entries start out in the materializing state. @@ -346,7 +360,7 @@ private: /// inline std::unique_ptr absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( std::move(Symbols), std::move(K)); } @@ -390,7 +404,7 @@ private: /// \endcode inline std::unique_ptr symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( nullptr, true, std::move(Aliases), std::move(K)); } @@ -402,7 +416,7 @@ symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { inline std::unique_ptr reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, bool MatchNonExported = false, VModuleKey K = VModuleKey()) { - return llvm::make_unique( + return std::make_unique( &SourceJD, MatchNonExported, std::move(Aliases), std::move(K)); } @@ -411,32 +425,13 @@ reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, Expected buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); -/// ReexportsGenerator can be used with JITDylib::setGenerator to automatically -/// re-export a subset of the source JITDylib's symbols in the target. -class ReexportsGenerator { -public: - using SymbolPredicate = std::function; - - /// Create a reexports generator. If an Allow predicate is passed, only - /// symbols for which the predicate returns true will be reexported. If no - /// Allow predicate is passed, all symbols will be exported.
- ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, - SymbolPredicate Allow = SymbolPredicate()); - - Expected operator()(JITDylib &JD, const SymbolNameSet &Names); - -private: - JITDylib &SourceJD; - bool MatchNonExported = false; - SymbolPredicate Allow; -}; - /// Represents the state that a symbol has reached during materialization. enum class SymbolState : uint8_t { Invalid, /// No symbol should be in this state. NeverSearched, /// Added to the symbol table, never queried. Materializing, /// Queried, materialization begun. Resolved, /// Assigned address, still materializing. + Emitted, /// Emitted to memory, but waiting on transitive dependencies. Ready = 0x3f /// Ready and safe for clients to access. }; @@ -502,8 +497,12 @@ class JITDylib { friend class ExecutionSession; friend class MaterializationResponsibility; public: - using GeneratorFunction = std::function( - JITDylib &Parent, const SymbolNameSet &Names)>; + class DefinitionGenerator { + public: + virtual ~DefinitionGenerator(); + virtual Expected + tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0; + }; using AsynchronousSymbolQuerySet = std::set>; @@ -519,13 +518,20 @@ public: /// Get a reference to the ExecutionSession for this JITDylib. ExecutionSession &getExecutionSession() const { return ES; } - /// Set a definition generator. If set, whenever a symbol fails to resolve - /// within this JITDylib, lookup and lookupFlags will pass the unresolved - /// symbols set to the definition generator. The generator can optionally - /// add a definition for the unresolved symbols to the dylib. - void setGenerator(GeneratorFunction DefGenerator) { - this->DefGenerator = std::move(DefGenerator); - } + /// Adds a definition generator to this JITDylib and returns a reference to + /// it.
+ /// + /// When JITDylibs are searched during lookup, if no existing definition of + /// a symbol is found, then any generators that have been added are run (in + /// the order that they were added) to potentially generate a definition. + template + GeneratorT &addGenerator(std::unique_ptr DefGenerator); + + /// Remove a definition generator from this JITDylib. + /// + /// The given generator must exist in this JITDylib's generators list (i.e. + /// have been added and not yet removed). + void removeGenerator(DefinitionGenerator &G); /// Set the search order to be used when fixing up definitions in JITDylib. /// This will replace the previous search order, and apply to any symbol @@ -633,17 +639,17 @@ private: struct MaterializingInfo { SymbolDependenceMap Dependants; SymbolDependenceMap UnemittedDependencies; - bool IsEmitted = false; void addQuery(std::shared_ptr Q); void removeQuery(const AsynchronousSymbolQuery &Q); AsynchronousSymbolQueryList takeQueriesMeeting(SymbolState RequiredState); - AsynchronousSymbolQueryList takeAllQueries(); + AsynchronousSymbolQueryList takeAllPendingQueries() { + return std::move(PendingQueries); + } bool hasQueriesPending() const { return !PendingQueries.empty(); } const AsynchronousSymbolQueryList &pendingQueries() const { return PendingQueries; } - private: AsynchronousSymbolQueryList PendingQueries; }; @@ -710,9 +716,9 @@ private: SymbolNameSet &Unresolved, bool MatchNonExported, MaterializationUnitList &MUs); - void lodgeQueryImpl(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQueryImpl(std::shared_ptr &Q, + SymbolNameSet &Unresolved, bool MatchNonExported, + MaterializationUnitList &MUs); bool lookupImpl(std::shared_ptr &Q, std::vector> &MUs, @@ -734,18 +740,20 @@ private: void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependants); - void resolve(const SymbolMap &Resolved); + Error resolve(const SymbolMap &Resolved); - void 
emit(const SymbolFlagsMap &Emitted); + Error emit(const SymbolFlagsMap &Emitted); - void notifyFailed(const SymbolNameSet &FailedSymbols); + using FailedSymbolsWorklist = + std::vector>; + static void notifyFailed(FailedSymbolsWorklist FailedSymbols); ExecutionSession &ES; std::string JITDylibName; SymbolTable Symbols; UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; - GeneratorFunction DefGenerator; + std::vector> DefGenerators; JITDylibSearchList SearchOrder; }; @@ -933,6 +941,14 @@ private: OutstandingMUs; }; +template +GeneratorT &JITDylib::addGenerator(std::unique_ptr DefGenerator) { + auto &G = *DefGenerator; + ES.runSessionLocked( + [&]() { DefGenerators.push_back(std::move(DefGenerator)); }); + return G; +} + template auto JITDylib::withSearchOrderDo(Func &&F) -> decltype(F(std::declval())) { @@ -972,6 +988,27 @@ Error JITDylib::define(std::unique_ptr &MU) { }); } +/// ReexportsGenerator can be used with JITDylib::addGenerator to automatically +/// re-export a subset of the source JITDylib's symbols in the target. +class ReexportsGenerator : public JITDylib::DefinitionGenerator { +public: + using SymbolPredicate = std::function; + + /// Create a reexports generator. If an Allow predicate is passed, only + /// symbols for which the predicate returns true will be reexported. If no + /// Allow predicate is passed, all symbols will be exported. + ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, + SymbolPredicate Allow = SymbolPredicate()); + + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; + +private: + JITDylib &SourceJD; + bool MatchNonExported = false; + SymbolPredicate Allow; +}; + /// Mangles symbol names then uniques them in the context of an /// ExecutionSession.
class MangleAndInterner { diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index 75865920c74..cf0a428662e 100644 --- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/Object/Archive.h" #include "llvm/Support/DynamicLibrary.h" #include #include @@ -37,6 +38,8 @@ class Value; namespace orc { +class ObjectLayer; + /// This iterator provides a convenient way to iterate over the elements /// of an llvm.global_ctors/llvm.global_dtors instance. /// @@ -237,7 +240,7 @@ public: /// If an instance of this class is attached to a JITDylib as a fallback /// definition generator, then any symbol found in the given DynamicLibrary that /// passes the 'Allow' predicate will be added to the JITDylib. -class DynamicLibrarySearchGenerator { +class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { public: using SymbolPredicate = std::function; @@ -253,19 +256,20 @@ public: /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. - static Expected + static Expected> Load(const char *FileName, char GlobalPrefix, SymbolPredicate Allow = SymbolPredicate()); /// Creates a DynamicLibrarySearchGenerator that searches for symbols in /// the current process. 
- static Expected + static Expected> GetForCurrentProcess(char GlobalPrefix, SymbolPredicate Allow = SymbolPredicate()) { return Load(nullptr, GlobalPrefix, std::move(Allow)); } - Expected operator()(JITDylib &JD, const SymbolNameSet &Names); + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; private: sys::DynamicLibrary Dylib; @@ -273,6 +277,40 @@ private: char GlobalPrefix; }; +/// A utility class to expose symbols from a static library. +/// +/// If an instance of this class is attached to a JITDylib as a fallback +/// definition generator, then any symbol found in the archive will result in +/// the containing object being added to the JITDylib. +class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { +public: + /// Try to create a StaticLibraryDefinitionGenerator from the given path. + /// + /// This call will succeed if the file at the given path is a valid static + /// library archive, otherwise it will return an error. + static Expected> + Load(ObjectLayer &L, const char *FileName); + + /// Try to create a StaticLibraryDefinitionGenerator from the given memory buffer. + /// This call will succeed if the buffer contains a valid archive, otherwise + /// it will return an error.
+ static Expected> + Create(ObjectLayer &L, std::unique_ptr ArchiveBuffer); + + Expected tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) override; + +private: + StaticLibraryDefinitionGenerator(ObjectLayer &L, + std::unique_ptr ArchiveBuffer, + Error &Err); + + ObjectLayer &L; + std::unique_ptr ArchiveBuffer; + object::Archive Archive; + size_t UnrealizedObjects = 0; +}; + } // end namespace orc } // end namespace llvm diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index 1b4c8b6cd95..b71e5b33971 100644 --- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -22,6 +22,9 @@ namespace llvm { class Module; namespace orc { +/// A layer that applies a transform to emitted modules. +/// The transform function is responsible for locking the ThreadSafeContext +/// before operating on the module. class IRTransformLayer : public IRLayer { public: using TransformFunction = std::function( diff --git a/include/llvm/ExecutionEngine/Orc/LLJIT.h b/include/llvm/ExecutionEngine/Orc/LLJIT.h index 0aac1916423..b1e47d77557 100644 --- a/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -184,8 +184,8 @@ private: class LLJITBuilderState { public: - using ObjectLinkingLayerCreator = - std::function(ExecutionSession &)>; + using ObjectLinkingLayerCreator = std::function( + ExecutionSession &, const Triple &TT)>; using CompileFunctionCreator = std::function( diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index 855e31b3354..b31914f12a0 100644 --- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include namespace llvm { @@ -62,7 +63,7 @@ 
std::shared_ptr> createLambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { using LR = LambdaResolver; - return make_unique(std::move(DylibLookupFtor), + return std::make_unique(std::move(DylibLookupFtor), std::move(ExternalLookupFtor)); } @@ -72,7 +73,7 @@ createLambdaResolver(ORCv1DeprecationAcknowledgement, DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { using LR = LambdaResolver; - return make_unique(AcknowledgeORCv1Deprecation, + return std::make_unique(AcknowledgeORCv1Deprecation, std::move(DylibLookupFtor), std::move(ExternalLookupFtor)); } diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 16202d89f86..b67a9feed52 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -49,28 +49,24 @@ private: switch (EmitState) { case NotEmitted: if (auto GV = searchGVs(Name, ExportedSymbolsOnly)) { - // Create a std::string version of Name to capture here - the argument - // (a StringRef) may go away before the lambda is executed. - // FIXME: Use capture-init when we move to C++14. 
- std::string PName = Name; JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV); - auto GetAddress = - [this, ExportedSymbolsOnly, PName, &B]() -> Expected { - if (this->EmitState == Emitting) - return 0; - else if (this->EmitState == NotEmitted) { - this->EmitState = Emitting; - if (auto Err = this->emitToBaseLayer(B)) - return std::move(Err); - this->EmitState = Emitted; - } - if (auto Sym = B.findSymbolIn(K, PName, ExportedSymbolsOnly)) - return Sym.getAddress(); - else if (auto Err = Sym.takeError()) + auto GetAddress = [this, ExportedSymbolsOnly, Name = Name.str(), + &B]() -> Expected { + if (this->EmitState == Emitting) + return 0; + else if (this->EmitState == NotEmitted) { + this->EmitState = Emitting; + if (auto Err = this->emitToBaseLayer(B)) return std::move(Err); - else - llvm_unreachable("Successful symbol lookup should return " - "definition address here"); + this->EmitState = Emitted; + } + if (auto Sym = B.findSymbolIn(K, Name, ExportedSymbolsOnly)) + return Sym.getAddress(); + else if (auto Err = Sym.takeError()) + return std::move(Err); + else + llvm_unreachable("Successful symbol lookup should return " + "definition address here"); }; return JITSymbol(std::move(GetAddress), Flags); } else @@ -171,7 +167,7 @@ private: bool ExportedSymbolsOnly) const { assert(!MangledSymbols && "Mangled symbols map already exists?"); - auto Symbols = llvm::make_unique>(); + auto Symbols = std::make_unique>(); Mangler Mang; @@ -209,7 +205,7 @@ public: Error addModule(VModuleKey K, std::unique_ptr M) { assert(!ModuleMap.count(K) && "VModuleKey K already in use"); ModuleMap[K] = - llvm::make_unique(std::move(K), std::move(M)); + std::make_unique(std::move(K), std::move(M)); return Error::success(); } diff --git a/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 9fdd1d15f78..311ed59b154 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ 
-18,6 +18,7 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" namespace llvm { @@ -70,7 +71,7 @@ public: template static std::unique_ptr createNotifyResolvedFunction(NotifyResolvedImpl NotifyResolved) { - return llvm::make_unique>( + return std::make_unique>( std::move(NotifyResolved)); } @@ -159,7 +160,7 @@ public: IndirectStubsManager &ISManager, JITDylib &SourceJD, SymbolAliasMap CallableAliases, - VModuleKey K); + ImplSymbolMap *SrcJDLoc, VModuleKey K); StringRef getName() const override; @@ -174,6 +175,7 @@ private: SymbolAliasMap CallableAliases; std::shared_ptr NotifyResolved; + ImplSymbolMap *AliaseeTable; }; /// Define lazy-reexports based on the given SymbolAliasMap. Each lazy re-export @@ -182,9 +184,10 @@ private: inline std::unique_ptr lazyReexports(LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager, JITDylib &SourceJD, - SymbolAliasMap CallableAliases, VModuleKey K = VModuleKey()) { - return llvm::make_unique( - LCTManager, ISManager, SourceJD, std::move(CallableAliases), + SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc = nullptr, + VModuleKey K = VModuleKey()) { + return std::make_unique( + LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc, std::move(K)); } diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h index f9cbbf6ff18..148e260c956 100644 --- a/include/llvm/ExecutionEngine/Orc/Legacy.h +++ b/include/llvm/ExecutionEngine/Orc/Legacy.h @@ -84,7 +84,7 @@ createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet, typename std::remove_reference::type>::type, typename std::remove_cv< typename std::remove_reference::type>::type>; - return llvm::make_unique( + return std::make_unique( std::forward(GetResponsibilitySet), std::forward(Lookup)); } diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h 
b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index c1e7d27f446..caf8e707516 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -73,6 +73,9 @@ public: virtual Error notifyRemovingAllModules() { return Error::success(); } }; + using ReturnObjectBufferFunction = + std::function)>; + /// Construct an ObjectLinkingLayer with the given NotifyLoaded, /// and NotifyEmitted functors. ObjectLinkingLayer(ExecutionSession &ES, @@ -81,6 +84,13 @@ public: /// Destruct an ObjectLinkingLayer. ~ObjectLinkingLayer(); + /// Set an object buffer return function. By default object buffers are + /// deleted once the JIT has linked them. If a return function is set then + /// it will be called to transfer ownership of the buffer instead. + void setReturnObjectBuffer(ReturnObjectBufferFunction ReturnObjectBuffer) { + this->ReturnObjectBuffer = std::move(ReturnObjectBuffer); + } + /// Add a pass-config modifier. ObjectLinkingLayer &addPlugin(std::unique_ptr P) { std::lock_guard Lock(LayerMutex); @@ -138,6 +148,7 @@ private: jitlink::JITLinkMemoryManager &MemMgr; bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; + ReturnObjectBufferFunction ReturnObjectBuffer; DenseMap TrackedAllocs; std::vector UntrackedAllocs; std::vector> Plugins; @@ -153,10 +164,16 @@ public: Error notifyRemovingAllModules() override; private: + + struct EHFrameRange { + JITTargetAddress Addr = 0; + size_t Size; + }; + jitlink::EHFrameRegistrar &Registrar; - DenseMap InProcessLinks; - DenseMap TrackedEHFrameAddrs; - std::vector UntrackedEHFrameAddrs; + DenseMap InProcessLinks; + DenseMap TrackedEHFrameRanges; + std::vector UntrackedEHFrameRanges; }; } // end namespace orc diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index 8b875b7906e..86e8d5df3ad 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ 
b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -493,7 +493,7 @@ public: ExecutionSession &ES, JITTargetAddress ErrorHandlerAddress) : JITCompileCallbackManager( - llvm::make_unique(Client), ES, + std::make_unique(Client), ES, ErrorHandlerAddress) {} }; @@ -553,7 +553,7 @@ public: auto Id = IndirectStubOwnerIds.getNext(); if (auto Err = callB(Id)) return std::move(Err); - return llvm::make_unique(*this, Id); + return std::make_unique(*this, Id); } Expected diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 07c7471afc6..752a0a34e0a 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -359,9 +359,9 @@ public: { assert(KeyName != nullptr && "No keyname pointer"); std::lock_guard Lock(SerializersMutex); - // FIXME: Move capture Serialize once we have C++14. Serializers[ErrorInfoT::classID()] = - [KeyName, Serialize](ChannelT &C, const ErrorInfoBase &EIB) -> Error { + [KeyName, Serialize = std::move(Serialize)]( + ChannelT &C, const ErrorInfoBase &EIB) -> Error { assert(EIB.dynamicClassID() == ErrorInfoT::classID() && "Serializer called for wrong error type"); if (auto Err = serializeSeq(C, *KeyName)) @@ -551,26 +551,26 @@ public: /// RPC channel serialization for std::tuple. static Error serialize(ChannelT &C, const std::tuple &V) { - return serializeTupleHelper(C, V, llvm::index_sequence_for()); + return serializeTupleHelper(C, V, std::index_sequence_for()); } /// RPC channel deserialization for std::tuple. static Error deserialize(ChannelT &C, std::tuple &V) { - return deserializeTupleHelper(C, V, llvm::index_sequence_for()); + return deserializeTupleHelper(C, V, std::index_sequence_for()); } private: // Serialization helper for std::tuple. 
template static Error serializeTupleHelper(ChannelT &C, const std::tuple &V, - llvm::index_sequence _) { + std::index_sequence _) { return serializeSeq(C, std::get(V)...); } // Serialization helper for std::tuple. template static Error deserializeTupleHelper(ChannelT &C, std::tuple &V, - llvm::index_sequence _) { + std::index_sequence _) { return deserializeSeq(C, std::get(V)...); } }; diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h index 3b11e1b283d..ee9c2cc69c3 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h +++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -338,7 +338,9 @@ public: return Err; // Close the response message. - return C.endSendMessage(); + if (auto Err = C.endSendMessage()) + return Err; + return C.send(); } template @@ -350,7 +352,9 @@ public: return Err2; if (auto Err2 = serializeSeq(C, std::move(Err))) return Err2; - return C.endSendMessage(); + if (auto Err2 = C.endSendMessage()) + return Err2; + return C.send(); } }; @@ -378,8 +382,11 @@ public: C, *ResultOrErr)) return Err; - // Close the response message. - return C.endSendMessage(); + // End the response message. + if (auto Err = C.endSendMessage()) + return Err; + + return C.send(); } template @@ -389,7 +396,9 @@ public: return Err; if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) return Err2; - return C.endSendMessage(); + if (auto Err2 = C.endSendMessage()) + return Err2; + return C.send(); } }; @@ -502,7 +511,7 @@ public: static typename WrappedHandlerReturn::Type unpackAndRun(HandlerT &Handler, std::tuple &Args) { return unpackAndRunHelper(Handler, Args, - llvm::index_sequence_for()); + std::index_sequence_for()); } // Call the given handler with the given arguments. 
@@ -510,7 +519,7 @@ public: static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder, std::tuple &Args) { return unpackAndRunAsyncHelper(Handler, Responder, Args, - llvm::index_sequence_for()); + std::index_sequence_for()); } // Call the given handler with the given arguments. @@ -540,14 +549,13 @@ public: // Deserialize arguments from the channel. template static Error deserializeArgs(ChannelT &C, std::tuple &Args) { - return deserializeArgsHelper(C, Args, - llvm::index_sequence_for()); + return deserializeArgsHelper(C, Args, std::index_sequence_for()); } private: template static Error deserializeArgsHelper(ChannelT &C, std::tuple &Args, - llvm::index_sequence _) { + std::index_sequence _) { return SequenceSerialization::deserialize( C, std::get(Args)...); } @@ -556,18 +564,16 @@ private: static typename WrappedHandlerReturn< typename HandlerTraits::ReturnType>::Type unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args, - llvm::index_sequence) { + std::index_sequence) { return run(Handler, std::move(std::get(Args))...); } - template static typename WrappedHandlerReturn< typename HandlerTraits::ReturnType>::Type unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder, - ArgTuple &Args, - llvm::index_sequence) { + ArgTuple &Args, std::index_sequence) { return run(Handler, Responder, std::move(std::get(Args))...); } }; @@ -743,11 +749,15 @@ public: // to the user defined handler. Error handleResponse(ChannelT &C) override { Error Result = Error::success(); - if (auto Err = - SerializationTraits::deserialize(C, Result)) + if (auto Err = SerializationTraits::deserialize( + C, Result)) { + consumeError(std::move(Result)); return Err; - if (auto Err = C.endReceiveMessage()) + } + if (auto Err = C.endReceiveMessage()) { + consumeError(std::move(Result)); return Err; + } return Handler(std::move(Result)); } @@ -767,7 +777,7 @@ private: // Create a ResponseHandler from a given user handler. 
template std::unique_ptr> createResponseHandler(HandlerT H) { - return llvm::make_unique>( + return std::make_unique>( std::move(H)); } @@ -1403,14 +1413,12 @@ public: using ErrorReturn = typename RTraits::ErrorReturnType; using ErrorReturnPromise = typename RTraits::ReturnPromiseType; - // FIXME: Stack allocate and move this into the handler once LLVM builds - // with C++14. - auto Promise = std::make_shared(); - auto FutureResult = Promise->get_future(); + ErrorReturnPromise Promise; + auto FutureResult = Promise.get_future(); if (auto Err = this->template appendCallAsync( - [Promise](ErrorReturn RetOrErr) { - Promise->set_value(std::move(RetOrErr)); + [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable { + Promise.set_value(std::move(RetOrErr)); return Error::success(); }, Args...)) { @@ -1523,6 +1531,12 @@ public: return std::move(Err); } + if (auto Err = this->C.send()) { + detail::ResultTraits::consumeAbandoned( + std::move(Result)); + return std::move(Err); + } + while (!ReceivedResponse) { if (auto Err = this->handleOne()) { detail::ResultTraits::consumeAbandoned( @@ -1582,8 +1596,7 @@ public: // outstanding calls count, then poke the condition variable. using ArgType = typename detail::ResponseHandlerArg< typename detail::HandlerTraits::Type>::ArgType; - // FIXME: Move handler into wrapped handler once we have C++14. 
- auto WrappedHandler = [this, Handler](ArgType Arg) { + auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) { auto Err = Handler(std::move(Arg)); std::unique_lock Lock(M); --NumOutstandingCalls; diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index d9535ce5f21..c5106cf09ec 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -216,7 +216,7 @@ private: : K(std::move(K)), Parent(Parent), MemMgr(std::move(MemMgr)), - PFC(llvm::make_unique( + PFC(std::make_unique( std::move(Obj), std::move(Resolver), ProcessAllSections)) { buildInitialSymbolTable(PFC->Obj); @@ -234,7 +234,7 @@ private: JITSymbolResolverAdapter ResolverAdapter(Parent.ES, *PFC->Resolver, nullptr); - PFC->RTDyld = llvm::make_unique(*MemMgr, ResolverAdapter); + PFC->RTDyld = std::make_unique(*MemMgr, ResolverAdapter); PFC->RTDyld->setProcessAllSections(PFC->ProcessAllSections); Finalized = true; @@ -338,7 +338,7 @@ private: std::shared_ptr Resolver, bool ProcessAllSections) { using LOS = ConcreteLinkedObject; - return llvm::make_unique(Parent, std::move(K), std::move(Obj), + return std::make_unique(Parent, std::move(K), std::move(Obj), std::move(MemMgr), std::move(Resolver), ProcessAllSections); } diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h index b87cf697a81..d7304cfcf93 100644 --- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h @@ -137,17 +137,12 @@ protected: RemoteSymbolId Id) : C(C), Id(Id) {} - RemoteSymbolMaterializer(const RemoteSymbolMaterializer &Other) - : C(Other.C), Id(Other.Id) { - // FIXME: This is a horrible, auto_ptr-style, copy-as-move operation. 
- // It should be removed as soon as LLVM has C++14's generalized - // lambda capture (at which point the materializer can be moved - // into the lambda in remoteToJITSymbol below). - const_cast(Other).Id = 0; + RemoteSymbolMaterializer(RemoteSymbolMaterializer &&Other) + : C(Other.C), Id(Other.Id) { + Other.Id = 0; } - RemoteSymbolMaterializer& - operator=(const RemoteSymbolMaterializer&) = delete; + RemoteSymbolMaterializer &operator=(RemoteSymbolMaterializer &&) = delete; /// Release the remote symbol. ~RemoteSymbolMaterializer() { @@ -218,9 +213,9 @@ protected: return nullptr; // else... RemoteSymbolMaterializer RSM(*this, RemoteSym.first); - auto Sym = - JITSymbol([RSM]() mutable { return RSM.materialize(); }, - RemoteSym.second); + auto Sym = JITSymbol( + [RSM = std::move(RSM)]() mutable { return RSM.materialize(); }, + RemoteSym.second); return Sym; } else return RemoteSymOrErr.takeError(); @@ -472,7 +467,7 @@ private: } Expected addObject(std::string ObjBuffer) { - auto Buffer = llvm::make_unique(std::move(ObjBuffer)); + auto Buffer = std::make_unique(std::move(ObjBuffer)); auto Id = HandleIdMgr.getNext(); assert(!BaseLayerHandles.count(Id) && "Id already in use?"); diff --git a/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h new file mode 100644 index 00000000000..cf57b63b644 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h @@ -0,0 +1,84 @@ +//===-- SpeculateAnalyses.h --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +/// Contains the Analyses and Result Interpretation to select likely functions +/// to Speculatively compile before they are called. 
[Purely Experimentation] +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H +#define LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H + +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Speculation.h" + +#include + +namespace llvm { + +namespace orc { + +// Provides common code. +class SpeculateQuery { +protected: + void findCalles(const BasicBlock *, DenseSet &); + bool isStraightLine(const Function &F); + +public: + using ResultTy = Optional>>; +}; + +// Direct calls in high frequency basic blocks are extracted. +class BlockFreqQuery : public SpeculateQuery { + size_t numBBToGet(size_t); + +public: + // Find likely next executables based on IR Block Frequency + ResultTy operator()(Function &F); +}; + +// This Query generates a sequence of basic blocks which follows the order of +// execution. +// A handful of BB with higher block frequencies are taken, then path to entry +// and end BB are discovered by traversing up & down the CFG. 
+class SequenceBBQuery : public SpeculateQuery { + struct WalkDirection { + bool Upward = true, Downward = true; + // the block associated contain a call + bool CallerBlock = false; + }; + +public: + using VisitedBlocksInfoTy = DenseMap; + using BlockListTy = SmallVector; + using BackEdgesInfoTy = + SmallVector, 8>; + using BlockFreqInfoTy = + SmallVector, 8>; + +private: + std::size_t getHottestBlocks(std::size_t TotalBlocks); + BlockListTy rearrangeBB(const Function &, const BlockListTy &); + BlockListTy queryCFG(Function &, const BlockListTy &); + void traverseToEntryBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + void traverseToExitBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + +public: + ResultTy operator()(Function &F); +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H diff --git a/include/llvm/ExecutionEngine/Orc/Speculation.h b/include/llvm/ExecutionEngine/Orc/Speculation.h new file mode 100644 index 00000000000..766a6b070f1 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -0,0 +1,207 @@ +//===-- Speculation.h - Speculative Compilation --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Contains the definition to support speculative compilation when laziness is +// enabled. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATION_H +#define LLVM_EXECUTIONENGINE_ORC_SPECULATION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/Debug.h" + +#include +#include +#include +#include + +namespace llvm { +namespace orc { + +class Speculator; + +// Track the Impls (JITDylib,Symbols) of Symbols while lazy call through +// trampolines are created. Operations are guarded by locks to ensure that Imap +// stays in a consistent state after read/write. + +class ImplSymbolMap { + friend class Speculator; + +public: + using AliaseeDetails = std::pair; + using Alias = SymbolStringPtr; + using ImapTy = DenseMap; + void trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD); + +private: + // FIX ME: find a right way to distinguish the pre-compile Symbols, and update + // the callsite + Optional getImplFor(const SymbolStringPtr &StubSymbol) { + std::lock_guard Lockit(ConcurrentAccess); + auto Position = Maps.find(StubSymbol); + if (Position != Maps.end()) + return Position->getSecond(); + else + return None; + } + + std::mutex ConcurrentAccess; + ImapTy Maps; +}; + +// Defines Speculator Concept, +class Speculator { +public: + using TargetFAddr = JITTargetAddress; + using FunctionCandidatesMap = DenseMap; + using StubAddrLikelies = DenseMap; + +private: + void registerSymbolsWithAddr(TargetFAddr ImplAddr, + SymbolNameSet likelySymbols) { + std::lock_guard Lockit(ConcurrentAccess); + GlobalSpecMap.insert({ImplAddr, std::move(likelySymbols)}); + } + + void launchCompile(JITTargetAddress FAddr) { + SymbolNameSet CandidateSet; + // Copying CandidateSet is necessary to avoid unsynchronized access to + // the data structure.
+ { + std::lock_guard Lockit(ConcurrentAccess); + auto It = GlobalSpecMap.find(FAddr); + if (It == GlobalSpecMap.end()) + return; + CandidateSet = It->getSecond(); + } + + SymbolDependenceMap SpeculativeLookUpImpls; + + for (auto &Callee : CandidateSet) { + auto ImplSymbol = AliaseeImplTable.getImplFor(Callee); + // try to distinguish already compiled & library symbols + if (!ImplSymbol.hasValue()) + continue; + const auto &ImplSymbolName = ImplSymbol.getPointer()->first; + JITDylib *ImplJD = ImplSymbol.getPointer()->second; + auto &SymbolsInJD = SpeculativeLookUpImpls[ImplJD]; + SymbolsInJD.insert(ImplSymbolName); + } + + DEBUG_WITH_TYPE("orc", for (auto &I + : SpeculativeLookUpImpls) { + llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; + for (auto &N : I.second) + llvm::dbgs() << "\n Likely Symbol : " << N; + }); + + // For a given symbol, there may be no symbol qualified for speculative + // compilation; try to fix this before jumping to this code if possible. + for (auto &LookupPair : SpeculativeLookUpImpls) + ES.lookup(JITDylibSearchList({{LookupPair.first, true}}), + LookupPair.second, SymbolState::Ready, + [this](Expected Result) { + if (auto Err = Result.takeError()) + ES.reportError(std::move(Err)); + }, + NoDependenciesToRegister); + } + +public: + Speculator(ImplSymbolMap &Impl, ExecutionSession &ref) + : AliaseeImplTable(Impl), ES(ref), GlobalSpecMap(0) {} + Speculator(const Speculator &) = delete; + Speculator(Speculator &&) = delete; + Speculator &operator=(const Speculator &) = delete; + Speculator &operator=(Speculator &&) = delete; + + /// Define symbols for this Speculator object (__orc_speculator) and the + /// speculation runtime entry point symbol (__orc_speculate_for) in the + /// given JITDylib. + Error addSpeculationRuntime(JITDylib &JD, MangleAndInterner &Mangle); + + // Speculatively compile likely functions for the given Stub Address.
+ // destination of __orc_speculate_for jump + void speculateFor(TargetFAddr StubAddr) { launchCompile(StubAddr); } + + // FIXME : Register with Stub Address, after JITLink Fix. + void registerSymbols(FunctionCandidatesMap Candidates, JITDylib *JD) { + for (auto &SymPair : Candidates) { + auto Target = SymPair.first; + auto Likely = SymPair.second; + + auto OnReadyFixUp = [Likely, Target, + this](Expected ReadySymbol) { + if (ReadySymbol) { + auto RAddr = (*ReadySymbol)[Target].getAddress(); + registerSymbolsWithAddr(RAddr, std::move(Likely)); + } else + this->getES().reportError(ReadySymbol.takeError()); + }; + // Include non-exported symbols also. + ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}), + SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); + } + } + + ExecutionSession &getES() { return ES; } + +private: + static void speculateForEntryPoint(Speculator *Ptr, uint64_t StubId); + std::mutex ConcurrentAccess; + ImplSymbolMap &AliaseeImplTable; + ExecutionSession &ES; + StubAddrLikelies GlobalSpecMap; +}; + +class IRSpeculationLayer : public IRLayer { +public: + using IRlikiesStrRef = Optional>>; + using ResultEval = std::function; + using TargetAndLikelies = DenseMap; + + IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, + Speculator &Spec, MangleAndInterner &Mangle, + ResultEval Interpreter) + : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), + QueryAnalysis(Interpreter) {} + + void emit(MaterializationResponsibility R, ThreadSafeModule TSM); + +private: + TargetAndLikelies + internToJITSymbols(DenseMap> IRNames) { + assert(!IRNames.empty() && "No IRNames received to Intern?"); + TargetAndLikelies InternedNames; + DenseSet TargetJITNames; + for (auto &NamePair : IRNames) { + for (auto &TargetNames : NamePair.second) + TargetJITNames.insert(Mangle(TargetNames)); + + InternedNames[Mangle(NamePair.first)] = std::move(TargetJITNames); + } + return InternedNames; + } + + IRCompileLayer &NextLayer; + 
Speculator &S; + MangleAndInterner &Mangle; + ResultEval QueryAnalysis; +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SPECULATION_H diff --git a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h index 5787500387c..2347faed37a 100644 --- a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h +++ b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h @@ -38,17 +38,12 @@ private: public: // RAII based lock for ThreadSafeContext. class LLVM_NODISCARD Lock { - private: - using UnderlyingLock = std::lock_guard; - public: - Lock(std::shared_ptr S) - : S(std::move(S)), - L(llvm::make_unique(this->S->Mutex)) {} + Lock(std::shared_ptr S) : S(std::move(S)), L(this->S->Mutex) {} private: std::shared_ptr S; - std::unique_ptr L; + std::unique_lock L; }; /// Construct a null context. @@ -69,7 +64,7 @@ public: /// instance, or null if the instance was default constructed. const LLVMContext *getContext() const { return S ? S->Ctx.get() : nullptr; } - Lock getLock() { + Lock getLock() const { assert(S && "Can not lock an empty ThreadSafeContext"); return Lock(S); } @@ -95,7 +90,7 @@ public: // We also need to lock the context to make sure the module tear-down // does not overlap any other work on the context. if (M) { - auto L = getContextLock(); + auto L = TSCtx.getLock(); M = nullptr; } M = std::move(Other.M); @@ -117,23 +112,14 @@ public: ~ThreadSafeModule() { // We need to lock the context while we destruct the module. if (M) { - auto L = getContextLock(); + auto L = TSCtx.getLock(); M = nullptr; } } - /// Get the module wrapped by this ThreadSafeModule. - Module *getModule() { return M.get(); } - - /// Get the module wrapped by this ThreadSafeModule. - const Module *getModule() const { return M.get(); } - - /// Take out a lock on the ThreadSafeContext for this module. 
- ThreadSafeContext::Lock getContextLock() { return TSCtx.getLock(); } - /// Boolean conversion: This ThreadSafeModule will evaluate to true if it /// wraps a non-null module. - explicit operator bool() { + explicit operator bool() const { if (M) { assert(TSCtx.getContext() && "Non-null module must have non-null context"); @@ -142,6 +128,33 @@ public: return false; } + /// Locks the associated ThreadSafeContext and calls the given function + /// on the contained Module. + template + auto withModuleDo(Func &&F) -> decltype(F(std::declval())) { + assert(M && "Can not call on null module"); + auto Lock = TSCtx.getLock(); + return F(*M); + } + + /// Locks the associated ThreadSafeContext and calls the given function + /// on the contained Module. + template + auto withModuleDo(Func &&F) const + -> decltype(F(std::declval())) { + auto Lock = TSCtx.getLock(); + return F(*M); + } + + /// Get a raw pointer to the contained module without locking the context. + Module *getModuleUnlocked() { return M.get(); } + + /// Get a raw pointer to the contained module without locking the context. + const Module *getModuleUnlocked() const { return M.get(); } + + /// Returns the context for this ThreadSafeModule. 
+ ThreadSafeContext getContext() const { return TSCtx; } + private: std::unique_ptr M; ThreadSafeContext TSCtx; diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index b2b4eba4707..ce7024a7f19 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -13,6 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H #define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DIContext.h" @@ -271,10 +272,10 @@ private: std::unique_ptr UnderlyingBuffer, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function, - std::map)> + unique_function, + std::map)> OnLoaded, - std::function OnEmitted); + unique_function OnEmitted); // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public // interface. @@ -291,14 +292,14 @@ private: // but ORC's RTDyldObjectLinkingLayer2. Internally it constructs a RuntimeDyld // instance and uses continuation passing to perform the fix-up and finalize // steps asynchronously. 
-void jitLinkForORC(object::ObjectFile &Obj, - std::unique_ptr UnderlyingBuffer, - RuntimeDyld::MemoryManager &MemMgr, - JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function, - std::map)> - OnLoaded, - std::function OnEmitted); +void jitLinkForORC( + object::ObjectFile &Obj, std::unique_ptr UnderlyingBuffer, + RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, + bool ProcessAllSections, + unique_function, + std::map)> + OnLoaded, + unique_function OnEmitted); } // end namespace llvm diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 06cc09e1cfc..e6b280465f7 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include #include @@ -94,8 +95,8 @@ public: /// Return a uniquified Attribute object that has the specific /// alignment set. - static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align); - static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithAlignment(LLVMContext &Context, Align Alignment); + static Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment); static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes); static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, @@ -150,11 +151,11 @@ public: /// Returns the alignment field of an attribute as a byte alignment /// value. - unsigned getAlignment() const; + MaybeAlign getAlignment() const; /// Returns the stack alignment field of an attribute as a byte /// alignment value. - unsigned getStackAlignment() const; + MaybeAlign getStackAlignment() const; /// Returns the number of dereferenceable bytes from the /// dereferenceable attribute. 
@@ -284,8 +285,8 @@ public: /// Return the target-dependent attribute object. Attribute getAttribute(StringRef Kind) const; - unsigned getAlignment() const; - unsigned getStackAlignment() const; + MaybeAlign getAlignment() const; + MaybeAlign getStackAlignment() const; uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; Type *getByValType() const; @@ -603,16 +604,16 @@ public: } /// Return the alignment of the return value. - unsigned getRetAlignment() const; + MaybeAlign getRetAlignment() const; /// Return the alignment for the specified function parameter. - unsigned getParamAlignment(unsigned ArgNo) const; + MaybeAlign getParamAlignment(unsigned ArgNo) const; /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; /// Get the stack alignment. - unsigned getStackAlignment(unsigned Index) const; + MaybeAlign getStackAlignment(unsigned Index) const; /// Get the number of dereferenceable bytes (or zero if unknown). uint64_t getDereferenceableBytes(unsigned Index) const; @@ -704,9 +705,9 @@ template <> struct DenseMapInfo { /// equality, presence of attributes, etc. class AttrBuilder { std::bitset Attrs; - std::map TargetDepAttrs; - uint64_t Alignment = 0; - uint64_t StackAlignment = 0; + std::map> TargetDepAttrs; + MaybeAlign Alignment; + MaybeAlign StackAlignment; uint64_t DerefBytes = 0; uint64_t DerefOrNullBytes = 0; uint64_t AllocSizeArgs = 0; @@ -773,10 +774,10 @@ public: bool hasAlignmentAttr() const; /// Retrieve the alignment attribute, if it exists. - uint64_t getAlignment() const { return Alignment; } + MaybeAlign getAlignment() const { return Alignment; } /// Retrieve the stack alignment attribute, if it exists. - uint64_t getStackAlignment() const { return StackAlignment; } + MaybeAlign getStackAlignment() const { return StackAlignment; } /// Retrieve the number of dereferenceable bytes, if the /// dereferenceable attribute exists (zero is returned otherwise). 
@@ -793,13 +794,29 @@ public: /// doesn't exist, pair(0, 0) is returned. std::pair> getAllocSizeArgs() const; + /// This turns an alignment into the form used internally in Attribute. + /// This call has no effect if Align is not set. + AttrBuilder &addAlignmentAttr(MaybeAlign Align); + /// This turns an int alignment (which must be a power of 2) into the /// form used internally in Attribute. - AttrBuilder &addAlignmentAttr(unsigned Align); + /// This call has no effect if Align is 0. + /// Deprecated, use the version using a MaybeAlign. + inline AttrBuilder &addAlignmentAttr(unsigned Align) { + return addAlignmentAttr(MaybeAlign(Align)); + } + + /// This turns a stack alignment into the form used internally in Attribute. + /// This call has no effect if Align is not set. + AttrBuilder &addStackAlignmentAttr(MaybeAlign Align); /// This turns an int stack alignment (which must be a power of 2) into /// the form used internally in Attribute. - AttrBuilder &addStackAlignmentAttr(unsigned Align); + /// This call has no effect if Align is 0. + /// Deprecated, use the version using a MaybeAlign. + inline AttrBuilder &addStackAlignmentAttr(unsigned Align) { + return addStackAlignmentAttr(MaybeAlign(Align)); + } /// This turns the number of dereferenceable bytes into the form used /// internally in Attribute. diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h index 017ad93d8a2..66f38e5b55d 100644 --- a/include/llvm/IR/AutoUpgrade.h +++ b/include/llvm/IR/AutoUpgrade.h @@ -54,9 +54,9 @@ namespace llvm { /// module is modified. bool UpgradeModuleFlags(Module &M); - /// This checks for objc retain release marker which should be upgraded. It - /// returns true if module is modified. - bool UpgradeRetainReleaseMarker(Module &M); + /// Convert calls to ARC runtime functions to intrinsic calls and upgrade the + /// old retain release marker to new module flag format. 
+ void UpgradeARCRuntime(Module &M); void UpgradeSectionAttributes(Module &M); @@ -87,6 +87,10 @@ namespace llvm { /// Upgrade the loop attachment metadata node. MDNode *upgradeInstructionLoopAttachment(MDNode &N); + /// Upgrade the datalayout string by adding a section for address space + /// pointers. + std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple); + } // End llvm namespace #endif diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 69555af50e1..d594145f863 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -192,6 +192,11 @@ public: std::function>> instructionsWithoutDebug(); + /// Return the size of the basic block ignoring debug instructions + filter_iterator>::difference_type + sizeWithoutDebug() const; + /// Unlink 'this' from the containing function, but do not delete it. void removeFromParent(); diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index b47a96c5d5f..13b1ae8d0e3 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -854,6 +854,15 @@ public: return CI.ParameterEncoding[0]; } + /// Return the use of the callee value in the underlying instruction. Only + /// valid for callback calls! + const Use &getCalleeUseForCallback() const { + int CalleeArgIdx = getCallArgOperandNoForCallee(); + assert(CalleeArgIdx >= 0 && + unsigned(CalleeArgIdx) < getInstruction()->getNumOperands()); + return getInstruction()->getOperandUse(CalleeArgIdx); + } + /// Return the pointer to function that is being called. Value *getCalledValue() const { if (isDirectCall()) diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 399c6ad521f..c1c979c2e2a 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -75,6 +75,11 @@ namespace CallingConv { // CXX_FAST_TLS - Calling convention for access functions. 
CXX_FAST_TLS = 17, + /// Tail - This calling convention attempts to make calls as fast as + /// possible while guaranteeing that tail call optimization can always + /// be performed. + Tail = 18, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, @@ -222,6 +227,14 @@ namespace CallingConv { // Calling convention between AArch64 Advanced SIMD functions AArch64_VectorCall = 97, + /// Calling convention between AArch64 SVE functions + AArch64_SVE_VectorCall = 98, + + /// Calling convention for emscripten __invoke_* functions. The first + /// argument is required to be the function ptr being indirectly called. + /// The remainder matches the regular calling convention. + WASM_EmscriptenInvoke = 99, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 93157665122..2b6a6e4141b 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -86,6 +86,12 @@ public: /// floating-point constant with all NaN elements. bool isNaN() const; + /// Return true if this constant and a constant 'Y' are element-wise equal. + /// This is identical to just comparing the pointers, with the exception that + /// for vectors, if only one of the constants has an `undef` element in some + /// lane, the constants still match. + bool isElementWiseEqual(Value *Y) const; + /// Return true if this is a vector constant that includes any undefined /// elements. bool containsUndefElement() const; diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index 91f3f31abe1..964f9e8e9bc 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -330,9 +330,13 @@ public: /// from an addition of a value in this range and a value in \p Other.
ConstantRange add(const ConstantRange &Other) const; - /// Return a new range representing the possible values resulting from a - /// known NSW addition of a value in this range and \p Other constant. - ConstantRange addWithNoSignedWrap(const APInt &Other) const; + /// Return a new range representing the possible values resulting + /// from an addition with wrap type \p NoWrapKind of a value in this + /// range and a value in \p Other. + /// If the result range is disjoint, the preferred range is determined by the + /// \p PreferredRangeType. + ConstantRange addWithNoWrap(const ConstantRange &Other, unsigned NoWrapKind, + PreferredRangeType RangeType = Smallest) const; /// Return a new range representing the possible values resulting /// from a subtraction of a value in this range and a value in \p Other. diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index ac9770a1512..85093dd218f 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -25,10 +25,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -71,11 +72,11 @@ struct LayoutAlignElem { /// Alignment type from \c AlignTypeEnum unsigned AlignType : 8; unsigned TypeBitWidth : 24; - unsigned ABIAlign : 16; - unsigned PrefAlign : 16; + Align ABIAlign; + Align PrefAlign; - static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width); + static LayoutAlignElem get(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width); bool operator==(const LayoutAlignElem &rhs) const; }; @@ -87,15 +88,15 @@ struct LayoutAlignElem { /// \note The unusual order of elements in the structure attempts to reduce /// padding and 
make the structure slightly more cache friendly. struct PointerAlignElem { - unsigned ABIAlign; - unsigned PrefAlign; + Align ABIAlign; + Align PrefAlign; uint32_t TypeByteWidth; uint32_t AddressSpace; uint32_t IndexWidth; /// Initializer - static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, + static PointerAlignElem get(uint32_t AddressSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, uint32_t IndexWidth); bool operator==(const PointerAlignElem &rhs) const; @@ -120,10 +121,10 @@ private: bool BigEndian; unsigned AllocaAddrSpace; - unsigned StackNaturalAlign; + MaybeAlign StackNaturalAlign; unsigned ProgramAddrSpace; - unsigned FunctionPtrAlign; + MaybeAlign FunctionPtrAlign; FunctionPtrAlignType TheFunctionPtrAlignType; enum ManglingModeT { @@ -172,16 +173,15 @@ private: /// well-defined bitwise representation. SmallVector NonIntegralAddressSpaces; - void setAlignment(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width); - unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, - bool ABIAlign, Type *Ty) const; - void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, - uint32_t IndexWidth); + void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align, + uint32_t bit_width); + Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, + bool ABIAlign, Type *Ty) const; + void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, + uint32_t TypeByteWidth, uint32_t IndexWidth); /// Internal helper method that returns requested alignment for type. - unsigned getAlignment(Type *Ty, bool abi_or_pref) const; + Align getAlignment(Type *Ty, bool abi_or_pref) const; /// Parses a target data specification string. Assert if the string is /// malformed. 
@@ -261,17 +261,21 @@ public: bool isIllegalInteger(uint64_t Width) const { return !isLegalInteger(Width); } /// Returns true if the given alignment exceeds the natural stack alignment. - bool exceedsNaturalStackAlignment(unsigned Align) const { - return (StackNaturalAlign != 0) && (Align > StackNaturalAlign); + bool exceedsNaturalStackAlignment(Align Alignment) const { + return StackNaturalAlign && (Alignment > StackNaturalAlign); + } + + Align getStackAlignment() const { + assert(StackNaturalAlign && "StackNaturalAlign must be defined"); + return *StackNaturalAlign; } - unsigned getStackAlignment() const { return StackNaturalAlign; } unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; } /// Returns the alignment of function pointers, which may or may not be /// related to the alignment of functions. /// \see getFunctionPtrAlignType - unsigned getFunctionPtrAlign() const { return FunctionPtrAlign; } + MaybeAlign getFunctionPtrAlign() const { return FunctionPtrAlign; } /// Return the type of function pointer alignment. /// \see getFunctionPtrAlign @@ -344,12 +348,12 @@ public: } /// Layout pointer alignment - unsigned getPointerABIAlignment(unsigned AS) const; + Align getPointerABIAlignment(unsigned AS) const; /// Return target's alignment for stack-based pointers /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. - unsigned getPointerPrefAlignment(unsigned AS = 0) const; + Align getPointerPrefAlignment(unsigned AS = 0) const; /// Layout pointer size /// FIXME: The defaults need to be removed once all of @@ -433,23 +437,33 @@ public: /// Returns the number of bits necessary to hold the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 36 for i36 and 80 for x86_fp80. The type passed must /// have a size (Type::isSized() must return true). 
- uint64_t getTypeSizeInBits(Type *Ty) const; + TypeSize getTypeSizeInBits(Type *Ty) const; /// Returns the maximum number of bytes that may be overwritten by /// storing the specified type. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 5 for i36 and 10 for x86_fp80. - uint64_t getTypeStoreSize(Type *Ty) const { - return (getTypeSizeInBits(Ty) + 7) / 8; + TypeSize getTypeStoreSize(Type *Ty) const { + TypeSize BaseSize = getTypeSizeInBits(Ty); + return { (BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable() }; } /// Returns the maximum number of bits that may be overwritten by /// storing the specified type; always a multiple of 8. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// For example, returns 40 for i36 and 80 for x86_fp80. - uint64_t getTypeStoreSizeInBits(Type *Ty) const { + TypeSize getTypeStoreSizeInBits(Type *Ty) const { return 8 * getTypeStoreSize(Ty); } @@ -464,9 +478,12 @@ public: /// Returns the offset in bytes between successive objects of the /// specified type, including alignment padding. /// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 12 or 16 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSize(Type *Ty) const { + TypeSize getTypeAllocSize(Type *Ty) const { // Round up to the next alignment boundary. return alignTo(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); } @@ -474,18 +491,28 @@ public: /// Returns the offset in bits between successive objects of the /// specified type, including alignment padding; always a multiple of 8. 
/// + /// If Ty is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// This is the amount that alloca reserves for this type. For example, /// returns 96 or 128 for x86_fp80, depending on alignment. - uint64_t getTypeAllocSizeInBits(Type *Ty) const { + TypeSize getTypeAllocSizeInBits(Type *Ty) const { return 8 * getTypeAllocSize(Ty); } /// Returns the minimum ABI-required alignment for the specified type. unsigned getABITypeAlignment(Type *Ty) const; + /// Helper function to return `Alignment` if it's set or the result of + /// `getABITypeAlignment(Ty)`, in any case the result is a valid alignment. + inline Align getValueOrABITypeAlignment(MaybeAlign Alignment, + Type *Ty) const { + return Alignment ? *Alignment : Align(getABITypeAlignment(Ty)); + } + /// Returns the minimum ABI-required alignment for an integer type of /// the specified bitwidth. - unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const; + Align getABIIntegerTypeAlignment(unsigned BitWidth) const; /// Returns the preferred stack/global alignment for the specified /// type. @@ -493,10 +520,6 @@ public: /// This is always at least as good as the ABI alignment. unsigned getPrefTypeAlignment(Type *Ty) const; - /// Returns the preferred alignment for the specified type, returned as - /// log2 of the value (a shift amount). - unsigned getPreferredTypeAlignmentShift(Type *Ty) const; - /// Returns an integer type with size at least as big as that of a /// pointer in the given address space. IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const; @@ -561,7 +584,7 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) { /// based on the DataLayout structure. class StructLayout { uint64_t StructSize; - unsigned StructAlignment; + Align StructAlignment; unsigned IsPadded : 1; unsigned NumElements : 31; uint64_t MemberOffsets[1]; // variable sized array! 
@@ -571,7 +594,7 @@ public: uint64_t getSizeInBits() const { return 8 * StructSize; } - unsigned getAlignment() const { return StructAlignment; } + Align getAlignment() const { return StructAlignment; } /// Returns whether the struct has padding or not between its fields. /// NB: Padding in nested element is not taken into account. @@ -598,13 +621,13 @@ private: // The implementation of this method is provided inline as it is particularly // well suited to constant folding when called on a specific Type subclass. -inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { +inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { case Type::LabelTyID: - return getPointerSizeInBits(0); + return TypeSize::Fixed(getPointerSizeInBits(0)); case Type::PointerTyID: - return getPointerSizeInBits(Ty->getPointerAddressSpace()); + return TypeSize::Fixed(getPointerSizeInBits(Ty->getPointerAddressSpace())); case Type::ArrayTyID: { ArrayType *ATy = cast(Ty); return ATy->getNumElements() * @@ -612,26 +635,30 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { } case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. - return getStructLayout(cast(Ty))->getSizeInBits(); + return TypeSize::Fixed( + getStructLayout(cast(Ty))->getSizeInBits()); case Type::IntegerTyID: - return Ty->getIntegerBitWidth(); + return TypeSize::Fixed(Ty->getIntegerBitWidth()); case Type::HalfTyID: - return 16; + return TypeSize::Fixed(16); case Type::FloatTyID: - return 32; + return TypeSize::Fixed(32); case Type::DoubleTyID: case Type::X86_MMXTyID: - return 64; + return TypeSize::Fixed(64); case Type::PPC_FP128TyID: case Type::FP128TyID: - return 128; + return TypeSize::Fixed(128); // In memory objects this is always aligned to a higher boundary, but // only 80 bits contain information. 
case Type::X86_FP80TyID: - return 80; + return TypeSize::Fixed(80); case Type::VectorTyID: { VectorType *VTy = cast(Ty); - return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType()); + auto EltCnt = VTy->getElementCount(); + uint64_t MinBits = EltCnt.Min * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.Scalable); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def index 07e3d6bdc9e..f90c580f10e 100644 --- a/include/llvm/IR/DebugInfoFlags.def +++ b/include/llvm/IR/DebugInfoFlags.def @@ -31,7 +31,8 @@ HANDLE_DI_FLAG(2, Protected) HANDLE_DI_FLAG(3, Public) HANDLE_DI_FLAG((1 << 2), FwdDecl) HANDLE_DI_FLAG((1 << 3), AppleBlock) -HANDLE_DI_FLAG((1 << 4), BlockByrefStruct) +// Used to be BlockByRef, can be reused for anything except DICompositeType. +HANDLE_DI_FLAG((1 << 4), ReservedBit4) HANDLE_DI_FLAG((1 << 5), Virtual) HANDLE_DI_FLAG((1 << 6), Artificial) HANDLE_DI_FLAG((1 << 7), Explicit) @@ -42,8 +43,7 @@ HANDLE_DI_FLAG((1 << 11), Vector) HANDLE_DI_FLAG((1 << 12), StaticMember) HANDLE_DI_FLAG((1 << 13), LValueReference) HANDLE_DI_FLAG((1 << 14), RValueReference) -// 15 was formerly ExternalTypeRef, but this was never used. 
-HANDLE_DI_FLAG((1 << 15), Reserved) +HANDLE_DI_FLAG((1 << 15), ExportSymbols) HANDLE_DI_FLAG((1 << 16), SingleInheritance) HANDLE_DI_FLAG((2 << 16), MultipleInheritance) HANDLE_DI_FLAG((3 << 16), VirtualInheritance) diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 9dc6dfbb0f6..28a59576b7c 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -650,7 +650,6 @@ public: } bool isForwardDecl() const { return getFlags() & FlagFwdDecl; } bool isAppleBlockExtension() const { return getFlags() & FlagAppleBlock; } - bool isBlockByrefStruct() const { return getFlags() & FlagBlockByrefStruct; } bool isVirtual() const { return getFlags() & FlagVirtual; } bool isArtificial() const { return getFlags() & FlagArtificial; } bool isObjectPointer() const { return getFlags() & FlagObjectPointer; } @@ -668,6 +667,7 @@ public: } bool isBigEndian() const { return getFlags() & FlagBigEndian; } bool isLittleEndian() const { return getFlags() & FlagLittleEndian; } + bool getExportSymbols() const { return getFlags() & FlagExportSymbols; } static bool classof(const Metadata *MD) { switch (MD->getMetadataID()) { @@ -2569,7 +2569,7 @@ public: /// (This is the only configuration of entry values that is supported.) bool isEntryValue() const { return getNumElements() > 0 && - getElement(0) == dwarf::DW_OP_entry_value; + getElement(0) == dwarf::DW_OP_LLVM_entry_value; } }; diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h index 3c1d4278905..20097ef3f31 100644 --- a/include/llvm/IR/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -23,7 +23,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ScalableSize.h" +#include "llvm/Support/TypeSize.h" #include #include @@ -62,6 +62,11 @@ public: /// Get or create an IntegerType instance. 
static IntegerType *get(LLVMContext &C, unsigned NumBits); + /// Returns a type twice as wide as the input type. + IntegerType *getExtendedType() const { + return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits()); + } + /// Get the number of bits in this IntegerType unsigned getBitWidth() const { return getSubclassData(); } @@ -470,19 +475,45 @@ public: /// This static method is like getInteger except that the element types are /// twice as wide as the elements in the input type. static VectorType *getExtendedElementVectorType(VectorType *VTy) { - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2); + assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints."); + auto *EltTy = cast(VTy->getElementType()); + return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount()); + } + + // This static method gets a VectorType with the same number of elements as + // the input type, and the element type is an integer or float type which + // is half as wide as the elements in the input type. + static VectorType *getTruncatedElementVectorType(VectorType *VTy) { + Type *EltTy; + if (VTy->getElementType()->isFloatingPointTy()) { + switch(VTy->getElementType()->getTypeID()) { + case DoubleTyID: + EltTy = Type::getFloatTy(VTy->getContext()); + break; + case FloatTyID: + EltTy = Type::getHalfTy(VTy->getContext()); + break; + default: + llvm_unreachable("Cannot create narrower fp vector element type"); + } + } else { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + assert((EltBits & 1) == 0 && + "Cannot truncate vector element with odd bit-width"); + EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + } return VectorType::get(EltTy, VTy->getElementCount()); } - /// This static method is like getInteger except that the element types are - /// half as wide as the elements in the input type.
- static VectorType *getTruncatedElementVectorType(VectorType *VTy) { - unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); - assert((EltBits & 1) == 0 && - "Cannot truncate vector element with odd bit-width"); - Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); - return VectorType::get(EltTy, VTy->getElementCount()); + // This static method returns a VectorType with a smaller number of elements + // of a larger type than the input element type. For example, a <16 x i8> + // subdivided twice would return <4 x i32> + static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) { + for (int i = 0; i < NumSubdivs; ++i) { + VTy = VectorType::getDoubleElementsVectorType(VTy); + VTy = VectorType::getTruncatedElementVectorType(VTy); + } + return VTy; } /// This static method returns a VectorType with half as many elements as the @@ -540,6 +571,10 @@ bool Type::getVectorIsScalable() const { return cast(this)->isScalable(); } +ElementCount Type::getVectorElementCount() const { + return cast(this)->getElementCount(); +} + /// Class to represent pointers. 
class PointerType : public Type { explicit PointerType(Type *ElType, unsigned AddrSpace); @@ -577,6 +612,26 @@ public: } }; +Type *Type::getExtendedType() const { + assert( + isIntOrIntVectorTy() && + "Original type expected to be a vector of integers or a scalar integer."); + if (auto *VTy = dyn_cast(this)) + return VectorType::getExtendedElementVectorType( + const_cast(VTy)); + return cast(this)->getExtendedType(); +} + +Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const { + assert( + isIntOrIntVectorTy() && + "Original type expected to be a vector of integers or a scalar integer."); + Type *NewType = getIntNTy(getContext(), NewBitWidth); + if (isVectorTy()) + NewType = VectorType::get(NewType, getVectorElementCount()); + return NewType; +} + unsigned Type::getPointerAddressSpace() const { return cast(getScalarType())->getAddressSpace(); } diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 373663289db..ec469982d37 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -74,8 +74,10 @@ enum DiagnosticKind { DK_LastMachineRemark = DK_MachineOptimizationRemarkAnalysis, DK_MIRParser, DK_PGOProfile, + DK_MisExpect, DK_Unsupported, - DK_FirstPluginKind + DK_FirstPluginKind // Must be last value to work with + // getNextAvailablePluginDiagnosticKind }; /// Get the next available kind ID for a plugin diagnostic. @@ -663,7 +665,7 @@ public: private: /// The IR value (currently basic block) that the optimization operates on. /// This is currently used to provide run-time hotness information with PGO. - const Value *CodeRegion; + const Value *CodeRegion = nullptr; }; /// Diagnostic information for applied optimization remarks. @@ -1002,6 +1004,25 @@ public: void print(DiagnosticPrinter &DP) const override; }; +/// Diagnostic information for MisExpect analysis. 
+class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase { +public: + DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg); + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_MisExpect; + } + + const Twine &getMsg() const { return Msg; } + +private: + /// Message to report. + const Twine &Msg; +}; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/include/llvm/IR/FixedMetadataKinds.def b/include/llvm/IR/FixedMetadataKinds.def new file mode 100644 index 00000000000..0e1ffef5867 --- /dev/null +++ b/include/llvm/IR/FixedMetadataKinds.def @@ -0,0 +1,43 @@ +/*===-- FixedMetadataKinds.def - Fixed metadata kind IDs -------*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_FIXED_MD_KIND +#error "LLVM_FIXED_MD_KIND(EnumID, Name, Value) is not defined." 
+#endif + +LLVM_FIXED_MD_KIND(MD_dbg, "dbg", 0) +LLVM_FIXED_MD_KIND(MD_tbaa, "tbaa", 1) +LLVM_FIXED_MD_KIND(MD_prof, "prof", 2) +LLVM_FIXED_MD_KIND(MD_fpmath, "fpmath", 3) +LLVM_FIXED_MD_KIND(MD_range, "range", 4) +LLVM_FIXED_MD_KIND(MD_tbaa_struct, "tbaa.struct", 5) +LLVM_FIXED_MD_KIND(MD_invariant_load, "invariant.load", 6) +LLVM_FIXED_MD_KIND(MD_alias_scope, "alias.scope", 7) +LLVM_FIXED_MD_KIND(MD_noalias, "noalias", 8) +LLVM_FIXED_MD_KIND(MD_nontemporal, "nontemporal", 9) +LLVM_FIXED_MD_KIND(MD_mem_parallel_loop_access, + "llvm.mem.parallel_loop_access", 10) +LLVM_FIXED_MD_KIND(MD_nonnull, "nonnull", 11) +LLVM_FIXED_MD_KIND(MD_dereferenceable, "dereferenceable", 12) +LLVM_FIXED_MD_KIND(MD_dereferenceable_or_null, "dereferenceable_or_null", 13) +LLVM_FIXED_MD_KIND(MD_make_implicit, "make.implicit", 14) +LLVM_FIXED_MD_KIND(MD_unpredictable, "unpredictable", 15) +LLVM_FIXED_MD_KIND(MD_invariant_group, "invariant.group", 16) +LLVM_FIXED_MD_KIND(MD_align, "align", 17) +LLVM_FIXED_MD_KIND(MD_loop, "llvm.loop", 18) +LLVM_FIXED_MD_KIND(MD_type, "type", 19) +LLVM_FIXED_MD_KIND(MD_section_prefix, "section_prefix", 20) +LLVM_FIXED_MD_KIND(MD_absolute_symbol, "absolute_symbol", 21) +LLVM_FIXED_MD_KIND(MD_associated, "associated", 22) +LLVM_FIXED_MD_KIND(MD_callees, "callees", 23) +LLVM_FIXED_MD_KIND(MD_irr_loop, "irr_loop", 24) +LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) +LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) +LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) +LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) +LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29) diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 7fa61e12f43..d586a9460d2 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -343,7 +343,10 @@ public: unsigned getFnStackAlignment() const { if (!hasFnAttribute(Attribute::StackAlignment)) return 0; - return 
AttributeSets.getStackAlignment(AttributeList::FunctionIndex); + if (const auto MA = + AttributeSets.getStackAlignment(AttributeList::FunctionIndex)) + return MA->value(); + return 0; } /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm @@ -433,7 +436,9 @@ public: /// Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned ArgNo) const { - return AttributeSets.getParamAlignment(ArgNo); + if (const auto MA = AttributeSets.getParamAlignment(ArgNo)) + return MA->value(); + return 0; } /// Extract the byval type for a parameter. @@ -710,6 +715,12 @@ public: return Arguments + NumArgs; } + Argument* getArg(unsigned i) const { + assert (i < NumArgs && "getArg() out of range!"); + CheckLazyArguments(); + return Arguments + i; + } + iterator_range args() { return make_range(arg_begin(), arg_end()); } diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index 3cd40570130..f2d9b9676ec 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -58,10 +58,6 @@ public: // Linkage, Type, Parent and AddressSpace taken from the Aliasee. static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee); - void copyAttributesFrom(const GlobalValue *Src) { - GlobalValue::copyAttributesFrom(Src); - } - /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. /// diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h index bc0d3c053cc..0fdae917878 100644 --- a/include/llvm/IR/GlobalIFunc.h +++ b/include/llvm/IR/GlobalIFunc.h @@ -46,10 +46,6 @@ public: LinkageTypes Linkage, const Twine &Name, Constant *Resolver, Module *Parent); - void copyAttributesFrom(const GlobalIFunc *Src) { - GlobalValue::copyAttributesFrom(Src); - } - /// This method unlinks 'this' from the containing module, but does not /// delete it. 
void removeFromParent(); diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h index 8bc3f90b94a..d996237aa3e 100644 --- a/include/llvm/IR/GlobalIndirectSymbol.h +++ b/include/llvm/IR/GlobalIndirectSymbol.h @@ -42,6 +42,10 @@ public: /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + void copyAttributesFrom(const GlobalValue *Src) { + GlobalValue::copyAttributesFrom(Src); + } + /// These methods set and retrieve indirect symbol. void setIndirectSymbol(Constant *Symbol) { setOperand(0, Symbol); @@ -54,9 +58,7 @@ public: static_cast(this)->getIndirectSymbol()); } - const GlobalObject *getBaseObject() const { - return dyn_cast(getIndirectSymbol()->stripInBoundsOffsets()); - } + const GlobalObject *getBaseObject() const; GlobalObject *getBaseObject() { return const_cast( static_cast(this)->getBaseObject()); diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h index b8ab6140ebe..ce81eb9f071 100644 --- a/include/llvm/IR/GlobalObject.h +++ b/include/llvm/IR/GlobalObject.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Alignment.h" #include #include @@ -27,6 +28,20 @@ class MDNode; class Metadata; class GlobalObject : public GlobalValue { +public: + // VCallVisibility - values for visibility metadata attached to vtables. This + // describes the scope in which a virtual call could end up being dispatched + // through this vtable. + enum VCallVisibility { + // Type is potentially visible to external code. + VCallVisibilityPublic = 0, + // Type is only visible to code which will be in the current Module after + // LTO internalization. + VCallVisibilityLinkageUnit = 1, + // Type is only visible to code in the current Module. 
+ VCallVisibilityTranslationUnit = 2, + }; + protected: GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, @@ -58,9 +73,14 @@ public: unsigned getAlignment() const { unsigned Data = getGlobalValueSubClassData(); unsigned AlignmentData = Data & AlignmentMask; - return (1u << AlignmentData) >> 1; + MaybeAlign Align = decodeMaybeAlign(AlignmentData); + return Align ? Align->value() : 0; } - void setAlignment(unsigned Align); + + /// FIXME: Remove this setter once the migration to MaybeAlign is over. + LLVM_ATTRIBUTE_DEPRECATED(void setAlignment(unsigned Align), + "Please use `void setAlignment(MaybeAlign Align)`"); + void setAlignment(MaybeAlign Align); unsigned getGlobalObjectSubClassData() const { unsigned ValueData = getGlobalValueSubClassData(); @@ -158,6 +178,8 @@ public: void copyMetadata(const GlobalObject *Src, unsigned Offset); void addTypeMetadata(unsigned Offset, Metadata *TypeID); + void addVCallVisibilityMetadata(VCallVisibility Visibility); + VCallVisibility getVCallVisibility() const; protected: void copyAttributesFrom(const GlobalObject *Src); diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 2e2c8c47791..2c730bc312e 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -243,6 +243,7 @@ public: bool hasImplicitSection() const { return getAttributes().hasAttribute("bss-section") || getAttributes().hasAttribute("data-section") || + getAttributes().hasAttribute("relro-section") || getAttributes().hasAttribute("rodata-section"); } diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index a74364dffb2..d1ddb75cde9 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1461,7 +1461,7 @@ public: if (Value *V = foldConstant(Opc, LHS, RHS, Name)) return V; Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS); if (isa(BinOp)) - BinOp = setFPAttrs(BinOp, FPMathTag, FMF); + setFPAttrs(BinOp, 
FPMathTag, FMF); return Insert(BinOp, Name); } @@ -1479,7 +1479,8 @@ public: CallInst *C = CreateIntrinsic(ID, {L->getType()}, {L, R, RoundingV, ExceptV}, nullptr, Name); - return cast(setFPAttrs(C, FPMathTag, UseFMF)); + setFPAttrs(C, FPMathTag, UseFMF); + return C; } Value *CreateNeg(Value *V, const Twine &Name = "", @@ -1504,7 +1505,7 @@ public: MDNode *FPMathTag = nullptr) { if (auto *VC = dyn_cast(V)) return Insert(Folder.CreateFNeg(VC), Name); - return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), FPMathTag, FMF), + return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), FPMathTag, FMF), Name); } @@ -1514,9 +1515,7 @@ public: const Twine &Name = "") { if (auto *VC = dyn_cast(V)) return Insert(Folder.CreateFNeg(VC), Name); - // TODO: This should return UnaryOperator::CreateFNeg(...) once we are - // confident that they are optimized sufficiently. - return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), nullptr, + return Insert(setFPAttrs(UnaryOperator::CreateFNeg(V), nullptr, FMFSource->getFastMathFlags()), Name); } @@ -1534,7 +1533,7 @@ public: return Insert(Folder.CreateUnOp(Opc, VC), Name); Instruction *UnOp = UnaryOperator::Create(Opc, V); if (isa(UnOp)) - UnOp = setFPAttrs(UnOp, FPMathTag, FMF); + setFPAttrs(UnOp, FPMathTag, FMF); return Insert(UnOp, Name); } @@ -1612,19 +1611,19 @@ public: LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name) { LoadInst *LI = CreateLoad(Ty, Ptr, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const Twine &Name = "") { LoadInst *LI = CreateLoad(Ty, Ptr, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } LoadInst *CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, bool isVolatile, const Twine &Name = "") { LoadInst *LI = CreateLoad(Ty, Ptr, isVolatile, Name); - LI->setAlignment(Align); + LI->setAlignment(MaybeAlign(Align)); return LI; } @@ -1649,7 
+1648,7 @@ public: StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile = false) { StoreInst *SI = CreateStore(Val, Ptr, isVolatile); - SI->setAlignment(Align); + SI->setAlignment(MaybeAlign(Align)); return SI; } @@ -1913,11 +1912,17 @@ public: return V; } - Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){ + Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptoui, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPToUI, V, DestTy, Name); } - Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = ""){ + Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fptosi, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPToSI, V, DestTy, Name); } @@ -1931,10 +1936,17 @@ public: Value *CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast( + Intrinsic::experimental_constrained_fptrunc, V, DestTy, nullptr, + Name); return CreateCast(Instruction::FPTrunc, V, DestTy, Name); } Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") { + if (IsFPConstrained) + return CreateConstrainedFPCast(Intrinsic::experimental_constrained_fpext, + V, DestTy, nullptr, Name); return CreateCast(Instruction::FPExt, V, DestTy, Name); } @@ -2046,6 +2058,37 @@ public: return Insert(CastInst::CreateFPCast(V, DestTy), Name); } + CallInst *CreateConstrainedFPCast( + Intrinsic::ID ID, Value *V, Type *DestTy, + Instruction *FMFSource = nullptr, const Twine &Name = "", + MDNode *FPMathTag = nullptr, + Optional Rounding = None, + Optional Except = None) { + Value *ExceptV = getConstrainedFPExcept(Except); + + FastMathFlags UseFMF = FMF; + if (FMFSource) + UseFMF = FMFSource->getFastMathFlags(); + + CallInst *C; + switch (ID) { + 
default: { + Value *RoundingV = getConstrainedFPRounding(Rounding); + C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, RoundingV, ExceptV}, + nullptr, Name); + } break; + case Intrinsic::experimental_constrained_fpext: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptosi: + C = CreateIntrinsic(ID, {DestTy, V->getType()}, {V, ExceptV}, nullptr, + Name); + break; + } + if (isa(C)) + setFPAttrs(C, FPMathTag, UseFMF); + return C; + } + // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a // compile time error, instead of converting the string to bool for the // isSigned parameter. @@ -2187,7 +2230,10 @@ public: PHINode *CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name = "") { - return Insert(PHINode::Create(Ty, NumReservedValues), Name); + PHINode *Phi = PHINode::Create(Ty, NumReservedValues); + if (isa(Phi)) + setFPAttrs(Phi, nullptr /* MDNode* */, FMF); + return Insert(Phi, Name); } CallInst *CreateCall(FunctionType *FTy, Value *Callee, @@ -2195,7 +2241,7 @@ public: MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2204,7 +2250,7 @@ public: const Twine &Name = "", MDNode *FPMathTag = nullptr) { CallInst *CI = CallInst::Create(FTy, Callee, Args, OpBundles); if (isa(CI)) - CI = cast(setFPAttrs(CI, FPMathTag, FMF)); + setFPAttrs(CI, FPMathTag, FMF); return Insert(CI, Name); } @@ -2252,7 +2298,7 @@ public: Sel = addBranchMetadata(Sel, Prof, Unpred); } if (isa(Sel)) - Sel = cast(setFPAttrs(Sel, nullptr /* MDNode* */, FMF)); + setFPAttrs(Sel, nullptr /* MDNode* */, FMF); return Insert(Sel, Name); } @@ -2454,7 +2500,7 @@ public: } Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension, - unsigned LastIndex) { + unsigned LastIndex, MDNode *DbgInfo) { assert(isa(Base->getType()) && "Invalid Base ptr type 
for preserve.array.access.index."); auto *BaseType = Base->getType(); @@ -2476,6 +2522,8 @@ public: Value *DimV = getInt32(Dimension); CallInst *Fn = CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV}); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } @@ -2493,7 +2541,8 @@ public: Value *DIIndex = getInt32(FieldIndex); CallInst *Fn = CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex}); - Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } @@ -2516,7 +2565,8 @@ public: Value *DIIndex = getInt32(FieldIndex); CallInst *Fn = CreateCall(FnPreserveStructAccessIndex, {Base, GEPIndex, DIIndex}); - Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + if (DbgInfo) + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 2aac807623a..72d8ad1501a 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -244,6 +244,7 @@ public: Constraint_m, Constraint_o, Constraint_v, + Constraint_A, Constraint_Q, Constraint_R, Constraint_S, diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index ca419b50da6..7fb94e9d8c2 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -975,7 +975,7 @@ public: static Type* makeCmpResultType(Type* opnd_type) { if (VectorType* vt = dyn_cast(opnd_type)) { return VectorType::get(Type::getInt1Ty(opnd_type->getContext()), - vt->getNumElements()); + vt->getElementCount()); } return Type::getInt1Ty(opnd_type->getContext()); } @@ -1567,11 +1567,17 @@ public: } /// Extract the alignment of the return value. 
- unsigned getRetAlignment() const { return Attrs.getRetAlignment(); } + unsigned getRetAlignment() const { + if (const auto MA = Attrs.getRetAlignment()) + return MA->value(); + return 0; + } /// Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned ArgNo) const { - return Attrs.getParamAlignment(ArgNo); + if (const auto MA = Attrs.getParamAlignment(ArgNo)) + return MA->value(); + return 0; } /// Extract the byval type for a call or parameter. diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 6a9a74bd16f..803f6977b32 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -229,6 +229,16 @@ public: return hasMetadataHashEntry(); } + /// Return true if this instruction has the given type of metadata attached. + bool hasMetadata(unsigned KindID) const { + return getMetadata(KindID) != nullptr; + } + + /// Return true if this instruction has the given type of metadata attached. + bool hasMetadata(StringRef Kind) const { + return getMetadata(Kind) != nullptr; + } + /// Get the metadata of given kind attached to this Instruction. /// If the metadata is not found then return null. MDNode *getMetadata(unsigned KindID) const { diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 215ce45c7b7..fa980df03ef 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -110,9 +110,11 @@ public: /// Return the alignment of the memory that is being allocated by the /// instruction. unsigned getAlignment() const { - return (1u << (getSubclassDataFromInstruction() & 31)) >> 1; + if (const auto MA = decodeMaybeAlign(getSubclassDataFromInstruction() & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Return true if this alloca is in the entry block of the function and is a /// constant size. 
If so, the code generator will fold it into the @@ -182,15 +184,15 @@ public: LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, BasicBlock *InsertAtEnd); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, Instruction *InsertBefore = nullptr); + MaybeAlign Align, Instruction *InsertBefore = nullptr); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, BasicBlock *InsertAtEnd); + MaybeAlign Align, BasicBlock *InsertAtEnd); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // Deprecated [opaque pointer types] @@ -209,20 +211,20 @@ public: BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, InsertAtEnd) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore = nullptr) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, InsertBefore) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, InsertAtEnd) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr) : 
LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, Order, SSID, InsertBefore) {} - LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, Order, SSID, InsertAtEnd) {} @@ -238,10 +240,13 @@ public: /// Return the alignment of the access that is being performed. unsigned getAlignment() const { - return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1; + if (const auto MA = + decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Returns the ordering constraint of this load instruction. AtomicOrdering getOrdering() const { @@ -332,17 +337,15 @@ public: StoreInst(Value *Val, Value *Ptr, bool isVolatile = false, Instruction *InsertBefore = nullptr); StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, Instruction *InsertBefore = nullptr); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, BasicBlock *InsertAtEnd); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID = SyncScope::System, + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore = nullptr); - StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd); + StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, + Instruction *InsertBefore = nullptr); + 
StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands void *operator new(size_t s) { @@ -363,10 +366,13 @@ public: /// Return the alignment of the access that is being performed unsigned getAlignment() const { - return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1; + if (const auto MA = + decodeMaybeAlign((getSubclassDataFromInstruction() >> 1) & 31)) + return MA->value(); + return 0; } - void setAlignment(unsigned Align); + void setAlignment(MaybeAlign Align); /// Returns the ordering constraint of this store instruction. AtomicOrdering getOrdering() const { @@ -1764,6 +1770,10 @@ public: void setTrueValue(Value *V) { Op<1>() = V; } void setFalseValue(Value *V) { Op<2>() = V; } + /// Swap the true and false values of the select instruction. + /// This doesn't swap prof metadata. + void swapValues() { Op<1>().swap(Op<2>()); } + /// Return a string if the specified operands are invalid /// for a select operation, otherwise return null. static const char *areInvalidOperands(Value *Cond, Value *True, Value *False); @@ -3455,16 +3465,7 @@ public: class SwitchInstProfUpdateWrapper { SwitchInst &SI; Optional > Weights = None; - - // Sticky invalid state is needed to safely ignore operations with prof data - // in cases where SwitchInstProfUpdateWrapper is created from SwitchInst - // with inconsistent prof data. TODO: once we fix all prof data - // inconsistencies we can turn invalid state to assertions. 
- enum { - Invalid, - Initialized, - Changed - } State = Invalid; + bool Changed = false; protected: static MDNode *getProfBranchWeightsMD(const SwitchInst &SI); @@ -3482,7 +3483,7 @@ public: SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); } ~SwitchInstProfUpdateWrapper() { - if (State == Changed) + if (Changed) SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD()); } @@ -3938,6 +3939,9 @@ class CallBrInst : public CallBase { ArrayRef IndirectDests, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr); + /// Should the Indirect Destinations change, scan + update the Arg list. + void updateArgBlockAddresses(unsigned i, BasicBlock *B); + /// Compute the number of operands to allocate. static int ComputeNumOperands(int NumArgs, int NumIndirectDests, int NumBundleInputs = 0) { @@ -4075,7 +4079,7 @@ public: return cast(*(&Op<-1>() - getNumIndirectDests() - 1)); } BasicBlock *getIndirectDest(unsigned i) const { - return cast(*(&Op<-1>() - getNumIndirectDests() + i)); + return cast_or_null(*(&Op<-1>() - getNumIndirectDests() + i)); } SmallVector getIndirectDests() const { SmallVector IndirectDests; @@ -4087,6 +4091,7 @@ public: *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast(B); } void setIndirectDest(unsigned i, BasicBlock *B) { + updateArgBlockAddresses(i, B); *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast(B); } @@ -4096,11 +4101,10 @@ public: return i == 0 ? getDefaultDest() : getIndirectDest(i - 1); } - void setSuccessor(unsigned idx, BasicBlock *NewSucc) { - assert(idx < getNumIndirectDests() + 1 && + void setSuccessor(unsigned i, BasicBlock *NewSucc) { + assert(i < getNumIndirectDests() + 1 && "Successor # out of range for callbr!"); - *(&Op<-1>() - getNumIndirectDests() -1 + idx) = - reinterpret_cast(NewSucc); + return i == 0 ? 
setDefaultDest(NewSucc) : setIndirectDest(i - 1, NewSucc); } unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; } @@ -5251,31 +5255,38 @@ public: /// A helper function that returns the pointer operand of a load or store /// instruction. Returns nullptr if not load or store. -inline Value *getLoadStorePointerOperand(Value *V) { +inline const Value *getLoadStorePointerOperand(const Value *V) { if (auto *Load = dyn_cast(V)) return Load->getPointerOperand(); if (auto *Store = dyn_cast(V)) return Store->getPointerOperand(); return nullptr; } +inline Value *getLoadStorePointerOperand(Value *V) { + return const_cast( + getLoadStorePointerOperand(static_cast(V))); +} /// A helper function that returns the pointer operand of a load, store /// or GEP instruction. Returns nullptr if not load, store, or GEP. -inline Value *getPointerOperand(Value *V) { +inline const Value *getPointerOperand(const Value *V) { if (auto *Ptr = getLoadStorePointerOperand(V)) return Ptr; if (auto *Gep = dyn_cast(V)) return Gep->getPointerOperand(); return nullptr; } +inline Value *getPointerOperand(Value *V) { + return const_cast(getPointerOperand(static_cast(V))); +} /// A helper function that returns the alignment of load or store instruction. 
-inline unsigned getLoadStoreAlignment(Value *I) { +inline MaybeAlign getLoadStoreAlignment(Value *I) { assert((isa(I) || isa(I)) && "Expected Load or Store instruction"); if (auto *LI = dyn_cast(I)) - return LI->getAlignment(); - return cast(I)->getAlignment(); + return MaybeAlign(LI->getAlignment()); + return MaybeAlign(cast(I)->getAlignment()); } /// A helper function that returns the address space of the pointer operand of diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 438bdb29b70..c989b4a2e72 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -259,6 +259,8 @@ namespace llvm { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -271,12 +273,16 @@ namespace llvm { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; @@ -405,11 +411,11 @@ namespace llvm { setArgOperand(ARG_DEST, Ptr); } - void setDestAlignment(unsigned Align) { + void setDestAlignment(unsigned Alignment) { 
removeParamAttr(ARG_DEST, Attribute::Alignment); - if (Align > 0) - addParamAttr(ARG_DEST, - Attribute::getWithAlignment(getContext(), Align)); + if (Alignment > 0) + addParamAttr(ARG_DEST, Attribute::getWithAlignment(getContext(), + Align(Alignment))); } void setLength(Value *L) { @@ -454,11 +460,12 @@ namespace llvm { BaseCL::setArgOperand(ARG_SOURCE, Ptr); } - void setSourceAlignment(unsigned Align) { + void setSourceAlignment(unsigned Alignment) { BaseCL::removeParamAttr(ARG_SOURCE, Attribute::Alignment); - if (Align > 0) - BaseCL::addParamAttr(ARG_SOURCE, Attribute::getWithAlignment( - BaseCL::getContext(), Align)); + if (Alignment > 0) + BaseCL::addParamAttr(ARG_SOURCE, + Attribute::getWithAlignment(BaseCL::getContext(), + Align(Alignment))); } }; diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h index f38f92022d2..9e4ebd915af 100644 --- a/include/llvm/IR/Intrinsics.h +++ b/include/llvm/IR/Intrinsics.h @@ -100,7 +100,8 @@ namespace Intrinsic { Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt, - VecElementArgument + VecElementArgument, ScalableVecArgument, Subdivide2Argument, + Subdivide4Argument, VecOfBitcastsToInt } Kind; union { @@ -125,14 +126,17 @@ namespace Intrinsic { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == PtrToElt || Kind == VecElementArgument); + Kind == PtrToElt || Kind == VecElementArgument || + Kind == Subdivide2Argument || Kind == Subdivide4Argument || + Kind == VecOfBitcastsToInt); return Argument_Info >> 3; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == VecElementArgument); + Kind == VecElementArgument || Kind == 
Subdivide2Argument || + Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt); return (ArgKind)(Argument_Info & 7); } diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index d660f827843..7a0263f88c2 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -63,6 +63,12 @@ class NoCapture : IntrinsicProperty { int ArgNo = argNo; } +// NoAlias - The specified argument pointer is not aliasing other "noalias" pointer +// arguments of the intrinsic wrt. the intrinsic scope. +class NoAlias : IntrinsicProperty { + int ArgNo = argNo; +} + // Returned - The specified argument is always the return value of the // intrinsic. class Returned : IntrinsicProperty { @@ -181,6 +187,16 @@ class LLVMVectorElementType : LLVMMatchType; // vector type, but change the element count to be half as many class LLVMHalfElementsVectorType : LLVMMatchType; +// Match the type of another intrinsic parameter that is expected to be a +// vector type (i.e. ) but with each element subdivided to +// form a vector with more elements that are smaller than the original. +class LLVMSubdivide2VectorType : LLVMMatchType; +class LLVMSubdivide4VectorType : LLVMMatchType; + +// Match the element count and bit width of another intrinsic parameter, but +// change the element type to an integer. 
+class LLVMVectorOfBitcastsToInt : LLVMMatchType; + def llvm_void_ty : LLVMType; let isAny = 1 in { def llvm_any_ty : LLVMType; @@ -407,9 +423,9 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], //===--------------------- Code Generator Intrinsics ----------------------===// // def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; -def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; -def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; -def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; +def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; +def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], @@ -451,8 +467,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, // from being reordered overly much with respect to nearby access to the same // memory while not impeding optimization. def int_prefetch - : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>, + : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<0>, NoCapture<0>, ImmArg<1>, ImmArg<2>]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; @@ -460,7 +476,7 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; // The assume intrinsic is marked as arbitrarily writing so that proper // control dependencies will be maintained. 
-def int_assume : Intrinsic<[], [llvm_i1_ty], []>; +def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>; // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack // guard to the correct place on the stack frame. @@ -493,23 +509,23 @@ def int_instrprof_value_profile : Intrinsic<[], def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, + NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, ReadOnly<1>, ImmArg<3>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, + [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, ImmArg<3>]>; // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. 
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -551,19 +567,19 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_minnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_minimum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_maximum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; // NOTE: these are internal interfaces. 
@@ -576,13 +592,13 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>; def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, + [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// // -let IntrProperties = [IntrInaccessibleMemOnly] in { +let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { def int_experimental_constrained_fadd : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -616,6 +632,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, @@ -679,6 +703,14 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -697,6 +729,12 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_llround : 
Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, @@ -706,18 +744,19 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsics for fcmp, fptoui and fptosi. +// FIXME: Add intrinsic for fcmp. +// FIXME: Consider maybe adding intrinsics for sitofp, uitofp. //===------------------------- Expect Intrinsics --------------------------===// // def int_expect : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrWillReturn]>; //===-------------------- Bit Manipulation Intrinsics ---------------------===// // // None of these intrinsics accesses memory at all. -let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; @@ -727,7 +766,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; } -let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>] in { def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } @@ -739,7 +778,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in { // mean the optimizers can change them aggressively. Special handling // needed in a few places. These synthetic intrinsics have no // side-effects and just mark information about their operands. 
-let IntrProperties = [IntrNoMem, IntrSpeculatable] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, @@ -796,21 +835,21 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>; def int_var_annotation : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.var.annotation">; + [IntrWillReturn], "llvm.var.annotation">; def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.ptr.annotation">; + [IntrWillReturn], "llvm.ptr.annotation">; def int_annotation : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [], "llvm.annotation">; + [IntrWillReturn], "llvm.annotation">; // Annotates the current program point with metadata strings which are emitted // as CodeView debug info records. This is expensive, as it disables inlining // and is modelled as having side effects. def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty], - [IntrInaccessibleMemOnly, IntrNoDuplicate], + [IntrInaccessibleMemOnly, IntrNoDuplicate, IntrWillReturn], "llvm.codeview.annotation">; //===------------------------ Trampoline Intrinsics -----------------------===// @@ -828,79 +867,77 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], // // Expose the carry flag from add operations on two integrals. 
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { + def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; -def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; - -def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMMatchType<0>, 
LLVMMatchType<0>]>; +} //===------------------------- Saturation Arithmetic Intrinsics ---------------------===// // def int_sadd_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; def int_uadd_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; def int_ssub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; def int_usub_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===// // def int_smul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; def int_umul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; //===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===// // def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; +def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; //===------------------------- Memory Use Markers -------------------------===// // def 
int_lifetime_start : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_lifetime_end : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_invariant_start : Intrinsic<[llvm_descriptor_ty], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; def int_invariant_end : Intrinsic<[], [llvm_descriptor_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<2>, ImmArg<1>]>; // launder.invariant.group can't be marked with 'readnone' (IntrNoMem), // because it would cause CSE of two barriers with the same argument. @@ -916,12 +953,12 @@ def int_invariant_end : Intrinsic<[], // might change in the future. def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrSpeculatable]>; + [IntrInaccessibleMemOnly, IntrSpeculatable, IntrWillReturn]>; def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrSpeculatable, IntrNoMem]>; + [IntrSpeculatable, IntrNoMem, IntrWillReturn]>; //===------------------------ Stackmap Intrinsics -------------------------===// // @@ -964,6 +1001,14 @@ def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrReadMem, ReadNone<1>, ReadOnly<2>, NoCapture<2>]>; +def int_coro_id_retcon : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; +def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; def int_coro_begin : 
Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], [WriteOnly<1>]>; @@ -979,6 +1024,13 @@ def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>; def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>; +def int_coro_suspend_retcon : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], []>; +def int_coro_prepare_retcon : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrNoMem]>; +def int_coro_alloca_alloc : Intrinsic<[llvm_token_ty], + [llvm_anyint_ty, llvm_i32_ty], []>; +def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>; +def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>; def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrNoMem, ReadNone<0>, ReadNone<1>]>; @@ -1018,19 +1070,19 @@ def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty], // Supports widenable conditions for guards represented as explicit branches. def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [], - [IntrInaccessibleMemOnly]>; + [IntrInaccessibleMemOnly, IntrWillReturn]>; // NOP: calls/invokes to this intrinsic are removed by codegen -def int_donothing : Intrinsic<[], [], [IntrNoMem]>; +def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>; // This instruction has no actual effect, though it is treated by the optimizer // has having opaque side effects. This may be inserted into loops to ensure // that they are not removed even if they turn out to be empty, for languages // which specify that infinite loops must be preserved. 
-def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly]>; +def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; -// Intrisics to support half precision floating point format -let IntrProperties = [IntrNoMem] in { +// Intrinsics to support half precision floating point format +let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; } @@ -1041,7 +1093,11 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.clear_cache">; // Intrinsic to detect whether its argument is a constant. -def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">; +def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWillReturn], "llvm.is.constant">; + +// Intrinsic to mask out bits of a pointer. +def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_anyint_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===-------------------------- Masked Intrinsics -------------------------===// // @@ -1049,45 +1105,45 @@ def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly, IntrWillReturn, ImmArg<2>]>; def int_masked_load : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<1>]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, ImmArg<1>]>; + [IntrReadMem, IntrWillReturn, ImmArg<1>]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [ImmArg<2>]>; + [IntrWillReturn, ImmArg<2>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem]>; + [IntrReadMem, IntrWillReturn]>; def int_masked_compressstore: Intrinsic<[], [llvm_anyvector_ty, LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWillReturn]>; // Test whether a pointer is associated with a type metadata identifier. def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], - [IntrNoMem]>; + [IntrNoMem, IntrWillReturn]>; // Safely loads a function pointer from a virtual table pointer using type metadata. def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty], - [IntrNoMem]>; + [IntrNoMem, IntrWillReturn]>; // Create a branch funnel that implements an indirect call to a limited set of // callees. This needs to be a musttail call. 
@@ -1098,6 +1154,8 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], def int_hwasan_check_memaccess : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; +def int_hwasan_check_memaccess_shortgranules : + Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; // Xray intrinsics //===----------------------------------------------------------------------===// @@ -1121,7 +1179,7 @@ def int_memcpy_element_unordered_atomic llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty ], [ - IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3> ]>; @@ -1132,58 +1190,47 @@ def int_memmove_element_unordered_atomic llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty ], [ - IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3> ]>; // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) def int_memset_element_unordered_atomic : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], - [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>; + [ IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>; //===------------------------ Reduction Intrinsics ------------------------===// // -def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def 
int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; -def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty], - [IntrNoMem]>; +let IntrProperties = [IntrNoMem, IntrWillReturn] in { + def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty]>; + def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty]>; + def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + 
def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; +} //===---------- Intrinsics to control hardware supported loops ----------===// diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 832aca4fd30..db01700f409 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -691,7 +691,7 @@ def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], // Memory Tagging Extensions (MTE) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], - [IntrInaccessibleMemOnly]>; + [IntrNoMem, IntrHasSideEffects]>; def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrNoMem]>; def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], @@ -707,7 +707,7 @@ def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], // Generate a randomly tagged stack base pointer. def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], - [IntrInaccessibleMemOnly]>; + [IntrNoMem, IntrHasSideEffects]>; // Transfer pointer tag with offset. 
// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where @@ -733,3 +733,124 @@ def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; } + +// Transactional Memory Extension (TME) Intrinsics +let TargetPrefix = "aarch64" in { +def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, + Intrinsic<[llvm_i64_ty]>; + +def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>; + +def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, + Intrinsic<[], [llvm_i64_ty], [ImmArg<0>]>; + +def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, + Intrinsic<[llvm_i64_ty], [], + [IntrNoMem, IntrHasSideEffects]>; +} + +def llvm_nxv2i1_ty : LLVMType; +def llvm_nxv4i1_ty : LLVMType; +def llvm_nxv8i1_ty : LLVMType; +def llvm_nxv16i1_ty : LLVMType; +def llvm_nxv16i8_ty : LLVMType; +def llvm_nxv4i32_ty : LLVMType; +def llvm_nxv2i64_ty : LLVMType; +def llvm_nxv8f16_ty : LLVMType; +def llvm_nxv4f32_ty : LLVMType; +def llvm_nxv2f64_ty : LLVMType; + +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
+ class AdvSIMD_Merged1VectorArg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_CNT_Intrinsic + : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + [LLVMVectorOfBitcastsToInt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_Unpack_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_PUNPKHI_Intrinsic + : Intrinsic<[LLVMHalfElementsVectorType<0>], + [llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_DOT_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide4VectorType<0>, + LLVMSubdivide4VectorType<0>], + [IntrNoMem]>; + + class AdvSIMD_SVE_DOT_Indexed_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMSubdivide4VectorType<0>, + LLVMSubdivide4VectorType<0>, + llvm_i32_ty], + [IntrNoMem]>; + + // This class of intrinsics are not intended to be useful within LLVM IR but + // are instead here to support some of the more rigid parts of the ACLE. + class Builtin_SVCVT + : GCCBuiltin<"__builtin_sve_" # name>, + Intrinsic<[OUT], [OUT, llvm_nxv16i1_ty, IN], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SVE + +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
+ +// +// Integer arithmetic +// + +def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic; + +def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic; +def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; + +def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic; +def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic; + +// +// Counting bits +// + +def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic; + +// +// Permutations and selection +// + +def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic; +def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic; + +def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic; +def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic; + +// +// Floating-point comparisons +// + +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; + +// +// Predicate operations +// + +def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic; +def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic; +} diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 3982444b540..ab6ee7f92dd 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -175,6 +175,7 @@ def int_amdgcn_implicit_buffer_ptr : // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. +// FIXME: Should be mangled for wave size. 
def int_amdgcn_init_exec : Intrinsic<[], [llvm_i64_ty], // 64-bit literal constant [IntrConvergent, ImmArg<0>]>; @@ -185,7 +186,7 @@ def int_amdgcn_init_exec : Intrinsic<[], def int_amdgcn_init_exec_from_input : Intrinsic<[], [llvm_i32_ty, // 32-bit SGPR input llvm_i32_ty], // bit offset of the thread count - [IntrConvergent]>; + [IntrConvergent, ImmArg<1>]>; def int_amdgcn_wavefrontsize : GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, @@ -199,12 +200,14 @@ def int_amdgcn_wavefrontsize : // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, - Intrinsic<[], [], [IntrConvergent]>; + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>; def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">, Intrinsic<[], [], [IntrConvergent]>; @@ -835,9 +838,6 @@ defset list AMDGPUImageDimAtomicIntrinsics = { defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">; defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">; defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">; - - // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware, - // even though it clearly shouldn't be needed defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">; defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">; @@ -854,8 +854,8 @@ let TargetPrefix = "amdgcn" in { defset list 
AMDGPUBufferIntrinsics = { -class AMDGPUBufferLoad : Intrinsic < - [llvm_any_ty], +class AMDGPUBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) @@ -863,7 +863,7 @@ class AMDGPUBufferLoad : Intrinsic < llvm_i1_ty], // slc(imm) [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; +def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; def int_amdgcn_s_buffer_load : Intrinsic < @@ -874,9 +874,9 @@ def int_amdgcn_s_buffer_load : Intrinsic < [IntrNoMem, ImmArg<2>]>, AMDGPURsrcIntrinsic<0>; -class AMDGPUBufferStore : Intrinsic < +class AMDGPUBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) @@ -884,7 +884,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i1_ty], // slc(imm) [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_buffer_store_format : AMDGPUBufferStore; +def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; // New buffer intrinsics with separate raw and struct variants. The raw @@ -894,56 +894,68 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore; // and swizzling changes depending on whether idxen is set in the instruction. // These new instrinsics also keep the offset and soffset arguments separate as // they behave differently in bounds checking and swizzling. 
-class AMDGPURawBufferLoad : Intrinsic < - [llvm_any_ty], +class AMDGPURawBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; +def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; -class AMDGPUStructBufferLoad : Intrinsic < - [llvm_any_ty], +class AMDGPUStructBufferLoad : Intrinsic < + [data_ty], [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; -def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; +def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; -class AMDGPURawBufferStore : Intrinsic < +class AMDGPURawBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 
1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; +def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; -class AMDGPUStructBufferStore : Intrinsic < +class AMDGPUStructBufferStore : Intrinsic < [], - [llvm_any_ty, // vdata(VGPR) + [data_ty, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; -def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; +def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; -class AMDGPURawBufferAtomic : Intrinsic < - [llvm_anyint_ty], +class AMDGPURawBufferAtomic : Intrinsic < + [data_ty], [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -961,6 +973,8 @@ def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_inc : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_dec : AMDGPURawBufferAtomic; def 
int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [llvm_anyint_ty], [LLVMMatchType<0>, // src(VGPR) @@ -972,8 +986,8 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -class AMDGPUStructBufferAtomic : Intrinsic < - [llvm_anyint_ty], +class AMDGPUStructBufferAtomic : Intrinsic < + [data_ty], [LLVMMatchType<0>, // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) @@ -992,6 +1006,8 @@ def int_amdgcn_struct_buffer_atomic_umax : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_and : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_or : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_xor : AMDGPUStructBufferAtomic; +def int_amdgcn_struct_buffer_atomic_inc : AMDGPUStructBufferAtomic; +def int_amdgcn_struct_buffer_atomic_dec : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< [llvm_anyint_ty], [LLVMMatchType<0>, // src(VGPR) @@ -1046,7 +1062,10 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1057,7 +1076,10 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // 
auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1068,7 +1090,10 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; @@ -1080,7 +1105,10 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) - llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+) + llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc, + // bit 1 = slc, + // bit 2 = dlc on gfx10+), + // swizzled buffer (bit 3 = swz)) [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; @@ -1431,6 +1459,13 @@ def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +// Copies the source value to the destination value, such that the source +// is computed as if the entire program were executed in WQM if any other +// program code executes in WQM. +def int_amdgcn_softwqm : Intrinsic<[llvm_any_ty], + [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] +>; + // Return true if at least one thread within the pixel quad passes true into // the function. 
def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], @@ -1459,6 +1494,18 @@ def int_amdgcn_set_inactive : LLVMMatchType<0>], // value for the inactive lanes to take [IntrNoMem, IntrConvergent]>; +// Return true if the given flat pointer points to a local memory address. +def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">, + Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture<0>] +>; + +// Return true if the given flat pointer points to a private memory address. +def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, + Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture<0>] +>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 4792af097d9..e13da6157e0 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -777,5 +777,14 @@ class Neon_Dot_Intrinsic def int_arm_neon_udot : Neon_Dot_Intrinsic; def int_arm_neon_sdot : Neon_Dot_Intrinsic; +def int_arm_vctp8 : Intrinsic<[llvm_v16i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>; + +// GNU eabi mcount +def int_arm_gnu_eabi_mcount : Intrinsic<[], + [], + [IntrReadMem, IntrWriteMem]>; } // end TargetPrefix diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td index d7595a2a770..3618cc6a412 100644 --- a/include/llvm/IR/IntrinsicsBPF.td +++ b/include/llvm/IR/IntrinsicsBPF.td @@ -20,4 +20,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; def int_bpf_pseudo : GCCBuiltin<"__builtin_bpf_pseudo">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; + def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">, + Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<1>]>; } diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 6393a9ca35d..bfcdd80a52d 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -1260,16 +1260,16 @@ def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">, def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">, Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">, Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">, Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly]>; def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; @@ -1684,16 +1684,16 @@ def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">, def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">, Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">, Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">, Intrinsic<[], 
[llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">, Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly]>; def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index dba7dd76c4f..0483d965ba6 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -276,6 +276,26 @@ class NVVM_MMA_SUPPORTED frags, string layout_a, string layout_b ); } +class SHFL_INFO { + string Suffix = !if(sync, "sync_", "") + # mode # "_" + # type + # !if(return_pred, "p", ""); + + string Name = "int_nvvm_shfl_" # Suffix; + string Builtin = "__nvvm_shfl_" # Suffix; + string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix); + list withGccBuiltin = !if(return_pred, [], [1]); + list withoutGccBuiltin = !if(return_pred, [1], []); + LLVMType OpType = !cond( + !eq(type,"i32"): llvm_i32_ty, + !eq(type,"f32"): llvm_float_ty); + list RetTy = !if(return_pred, [OpType, llvm_i1_ty], [OpType]); + list ArgsTy = !if(sync, + [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty], + [OpType, llvm_i32_ty, llvm_i32_ty]); +} + let TargetPrefix = "nvvm" in { def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -3955,90 +3975,27 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">; // // SHUFFLE // - -// shfl.down.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_down_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.down.i32">, - GCCBuiltin<"__nvvm_shfl_down_i32">; -def int_nvvm_shfl_down_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, 
IntrConvergent], "llvm.nvvm.shfl.down.f32">, - GCCBuiltin<"__nvvm_shfl_down_f32">; - -// shfl.up.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_up_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.i32">, - GCCBuiltin<"__nvvm_shfl_up_i32">; -def int_nvvm_shfl_up_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.up.f32">, - GCCBuiltin<"__nvvm_shfl_up_f32">; - -// shfl.bfly.b32 dest, val, offset, mask_and_clamp -def int_nvvm_shfl_bfly_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">, - GCCBuiltin<"__nvvm_shfl_bfly_i32">; -def int_nvvm_shfl_bfly_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">, - GCCBuiltin<"__nvvm_shfl_bfly_f32">; - -// shfl.idx.b32 dest, val, lane, mask_and_clamp -def int_nvvm_shfl_idx_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.i32">, - GCCBuiltin<"__nvvm_shfl_idx_i32">; -def int_nvvm_shfl_idx_f32 : - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.idx.f32">, - GCCBuiltin<"__nvvm_shfl_idx_f32">; - -// Synchronizing shfl variants available in CUDA-9. -// On sm_70 these don't have to be convergent, so we may eventually want to -// implement non-convergent variant of this intrinsic. 
- -// shfl.sync.down.b32 dest, threadmask, val, offset , mask_and_clamp -def int_nvvm_shfl_sync_down_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.i32">, - GCCBuiltin<"__nvvm_shfl_sync_down_i32">; -def int_nvvm_shfl_sync_down_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.down.f32">, - GCCBuiltin<"__nvvm_shfl_sync_down_f32">; - -// shfl.sync.up.b32 dest, threadmask, val, offset, mask_and_clamp -def int_nvvm_shfl_sync_up_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.i32">, - GCCBuiltin<"__nvvm_shfl_sync_up_i32">; -def int_nvvm_shfl_sync_up_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.up.f32">, - GCCBuiltin<"__nvvm_shfl_sync_up_f32">; - -// shfl.sync.bfly.b32 dest, threadmask, val, offset, mask_and_clamp -def int_nvvm_shfl_sync_bfly_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.i32">, - GCCBuiltin<"__nvvm_shfl_sync_bfly_i32">; -def int_nvvm_shfl_sync_bfly_f32 : - Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.bfly.f32">, - GCCBuiltin<"__nvvm_shfl_sync_bfly_f32">; - -// shfl.sync.idx.b32 dest, threadmask, val, lane, mask_and_clamp -def int_nvvm_shfl_sync_idx_i32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.i32">, - GCCBuiltin<"__nvvm_shfl_sync_idx_i32">; -def int_nvvm_shfl_sync_idx_f32 : - Intrinsic<[llvm_float_ty], 
[llvm_i32_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.shfl.sync.idx.f32">, - GCCBuiltin<"__nvvm_shfl_sync_idx_f32">; +// Generate intrinsics for all variants of shfl instruction. +foreach sync = [0, 1] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach type = ["i32", "f32"] in { + foreach return_pred = [0, 1] in { + foreach i = [SHFL_INFO] in { + foreach _ = i.withGccBuiltin in { + def i.Name : GCCBuiltin, + Intrinsic; + } + foreach _ = i.withoutGccBuiltin in { + def i.Name : Intrinsic; + } + } + } + } + } +} // // VOTE diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td index 1b892727547..810979b9993 100644 --- a/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/include/llvm/IR/IntrinsicsWebAssembly.td @@ -23,6 +23,17 @@ def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, LLVMMatchType<0>], []>; +//===----------------------------------------------------------------------===// +// Trapping float-to-int conversions +//===----------------------------------------------------------------------===// + +def int_wasm_trunc_signed : Intrinsic<[llvm_anyint_ty], + [llvm_anyfloat_ty], + [IntrNoMem]>; +def int_wasm_trunc_unsigned : Intrinsic<[llvm_anyint_ty], + [llvm_anyfloat_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Saturating float-to-int conversions //===----------------------------------------------------------------------===// @@ -89,6 +100,10 @@ def int_wasm_atomic_notify: // SIMD intrinsics //===----------------------------------------------------------------------===// +def int_wasm_swizzle : + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_saturate_signed : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], @@ -109,6 +124,39 @@ def int_wasm_alltrue : Intrinsic<[llvm_i32_ty], 
[llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_narrow_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_narrow_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_low_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_high_signed : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_low_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_widen_high_unsigned : + Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Bulk memory intrinsics @@ -133,4 +181,14 @@ def int_wasm_tls_size : [], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_tls_align : + Intrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_tls_base : + Intrinsic<[llvm_ptr_ty], + [], + [IntrReadMem]>; + } // TargetPrefix = "wasm" diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 236d312d7d7..5796686dd79 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2091,16 +2091,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_ptr_ty], [], []>; def int_x86_lwpins32 : GCCBuiltin<"__builtin_ia32_lwpins32">, - Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpins64 : GCCBuiltin<"__builtin_ia32_lwpins64">, - Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpval32 : GCCBuiltin<"__builtin_ia32_lwpval32">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; def int_x86_lwpval64 : GCCBuiltin<"__builtin_ia32_lwpval64">, - Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg<2>]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index c8050450041..91bd57dc5ac 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -72,34 +72,9 @@ public: // Pinned metadata names, which always have the same value. This is a // compile-time performance optimization, not a correctness optimization. 
enum : unsigned { - MD_dbg = 0, // "dbg" - MD_tbaa = 1, // "tbaa" - MD_prof = 2, // "prof" - MD_fpmath = 3, // "fpmath" - MD_range = 4, // "range" - MD_tbaa_struct = 5, // "tbaa.struct" - MD_invariant_load = 6, // "invariant.load" - MD_alias_scope = 7, // "alias.scope" - MD_noalias = 8, // "noalias", - MD_nontemporal = 9, // "nontemporal" - MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" - MD_nonnull = 11, // "nonnull" - MD_dereferenceable = 12, // "dereferenceable" - MD_dereferenceable_or_null = 13, // "dereferenceable_or_null" - MD_make_implicit = 14, // "make.implicit" - MD_unpredictable = 15, // "unpredictable" - MD_invariant_group = 16, // "invariant.group" - MD_align = 17, // "align" - MD_loop = 18, // "llvm.loop" - MD_type = 19, // "type" - MD_section_prefix = 20, // "section_prefix" - MD_absolute_symbol = 21, // "absolute_symbol" - MD_associated = 22, // "associated" - MD_callees = 23, // "callees" - MD_irr_loop = 24, // "irr_loop" - MD_access_group = 25, // "llvm.access.group" - MD_callback = 26, // "callback" - MD_preserve_access_index = 27, // "llvm.preserve.*.access.index" +#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) EnumID = Value, +#include "llvm/IR/FixedMetadataKinds.def" +#undef LLVM_FIXED_MD_KIND }; /// Known operand bundle tag IDs, which always have the same value. All diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index 3a2b1bddf45..11e2e262325 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include @@ -75,6 +76,10 @@ public: /// Return metadata containing the section prefix for a function. 
MDNode *createFunctionSectionPrefix(StringRef Prefix); + /// return metadata containing expected value + MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight, + uint64_t UnlikelyWeight); + //===------------------------------------------------------------------===// // Range metadata. //===------------------------------------------------------------------===// diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 7ca2540181b..f62b1e246cc 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -601,7 +601,7 @@ dyn_extract_or_null(Y &&MD) { /// These are used to efficiently contain a byte sequence for metadata. /// MDString is always unnamed. class MDString : public Metadata { - friend class StringMapEntry; + friend class StringMapEntryStorage; StringMapEntry *Entry = nullptr; @@ -806,7 +806,7 @@ public: /// Ensure that this has RAUW support, and then return it. ReplaceableMetadataImpl *getOrCreateReplaceableUses() { if (!hasReplaceableUses()) - makeReplaceable(llvm::make_unique(getContext())); + makeReplaceable(std::make_unique(getContext())); return getReplaceableUses(); } diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index f458680cfe1..59331142766 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -46,6 +46,7 @@ class FunctionType; class GVMaterializer; class LLVMContext; class MemoryBuffer; +class Pass; class RandomNumberGenerator; template class SmallPtrSetImpl; class StructType; diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index aacf8cfc089..be60447abd8 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -119,7 +119,7 @@ class GlobalValueSummary; using GlobalValueSummaryList = std::vector>; -struct LLVM_ALIGNAS(8) GlobalValueSummaryInfo { +struct alignas(8) GlobalValueSummaryInfo { union NameOrGV { NameOrGV(bool HaveGVs) { if (HaveGVs) @@ -603,7 +603,7 @@ public: if (!TypeTests.empty() 
|| !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) - TIdInfo = llvm::make_unique(TypeIdInfo{ + TIdInfo = std::make_unique(TypeIdInfo{ std::move(TypeTests), std::move(TypeTestAssumeVCalls), std::move(TypeCheckedLoadVCalls), std::move(TypeTestAssumeConstVCalls), @@ -632,6 +632,8 @@ public: /// Return the list of pairs. ArrayRef calls() const { return CallGraphEdgeList; } + void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); } + /// Returns the list of type identifiers used by this function in /// llvm.type.test intrinsics other than by an llvm.assume intrinsic, /// represented as GUIDs. @@ -680,7 +682,7 @@ public: /// were unable to devirtualize a checked call. void addTypeTest(GlobalValue::GUID Guid) { if (!TIdInfo) - TIdInfo = llvm::make_unique(); + TIdInfo = std::make_unique(); TIdInfo->TypeTests.push_back(Guid); } @@ -780,7 +782,7 @@ public: void setVTableFuncs(VTableFuncList Funcs) { assert(!VTableFuncs); - VTableFuncs = llvm::make_unique(std::move(Funcs)); + VTableFuncs = std::make_unique(std::move(Funcs)); } ArrayRef vTableFuncs() const { @@ -1293,6 +1295,12 @@ public: return nullptr; } + TypeIdSummary *getTypeIdSummary(StringRef TypeId) { + return const_cast( + static_cast(this)->getTypeIdSummary( + TypeId)); + } + const std::map & typeIdCompatibleVtableMap() const { return TypeIdCompatibleVtableMap; @@ -1411,7 +1419,7 @@ template <> struct GraphTraits : public GraphTraits { static NodeRef getEntryNode(ModuleSummaryIndex *I) { std::unique_ptr Root = - make_unique(I->calculateCallGraphRoot()); + std::make_unique(I->calculateCallGraphRoot()); GlobalValueSummaryInfo G(I->haveGVs()); G.SummaryList.push_back(std::move(Root)); static auto P = diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 26d9c43fabf..4d4a67c7517 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h 
@@ -220,7 +220,7 @@ template <> struct CustomMappingTraits { V.emplace(RefGUID, /*IsAnalysis=*/false); Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*V.find(RefGUID))); } - Elem.SummaryList.push_back(llvm::make_unique( + Elem.SummaryList.push_back(std::make_unique( GlobalValueSummary::GVFlags( static_cast(FSum.Linkage), FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, FSum.CanAutoHide), diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 8199c65ca8a..037f5aed03e 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -379,16 +379,25 @@ public: return false; switch (Opcode) { + case Instruction::FNeg: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp + // should not be treated as a math op, but the other opcodes should. + // This would make things consistent with Select/PHI (FP value type + // determines whether they are math ops and, therefore, capable of + // having fast-math-flags). 
case Instruction::FCmp: return true; - // non math FP Operators (no FMF) - case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::InsertElement: case Instruction::PHI: - return false; - default: + case Instruction::Select: + case Instruction::Call: return V->getType()->isFPOrFPVectorTy(); + default: + return false; } } }; diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 37fe2a5b01a..1e1f4a92f84 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -45,6 +45,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManagerInternal.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" @@ -418,7 +419,7 @@ template Args, - llvm::index_sequence) { + std::index_sequence) { (void)Args; return AM.template getResult(IR, std::get(Args)...); } @@ -435,7 +436,7 @@ getAnalysisResult(AnalysisManager &AM, IRUnitT &IR, std::tuple Args) { return (getAnalysisResultUnpackTuple< PassT, IRUnitT>)(AM, IR, Args, - llvm::index_sequence_for{}); + std::index_sequence_for{}); } } // namespace detail diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h index 58198bf67b1..c602c0b5cc2 100644 --- a/include/llvm/IR/PassManagerInternal.h +++ b/include/llvm/IR/PassManagerInternal.h @@ -289,7 +289,7 @@ struct AnalysisPassModel : AnalysisPassConcept> run(IRUnitT &IR, AnalysisManager &AM, ExtraArgTs... ExtraArgs) override { - return llvm::make_unique( + return std::make_unique( Pass.run(IR, AM, std::forward(ExtraArgs)...)); } diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 0f03d7cc56b..2851b24c05a 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -88,6 +88,25 @@ inline class_match m_Undef() { return class_match(); } /// Match an arbitrary Constant and ignore it. 
inline class_match m_Constant() { return class_match(); } +/// Match an arbitrary basic block value and ignore it. +inline class_match m_BasicBlock() { + return class_match(); +} + +/// Inverting matcher +template struct match_unless { + Ty M; + + match_unless(const Ty &Matcher) : M(Matcher) {} + + template bool match(ITy *V) { return !M.match(V); } +}; + +/// Match if the inner matcher does *NOT* match. +template inline match_unless m_Unless(const Ty &M) { + return match_unless(M); +} + /// Matching combinators template struct match_combine_or { LTy L; @@ -300,6 +319,15 @@ template struct cstfp_pred_ty : public Predicate { // /////////////////////////////////////////////////////////////////////////////// +struct is_any_apint { + bool isValue(const APInt &C) { return true; } +}; +/// Match an integer or vector with any integral constant. +/// For vectors, this includes constants with undefined elements. +inline cst_pred_ty m_AnyIntegralConstant() { + return cst_pred_ty(); +} + struct is_all_ones { bool isValue(const APInt &C) { return C.isAllOnesValue(); } }; @@ -388,6 +416,18 @@ inline api_pred_ty m_Power2(const APInt *&V) { return V; } +struct is_negated_power2 { + bool isValue(const APInt &C) { return (-C).isPowerOf2(); } +}; +/// Match an integer or vector negated power-of-2. +/// For vectors, this includes constants with undefined elements. +inline cst_pred_ty m_NegatedPower2() { + return cst_pred_ty(); +} +inline api_pred_ty m_NegatedPower2(const APInt *&V) { + return V; +} + struct is_power2_or_zero { bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } }; @@ -528,6 +568,12 @@ inline bind_ty m_Constant(Constant *&C) { return C; } /// Match a ConstantFP, capturing the value if we match. inline bind_ty m_ConstantFP(ConstantFP *&C) { return C; } +/// Match a basic block value, capturing it if we match.
+inline bind_ty m_BasicBlock(BasicBlock *&V) { return V; } +inline bind_ty m_BasicBlock(const BasicBlock *&V) { + return V; +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; @@ -597,11 +643,11 @@ struct bind_const_intval_ty { }; /// Match a specified integer value or vector of all elements of that -// value. +/// value. struct specific_intval { - uint64_t Val; + APInt Val; - specific_intval(uint64_t V) : Val(V) {} + specific_intval(APInt V) : Val(std::move(V)) {} template bool match(ITy *V) { const auto *CI = dyn_cast(V); @@ -609,18 +655,50 @@ struct specific_intval { if (const auto *C = dyn_cast(V)) CI = dyn_cast_or_null(C->getSplatValue()); - return CI && CI->getValue() == Val; + return CI && APInt::isSameValue(CI->getValue(), Val); } }; /// Match a specific integer value or vector with all elements equal to /// the value. -inline specific_intval m_SpecificInt(uint64_t V) { return specific_intval(V); } +inline specific_intval m_SpecificInt(APInt V) { + return specific_intval(std::move(V)); +} + +inline specific_intval m_SpecificInt(uint64_t V) { + return m_SpecificInt(APInt(64, V)); +} /// Match a ConstantInt and bind to its value. This does not match /// ConstantInts wider than 64-bits. inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } +/// Match a specified basic block value. +struct specific_bbval { + BasicBlock *Val; + + specific_bbval(BasicBlock *Val) : Val(Val) {} + + template bool match(ITy *V) { + const auto *BB = dyn_cast(V); + return BB && BB == Val; + } +}; + +/// Match a specific basic block value. +inline specific_bbval m_SpecificBB(BasicBlock *BB) { + return specific_bbval(BB); +} + +/// A commutative-friendly version of m_Specific(). 
+inline deferredval_ty m_Deferred(BasicBlock *const &BB) { + return BB; +} +inline deferredval_ty +m_Deferred(const BasicBlock *const &BB) { + return BB; +} + //===----------------------------------------------------------------------===// // Matcher for any binary operator. // @@ -968,6 +1046,12 @@ struct is_idiv_op { } }; +struct is_irem_op { + bool isOpType(unsigned Opcode) { + return Opcode == Instruction::SRem || Opcode == Instruction::URem; + } +}; + /// Matches shift operations. template inline BinOpPred_match m_Shift(const LHS &L, @@ -1003,6 +1087,13 @@ inline BinOpPred_match m_IDiv(const LHS &L, return BinOpPred_match(L, R); } +/// Matches integer remainder operations. +template +inline BinOpPred_match m_IRem(const LHS &L, + const RHS &R) { + return BinOpPred_match(L, R); +} + //===----------------------------------------------------------------------===// // Class that matches exact binary ops. // @@ -1210,6 +1301,12 @@ inline CastClass_match m_Trunc(const OpTy &Op) { return CastClass_match(Op); } +template +inline match_combine_or, OpTy> +m_TruncOrSelf(const OpTy &Op) { + return m_CombineOr(m_Trunc(Op), Op); +} + /// Matches SExt. template inline CastClass_match m_SExt(const OpTy &Op) { @@ -1222,6 +1319,18 @@ inline CastClass_match m_ZExt(const OpTy &Op) { return CastClass_match(Op); } +template +inline match_combine_or, OpTy> +m_ZExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_ZExt(Op), Op); +} + +template +inline match_combine_or, OpTy> +m_SExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_SExt(Op), Op); +} + template inline match_combine_or, CastClass_match> @@ -1229,6 +1338,15 @@ m_ZExtOrSExt(const OpTy &Op) { return m_CombineOr(m_ZExt(Op), m_SExt(Op)); } +template +inline match_combine_or< + match_combine_or, + CastClass_match>, + OpTy> +m_ZExtOrSExtOrSelf(const OpTy &Op) { + return m_CombineOr(m_ZExtOrSExt(Op), Op); +} + /// Matches UIToFP. 
template inline CastClass_match m_UIToFP(const OpTy &Op) { @@ -1274,27 +1392,34 @@ struct br_match { inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } -template struct brc_match { +template +struct brc_match { Cond_t Cond; - BasicBlock *&T, *&F; + TrueBlock_t T; + FalseBlock_t F; - brc_match(const Cond_t &C, BasicBlock *&t, BasicBlock *&f) + brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) : Cond(C), T(t), F(f) {} template bool match(OpTy *V) { if (auto *BI = dyn_cast(V)) - if (BI->isConditional() && Cond.match(BI->getCondition())) { - T = BI->getSuccessor(0); - F = BI->getSuccessor(1); - return true; - } + if (BI->isConditional() && Cond.match(BI->getCondition())) + return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); return false; } }; template -inline brc_match m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { - return brc_match(C, T, F); +inline brc_match, bind_ty> +m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { + return brc_match, bind_ty>( + C, m_BasicBlock(T), m_BasicBlock(F)); +} + +template +inline brc_match +m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { + return brc_match(C, T, F); } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/RemarkStreamer.h b/include/llvm/IR/RemarkStreamer.h index f34cc660b2f..2abf6f99cb0 100644 --- a/include/llvm/IR/RemarkStreamer.h +++ b/include/llvm/IR/RemarkStreamer.h @@ -25,12 +25,12 @@ namespace llvm { /// Streamer for remarks. class RemarkStreamer { - /// The filename that the remark diagnostics are emitted to. - const std::string Filename; /// The regex used to filter remarks based on the passes that emit them. Optional PassFilter; /// The object used to serialize the remarks to a specific format. - std::unique_ptr Serializer; + std::unique_ptr RemarkSerializer; + /// The filename that the remark diagnostics are emitted to. 
+ const Optional Filename; /// Convert diagnostics into remark objects. /// The lifetime of the members of the result is bound to the lifetime of @@ -38,14 +38,16 @@ class RemarkStreamer { remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag); public: - RemarkStreamer(StringRef Filename, - std::unique_ptr Serializer); + RemarkStreamer(std::unique_ptr RemarkSerializer, + Optional Filename = None); /// Return the filename that the remark diagnostics are emitted to. - StringRef getFilename() const { return Filename; } + Optional getFilename() const { + return Filename ? Optional(*Filename) : None; + } /// Return stream that the remark diagnostics are emitted to. - raw_ostream &getStream() { return Serializer->OS; } + raw_ostream &getStream() { return RemarkSerializer->OS; } /// Return the serializer used for this stream. - remarks::Serializer &getSerializer() { return *Serializer; } + remarks::RemarkSerializer &getSerializer() { return *RemarkSerializer; } /// Set a pass filter based on a regex \p Filter. /// Returns an error if the regex is invalid. Error setFilter(StringRef Filter); @@ -84,13 +86,21 @@ struct RemarkSetupFormatError : RemarkSetupErrorInfo { using RemarkSetupErrorInfo::RemarkSetupErrorInfo; }; -/// Setup optimization remarks. +/// Setup optimization remarks that output to a file. Expected> setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, bool RemarksWithHotness, unsigned RemarksHotnessThreshold = 0); +/// Setup optimization remarks that output directly to a raw_ostream. +/// \p OS is managed by the caller and should be open for writing as long as \p +/// Context is streaming remarks to it. 
+Error setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS, + StringRef RemarksPasses, StringRef RemarksFormat, + bool RemarksWithHotness, + unsigned RemarksHotnessThreshold = 0); + } // end namespace llvm #endif // LLVM_IR_REMARKSTREAMER_H diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index f2aa49030aa..d0961dac833 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -21,6 +21,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -281,12 +282,15 @@ public: /// This will return zero if the type does not have a size or is not a /// primitive type. /// + /// If this is a scalable vector type, the scalable property will be set and + /// the runtime size will be a positive integer multiple of the base size. + /// /// Note that this may not reflect the size of memory allocated for an /// instance of the type or the number of bytes that are written when an /// instance of the type is stored to memory. The DataLayout class provides /// additional query functions to provide this information. /// - unsigned getPrimitiveSizeInBits() const LLVM_READONLY; + TypeSize getPrimitiveSizeInBits() const LLVM_READONLY; /// If this is a vector type, return the getPrimitiveSizeInBits value for the /// element type. Otherwise return the getPrimitiveSizeInBits value for this @@ -368,6 +372,7 @@ public: inline bool getVectorIsScalable() const; inline unsigned getVectorNumElements() const; + inline ElementCount getVectorElementCount() const; Type *getVectorElementType() const { assert(getTypeID() == VectorTyID); return ContainedTys[0]; @@ -378,6 +383,14 @@ public: return ContainedTys[0]; } + /// Given an integer or vector type, change the lane bitwidth to NewBitwidth, + /// whilst keeping the old number of lanes. 
+ inline Type *getWithNewBitWidth(unsigned NewBitWidth) const; + + /// Given scalar/vector integer type, returns a type with elements twice as + /// wide as in the original type. For vectors, preserves element count. + inline Type *getExtendedType() const; + /// Get the address space of this pointer or pointer vector type. inline unsigned getPointerAddressSpace() const; diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 19d87c5c621..850ee72a038 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -111,7 +111,7 @@ public: #endif } /// Placement delete - required by std, called if the ctor throws. - void operator delete(void *Usr, unsigned, bool) { + void operator delete(void *Usr, unsigned, unsigned) { // Note: If a subclass manipulates the information which is required to calculate the // Usr memory pointer, e.g. NumUserOperands, the operator delete of that subclass has // to restore the changed information to the original value, since the dtor of that class diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index b2d8e7ac474..f2c4b3b3f20 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -14,8 +14,10 @@ #define LLVM_IR_VALUE_H #include "llvm-c/Types.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include @@ -292,10 +294,29 @@ public: /// "V" instead of "this". This function skips metadata entries in the list. void replaceNonMetadataUsesWith(Value *V); + /// Go through the uses list for this definition and make each use point + /// to "V" if the callback ShouldReplace returns true for the given Use. + /// Unlike replaceAllUsesWith() this function does not support basic block + /// values or constant users. 
+ void replaceUsesWithIf(Value *New, + llvm::function_ref ShouldReplace) { + assert(New && "Value::replaceUsesWithIf() is invalid!"); + assert(New->getType() == getType() && + "replaceUses of value with new value of different type!"); + + for (use_iterator UI = use_begin(), E = use_end(); UI != E;) { + Use &U = *UI; + ++UI; + if (!ShouldReplace(U)) + continue; + U.set(New); + } + } + /// replaceUsesOutsideBlock - Go through the uses list for this definition and /// make each use point to "V" instead of "this" when the use is outside the /// block. 'This's use list is expected to have at least one element. - /// Unlike replaceAllUsesWith this function does not support basic block + /// Unlike replaceAllUsesWith() this function does not support basic block /// values or constant users. void replaceUsesOutsideBlock(Value *V, BasicBlock *BB); @@ -493,17 +514,27 @@ public: /// swifterror attribute. bool isSwiftError() const; - /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases. + /// Strip off pointer casts, all-zero GEPs and address space casts. /// /// Returns the original uncasted value. If this is called on a non-pointer /// value, it returns 'this'. const Value *stripPointerCasts() const; Value *stripPointerCasts() { return const_cast( - static_cast(this)->stripPointerCasts()); + static_cast(this)->stripPointerCasts()); } - /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases + /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. + const Value *stripPointerCastsAndAliases() const; + Value *stripPointerCastsAndAliases() { + return const_cast( + static_cast(this)->stripPointerCastsAndAliases()); + } + + /// Strip off pointer casts, all-zero GEPs and address space casts /// but ensures the representation of the result stays the same. 
/// /// Returns the original uncasted value with the same representation. If this @@ -514,26 +545,15 @@ public: ->stripPointerCastsSameRepresentation()); } - /// Strip off pointer casts, all-zero GEPs, aliases and invariant group - /// info. + /// Strip off pointer casts, all-zero GEPs and invariant group info. /// /// Returns the original uncasted value. If this is called on a non-pointer /// value, it returns 'this'. This function should be used only in /// Alias analysis. const Value *stripPointerCastsAndInvariantGroups() const; Value *stripPointerCastsAndInvariantGroups() { - return const_cast( - static_cast(this)->stripPointerCastsAndInvariantGroups()); - } - - /// Strip off pointer casts and all-zero GEPs. - /// - /// Returns the original uncasted value. If this is called on a non-pointer - /// value, it returns 'this'. - const Value *stripPointerCastsNoFollowAliases() const; - Value *stripPointerCastsNoFollowAliases() { - return const_cast( - static_cast(this)->stripPointerCastsNoFollowAliases()); + return const_cast(static_cast(this) + ->stripPointerCastsAndInvariantGroups()); } /// Strip off pointer casts and all-constant inbounds GEPs. @@ -612,7 +632,7 @@ public: /// /// Returns an alignment which is either specified explicitly, e.g. via /// align attribute of a function argument, or guaranteed by DataLayout. - unsigned getPointerAlignment(const DataLayout &DL) const; + MaybeAlign getPointerAlignment(const DataLayout &DL) const; /// Translate PHI node to its predecessor from the given basic block. 
/// diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 6a79b1d387f..fb5440d5efe 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -33,11 +33,11 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/UniqueLock.h" #include #include #include #include +#include #include #include @@ -93,7 +93,6 @@ class ValueMap { MapT Map; Optional MDMap; ExtraData Data; - bool MayMapMetadata = true; public: using key_type = KeyT; @@ -120,10 +119,6 @@ public: } Optional &getMDMap() { return MDMap; } - bool mayMapMetadata() const { return MayMapMetadata; } - void enableMapMetadata() { MayMapMetadata = true; } - void disableMapMetadata() { MayMapMetadata = false; } - /// Get the mapped metadata, if it's in the map. Optional getMappedMD(const Metadata *MD) const { if (!MDMap) @@ -266,9 +261,9 @@ public: // Make a copy that won't get changed even when *this is destroyed. ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); - unique_lock Guard; + std::unique_lock Guard; if (M) - Guard = unique_lock(*M); + Guard = std::unique_lock(*M); Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. Copy.Map->Map.erase(Copy); // Definitely destroys *this. } @@ -279,9 +274,9 @@ public: // Make a copy that won't get changed even when *this is destroyed. 
ValueMapCallbackVH Copy(*this); typename Config::mutex_type *M = Config::getMutex(Copy.Map->Data); - unique_lock Guard; + std::unique_lock Guard; if (M) - Guard = unique_lock(*M); + Guard = std::unique_lock(*M); KeyT typed_new_key = cast(new_key); // Can destroy *this: diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 164d0be2855..49f69340c82 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -132,6 +132,7 @@ void initializeDwarfEHPreparePass(PassRegistry&); void initializeEarlyCSELegacyPassPass(PassRegistry&); void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&); void initializeEarlyIfConverterPass(PassRegistry&); +void initializeEarlyIfPredicatorPass(PassRegistry &); void initializeEarlyMachineLICMPass(PassRegistry&); void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); @@ -202,6 +203,7 @@ void initializeLegacyLICMPassPass(PassRegistry&); void initializeLegacyLoopSinkPassPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); +void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); void initializeLintPass(PassRegistry&); void initializeLiveDebugValuesPass(PassRegistry&); @@ -241,6 +243,7 @@ void initializeLoopVectorizePass(PassRegistry&); void initializeLoopVersioningLICMPass(PassRegistry&); void initializeLoopVersioningPassPass(PassRegistry&); void initializeLowerAtomicLegacyPassPass(PassRegistry&); +void initializeLowerConstantIntrinsicsPass(PassRegistry&); void initializeLowerEmuTLSPass(PassRegistry&); void initializeLowerExpectIntrinsicPass(PassRegistry&); void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&); @@ -250,6 +253,7 @@ void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchPass(PassRegistry&); void initializeLowerTypeTestsPass(PassRegistry&); void 
initializeMIRCanonicalizerPass(PassRegistry &); +void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry&); void initializeMachineBlockFrequencyInfoPass(PassRegistry&); void initializeMachineBlockPlacementPass(PassRegistry&); @@ -263,7 +267,7 @@ void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); -void initializeMachineModuleInfoPass(PassRegistry&); +void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry&); void initializeMachineOutlinerPass(PassRegistry&); void initializeMachinePipelinerPass(PassRegistry&); @@ -286,7 +290,9 @@ void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); void initializeMetaRenamerPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); +void initializeModuloScheduleTestPass(PassRegistry&); void initializeMustExecutePrinterPass(PassRegistry&); +void initializeMustBeExecutedContextPrinterPass(PassRegistry&); void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); @@ -360,7 +366,7 @@ void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); -void initializeSanitizerCoverageModulePass(PassRegistry&); +void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerLegacyPassPass(PassRegistry&); diff --git a/include/llvm/LTO/Config.h 
b/include/llvm/LTO/Config.h index fb107e3fbe0..daa6585b111 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -226,7 +226,7 @@ struct LTOLLVMContext : LLVMContext { setDiscardValueNames(C.ShouldDiscardValueNames); enableDebugTypeODRUniquing(); setDiagnosticHandler( - llvm::make_unique(&DiagHandler), true); + std::make_unique(&DiagHandler), true); } DiagnosticHandlerFunction DiagHandler; }; diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h index ca0a8b64523..0a1e3e1d0e4 100644 --- a/include/llvm/LTO/LTO.h +++ b/include/llvm/LTO/LTO.h @@ -59,7 +59,9 @@ void thinLTOResolvePrevailingInIndex( /// must apply the changes to the Module via thinLTOInternalizeModule. void thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, - function_ref isExported); + function_ref isExported, + function_ref + isPrevailing); /// Computes a unique hash for the Module considering the current list of /// export/import and other global analysis results. @@ -296,6 +298,10 @@ public: /// Cache) for each task identifier. Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr); + /// Static method that returns a list of libcall symbols that can be generated + /// by LTO but might not be visible from bitcode symbol table. 
+ static ArrayRef getRuntimeLibcallSymbols(); + private: Config Conf; @@ -303,7 +309,7 @@ private: RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf); struct CommonResolution { uint64_t Size = 0; - unsigned Align = 0; + MaybeAlign Align; /// Record if at least one instance of the common was marked as prevailing bool Prevailing = false; }; diff --git a/include/llvm/LTO/legacy/LTOCodeGenerator.h b/include/llvm/LTO/legacy/LTOCodeGenerator.h index d3cb4c8b79a..8718df4b88e 100644 --- a/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -113,7 +113,7 @@ struct LTOCodeGenerator { ShouldRestoreGlobalsLinkage = Value; } - void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; } + void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols.insert(Sym); } /// Pass options to the driver and optimization passes. /// diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 675d179eb22..ac88165845d 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -140,6 +140,7 @@ namespace { (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); + (void) llvm::createLowerConstantIntrinsicsPass(); (void) llvm::createLowerExpectIntrinsicPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); @@ -219,6 +220,7 @@ namespace { (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); (void) llvm::createMustExecutePrinter(); + (void) llvm::createMustBeExecutedContextPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); (void) llvm::createScalarizeMaskedMemIntrinPass(); diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 971e9354da8..3261c483e0d 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -165,6 +165,10 @@ protected: /// instead. 
bool UseDataRegionDirectives = false; + /// True if .align is to be used for alignment. Only power-of-two + /// alignment is supported. + bool UseDotAlignForAlignment = false; + //===--- Data Emission Directives -------------------------------------===// /// This should be set to the directive used to get some number of zero bytes @@ -313,6 +317,10 @@ protected: /// Defaults to false. bool HasLinkOnceDirective = false; + /// True if we have a .lglobl directive, which is used to emit the information + /// of a static symbol into the symbol table. Defaults to false. + bool HasDotLGloblDirective = false; + /// This attribute, if not MCSA_Invalid, is used to declare a symbol as having /// hidden visibility. Defaults to MCSA_Hidden. MCSymbolAttr HiddenVisibilityAttr = MCSA_Hidden; @@ -388,6 +396,9 @@ protected: // %hi(), and similar unary operators. bool HasMipsExpressions = false; + // If true, emit function descriptor symbol on AIX. + bool NeedsFunctionDescriptors = false; + public: explicit MCAsmInfo(); virtual ~MCAsmInfo(); @@ -520,6 +531,10 @@ public: return UseDataRegionDirectives; } + bool useDotAlignForAlignment() const { + return UseDotAlignForAlignment; + } + const char *getZeroDirective() const { return ZeroDirective; } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } @@ -557,6 +572,8 @@ public: bool hasLinkOnceDirective() const { return HasLinkOnceDirective; } + bool hasDotLGloblDirective() const { return HasDotLGloblDirective; } + MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr; } MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { @@ -639,6 +656,7 @@ public: bool canRelaxRelocations() const { return RelaxELFRelocations; } void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; } bool hasMipsExpressions() const { return HasMipsExpressions; } + bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } }; } // end namespace 
llvm diff --git a/include/llvm/MC/MCAsmInfoXCOFF.h b/include/llvm/MC/MCAsmInfoXCOFF.h index 2a72ba7398a..4a3bacc954e 100644 --- a/include/llvm/MC/MCAsmInfoXCOFF.h +++ b/include/llvm/MC/MCAsmInfoXCOFF.h @@ -18,6 +18,11 @@ class MCAsmInfoXCOFF : public MCAsmInfo { protected: MCAsmInfoXCOFF(); + +public: + // Return true only when the identifier Name does not need quotes to be + // syntactically correct for XCOFF. + bool isValidUnquotedName(StringRef Name) const override; }; } // end namespace llvm diff --git a/include/llvm/MC/MCAsmMacro.h b/include/llvm/MC/MCAsmMacro.h index 364d3b5f366..7eecce0faf6 100644 --- a/include/llvm/MC/MCAsmMacro.h +++ b/include/llvm/MC/MCAsmMacro.h @@ -124,7 +124,6 @@ public: } void dump(raw_ostream &OS) const; - void dump() const { dump(dbgs()); } }; struct MCAsmMacroParameter { @@ -133,10 +132,10 @@ struct MCAsmMacroParameter { bool Required = false; bool Vararg = false; - MCAsmMacroParameter() = default; - +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() const { dump(dbgs()); } - void dump(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; +#endif }; typedef std::vector MCAsmMacroParameters; @@ -149,8 +148,10 @@ public: MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) : Name(N), Body(B), Parameters(std::move(P)) {} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() const { dump(dbgs()); } - void dump(raw_ostream &OS) const; + LLVM_DUMP_METHOD void dump(raw_ostream &OS) const; +#endif }; } // namespace llvm diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 5c2124cc0d1..b925f321888 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -22,6 +22,7 @@ #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -112,6 +113,9 @@ namespace llvm { /// 
number of section symbols with the same name). StringMap UsedNames; + /// Keeps track of labels that are used in inline assembly. + SymbolTable InlineAsmUsedLabelNames; + /// The next ID to dole out to an unnamed assembler temporary symbol with /// a given prefix. StringMap NextID; @@ -275,6 +279,8 @@ namespace llvm { /// Do automatic reset in destructor bool AutoReset; + MCTargetOptions const *TargetOptions; + bool HadError = false; MCSymbol *createSymbolImpl(const StringMapEntry *Name, @@ -298,7 +304,9 @@ namespace llvm { public: explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI, const MCObjectFileInfo *MOFI, - const SourceMgr *Mgr = nullptr, bool DoAutoReset = true); + const SourceMgr *Mgr = nullptr, + MCTargetOptions const *TargetOpts = nullptr, + bool DoAutoReset = true); MCContext(const MCContext &) = delete; MCContext &operator=(const MCContext &) = delete; ~MCContext(); @@ -377,6 +385,16 @@ namespace llvm { /// APIs. const SymbolTable &getSymbols() const { return Symbols; } + /// isInlineAsmLabel - Return true if the name is a label referenced in + /// inline assembly. + MCSymbol *getInlineAsmLabel(StringRef Name) const { + return InlineAsmUsedLabelNames.lookup(Name); + } + + /// registerInlineAsmLabel - Records that the name is a label referenced in + /// inline assembly. + void registerInlineAsmLabel(MCSymbol *Sym); + /// @} /// \name Section Management @@ -490,6 +508,8 @@ namespace llvm { MCSectionXCOFF *getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass MappingClass, + XCOFF::SymbolType CSectType, + XCOFF::StorageClass StorageClass, SectionKind K, const char *BeginSymName = nullptr); @@ -659,6 +679,7 @@ namespace llvm { bool hadError() { return HadError; } void reportError(SMLoc L, const Twine &Msg); + void reportWarning(SMLoc L, const Twine &Msg); // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. 
// FIXME: We should really do something about that. diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h index 4029264c202..ea79e68674e 100644 --- a/include/llvm/MC/MCDirectives.h +++ b/include/llvm/MC/MCDirectives.h @@ -28,6 +28,7 @@ enum MCSymbolAttr { MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object MCSA_Global, ///< .globl + MCSA_LGlobal, ///< .lglobl (XCOFF) MCSA_Hidden, ///< .hidden (ELF) MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) MCSA_Internal, ///< .internal (ELF) diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 1a37aafd065..a33b4b31bb0 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -629,7 +629,8 @@ public: static void Emit(MCObjectStreamer &streamer, MCAsmBackend *MAB, bool isEH); static void EmitAdvanceLoc(MCObjectStreamer &Streamer, uint64_t AddrDelta); static void EncodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta, - raw_ostream &OS); + raw_ostream &OS, uint32_t *Offset = nullptr, + uint32_t *Size = nullptr); }; } // end namespace llvm diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index fb23c0114c7..eb2786501f8 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -46,10 +46,6 @@ private: ExprKind Kind; SMLoc Loc; - bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs) const; - bool evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const SectionAddrMap *Addrs, bool InSet) const; @@ -136,7 +132,7 @@ class MCConstantExpr : public MCExpr { int64_t Value; bool PrintInHex = false; - MCConstantExpr(int64_t Value) + explicit MCConstantExpr(int64_t Value) : MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {} MCConstantExpr(int64_t Value, bool PrintInHex) @@ -239,6 +235,8 @@ public: VK_PPC_TOC_LO, // symbol@toc@l VK_PPC_TOC_HI, // symbol@toc@h VK_PPC_TOC_HA, // 
symbol@toc@ha + VK_PPC_U, // symbol@u + VK_PPC_L, // symbol@l VK_PPC_DTPMOD, // symbol@dtpmod VK_PPC_TPREL_LO, // symbol@tprel@l VK_PPC_TPREL_HI, // symbol@tprel@h diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h index accffb7f224..29e321e2354 100644 --- a/include/llvm/MC/MCFixup.h +++ b/include/llvm/MC/MCFixup.h @@ -20,35 +20,38 @@ class MCExpr; /// Extensible enumeration to represent the type of a fixup. enum MCFixupKind { - FK_NONE = 0, ///< A no-op fixup. - FK_Data_1, ///< A one-byte fixup. - FK_Data_2, ///< A two-byte fixup. - FK_Data_4, ///< A four-byte fixup. - FK_Data_8, ///< A eight-byte fixup. - FK_PCRel_1, ///< A one-byte pc relative fixup. - FK_PCRel_2, ///< A two-byte pc relative fixup. - FK_PCRel_4, ///< A four-byte pc relative fixup. - FK_PCRel_8, ///< A eight-byte pc relative fixup. - FK_GPRel_1, ///< A one-byte gp relative fixup. - FK_GPRel_2, ///< A two-byte gp relative fixup. - FK_GPRel_4, ///< A four-byte gp relative fixup. - FK_GPRel_8, ///< A eight-byte gp relative fixup. - FK_DTPRel_4, ///< A four-byte dtp relative fixup. - FK_DTPRel_8, ///< A eight-byte dtp relative fixup. - FK_TPRel_4, ///< A four-byte tp relative fixup. - FK_TPRel_8, ///< A eight-byte tp relative fixup. - FK_SecRel_1, ///< A one-byte section relative fixup. - FK_SecRel_2, ///< A two-byte section relative fixup. - FK_SecRel_4, ///< A four-byte section relative fixup. - FK_SecRel_8, ///< A eight-byte section relative fixup. - FK_Data_Add_1, ///< A one-byte add fixup. - FK_Data_Add_2, ///< A two-byte add fixup. - FK_Data_Add_4, ///< A four-byte add fixup. - FK_Data_Add_8, ///< A eight-byte add fixup. - FK_Data_Sub_1, ///< A one-byte sub fixup. - FK_Data_Sub_2, ///< A two-byte sub fixup. - FK_Data_Sub_4, ///< A four-byte sub fixup. - FK_Data_Sub_8, ///< A eight-byte sub fixup. + FK_NONE = 0, ///< A no-op fixup. + FK_Data_1, ///< A one-byte fixup. + FK_Data_2, ///< A two-byte fixup. + FK_Data_4, ///< A four-byte fixup. + FK_Data_8, ///< A eight-byte fixup. 
+ FK_Data_6b, ///< A six-bits fixup. + FK_PCRel_1, ///< A one-byte pc relative fixup. + FK_PCRel_2, ///< A two-byte pc relative fixup. + FK_PCRel_4, ///< A four-byte pc relative fixup. + FK_PCRel_8, ///< A eight-byte pc relative fixup. + FK_GPRel_1, ///< A one-byte gp relative fixup. + FK_GPRel_2, ///< A two-byte gp relative fixup. + FK_GPRel_4, ///< A four-byte gp relative fixup. + FK_GPRel_8, ///< A eight-byte gp relative fixup. + FK_DTPRel_4, ///< A four-byte dtp relative fixup. + FK_DTPRel_8, ///< A eight-byte dtp relative fixup. + FK_TPRel_4, ///< A four-byte tp relative fixup. + FK_TPRel_8, ///< A eight-byte tp relative fixup. + FK_SecRel_1, ///< A one-byte section relative fixup. + FK_SecRel_2, ///< A two-byte section relative fixup. + FK_SecRel_4, ///< A four-byte section relative fixup. + FK_SecRel_8, ///< A eight-byte section relative fixup. + FK_Data_Add_1, ///< A one-byte add fixup. + FK_Data_Add_2, ///< A two-byte add fixup. + FK_Data_Add_4, ///< A four-byte add fixup. + FK_Data_Add_8, ///< A eight-byte add fixup. + FK_Data_Add_6b, ///< A six-bits add fixup. + FK_Data_Sub_1, ///< A one-byte sub fixup. + FK_Data_Sub_2, ///< A two-byte sub fixup. + FK_Data_Sub_4, ///< A four-byte sub fixup. + FK_Data_Sub_8, ///< A eight-byte sub fixup. + FK_Data_Sub_6b, ///< A six-bits sub fixup. FirstTargetFixupKind = 128, @@ -75,25 +78,25 @@ class MCFixup { /// The value to put into the fixup location. The exact interpretation of the /// expression is target dependent, usually it will be one of the operands to /// an instruction or an assembler directive. - const MCExpr *Value; + const MCExpr *Value = nullptr; /// The byte index of start of the relocation inside the MCFragment. - uint32_t Offset; + uint32_t Offset = 0; /// The target dependent kind of fixup item this is. The kind is used to /// determine how the operand value should be encoded into the instruction. 
- unsigned Kind; + MCFixupKind Kind = FK_NONE; /// The source location which gave rise to the fixup, if any. SMLoc Loc; public: static MCFixup create(uint32_t Offset, const MCExpr *Value, MCFixupKind Kind, SMLoc Loc = SMLoc()) { - assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!"); + assert(Kind < MaxTargetFixupKind && "Kind out of range!"); MCFixup FI; FI.Value = Value; FI.Offset = Offset; - FI.Kind = unsigned(Kind); + FI.Kind = Kind; FI.Loc = Loc; return FI; } @@ -104,7 +107,7 @@ public: MCFixup FI; FI.Value = Fixup.getValue(); FI.Offset = Fixup.getOffset(); - FI.Kind = (unsigned)getAddKindForKind(Fixup.getKind()); + FI.Kind = getAddKindForKind(Fixup.getKind()); FI.Loc = Fixup.getLoc(); return FI; } @@ -115,12 +118,14 @@ public: MCFixup FI; FI.Value = Fixup.getValue(); FI.Offset = Fixup.getOffset(); - FI.Kind = (unsigned)getSubKindForKind(Fixup.getKind()); + FI.Kind = getSubKindForKind(Fixup.getKind()); FI.Loc = Fixup.getLoc(); return FI; } - MCFixupKind getKind() const { return MCFixupKind(Kind); } + MCFixupKind getKind() const { return Kind; } + + unsigned getTargetKind() const { return Kind; } uint32_t getOffset() const { return Offset; } void setOffset(uint32_t Value) { Offset = Value; } @@ -129,37 +134,63 @@ public: /// Return the generic fixup kind for a value with the given size. It /// is an error to pass an unsupported size. - static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) { + static MCFixupKind getKindForSize(unsigned Size, bool IsPCRel) { switch (Size) { default: llvm_unreachable("Invalid generic fixup size!"); - case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1; - case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2; - case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4; - case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8; + case 1: + return IsPCRel ? FK_PCRel_1 : FK_Data_1; + case 2: + return IsPCRel ? FK_PCRel_2 : FK_Data_2; + case 4: + return IsPCRel ? FK_PCRel_4 : FK_Data_4; + case 8: + return IsPCRel ? 
FK_PCRel_8 : FK_Data_8; + } + } + + /// Return the generic fixup kind for a value with the given size in bits. + /// It is an error to pass an unsupported size. + static MCFixupKind getKindForSizeInBits(unsigned Size, bool IsPCRel) { + switch (Size) { + default: + llvm_unreachable("Invalid generic fixup size!"); + case 6: + assert(!IsPCRel && "Invalid pc-relative fixup size!"); + return FK_Data_6b; + case 8: + return IsPCRel ? FK_PCRel_1 : FK_Data_1; + case 16: + return IsPCRel ? FK_PCRel_2 : FK_Data_2; + case 32: + return IsPCRel ? FK_PCRel_4 : FK_Data_4; + case 64: + return IsPCRel ? FK_PCRel_8 : FK_Data_8; } } /// Return the generic fixup kind for an addition with a given size. It /// is an error to pass an unsupported size. - static MCFixupKind getAddKindForKind(unsigned Kind) { + static MCFixupKind getAddKindForKind(MCFixupKind Kind) { switch (Kind) { default: llvm_unreachable("Unknown type to convert!"); case FK_Data_1: return FK_Data_Add_1; case FK_Data_2: return FK_Data_Add_2; case FK_Data_4: return FK_Data_Add_4; case FK_Data_8: return FK_Data_Add_8; + case FK_Data_6b: return FK_Data_Add_6b; } } /// Return the generic fixup kind for an subtraction with a given size. It /// is an error to pass an unsupported size. 
- static MCFixupKind getSubKindForKind(unsigned Kind) { + static MCFixupKind getSubKindForKind(MCFixupKind Kind) { switch (Kind) { default: llvm_unreachable("Unknown type to convert!"); case FK_Data_1: return FK_Data_Sub_1; case FK_Data_2: return FK_Data_Sub_2; case FK_Data_4: return FK_Data_Sub_4; case FK_Data_8: return FK_Data_Sub_8; + case FK_Data_6b: return FK_Data_Sub_6b; } } diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index aadf2ce725e..b0def566c46 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -149,6 +149,7 @@ public: case MCFragment::FT_CompactEncodedInst: case MCFragment::FT_Data: case MCFragment::FT_Dwarf: + case MCFragment::FT_DwarfFrame: return true; } } @@ -232,7 +233,8 @@ public: static bool classof(const MCFragment *F) { MCFragment::FragmentType Kind = F->getKind(); return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data || - Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf;; + Kind == MCFragment::FT_CVDefRange || Kind == MCFragment::FT_Dwarf || + Kind == MCFragment::FT_DwarfFrame; } }; @@ -543,27 +545,21 @@ public: } }; -class MCDwarfCallFrameFragment : public MCFragment { +class MCDwarfCallFrameFragment : public MCEncodedFragmentWithFixups<8, 1> { /// AddrDelta - The expression for the difference of the two symbols that /// make up the address delta between two .cfi_* dwarf directives. 
const MCExpr *AddrDelta; - SmallString<8> Contents; - public: MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) - : MCFragment(FT_DwarfFrame, false, Sec), AddrDelta(&AddrDelta) { - Contents.push_back(0); - } + : MCEncodedFragmentWithFixups<8, 1>(FT_DwarfFrame, false, Sec), + AddrDelta(&AddrDelta) {} /// \name Accessors /// @{ const MCExpr &getAddrDelta() const { return *AddrDelta; } - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - /// @} static bool classof(const MCFragment *F) { diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h index 6bbc4bc2903..4501ce3084c 100644 --- a/include/llvm/MC/MCInstPrinter.h +++ b/include/llvm/MC/MCInstPrinter.h @@ -87,12 +87,10 @@ public: /// Utility functions to make adding mark ups simpler. StringRef markup(StringRef s) const; - StringRef markup(StringRef a, StringRef b) const; bool getPrintImmHex() const { return PrintImmHex; } void setPrintImmHex(bool Value) { PrintImmHex = Value; } - HexStyle::Style getPrintHexStyle() const { return PrintHexStyle; } void setPrintHexStyle(HexStyle::Style Value) { PrintHexStyle = Value; } /// Utility function to print immediates in decimal or hex. diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h index dfefd7e7277..898ca47b13b 100644 --- a/include/llvm/MC/MCInstrAnalysis.h +++ b/include/llvm/MC/MCInstrAnalysis.h @@ -152,6 +152,12 @@ public: evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const; + /// Given an instruction tries to get the address of a memory operand. Returns + /// the address on success. + virtual Optional evaluateMemoryOperandAddress(const MCInst &Inst, + uint64_t Addr, + uint64_t Size) const; + /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. 
virtual std::vector> findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 0aa586dfc90..e75a27614a2 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -56,7 +56,11 @@ enum OperandType { OPERAND_GENERIC_5 = 11, OPERAND_LAST_GENERIC = 11, - OPERAND_FIRST_TARGET = 12, + OPERAND_FIRST_GENERIC_IMM = 12, + OPERAND_GENERIC_IMM_0 = 12, + OPERAND_LAST_GENERIC_IMM = 12, + + OPERAND_FIRST_TARGET = 13, }; } @@ -103,6 +107,16 @@ public: assert(isGenericType() && "non-generic types don't have an index"); return OperandType - MCOI::OPERAND_FIRST_GENERIC; } + + bool isGenericImm() const { + return OperandType >= MCOI::OPERAND_FIRST_GENERIC_IMM && + OperandType <= MCOI::OPERAND_LAST_GENERIC_IMM; + } + + unsigned getGenericImmIndex() const { + assert(isGenericImm() && "non-generic immediates don't have an index"); + return OperandType - MCOI::OPERAND_FIRST_GENERIC_IMM; + } }; //===----------------------------------------------------------------------===// @@ -115,7 +129,8 @@ namespace MCID { /// not use these directly. These all correspond to bitfields in the /// MCInstrDesc::Flags field. enum Flag { - Variadic = 0, + PreISelOpcode = 0, + Variadic, HasOptionalDef, Pseudo, Return, @@ -228,6 +243,10 @@ public: /// Return flags of this instruction. uint64_t getFlags() const { return Flags; } + /// \returns true if this instruction is emitted before instruction selection + /// and should be legalized/regbankselected/selected. + bool isPreISelOpcode() const { return Flags & (1ULL << MCID::PreISelOpcode); } + /// Return true if this instruction can have a variable number of /// operands. 
In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h index f2a1364ad88..003491f32f7 100644 --- a/include/llvm/MC/MCLinkerOptimizationHint.h +++ b/include/llvm/MC/MCLinkerOptimizationHint.h @@ -61,6 +61,7 @@ static inline int MCLOHNameToId(StringRef Name) { MCLOHCaseNameToId(AdrpAdd) MCLOHCaseNameToId(AdrpLdrGot) .Default(-1); +#undef MCLOHCaseNameToId } static inline StringRef MCLOHIdToName(MCLOHType Kind) { @@ -76,6 +77,7 @@ static inline StringRef MCLOHIdToName(MCLOHType Kind) { MCLOHCaseIdToName(AdrpLdrGot); } return StringRef(); +#undef MCLOHCaseIdToName } static inline int MCLOHIdToNbArgs(MCLOHType Kind) { diff --git a/include/llvm/MC/MCRegister.h b/include/llvm/MC/MCRegister.h new file mode 100644 index 00000000000..8372947a4ba --- /dev/null +++ b/include/llvm/MC/MCRegister.h @@ -0,0 +1,110 @@ +//===-- llvm/MC/Register.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_REGISTER_H +#define LLVM_MC_REGISTER_H + +#include "llvm/ADT/DenseMapInfo.h" +#include + +namespace llvm { + +/// An unsigned integer type large enough to represent all physical registers, +/// but not necessarily virtual registers. +using MCPhysReg = uint16_t; + +/// Wrapper class representing physical registers. Should be passed by value. +class MCRegister { + unsigned Reg; + +public: + MCRegister(unsigned Val = 0): Reg(Val) {} + + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. 
The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. (Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo uses -1u and -2u. + + /// This is the portion of the positive number space that is not a physical + /// register. StackSlot values do not exist in the MC layer, see + /// Register::isStackSlot() for more information on them. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contain a stack slot, always check + /// isStackSlot() first. + static bool isStackSlot(unsigned Reg) { + return int(Reg) >= (1 << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) > 0; + } + + /// Return true if the specified register number is in the physical register + /// namespace. + bool isPhysical() const { + return isPhysicalRegister(Reg); + } + + operator unsigned() const { + return Reg; + } + + unsigned id() const { + return Reg; + } + + bool isValid() const { + return Reg != 0; + } + + /// Comparisons between register objects + bool operator==(const MCRegister &Other) const { return Reg == Other.Reg; } + bool operator!=(const MCRegister &Other) const { return Reg != Other.Reg; } + + /// Comparisons against register constants. E.g.
+ /// * R == AArch64::WZR + /// * R == 0 + /// * R == VirtRegMap::NO_PHYS_REG + bool operator==(unsigned Other) const { return Reg == Other; } + bool operator!=(unsigned Other) const { return Reg != Other; } + bool operator==(int Other) const { return Reg == unsigned(Other); } + bool operator!=(int Other) const { return Reg != unsigned(Other); } + // MSVC requires that we explicitly declare these two as well. + bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); } + bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); } +}; + +// Provide DenseMapInfo for MCRegister +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { + return DenseMapInfo::getEmptyKey(); + } + static inline unsigned getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); + } + static unsigned getHashValue(const MCRegister &Val) { + return DenseMapInfo::getHashValue(Val.id()); + } + static bool isEqual(const MCRegister &LHS, const MCRegister &RHS) { + return DenseMapInfo::isEqual(LHS.id(), RHS.id()); + } +}; + +} + +#endif // ifndef LLVM_MC_REGISTER_H diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 92d39c3fcfb..c7dc56ea588 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -18,16 +18,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator_range.h" #include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegister.h" #include #include #include namespace llvm { -/// An unsigned integer type large enough to represent all physical registers, -/// but not necessarily virtual registers. -using MCPhysReg = uint16_t; - /// MCRegisterClass - Base class of TargetRegisterClass. class MCRegisterClass { public: @@ -65,16 +62,17 @@ public: /// contains - Return true if the specified register is included in this /// register class. This does not include virtual registers. 
- bool contains(unsigned Reg) const { - unsigned InByte = Reg % 8; - unsigned Byte = Reg / 8; + bool contains(MCRegister Reg) const { + unsigned RegNo = unsigned(Reg); + unsigned InByte = RegNo % 8; + unsigned Byte = RegNo / 8; if (Byte >= RegSetSize) return false; return (RegSet[Byte] & (1 << InByte)) != 0; } /// contains - Return true if both registers are in this class. - bool contains(unsigned Reg1, unsigned Reg2) const { + bool contains(MCRegister Reg1, MCRegister Reg2) const { return contains(Reg1) && contains(Reg2); } @@ -148,8 +146,8 @@ public: private: const MCRegisterDesc *Desc; // Pointer to the descriptor array unsigned NumRegs; // Number of entries in the array - unsigned RAReg; // Return address register - unsigned PCReg; // Program counter register + MCRegister RAReg; // Return address register + MCRegister PCReg; // Program counter register const MCRegisterClass *Classes; // Pointer to the regclass array unsigned NumClasses; // Number of entries in the array unsigned NumRegUnits; // Number of regunits. @@ -175,8 +173,8 @@ private: const DwarfLLVMRegPair *EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH const DwarfLLVMRegPair *Dwarf2LRegs; // Dwarf to LLVM regs mapping const DwarfLLVMRegPair *EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH - DenseMap L2SEHRegs; // LLVM to SEH regs mapping - DenseMap L2CVRegs; // LLVM to CV regs mapping + DenseMap L2SEHRegs; // LLVM to SEH regs mapping + DenseMap L2CVRegs; // LLVM to CV regs mapping public: /// DiffListIterator - Base iterator class that can traverse the @@ -202,7 +200,7 @@ public: /// advance - Move to the next list position, return the applied /// differential. This function does not detect the end of the list, that /// is the caller's responsibility (by checking for a 0 return value). 
- unsigned advance() { + MCRegister advance() { assert(isValid() && "Cannot move off the end of the list."); MCPhysReg D = *List++; Val += D; @@ -214,7 +212,7 @@ public: bool isValid() const { return List; } /// Dereference the iterator to get the value at the current position. - unsigned operator*() const { return Val; } + MCRegister operator*() const { return Val; } /// Pre-increment to move to the next position. void operator++() { @@ -309,26 +307,26 @@ public: /// as the LLVM register number. /// FIXME: TableGen these numbers. Currently this requires target specific /// initialization code. - void mapLLVMRegToSEHReg(unsigned LLVMReg, int SEHReg) { + void mapLLVMRegToSEHReg(MCRegister LLVMReg, int SEHReg) { L2SEHRegs[LLVMReg] = SEHReg; } - void mapLLVMRegToCVReg(unsigned LLVMReg, int CVReg) { + void mapLLVMRegToCVReg(MCRegister LLVMReg, int CVReg) { L2CVRegs[LLVMReg] = CVReg; } /// This method should return the register where the return /// address can be found. - unsigned getRARegister() const { + MCRegister getRARegister() const { return RAReg; } /// Return the register which is the program counter. - unsigned getProgramCounter() const { + MCRegister getProgramCounter() const { return PCReg; } - const MCRegisterDesc &operator[](unsigned RegNo) const { + const MCRegisterDesc &operator[](MCRegister RegNo) const { assert(RegNo < NumRegs && "Attempting to access record for invalid register number!"); return Desc[RegNo]; @@ -336,24 +334,24 @@ public: /// Provide a get method, equivalent to [], but more useful with a /// pointer to this object. - const MCRegisterDesc &get(unsigned RegNo) const { + const MCRegisterDesc &get(MCRegister RegNo) const { return operator[](RegNo); } /// Returns the physical register number of sub-register "Index" /// for physical register RegNo. Return zero if the sub-register does not /// exist. 
- unsigned getSubReg(unsigned Reg, unsigned Idx) const; + MCRegister getSubReg(MCRegister Reg, unsigned Idx) const; /// Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. - unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, - const MCRegisterClass *RC) const; + MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, + const MCRegisterClass *RC) const; /// For a given register pair, return the sub-register index /// if the second register is a sub-register of the first. Return zero /// otherwise. - unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const; + unsigned getSubRegIndex(MCRegister RegNo, MCRegister SubRegNo) const; /// Get the size of the bit range covered by a sub-register index. /// If the index isn't continuous, return the sum of the sizes of its parts. @@ -367,7 +365,7 @@ public: /// Return the human-readable symbolic target-specific name for the /// specified physical register. - const char *getName(unsigned RegNo) const { + const char *getName(MCRegister RegNo) const { return RegStrings + get(RegNo).Name; } @@ -395,15 +393,11 @@ public: /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. - int getDwarfRegNum(unsigned RegNum, bool isEH) const; + int getDwarfRegNum(MCRegister RegNum, bool isEH) const; - /// Map a dwarf register back to a target register. - int getLLVMRegNum(unsigned RegNum, bool isEH) const; - - /// Map a DWARF EH register back to a target register (same as - /// getLLVMRegNum(RegNum, true)) but return -1 if there is no mapping, - /// rather than asserting that there must be one. - int getLLVMRegNumFromEH(unsigned RegNum) const; + /// Map a dwarf register back to a target register. Returns None if there is + /// no mapping.
+ Optional getLLVMRegNum(unsigned RegNum, bool isEH) const; /// Map a target EH register number to an equivalent DWARF register /// number. @@ -411,11 +405,11 @@ public: /// Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. - int getSEHRegNum(unsigned RegNum) const; + int getSEHRegNum(MCRegister RegNum) const; /// Map a target register to an equivalent CodeView register /// number. - int getCodeViewRegNum(unsigned RegNum) const; + int getCodeViewRegNum(MCRegister RegNum) const; regclass_iterator regclass_begin() const { return Classes; } regclass_iterator regclass_end() const { return Classes+NumClasses; } @@ -439,34 +433,34 @@ public: } /// Returns the encoding for RegNo - uint16_t getEncodingValue(unsigned RegNo) const { + uint16_t getEncodingValue(MCRegister RegNo) const { assert(RegNo < NumRegs && "Attempting to get encoding for invalid register number!"); return RegEncodingTable[RegNo]; } /// Returns true if RegB is a sub-register of RegA. - bool isSubRegister(unsigned RegA, unsigned RegB) const { + bool isSubRegister(MCRegister RegA, MCRegister RegB) const { return isSuperRegister(RegB, RegA); } /// Returns true if RegB is a super-register of RegA. - bool isSuperRegister(unsigned RegA, unsigned RegB) const; + bool isSuperRegister(MCRegister RegA, MCRegister RegB) const; /// Returns true if RegB is a sub-register of RegA or if RegB == RegA. - bool isSubRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSubRegisterEq(MCRegister RegA, MCRegister RegB) const { return isSuperRegisterEq(RegB, RegA); } /// Returns true if RegB is a super-register of RegA or if /// RegB == RegA. - bool isSuperRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSuperRegisterEq(MCRegister RegA, MCRegister RegB) const { return RegA == RegB || isSuperRegister(RegA, RegB); } /// Returns true if RegB is a super-register or sub-register of RegA /// or if RegB == RegA. 
- bool isSuperOrSubRegisterEq(unsigned RegA, unsigned RegB) const { + bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const { return isSubRegisterEq(RegA, RegB) || isSuperRegister(RegA, RegB); } }; @@ -482,8 +476,8 @@ public: /// If IncludeSelf is set, Reg itself is included in the list. class MCSubRegIterator : public MCRegisterInfo::DiffListIterator { public: - MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, - bool IncludeSelf = false) { + MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI, + bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs); // Initially, the iterator points to Reg itself. if (!IncludeSelf) @@ -500,13 +494,13 @@ class MCSubRegIndexIterator { public: /// Constructs an iterator that traverses subregisters and their /// associated subregister indices. - MCSubRegIndexIterator(unsigned Reg, const MCRegisterInfo *MCRI) + MCSubRegIndexIterator(MCRegister Reg, const MCRegisterInfo *MCRI) : SRIter(Reg, MCRI) { SRIndex = MCRI->SubRegIndices + MCRI->get(Reg).SubRegIndices; } /// Returns current sub-register. - unsigned getSubReg() const { + MCRegister getSubReg() const { return *SRIter; } @@ -531,7 +525,7 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator { public: MCSuperRegIterator() = default; - MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, + MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI, bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs); // Initially, the iterator points to Reg itself. @@ -542,7 +536,7 @@ public: // Definition for isSuperRegister. Put it down here since it needs the // iterator defined above in addition to the MCRegisterInfo class itself. 
-inline bool MCRegisterInfo::isSuperRegister(unsigned RegA, unsigned RegB) const{ +inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) const{ for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I) if (*I == RegB) return true; @@ -569,7 +563,7 @@ public: /// in Reg. MCRegUnitIterator() = default; - MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) { + MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) { assert(Reg && "Null register has no regunits"); // Decode the RegUnits MCRegisterDesc field. unsigned RU = MCRI->get(Reg).RegUnits; @@ -600,7 +594,7 @@ public: /// Constructs an iterator that traverses the register units and their /// associated LaneMasks in Reg. - MCRegUnitMaskIterator(unsigned Reg, const MCRegisterInfo *MCRI) + MCRegUnitMaskIterator(MCRegister Reg, const MCRegisterInfo *MCRI) : RUIter(Reg, MCRI) { uint16_t Idx = MCRI->get(Reg).RegUnitLaneMasks; MaskListIter = &MCRI->RegUnitMaskSequences[Idx]; @@ -667,7 +661,7 @@ public: /// any ordering or that entries are unique. class MCRegAliasIterator { private: - unsigned Reg; + MCRegister Reg; const MCRegisterInfo *MCRI; bool IncludeSelf; @@ -676,7 +670,7 @@ private: MCSuperRegIterator SI; public: - MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI, + MCRegAliasIterator(MCRegister Reg, const MCRegisterInfo *MCRI, bool IncludeSelf) : Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) { // Initialize the iterators. 
@@ -692,7 +686,7 @@ public: bool isValid() const { return RI.isValid(); } - unsigned operator*() const { + MCRegister operator*() const { assert(SI.isValid() && "Cannot dereference an invalid iterator."); return *SI; } diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 6fad1ec2069..d057feda87d 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" +#include "llvm/Support/Alignment.h" #include #include @@ -58,7 +59,7 @@ private: MCSymbol *Begin; MCSymbol *End = nullptr; /// The alignment requirement of this section. - unsigned Alignment = 1; + Align Alignment; /// The section index in the assemblers section list. unsigned Ordinal = 0; /// The index of this section in the layout order. @@ -117,8 +118,8 @@ public: MCSymbol *getEndSymbol(MCContext &Ctx); bool hasEnded() const; - unsigned getAlignment() const { return Alignment; } - void setAlignment(unsigned Value) { Alignment = Value; } + unsigned getAlignment() const { return Alignment.value(); } + void setAlignment(Align Value) { Alignment = Value; } unsigned getOrdinal() const { return Ordinal; } void setOrdinal(unsigned Value) { Ordinal = Value; } diff --git a/include/llvm/MC/MCSectionXCOFF.h b/include/llvm/MC/MCSectionXCOFF.h index 2a3f391fd3e..ee302ed5ece 100644 --- a/include/llvm/MC/MCSectionXCOFF.h +++ b/include/llvm/MC/MCSectionXCOFF.h @@ -23,16 +23,30 @@ class MCSymbol; // This class represents an XCOFF `Control Section`, more commonly referred to // as a csect. A csect represents the smallest possible unit of data/code which -// will be relocated as a single block. +// will be relocated as a single block. A csect can either be: +// 1) Initialized: The Type will be XTY_SD, and the symbols inside the csect +// will have a label definition representing their offset within the csect. 
+// 2) Uninitialized: The Type will be XTY_CM, it will contain a single symbol, +// and may not contain label definitions. +// 3) An external reference providing a symbol table entry for a symbol +// contained in another XCOFF object file. External reference csects are not +// implemented yet. class MCSectionXCOFF final : public MCSection { friend class MCContext; StringRef Name; XCOFF::StorageMappingClass MappingClass; + XCOFF::SymbolType Type; + XCOFF::StorageClass StorageClass; MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC, - SectionKind K, MCSymbol *Begin) - : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC) {} + XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K, + MCSymbol *Begin) + : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC), + Type(ST), StorageClass(SC) { + assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM) && + "Invalid or unhandled type for csect."); + } public: ~MCSectionXCOFF(); @@ -43,6 +57,8 @@ public: StringRef getSectionName() const { return Name; } XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; } + XCOFF::StorageClass getStorageClass() const { return StorageClass; } + XCOFF::SymbolType getCSectType() const { return Type; } void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 731e7515448..6b48580ae57 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -46,6 +46,7 @@ struct MCDwarfFrameInfo; class MCExpr; class MCInst; class MCInstPrinter; +class MCRegister; class MCSection; class MCStreamer; class MCSymbolRefExpr; @@ -53,6 +54,13 @@ class MCSubtargetInfo; class raw_ostream; class Twine; +namespace codeview { +struct DefRangeRegisterRelHeader; +struct DefRangeSubfieldRegisterHeader; +struct DefRangeRegisterHeader; +struct DefRangeFramePointerRelHeader; +} + using MCSectionSubPair = std::pair; /// Target specific streamer 
interface. This is used so that targets can @@ -536,6 +544,15 @@ public: /// \param Symbol - Symbol the image relative relocation should point to. virtual void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset); + /// Emits an lcomm directive with XCOFF csect information. + /// + /// \param Symbol - The symbol we are emitting. + /// \param Size - The size of the block of storage. + /// \param ByteAlignment - The alignment of the symbol in bytes. Must be a power + /// of 2. + virtual void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + /// Emit an ELF .size directive. /// /// This corresponds to an assembler statement such as: @@ -860,6 +877,22 @@ public: ArrayRef> Ranges, StringRef FixedSizePortion); + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr); + + virtual void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr); + /// This implements the CodeView '.cv_stringtable' assembler directive.
virtual void EmitCVStringTableDirective() {} @@ -917,13 +950,13 @@ public: virtual void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc = SMLoc()); virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc()); virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc()); - virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + virtual void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc = SMLoc()); + virtual void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + virtual void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); - virtual void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + virtual void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc = SMLoc()); virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc()); diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h index 9490a6eceda..09130c4641e 100644 --- a/include/llvm/MC/MCSubtargetInfo.h +++ b/include/llvm/MC/MCSubtargetInfo.h @@ -221,6 +221,52 @@ public: auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + + virtual unsigned getHwMode() const { return 0; } + + /// Return the cache size in bytes for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache. + /// + virtual Optional getCacheSize(unsigned Level) const; + + /// Return the cache associativity for the given level of cache. + /// Level is zero-based, so a value of zero means the first level of + /// cache.
+ /// + virtual Optional getCacheAssociativity(unsigned Level) const; + + /// Return the target cache line size in bytes at a given level. + /// + virtual Optional getCacheLineSize(unsigned Level) const; + + /// Return the target cache line size in bytes. By default, return + /// the line size for the bottom-most level of cache. This provides + /// a more convenient interface for the common case where all cache + /// levels have the same line size. Return zero if there is no + /// cache model. + /// + virtual unsigned getCacheLineSize() const { + Optional Size = getCacheLineSize(0); + if (Size) + return *Size; + + return 0; + } + + /// Return the preferred prefetch distance in terms of instructions. + /// + virtual unsigned getPrefetchDistance() const; + + /// Return the maximum prefetch distance in terms of loop + /// iterations. + /// + virtual unsigned getMaxPrefetchIterationsAhead() const; + + /// Return the minimum stride necessary to trigger software + /// prefetching. + /// + virtual unsigned getMinPrefetchStride() const; }; } // end namespace llvm diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index c50cd0ee470..95beebe3f75 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -54,6 +54,13 @@ public: modifyFlags(wasm::WASM_SYMBOL_EXPORTED, wasm::WASM_SYMBOL_EXPORTED); } + bool isNoStrip() const { + return getFlags() & wasm::WASM_SYMBOL_NO_STRIP; + } + void setNoStrip() const { + modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP); + } + bool isWeak() const { return IsWeak; } void setWeak(bool isWeak) { IsWeak = isWeak; } diff --git a/include/llvm/MC/MCSymbolXCOFF.h b/include/llvm/MC/MCSymbolXCOFF.h index 0a1fe147513..98ecd246692 100644 --- a/include/llvm/MC/MCSymbolXCOFF.h +++ b/include/llvm/MC/MCSymbolXCOFF.h @@ -8,17 +8,49 @@ #ifndef LLVM_MC_MCSYMBOLXCOFF_H #define LLVM_MC_MCSYMBOLXCOFF_H +#include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/XCOFF.h" #include 
"llvm/MC/MCSymbol.h" namespace llvm { +class MCSectionXCOFF; + class MCSymbolXCOFF : public MCSymbol { public: MCSymbolXCOFF(const StringMapEntry *Name, bool isTemporary) : MCSymbol(SymbolKindXCOFF, Name, isTemporary) {} static bool classof(const MCSymbol *S) { return S->isXCOFF(); } + + void setStorageClass(XCOFF::StorageClass SC) { + assert((!StorageClass.hasValue() || StorageClass.getValue() == SC) && + "Redefining StorageClass of XCOFF MCSymbol."); + StorageClass = SC; + }; + + XCOFF::StorageClass getStorageClass() const { + assert(StorageClass.hasValue() && + "StorageClass not set on XCOFF MCSymbol."); + return StorageClass.getValue(); + } + + void setContainingCsect(MCSectionXCOFF *C) { + assert((!ContainingCsect || ContainingCsect == C) && + "Trying to set a containing csect that doesn't match the one that" + "this symbol is already mapped to."); + ContainingCsect = C; + } + + MCSectionXCOFF *getContainingCsect() const { + assert(ContainingCsect && + "Trying to get containing csect but none was set."); + return ContainingCsect; + } + +private: + Optional StorageClass; + MCSectionXCOFF *ContainingCsect = nullptr; }; } // end namespace llvm diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h index 4adbca28f11..fbb68549b50 100644 --- a/include/llvm/MC/MCWasmObjectWriter.h +++ b/include/llvm/MC/MCWasmObjectWriter.h @@ -20,9 +20,10 @@ class raw_pwrite_stream; class MCWasmObjectTargetWriter : public MCObjectTargetWriter { const unsigned Is64Bit : 1; + const unsigned IsEmscripten : 1; protected: - explicit MCWasmObjectTargetWriter(bool Is64Bit_); + explicit MCWasmObjectTargetWriter(bool Is64Bit_, bool IsEmscripten); public: virtual ~MCWasmObjectTargetWriter(); @@ -38,6 +39,7 @@ public: /// \name Accessors /// @{ bool is64Bit() const { return Is64Bit; } + bool isEmscripten() const { return IsEmscripten; } /// @} }; diff --git a/include/llvm/MC/MCXCOFFStreamer.h b/include/llvm/MC/MCXCOFFStreamer.h index 159ae481874..b13b0031d18 
100644 --- a/include/llvm/MC/MCXCOFFStreamer.h +++ b/include/llvm/MC/MCXCOFFStreamer.h @@ -26,6 +26,8 @@ public: uint64_t Size = 0, unsigned ByteAlignment = 0, SMLoc Loc = SMLoc()) override; void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override; + void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) override; }; } // end namespace llvm diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h index c83eca4e512..c8d4c3bbc26 100644 --- a/include/llvm/MC/StringTableBuilder.h +++ b/include/llvm/MC/StringTableBuilder.h @@ -22,7 +22,7 @@ class raw_ostream; /// Utility for building string tables with deduplicated suffixes. class StringTableBuilder { public: - enum Kind { ELF, WinCOFF, MachO, RAW, DWARF }; + enum Kind { ELF, WinCOFF, MachO, RAW, DWARF, XCOFF }; private: DenseMap StringIndexMap; diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index fc9565ceafa..defbc3c6472 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -18,6 +18,7 @@ #define LLVM_MC_SUBTARGETFEATURE_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -33,20 +34,123 @@ const unsigned MAX_SUBTARGET_WORDS = 3; const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64; /// Container class for subtarget features. -/// This is convenient because std::bitset does not have a constructor -/// with an initializer list of set bits. -class FeatureBitset : public std::bitset { +/// This is a constexpr reimplementation of a subset of std::bitset. It would be +/// nice to use std::bitset directly, but it doesn't support constant +/// initialization. +class FeatureBitset { + static_assert((MAX_SUBTARGET_FEATURES % 64) == 0, + "Should be a multiple of 64!"); + // This cannot be a std::array, operator[] is not constexpr until C++17. 
+ uint64_t Bits[MAX_SUBTARGET_WORDS] = {}; + +protected: + constexpr FeatureBitset(const std::array &B) { + for (unsigned I = 0; I != B.size(); ++I) + Bits[I] = B[I]; + } + public: - // Cannot inherit constructors because it's not supported by VC++.. - FeatureBitset() = default; - - FeatureBitset(const bitset& B) : bitset(B) {} - - FeatureBitset(std::initializer_list Init) { + constexpr FeatureBitset() = default; + constexpr FeatureBitset(std::initializer_list Init) { for (auto I : Init) set(I); } + FeatureBitset &set() { + std::fill(std::begin(Bits), std::end(Bits), -1ULL); + return *this; + } + + constexpr FeatureBitset &set(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. + uint64_t NewBits = Bits[I / 64] | (uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr FeatureBitset &reset(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. + uint64_t NewBits = Bits[I / 64] & ~(uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr FeatureBitset &flip(unsigned I) { + // GCC <6.2 crashes if this is written in a single statement. 
+ uint64_t NewBits = Bits[I / 64] ^ (uint64_t(1) << (I % 64)); + Bits[I / 64] = NewBits; + return *this; + } + + constexpr bool operator[](unsigned I) const { + uint64_t Mask = uint64_t(1) << (I % 64); + return (Bits[I / 64] & Mask) != 0; + } + + constexpr bool test(unsigned I) const { return (*this)[I]; } + + constexpr size_t size() const { return MAX_SUBTARGET_FEATURES; } + + bool any() const { + return llvm::any_of(Bits, [](uint64_t I) { return I != 0; }); + } + bool none() const { return !any(); } + size_t count() const { + size_t Count = 0; + for (auto B : Bits) + Count += countPopulation(B); + return Count; + } + + constexpr FeatureBitset &operator^=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] ^= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator^(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result ^= RHS; + return Result; + } + + constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] &= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator&(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result &= RHS; + return Result; + } + + constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) { + Bits[I] |= RHS.Bits[I]; + } + return *this; + } + constexpr FeatureBitset operator|(const FeatureBitset &RHS) const { + FeatureBitset Result = *this; + Result |= RHS; + return Result; + } + + constexpr FeatureBitset operator~() const { + FeatureBitset Result = *this; + for (auto &B : Result.Bits) + B = ~B; + return Result; + } + + bool operator==(const FeatureBitset &RHS) const { + return std::equal(std::begin(Bits), std::end(Bits), std::begin(RHS.Bits)); + } + + bool operator!=(const FeatureBitset &RHS) const { return !(*this == RHS); } + bool operator < (const FeatureBitset &Other) const 
{ for (unsigned I = 0, E = size(); I != E; ++I) { bool LHS = test(I), RHS = Other.test(I); @@ -58,23 +162,12 @@ public: }; /// Class used to store the subtarget bits in the tables created by tablegen. -/// The std::initializer_list constructor of FeatureBitset can't be done at -/// compile time and requires a static constructor to run at startup. -class FeatureBitArray { - std::array Bits; - +class FeatureBitArray : public FeatureBitset { public: constexpr FeatureBitArray(const std::array &B) - : Bits(B) {} + : FeatureBitset(B) {} - FeatureBitset getAsBitset() const { - FeatureBitset Result; - - for (unsigned i = 0, e = Bits.size(); i != e; ++i) - Result |= FeatureBitset(Bits[i]) << (64 * i); - - return Result; - } + const FeatureBitset &getAsBitset() const { return *this; } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/MCA/CodeEmitter.h b/include/llvm/MCA/CodeEmitter.h new file mode 100644 index 00000000000..c8d222bd8c2 --- /dev/null +++ b/include/llvm/MCA/CodeEmitter.h @@ -0,0 +1,72 @@ +//===--------------------- CodeEmitter.h ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A utility class used to compute instruction encodings. It buffers encodings +/// for later usage. It exposes a simple API to compute and get the encodings as +/// StringRef. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_CODEEMITTER_H +#define LLVM_MCA_CODEEMITTER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { +namespace mca { + +/// A utility class used to compute instruction encodings for a code region. +/// +/// It provides a simple API to compute and return instruction encodings as +/// strings. Encodings are cached internally for later usage. +class CodeEmitter { + const MCSubtargetInfo &STI; + const MCAsmBackend &MAB; + const MCCodeEmitter &MCE; + + SmallString<256> Code; + raw_svector_ostream VecOS; + ArrayRef Sequence; + + // An EncodingInfo pair stores information. Base (i.e. first) + // is an index to the `Code`. Length (i.e. second) is the encoding size. + using EncodingInfo = std::pair; + + // A cache of encodings. 
+ SmallVector Encodings; + + EncodingInfo getOrCreateEncodingInfo(unsigned MCID); + +public: + CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB, + const MCCodeEmitter &CE, ArrayRef S) + : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S), + Encodings(S.size()) {} + + StringRef getEncoding(unsigned MCID) { + EncodingInfo EI = getOrCreateEncodingInfo(MCID); + return StringRef(&Code[EI.first], EI.second); + } +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_CODEEMITTER_H diff --git a/include/llvm/MCA/Context.h b/include/llvm/MCA/Context.h index 503d780d494..af3cb8e1e83 100644 --- a/include/llvm/MCA/Context.h +++ b/include/llvm/MCA/Context.h @@ -20,7 +20,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MCA/HardwareUnits/HardwareUnit.h" -#include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" #include "llvm/MCA/SourceMgr.h" #include @@ -58,6 +57,9 @@ public: Context(const Context &C) = delete; Context &operator=(const Context &C) = delete; + const MCRegisterInfo &getMCRegisterInfo() const { return MRI; } + const MCSubtargetInfo &getMCSubtargetInfo() const { return STI; } + void addHardwareUnit(std::unique_ptr H) { Hardware.push_back(std::move(H)); } @@ -65,7 +67,6 @@ public: /// Construct a basic pipeline for simulating an out-of-order pipeline. /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. 
std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, - InstrBuilder &IB, SourceMgr &SrcMgr); }; diff --git a/include/llvm/MCA/HardwareUnits/LSUnit.h b/include/llvm/MCA/HardwareUnits/LSUnit.h index ae9a49c6485..34903794db4 100644 --- a/include/llvm/MCA/HardwareUnits/LSUnit.h +++ b/include/llvm/MCA/HardwareUnits/LSUnit.h @@ -209,8 +209,10 @@ public: unsigned getUsedLQEntries() const { return UsedLQEntries; } unsigned getUsedSQEntries() const { return UsedSQEntries; } - unsigned assignLQSlot() { return UsedLQEntries++; } - unsigned assignSQSlot() { return UsedSQEntries++; } + void acquireLQSlot() { ++UsedLQEntries; } + void acquireSQSlot() { ++UsedSQEntries; } + void releaseLQSlot() { --UsedLQEntries; } + void releaseSQSlot() { --UsedSQEntries; } bool assumeNoAlias() const { return NoAlias; } @@ -285,13 +287,18 @@ public: unsigned createMemoryGroup() { Groups.insert( - std::make_pair(NextGroupID, llvm::make_unique())); + std::make_pair(NextGroupID, std::make_unique())); return NextGroupID++; } - // Instruction executed event handlers. virtual void onInstructionExecuted(const InstRef &IR); + // Loads are tracked by the LDQ (load queue) from dispatch until completion. + // Stores are tracked by the STQ (store queue) from dispatch until commitment. + // By default we conservatively assume that the LDQ receives a load at + // dispatch. Loads leave the LDQ at retirement stage. + virtual void onInstructionRetired(const InstRef &IR); + virtual void onInstructionIssued(const InstRef &IR) { unsigned GroupID = IR.getInstruction()->getLSUTokenID(); Groups[GroupID]->onInstructionIssued(IR); @@ -436,9 +443,6 @@ public: /// 6. A store has to wait until an older store barrier is fully executed. unsigned dispatch(const InstRef &IR) override; - // FIXME: For simplicity, we optimistically assume a similar behavior for - // store instructions. In practice, store operations don't tend to leave the - // store queue until they reach the 'Retired' stage (See PR39830). 
void onInstructionExecuted(const InstRef &IR) override; }; diff --git a/include/llvm/MCA/HardwareUnits/RegisterFile.h b/include/llvm/MCA/HardwareUnits/RegisterFile.h index 36506327bd2..cd7718d9874 100644 --- a/include/llvm/MCA/HardwareUnits/RegisterFile.h +++ b/include/llvm/MCA/HardwareUnits/RegisterFile.h @@ -220,7 +220,7 @@ public: // // Current implementation can simulate up to 32 register files (including the // special register file at index #0). - unsigned isAvailable(ArrayRef Regs) const; + unsigned isAvailable(ArrayRef Regs) const; // Returns the number of PRFs implemented by this processor. unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h index 2f91185516f..917af375004 100644 --- a/include/llvm/MCA/HardwareUnits/ResourceManager.h +++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h @@ -33,8 +33,7 @@ namespace mca { /// with a buffer size of -1 is always available if it is not reserved. /// /// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. +/// ResourceManager::canBeDispatched() /// /// The naming convention for resource state events is: /// * Event names start with prefix RS_ @@ -263,16 +262,26 @@ public: /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. ResourceStateEvent isBufferAvailable() const; - /// Reserve a slot in the buffer. - void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; + /// Reserve a buffer slot. + /// + /// Returns true if the buffer is not full. + /// It always returns true if BufferSize is set to zero. + bool reserveBuffer() { + if (BufferSize <= 0) + return true; + + --AvailableSlots; + assert(AvailableSlots <= static_cast(BufferSize)); + return AvailableSlots; } - /// Release a slot in the buffer. + /// Releases a slot in the buffer. 
void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; + // Ignore dispatch hazards or invalid buffer sizes. + if (BufferSize <= 0) + return; + + ++AvailableSlots; assert(AvailableSlots <= static_cast(BufferSize)); } @@ -351,9 +360,16 @@ class ResourceManager { // Set of processor resource units that are available during this cycle. uint64_t AvailableProcResUnits; - // Set of processor resource groups that are currently reserved. + // Set of processor resources that are currently reserved. uint64_t ReservedResourceGroups; + // Set of unavailable scheduler buffer resources. This is used internally to + // speedup `canBeDispatched()` queries. + uint64_t AvailableBuffers; + + // Set of dispatch hazard buffer resources that are currently unavailable. + uint64_t ReservedBuffers; + // Returns the actual resource unit that will be used. ResourceRef selectPipe(uint64_t ResourceID); @@ -382,17 +398,20 @@ public: // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(ArrayRef Buffers) const; + ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; // Return the processor resource identifier associated to this Mask. unsigned resolveResourceMask(uint64_t Mask) const; - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. - void reserveBuffers(ArrayRef Buffers); + // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. + // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. + void reserveBuffers(uint64_t ConsumedBuffers); - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(ArrayRef Buffers); + // Releases a slot from every buffered resource in mask `ConsumedBuffers`. 
+ // ConsumedBuffers is a bitmask of previously acquired buffers (using method + // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are + // not automatically unreserved by this method. + void releaseBuffers(uint64_t ConsumedBuffers); // Reserve a processor resource. A reserved resource is not available for // instruction issue until it is released. diff --git a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h index 06290141739..acbd4543bd4 100644 --- a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h +++ b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h @@ -57,34 +57,43 @@ struct RetireControlUnit : public HardwareUnit { private: unsigned NextAvailableSlotIdx; unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; + unsigned NumROBEntries; + unsigned AvailableEntries; unsigned MaxRetirePerCycle; // 0 means no limit. std::vector Queue; -public: - RetireControlUnit(const MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { + unsigned normalizeQuantity(unsigned Quantity) const { // Some instructions may declare a number of uOps which exceeds the size // of the reorder buffer. To avoid problems, cap the amount of slots to // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); + Quantity = std::min(Quantity, NumROBEntries); // Further normalize the number of micro opcodes for instructions that // declare zero opcodes. This should match the behavior of method // reserveSlot(). 
- Quantity = std::max(Quantity, 1U); - return AvailableSlots >= Quantity; + return std::max(Quantity, 1U); + } + + unsigned computeNextSlotIdx() const; + +public: + RetireControlUnit(const MCSchedModel &SM); + + bool isEmpty() const { return AvailableEntries == NumROBEntries; } + + bool isAvailable(unsigned Quantity = 1) const { + return AvailableEntries >= normalizeQuantity(Quantity); } unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + // Reserves a number of slots, and returns a new token reference. + unsigned dispatch(const InstRef &IS); // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; + const RUToken &getCurrentToken() const; + + const RUToken &peekNextToken() const; // Advance the pointer to the next token in the circular token queue. void consumeCurrentToken(); diff --git a/include/llvm/MCA/HardwareUnits/Scheduler.h b/include/llvm/MCA/HardwareUnits/Scheduler.h index 27beb842dfd..6c196757e57 100644 --- a/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -68,7 +68,7 @@ public: /// instructions from the dispatch stage, until the write-back stage. /// class Scheduler : public HardwareUnit { - LSUnit &LSU; + LSUnitBase &LSU; // Instruction selection strategy for this Scheduler. 
std::unique_ptr Strategy; @@ -154,15 +154,15 @@ class Scheduler : public HardwareUnit { bool promoteToPendingSet(SmallVectorImpl &Pending); public: - Scheduler(const MCSchedModel &Model, LSUnit &Lsu) + Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu) : Scheduler(Model, Lsu, nullptr) {} - Scheduler(const MCSchedModel &Model, LSUnit &Lsu, + Scheduler(const MCSchedModel &Model, LSUnitBase &Lsu, std::unique_ptr SelectStrategy) - : Scheduler(make_unique(Model), Lsu, + : Scheduler(std::make_unique(Model), Lsu, std::move(SelectStrategy)) {} - Scheduler(std::unique_ptr RM, LSUnit &Lsu, + Scheduler(std::unique_ptr RM, LSUnitBase &Lsu, std::unique_ptr SelectStrategy) : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0), NumDispatchedToThePendingSet(0), HadTokenStall(false) { @@ -228,6 +228,9 @@ public: SmallVectorImpl &Ready); /// Convert a resource mask into a valid llvm processor resource identifier. + /// + /// Only the most significant bit of the Mask is used by this method to + /// identify the processor resource. unsigned getResourceID(uint64_t Mask) const { return Resources->resolveResourceMask(Mask); } diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h index d4d3f22797f..c97cb463d0f 100644 --- a/include/llvm/MCA/Instruction.h +++ b/include/llvm/MCA/Instruction.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCRegister.h" // definition of MCPhysReg. #include "llvm/Support/MathExtras.h" #ifndef NDEBUG @@ -42,7 +43,7 @@ struct WriteDescriptor { unsigned Latency; // This field is set to a value different than zero only if this // is an implicit definition. - unsigned RegisterID; + MCPhysReg RegisterID; // Instruction itineraries would set this field to the SchedClass ID. // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry // element associated to this write. 
@@ -70,7 +71,7 @@ struct ReadDescriptor { // uses always come first in the sequence of uses. unsigned UseIndex; // This field is only set if this is an implicit read. - unsigned RegisterID; + MCPhysReg RegisterID; // Scheduling Class Index. It is used to query the scheduling model for the // MCSchedClassDesc object. unsigned SchedClassID; @@ -85,7 +86,7 @@ class ReadState; /// Field RegID is set to the invalid register for memory dependencies. struct CriticalDependency { unsigned IID; - unsigned RegID; + MCPhysReg RegID; unsigned Cycles; }; @@ -106,7 +107,7 @@ class WriteState { // to speedup queries on the register file. // For implicit writes, this field always matches the value of // field RegisterID from WD. - unsigned RegisterID; + MCPhysReg RegisterID; // Physical register file that serves register RegisterID. unsigned PRFID; @@ -146,7 +147,7 @@ class WriteState { SmallVector, 4> Users; public: - WriteState(const WriteDescriptor &Desc, unsigned RegID, + WriteState(const WriteDescriptor &Desc, MCPhysReg RegID, bool clearsSuperRegs = false, bool writesZero = false) : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero), @@ -158,7 +159,7 @@ public: int getCyclesLeft() const { return CyclesLeft; } unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; } - unsigned getRegisterID() const { return RegisterID; } + MCPhysReg getRegisterID() const { return RegisterID; } unsigned getRegisterFileID() const { return PRFID; } unsigned getLatency() const { return WD->Latency; } unsigned getDependentWriteCyclesLeft() const { @@ -200,7 +201,7 @@ public: } void setDependentWrite(const WriteState *Other) { DependentWrite = Other; } - void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles); + void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles); void setWriteZero() { WritesZero = true; } void setEliminated() { assert(Users.empty() && "Write is in an inconsistent 
state."); @@ -226,7 +227,7 @@ public: class ReadState { const ReadDescriptor *RD; // Physical register identified associated to this read. - unsigned RegisterID; + MCPhysReg RegisterID; // Physical register file that serves register RegisterID. unsigned PRFID; // Number of writes that contribute to the definition of RegisterID. @@ -253,14 +254,14 @@ class ReadState { bool IndependentFromDef; public: - ReadState(const ReadDescriptor &Desc, unsigned RegID) + ReadState(const ReadDescriptor &Desc, MCPhysReg RegID) : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0), CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true), IsZero(false), IndependentFromDef(false) {} const ReadDescriptor &getDescriptor() const { return *RD; } unsigned getSchedClass() const { return RD->SchedClassID; } - unsigned getRegisterID() const { return RegisterID; } + MCPhysReg getRegisterID() const { return RegisterID; } unsigned getRegisterFileID() const { return PRFID; } const CriticalDependency &getCriticalRegDep() const { return CRD; } @@ -272,7 +273,7 @@ public: void setIndependentFromDef() { IndependentFromDef = true; } void cycleEvent(); - void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles); + void writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles); void setDependentWrites(unsigned Writes) { DependentWrites = Writes; IsReady = !Writes; @@ -352,11 +353,14 @@ struct InstrDesc { // reports the number of "consumed cycles". SmallVector, 4> Resources; - // A list of buffered resources consumed by this instruction. - SmallVector Buffers; + // A bitmask of used hardware buffers. + uint64_t UsedBuffers; - unsigned UsedProcResUnits; - unsigned UsedProcResGroups; + // A bitmask of used processor resource units. + uint64_t UsedProcResUnits; + + // A bitmask of used processor resource groups. + uint64_t UsedProcResGroups; unsigned MaxLatency; // Number of MicroOps for this instruction. 
@@ -414,6 +418,7 @@ public: const InstrDesc &getDesc() const { return Desc; } unsigned getLatency() const { return Desc.MaxLatency; } + unsigned getNumMicroOps() const { return Desc.NumMicroOps; } bool hasDependentUsers() const { return any_of(Defs, @@ -463,6 +468,12 @@ class Instruction : public InstructionBase { // operation. unsigned LSUTokenID; + // A resource mask which identifies buffered resources consumed by this + // instruction at dispatch stage. In the absence of macro-fusion, this value + // should always match the value of field `UsedBuffers` from the instruction + // descriptor (see field InstrBase::Desc). + uint64_t UsedBuffers; + // Critical register dependency. CriticalDependency CriticalRegDep; @@ -480,12 +491,18 @@ class Instruction : public InstructionBase { public: Instruction(const InstrDesc &D) : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), - RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(), - CriticalResourceMask(0), IsEliminated(false) {} + RCUTokenID(0), LSUTokenID(0), UsedBuffers(D.UsedBuffers), + CriticalRegDep(), CriticalMemDep(), CriticalResourceMask(0), + IsEliminated(false) {} unsigned getRCUTokenID() const { return RCUTokenID; } unsigned getLSUTokenID() const { return LSUTokenID; } void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; } + + uint64_t getUsedBuffers() const { return UsedBuffers; } + void setUsedBuffers(uint64_t Mask) { UsedBuffers = Mask; } + void clearUsedBuffers() { UsedBuffers = 0ULL; } + int getCyclesLeft() const { return CyclesLeft; } // Transition to the dispatch stage, and assign a RCUToken to this diff --git a/include/llvm/MCA/SourceMgr.h b/include/llvm/MCA/SourceMgr.h index dbe31db1b1d..e844171bdca 100644 --- a/include/llvm/MCA/SourceMgr.h +++ b/include/llvm/MCA/SourceMgr.h @@ -16,12 +16,13 @@ #define LLVM_MCA_SOURCEMGR_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/MCA/Instruction.h" namespace llvm { namespace mca { -class Instruction; - +// MSVC >= 19.15, < 19.20 
need to see the definition of class Instruction to +// prevent compiler error C2139 about intrinsic type trait '__is_assignable'. typedef std::pair SourceRef; class SourceMgr { diff --git a/include/llvm/MCA/Stages/RetireStage.h b/include/llvm/MCA/Stages/RetireStage.h index 08c216ac7bf..f4713688d25 100644 --- a/include/llvm/MCA/Stages/RetireStage.h +++ b/include/llvm/MCA/Stages/RetireStage.h @@ -16,6 +16,7 @@ #ifndef LLVM_MCA_RETIRE_STAGE_H #define LLVM_MCA_RETIRE_STAGE_H +#include "llvm/MCA/HardwareUnits/LSUnit.h" #include "llvm/MCA/HardwareUnits/RegisterFile.h" #include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/MCA/Stages/Stage.h" @@ -27,13 +28,14 @@ class RetireStage final : public Stage { // Owner will go away when we move listeners/eventing to the stages. RetireControlUnit &RCU; RegisterFile &PRF; + LSUnitBase &LSU; RetireStage(const RetireStage &Other) = delete; RetireStage &operator=(const RetireStage &Other) = delete; public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} + RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS) + : Stage(), RCU(R), PRF(F), LSU(LS) {} bool hasWorkToComplete() const override { return !RCU.isEmpty(); } Error cycleStart() override; diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index c40278a4f92..c3f36bdd9d1 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -48,8 +48,7 @@ public: /// Get the name looking up long names. Expected getName(uint64_t Size) const; - /// Members are not larger than 4GB. 
- Expected getSize() const; + Expected getSize() const; Expected getAccessMode() const; Expected> getLastModified() const; @@ -136,6 +135,7 @@ public: Expected getBuffer() const; uint64_t getChildOffset() const; + uint64_t getDataOffset() const { return getChildOffset() + StartOfFile; } Expected getMemoryBufferRef() const; @@ -221,6 +221,9 @@ public: Archive(MemoryBufferRef Source, Error &Err); static Expected> create(MemoryBufferRef Source); + /// Size field is 10 decimal digits long + static const uint64_t MaxMemberSize = 9999999999; + enum Kind { K_GNU, K_GNU64, diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index 3c3e977baff..aa5e718f5e9 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -42,7 +42,9 @@ protected: ID_Archive, ID_MachOUniversalBinary, ID_COFFImportFile, - ID_IR, // LLVM IR + ID_IR, // LLVM IR + ID_TapiUniversal, // Text-based Dynamic Library Stub file. + ID_TapiFile, // Text-based Dynamic Library Stub file. ID_Minidump, @@ -101,16 +103,18 @@ public: return TypeID > ID_StartObjects && TypeID < ID_EndObjects; } - bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); } - - bool isArchive() const { - return TypeID == ID_Archive; + bool isSymbolic() const { + return isIR() || isObject() || isCOFFImportFile() || isTapiFile(); } + bool isArchive() const { return TypeID == ID_Archive; } + bool isMachOUniversalBinary() const { return TypeID == ID_MachOUniversalBinary; } + bool isTapiUniversal() const { return TypeID == ID_TapiUniversal; } + bool isELF() const { return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B; } @@ -137,6 +141,8 @@ public: bool isMinidump() const { return TypeID == ID_Minidump; } + bool isTapiFile() const { return TypeID == ID_TapiFile; } + bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 
c53cbc46c74..b91ee5887fe 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -314,7 +314,10 @@ public: return CS16 ? CS16->Name.Offset : CS32->Name.Offset; } - uint32_t getValue() const { return CS16 ? CS16->Value : CS32->Value; } + uint32_t getValue() const { + assert(isSet() && "COFFSymbolRef points to nothing!"); + return CS16 ? CS16->Value : CS32->Value; + } int32_t getSectionNumber() const { assert(isSet() && "COFFSymbolRef points to nothing!"); @@ -969,11 +972,14 @@ public: return nullptr; return reinterpret_cast(base()); } - std::error_code getCOFFHeader(const coff_file_header *&Res) const; - std::error_code - getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const; - std::error_code getPE32Header(const pe32_header *&Res) const; - std::error_code getPE32PlusHeader(const pe32plus_header *&Res) const; + + const coff_file_header *getCOFFHeader() const { return COFFHeader; } + const coff_bigobj_file_header *getCOFFBigObjHeader() const { + return COFFBigObjHeader; + } + const pe32_header *getPE32Header() const { return PE32Header; } + const pe32plus_header *getPE32PlusHeader() const { return PE32PlusHeader; } + std::error_code getDataDirectory(uint32_t index, const data_directory *&Res) const; std::error_code getSection(int32_t index, const coff_section *&Res) const; @@ -1201,16 +1207,34 @@ public: ResourceSectionRef() = default; explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {} + Error load(const COFFObjectFile *O); + Error load(const COFFObjectFile *O, const SectionRef &S); + Expected> getEntryNameString(const coff_resource_dir_entry &Entry); Expected getEntrySubDir(const coff_resource_dir_entry &Entry); + Expected + getEntryData(const coff_resource_dir_entry &Entry); Expected getBaseTable(); + Expected + getTableEntry(const coff_resource_dir_table &Table, uint32_t Index); + + Expected getContents(const coff_resource_data_entry &Entry); private: BinaryByteStream BBS; + SectionRef Section; + const 
COFFObjectFile *Obj; + + std::vector Relocs; + Expected getTableAtOffset(uint32_t Offset); + Expected + getTableEntryAtOffset(uint32_t Offset); + Expected + getDataEntryAtOffset(uint32_t Offset); Expected> getDirStringAtOffset(uint32_t Offset); }; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index cf8e4529bad..28b00c8413d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -64,6 +64,10 @@ std::string getSecIndexForError(const ELFFile *Obj, return "[unknown index]"; } +static inline Error defaultWarningHandler(const Twine &Msg) { + return createError(Msg); +} + template class ELFFile { public: @@ -95,6 +99,13 @@ public: using Elf_Relr_Range = typename ELFT::RelrRange; using Elf_Phdr_Range = typename ELFT::PhdrRange; + // This is a callback that can be passed to a number of functions. + // It can be used to ignore non-critical errors (warnings), which is + // useful for dumpers, like llvm-readobj. + // It accepts a warning message string and returns a success + // when the warning should be ignored or an error otherwise. 
+ using WarningHandler = llvm::function_ref; + const uint8_t *base() const { return Buf.bytes_begin(); } size_t getBufSize() const { return Buf.size(); } @@ -114,7 +125,9 @@ public: template Expected getEntry(const Elf_Shdr *Section, uint32_t Entry) const; - Expected getStringTable(const Elf_Shdr *Section) const; + Expected + getStringTable(const Elf_Shdr *Section, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getStringTableForSymtab(const Elf_Shdr &Section) const; Expected getStringTableForSymtab(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const; @@ -137,15 +150,16 @@ public: static Expected create(StringRef Object); + bool isLE() const { + return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; + } + bool isMipsELF64() const { return getHeader()->e_machine == ELF::EM_MIPS && getHeader()->getFileClass() == ELF::ELFCLASS64; } - bool isMips64EL() const { - return isMipsELF64() && - getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; - } + bool isMips64EL() const { return isMipsELF64() && isLE(); } Expected sections() const; @@ -261,7 +275,9 @@ public: return make_range(notes_begin(Shdr, Err), notes_end()); } - Expected getSectionStringTable(Elf_Shdr_Range Sections) const; + Expected getSectionStringTable( + Elf_Shdr_Range Sections, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const; Expected getSection(const Elf_Sym *Sym, @@ -271,12 +287,13 @@ public: Elf_Sym_Range Symtab, ArrayRef ShndxTable) const; Expected getSection(uint32_t Index) const; - Expected getSection(const StringRef SectionName) const; Expected getSymbol(const Elf_Shdr *Sec, uint32_t Index) const; - Expected getSectionName(const Elf_Shdr *Section) const; + Expected + getSectionName(const Elf_Shdr *Section, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected getSectionName(const Elf_Shdr *Section, StringRef DotShstrtab) const; template @@ -459,18 
+476,18 @@ ELFFile::getRelocationSymbol(const Elf_Rel *Rel, template Expected -ELFFile::getSectionStringTable(Elf_Shdr_Range Sections) const { +ELFFile::getSectionStringTable(Elf_Shdr_Range Sections, + WarningHandler WarnHandler) const { uint32_t Index = getHeader()->e_shstrndx; if (Index == ELF::SHN_XINDEX) Index = Sections[0].sh_link; if (!Index) // no section string table. return ""; - // TODO: Test a case when the sh_link of the section with index 0 is broken. if (Index >= Sections.size()) return createError("section header string table index " + Twine(Index) + " does not exist"); - return getStringTable(&Sections[Index]); + return getStringTable(&Sections[Index], WarnHandler); } template ELFFile::ELFFile(StringRef Object) : Buf(Object) {} @@ -495,7 +512,8 @@ Expected ELFFile::sections() const { Twine(getHeader()->e_shentsize)); const uint64_t FileSize = Buf.size(); - if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) + if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize || + SectionTableOffset + (uintX_t)sizeof(Elf_Shdr) < SectionTableOffset) return createError( "section header table goes past the end of the file: e_shoff = 0x" + Twine::utohexstr(SectionTableOffset)); @@ -513,15 +531,22 @@ Expected ELFFile::sections() const { NumSections = First->sh_size; if (NumSections > UINT64_MAX / sizeof(Elf_Shdr)) - // TODO: this error is untested. - return createError("section table goes past the end of file"); + return createError("invalid number of sections specified in the NULL " + "section's sh_size field (" + + Twine(NumSections) + ")"); const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr); + if (SectionTableOffset + SectionTableSize < SectionTableOffset) + return createError( + "invalid section header table offset (e_shoff = 0x" + + Twine::utohexstr(SectionTableOffset) + + ") or invalid number of sections specified in the first section " + "header's sh_size field (0x" + + Twine::utohexstr(NumSections) + ")"); // Section table goes past end of file! 
if (SectionTableOffset + SectionTableSize > FileSize) return createError("section table goes past the end of file"); - return makeArrayRef(First, NumSections); } @@ -540,8 +565,9 @@ template Expected ELFFile::getEntry(const Elf_Shdr *Section, uint32_t Entry) const { if (sizeof(T) != Section->sh_entsize) - // TODO: this error is untested. - return createError("invalid sh_entsize"); + return createError("section " + getSecIndexForError(this, Section) + + " has invalid sh_entsize: expected " + Twine(sizeof(T)) + + ", but got " + Twine(Section->sh_entsize)); size_t Pos = Section->sh_offset + Entry * sizeof(T); if (Pos + sizeof(T) > Buf.size()) return createError("unable to access section " + @@ -560,43 +586,27 @@ ELFFile::getSection(uint32_t Index) const { return object::getSection(*TableOrErr, Index); } -template -Expected -ELFFile::getSection(const StringRef SectionName) const { - auto TableOrErr = sections(); - if (!TableOrErr) - return TableOrErr.takeError(); - for (auto &Sec : *TableOrErr) { - auto SecNameOrErr = getSectionName(&Sec); - if (!SecNameOrErr) - return SecNameOrErr.takeError(); - if (*SecNameOrErr == SectionName) - return &Sec; - } - // TODO: this error is untested. 
- return createError("invalid section name"); -} - template Expected -ELFFile::getStringTable(const Elf_Shdr *Section) const { +ELFFile::getStringTable(const Elf_Shdr *Section, + WarningHandler WarnHandler) const { if (Section->sh_type != ELF::SHT_STRTAB) - return createError("invalid sh_type for string table section " + - getSecIndexForError(this, Section) + - ": expected SHT_STRTAB, but got " + - object::getELFSectionTypeName(getHeader()->e_machine, - Section->sh_type)); + if (Error E = WarnHandler("invalid sh_type for string table section " + + getSecIndexForError(this, Section) + + ": expected SHT_STRTAB, but got " + + object::getELFSectionTypeName( + getHeader()->e_machine, Section->sh_type))) + return std::move(E); + auto V = getSectionContentsAsArray(Section); if (!V) return V.takeError(); ArrayRef Data = *V; if (Data.empty()) - // TODO: this error is untested. - return createError("empty string table"); + return createError("SHT_STRTAB string table section " + + getSecIndexForError(this, Section) + " is empty"); if (Data.back() != '\0') - return createError(object::getELFSectionTypeName(getHeader()->e_machine, - Section->sh_type) + - " string table section " + + return createError("SHT_STRTAB string table section " + getSecIndexForError(this, Section) + " is non-null terminated"); return StringRef(Data.begin(), Data.size()); @@ -626,8 +636,11 @@ ELFFile::getSHNDXTable(const Elf_Shdr &Section, const Elf_Shdr &SymTable = **SymTableOrErr; if (SymTable.sh_type != ELF::SHT_SYMTAB && SymTable.sh_type != ELF::SHT_DYNSYM) - // TODO: this error is untested. 
- return createError("invalid sh_type"); + return createError("SHT_SYMTAB_SHNDX section is linked with " + + object::getELFSectionTypeName(getHeader()->e_machine, + SymTable.sh_type) + + " section (expected SHT_SYMTAB/SHT_DYNSYM)"); + if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym))) return createError("SHT_SYMTAB_SHNDX section has sh_size (" + Twine(SymTable.sh_size) + @@ -662,11 +675,12 @@ ELFFile::getStringTableForSymtab(const Elf_Shdr &Sec, template Expected -ELFFile::getSectionName(const Elf_Shdr *Section) const { +ELFFile::getSectionName(const Elf_Shdr *Section, + WarningHandler WarnHandler) const { auto SectionsOrErr = sections(); if (!SectionsOrErr) return SectionsOrErr.takeError(); - auto Table = getSectionStringTable(*SectionsOrErr); + auto Table = getSectionStringTable(*SectionsOrErr, WarnHandler); if (!Table) return Table.takeError(); return getSectionName(Section, *Table); diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 86c015efd70..424289a9cca 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -41,7 +41,7 @@ namespace llvm { namespace object { -constexpr int NumElfSymbolTypes = 8; +constexpr int NumElfSymbolTypes = 16; extern const llvm::EnumEntry ElfSymbolTypes[NumElfSymbolTypes]; class elf_symbol_iterator; @@ -239,6 +239,10 @@ public: using Elf_Rela = typename ELFT::Rela; using Elf_Dyn = typename ELFT::Dyn; + SectionRef toSectionRef(const Elf_Shdr *Sec) const { + return SectionRef(toDRI(Sec), this); + } + private: ELFObjectFile(MemoryBufferRef Object, ELFFile EF, const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec, @@ -284,7 +288,8 @@ protected: relocation_iterator section_rel_begin(DataRefImpl Sec) const override; relocation_iterator section_rel_end(DataRefImpl Sec) const override; std::vector dynamic_relocation_sections() const override; - section_iterator getRelocatedSection(DataRefImpl Sec) const override; + Expected + 
getRelocatedSection(DataRefImpl Sec) const override; void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; @@ -461,13 +466,15 @@ Expected ELFObjectFile::getSymbolName(DataRefImpl Sym) const { if (!SymStrTabOrErr) return SymStrTabOrErr.takeError(); Expected Name = ESym->getName(*SymStrTabOrErr); + if (Name && !Name->empty()) + return Name; // If the symbol name is empty use the section name. - if ((!Name || Name->empty()) && ESym->getType() == ELF::STT_SECTION) { - StringRef SecName; - Expected Sec = getSymbolSection(Sym); - if (Sec && !(*Sec)->getName(SecName)) - return SecName; + if (ESym->getType() == ELF::STT_SECTION) { + if (Expected SecOrErr = getSymbolSection(Sym)) { + consumeError(Name.takeError()); + return (*SecOrErr)->getName(); + } } return Name; } @@ -835,7 +842,7 @@ ELFObjectFile::section_rel_end(DataRefImpl Sec) const { } template -section_iterator +Expected ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (EF.getHeader()->e_type != ELF::ET_REL) return section_end(); @@ -845,10 +852,10 @@ ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) return section_end(); - auto R = EF.getSection(EShdr->sh_info); - if (!R) - report_fatal_error(errorToErrorCode(R.takeError()).message()); - return section_iterator(SectionRef(toDRI(*R), this)); + Expected SecOrErr = EF.getSection(EShdr->sh_info); + if (!SecOrErr) + return SecOrErr.takeError(); + return section_iterator(SectionRef(toDRI(*SecOrErr), this)); } // Relocations diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 5552208b1f8..7d1ade4d543 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -248,7 +248,11 @@ template Expected Elf_Sym_Impl::getName(StringRef StrTab) const { uint32_t Offset = this->st_name; if (Offset >= StrTab.size()) - return errorCodeToError(object_error::parse_failed); + return 
createStringError(object_error::parse_failed, + "st_name (0x%" PRIx32 + ") is past the end of the string table" + " of size 0x%zx", + Offset, StrTab.size()); return StringRef(StrTab.data() + Offset); } diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index ca9512f2170..76be8049a7d 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -297,6 +297,7 @@ public: uint64_t getSectionAddress(DataRefImpl Sec) const override; uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; + ArrayRef getSectionContents(uint32_t Offset, uint64_t Size) const; Expected> getSectionContents(DataRefImpl Sec) const override; uint64_t getSectionAlignment(DataRefImpl Sec) const override; diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index 5bf724f2c8b..eb45aff4480 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -31,6 +31,8 @@ class MachOUniversalBinary : public Binary { uint32_t Magic; uint32_t NumberOfObjects; public: + static constexpr uint32_t MaxSectionAlignment = 15; /* 2**15 or 0x8000 */ + class ObjectForArch { const MachOUniversalBinary *Parent; /// Index of object in the universal binary. 
@@ -64,13 +66,13 @@ public: else // Parent->getMagic() == MachO::FAT_MAGIC_64 return Header64.cpusubtype; } - uint32_t getOffset() const { + uint64_t getOffset() const { if (Parent->getMagic() == MachO::FAT_MAGIC) return Header.offset; else // Parent->getMagic() == MachO::FAT_MAGIC_64 return Header64.offset; } - uint32_t getSize() const { + uint64_t getSize() const { if (Parent->getMagic() == MachO::FAT_MAGIC) return Header.size; else // Parent->getMagic() == MachO::FAT_MAGIC_64 @@ -157,8 +159,14 @@ public: return V->isMachOUniversalBinary(); } - Expected> + Expected getObjectForArch(StringRef ArchName) const; + + Expected> + getMachOObjectForArch(StringRef ArchName) const; + + Expected> + getArchiveForArch(StringRef ArchName) const; }; } diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h index 470008d552e..4429493aff4 100644 --- a/include/llvm/Object/Minidump.h +++ b/include/llvm/Object/Minidump.h @@ -11,6 +11,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator.h" #include "llvm/BinaryFormat/Minidump.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Error.h" @@ -80,16 +81,65 @@ public: return getListStream(minidump::StreamType::ThreadList); } - /// Returns the list of memory ranges embedded in the MemoryList stream. An - /// error is returned if the file does not contain this stream, or if the - /// stream is not large enough to contain the number of memory descriptors - /// declared in the stream header. The consistency of the MemoryDescriptor - /// entries themselves is not checked in any way. + /// Returns the contents of the Exception stream. An error is returned if the + /// file does not contain this stream, or the stream is smaller than the size + /// of the ExceptionStream structure. The internal consistency of the stream + /// is not checked in any way. 
+ Expected getExceptionStream() const { + return getStream( + minidump::StreamType::Exception); + } + + /// Returns the list of descriptors embedded in the MemoryList stream. The + /// descriptors provide the content of interesting regions of memory at the + /// time the minidump was taken. An error is returned if the file does not + /// contain this stream, or if the stream is not large enough to contain the + /// number of memory descriptors declared in the stream header. The + /// consistency of the MemoryDescriptor entries themselves is not checked in + /// any way. Expected> getMemoryList() const { return getListStream( minidump::StreamType::MemoryList); } + class MemoryInfoIterator + : public iterator_facade_base { + public: + MemoryInfoIterator(ArrayRef Storage, size_t Stride) + : Storage(Storage), Stride(Stride) { + assert(Storage.size() % Stride == 0); + } + + bool operator==(const MemoryInfoIterator &R) const { + return Storage.size() == R.Storage.size(); + } + + const minidump::MemoryInfo &operator*() const { + assert(Storage.size() >= sizeof(minidump::MemoryInfo)); + return *reinterpret_cast(Storage.data()); + } + + MemoryInfoIterator &operator++() { + Storage = Storage.drop_front(Stride); + return *this; + } + + private: + ArrayRef Storage; + size_t Stride; + }; + + /// Returns the list of descriptors embedded in the MemoryInfoList stream. The + /// descriptors provide properties (e.g. permissions) of interesting regions + /// of memory at the time the minidump was taken. An error is returned if the + /// file does not contain this stream, or if the stream is not large enough to + /// contain the number of memory descriptors declared in the stream header. + /// The consistency of the MemoryInfoList entries themselves is not checked + /// in any way. 
+ Expected> getMemoryInfoList() const; + private: static Error createError(StringRef Str) { return make_error(Str, object_error::parse_failed); @@ -137,10 +187,10 @@ private: }; template -Expected MinidumpFile::getStream(minidump::StreamType Stream) const { - if (auto OptionalStream = getRawStream(Stream)) { - if (OptionalStream->size() >= sizeof(T)) - return *reinterpret_cast(OptionalStream->data()); +Expected MinidumpFile::getStream(minidump::StreamType Type) const { + if (Optional> Stream = getRawStream(Type)) { + if (Stream->size() >= sizeof(T)) + return *reinterpret_cast(Stream->data()); return createEOFError(); } return createError("No such stream"); @@ -153,10 +203,11 @@ Expected> MinidumpFile::getDataSliceAs(ArrayRef Data, // Check for overflow. if (Count > std::numeric_limits::max() / sizeof(T)) return createEOFError(); - auto ExpectedArray = getDataSlice(Data, Offset, sizeof(T) * Count); - if (!ExpectedArray) - return ExpectedArray.takeError(); - return ArrayRef(reinterpret_cast(ExpectedArray->data()), Count); + Expected> Slice = + getDataSlice(Data, Offset, sizeof(T) * Count); + if (!Slice) + return Slice.takeError(); + return ArrayRef(reinterpret_cast(Slice->data()), Count); } } // end namespace object diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 483a3486bd7..adc9dbc189a 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -94,7 +94,7 @@ public: void moveNext(); - std::error_code getName(StringRef &Result) const; + Expected getName() const; uint64_t getAddress() const; uint64_t getIndex() const; uint64_t getSize() const; @@ -130,18 +130,13 @@ public: iterator_range relocations() const { return make_range(relocation_begin(), relocation_end()); } - section_iterator getRelocatedSection() const; + Expected getRelocatedSection() const; DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObject() const; }; struct SectionedAddress { - // TODO: constructors could be removed 
when C++14 would be adopted. - SectionedAddress() {} - SectionedAddress(uint64_t Addr, uint64_t SectIdx) - : Address(Addr), SectionIndex(SectIdx) {} - const static uint64_t UndefSection = UINT64_MAX; uint64_t Address = 0; @@ -277,7 +272,7 @@ protected: virtual bool isBerkeleyData(DataRefImpl Sec) const; virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0; virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0; - virtual section_iterator getRelocatedSection(DataRefImpl Sec) const; + virtual Expected getRelocatedSection(DataRefImpl Sec) const; // Same as above for RelocationRef. friend class RelocationRef; @@ -434,12 +429,8 @@ inline void SectionRef::moveNext() { return OwningObject->moveSectionNext(SectionPimpl); } -inline std::error_code SectionRef::getName(StringRef &Result) const { - Expected NameOrErr = OwningObject->getSectionName(SectionPimpl); - if (!NameOrErr) - return errorToErrorCode(NameOrErr.takeError()); - Result = *NameOrErr; - return std::error_code(); +inline Expected SectionRef::getName() const { + return OwningObject->getSectionName(SectionPimpl); } inline uint64_t SectionRef::getAddress() const { @@ -510,7 +501,7 @@ inline relocation_iterator SectionRef::relocation_end() const { return OwningObject->section_rel_end(SectionPimpl); } -inline section_iterator SectionRef::getRelocatedSection() const { +inline Expected SectionRef::getRelocatedSection() const { return OwningObject->getRelocatedSection(SectionPimpl); } diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h index ed44efbf80b..b408f404103 100644 --- a/include/llvm/Object/StackMapParser.h +++ b/include/llvm/Object/StackMapParser.h @@ -19,7 +19,7 @@ namespace llvm { -/// A parser for the latest stackmap format. At the moment, latest=V2. +/// A parser for the latest stackmap format. At the moment, latest=V3. 
template class StackMapParser { public: @@ -299,7 +299,7 @@ public: const uint8_t *P; }; - /// Construct a parser for a version-2 stackmap. StackMap data will be read + /// Construct a parser for a version-3 stackmap. StackMap data will be read /// from the given array. StackMapParser(ArrayRef StackMapSection) : StackMapSection(StackMapSection) { diff --git a/include/llvm/Object/TapiFile.h b/include/llvm/Object/TapiFile.h new file mode 100644 index 00000000000..bc2e04e1cc9 --- /dev/null +++ b/include/llvm/Object/TapiFile.h @@ -0,0 +1,60 @@ +//===- TapiFile.h - Text-based Dynamic Library Stub -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the TapiFile interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_TAPI_FILE_H +#define LLVM_OBJECT_TAPI_FILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" + +namespace llvm { +namespace object { + +class TapiFile : public SymbolicFile { +public: + TapiFile(MemoryBufferRef Source, const MachO::InterfaceFile &interface, + MachO::Architecture Arch); + ~TapiFile() override; + + void moveSymbolNext(DataRefImpl &DRI) const override; + + Error printSymbolName(raw_ostream &OS, DataRefImpl DRI) const override; + + uint32_t getSymbolFlags(DataRefImpl DRI) const override; + + basic_symbol_iterator symbol_begin() const override; + + basic_symbol_iterator symbol_end() const override; + + static bool classof(const Binary *v) { return v->isTapiFile(); } + +private: + struct Symbol { + StringRef 
Prefix; + StringRef Name; + uint32_t Flags; + + constexpr Symbol(StringRef Prefix, StringRef Name, uint32_t Flags) + : Prefix(Prefix), Name(Name), Flags(Flags) {} + }; + + std::vector Symbols; +}; + +} // end namespace object. +} // end namespace llvm. + +#endif // LLVM_OBJECT_TAPI_FILE_H diff --git a/include/llvm/Object/TapiUniversal.h b/include/llvm/Object/TapiUniversal.h new file mode 100644 index 00000000000..4931183852a --- /dev/null +++ b/include/llvm/Object/TapiUniversal.h @@ -0,0 +1,109 @@ +//===-- TapiUniversal.h - Text-based Dynamic Library Stub -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the TapiUniversal interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_TAPI_UNIVERSAL_H +#define LLVM_OBJECT_TAPI_UNIVERSAL_H + +#include "llvm/Object/Binary.h" +#include "llvm/Object/TapiFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/Architecture.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" + +namespace llvm { +namespace object { + +class TapiUniversal : public Binary { +public: + class ObjectForArch { + const TapiUniversal *Parent; + int Index; + + public: + ObjectForArch(const TapiUniversal *Parent, int Index) + : Parent(Parent), Index(Index) {} + + ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); } + + bool operator==(const ObjectForArch &Other) const { + return (Parent == Other.Parent) && (Index == Other.Index); + } + + uint32_t getCPUType() const { + auto Result = + MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]); + return Result.first; + } + + uint32_t getCPUSubType() const { + auto Result = + 
MachO::getCPUTypeFromArchitecture(Parent->Architectures[Index]); + return Result.second; + } + + std::string getArchFlagName() const { + return MachO::getArchitectureName(Parent->Architectures[Index]); + } + + Expected> getAsObjectFile() const; + }; + + class object_iterator { + ObjectForArch Obj; + + public: + object_iterator(const ObjectForArch &Obj) : Obj(Obj) {} + const ObjectForArch *operator->() const { return &Obj; } + const ObjectForArch &operator*() const { return Obj; } + + bool operator==(const object_iterator &Other) const { + return Obj == Other.Obj; + } + bool operator!=(const object_iterator &Other) const { + return !(*this == Other); + } + + object_iterator &operator++() { // Preincrement + Obj = Obj.getNext(); + return *this; + } + }; + + TapiUniversal(MemoryBufferRef Source, Error &Err); + static Expected> + create(MemoryBufferRef Source); + ~TapiUniversal() override; + + object_iterator begin_objects() const { return ObjectForArch(this, 0); } + object_iterator end_objects() const { + return ObjectForArch(this, Architectures.size()); + } + + iterator_range objects() const { + return make_range(begin_objects(), end_objects()); + } + + uint32_t getNumberOfObjects() const { return Architectures.size(); } + + // Cast methods. + static bool classof(const Binary *v) { return v->isTapiUniversal(); } + +private: + std::unique_ptr ParsedFile; + std::vector Architectures; +}; + +} // end namespace object. +} // end namespace llvm. 
+ +#endif // LLVM_OBJECT_TAPI_UNIVERSAL_H diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h index 356dcb03abb..a0d658491cb 100644 --- a/include/llvm/Object/WindowsResource.h +++ b/include/llvm/Object/WindowsResource.h @@ -31,6 +31,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" @@ -48,6 +49,7 @@ class ScopedPrinter; namespace object { class WindowsResource; +class ResourceSectionRef; const size_t WIN_RES_MAGIC_SIZE = 16; const size_t WIN_RES_NULL_ENTRY_SIZE = 16; @@ -151,8 +153,11 @@ private: class WindowsResourceParser { public: class TreeNode; - WindowsResourceParser(); + WindowsResourceParser(bool MinGW = false); Error parse(WindowsResource *WR, std::vector &Duplicates); + Error parse(ResourceSectionRef &RSR, StringRef Filename, + std::vector &Duplicates); + void cleanUpManifests(std::vector &Duplicates); void printTree(raw_ostream &OS) const; const TreeNode &getTree() const { return Root; } const ArrayRef> getData() const { return Data; } @@ -181,32 +186,38 @@ public: private: friend class WindowsResourceParser; - static uint32_t StringCount; - static uint32_t DataCount; - - static std::unique_ptr createStringNode(); + // Index is the StringTable vector index for this node's name. + static std::unique_ptr createStringNode(uint32_t Index); static std::unique_ptr createIDNode(); + // DataIndex is the Data vector index that the data node points at. 
static std::unique_ptr createDataNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin); + uint32_t Origin, + uint32_t DataIndex); - explicit TreeNode(bool IsStringNode); + explicit TreeNode(uint32_t StringIndex); TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, - uint32_t Characteristics, uint32_t Origin); + uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex); bool addEntry(const ResourceEntryRef &Entry, uint32_t Origin, - bool &IsNewTypeString, bool &IsNewNameString, + std::vector> &Data, + std::vector> &StringTable, TreeNode *&Result); - TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString); - TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString); + TreeNode &addTypeNode(const ResourceEntryRef &Entry, + std::vector> &StringTable); + TreeNode &addNameNode(const ResourceEntryRef &Entry, + std::vector> &StringTable); bool addLanguageNode(const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, TreeNode *&Result); bool addDataChild(uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, uint32_t Origin, - TreeNode *&Result); + uint32_t DataIndex, TreeNode *&Result); TreeNode &addIDChild(uint32_t ID); - TreeNode &addNameChild(ArrayRef NameRef, bool &IsNewString); + TreeNode &addNameChild(ArrayRef NameRef, + std::vector> &StringTable); + void shiftDataIndexDown(uint32_t Index); bool IsDataNode = false; uint32_t StringIndex; @@ -222,12 +233,30 @@ public: uint32_t Origin; }; + struct StringOrID { + bool IsString; + ArrayRef String; + uint32_t ID; + + StringOrID(uint32_t ID) : IsString(false), ID(ID) {} + StringOrID(ArrayRef String) : IsString(true), String(String) {} + }; + private: + Error addChildren(TreeNode &Node, ResourceSectionRef &RSR, + const coff_resource_dir_table &Table, uint32_t Origin, + std::vector &Context, + std::vector &Duplicates); + bool shouldIgnoreDuplicate(const ResourceEntryRef &Entry) const; + bool 
shouldIgnoreDuplicate(const std::vector &Context) const; + TreeNode Root; std::vector> Data; std::vector> StringTable; std::vector InputFilenames; + + bool MinGW; }; Expected> diff --git a/include/llvm/Object/XCOFFObjectFile.h b/include/llvm/Object/XCOFFObjectFile.h index cdee7129a2a..84073ce5f6c 100644 --- a/include/llvm/Object/XCOFFObjectFile.h +++ b/include/llvm/Object/XCOFFObjectFile.h @@ -13,23 +13,8 @@ #ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H #define LLVM_OBJECT_XCOFFOBJECTFILE_H -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/Magic.h" #include "llvm/BinaryFormat/XCOFF.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/Binary.h" -#include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include -#include -#include namespace llvm { namespace object { @@ -63,7 +48,7 @@ struct XCOFFFileHeader64 { }; struct XCOFFSectionHeader32 { - char Name[XCOFF::SectionNameSize]; + char Name[XCOFF::NameSize]; support::ubig32_t PhysicalAddress; support::ubig32_t VirtualAddress; support::ubig32_t SectionSize; @@ -78,7 +63,7 @@ struct XCOFFSectionHeader32 { }; struct XCOFFSectionHeader64 { - char Name[XCOFF::SectionNameSize]; + char Name[XCOFF::NameSize]; support::ubig64_t PhysicalAddress; support::ubig64_t VirtualAddress; support::ubig64_t SectionSize; @@ -106,7 +91,7 @@ struct XCOFFSymbolEntry { } CFileLanguageIdAndTypeIdType; union { - char SymbolName[XCOFF::SymbolNameSize]; + char SymbolName[XCOFF::NameSize]; NameInStrTblType NameInStrTbl; }; @@ -127,6 +112,75 @@ struct XCOFFStringTable { const char *Data; }; +struct XCOFFCsectAuxEnt32 { + support::ubig32_t + SectionOrLength; // If the symbol type is XTY_SD or XTY_CM, the csect + // length. 
+ // If the symbol type is XTY_LD, the symbol table + // index of the containing csect. + // If the symbol type is XTY_ER, 0. + support::ubig32_t ParameterHashIndex; + support::ubig16_t TypeChkSectNum; + uint8_t SymbolAlignmentAndType; + XCOFF::StorageMappingClass StorageMappingClass; + support::ubig32_t StabInfoIndex; + support::ubig16_t StabSectNum; +}; + +struct XCOFFFileAuxEnt { + typedef struct { + support::big32_t Magic; // Zero indicates name in string table. + support::ubig32_t Offset; + char NamePad[XCOFF::FileNamePadSize]; + } NameInStrTblType; + union { + char Name[XCOFF::NameSize + XCOFF::FileNamePadSize]; + NameInStrTblType NameInStrTbl; + }; + XCOFF::CFileStringType Type; + uint8_t ReservedZeros[2]; + uint8_t AuxType; // 64-bit XCOFF file only. +}; + +struct XCOFFSectAuxEntForStat { + support::ubig32_t SectionLength; + support::ubig16_t NumberOfRelocEnt; + support::ubig16_t NumberOfLineNum; + uint8_t Pad[10]; +}; + +struct XCOFFRelocation32 { + // Masks for packing/unpacking the r_rsize field of relocations. + + // The msb is used to indicate if the bits being relocated are signed or + // unsigned. + static constexpr uint8_t XR_SIGN_INDICATOR_MASK = 0x80; + + // The 2nd msb is used to indicate that the binder has replaced/modified the + // original instruction. + static constexpr uint8_t XR_FIXUP_INDICATOR_MASK = 0x40; + + // The remaining bits specify the bit length of the relocatable reference + // minus one. + static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f; + +public: + support::ubig32_t VirtualAddress; + support::ubig32_t SymbolIndex; + + // Packed field, see XR_* masks for details of packing. + uint8_t Info; + + XCOFF::RelocationType Type; + +public: + bool isRelocationSigned() const; + bool isFixupIndicated() const; + + // Returns the number of bits being relocated. 
+ uint8_t getRelocatedLength() const; +}; + class XCOFFObjectFile : public ObjectFile { private: const void *FileHeader = nullptr; @@ -146,18 +200,18 @@ private: const XCOFFSectionHeader32 *toSection32(DataRefImpl Ref) const; const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const; - void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const; uintptr_t getSectionHeaderTableAddress() const; + uintptr_t getEndOfSymbolTableAddress() const; // This returns a pointer to the start of the storage for the name field of // the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily // null-terminated. const char *getSectionNameInternal(DataRefImpl Sec) const; - int32_t getSectionFlags(DataRefImpl Sec) const; + // This function returns string table entry. + Expected getStringTableEntry(uint32_t Offset) const; static bool isReservedSectionNumber(int16_t SectionNumber); - Expected getSectionByNum(int16_t Num) const; // Constructor and "create" factory function. The constructor is only a thin // wrapper around the base constructor. The "create" function fills out the @@ -175,6 +229,8 @@ private: friend Expected> ObjectFile::createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType); + void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const; + public: // Interface inherited from base classes. void moveSymbolNext(DataRefImpl &Symb) const override; @@ -253,15 +309,49 @@ public: uint32_t getLogicalNumberOfSymbolTableEntries32() const; uint32_t getNumberOfSymbolTableEntries64() const; + uint32_t getSymbolIndex(uintptr_t SymEntPtr) const; + Expected getSymbolNameByIndex(uint32_t SymbolTableIndex) const; + Expected getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const; uint16_t getOptionalHeaderSize() const; uint16_t getFlags() const; // Section header table related interfaces. 
ArrayRef sections32() const; ArrayRef sections64() const; + + int32_t getSectionFlags(DataRefImpl Sec) const; + Expected getSectionByNum(int16_t Num) const; + + void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const; + + // Relocation-related interfaces. + Expected + getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const; + + Expected> + relocations(const XCOFFSectionHeader32 &) const; }; // XCOFFObjectFile +class XCOFFSymbolRef { + const DataRefImpl SymEntDataRef; + const XCOFFObjectFile *const OwningObjectPtr; + +public: + XCOFFSymbolRef(DataRefImpl SymEntDataRef, + const XCOFFObjectFile *OwningObjectPtr) + : SymEntDataRef(SymEntDataRef), OwningObjectPtr(OwningObjectPtr){}; + + XCOFF::StorageClass getStorageClass() const; + uint8_t getNumberOfAuxEntries() const; + const XCOFFCsectAuxEnt32 *getXCOFFCsectAuxEnt32() const; + uint16_t getType() const; + int16_t getSectionNumber() const; + + bool hasCsectAuxEnt() const; + bool isFunction() const; +}; + } // namespace object } // namespace llvm diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h index 78d736c3ef0..525fd9a8924 100644 --- a/include/llvm/ObjectYAML/DWARFYAML.h +++ b/include/llvm/ObjectYAML/DWARFYAML.h @@ -234,7 +234,7 @@ template <> struct MappingTraits { static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; -#define HANDLE_DW_TAG(unused, name, unused2, unused3) \ +#define HANDLE_DW_TAG(unused, name, unused2, unused3, unused4) \ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name); template <> struct ScalarEnumerationTraits { diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h index f4212516f48..0898a0e7d53 100644 --- a/include/llvm/ObjectYAML/ELFYAML.h +++ b/include/llvm/ObjectYAML/ELFYAML.h @@ -25,6 +25,8 @@ namespace llvm { namespace ELFYAML { +StringRef dropUniqueSuffix(StringRef S); + // These types are invariant across 32/64-bit ELF, so for simplicity just // directly give them their exact 
sizes. We don't need to worry about // endianness because these are just the types in the YAMLIO structures, @@ -54,8 +56,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_SHF) LLVM_YAML_STRONG_TYPEDEF(uint16_t, ELF_SHN) LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STB) LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STT) -LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STV) -LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STO) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP) @@ -77,7 +77,7 @@ struct FileHeader { llvm::yaml::Hex64 Entry; Optional SHEntSize; - Optional SHOffset; + Optional SHOff; Optional SHNum; Optional SHStrNdx; }; @@ -107,7 +107,7 @@ struct Symbol { ELF_STB Binding; llvm::yaml::Hex64 Value; llvm::yaml::Hex64 Size; - uint8_t Other; + Optional Other; }; struct SectionOrType { @@ -119,6 +119,11 @@ struct DynamicEntry { llvm::yaml::Hex64 Val; }; +struct StackSizeEntry { + llvm::yaml::Hex64 Address; + llvm::yaml::Hex64 Size; +}; + struct Section { enum class SectionKind { Dynamic, @@ -126,10 +131,14 @@ struct Section { RawContent, Relocation, NoBits, + Hash, Verdef, Verneed, + StackSizes, + SymtabShndxSection, Symver, - MipsABIFlags + MipsABIFlags, + Addrsig }; SectionKind Kind; StringRef Name; @@ -140,16 +149,44 @@ struct Section { llvm::yaml::Hex64 AddressAlign; Optional EntSize; + // Usually sections are not created implicitly, but loaded from YAML. + // When they are, this flag is used to signal about that. + bool IsImplicit; + + Section(SectionKind Kind, bool IsImplicit = false) + : Kind(Kind), IsImplicit(IsImplicit) {} + virtual ~Section(); + + // The following members are used to override section fields which is + // useful for creating invalid objects. + + // This can be used to override the offset stored in the sh_name field. + // It does not affect the name stored in the string table. + Optional ShName; + // This can be used to override the sh_offset field. It does not place the - // section data at the offset specified. 
Useful for creating invalid objects. + // section data at the offset specified. Optional ShOffset; // This can be used to override the sh_size field. It does not affect the // content written. Optional ShSize; +}; - Section(SectionKind Kind) : Kind(Kind) {} - virtual ~Section(); +struct StackSizesSection : Section { + Optional Content; + Optional Size; + Optional> Entries; + + StackSizesSection() : Section(SectionKind::StackSizes) {} + + static bool classof(const Section *S) { + return S->Kind == SectionKind::StackSizes; + } + + static bool nameMatches(StringRef Name) { + return Name == ".stack_sizes"; + } }; struct DynamicSection : Section { @@ -185,6 +222,17 @@ struct NoBitsSection : Section { } }; +struct HashSection : Section { + Optional Content; + Optional Size; + Optional> Bucket; + Optional> Chain; + + HashSection() : Section(SectionKind::Hash) {} + + static bool classof(const Section *S) { return S->Kind == SectionKind::Hash; } +}; + struct VernauxEntry { uint32_t Hash; uint16_t Flags; @@ -209,6 +257,26 @@ struct VerneedSection : Section { } }; +struct AddrsigSymbol { + AddrsigSymbol(StringRef N) : Name(N), Index(None) {} + AddrsigSymbol(llvm::yaml::Hex32 Ndx) : Name(None), Index(Ndx) {} + AddrsigSymbol() : Name(None), Index(None) {} + + Optional Name; + Optional Index; +}; + +struct AddrsigSection : Section { + Optional Content; + Optional Size; + Optional> Symbols; + + AddrsigSection() : Section(SectionKind::Addrsig) {} + static bool classof(const Section *S) { + return S->Kind == SectionKind::Addrsig; + } +}; + struct SymverSection : Section { std::vector Entries; @@ -269,6 +337,16 @@ struct RelocationSection : Section { } }; +struct SymtabShndxSection : Section { + std::vector Entries; + + SymtabShndxSection() : Section(SectionKind::SymtabShndxSection) {} + + static bool classof(const Section *S) { + return S->Kind == SectionKind::SymtabShndxSection; + } +}; + // Represents .MIPS.abiflags section struct MipsABIFlags : Section { llvm::yaml::Hex16 
Version; @@ -298,13 +376,15 @@ struct Object { // cleaner and nicer if we read them from the YAML as a separate // top-level key, which automatically ensures that invariants like there // being a single SHT_SYMTAB section are upheld. - std::vector Symbols; + Optional> Symbols; std::vector DynamicSymbols; }; } // end namespace ELFYAML } // end namespace llvm +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::AddrsigSymbol) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader) LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) @@ -380,16 +460,6 @@ struct ScalarEnumerationTraits { static void enumeration(IO &IO, ELFYAML::ELF_STT &Value); }; -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &IO, ELFYAML::ELF_STV &Value); -}; - -template <> -struct ScalarBitSetTraits { - static void bitset(IO &IO, ELFYAML::ELF_STO &Value); -}; - template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, ELFYAML::ELF_REL &Value); @@ -450,6 +520,10 @@ struct MappingTraits { static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel); +}; + template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel); }; @@ -466,6 +540,10 @@ template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::VernauxEntry &E); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym); +}; + template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::Relocation &Rel); }; diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h index d7e1c033f43..327c3b9f892 100644 --- a/include/llvm/ObjectYAML/MachOYAML.h +++ b/include/llvm/ObjectYAML/MachOYAML.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MachO.h" #include 
"llvm/ObjectYAML/DWARFYAML.h" +#include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/YAMLTraits.h" #include #include @@ -39,6 +40,7 @@ struct Section { llvm::yaml::Hex32 reserved1; llvm::yaml::Hex32 reserved2; llvm::yaml::Hex32 reserved3; + Optional content; }; struct FileHeader { @@ -198,6 +200,7 @@ template <> struct MappingTraits { template <> struct MappingTraits { static void mapping(IO &IO, MachOYAML::Section &Section); + static StringRef validate(IO &io, MachOYAML::Section &Section); }; template <> struct MappingTraits { diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h index 39fdd62e017..c1711a28dd8 100644 --- a/include/llvm/ObjectYAML/MinidumpYAML.h +++ b/include/llvm/ObjectYAML/MinidumpYAML.h @@ -26,6 +26,8 @@ namespace MinidumpYAML { /// from Types to Kinds is fixed and given by the static getKind function. struct Stream { enum class StreamKind { + Exception, + MemoryInfoList, MemoryList, ModuleList, RawContent, @@ -102,6 +104,45 @@ using ModuleListStream = detail::ListStream; using ThreadListStream = detail::ListStream; using MemoryListStream = detail::ListStream; +/// ExceptionStream minidump stream. +struct ExceptionStream : public Stream { + minidump::ExceptionStream MDExceptionStream; + yaml::BinaryRef ThreadContext; + + ExceptionStream() + : Stream(StreamKind::Exception, minidump::StreamType::Exception), + MDExceptionStream({}) {} + + explicit ExceptionStream(const minidump::ExceptionStream &MDExceptionStream, + ArrayRef ThreadContext) + : Stream(StreamKind::Exception, minidump::StreamType::Exception), + MDExceptionStream(MDExceptionStream), ThreadContext(ThreadContext) {} + + static bool classof(const Stream *S) { + return S->Kind == StreamKind::Exception; + } +}; + +/// A structure containing the list of MemoryInfo entries comprising a +/// MemoryInfoList stream. 
+struct MemoryInfoListStream : public Stream { + std::vector Infos; + + MemoryInfoListStream() + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList) {} + + explicit MemoryInfoListStream( + iterator_range Range) + : Stream(StreamKind::MemoryInfoList, + minidump::StreamType::MemoryInfoList), + Infos(Range.begin(), Range.end()) {} + + static bool classof(const Stream *S) { + return S->Kind == StreamKind::MemoryInfoList; + } +}; + /// A minidump stream represented as a sequence of hex bytes. This is used as a /// fallback when no other stream kind is suitable. struct RawContentStream : public Stream { @@ -122,16 +163,16 @@ struct SystemInfoStream : public Stream { minidump::SystemInfo Info; std::string CSDVersion; - explicit SystemInfoStream(const minidump::SystemInfo &Info, - std::string CSDVersion) - : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), - Info(Info), CSDVersion(std::move(CSDVersion)) {} - SystemInfoStream() : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) { memset(&Info, 0, sizeof(Info)); } + explicit SystemInfoStream(const minidump::SystemInfo &Info, + std::string CSDVersion) + : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo), + Info(Info), CSDVersion(std::move(CSDVersion)) {} + static bool classof(const Stream *S) { return S->Kind == StreamKind::SystemInfo; } @@ -177,12 +218,6 @@ struct Object { static Expected create(const object::MinidumpFile &File); }; -/// Serialize the minidump file represented by Obj to OS in binary form. -void writeAsBinary(Object &Obj, raw_ostream &OS); - -/// Serialize the yaml string as a minidump file to OS in binary form. 
-Error writeAsBinary(StringRef Yaml, raw_ostream &OS); - } // namespace MinidumpYAML namespace yaml { @@ -213,6 +248,10 @@ template <> struct MappingContextTraits { } // namespace llvm +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryProtection) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryState) +LLVM_YAML_DECLARE_BITSET_TRAITS(llvm::minidump::MemoryType) + LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform) LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) @@ -220,6 +259,8 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info) +LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::Exception) +LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS( @@ -233,6 +274,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::minidump::MemoryInfo) LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object) diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index 2411dc7ac17..15a8cc21502 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -145,7 +145,7 @@ struct Signature { uint32_t Index; SignatureForm Form = wasm::WASM_TYPE_FUNC; std::vector ParamTypes; - ValueType ReturnType; + std::vector ReturnTypes; }; struct SymbolInfo { diff --git a/include/llvm/ObjectYAML/yaml2obj.h b/include/llvm/ObjectYAML/yaml2obj.h new file mode 100644 
index 00000000000..386551337d8 --- /dev/null +++ b/include/llvm/ObjectYAML/yaml2obj.h @@ -0,0 +1,67 @@ +//===--- yaml2obj.h - -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Common declarations for yaml2obj +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H +#define LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +class raw_ostream; +template class SmallVectorImpl; +template class Expected; + +namespace object { +class ObjectFile; +} + +namespace COFFYAML { +struct Object; +} + +namespace ELFYAML { +struct Object; +} + +namespace MinidumpYAML { +struct Object; +} + +namespace WasmYAML { +struct Object; +} + +namespace yaml { +class Input; +struct YamlObjectFile; + +using ErrorHandler = llvm::function_ref; + +bool yaml2coff(COFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH); +bool yaml2minidump(MinidumpYAML::Object &Doc, raw_ostream &Out, + ErrorHandler EH); +bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); + +bool convertYAML(Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, + unsigned DocNum = 1); + +/// Convenience function for tests. 
+std::unique_ptr +yaml2ObjectFile(SmallVectorImpl &Storage, StringRef Yaml, + ErrorHandler ErrHandler); + +} // namespace yaml +} // namespace llvm + +#endif diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index 329f7eaba73..1d53ae32cf3 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -306,6 +306,9 @@ protected: }; //===----------------------------------------------------------------------===// +/// Deprecated - do not create new passes as BasicBlockPasses. Use FunctionPass +/// with a loop over the BasicBlocks instead. +// /// BasicBlockPass class - This class is used to implement most local /// optimizations. Optimizations should subclass this class if they /// meet the following constraints: @@ -338,6 +341,8 @@ public: /// do any post processing needed after all passes have run. virtual bool doFinalization(Function &); + void preparePassManager(PMStack &PMS) override; + void assignPassManager(PMStack &PMS, PassManagerType T) override; /// Return what kind of Pass Manager can manage this pass. diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h index 5e6660599f9..f73e4b42dd4 100644 --- a/include/llvm/Passes/PassBuilder.h +++ b/include/llvm/Passes/PassBuilder.h @@ -629,6 +629,12 @@ public: TopLevelPipelineParsingCallbacks.push_back(C); } + /// Add PGOInstrumenation passes for O0 only. 
+ void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging, + bool RunProfileGen, bool IsCS, + std::string ProfileFile, + std::string ProfileRemappingFile); + private: static Optional> parsePipelineText(StringRef Text); @@ -660,7 +666,6 @@ private: OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile); - void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); // Extension Point callbacks diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index 11758ac4cf2..0dd0c7ec806 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -301,7 +301,12 @@ public: struct FunctionRecord { /// Raw function name. std::string Name; - /// Associated files. + /// Mapping from FileID (i.e. vector index) to filename. Used to support + /// macro expansions within a function in which the macro and function are + /// defined in separate files. + /// + /// TODO: Uniquing filenames across all function records may be a performance + /// optimization. std::vector Filenames; /// Regions in the function along with their counts. std::vector CountedRegions; @@ -508,6 +513,7 @@ public: class CoverageMapping { DenseMap> RecordProvenance; std::vector Functions; + DenseMap> FilenameHash2RecordIndices; std::vector> FuncHashMismatches; CoverageMapping() = default; @@ -516,6 +522,13 @@ class CoverageMapping { Error loadFunctionRecord(const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader); + /// Look up the indices for function records which are at least partially + /// defined in the specified file. This is guaranteed to return a superset of + /// such records: extra records not in the file may be included if there is + /// a hash collision on the filename. Clients must be robust to collisions. 
+ ArrayRef + getImpreciseRecordIndicesForFilename(StringRef Filename) const; + public: CoverageMapping(const CoverageMapping &) = delete; CoverageMapping &operator=(const CoverageMapping &) = delete; @@ -527,6 +540,7 @@ public: /// Load the coverage mapping from the given object files and profile. If /// \p Arches is non-empty, it must specify an architecture for each object. + /// Ignores non-instrumented object files unless all are not instrumented. static Expected> load(ArrayRef ObjectFilenames, StringRef ProfileFilename, ArrayRef Arches = None); diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h index 5f88cacdfcb..6fcd8a09a49 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h +++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h @@ -30,8 +30,7 @@ class CoverageFilenamesSectionWriter { ArrayRef Filenames; public: - CoverageFilenamesSectionWriter(ArrayRef Filenames) - : Filenames(Filenames) {} + CoverageFilenamesSectionWriter(ArrayRef Filenames); /// Write encoded filenames to the given output stream. void write(raw_ostream &OS); diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index c7d764ade30..c26f7694999 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -93,10 +93,6 @@ inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } /// Return the name of value profile node array variables: inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } -/// Return the name prefix of the COMDAT group for instrumentation variables -/// associated with a COMDAT function. -inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } - /// Return the name of the variable holding the strings (possibly compressed) /// of all function's PGO names. 
inline StringRef getInstrProfNamesVarName() { @@ -634,8 +630,8 @@ struct OverlapStats { FuncHash = Hash; } - Error accumuateCounts(const std::string &BaseFilename, - const std::string &TestFilename, bool IsCS); + Error accumulateCounts(const std::string &BaseFilename, + const std::string &TestFilename, bool IsCS); void addOneMismatch(const CountSumOrPercent &MismatchFunc); void addOneUnique(const CountSumOrPercent &UniqueFunc); @@ -695,7 +691,7 @@ struct InstrProfRecord { InstrProfRecord(const InstrProfRecord &RHS) : Counts(RHS.Counts), ValueData(RHS.ValueData - ? llvm::make_unique(*RHS.ValueData) + ? std::make_unique(*RHS.ValueData) : nullptr) {} InstrProfRecord &operator=(InstrProfRecord &&) = default; InstrProfRecord &operator=(const InstrProfRecord &RHS) { @@ -705,7 +701,7 @@ struct InstrProfRecord { return *this; } if (!ValueData) - ValueData = llvm::make_unique(*RHS.ValueData); + ValueData = std::make_unique(*RHS.ValueData); else *ValueData = *RHS.ValueData; return *this; @@ -772,7 +768,7 @@ struct InstrProfRecord { void clearValueData() { ValueData = nullptr; } /// Compute the sums of all counts and store in Sum. - void accumuateCounts(CountSumOrPercent &Sum) const; + void accumulateCounts(CountSumOrPercent &Sum) const; /// Compute the overlap b/w this IntrprofRecord and Other. 
void overlap(InstrProfRecord &Other, OverlapStats &Overlap, @@ -817,7 +813,7 @@ private: std::vector & getOrCreateValueSitesForKind(uint32_t ValueKind) { if (!ValueData) - ValueData = llvm::make_unique(); + ValueData = std::make_unique(); switch (ValueKind) { case IPVK_IndirectCallTarget: return ValueData->IndirectCallSites; @@ -897,7 +893,7 @@ InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site, return std::unique_ptr(nullptr); } - auto VD = llvm::make_unique(N); + auto VD = std::make_unique(N); TotalCount = getValueForSite(VD.get(), ValueKind, Site); return VD; diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 73751faab88..f5f552672bf 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -92,7 +92,7 @@ public: virtual InstrProfSymtab &getSymtab() = 0; /// Compute the sum of counts and return in Sum. - void accumuateCounts(CountSumOrPercent &Sum, bool IsCS); + void accumulateCounts(CountSumOrPercent &Sum, bool IsCS); protected: std::unique_ptr Symtab; @@ -268,8 +268,14 @@ private: return (const char *)ValueDataStart; } - const uint64_t *getCounter(IntPtrT CounterPtr) const { - ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); + /// Get the offset of \p CounterPtr from the start of the counters section of + /// the profile. The offset has units of "number of counters", i.e. increasing + /// the offset by 1 corresponds to an increase in the *byte offset* by 8. 
+ ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const { + return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); + } + + const uint64_t *getCounter(ptrdiff_t Offset) const { return CountersStart + Offset; } diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 7fbc857b723..55418d9d0f9 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -18,15 +18,18 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include #include +#include #include #include #include @@ -49,7 +52,10 @@ enum class sampleprof_error { truncated_name_table, not_implemented, counter_overflow, - ostream_seek_unsupported + ostream_seek_unsupported, + compress_failed, + uncompress_failed, + zlib_unavailable }; inline std::error_code make_error_code(sampleprof_error E) { @@ -83,6 +89,7 @@ enum SampleProfileFormat { SPF_Text = 0x1, SPF_Compact_Binary = 0x2, SPF_GCC = 0x3, + SPF_Ext_Binary = 0x4, SPF_Binary = 0xff }; @@ -105,6 +112,61 @@ static inline StringRef getRepInFormat(StringRef Name, static inline uint64_t SPVersion() { return 103; } +// Section Type used by SampleProfileExtBinaryBaseReader and +// SampleProfileExtBinaryBaseWriter. Never change the existing +// value of enum. Only append new ones. +enum SecType { + SecInValid = 0, + SecProfSummary = 1, + SecNameTable = 2, + SecProfileSymbolList = 3, + SecFuncOffsetTable = 4, + // marker for the first type of profile. 
+ SecFuncProfileFirst = 32, + SecLBRProfile = SecFuncProfileFirst +}; + +static inline std::string getSecName(SecType Type) { + switch (Type) { + case SecInValid: + return "InvalidSection"; + case SecProfSummary: + return "ProfileSummarySection"; + case SecNameTable: + return "NameTableSection"; + case SecProfileSymbolList: + return "ProfileSymbolListSection"; + case SecFuncOffsetTable: + return "FuncOffsetTableSection"; + case SecLBRProfile: + return "LBRProfileSection"; + } + llvm_unreachable("A SecType has no name for output"); +} + +// Entry type of section header table used by SampleProfileExtBinaryBaseReader +// and SampleProfileExtBinaryBaseWriter. +struct SecHdrTableEntry { + SecType Type; + uint64_t Flags; + uint64_t Offset; + uint64_t Size; +}; + +enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) }; + +static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags |= Flags; +} + +static inline void removeSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) { + Entry.Flags &= ~Flags; +} + +static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) { + return Entry.Flags & Flag; +} + /// Represents the relative location of an instruction. /// /// Instruction locations are specified by the line offset from the @@ -143,8 +205,18 @@ raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); /// will be a list of one or more functions. class SampleRecord { public: - using CallTargetMap = StringMap; + using CallTarget = std::pair; + struct CallTargetComparator { + bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { + if (LHS.second != RHS.second) + return LHS.second > RHS.second; + return LHS.first < RHS.first; + } + }; + + using SortedCallTargetSet = std::set; + using CallTargetMap = StringMap; SampleRecord() = default; /// Increment the number of samples for this record by \p S. 
@@ -179,6 +251,18 @@ public: uint64_t getSamples() const { return NumSamples; } const CallTargetMap &getCallTargets() const { return CallTargets; } + const SortedCallTargetSet getSortedCallTargets() const { + return SortCallTargets(CallTargets); + } + + /// Sort call targets in descending order of call frequency. + static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { + SortedCallTargetSet SortedTargets; + for (const auto &I : Targets) { + SortedTargets.emplace(I.first(), I.second); + } + return SortedTargets; + } /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. @@ -205,7 +289,7 @@ class FunctionSamples; using BodySampleMap = std::map; // NOTE: Using a StringMap here makes parsed profiles consume around 17% more // memory, which is *very* significant for large profiles. -using FunctionSamplesMap = std::map; +using FunctionSamplesMap = std::map>; using CallsiteSampleMap = std::map; /// Representation of the samples collected for a function. @@ -447,11 +531,10 @@ public: StringRef getNameInModule(StringRef Name, const Module *M) const { if (Format != SPF_Compact_Binary) return Name; - // Expect CurrentModule to be initialized by GUIDToFuncNameMapper. - if (M != CurrentModule) - llvm_unreachable("Input Module should be the same as CurrentModule"); - auto iter = GUIDToFuncNameMap.find(std::stoull(Name.data())); - if (iter == GUIDToFuncNameMap.end()) + + assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first"); + auto iter = GUIDToFuncNameMap->find(std::stoull(Name.data())); + if (iter == GUIDToFuncNameMap->end()) return StringRef(); return iter->second; } @@ -472,42 +555,10 @@ public: const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; static SampleProfileFormat Format; + /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for - /// all the function symbols defined or declared in CurrentModule. 
- static DenseMap GUIDToFuncNameMap; - static Module *CurrentModule; - - class GUIDToFuncNameMapper { - public: - GUIDToFuncNameMapper(Module &M) { - if (Format != SPF_Compact_Binary) - return; - - for (const auto &F : M) { - StringRef OrigName = F.getName(); - GUIDToFuncNameMap.insert({Function::getGUID(OrigName), OrigName}); - /// Local to global var promotion used by optimization like thinlto - /// will rename the var and add suffix like ".llvm.xxx" to the - /// original local name. In sample profile, the suffixes of function - /// names are all stripped. Since it is possible that the mapper is - /// built in post-thin-link phase and var promotion has been done, - /// we need to add the substring of function name without the suffix - /// into the GUIDToFuncNameMap. - StringRef CanonName = getCanonicalFnName(F); - if (CanonName != OrigName) - GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName}); - } - CurrentModule = &M; - } - - ~GUIDToFuncNameMapper() { - if (Format != SPF_Compact_Binary) - return; - - GUIDToFuncNameMap.clear(); - CurrentModule = nullptr; - } - }; + /// all the function symbols defined or declared in current module. + DenseMap *GUIDToFuncNameMap = nullptr; // Assume the input \p Name is a name coming from FunctionSamples itself. // If the format is SPF_Compact_Binary, the name is already a GUID and we @@ -583,6 +634,47 @@ private: SamplesWithLocList V; }; +/// ProfileSymbolList records the list of function symbols shown up +/// in the binary used to generate the profile. It is useful +/// to discriminate a function being so cold as not to show up +/// in the profile and a function newly added. +class ProfileSymbolList { +public: + /// copy indicates whether we need to copy the underlying memory + /// for the input Name.
+ void add(StringRef Name, bool copy = false) { + if (!copy) { + Syms.insert(Name); + return; + } + Syms.insert(Name.copy(Allocator)); + } + + bool contains(StringRef Name) { return Syms.count(Name); } + + void merge(const ProfileSymbolList &List) { + for (auto Sym : List.Syms) + add(Sym, true); + } + + unsigned size() { return Syms.size(); } + + void setToCompress(bool TC) { ToCompress = TC; } + bool toCompress() { return ToCompress; } + + std::error_code read(const uint8_t *Data, uint64_t ListSize); + std::error_code write(raw_ostream &OS); + void dump(raw_ostream &OS = dbgs()) const; + +private: + // Determine whether or not to compress the symbol list when + // writing it into profile. The variable is unused when the symbol + // list is read from an existing profile. + bool ToCompress = false; + DenseSet Syms; + BumpPtrAllocator Allocator; +}; + } // end namespace sampleprof } // end namespace llvm diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h index 969cdea859c..5a5d4cfde22 100644 --- a/include/llvm/ProfileData/SampleProfReader.h +++ b/include/llvm/ProfileData/SampleProfReader.h @@ -235,6 +235,62 @@ class raw_ostream; namespace sampleprof { +class SampleProfileReader; + +/// SampleProfileReaderItaniumRemapper remaps the profile data from a +/// sample profile data reader, by applying a provided set of equivalences +/// between components of the symbol names in the profile. +class SampleProfileReaderItaniumRemapper { +public: + SampleProfileReaderItaniumRemapper(std::unique_ptr B, + std::unique_ptr SRR, + SampleProfileReader &R) + : Buffer(std::move(B)), Remappings(std::move(SRR)), Reader(R) { + assert(Remappings && "Remappings cannot be nullptr"); + } + + /// Create a remapper from the given remapping file. The remapper will + /// be used for profile read in by Reader. 
+ static ErrorOr> + create(const std::string Filename, SampleProfileReader &Reader, + LLVMContext &C); + + /// Create a remapper from the given Buffer. The remapper will + /// be used for profile read in by Reader. + static ErrorOr> + create(std::unique_ptr &B, SampleProfileReader &Reader, + LLVMContext &C); + + /// Apply remappings to the profile read by Reader. + void applyRemapping(LLVMContext &Ctx); + + bool hasApplied() { return RemappingApplied; } + + /// Insert function name into remapper. + void insert(StringRef FunctionName) { Remappings->insert(FunctionName); } + + /// Query whether there is equivalent in the remapper which has been + /// inserted. + bool exist(StringRef FunctionName) { + return Remappings->lookup(FunctionName); + } + + /// Return the samples collected for function \p F if remapper knows + /// it is present in SampleMap. + FunctionSamples *getSamplesFor(StringRef FunctionName); + +private: + // The buffer holding the content read from remapping file. + std::unique_ptr Buffer; + std::unique_ptr Remappings; + DenseMap SampleMap; + // The Reader the remapper is servicing. + SampleProfileReader &Reader; + // Indicate whether remapping has been applied to the profile read + // by Reader -- by calling applyRemapping. + bool RemappingApplied = false; +}; + /// Sample-based profile reader. /// /// Each profile contains sample counts for all the functions @@ -273,13 +329,22 @@ public: /// Read and validate the file header. virtual std::error_code readHeader() = 0; - /// Read sample profiles from the associated file. - virtual std::error_code read() = 0; + /// The interface to read sample profiles from the associated file. + std::error_code read() { + if (std::error_code EC = readImpl()) + return EC; + if (Remapper) + Remapper->applyRemapping(Ctx); + return sampleprof_error::success; + } + + /// The implementation to read sample profiles from the associated file.
+ virtual std::error_code readImpl() = 0; /// Print the profile for \p FName on stream \p OS. void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); - virtual void collectFuncsToUse(const Module &M) {} + virtual void collectFuncsFrom(const Module &M) {} /// Print all the profiles on stream \p OS. void dump(raw_ostream &OS = dbgs()); @@ -295,6 +360,10 @@ public: /// Return the samples collected for function \p F. virtual FunctionSamples *getSamplesFor(StringRef Fname) { + if (Remapper) { + if (auto FS = Remapper->getSamplesFor(Fname)) + return FS; + } std::string FGUID; Fname = getRepInFormat(Fname, getFormat(), FGUID); auto It = Profiles.find(Fname); @@ -313,18 +382,33 @@ public: } /// Create a sample profile reader appropriate to the file format. + /// Create a remapper underlying if RemapFilename is not empty. static ErrorOr> - create(const Twine &Filename, LLVMContext &C); + create(const std::string Filename, LLVMContext &C, + const std::string RemapFilename = ""); /// Create a sample profile reader from the supplied memory buffer. + /// Create a remapper underlying if RemapFilename is not empty. static ErrorOr> - create(std::unique_ptr &B, LLVMContext &C); + create(std::unique_ptr &B, LLVMContext &C, + const std::string RemapFilename = ""); /// Return the profile summary. - ProfileSummary &getSummary() { return *(Summary.get()); } + ProfileSummary &getSummary() const { return *(Summary.get()); } + + MemoryBuffer *getBuffer() const { return Buffer.get(); } /// \brief Return the profile format. - SampleProfileFormat getFormat() { return Format; } + SampleProfileFormat getFormat() const { return Format; } + + virtual std::unique_ptr getProfileSymbolList() { + return nullptr; + }; + + /// It includes all the names that have samples either in outline instance + /// or inline instance. 
+ virtual std::vector *getNameTable() { return nullptr; } + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; }; protected: /// Map every function to its associated profile. @@ -352,6 +436,8 @@ protected: /// Compute summary for this profile. void computeSummary(); + std::unique_ptr Remapper; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; @@ -365,7 +451,7 @@ public: std::error_code readHeader() override { return sampleprof_error::success; } /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); @@ -381,7 +467,11 @@ public: virtual std::error_code readHeader() override; /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; + + /// It includes all the names that have samples either in outline instance + /// or inline instance. + virtual std::vector *getNameTable() override { return &NameTable; } protected: /// Read a numeric value of type T from the profile. @@ -411,48 +501,136 @@ protected: bool at_eof() const { return Data >= End; } /// Read the next function profile instance. - std::error_code readFuncProfile(); + std::error_code readFuncProfile(const uint8_t *Start); /// Read the contents of the given profile instance. std::error_code readProfile(FunctionSamples &FProfile); + /// Read the contents of Magic number and Version number. + std::error_code readMagicIdent(); + + /// Read profile summary. + std::error_code readSummary(); + + /// Read the whole name table. + virtual std::error_code readNameTable(); + /// Points to the current location in the buffer. const uint8_t *Data = nullptr; /// Points to the end of the buffer. const uint8_t *End = nullptr; + /// Function name table. 
+ std::vector NameTable; + + /// Read a string indirectly via the name table. + virtual ErrorOr readStringFromTable(); + private: std::error_code readSummaryEntry(std::vector &Entries); virtual std::error_code verifySPMagic(uint64_t Magic) = 0; - - /// Read profile summary. - std::error_code readSummary(); - - /// Read the whole name table. - virtual std::error_code readNameTable() = 0; - - /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable() = 0; }; class SampleProfileReaderRawBinary : public SampleProfileReaderBinary { private: - /// Function name table. - std::vector NameTable; virtual std::error_code verifySPMagic(uint64_t Magic) override; - virtual std::error_code readNameTable() override; - /// Read a string indirectly via the name table. - virtual ErrorOr readStringFromTable() override; public: - SampleProfileReaderRawBinary(std::unique_ptr B, LLVMContext &C) - : SampleProfileReaderBinary(std::move(B), C, SPF_Binary) {} + SampleProfileReaderRawBinary(std::unique_ptr B, LLVMContext &C, + SampleProfileFormat Format = SPF_Binary) + : SampleProfileReaderBinary(std::move(B), C, Format) {} /// \brief Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); }; +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase defines +/// the basic structure of the extensible binary format. +/// The format is organized in sections except the magic and version number +/// at the beginning. There is a section table before all the sections, and +/// each entry in the table describes the entry type, start, size and +/// attributes. The format in each section is defined by the section itself. +/// +/// It is easy to add a new section while maintaining the backward +/// compatibility of the profile. Nothing extra needs to be done. 
If we want +/// to extend an existing section, like add cache misses information in +/// addition to the sample count in the profile body, we can add a new section +/// with the extension and retire the existing section, and we could choose +/// to keep the parser of the old section if we want the reader to be able +/// to read both new and old format profile. +/// +/// SampleProfileReaderExtBinary/SampleProfileWriterExtBinary define the +/// commonly used sections of a profile in extensible binary format. It is +/// possible to define other types of profile inherited from +/// SampleProfileReaderExtBinaryBase/SampleProfileWriterExtBinaryBase. +class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { +private: + std::error_code decompressSection(const uint8_t *SecStart, + const uint64_t SecSize, + const uint8_t *&DecompressBuf, + uint64_t &DecompressBufSize); + + BumpPtrAllocator Allocator; + +protected: + std::vector SecHdrTable; + std::unique_ptr ProfSymList; + std::error_code readSecHdrTableEntry(); + std::error_code readSecHdrTable(); + virtual std::error_code readHeader() override; + virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + SecType Type) = 0; + +public: + SampleProfileReaderExtBinaryBase(std::unique_ptr B, + LLVMContext &C, SampleProfileFormat Format) + : SampleProfileReaderBinary(std::move(B), C, Format) {} + + /// Read sample profiles in extensible format from the associated file. + std::error_code readImpl() override; + + /// Get the total size of all \p Type sections. + uint64_t getSectionSize(SecType Type); + /// Get the total size of header and all sections. 
+ uint64_t getFileSize(); + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override; +}; + +class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { +private: + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + SecType Type) override; + std::error_code readProfileSymbolList(); + std::error_code readFuncOffsetTable(); + std::error_code readFuncProfiles(); + + /// The table mapping from function name to the offset of its FunctionSample + /// towards file start. + DenseMap FuncOffsetTable; + /// The set containing the functions to use when compiling a module. + DenseSet FuncsToUse; + /// Use all functions from the input profile. + bool UseAllFuncs = true; + +public: + SampleProfileReaderExtBinary(std::unique_ptr B, LLVMContext &C, + SampleProfileFormat Format = SPF_Ext_Binary) + : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + + virtual std::unique_ptr getProfileSymbolList() override { + return std::move(ProfSymList); + }; + + /// Collect functions with definitions in Module \p M. + void collectFuncsFrom(const Module &M) override; +}; + class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { private: /// Function name table. @@ -462,6 +640,8 @@ private: DenseMap FuncOffsetTable; /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; + /// Use all functions from the input profile. + bool UseAllFuncs = true; virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readNameTable() override; /// Read a string indirectly via the name table. @@ -478,10 +658,10 @@ public: static bool hasFormat(const MemoryBuffer &Buffer); /// Read samples only for functions to use. 
- std::error_code read() override; + std::error_code readImpl() override; /// Collect functions to be used when compiling Module \p M. - void collectFuncsToUse(const Module &M) override; + void collectFuncsFrom(const Module &M) override; }; using InlineCallStack = SmallVector; @@ -509,7 +689,7 @@ public: std::error_code readHeader() override; /// Read sample profiles from the associated file. - std::error_code read() override; + std::error_code readImpl() override; /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); @@ -537,44 +717,6 @@ protected: static const uint32_t GCOVTagAFDOFunction = 0xac000000; }; -/// A profile data reader proxy that remaps the profile data from another -/// sample profile data reader, by applying a provided set of equivalences -/// between components of the symbol names in the profile. -class SampleProfileReaderItaniumRemapper : public SampleProfileReader { -public: - SampleProfileReaderItaniumRemapper( - std::unique_ptr B, LLVMContext &C, - std::unique_ptr Underlying) - : SampleProfileReader(std::move(B), C, Underlying->getFormat()) { - Profiles = std::move(Underlying->getProfiles()); - Summary = takeSummary(*Underlying); - // Keep the underlying reader alive; the profile data may contain - // StringRefs referencing names in its name table. - UnderlyingReader = std::move(Underlying); - } - - /// Create a remapped sample profile from the given remapping file and - /// underlying samples. - static ErrorOr> - create(const Twine &Filename, LLVMContext &C, - std::unique_ptr Underlying); - - /// Read and validate the file header. - std::error_code readHeader() override { return sampleprof_error::success; } - - /// Read remapping file and apply it to the sample profile. - std::error_code read() override; - - /// Return the samples collected for function \p F. 
- FunctionSamples *getSamplesFor(StringRef FunctionName) override; - using SampleProfileReader::getSamplesFor; - -private: - SymbolRemappingReader Remappings; - DenseMap SampleMap; - std::unique_ptr UnderlyingReader; -}; - } // end namespace sampleprof } // end namespace llvm diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index 81e6e3ab0b4..cc951594c9e 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -36,7 +36,7 @@ public: /// Write sample profiles in \p S. /// /// \returns status code of the file update operation. - virtual std::error_code write(const FunctionSamples &S) = 0; + virtual std::error_code writeSample(const FunctionSamples &S) = 0; /// Write all the sample profiles in the given map of samples. /// @@ -56,6 +56,8 @@ public: static ErrorOr> create(std::unique_ptr &OS, SampleProfileFormat Format); + virtual void setProfileSymbolList(ProfileSymbolList *PSL) {} + protected: SampleProfileWriter(std::unique_ptr &OS) : OutputStream(std::move(OS)) {} @@ -64,6 +66,10 @@ protected: virtual std::error_code writeHeader(const StringMap &ProfileMap) = 0; + // Write function profiles to the profile file. + virtual std::error_code + writeFuncProfiles(const StringMap &ProfileMap); + /// Output stream where to emit the profile to. std::unique_ptr OutputStream; @@ -72,12 +78,15 @@ protected: /// Compute summary for this profile. void computeSummary(const StringMap &ProfileMap); + + /// Profile format. + SampleProfileFormat Format; }; /// Sample-based profile writer (text format). class SampleProfileWriterText : public SampleProfileWriter { public: - std::error_code write(const FunctionSamples &S) override; + std::error_code writeSample(const FunctionSamples &S) override; protected: SampleProfileWriterText(std::unique_ptr &OS) @@ -102,13 +111,14 @@ private: /// Sample-based profile writer (binary format). 
class SampleProfileWriterBinary : public SampleProfileWriter { public: - virtual std::error_code write(const FunctionSamples &S) override; SampleProfileWriterBinary(std::unique_ptr &OS) : SampleProfileWriter(OS) {} + virtual std::error_code writeSample(const FunctionSamples &S) override; + protected: - virtual std::error_code writeNameTable() = 0; - virtual std::error_code writeMagicIdent() = 0; + virtual std::error_code writeMagicIdent(SampleProfileFormat Format); + virtual std::error_code writeNameTable(); virtual std::error_code writeHeader(const StringMap &ProfileMap) override; std::error_code writeSummary(); @@ -118,10 +128,10 @@ protected: MapVector NameTable; -private: void addName(StringRef FName); void addNames(const FunctionSamples &S); +private: friend ErrorOr> SampleProfileWriter::create(std::unique_ptr &OS, SampleProfileFormat Format); @@ -129,10 +139,99 @@ private: class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; +}; + +class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { + using SampleProfileWriterBinary::SampleProfileWriterBinary; +public: + virtual std::error_code + write(const StringMap &ProfileMap) override; + + void setToCompressAllSections(); + void setToCompressSection(SecType Type); protected: - virtual std::error_code writeNameTable() override; - virtual std::error_code writeMagicIdent() override; + uint64_t markSectionStart(SecType Type); + std::error_code addNewSection(SecType Sec, uint64_t SectionStart); + virtual void initSectionHdrLayout() = 0; + virtual std::error_code + writeSections(const StringMap &ProfileMap) = 0; + + // Specify the order of sections in section header table. Note + // the order of sections in the profile may be different than the + // order in SectionHdrLayout. sample Reader will follow the order + // in SectionHdrLayout to read each section.
+ SmallVector SectionHdrLayout; + +private: + void allocSecHdrTable(); + std::error_code writeSecHdrTable(); + virtual std::error_code + writeHeader(const StringMap &ProfileMap) override; + void addSectionFlags(SecType Type, SecFlags Flags); + SecHdrTableEntry &getEntryInLayout(SecType Type); + std::error_code compressAndOutput(); + + // We will swap the raw_ostream held by LocalBufStream and that + // held by OutputStream if we try to add a section which needs + // compression. After the swap, all the data written to output + // will be temporarily buffered into the underlying raw_string_ostream + // originally held by LocalBufStream. After the data writing for the + // section is completed, compress the data in the local buffer, + // swap the raw_ostream back and write the compressed data to the + // real output. + std::unique_ptr LocalBufStream; + // The location where the output stream starts. + uint64_t FileStart; + // The location in the output stream where the SecHdrTable should be + // written to. + uint64_t SecHdrTableOffset; + // Initial Section Flags setting. + std::vector SecHdrTable; +}; + +class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { +public: + SampleProfileWriterExtBinary(std::unique_ptr &OS) + : SampleProfileWriterExtBinaryBase(OS) { + initSectionHdrLayout(); + } + + virtual std::error_code writeSample(const FunctionSamples &S) override; + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { + ProfSymList = PSL; + }; + +private: + virtual void initSectionHdrLayout() override { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // + // This is because sample reader follows the order of SectionHdrLayout to + // read each section, to read function profiles on demand sample reader + // need to get the offset of each function profile first. 
+ // + // SecFuncOffsetTable section is written after SecLBRProfile in the + // profile because FuncOffsetTable needs to be populated while section + // SecLBRProfile is written. + SectionHdrLayout = {{SecProfSummary, 0, 0, 0}, + {SecNameTable, 0, 0, 0}, + {SecFuncOffsetTable, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0}}; + }; + virtual std::error_code + writeSections(const StringMap &ProfileMap) override; + ProfileSymbolList *ProfSymList = nullptr; + + // Save the start of SecLBRProfile so we can compute the offset to the + // start of SecLBRProfile for each Function's Profile and will keep it + // in FuncOffsetTable. + uint64_t SecLBRProfileStart; + // FuncOffsetTable maps function name to its profile offset in SecLBRProfile + // section. It is used to load function profile on demand. + MapVector FuncOffsetTable; + std::error_code writeFuncOffsetTable(); }; // CompactBinary is a compact format of binary profile which both reduces @@ -169,7 +268,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: - virtual std::error_code write(const FunctionSamples &S) override; + virtual std::error_code writeSample(const FunctionSamples &S) override; virtual std::error_code write(const StringMap &ProfileMap) override; @@ -181,7 +280,6 @@ protected: /// towards profile start. 
uint64_t TableOffset; virtual std::error_code writeNameTable() override; - virtual std::error_code writeMagicIdent() override; virtual std::error_code writeHeader(const StringMap &ProfileMap) override; std::error_code writeFuncOffsetTable(); diff --git a/include/llvm/Remarks/BitstreamRemarkContainer.h b/include/llvm/Remarks/BitstreamRemarkContainer.h new file mode 100644 index 00000000000..a2282fca04a --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkContainer.h @@ -0,0 +1,106 @@ +//===-- BitstreamRemarkContainer.h - Container for remarks --------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides declarations for things used in the various types of +// remark containers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_REMARK_CONTAINER_H +#define LLVM_REMARKS_REMARK_CONTAINER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitstream/BitCodes.h" +#include + +namespace llvm { +namespace remarks { + +/// The current version of the remark container. +/// Note: this is different from the version of the remark entry. +constexpr uint64_t CurrentContainerVersion = 0; +/// The magic number used for identifying remark blocks. +constexpr StringLiteral ContainerMagic("RMRK"); + +/// Type of the remark container. +/// The remark container has two modes: +/// * separate: the metadata is separate from the remarks and points to the +/// auxiliary file that contains the remarks. +/// * standalone: the metadata and the remarks are emitted together. +enum class BitstreamRemarkContainerType { + /// The metadata emitted separately. 
+ /// This will contain the following: + /// * Container version and type + /// * String table + /// * External file + SeparateRemarksMeta, + /// The remarks emitted separately. + /// This will contain the following: + /// * Container version and type + /// * Remark version + SeparateRemarksFile, + /// Everything is emitted together. + /// This will contain the following: + /// * Container version and type + /// * Remark version + /// * String table + Standalone, + First = SeparateRemarksMeta, + Last = Standalone, +}; + +/// The possible blocks that will be encountered in a bitstream remark +/// container. +enum BlockIDs { + /// The metadata block is mandatory. It should always come after the + /// BLOCKINFO_BLOCK, and contains metadata that should be used when parsing + /// REMARK_BLOCKs. + /// There should always be only one META_BLOCK. + META_BLOCK_ID = bitc::FIRST_APPLICATION_BLOCKID, + /// One remark entry is represented using a REMARK_BLOCK. There can be + /// multiple REMARK_BLOCKs in the same file. + REMARK_BLOCK_ID +}; + +constexpr StringRef MetaBlockName = StringRef("Meta", 4); +constexpr StringRef RemarkBlockName = StringRef("Remark", 6); + +/// The possible records that can be encountered in the previously described +/// blocks. +enum RecordIDs { + // Meta block records. + RECORD_META_CONTAINER_INFO = 1, + RECORD_META_REMARK_VERSION, + RECORD_META_STRTAB, + RECORD_META_EXTERNAL_FILE, + // Remark block records. + RECORD_REMARK_HEADER, + RECORD_REMARK_DEBUG_LOC, + RECORD_REMARK_HOTNESS, + RECORD_REMARK_ARG_WITH_DEBUGLOC, + RECORD_REMARK_ARG_WITHOUT_DEBUGLOC, + // Helpers. 
+ RECORD_FIRST = RECORD_META_CONTAINER_INFO, + RECORD_LAST = RECORD_REMARK_ARG_WITHOUT_DEBUGLOC +}; + +constexpr StringRef MetaContainerInfoName = StringRef("Container info", 14); +constexpr StringRef MetaRemarkVersionName = StringRef("Remark version", 14); +constexpr StringRef MetaStrTabName = StringRef("String table", 12); +constexpr StringRef MetaExternalFileName = StringRef("External File", 13); +constexpr StringRef RemarkHeaderName = StringRef("Remark header", 13); +constexpr StringRef RemarkDebugLocName = StringRef("Remark debug location", 21); +constexpr StringRef RemarkHotnessName = StringRef("Remark hotness", 14); +constexpr StringRef RemarkArgWithDebugLocName = + StringRef("Argument with debug location", 28); +constexpr StringRef RemarkArgWithoutDebugLocName = StringRef("Argument", 8); + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_CONTAINER_H */ diff --git a/include/llvm/Remarks/BitstreamRemarkParser.h b/include/llvm/Remarks/BitstreamRemarkParser.h new file mode 100644 index 00000000000..7ebd731693b --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkParser.h @@ -0,0 +1,116 @@ +//===-- BitstreamRemarkParser.h - Bitstream parser --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an implementation of the remark parser using the LLVM +// Bitstream format. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H +#define LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitstream/BitstreamReader.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkParser.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace remarks { + +/// Helper to parse a META_BLOCK for a bitstream remark container. +struct BitstreamMetaParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; + /// Reference to the storage for the block info. + BitstreamBlockInfo &BlockInfo; + /// The parsed content: depending on the container type, some fields might be + /// empty. + Optional ContainerVersion; + Optional ContainerType; + Optional StrTabBuf; + Optional ExternalFilePath; + Optional RemarkVersion; + + /// Continue parsing with \p Stream. \p Stream is expected to contain a + /// ENTER_SUBBLOCK to the META_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set. + BitstreamMetaParserHelper(BitstreamCursor &Stream, + BitstreamBlockInfo &BlockInfo); + + /// Parse the META_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); +}; + +/// Helper to parse a REMARK_BLOCK for a bitstream remark container. +struct BitstreamRemarkParserHelper { + /// The Bitstream reader. + BitstreamCursor &Stream; + /// The parsed content: depending on the remark, some fields might be empty. 
+ Optional Type; + Optional RemarkNameIdx; + Optional PassNameIdx; + Optional FunctionNameIdx; + Optional SourceFileNameIdx; + Optional SourceLine; + Optional SourceColumn; + Optional Hotness; + struct Argument { + Optional KeyIdx; + Optional ValueIdx; + Optional SourceFileNameIdx; + Optional SourceLine; + Optional SourceColumn; + }; + Optional> Args; + /// Avoid re-allocating a vector every time. + SmallVector TmpArgs; + + /// Continue parsing with \p Stream. \p Stream is expected to contain a + /// ENTER_SUBBLOCK to the REMARK_BLOCK at the current position. + /// \p Stream is expected to have a BLOCKINFO_BLOCK set and to have already + /// parsed the META_BLOCK. + BitstreamRemarkParserHelper(BitstreamCursor &Stream); + + /// Parse the REMARK_BLOCK and fill the available entries. + /// This helper does not check for the validity of the fields. + Error parse(); +}; + +/// Helper to parse any bitstream remark container. +struct BitstreamParserHelper { + /// The Bitstream reader. + BitstreamCursor Stream; + /// The block info block. + BitstreamBlockInfo BlockInfo; + /// Start parsing at \p Buffer. + BitstreamParserHelper(StringRef Buffer); + /// Parse the magic number. + Expected> parseMagic(); + /// Parse the block info block containing all the abbrevs. + /// This needs to be called before calling any other parsing function. + Error parseBlockInfoBlock(); + /// Return true if the next block is a META_BLOCK. This function does not move + /// the cursor. + Expected isMetaBlock(); + /// Return true if the next block is a REMARK_BLOCK. This function does not + /// move the cursor. + Expected isRemarkBlock(); + /// Return true if the parser reached the end of the stream. + bool atEndOfStream() { return Stream.AtEndOfStream(); } + /// Jump to the end of the stream, skipping everything. 
+ void skipToEnd() { return Stream.skipToEnd(); } +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H */ diff --git a/include/llvm/Remarks/BitstreamRemarkSerializer.h b/include/llvm/Remarks/BitstreamRemarkSerializer.h new file mode 100644 index 00000000000..62a175a1db0 --- /dev/null +++ b/include/llvm/Remarks/BitstreamRemarkSerializer.h @@ -0,0 +1,196 @@ +//===-- BitstreamRemarkSerializer.h - Bitstream serializer ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an implementation of the serializer using the LLVM +// Bitstream format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H +#define LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H + +#include "llvm/Bitstream/BitstreamWriter.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace remarks { + +/// Serialize the remarks to LLVM bitstream. +/// This class provides ways to emit remarks in the LLVM bitstream format and +/// its associated metadata. +/// +/// * The separate model: +/// Separate meta: | Container info +/// | String table +/// | External file +/// +/// Separate remarks: | Container info +/// | Remark version +/// | Remark0 +/// | Remark1 +/// | Remark2 +/// | ... +/// +/// * The standalone model: | Container info +/// | String table +/// | Remark version +/// | Remark0 +/// | Remark1 +/// | Remark2 +/// | ... +/// +struct BitstreamRemarkSerializerHelper { + /// Buffer used for encoding the bitstream before writing it to the final + /// stream. 
+ SmallVector Encoded; + /// Buffer used to construct records and pass to the bitstream writer. + SmallVector R; + /// The Bitstream writer. + BitstreamWriter Bitstream; + /// The type of the container we are serializing. + BitstreamRemarkContainerType ContainerType; + + /// Abbrev IDs initialized in the block info block. + /// Note: depending on the container type, some IDs might be uninitialized. + /// Warning: When adding more abbrev IDs, make sure to update the + /// BlockCodeSize (in the call to EnterSubblock). + uint64_t RecordMetaContainerInfoAbbrevID = 0; + uint64_t RecordMetaRemarkVersionAbbrevID = 0; + uint64_t RecordMetaStrTabAbbrevID = 0; + uint64_t RecordMetaExternalFileAbbrevID = 0; + uint64_t RecordRemarkHeaderAbbrevID = 0; + uint64_t RecordRemarkDebugLocAbbrevID = 0; + uint64_t RecordRemarkHotnessAbbrevID = 0; + uint64_t RecordRemarkArgWithDebugLocAbbrevID = 0; + uint64_t RecordRemarkArgWithoutDebugLocAbbrevID = 0; + + BitstreamRemarkSerializerHelper(BitstreamRemarkContainerType ContainerType); + + // Disable copy and move: Bitstream points to Encoded, which needs special + // handling during copy/move, but moving the vectors is probably useless + // anyway. + BitstreamRemarkSerializerHelper(const BitstreamRemarkSerializerHelper &) = + delete; + BitstreamRemarkSerializerHelper & + operator=(const BitstreamRemarkSerializerHelper &) = delete; + BitstreamRemarkSerializerHelper(BitstreamRemarkSerializerHelper &&) = delete; + BitstreamRemarkSerializerHelper & + operator=(BitstreamRemarkSerializerHelper &&) = delete; + + /// Set up the necessary block info entries according to the container type. + void setupBlockInfo(); + + /// Set up the block info for the metadata block. + void setupMetaBlockInfo(); + /// The remark version in the metadata block. + void setupMetaRemarkVersion(); + void emitMetaRemarkVersion(uint64_t RemarkVersion); + /// The strtab in the metadata block. 
+ void setupMetaStrTab(); + void emitMetaStrTab(const StringTable &StrTab); + /// The external file in the metadata block. + void setupMetaExternalFile(); + void emitMetaExternalFile(StringRef Filename); + + /// The block info for the remarks block. + void setupRemarkBlockInfo(); + + /// Emit the metadata for the remarks. + void emitMetaBlock(uint64_t ContainerVersion, + Optional RemarkVersion, + Optional StrTab = None, + Optional Filename = None); + + /// Emit a remark block. The string table is required. + void emitRemarkBlock(const Remark &Remark, StringTable &StrTab); + /// Finalize the writing to \p OS. + void flushToStream(raw_ostream &OS); + /// Finalize the writing to a buffer. + /// The contents of the buffer remain valid for the lifetime of the object. + /// Any call to any other function in this class will invalidate the buffer. + StringRef getBuffer(); +}; + +/// Implementation of the remark serializer using LLVM bitstream. +struct BitstreamRemarkSerializer : public RemarkSerializer { + /// The file should contain: + /// 1) The block info block that describes how to read the blocks. + /// 2) The metadata block that contains various information about the remarks + /// in the file. + /// 3) A number of remark blocks. + + /// We need to set up 1) and 2) first, so that we can emit 3) after. This flag + /// is used to emit the first two blocks only once. + bool DidSetUp = false; + /// The helper to emit bitstream. + BitstreamRemarkSerializerHelper Helper; + + /// Construct a serializer that will create its own string table. + BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode); + /// Construct a serializer with a pre-filled string table. + BitstreamRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + StringTable StrTab); + + /// Emit a remark to the stream. This also emits the metadata associated to + /// the remarks based on the SerializerMode specified at construction. + /// This writes the serialized output to the provided stream. 
+ void emit(const Remark &Remark) override; + /// The metadata serializer associated to this remark serializer. Based on the + /// container type of the current serializer, the container type of the + /// metadata serializer will change. + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::Bitstream; + } +}; + +/// Serializer of metadata for bitstream remarks. +struct BitstreamMetaSerializer : public MetaSerializer { + /// This class can be used with [1] a pre-constructed + /// BitstreamRemarkSerializerHelper, or with [2] one that is owned by the meta + /// serializer. In case of [1], we need to be able to store a reference to the + /// object, while in case of [2] we need to store the whole object. + Optional TmpHelper; + /// The actual helper, that can point to \p TmpHelper or to an external helper + /// object. + BitstreamRemarkSerializerHelper *Helper = nullptr; + + Optional StrTab; + Optional ExternalFilename; + + /// Create a new meta serializer based on \p ContainerType. + BitstreamMetaSerializer(raw_ostream &OS, + BitstreamRemarkContainerType ContainerType, + Optional StrTab = None, + Optional ExternalFilename = None) + : MetaSerializer(OS), TmpHelper(None), Helper(nullptr), StrTab(StrTab), + ExternalFilename(ExternalFilename) { + TmpHelper.emplace(ContainerType); + Helper = &*TmpHelper; + } + + /// Create a new meta serializer based on a previously built \p Helper. 
+ BitstreamMetaSerializer(raw_ostream &OS, + BitstreamRemarkSerializerHelper &Helper, + Optional StrTab = None, + Optional ExternalFilename = None) + : MetaSerializer(OS), TmpHelper(None), Helper(&Helper), StrTab(StrTab), + ExternalFilename(ExternalFilename) {} + + void emit() override; +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_BITSTREAM_REMARK_SERIALIZER_H */ diff --git a/include/llvm/Remarks/Remark.h b/include/llvm/Remarks/Remark.h index 05d0ea60acc..1243311fb8c 100644 --- a/include/llvm/Remarks/Remark.h +++ b/include/llvm/Remarks/Remark.h @@ -23,7 +23,8 @@ namespace llvm { namespace remarks { -constexpr uint64_t Version = 0; +/// The current version of the remark entry. +constexpr uint64_t CurrentRemarkVersion = 0; /// The debug location used to track a remark back to the source file. struct RemarkLocation { @@ -58,7 +59,8 @@ enum class Type { AnalysisFPCommute, AnalysisAliasing, Failure, - LastTypeValue = Failure + First = Unknown, + Last = Failure }; /// A remark type used for both emission and parsing. @@ -107,6 +109,36 @@ private: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef) +/// Comparison operators for Remark objects and dependent objects. 
+inline bool operator==(const RemarkLocation &LHS, const RemarkLocation &RHS) { + return LHS.SourceFilePath == RHS.SourceFilePath && + LHS.SourceLine == RHS.SourceLine && + LHS.SourceColumn == RHS.SourceColumn; +} + +inline bool operator!=(const RemarkLocation &LHS, const RemarkLocation &RHS) { + return !(LHS == RHS); +} + +inline bool operator==(const Argument &LHS, const Argument &RHS) { + return LHS.Key == RHS.Key && LHS.Val == RHS.Val && LHS.Loc == RHS.Loc; +} + +inline bool operator!=(const Argument &LHS, const Argument &RHS) { + return !(LHS == RHS); +} + +inline bool operator==(const Remark &LHS, const Remark &RHS) { + return LHS.RemarkType == RHS.RemarkType && LHS.PassName == RHS.PassName && + LHS.RemarkName == RHS.RemarkName && + LHS.FunctionName == RHS.FunctionName && LHS.Loc == RHS.Loc && + LHS.Hotness == RHS.Hotness && LHS.Args == RHS.Args; +} + +inline bool operator!=(const Remark &LHS, const Remark &RHS) { + return !(LHS == RHS); +} + } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkFormat.h b/include/llvm/Remarks/RemarkFormat.h index e167d99d251..6dd32b22609 100644 --- a/include/llvm/Remarks/RemarkFormat.h +++ b/include/llvm/Remarks/RemarkFormat.h @@ -19,10 +19,10 @@ namespace llvm { namespace remarks { -constexpr StringRef Magic("REMARKS", 7); +constexpr StringLiteral Magic("REMARKS"); /// The format used for serializing/deserializing remarks. -enum class Format { Unknown, YAML }; +enum class Format { Unknown, YAML, YAMLStrTab, Bitstream }; /// Parse and validate a string for the remark format. 
Expected parseFormat(StringRef FormatStr); diff --git a/include/llvm/Remarks/RemarkParser.h b/include/llvm/Remarks/RemarkParser.h index 671e1abe5ec..d6b1fddb06f 100644 --- a/include/llvm/Remarks/RemarkParser.h +++ b/include/llvm/Remarks/RemarkParser.h @@ -23,9 +23,6 @@ namespace llvm { namespace remarks { -struct ParserImpl; -struct ParsedStringTable; - class EndOfFileError : public ErrorInfo { public: static char ID; @@ -39,11 +36,13 @@ public: }; /// Parser used to parse a raw buffer to remarks::Remark objects. -struct Parser { +struct RemarkParser { /// The format of the parser. Format ParserFormat; + /// Path to prepend when opening an external remark file. + std::string ExternalFilePrependPath; - Parser(Format ParserFormat) : ParserFormat(ParserFormat) {} + RemarkParser(Format ParserFormat) : ParserFormat(ParserFormat) {} /// If no error occurs, this returns a valid Remark object. /// If an error of type EndOfFileError occurs, it is safe to recover from it @@ -52,7 +51,7 @@ struct Parser { /// The pointer should never be null. virtual Expected> next() = 0; - virtual ~Parser() = default; + virtual ~RemarkParser() = default; }; /// In-memory representation of the string table parsed from a buffer (e.g. the @@ -60,16 +59,33 @@ struct Parser { struct ParsedStringTable { /// The buffer mapped from the section contents. StringRef Buffer; - /// Collection of offsets in the buffer for each string entry. - SmallVector Offsets; + /// This object has high changes to be std::move'd around, so don't use a + /// SmallVector for once. + std::vector Offsets; - Expected operator[](size_t Index) const; ParsedStringTable(StringRef Buffer); + /// Disable copy. + ParsedStringTable(const ParsedStringTable &) = delete; + ParsedStringTable &operator=(const ParsedStringTable &) = delete; + /// Should be movable. 
+ ParsedStringTable(ParsedStringTable &&) = default; + ParsedStringTable &operator=(ParsedStringTable &&) = default; + + size_t size() const { return Offsets.size(); } + Expected operator[](size_t Index) const; }; -Expected> +Expected> createRemarkParser(Format ParserFormat, + StringRef Buf); + +Expected> createRemarkParser(Format ParserFormat, StringRef Buf, - Optional StrTab = None); + ParsedStringTable StrTab); + +Expected> +createRemarkParserFromMeta(Format ParserFormat, StringRef Buf, + Optional StrTab = None, + Optional ExternalFilePrependPath = None); } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkSerializer.h b/include/llvm/Remarks/RemarkSerializer.h index def5c2e1662..35752cd5f6f 100644 --- a/include/llvm/Remarks/RemarkSerializer.h +++ b/include/llvm/Remarks/RemarkSerializer.h @@ -14,54 +14,74 @@ #define LLVM_REMARKS_REMARK_SERIALIZER_H #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" -#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" namespace llvm { namespace remarks { +enum class SerializerMode { + Separate, // A mode where the metadata is serialized separately from the + // remarks. Typically, this is used when the remarks need to be + // streamed to a side file and the metadata is embedded into the + // final result of the compilation. + Standalone // A mode where everything can be retrieved in the same + // file/buffer. Typically, this is used for storing remarks for + // later use. +}; + +struct MetaSerializer; + /// This is the base class for a remark serializer. /// It includes support for using a string table while emitting. -struct Serializer { +struct RemarkSerializer { + /// The format of the serializer. + Format SerializerFormat; /// The open raw_ostream that the remark diagnostics are emitted to. raw_ostream &OS; + /// The serialization mode. 
+ SerializerMode Mode; /// The string table containing all the unique strings used in the output. /// The table can be serialized to be consumed after the compilation. Optional StrTab; - Serializer(raw_ostream &OS) : OS(OS), StrTab() {} + RemarkSerializer(Format SerializerFormat, raw_ostream &OS, + SerializerMode Mode) + : SerializerFormat(SerializerFormat), OS(OS), Mode(Mode), StrTab() {} /// This is just an interface. - virtual ~Serializer() = default; - virtual void emit(const Remark &Remark) = 0; -}; - -/// Wether the serializer should use a string table while emitting. -enum class UseStringTable { No, Yes }; - -/// Serialize the remarks to YAML. One remark entry looks like this: -/// --- ! -/// Pass: -/// Name: -/// DebugLoc: { File: , Line: , -/// Column: } -/// Function: -/// Args: -/// - : -/// DebugLoc: { File: , Line: , Column: } -/// ... -struct YAMLSerializer : public Serializer { - /// The YAML streamer. - yaml::Output YAMLOutput; - - YAMLSerializer(raw_ostream &OS, - UseStringTable UseStringTable = remarks::UseStringTable::No); - + virtual ~RemarkSerializer() = default; /// Emit a remark to the stream. - void emit(const Remark &Remark) override; + virtual void emit(const Remark &Remark) = 0; + /// Return the corresponding metadata serializer. + virtual std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) = 0; }; +/// This is the base class for a remark metadata serializer. +struct MetaSerializer { + /// The open raw_ostream that the metadata is emitted to. + raw_ostream &OS; + + MetaSerializer(raw_ostream &OS) : OS(OS) {} + + /// This is just an interface. + virtual ~MetaSerializer() = default; + virtual void emit() = 0; +}; + +/// Create a remark serializer. +Expected> +createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS); + +/// Create a remark serializer that uses a pre-filled string table. 
+Expected> +createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS, remarks::StringTable StrTab); + } // end namespace remarks } // end namespace llvm diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h index f9b4fdbbfb8..4ce27ee884c 100644 --- a/include/llvm/Remarks/RemarkStringTable.h +++ b/include/llvm/Remarks/RemarkStringTable.h @@ -18,7 +18,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" +#include "llvm/Remarks/Remark.h" #include namespace llvm { @@ -27,21 +27,35 @@ class raw_ostream; namespace remarks { +struct ParsedStringTable; + /// The string table used for serializing remarks. /// This table can be for example serialized in a section to be consumed after /// the compilation. struct StringTable { - /// Allocator holding all the memory used by the map. - BumpPtrAllocator Allocator; /// The string table containing all the unique strings used in the output. /// It maps a string to an unique ID. - StringMap StrTab; + StringMap StrTab; /// Total size of the string table when serialized. size_t SerializedSize = 0; - StringTable() : Allocator(), StrTab(Allocator) {} + StringTable() = default; + + /// Disable copy. + StringTable(const StringTable &) = delete; + StringTable &operator=(const StringTable &) = delete; + /// Should be movable. + StringTable(StringTable &&) = default; + StringTable &operator=(StringTable &&) = default; + + /// Construct a string table from a ParsedStringTable. + StringTable(const ParsedStringTable &Other); + /// Add a string to the table. It returns an unique ID of the string. std::pair add(StringRef Str); + /// Modify \p R to use strings from this string table. If the string table + /// does not contain the strings, it adds them. + void internalize(Remark &R); /// Serialize the string table to a stream. 
It is serialized as a little /// endian uint64 (the size of the table in bytes) followed by a sequence of /// NULL-terminated strings, where the N-th string is the string with the ID N diff --git a/include/llvm/Remarks/YAMLRemarkSerializer.h b/include/llvm/Remarks/YAMLRemarkSerializer.h new file mode 100644 index 00000000000..f1213beab15 --- /dev/null +++ b/include/llvm/Remarks/YAMLRemarkSerializer.h @@ -0,0 +1,108 @@ +//===-- YAMLRemarkSerializer.h - YAML Remark serialization ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides an interface for serializing remarks to YAML. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_YAML_REMARK_SERIALIZER_H +#define LLVM_REMARKS_YAML_REMARK_SERIALIZER_H + +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace remarks { + +/// Serialize the remarks to YAML. One remark entry looks like this: +/// --- ! +/// Pass: +/// Name: +/// DebugLoc: { File: , Line: , +/// Column: } +/// Function: +/// Args: +/// - : +/// DebugLoc: { File: , Line: , Column: } +/// ... +struct YAMLRemarkSerializer : public RemarkSerializer { + /// The YAML streamer. 
+ yaml::Output YAMLOutput; + + YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + Optional StrTab = None); + + void emit(const Remark &Remark) override; + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::YAML; + } + +protected: + YAMLRemarkSerializer(Format SerializerFormat, raw_ostream &OS, + SerializerMode Mode, + Optional StrTab = None); +}; + +struct YAMLMetaSerializer : public MetaSerializer { + Optional ExternalFilename; + + YAMLMetaSerializer(raw_ostream &OS, Optional ExternalFilename) + : MetaSerializer(OS), ExternalFilename(ExternalFilename) {} + + void emit() override; +}; + +/// Serialize the remarks to YAML using a string table. An remark entry looks +/// like the regular YAML remark but instead of string entries it's using +/// numbers that map to an index in the string table. +struct YAMLStrTabRemarkSerializer : public YAMLRemarkSerializer { + /// Wether we already emitted the metadata in standalone mode. + /// This should be set to true after the first invocation of `emit`. + bool DidEmitMeta = false; + + YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode) + : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode) { + // We always need a string table for this type of serializer. + StrTab.emplace(); + } + YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + StringTable StrTab) + : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode, std::move(StrTab)) {} + + /// Override to emit the metadata if necessary. + void emit(const Remark &Remark) override; + + std::unique_ptr + metaSerializer(raw_ostream &OS, + Optional ExternalFilename = None) override; + + static bool classof(const RemarkSerializer *S) { + return S->SerializerFormat == Format::YAMLStrTab; + } +}; + +struct YAMLStrTabMetaSerializer : public YAMLMetaSerializer { + /// The string table is part of the metadata. 
+ const StringTable &StrTab; + + YAMLStrTabMetaSerializer(raw_ostream &OS, + Optional ExternalFilename, + const StringTable &StrTab) + : YAMLMetaSerializer(OS, ExternalFilename), StrTab(StrTab) {} + + void emit() override; +}; + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_SERIALIZER_H */ diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index e152f383b3e..15737265dfc 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -50,35 +50,36 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a", #define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) #endif // FIXME: This would be nicer were it tablegen -AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") -AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") -AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") -AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") -AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") -AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") -AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") -AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") -AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") -AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") -AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") -AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") -AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, 
"+sve", "-sve") -AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") -AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") -AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") -AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") -AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm") -AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") -AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") -AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") -AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") -AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") -AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") +AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") +AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") +AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") +AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") +AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") +AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") +AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") +AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") +AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") +AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") +AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, 
"+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") +AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") +AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") +AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") +AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") +AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") +AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") +AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") +AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") +AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") +AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") +AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME @@ -92,6 +93,12 @@ AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC)) AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) +AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-a65ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -104,6 +111,13 @@ AARCH64_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | 
AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | + AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | + AArch64::AEK_SSBS)) AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h index 965d38535e7..94f341c8326 100644 --- a/include/llvm/Support/AArch64TargetParser.h +++ b/include/llvm/Support/AArch64TargetParser.h @@ -53,7 +53,8 @@ enum ArchExtKind : unsigned { AEK_SVE2AES = 1 << 24, AEK_SVE2SM4 = 1 << 25, AEK_SVE2SHA3 = 1 << 26, - AEK_BITPERM = 1 << 27, + AEK_SVE2BITPERM = 1 << 27, + AEK_TME = 1 << 28, }; enum class ArchKind { diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def index f466b325274..3e77e20762c 100644 --- a/include/llvm/Support/ARMTargetParser.def +++ b/include/llvm/Support/ARMTargetParser.def @@ -274,6 +274,8 @@ ARM_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h index 4b9070dea59..02d4c975129 
100644 --- a/include/llvm/Support/ARMTargetParser.h +++ b/include/llvm/Support/ARMTargetParser.h @@ -39,19 +39,13 @@ enum ArchExtKind : unsigned { AEK_DSP = 1 << 10, AEK_FP16 = 1 << 11, AEK_RAS = 1 << 12, - AEK_SVE = 1 << 13, - AEK_DOTPROD = 1 << 14, - AEK_SHA2 = 1 << 15, - AEK_AES = 1 << 16, - AEK_FP16FML = 1 << 17, - AEK_SB = 1 << 18, - AEK_SVE2 = 1 << 19, - AEK_SVE2AES = 1 << 20, - AEK_SVE2SM4 = 1 << 21, - AEK_SVE2SHA3 = 1 << 22, - AEK_BITPERM = 1 << 23, - AEK_FP_DP = 1 << 24, - AEK_LOB = 1 << 25, + AEK_DOTPROD = 1 << 13, + AEK_SHA2 = 1 << 14, + AEK_AES = 1 << 15, + AEK_FP16FML = 1 << 16, + AEK_SB = 1 << 17, + AEK_FP_DP = 1 << 18, + AEK_LOB = 1 << 19, // Unsupported extensions. AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index d12401f0eb4..eb42542b777 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the AlignedCharArray and AlignedCharArrayUnion classes. +// This file defines the AlignedCharArrayUnion class. // //===----------------------------------------------------------------------===// @@ -18,128 +18,38 @@ namespace llvm { -/// \struct AlignedCharArray -/// Helper for building an aligned character array type. -/// -/// This template is used to explicitly build up a collection of aligned -/// character array types. We have to build these up using a macro and explicit -/// specialization to cope with MSVC (at least till 2015) where only an -/// integer literal can be used to specify an alignment constraint. Once built -/// up here, we can then begin to indirect between these using normal C++ -/// template parameters. - -// MSVC requires special handling here. -#ifndef _MSC_VER - -template -struct AlignedCharArray { - alignas(Alignment) char buffer[Size]; -}; - -#else // _MSC_VER - -/// Create a type with an aligned char buffer. 
-template -struct AlignedCharArray; - -// We provide special variations of this template for the most common -// alignments because __declspec(align(...)) doesn't actually work when it is -// a member of a by-value function argument in MSVC, even if the alignment -// request is something reasonably like 8-byte or 16-byte. Note that we can't -// even include the declspec with the union that forces the alignment because -// MSVC warns on the existence of the declspec despite the union member forcing -// proper alignment. - -template -struct AlignedCharArray<1, Size> { - union { - char aligned; - char buffer[Size]; - }; -}; - -template -struct AlignedCharArray<2, Size> { - union { - short aligned; - char buffer[Size]; - }; -}; - -template -struct AlignedCharArray<4, Size> { - union { - int aligned; - char buffer[Size]; - }; -}; - -template -struct AlignedCharArray<8, Size> { - union { - double aligned; - char buffer[Size]; - }; -}; - - -// The rest of these are provided with a __declspec(align(...)) and we simply -// can't pass them by-value as function arguments on MSVC. 
- -#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ - template \ - struct AlignedCharArray { \ - __declspec(align(x)) char buffer[Size]; \ - }; - -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64) -LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128) - -#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT - -#endif // _MSC_VER - namespace detail { -template -class AlignerImpl { - T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; T7 t7; T8 t8; T9 t9; T10 t10; +template class AlignerImpl { + T t; + AlignerImpl rest; AlignerImpl() = delete; }; -template -union SizerImpl { - char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)], - arr5[sizeof(T5)], arr6[sizeof(T6)], arr7[sizeof(T7)], arr8[sizeof(T8)], - arr9[sizeof(T9)], arr10[sizeof(T10)]; +template class AlignerImpl { + T t; + AlignerImpl() = delete; }; + +template union SizerImpl { + char arr[sizeof(T)]; + SizerImpl rest; +}; + +template union SizerImpl { char arr[sizeof(T)]; }; } // end namespace detail -/// This union template exposes a suitably aligned and sized character -/// array member which can hold elements of any of up to ten types. +/// A suitably aligned and sized character array member which can hold elements +/// of any type. /// -/// These types may be arrays, structs, or any other types. The goal is to -/// expose a char array buffer member which can be used as suitable storage for -/// a placement new of any of these types. Support for more than ten types can -/// be added at the cost of more boilerplate. -template -struct AlignedCharArrayUnion : llvm::AlignedCharArray< - alignof(llvm::detail::AlignerImpl), - sizeof(::llvm::detail::SizerImpl)> { +/// These types may be arrays, structs, or any other types. This exposes a +/// `buffer` member which can be used as suitable storage for a placement new of +/// any of these types. 
+template struct AlignedCharArrayUnion { + alignas(::llvm::detail::AlignerImpl) char buffer[sizeof( + llvm::detail::SizerImpl)]; }; + } // end namespace llvm #endif // LLVM_SUPPORT_ALIGNOF_H diff --git a/include/llvm/Support/Alignment.h b/include/llvm/Support/Alignment.h new file mode 100644 index 00000000000..72fad87dd0d --- /dev/null +++ b/include/llvm/Support/Alignment.h @@ -0,0 +1,403 @@ +//===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains types to represent alignments. +// They are instrumented to guarantee some invariants are preserved and prevent +// invalid manipulations. +// +// - Align represents an alignment in bytes, it is always set and always a valid +// power of two, its minimum value is 1 which means no alignment requirements. +// +// - MaybeAlign is an optional type, it may be undefined or set. When it's set +// you can get the underlying Align type by using the getValue() method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_ALIGNMENT_H_ +#define LLVM_SUPPORT_ALIGNMENT_H_ + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include +#include + +namespace llvm { + +#define ALIGN_CHECK_ISPOSITIVE(decl) \ + assert(decl > 0 && (#decl " should be defined")) +#define ALIGN_CHECK_ISSET(decl) \ + assert(decl.hasValue() && (#decl " should be defined")) + +/// This struct is a compact representation of a valid (non-zero power of two) +/// alignment. +/// It is suitable for use as static global constants. +struct Align { +private: + uint8_t ShiftValue = 0; /// The log2 of the required alignment. 
+ /// ShiftValue is less than 64 by construction. + + friend struct MaybeAlign; + friend unsigned Log2(Align); + friend bool operator==(Align Lhs, Align Rhs); + friend bool operator!=(Align Lhs, Align Rhs); + friend bool operator<=(Align Lhs, Align Rhs); + friend bool operator>=(Align Lhs, Align Rhs); + friend bool operator<(Align Lhs, Align Rhs); + friend bool operator>(Align Lhs, Align Rhs); + friend unsigned encode(struct MaybeAlign A); + friend struct MaybeAlign decodeMaybeAlign(unsigned Value); + + /// A trivial type to allow construction of constexpr Align. + /// This is currently needed to workaround a bug in GCC 5.3 which prevents + /// definition of constexpr assign operators. + /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic + /// FIXME: Remove this, make all assign operators constexpr and introduce user + /// defined literals when we don't have to support GCC 5.3 anymore. + /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain + struct LogValue { + uint8_t Log; + }; + +public: + /// Default is byte-aligned. + constexpr Align() = default; + /// Do not perform checks in case of copy/move construct/assign, because the + /// checks have been performed when building `Other`. + constexpr Align(const Align &Other) = default; + constexpr Align(Align &&Other) = default; + Align &operator=(const Align &Other) = default; + Align &operator=(Align &&Other) = default; + + explicit Align(uint64_t Value) { + assert(Value > 0 && "Value must not be 0"); + assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2"); + ShiftValue = Log2_64(Value); + assert(ShiftValue < 64 && "Broken invariant"); + } + + /// This is a hole in the type system and should not be abused. + /// Needed to interact with C for instance. + uint64_t value() const { return uint64_t(1) << ShiftValue; } + + /// Returns a default constructed Align which corresponds to no alignment. 
+ /// This is useful to test for unalignment as it conveys clear semantic. + /// `if (A != Align::None())` + /// would be better than + /// `if (A > Align(1))` + constexpr static const Align None() { return Align(); } + + /// Allow constructions of constexpr Align. + template constexpr static LogValue Constant() { + return LogValue{static_cast(CTLog2())}; + } + + /// Allow constructions of constexpr Align from types. + /// Compile time equivalent to Align(alignof(T)). + template constexpr static LogValue Of() { + return Constant::value>(); + } + + /// Constexpr constructor from LogValue type. + constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} +}; + +/// Treats the value 0 as a 1, so Align is always at least 1. +inline Align assumeAligned(uint64_t Value) { + return Value ? Align(Value) : Align(); +} + +/// This struct is a compact representation of a valid (power of two) or +/// undefined (0) alignment. +struct MaybeAlign : public llvm::Optional { +private: + using UP = llvm::Optional; + +public: + /// Default is undefined. + MaybeAlign() = default; + /// Do not perform checks in case of copy/move construct/assign, because the + /// checks have been performed when building `Other`. + MaybeAlign(const MaybeAlign &Other) = default; + MaybeAlign &operator=(const MaybeAlign &Other) = default; + MaybeAlign(MaybeAlign &&Other) = default; + MaybeAlign &operator=(MaybeAlign &&Other) = default; + + /// Use llvm::Optional constructor. + using UP::UP; + + explicit MaybeAlign(uint64_t Value) { + assert((Value == 0 || llvm::isPowerOf2_64(Value)) && + "Alignment is neither 0 nor a power of 2"); + if (Value) + emplace(Value); + } + + /// For convenience, returns a valid alignment or 1 if undefined. + Align valueOrOne() const { return hasValue() ? getValue() : Align(); } +}; + +/// Checks that SizeInBytes is a multiple of the alignment. 
+inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { + return SizeInBytes % Lhs.value() == 0; +} + +/// Checks that SizeInBytes is a multiple of the alignment. +/// Returns false if the alignment is undefined. +inline bool isAligned(MaybeAlign Lhs, uint64_t SizeInBytes) { + ALIGN_CHECK_ISSET(Lhs); + return SizeInBytes % (*Lhs).value() == 0; +} + +/// Checks that Addr is a multiple of the alignment. +inline bool isAddrAligned(Align Lhs, const void *Addr) { + return isAligned(Lhs, reinterpret_cast(Addr)); +} + +/// Returns a multiple of A needed to store `Size` bytes. +inline uint64_t alignTo(uint64_t Size, Align A) { + const uint64_t value = A.value(); + // The following line is equivalent to `(Size + value - 1) / value * value`. + + // The division followed by a multiplication can be thought of as a right + // shift followed by a left shift which zeros out the extra bits produced in + // the bump; `~(value - 1)` is a mask where all those bits being zeroed out + // are just zero. + + // Most compilers can generate this code but the pattern may be missed when + // multiple functions gets inlined. + return (Size + value - 1) & ~(value - 1); +} + +/// Returns a multiple of A needed to store `Size` bytes. +/// Returns `Size` if current alignment is undefined. +inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { + return A ? alignTo(Size, A.getValue()) : Size; +} + +/// Aligns `Addr` to `Alignment` bytes, rounding up. +inline uintptr_t alignAddr(const void *Addr, Align Alignment) { + uintptr_t ArithAddr = reinterpret_cast(Addr); + assert(static_cast(ArithAddr + Alignment.value() - 1) >= + ArithAddr && "Overflow"); + return alignTo(ArithAddr, Alignment); +} + +/// Returns the offset to the next integer (mod 2**64) that is greater than +/// or equal to \p Value and is a multiple of \p Align. 
+inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { + return alignTo(Value, Alignment) - Value; +} + +/// Returns the necessary adjustment for aligning `Addr` to `Alignment` +/// bytes, rounding up. +inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { + return offsetToAlignment(reinterpret_cast(Addr), Alignment); +} + +/// Returns the log2 of the alignment. +inline unsigned Log2(Align A) { return A.ShiftValue; } + +/// Returns the log2 of the alignment. +/// \pre A must be defined. +inline unsigned Log2(MaybeAlign A) { + ALIGN_CHECK_ISSET(A); + return Log2(A.getValue()); +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline Align commonAlignment(Align A, uint64_t Offset) { + return Align(MinAlign(A.value(), Offset)); +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { + return A && B ? commonAlignment(*A, *B) : A ? A : B; +} + +/// Returns the alignment that satisfies both alignments. +/// Same semantic as MinAlign. +inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { + return MaybeAlign(MinAlign((*A).value(), Offset)); +} + +/// Returns a representation of the alignment that encodes undefined as 0. +inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } + +/// Dual operation of the encode function above. +inline MaybeAlign decodeMaybeAlign(unsigned Value) { + if (Value == 0) + return MaybeAlign(); + Align Out; + Out.ShiftValue = Value - 1; + return Out; +} + +/// Returns a representation of the alignment, the encoded value is positive by +/// definition. +inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } + +/// Comparisons between Align and scalars. 
Rhs must be positive. +inline bool operator==(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() == Rhs; +} +inline bool operator!=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() != Rhs; +} +inline bool operator<=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() <= Rhs; +} +inline bool operator>=(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() >= Rhs; +} +inline bool operator<(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() < Rhs; +} +inline bool operator>(Align Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISPOSITIVE(Rhs); + return Lhs.value() > Rhs; +} + +/// Comparisons between MaybeAlign and scalars. +inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { + return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; +} +inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { + return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; +} +inline bool operator<=(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() <= Rhs; +} +inline bool operator>=(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() >= Rhs; +} +inline bool operator<(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() < Rhs; +} +inline bool operator>(MaybeAlign Lhs, uint64_t Rhs) { + ALIGN_CHECK_ISSET(Lhs); + ALIGN_CHECK_ISPOSITIVE(Rhs); + return (*Lhs).value() > Rhs; +} + +/// Comparisons operators between Align. 
+inline bool operator==(Align Lhs, Align Rhs) { + return Lhs.ShiftValue == Rhs.ShiftValue; +} +inline bool operator!=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue != Rhs.ShiftValue; +} +inline bool operator<=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue <= Rhs.ShiftValue; +} +inline bool operator>=(Align Lhs, Align Rhs) { + return Lhs.ShiftValue >= Rhs.ShiftValue; +} +inline bool operator<(Align Lhs, Align Rhs) { + return Lhs.ShiftValue < Rhs.ShiftValue; +} +inline bool operator>(Align Lhs, Align Rhs) { + return Lhs.ShiftValue > Rhs.ShiftValue; +} + +/// Comparisons operators between Align and MaybeAlign. +inline bool operator==(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() == (*Rhs).value(); +} +inline bool operator!=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() != (*Rhs).value(); +} +inline bool operator<=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() <= (*Rhs).value(); +} +inline bool operator>=(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() >= (*Rhs).value(); +} +inline bool operator<(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() < (*Rhs).value(); +} +inline bool operator>(Align Lhs, MaybeAlign Rhs) { + ALIGN_CHECK_ISSET(Rhs); + return Lhs.value() > (*Rhs).value(); +} + +/// Comparisons operators between MaybeAlign and Align. 
+inline bool operator==(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() == Rhs.value(); +} +inline bool operator!=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() != Rhs.value(); +} +inline bool operator<=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() <= Rhs.value(); +} +inline bool operator>=(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() >= Rhs.value(); +} +inline bool operator<(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() < Rhs.value(); +} +inline bool operator>(MaybeAlign Lhs, Align Rhs) { + ALIGN_CHECK_ISSET(Lhs); + return Lhs && (*Lhs).value() > Rhs.value(); +} + +inline Align operator/(Align Lhs, uint64_t Divisor) { + assert(llvm::isPowerOf2_64(Divisor) && + "Divisor must be positive and a power of 2"); + assert(Lhs != 1 && "Can't halve byte alignment"); + return Align(Lhs.value() / Divisor); +} + +inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { + assert(llvm::isPowerOf2_64(Divisor) && + "Divisor must be positive and a power of 2"); + return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); +} + +inline Align max(MaybeAlign Lhs, Align Rhs) { + return Lhs && *Lhs > Rhs ? *Lhs : Rhs; +} + +inline Align max(Align Lhs, MaybeAlign Rhs) { + return Rhs && *Rhs > Lhs ? 
*Rhs : Lhs; +} + +#undef ALIGN_CHECK_ISPOSITIVE +#undef ALIGN_CHECK_ISSET + +} // namespace llvm + +#endif // LLVM_SUPPORT_ALIGNMENT_H_ diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index 09e967b98ab..106b90c35bf 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -22,6 +22,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -211,13 +212,11 @@ public: /// Allocate space at the specified alignment. LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * - Allocate(size_t Size, size_t Alignment) { - assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead."); - + Allocate(size_t Size, Align Alignment) { // Keep track of how many bytes we've allocated. BytesAllocated += Size; - size_t Adjustment = alignmentAdjustment(CurPtr, Alignment); + size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment); assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow"); size_t SizeToAllocate = Size; @@ -240,7 +239,7 @@ public: } // If Size is really big, allocate a separate slab for it. - size_t PaddedSize = SizeToAllocate + Alignment - 1; + size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); // We own the new slab and don't want anyone reading anyting other than @@ -268,6 +267,12 @@ public: return AlignedPtr; } + inline LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * + Allocate(size_t Size, size_t Alignment) { + assert(Alignment > 0 && "0-byte alignnment is not allowed. Use 1 instead."); + return Allocate(Size, Align(Alignment)); + } + // Pull in base class overloads. using AllocatorBase::Allocate; @@ -461,7 +466,7 @@ public: /// all memory allocated so far. 
void DestroyAll() { auto DestroyElements = [](char *Begin, char *End) { - assert(Begin == (char *)alignAddr(Begin, alignof(T))); + assert(Begin == (char *)alignAddr(Begin, Align::Of())); for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T)) reinterpret_cast(Ptr)->~T(); }; @@ -470,7 +475,7 @@ public: ++I) { size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize( std::distance(Allocator.Slabs.begin(), I)); - char *Begin = (char *)alignAddr(*I, alignof(T)); + char *Begin = (char *)alignAddr(*I, Align::Of()); char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr : (char *)*I + AllocatedSlabSize; @@ -480,7 +485,8 @@ public: for (auto &PtrAndSize : Allocator.CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - DestroyElements((char *)alignAddr(Ptr, alignof(T)), (char *)Ptr + Size); + DestroyElements((char *)alignAddr(Ptr, Align::Of()), + (char *)Ptr + Size); } Allocator.Reset(); diff --git a/include/llvm/Support/Automaton.h b/include/llvm/Support/Automaton.h new file mode 100644 index 00000000000..7c13a698e49 --- /dev/null +++ b/include/llvm/Support/Automaton.h @@ -0,0 +1,253 @@ +//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements class that drive and introspect deterministic finite- +// state automata (DFAs) as generated by TableGen's -gen-automata backend. +// +// For a description of how to define an automaton, see +// include/llvm/TableGen/Automaton.td. +// +// One important detail is that these deterministic automata are created from +// (potentially) nondeterministic definitions. 
Therefore a unique sequence of +// input symbols will produce one path through the DFA but multiple paths +// through the original NFA. An automaton by default only returns "accepted" or +// "not accepted", but frequently we want to analyze what NFA path was taken. +// Finding a path through the NFA states that results in a DFA state can help +// answer *what* the solution to a problem was, not just that there exists a +// solution. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AUTOMATON_H +#define LLVM_SUPPORT_AUTOMATON_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include +#include +#include + +namespace llvm { + +using NfaPath = SmallVector; + +/// Forward define the pair type used by the automata transition info tables. +/// +/// Experimental results with large tables have shown a significant (multiple +/// orders of magnitude) parsing speedup by using a custom struct here with a +/// trivial constructor rather than std::pair. +struct NfaStatePair { + uint64_t FromDfaState, ToDfaState; + + bool operator<(const NfaStatePair &Other) const { + return std::make_tuple(FromDfaState, ToDfaState) < + std::make_tuple(Other.FromDfaState, Other.ToDfaState); + } +}; + +namespace internal { +/// The internal class that maintains all possible paths through an NFA based +/// on a path through the DFA. +class NfaTranscriber { +private: + /// Cached transition table. This is a table of NfaStatePairs that contains + /// zero-terminated sequences pointed to by DFA transitions. + ArrayRef TransitionInfo; + + /// A simple linked-list of traversed states that can have a shared tail. The + /// traversed path is stored in reverse order with the latest state as the + /// head. + struct PathSegment { + uint64_t State; + PathSegment *Tail; + }; + + /// We allocate segment objects frequently. 
Allocate them upfront and dispose + /// at the end of a traversal rather than hammering the system allocator. + SpecificBumpPtrAllocator Allocator; + + /// Heads of each tracked path. These are not ordered. + std::deque Heads; + + /// The returned paths. This is populated during getPaths. + SmallVector Paths; + + /// Create a new segment and return it. + PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) { + PathSegment *P = Allocator.Allocate(); + *P = {State, Tail}; + return P; + } + + /// Pairs defines a sequence of possible NFA transitions for a single DFA + /// transition. + void transition(ArrayRef Pairs) { + // Iterate over all existing heads. We will mutate the Heads deque during + // iteration. + unsigned NumHeads = Heads.size(); + for (unsigned I = 0; I < NumHeads; ++I) { + PathSegment *Head = Heads[I]; + // The sequence of pairs is sorted. Select the set of pairs that + // transition from the current head state. + auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL}); + auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX}); + // For every transition from the current head state, add a new path + // segment. + for (; PI != PE; ++PI) + if (PI->FromDfaState == Head->State) + Heads.push_back(makePathSegment(PI->ToDfaState, Head)); + } + // Now we've iterated over all the initial heads and added new ones, + // dispose of the original heads. + Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads)); + } + +public: + NfaTranscriber(ArrayRef TransitionInfo) + : TransitionInfo(TransitionInfo) { + reset(); + } + + void reset() { + Paths.clear(); + Heads.clear(); + Allocator.DestroyAll(); + // The initial NFA state is 0. 
+ Heads.push_back(makePathSegment(0ULL, nullptr)); + } + + void transition(unsigned TransitionInfoIdx) { + unsigned EndIdx = TransitionInfoIdx; + while (TransitionInfo[EndIdx].ToDfaState != 0) + ++EndIdx; + ArrayRef Pairs(&TransitionInfo[TransitionInfoIdx], + EndIdx - TransitionInfoIdx); + transition(Pairs); + } + + ArrayRef getPaths() { + Paths.clear(); + for (auto *Head : Heads) { + NfaPath P; + while (Head->State != 0) { + P.push_back(Head->State); + Head = Head->Tail; + } + std::reverse(P.begin(), P.end()); + Paths.push_back(std::move(P)); + } + return Paths; + } +}; +} // namespace internal + +/// A deterministic finite-state automaton. The automaton is defined in +/// TableGen; this object drives an automaton defined by tblgen-emitted tables. +/// +/// An automaton accepts a sequence of input tokens ("actions"). This class is +/// templated on the type of these actions. +template class Automaton { + /// Map from {State, Action} to {NewState, TransitionInfoIdx}. + /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition. + /// FIXME: This uses a std::map because ActionT can be a pair type including + /// an enum. In particular DenseMapInfo must be defined to use + /// DenseMap here. + /// This is a shared_ptr to allow very quick copy-construction of Automata; this + /// state is immutable after construction so this is safe. + using MapTy = std::map, std::pair>; + std::shared_ptr M; + /// An optional transcription object. This uses much more state than simply + /// traversing the DFA for acceptance, so is heap allocated. + std::shared_ptr Transcriber; + /// The initial DFA state is 1. + uint64_t State = 1; + /// True if we should transcribe and false if not (even if Transcriber is defined). + bool Transcribe; + +public: + /// Create an automaton. + /// \param Transitions The Transitions table as created by TableGen. Note that + /// because the action type differs per automaton, the + /// table type is templated as ArrayRef. 
+ /// \param TranscriptionTable The TransitionInfo table as created by TableGen. + /// + /// Providing the TranscriptionTable argument as non-empty will enable the + /// use of transcription, which analyzes the possible paths in the original + /// NFA taken by the DFA. NOTE: This is substantially more work than simply + /// driving the DFA, so unless you require the getPaths() method leave this + /// empty. + template + Automaton(ArrayRef Transitions, + ArrayRef TranscriptionTable = {}) { + if (!TranscriptionTable.empty()) + Transcriber = + std::make_shared(TranscriptionTable); + Transcribe = Transcriber != nullptr; + M = std::make_shared(); + for (const auto &I : Transitions) + // Greedily read and cache the transition table. + M->emplace(std::make_pair(I.FromDfaState, I.Action), + std::make_pair(I.ToDfaState, I.InfoIdx)); + } + Automaton(const Automaton &) = default; + + /// Reset the automaton to its initial state. + void reset() { + State = 1; + if (Transcriber) + Transcriber->reset(); + } + + /// Enable or disable transcription. Transcription is only available if + /// TranscriptionTable was provided to the constructor. + void enableTranscription(bool Enable = true) { + assert(Transcriber && + "Transcription is only available if TranscriptionTable was provided " + "to the Automaton constructor"); + Transcribe = Enable; + } + + /// Transition the automaton based on input symbol A. Return true if the + /// automaton transitioned to a valid state, false if the automaton + /// transitioned to an invalid state. + /// + /// If this function returns false, all methods are undefined until reset() is + /// called. + bool add(const ActionT &A) { + auto I = M->find({State, A}); + if (I == M->end()) + return false; + if (Transcriber && Transcribe) + Transcriber->transition(I->second.second); + State = I->second.first; + return true; + } + + /// Return true if the automaton can be transitioned based on input symbol A. 
+ bool canAdd(const ActionT &A) { + auto I = M->find({State, A}); + return I != M->end(); + } + + /// Obtain a set of possible paths through the input nondeterministic + /// automaton that could be obtained from the sequence of input actions + /// presented to this deterministic automaton. + ArrayRef getNfaPaths() { + assert(Transcriber && Transcribe && + "Can only obtain NFA paths if transcribing!"); + return Transcriber->getPaths(); + } +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_AUTOMATON_H diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 96d09db69ae..67ba2e4189b 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -286,7 +286,7 @@ public: // an exact multiple of the element size. consumeError(std::move(EC)); } - assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0); + assert(isAddrAligned(Align::Of(), Data.data())); return *reinterpret_cast(Data.data()); } diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h index d8fddde66bf..9e16ce227ff 100644 --- a/include/llvm/Support/BinaryStreamReader.h +++ b/include/llvm/Support/BinaryStreamReader.h @@ -198,7 +198,7 @@ public: if (auto EC = readBytes(Bytes, NumElements * sizeof(T))) return EC; - assert(alignmentAdjustment(Bytes.data(), alignof(T)) == 0 && + assert(isAddrAligned(Align::Of(), Bytes.data()) && "Reading at invalid alignment!"); Array = ArrayRef(reinterpret_cast(Bytes.data()), NumElements); diff --git a/include/llvm/Support/CRC.h b/include/llvm/Support/CRC.h index 6ea8e3edcea..210890ae06d 100644 --- a/include/llvm/Support/CRC.h +++ b/include/llvm/Support/CRC.h @@ -6,20 +6,55 @@ // //===----------------------------------------------------------------------===// // -// This file contains basic functions for calculating Cyclic Redundancy Check -// or CRC. +// This file contains implementations of CRC functions. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_CRC_H #define LLVM_SUPPORT_CRC_H -#include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { -/// zlib independent CRC32 calculation. -uint32_t crc32(uint32_t CRC, StringRef S); +template class ArrayRef; + +// Compute the CRC-32 of Data. +uint32_t crc32(ArrayRef Data); + +// Compute the running CRC-32 of Data, with CRC being the previous value of the +// checksum. +uint32_t crc32(uint32_t CRC, ArrayRef Data); + +// Class for computing the JamCRC. +// +// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties +// of this CRC: +// Width : 32 +// Poly : 04C11DB7 +// Init : FFFFFFFF +// RefIn : True +// RefOut : True +// XorOut : 00000000 +// Check : 340BC6D9 (result of CRC for "123456789") +// +// In other words, this is the same as CRC-32, except that XorOut is 0 instead +// of FFFFFFFF. +// +// N.B. We permit flexibility of the "Init" value. Some consumers of this need +// it to be zero. +class JamCRC { +public: + JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} + + // Update the CRC calculation with Data. + void update(ArrayRef Data); + + uint32_t getCRC() const { return CRC; } + +private: + uint32_t CRC; +}; + } // end namespace llvm #endif diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 3cc2c3c0121..63784463e17 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -2000,6 +2000,9 @@ void ResetAllOptionOccurrences(); /// where no options are supported. void ResetCommandLineParser(); +/// Parses `Arg` into the option handler `Handler`. 
+bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i); + } // end namespace cl } // end namespace llvm diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 3f4f465f396..cb7e57d4cd2 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// // // This file defines several macros, based on the current compiler. This allows -// use of compiler-specific features in a way that remains portable. +// use of compiler-specific features in a way that remains portable. This header +// can be included from either C or C++. // //===----------------------------------------------------------------------===// @@ -16,7 +17,9 @@ #include "llvm/Config/llvm-config.h" +#ifdef __cplusplus #include +#endif #include #if defined(_MSC_VER) @@ -35,14 +38,20 @@ # define __has_attribute(x) 0 #endif -#ifndef __has_cpp_attribute -# define __has_cpp_attribute(x) 0 -#endif - #ifndef __has_builtin # define __has_builtin(x) 0 #endif +// Only use __has_cpp_attribute in C++ mode. GCC defines __has_cpp_attribute in +// C mode, but the :: in __has_cpp_attribute(scoped::attribute) is invalid. +#ifndef LLVM_HAS_CPP_ATTRIBUTE +#if defined(__cplusplus) && defined(__has_cpp_attribute) +# define LLVM_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define LLVM_HAS_CPP_ATTRIBUTE(x) 0 +#endif +#endif + /// \macro LLVM_GNUC_PREREQ /// Extend the default __GNUC_PREREQ even if glibc's features.h isn't /// available. @@ -62,13 +71,21 @@ /// \macro LLVM_MSC_PREREQ /// Is the compiler MSVC of at least the specified version? 
/// The common \param version values to check for are: -/// * 1900: Microsoft Visual Studio 2015 / 14.0 +/// * 1910: VS2017, version 15.1 & 15.2 +/// * 1911: VS2017, version 15.3 & 15.4 +/// * 1912: VS2017, version 15.5 +/// * 1913: VS2017, version 15.6 +/// * 1914: VS2017, version 15.7 +/// * 1915: VS2017, version 15.8 +/// * 1916: VS2017, version 15.9 +/// * 1920: VS2019, version 16.0 +/// * 1921: VS2019, version 16.1 #ifdef _MSC_VER #define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version)) -// We require at least MSVC 2015. -#if !LLVM_MSC_PREREQ(1900) -#error LLVM requires at least MSVC 2015. +// We require at least MSVC 2017. +#if !LLVM_MSC_PREREQ(1910) +#error LLVM requires at least MSVC 2017. #endif #else @@ -120,14 +137,18 @@ #endif /// LLVM_NODISCARD - Warn if a type or return value is discarded. -#if __cplusplus > 201402L && __has_cpp_attribute(nodiscard) + +// Use the 'nodiscard' attribute in C++17 or newer mode. +#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) #define LLVM_NODISCARD [[nodiscard]] -#elif !__cplusplus -// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious -// error when __has_cpp_attribute is given a scoped attribute in C mode. -#define LLVM_NODISCARD -#elif __has_cpp_attribute(clang::warn_unused_result) +#elif LLVM_HAS_CPP_ATTRIBUTE(clang::warn_unused_result) #define LLVM_NODISCARD [[clang::warn_unused_result]] +// Clang in C++14 mode claims that it has the 'nodiscard' attribute, but also +// warns in the pedantic mode that 'nodiscard' is a C++17 extension (PR33518). +// Use the 'nodiscard' attribute in C++14 mode only with GCC. +// TODO: remove this workaround when PR33518 is resolved. 
+#elif defined(__GNUC__) && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) +#define LLVM_NODISCARD [[nodiscard]] #else #define LLVM_NODISCARD #endif @@ -139,7 +160,7 @@ // The clang-tidy check bugprone-use-after-move recognizes this attribute as a // marker that a moved-from object has left the indeterminate state and can be // reused. -#if __has_cpp_attribute(clang::reinitializes) +#if LLVM_HAS_CPP_ATTRIBUTE(clang::reinitializes) #define LLVM_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]] #else #define LLVM_ATTRIBUTE_REINITIALIZES @@ -240,15 +261,13 @@ #endif /// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements. -#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) +#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough) #define LLVM_FALLTHROUGH [[fallthrough]] -#elif __has_cpp_attribute(gnu::fallthrough) +#elif LLVM_HAS_CPP_ATTRIBUTE(gnu::fallthrough) #define LLVM_FALLTHROUGH [[gnu::fallthrough]] -#elif !__cplusplus -// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious -// error when __has_cpp_attribute is given a scoped attribute in C mode. -#define LLVM_FALLTHROUGH -#elif __has_cpp_attribute(clang::fallthrough) +#elif __has_attribute(fallthrough) +#define LLVM_FALLTHROUGH __attribute__((fallthrough)) +#elif LLVM_HAS_CPP_ATTRIBUTE(clang::fallthrough) #define LLVM_FALLTHROUGH [[clang::fallthrough]] #else #define LLVM_FALLTHROUGH @@ -256,7 +275,7 @@ /// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that /// they are constant initialized. -#if __has_cpp_attribute(clang::require_constant_initialization) +#if LLVM_HAS_CPP_ATTRIBUTE(clang::require_constant_initialization) #define LLVM_REQUIRE_CONSTANT_INITIALIZATION \ [[clang::require_constant_initialization]] #else @@ -338,14 +357,6 @@ # define LLVM_ASSUME_ALIGNED(p, a) (p) #endif -/// \macro LLVM_ALIGNAS -/// Used to specify a minimum alignment for a structure or variable. 
-#if __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 1) -# define LLVM_ALIGNAS(x) __attribute__((aligned(x))) -#else -# define LLVM_ALIGNAS(x) alignas(x) -#endif - /// \macro LLVM_PACKED /// Used to specify a packed structure. /// LLVM_PACKED( @@ -376,8 +387,8 @@ /// \macro LLVM_PTR_SIZE /// A constant integer equivalent to the value of sizeof(void*). -/// Generally used in combination with LLVM_ALIGNAS or when doing computation in -/// the preprocessor. +/// Generally used in combination with alignas or when doing computation in the +/// preprocessor. #ifdef __SIZEOF_POINTER__ # define LLVM_PTR_SIZE __SIZEOF_POINTER__ #elif defined(_WIN64) @@ -527,6 +538,7 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #define LLVM_ENABLE_EXCEPTIONS 1 #endif +#ifdef __cplusplus namespace llvm { /// Allocate a buffer of memory with the given size and alignment. @@ -569,4 +581,5 @@ inline void deallocate_buffer(void *Ptr, size_t Size, size_t Alignment) { } // End namespace llvm +#endif // __cplusplus #endif diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h index 6b08a2a2a44..f590a1e104f 100644 --- a/include/llvm/Support/DataExtractor.h +++ b/include/llvm/Support/DataExtractor.h @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Error.h" namespace llvm { @@ -42,6 +43,38 @@ class DataExtractor { uint8_t IsLittleEndian; uint8_t AddressSize; public: + /// A class representing a position in a DataExtractor, as well as any error + /// encountered during extraction. It enables one to extract a sequence of + /// values without error-checking and then checking for errors in bulk at the + /// end. The class holds an Error object, so failing to check the result of + /// the parse will result in a runtime error. 
The error flag is sticky and + /// will cause all subsequent extraction functions to fail without even + /// attempting to parse and without updating the Cursor offset. After clearing + /// the error flag, one can again use the Cursor object for parsing. + class Cursor { + uint64_t Offset; + Error Err; + + friend class DataExtractor; + + public: + /// Construct a cursor for extraction from the given offset. + explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {} + + /// Checks whether the cursor is valid (i.e. no errors were encountered). In + /// case of errors, this does not clear the error flag -- one must call + /// takeError() instead. + explicit operator bool() { return !Err; } + + /// Return the current position of this Cursor. In the error state this is + /// the position of the Cursor before the first error was encountered. + uint64_t tell() const { return Offset; } + + /// Return error contained inside this Cursor, if any. Clears the internal + /// Cursor state. + Error takeError() { return std::move(Err); } + }; + /// Construct with a buffer that is owned by the caller. /// /// This constructor allows us to use data that is owned by the @@ -49,6 +82,11 @@ public: /// valid. DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} + DataExtractor(ArrayRef Data, bool IsLittleEndian, + uint8_t AddressSize) + : Data(StringRef(reinterpret_cast(Data.data()), + Data.size())), + IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} /// Get the data pointed to by this extractor. StringRef getData() const { return Data; } @@ -79,17 +117,17 @@ public: /// pointed to by \a offset_ptr is out of bounds, or if the /// offset plus the length of the C string is out of bounds, /// NULL will be returned. - const char *getCStr(uint32_t *offset_ptr) const; + const char *getCStr(uint64_t *offset_ptr) const; - /// Extract a C string from \a *OffsetPtr. 
+ /// Extract a C string from \a *offset_ptr. /// /// Returns a StringRef for the C String from the data at the offset - /// pointed to by \a OffsetPtr. A variable length NULL terminated C - /// string will be extracted and the \a OffsetPtr will be + /// pointed to by \a offset_ptr. A variable length NULL terminated C + /// string will be extracted and the \a offset_ptr will be /// updated with the offset of the byte that follows the NULL /// terminator byte. /// - /// \param[in,out] OffsetPtr + /// \param[in,out] offset_ptr /// A pointer to an offset within the data that will be advanced /// by the appropriate number of bytes if the value is extracted /// correctly. If the offset is out of bounds or there are not @@ -98,10 +136,10 @@ public: /// /// \return /// A StringRef for the C string value in the data. If the offset - /// pointed to by \a OffsetPtr is out of bounds, or if the + /// pointed to by \a offset_ptr is out of bounds, or if the /// offset plus the length of the C string is out of bounds, /// a default-initialized StringRef will be returned. - StringRef getCStrRef(uint32_t *OffsetPtr) const; + StringRef getCStrRef(uint64_t *offset_ptr) const; /// Extract an unsigned integer of size \a byte_size from \a /// *offset_ptr. @@ -124,10 +162,24 @@ public: /// @param[in] byte_size /// The size in byte of the integer to extract. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The unsigned integer value that was extracted, or zero on /// failure. - uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; + uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, + Error *Err = nullptr) const; + + /// Extract an unsigned integer of the given size from the location given by + /// the cursor. 
In case of an extraction error, or if the cursor is already in + /// an error state, zero is returned. + uint64_t getUnsigned(Cursor &C, uint32_t Size) const { + return getUnsigned(&C.Offset, Size, &C.Err); + } /// Extract an signed integer of size \a byte_size from \a *offset_ptr. /// @@ -152,7 +204,7 @@ public: /// @return /// The sign extended signed integer value that was extracted, /// or zero on failure. - int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const; + int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const; //------------------------------------------------------------------ /// Extract an pointer from \a *offset_ptr. @@ -171,10 +223,15 @@ public: /// /// @return /// The extracted pointer value as a 64 integer. - uint64_t getAddress(uint32_t *offset_ptr) const { + uint64_t getAddress(uint64_t *offset_ptr) const { return getUnsigned(offset_ptr, AddressSize); } + /// Extract a pointer-sized unsigned integer from the location given by the + /// cursor. In case of an extraction error, or if the cursor is already in + /// an error state, zero is returned. + uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); } + /// Extract a uint8_t value from \a *offset_ptr. /// /// Extract a single uint8_t from the binary data at the offset @@ -187,9 +244,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint8_t value. - uint8_t getU8(uint32_t *offset_ptr) const; + uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint8_t value from the location given by the cursor. 
In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); } /// Extract \a count uint8_t values from \a *offset_ptr. /// @@ -214,7 +282,27 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; + uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const; + + /// Extract \a Count uint8_t values from the location given by the cursor and + /// store them into the destination buffer. In case of an extraction error, or + /// if the cursor is already in an error state, a nullptr is returned and the + /// destination buffer is left unchanged. + uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const; + + /// Extract \a Count uint8_t values from the location given by the cursor and + /// store them into the destination vector. The vector is resized to fit the + /// extracted data. In case of an extraction error, or if the cursor is + /// already in an error state, the destination vector is left unchanged and + /// cursor is placed into an error state. + void getU8(Cursor &C, SmallVectorImpl &Dst, uint32_t Count) const { + if (isValidOffsetForDataOfSize(C.Offset, Count)) + Dst.resize(Count); + + // This relies on the fact that getU8 will not attempt to write to the + // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false. + getU8(C, Dst.data(), Count); + } //------------------------------------------------------------------ /// Extract a uint16_t value from \a *offset_ptr. @@ -229,10 +317,21 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. 
If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint16_t value. //------------------------------------------------------------------ - uint16_t getU16(uint32_t *offset_ptr) const; + uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint16_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); } /// Extract \a count uint16_t values from \a *offset_ptr. /// @@ -257,7 +356,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; + uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const; /// Extract a 24-bit unsigned value from \a *offset_ptr and return it /// in a uint32_t. @@ -274,7 +373,7 @@ public: /// /// @return /// The extracted 24-bit value represented in a uint32_t. - uint32_t getU24(uint32_t *offset_ptr) const; + uint32_t getU24(uint64_t *offset_ptr) const; /// Extract a uint32_t value from \a *offset_ptr. /// @@ -288,9 +387,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint32_t value. - uint32_t getU32(uint32_t *offset_ptr) const; + uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint32_t value from the location given by the cursor. 
In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); } /// Extract \a count uint32_t values from \a *offset_ptr. /// @@ -315,7 +425,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const; + uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const; /// Extract a uint64_t value from \a *offset_ptr. /// @@ -329,9 +439,20 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted uint64_t value. - uint64_t getU64(uint32_t *offset_ptr) const; + uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const; + + /// Extract a single uint64_t value from the location given by the cursor. In + /// case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); } /// Extract \a count uint64_t values from \a *offset_ptr. /// @@ -356,7 +477,7 @@ public: /// @return /// \a dst if all values were properly extracted and copied, /// NULL otherise. - uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const; + uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const; /// Extract a signed LEB128 value from \a *offset_ptr. /// @@ -374,7 +495,7 @@ public: /// /// @return /// The extracted signed integer value. 
- int64_t getSLEB128(uint32_t *offset_ptr) const; + int64_t getSLEB128(uint64_t *offset_ptr) const; /// Extract a unsigned LEB128 value from \a *offset_ptr. /// @@ -390,23 +511,44 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. /// + /// @param[in,out] Err + /// A pointer to an Error object. Upon return the Error object is set to + /// indicate the result (success/failure) of the function. If the Error + /// object is already set when calling this function, no extraction is + /// performed. + /// /// @return /// The extracted unsigned integer value. - uint64_t getULEB128(uint32_t *offset_ptr) const; + uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const; + + /// Extract an unsigned ULEB128 value from the location given by the cursor. + /// In case of an extraction error, or if the cursor is already in an error + /// state, zero is returned. + uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); } + + /// Advance the Cursor position by the given number of bytes. No-op if the + /// cursor is in an error state. + void skip(Cursor &C, uint64_t Length) const; + + /// Return true iff the cursor is at the end of the buffer, regardless of the + /// error state of the cursor. The only way both eof and error states can be + /// true is if one attempts a read while the cursor is at the very end of the + /// data buffer. + bool eof(const Cursor &C) const { return Data.size() == C.Offset; } /// Test the validity of \a offset. /// /// @return /// \b true if \a offset is a valid offset into the data in this /// object, \b false otherwise. - bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } + bool isValidOffset(uint64_t offset) const { return Data.size() > offset; } /// Test the availability of \a length bytes of data from \a offset. 
/// /// @return /// \b true if \a offset is a valid offset and there are \a /// length bytes available at that offset, \b false otherwise. - bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { + bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const { return offset + length >= offset && isValidOffset(offset + length - 1); } @@ -417,9 +559,15 @@ public: /// \b true if \a offset is a valid offset and there are enough /// bytes for a pointer available at that offset, \b false /// otherwise. - bool isValidOffsetForAddress(uint32_t offset) const { + bool isValidOffsetForAddress(uint64_t offset) const { return isValidOffsetForDataOfSize(offset, AddressSize); } + +protected: + // Make it possible for subclasses to access these fields without making them + // public. + static uint64_t &getOffset(Cursor &C) { return C.Offset; } + static Error &getError(Cursor &C) { return C.Err; } }; } // namespace llvm diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index d8be94427d7..87aecedd3a4 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -203,9 +203,8 @@ inline void writeAtBitAlignment(void *memory, value_type value, namespace detail { -template +template ::value> struct packed_endian_specific_integral { using value_type = ValueType; static constexpr endianness endian = Endian; @@ -246,8 +245,9 @@ struct packed_endian_specific_integral { } private: - AlignedCharArray::value, - sizeof(value_type)> Value; + struct { + alignas(ALIGN) char buffer[sizeof(value_type)]; + } Value; public: struct ref { diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h index 299fce7a136..350877a219b 100644 --- a/include/llvm/Support/Error.h +++ b/include/llvm/Support/Error.h @@ -328,7 +328,7 @@ inline ErrorSuccess Error::success() { return ErrorSuccess(); } /// Make a Error instance representing failure using the given error info /// type. template Error make_error(ArgTs &&... 
Args) { - return Error(llvm::make_unique(std::forward(Args)...)); + return Error(std::make_unique(std::forward(Args)...)); } /// Base class for user error types. Users should declare their error types @@ -548,7 +548,7 @@ public: /// Take ownership of the stored error. /// After calling this the Expected is in an indeterminate state that can /// only be safely destructed. No further calls (beside the destructor) should - /// be made on the Expected vaule. + /// be made on the Expected value. Error takeError() { #if LLVM_ENABLE_ABI_BREAKING_CHECKS Unchecked = false; @@ -704,6 +704,12 @@ inline void cantFail(Error Err, const char *Msg = nullptr) { if (Err) { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + OS << Msg << "\n" << Err; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -728,6 +734,13 @@ T cantFail(Expected ValOrErr, const char *Msg = nullptr) { else { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + auto E = ValOrErr.takeError(); + OS << Msg << "\n" << E; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -752,6 +765,13 @@ T& cantFail(Expected ValOrErr, const char *Msg = nullptr) { else { if (!Msg) Msg = "Failure value returned from cantFail wrapped call"; +#ifndef NDEBUG + std::string Str; + raw_string_ostream OS(Str); + auto E = ValOrErr.takeError(); + OS << Msg << "\n" << E; + Msg = OS.str().c_str(); +#endif llvm_unreachable(Msg); } } @@ -982,6 +1002,20 @@ inline void consumeError(Error Err) { handleAllErrors(std::move(Err), [](const ErrorInfoBase &) {}); } +/// Convert an Expected to an Optional without doing anything. This method +/// should be used only where an error can be considered a reasonable and +/// expected return value. 
+/// +/// Uses of this method are potentially indicative of problems: perhaps the +/// error should be propagated further, or the error-producer should just +/// return an Optional in the first place. +template Optional expectedToOptional(Expected &&E) { + if (E) + return std::move(*E); + consumeError(E.takeError()); + return None; +} + /// Helper for converting an Error to a bool. /// /// This method returns true if Err is in an error state, or false if it is @@ -1170,6 +1204,10 @@ inline Error createStringError(std::error_code EC, char const *Fmt, Error createStringError(std::error_code EC, char const *Msg); +inline Error createStringError(std::error_code EC, const Twine &S) { + return createStringError(EC, S.str().c_str()); +} + template inline Error createStringError(std::errc EC, char const *Fmt, const Ts &... Vals) { diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h index 0cd25a71a3b..2547449246a 100644 --- a/include/llvm/Support/FileCheck.h +++ b/include/llvm/Support/FileCheck.h @@ -13,12 +13,12 @@ #ifndef LLVM_SUPPORT_FILECHECK_H #define LLVM_SUPPORT_FILECHECK_H -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" +#include #include -#include namespace llvm { @@ -30,6 +30,7 @@ struct FileCheckRequest { std::vector GlobalDefines; bool AllowEmptyInput = false; bool MatchFullLines = false; + bool IgnoreCase = false; bool EnableVarScope = false; bool AllowDeprecatedDagOverlap = false; bool Verbose = false; @@ -37,217 +38,7 @@ struct FileCheckRequest { }; //===----------------------------------------------------------------------===// -// Numeric substitution handling code. -//===----------------------------------------------------------------------===// - -/// Base class representing the AST of a given expression. 
-class FileCheckExpressionAST { -public: - virtual ~FileCheckExpressionAST() = default; - - /// Evaluates and \returns the value of the expression represented by this - /// AST or an error if evaluation fails. - virtual Expected eval() const = 0; -}; - -/// Class representing an unsigned literal in the AST of an expression. -class FileCheckExpressionLiteral : public FileCheckExpressionAST { -private: - /// Actual value of the literal. - uint64_t Value; - -public: - /// Constructs a literal with the specified value. - FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} - - /// \returns the literal's value. - Expected eval() const { return Value; } -}; - -/// Class to represent an undefined variable error, which quotes that -/// variable's name when printed. -class FileCheckUndefVarError : public ErrorInfo { -private: - StringRef VarName; - -public: - static char ID; - - FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} - - StringRef getVarName() const { return VarName; } - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print name of variable associated with this error. - void log(raw_ostream &OS) const override { - OS << "\""; - OS.write_escaped(VarName) << "\""; - } -}; - -/// Class representing a numeric variable and its associated current value. -class FileCheckNumericVariable { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Value of numeric variable, if defined, or None otherwise. - Optional Value; - - /// Line number where this variable is defined, or None if defined before - /// input is parsed. Used to determine whether a variable is defined on the - /// same line as a given use. - Optional DefLineNumber; - -public: - /// Constructor for a variable \p Name defined at line \p DefLineNumber or - /// defined before input is parsed if DefLineNumber is None. 
- FileCheckNumericVariable(StringRef Name, - Optional DefLineNumber = None) - : Name(Name), DefLineNumber(DefLineNumber) {} - - /// \returns name of this numeric variable. - StringRef getName() const { return Name; } - - /// \returns this variable's value. - Optional getValue() const { return Value; } - - /// Sets value of this numeric variable, if undefined. Triggers an assertion - /// failure if the variable is actually defined. - void setValue(uint64_t Value); - - /// Clears value of this numeric variable, regardless of whether it is - /// currently defined or not. - void clearValue(); - - /// \returns the line number where this variable is defined, if any, or None - /// if defined before input is parsed. - Optional getDefLineNumber() { return DefLineNumber; } -}; - -/// Class representing the use of a numeric variable in the AST of an -/// expression. -class FileCheckNumericVariableUse : public FileCheckExpressionAST { -private: - /// Name of the numeric variable. - StringRef Name; - - /// Pointer to the class instance for the variable this use is about. - FileCheckNumericVariable *NumericVariable; - -public: - FileCheckNumericVariableUse(StringRef Name, - FileCheckNumericVariable *NumericVariable) - : Name(Name), NumericVariable(NumericVariable) {} - - /// \returns the value of the variable referenced by this instance. - Expected eval() const; -}; - -/// Type of functions evaluating a given binary operation. -using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); - -/// Class representing a single binary operation in the AST of an expression. -class FileCheckASTBinop : public FileCheckExpressionAST { -private: - /// Left operand. - std::unique_ptr LeftOperand; - - /// Right operand. - std::unique_ptr RightOperand; - - /// Pointer to function that can evaluate this binary operation. 
- binop_eval_t EvalBinop; - -public: - FileCheckASTBinop(binop_eval_t EvalBinop, - std::unique_ptr LeftOp, - std::unique_ptr RightOp) - : EvalBinop(EvalBinop) { - LeftOperand = std::move(LeftOp); - RightOperand = std::move(RightOp); - } - - /// Evaluates the value of the binary operation represented by this AST, - /// using EvalBinop on the result of recursively evaluating the operands. - /// \returns the expression value or an error if an undefined numeric - /// variable is used in one of the operands. - Expected eval() const; -}; - -class FileCheckPatternContext; - -/// Class representing a substitution to perform in the RegExStr string. -class FileCheckSubstitution { -protected: - /// Pointer to a class instance holding, among other things, the table with - /// the values of live string variables at the start of any given CHECK line. - /// Used for substituting string variables with the text they were defined - /// as. Expressions are linked to the numeric variables they use at - /// parse time and directly access the value of the numeric variable to - /// evaluate their value. - FileCheckPatternContext *Context; - - /// The string that needs to be substituted for something else. For a - /// string variable this is its name, otherwise this is the whole expression. - StringRef FromStr; - - // Index in RegExStr of where to do the substitution. - size_t InsertIdx; - -public: - FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, - size_t InsertIdx) - : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} - - virtual ~FileCheckSubstitution() = default; - - /// \returns the string to be substituted for something else. - StringRef getFromString() const { return FromStr; } - - /// \returns the index where the substitution is to be performed in RegExStr. 
- size_t getIndex() const { return InsertIdx; } - - /// \returns a string containing the result of the substitution represented - /// by this class instance or an error if substitution failed. - virtual Expected getResult() const = 0; -}; - -class FileCheckStringSubstitution : public FileCheckSubstitution { -public: - FileCheckStringSubstitution(FileCheckPatternContext *Context, - StringRef VarName, size_t InsertIdx) - : FileCheckSubstitution(Context, VarName, InsertIdx) {} - - /// \returns the text that the string variable in this substitution matched - /// when defined, or an error if the variable is undefined. - Expected getResult() const override; -}; - -class FileCheckNumericSubstitution : public FileCheckSubstitution { -private: - /// Pointer to the class representing the expression whose value is to be - /// substituted. - std::unique_ptr ExpressionAST; - -public: - FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, - std::unique_ptr ExprAST, - size_t InsertIdx) - : FileCheckSubstitution(Context, Expr, InsertIdx) { - ExpressionAST = std::move(ExprAST); - } - - /// \returns a string containing the result of evaluating the expression in - /// this substitution, or an error if evaluation failed. - Expected getResult() const override; -}; - -//===----------------------------------------------------------------------===// -// Pattern handling code. +// Summary of a FileCheck diagnostic. //===----------------------------------------------------------------------===// namespace Check { @@ -291,325 +82,6 @@ public: }; } // namespace Check -struct FileCheckDiag; - -/// Class holding the FileCheckPattern global state, shared by all patterns: -/// tables holding values of variables and whether they are defined or not at -/// any given time in the matching process. 
-class FileCheckPatternContext { - friend class FileCheckPattern; - -private: - /// When matching a given pattern, this holds the value of all the string - /// variables defined in previous patterns. In a pattern, only the last - /// definition for a given variable is recorded in this table. - /// Back-references are used for uses after any the other definition. - StringMap GlobalVariableTable; - - /// Map of all string variables defined so far. Used at parse time to detect - /// a name conflict between a numeric variable and a string variable when - /// the former is defined on a later line than the latter. - StringMap DefinedVariableTable; - - /// When matching a given pattern, this holds the pointers to the classes - /// representing the numeric variables defined in previous patterns. When - /// matching a pattern all definitions for that pattern are recorded in the - /// NumericVariableDefs table in the FileCheckPattern instance of that - /// pattern. - StringMap GlobalNumericVariableTable; - - /// Pointer to the class instance representing the @LINE pseudo variable for - /// easily updating its value. - FileCheckNumericVariable *LineVariable = nullptr; - - /// Vector holding pointers to all parsed numeric variables. Used to - /// automatically free them once they are guaranteed to no longer be used. - std::vector> NumericVariables; - - /// Vector holding pointers to all substitutions. Used to automatically free - /// them once they are guaranteed to no longer be used. - std::vector> Substitutions; - -public: - /// \returns the value of string variable \p VarName or an error if no such - /// variable has been defined. - Expected getPatternVarValue(StringRef VarName); - - /// Defines string and numeric variables from definitions given on the - /// command line, passed as a vector of [#]VAR=VAL strings in - /// \p CmdlineDefines. \returns an error list containing diagnostics against - /// \p SM for all definition parsing failures, if any, or Success otherwise. 
- Error defineCmdlineVariables(std::vector &CmdlineDefines, - SourceMgr &SM); - - /// Create @LINE pseudo variable. Value is set when pattern are being - /// matched. - void createLineVariable(); - - /// Undefines local variables (variables whose name does not start with a '$' - /// sign), i.e. removes them from GlobalVariableTable and from - /// GlobalNumericVariableTable and also clears the value of numeric - /// variables. - void clearLocalVars(); - -private: - /// Makes a new numeric variable and registers it for destruction when the - /// context is destroyed. - template - FileCheckNumericVariable *makeNumericVariable(Types... args); - - /// Makes a new string substitution and registers it for destruction when the - /// context is destroyed. - FileCheckSubstitution *makeStringSubstitution(StringRef VarName, - size_t InsertIdx); - - /// Makes a new numeric substitution and registers it for destruction when - /// the context is destroyed. - FileCheckSubstitution * - makeNumericSubstitution(StringRef ExpressionStr, - std::unique_ptr ExpressionAST, - size_t InsertIdx); -}; - -/// Class to represent an error holding a diagnostic with location information -/// used when printing it. -class FileCheckErrorDiagnostic : public ErrorInfo { -private: - SMDiagnostic Diagnostic; - -public: - static char ID; - - FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. 
- void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } - - static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { - return make_error( - SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); - } - - static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { - return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); - } -}; - -class FileCheckNotFoundError : public ErrorInfo { -public: - static char ID; - - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } - - /// Print diagnostic associated with this error when printing the error. - void log(raw_ostream &OS) const override { - OS << "String not found in input"; - } -}; - -class FileCheckPattern { - SMLoc PatternLoc; - - /// A fixed string to match as the pattern or empty if this pattern requires - /// a regex match. - StringRef FixedStr; - - /// A regex string to match as the pattern or empty if this pattern requires - /// a fixed string to match. - std::string RegExStr; - - /// Entries in this vector represent a substitution of a string variable or - /// an expression in the RegExStr regex at match time. For example, in the - /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", - /// RegExStr will contain "foobaz" and we'll get two entries in this vector - /// that tells us to insert the value of string variable "bar" at offset 3 - /// and the value of expression "N+1" at offset 6. - std::vector Substitutions; - - /// Maps names of string variables defined in a pattern to the number of - /// their parenthesis group in RegExStr capturing their last definition. - /// - /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", - /// RegExStr will be "foo(.*)baz(\1(.*))" where is - /// the value captured for QUUX on the earlier line where it was defined, and - /// VariableDefs will map "bar" to the third parenthesis group which captures - /// the second definition of "bar". 
- /// - /// Note: uses std::map rather than StringMap to be able to get the key when - /// iterating over values. - std::map VariableDefs; - - /// Structure representing the definition of a numeric variable in a pattern. - /// It holds the pointer to the class representing the numeric variable whose - /// value is being defined and the number of the parenthesis group in - /// RegExStr to capture that value. - struct FileCheckNumericVariableMatch { - /// Pointer to class representing the numeric variable whose value is being - /// defined. - FileCheckNumericVariable *DefinedNumericVariable; - - /// Number of the parenthesis group in RegExStr that captures the value of - /// this numeric variable definition. - unsigned CaptureParenGroup; - }; - - /// Holds the number of the parenthesis group in RegExStr and pointer to the - /// corresponding FileCheckNumericVariable class instance of all numeric - /// variable definitions. Used to set the matched value of all those - /// variables. - StringMap NumericVariableDefs; - - /// Pointer to a class instance holding the global state shared by all - /// patterns: - /// - separate tables with the values of live string and numeric variables - /// respectively at the start of any given CHECK line; - /// - table holding whether a string variable has been defined at any given - /// point during the parsing phase. - FileCheckPatternContext *Context; - - Check::FileCheckType CheckTy; - - /// Line number for this CHECK pattern or None if it is an implicit pattern. - /// Used to determine whether a variable definition is made on an earlier - /// line to the one with this CHECK. - Optional LineNumber; - -public: - FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, - Optional Line = None) - : Context(Context), CheckTy(Ty), LineNumber(Line) {} - - /// \returns the location in source code. 
- SMLoc getLoc() const { return PatternLoc; } - - /// \returns the pointer to the global state for all patterns in this - /// FileCheck instance. - FileCheckPatternContext *getContext() const { return Context; } - - /// \returns whether \p C is a valid first character for a variable name. - static bool isValidVarNameStart(char C); - - /// Parsing information about a variable. - struct VariableProperties { - StringRef Name; - bool IsPseudo; - }; - - /// Parses the string at the start of \p Str for a variable name. \returns - /// a VariableProperties structure holding the variable name and whether it - /// is the name of a pseudo variable, or an error holding a diagnostic - /// against \p SM if parsing fail. If parsing was successful, also strips - /// \p Str from the variable name. - static Expected parseVariable(StringRef &Str, - const SourceMgr &SM); - /// Parses \p Expr for the name of a numeric variable to be defined at line - /// \p LineNumber or before input is parsed if \p LineNumber is None. - /// \returns a pointer to the class instance representing that variable, - /// creating it if needed, or an error holding a diagnostic against \p SM - /// should defining such a variable be invalid. - static Expected parseNumericVariableDefinition( - StringRef &Expr, FileCheckPatternContext *Context, - Optional LineNumber, const SourceMgr &SM); - /// Parses \p Expr for a numeric substitution block. Parameter - /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE - /// expression. \returns a pointer to the class instance representing the AST - /// of the expression whose value must be substituted, or an error holding a - /// diagnostic against \p SM if parsing fails. If substitution was - /// successful, sets \p DefinedNumericVariable to point to the class - /// representing the numeric variable being defined in this numeric - /// substitution block, or None if this block does not define any variable. 
- Expected> - parseNumericSubstitutionBlock( - StringRef Expr, - Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, const SourceMgr &SM) const; - /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern - /// instance accordingly. - /// - /// \p Prefix provides which prefix is being matched, \p Req describes the - /// global options that influence the parsing such as whitespace - /// canonicalization, \p SM provides the SourceMgr used for error reports. - /// \returns true in case of an error, false otherwise. - bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, - const FileCheckRequest &Req); - /// Matches the pattern string against the input buffer \p Buffer - /// - /// \returns the position that is matched or an error indicating why matching - /// failed. If there is a match, updates \p MatchLen with the size of the - /// matched string. - /// - /// The GlobalVariableTable StringMap in the FileCheckPatternContext class - /// instance provides the current values of FileCheck string variables and - /// is updated if this match defines new values. Likewise, the - /// GlobalNumericVariableTable StringMap in the same class provides the - /// current values of FileCheck numeric variables and is updated if this - /// match defines new numeric values. - Expected match(StringRef Buffer, size_t &MatchLen, - const SourceMgr &SM) const; - /// Prints the value of successful substitutions or the name of the undefined - /// string or numeric variables preventing a successful substitution. 
- void printSubstitutions(const SourceMgr &SM, StringRef Buffer, - SMRange MatchRange = None) const; - void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, - std::vector *Diags) const; - - bool hasVariable() const { - return !(Substitutions.empty() && VariableDefs.empty()); - } - - Check::FileCheckType getCheckTy() const { return CheckTy; } - - int getCount() const { return CheckTy.getCount(); } - -private: - bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); - void AddBackrefToRegEx(unsigned BackrefNum); - /// Computes an arbitrary estimate for the quality of matching this pattern - /// at the start of \p Buffer; a distance of zero should correspond to a - /// perfect match. - unsigned computeMatchDistance(StringRef Buffer) const; - /// Finds the closing sequence of a regex variable usage or definition. - /// - /// \p Str has to point in the beginning of the definition (right after the - /// opening sequence). \p SM holds the SourceMgr used for error repporting. - /// \returns the offset of the closing sequence within Str, or npos if it - /// was not found. - size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); - - /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use. - /// \returns the pointer to the class instance representing that variable if - /// successful, or an error holding a diagnostic against \p SM otherwise. - Expected> - parseNumericVariableUse(StringRef Name, bool IsPseudo, - const SourceMgr &SM) const; - enum class AllowedOperand { LineVar, Literal, Any }; - /// Parses \p Expr for use of a numeric operand. Accepts both literal values - /// and numeric variables, depending on the value of \p AO. \returns the - /// class representing that operand in the AST of the expression or an error - /// holding a diagnostic against \p SM otherwise. - Expected> - parseNumericOperand(StringRef &Expr, AllowedOperand AO, - const SourceMgr &SM) const; - /// Parses \p Expr for a binary operation. 
The left operand of this binary - /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether - /// we are parsing a legacy @LINE expression. \returns the class representing - /// the binary operation in the AST of the expression, or an error holding a - /// diagnostic against \p SM otherwise. - Expected> - parseBinop(StringRef &Expr, std::unique_ptr LeftOp, - bool IsLegacyLineExpr, const SourceMgr &SM) const; -}; - -//===----------------------------------------------------------------------===// -/// Summary of a FileCheck diagnostic. -//===----------------------------------------------------------------------===// - struct FileCheckDiag { /// What is the FileCheck directive for this diagnostic? Check::FileCheckType CheckTy; @@ -659,61 +131,20 @@ struct FileCheckDiag { SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange); }; -//===----------------------------------------------------------------------===// -// Check Strings. -//===----------------------------------------------------------------------===// - -/// A check that we found in the input file. -struct FileCheckString { - /// The pattern to match. - FileCheckPattern Pat; - - /// Which prefix name this check matched. - StringRef Prefix; - - /// The location in the match file that the check string was specified. - SMLoc Loc; - - /// All of the strings that are disallowed from occurring between this match - /// string and the previous one (or start of file). - std::vector DagNotStrings; - - FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) - : Pat(P), Prefix(S), Loc(L) {} - - /// Matches check string and its "not strings" and/or "dag strings". - size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, - size_t &MatchLen, FileCheckRequest &Req, - std::vector *Diags) const; - - /// Verifies that there is a single line in the given \p Buffer. Errors are - /// reported against \p SM. 
- bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that there is no newline in the given \p Buffer. Errors are - /// reported against \p SM. - bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; - /// Verifies that none of the strings in \p NotStrings are found in the given - /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in - /// \p Diags according to the verbosity level set in \p Req. - bool CheckNot(const SourceMgr &SM, StringRef Buffer, - const std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; - /// Matches "dag strings" and their mixed "not strings". - size_t CheckDag(const SourceMgr &SM, StringRef Buffer, - std::vector &NotStrings, - const FileCheckRequest &Req, - std::vector *Diags) const; -}; +class FileCheckPatternContext; +struct FileCheckString; /// FileCheck class takes the request and exposes various methods that /// use information from the request. class FileCheck { FileCheckRequest Req; - FileCheckPatternContext PatternContext; + std::unique_ptr PatternContext; + // C++17 TODO: make this a plain std::vector. + std::unique_ptr> CheckStrings; public: - FileCheck(FileCheckRequest Req) : Req(Req) {} + explicit FileCheck(FileCheckRequest Req); + ~FileCheck(); // Combines the check prefixes into a single regex so that we can efficiently // scan for any of the set. @@ -723,13 +154,11 @@ public: Regex buildCheckPrefixRegex(); /// Reads the check file from \p Buffer and records the expected strings it - /// contains in the \p CheckStrings vector. Errors are reported against - /// \p SM. + /// contains. Errors are reported against \p SM. /// /// Only expected strings whose prefix is one of those listed in \p PrefixRE /// are recorded. \returns true in case of an error, false otherwise. 
- bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, - std::vector &CheckStrings); + bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE); bool ValidateCheckPrefixes(); @@ -739,13 +168,14 @@ public: SmallVectorImpl &OutputBuffer); /// Checks the input to FileCheck provided in the \p Buffer against the - /// \p CheckStrings read from the check file and record diagnostics emitted + /// expected strings read from the check file and record diagnostics emitted /// in \p Diags. Errors are recorded against \p SM. /// /// \returns false if the input fails to satisfy the checks. - bool CheckInput(SourceMgr &SM, StringRef Buffer, - ArrayRef CheckStrings, + bool checkInput(SourceMgr &SM, StringRef Buffer, std::vector *Diags = nullptr); }; + } // namespace llvm + #endif diff --git a/include/llvm/Support/FileCollector.h b/include/llvm/Support/FileCollector.h new file mode 100644 index 00000000000..19429bd3e9b --- /dev/null +++ b/include/llvm/Support/FileCollector.h @@ -0,0 +1,79 @@ +//===-- FileCollector.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FILE_COLLECTOR_H +#define LLVM_SUPPORT_FILE_COLLECTOR_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/VirtualFileSystem.h" + +#include + +namespace llvm { + +/// Collects files into a directory and generates a mapping that can be used by +/// the VFS. +class FileCollector { +public: + FileCollector(std::string Root, std::string OverlayRoot); + + void addFile(const Twine &file); + + /// Write the yaml mapping (for the VFS) to the given file. 
+ std::error_code writeMapping(StringRef mapping_file); + + /// Copy the files into the root directory. + /// + /// When StopOnError is true (the default) we abort as soon as one file + /// cannot be copied. This is relatively common, for example when a file was + /// removed after it was added to the mapping. + std::error_code copyFiles(bool StopOnError = true); + + /// Create a VFS that collects all the paths that might be looked at by the + /// file system accesses. + static IntrusiveRefCntPtr + createCollectorVFS(IntrusiveRefCntPtr BaseFS, + std::shared_ptr Collector); + +private: + void addFileImpl(StringRef SrcPath); + + bool markAsSeen(StringRef Path) { return Seen.insert(Path).second; } + + bool getRealPath(StringRef SrcPath, SmallVectorImpl &Result); + + void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { + VFSWriter.addFileMapping(VirtualPath, RealPath); + } + +protected: + /// Synchronizes adding files. + std::mutex Mutex; + + /// The root directory where files are copied. + std::string Root; + + /// The root directory where the VFS overlay lives. + std::string OverlayRoot; + + /// Tracks already seen files so they can be skipped. + StringSet<> Seen; + + /// The yaml mapping writer. + vfs::YAMLVFSWriter VFSWriter; + + /// Caches RealPath calls when resolving symlinks. + StringMap SymlinkMap; +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_FILE_COLLECTOR_H diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 1bec27bddad..a29a9d78794 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -991,29 +991,27 @@ file_t getStdoutHandle(); /// Returns kInvalidFile when the stream is closed. file_t getStderrHandle(); -/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. The number of -/// bytes actually read is returned in \p BytesRead. On Unix, this is equivalent -/// to `*BytesRead = ::read(FD, Buf.data(), Buf.size())`, with error reporting. 
-/// BytesRead will contain zero when reaching EOF. +/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. Returns the number +/// of bytes actually read. On Unix, this is equivalent to `return ::read(FD, +/// Buf.data(), Buf.size())`, with error reporting. Returns 0 when reaching EOF. /// /// @param FileHandle File to read from. /// @param Buf Buffer to read into. -/// @param BytesRead Output parameter of the number of bytes read. -/// @returns The error, if any, or errc::success. -std::error_code readNativeFile(file_t FileHandle, MutableArrayRef Buf, - size_t *BytesRead); +/// @returns The number of bytes read, or error. +Expected readNativeFile(file_t FileHandle, MutableArrayRef Buf); /// Reads \p Buf.size() bytes from \p FileHandle at offset \p Offset into \p /// Buf. If 'pread' is available, this will use that, otherwise it will use -/// 'lseek'. Bytes requested beyond the end of the file will be zero -/// initialized. +/// 'lseek'. Returns the number of bytes actually read. Returns 0 when reaching +/// EOF. /// /// @param FileHandle File to read from. /// @param Buf Buffer to read into. /// @param Offset Offset into the file at which the read should occur. -/// @returns The error, if any, or errc::success. -std::error_code readNativeFileSlice(file_t FileHandle, - MutableArrayRef Buf, size_t Offset); +/// @returns The number of bytes read, or error. +Expected readNativeFileSlice(file_t FileHandle, + MutableArrayRef Buf, + uint64_t Offset); /// @brief Opens the file with the given name in a write-only or read-write /// mode, returning its open file descriptor. If the file does not exist, it @@ -1217,9 +1215,9 @@ class directory_entry { // that whole structure, callers end up paying for a stat(). // std::filesystem::directory_entry may be a better model. std::string Path; - file_type Type; // Most platforms can provide this. - bool FollowSymlinks; // Affects the behavior of status(). - basic_file_status Status; // If available. 
+ file_type Type = file_type::type_unknown; // Most platforms can provide this. + bool FollowSymlinks = true; // Affects the behavior of status(). + basic_file_status Status; // If available. public: explicit directory_entry(const Twine &Path, bool FollowSymlinks = true, diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h index 16b2206924c..04efdced32a 100644 --- a/include/llvm/Support/FileUtilities.h +++ b/include/llvm/Support/FileUtilities.h @@ -14,6 +14,9 @@ #ifndef LLVM_SUPPORT_FILEUTILITIES_H #define LLVM_SUPPORT_FILEUTILITIES_H +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -72,6 +75,41 @@ namespace llvm { /// will not be removed when the object is destroyed. void releaseFile() { DeleteIt = false; } }; + + enum class atomic_write_error { + failed_to_create_uniq_file = 0, + output_stream_error, + failed_to_rename_temp_file + }; + + class AtomicFileWriteError : public llvm::ErrorInfo { + public: + AtomicFileWriteError(atomic_write_error Error) : Error(Error) {} + + void log(raw_ostream &OS) const override; + + const atomic_write_error Error; + static char ID; + + private: + // Users are not expected to use error_code. + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + }; + + // atomic_write_error + whatever the Writer can return + + /// Creates a unique file with name according to the given \p TempPathModel, + /// writes content of \p Buffer to the file and renames it to \p FinalPath. + /// + /// \returns \c AtomicFileWriteError in case of error. 
+ llvm::Error writeFileAtomically(StringRef TempPathModel, StringRef FinalPath, + StringRef Buffer); + + llvm::Error + writeFileAtomically(StringRef TempPathModel, StringRef FinalPath, + std::function Writer); } // End llvm namespace #endif diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h index 77dcbaebf1a..9dd7b401b46 100644 --- a/include/llvm/Support/Format.h +++ b/include/llvm/Support/Format.h @@ -29,6 +29,7 @@ #include #include #include +#include namespace llvm { @@ -91,7 +92,7 @@ class format_object final : public format_object_base { template int snprint_tuple(char *Buffer, unsigned BufferSize, - index_sequence) const { + std::index_sequence) const { #ifdef _MSC_VER return _snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); #else @@ -106,7 +107,7 @@ public: } int snprint(char *Buffer, unsigned BufferSize) const override { - return snprint_tuple(Buffer, BufferSize, index_sequence_for()); + return snprint_tuple(Buffer, BufferSize, std::index_sequence_for()); } }; diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 99620802505..9169379f746 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -242,7 +242,7 @@ protected: using DomTreeNodeMapType = DenseMap>>; DomTreeNodeMapType DomTreeNodes; - DomTreeNodeBase *RootNode; + DomTreeNodeBase *RootNode = nullptr; ParentPtr Parent = nullptr; mutable bool DFSInfoValid = false; @@ -571,7 +571,7 @@ protected: assert(IDomNode && "Not immediate dominator specified for block!"); DFSInfoValid = false; return (DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))).get(); + std::make_unique>(BB, IDomNode))).get(); } /// Add a new node to the forward dominator tree and make it a new root. 
@@ -585,7 +585,7 @@ protected: "Cannot change root of post-dominator tree"); DFSInfoValid = false; DomTreeNodeBase *NewNode = (DomTreeNodes[BB] = - llvm::make_unique>(BB, nullptr)).get(); + std::make_unique>(BB, nullptr)).get(); if (Roots.empty()) { addRoot(BB); } else { diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index ccceba88171..7c0278e8770 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -186,7 +186,7 @@ struct SemiNCAInfo { // Add a new tree node for this NodeT, and link it as a child of // IDomNode return (DT.DomTreeNodes[BB] = IDomNode->addChild( - llvm::make_unique>(BB, IDomNode))) + std::make_unique>(BB, IDomNode))) .get(); } @@ -586,7 +586,7 @@ struct SemiNCAInfo { NodePtr Root = IsPostDom ? nullptr : DT.Roots[0]; DT.RootNode = (DT.DomTreeNodes[Root] = - llvm::make_unique>(Root, nullptr)) + std::make_unique>(Root, nullptr)) .get(); SNCA.attachNewSubtree(DT, DT.RootNode); } @@ -611,7 +611,7 @@ struct SemiNCAInfo { // Add a new tree node for this BasicBlock, and link it as a child of // IDomNode. DT.DomTreeNodes[W] = IDomNode->addChild( - llvm::make_unique>(W, IDomNode)); + std::make_unique>(W, IDomNode)); } } @@ -663,7 +663,7 @@ struct SemiNCAInfo { TreeNodePtr VirtualRoot = DT.getNode(nullptr); FromTN = (DT.DomTreeNodes[From] = VirtualRoot->addChild( - llvm::make_unique>(From, VirtualRoot))) + std::make_unique>(From, VirtualRoot))) .get(); DT.Roots.push_back(From); } diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h index 66a4cd94c12..0098ac65fd3 100644 --- a/include/llvm/Support/GlobPattern.h +++ b/include/llvm/Support/GlobPattern.h @@ -21,7 +21,7 @@ #include // This class represents a glob pattern. Supported metacharacters -// are "*", "?", "[]" and "[^]". +// are "*", "?", "\", "[]", "[^]", and "[!]". 
namespace llvm { class BitVector; template class ArrayRef; diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h index b37cc514c92..44f543c363d 100644 --- a/include/llvm/Support/Host.h +++ b/include/llvm/Support/Host.h @@ -15,39 +15,11 @@ #include "llvm/ADT/StringMap.h" -#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) -#include -#elif defined(_AIX) -#include -#elif defined(__sun) -/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ -#include -#define BIG_ENDIAN 4321 -#define LITTLE_ENDIAN 1234 -#if defined(_BIG_ENDIAN) -#define BYTE_ORDER BIG_ENDIAN -#else -#define BYTE_ORDER LITTLE_ENDIAN -#endif -#else -#if !defined(BYTE_ORDER) && !defined(_WIN32) -#include -#endif -#endif - #include namespace llvm { namespace sys { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN -constexpr bool IsBigEndianHost = true; -#else -constexpr bool IsBigEndianHost = false; -#endif - - static const bool IsLittleEndianHost = !IsBigEndianHost; - /// getDefaultTargetTriple() - Return the default target triple the compiler /// has been configured to produce code for. /// diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h deleted file mode 100644 index b6fc4e7b9b0..00000000000 --- a/include/llvm/Support/JamCRC.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an implementation of JamCRC. 
-// -// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties -// of this CRC: -// Width : 32 -// Poly : 04C11DB7 -// Init : FFFFFFFF -// RefIn : True -// RefOut : True -// XorOut : 00000000 -// Check : 340BC6D9 (result of CRC for "123456789") -// -// N.B. We permit flexibility of the "Init" value. Some consumers of this need -// it to be zero. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_JAMCRC_H -#define LLVM_SUPPORT_JAMCRC_H - -#include "llvm/Support/DataTypes.h" - -namespace llvm { -template class ArrayRef; - -class JamCRC { -public: - JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} - - // Update the CRC calculation with Data. - void update(ArrayRef Data); - - uint32_t getCRC() const { return CRC; } - -private: - uint32_t CRC; -}; -} // End of namespace llvm - -#endif diff --git a/include/llvm/Support/MachineValueType.h b/include/llvm/Support/MachineValueType.h index b94d2c4836c..7f9f0b85c55 100644 --- a/include/llvm/Support/MachineValueType.h +++ b/include/llvm/Support/MachineValueType.h @@ -17,6 +17,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include namespace llvm { @@ -64,152 +65,162 @@ namespace llvm { v32i1 = 19, // 32 x i1 v64i1 = 20, // 64 x i1 v128i1 = 21, // 128 x i1 - v512i1 = 22, // 512 x i1 - v1024i1 = 23, // 1024 x i1 + v256i1 = 22, // 256 x i1 + v512i1 = 23, // 512 x i1 + v1024i1 = 24, // 1024 x i1 - v1i8 = 24, // 1 x i8 - v2i8 = 25, // 2 x i8 - v4i8 = 26, // 4 x i8 - v8i8 = 27, // 8 x i8 - v16i8 = 28, // 16 x i8 - v32i8 = 29, // 32 x i8 - v64i8 = 30, // 64 x i8 - v128i8 = 31, //128 x i8 - v256i8 = 32, //256 x i8 + v1i8 = 25, // 1 x i8 + v2i8 = 26, // 2 x i8 + v4i8 = 27, // 4 x i8 + v8i8 = 28, // 8 x i8 + v16i8 = 29, // 16 x i8 + v32i8 = 30, // 32 x i8 + v64i8 = 31, // 64 x i8 + v128i8 = 32, //128 x i8 + v256i8 = 33, //256 x i8 - v1i16 = 33, // 1 
x i16 - v2i16 = 34, // 2 x i16 - v4i16 = 35, // 4 x i16 - v8i16 = 36, // 8 x i16 - v16i16 = 37, // 16 x i16 - v32i16 = 38, // 32 x i16 - v64i16 = 39, // 64 x i16 - v128i16 = 40, //128 x i16 + v1i16 = 34, // 1 x i16 + v2i16 = 35, // 2 x i16 + v3i16 = 36, // 3 x i16 + v4i16 = 37, // 4 x i16 + v8i16 = 38, // 8 x i16 + v16i16 = 39, // 16 x i16 + v32i16 = 40, // 32 x i16 + v64i16 = 41, // 64 x i16 + v128i16 = 42, //128 x i16 - v1i32 = 41, // 1 x i32 - v2i32 = 42, // 2 x i32 - v3i32 = 43, // 3 x i32 - v4i32 = 44, // 4 x i32 - v5i32 = 45, // 5 x i32 - v8i32 = 46, // 8 x i32 - v16i32 = 47, // 16 x i32 - v32i32 = 48, // 32 x i32 - v64i32 = 49, // 64 x i32 - v128i32 = 50, // 128 x i32 - v256i32 = 51, // 256 x i32 - v512i32 = 52, // 512 x i32 - v1024i32 = 53, // 1024 x i32 - v2048i32 = 54, // 2048 x i32 + v1i32 = 43, // 1 x i32 + v2i32 = 44, // 2 x i32 + v3i32 = 45, // 3 x i32 + v4i32 = 46, // 4 x i32 + v5i32 = 47, // 5 x i32 + v8i32 = 48, // 8 x i32 + v16i32 = 49, // 16 x i32 + v32i32 = 50, // 32 x i32 + v64i32 = 51, // 64 x i32 + v128i32 = 52, // 128 x i32 + v256i32 = 53, // 256 x i32 + v512i32 = 54, // 512 x i32 + v1024i32 = 55, // 1024 x i32 + v2048i32 = 56, // 2048 x i32 - v1i64 = 55, // 1 x i64 - v2i64 = 56, // 2 x i64 - v4i64 = 57, // 4 x i64 - v8i64 = 58, // 8 x i64 - v16i64 = 59, // 16 x i64 - v32i64 = 60, // 32 x i64 + v1i64 = 57, // 1 x i64 + v2i64 = 58, // 2 x i64 + v4i64 = 59, // 4 x i64 + v8i64 = 60, // 8 x i64 + v16i64 = 61, // 16 x i64 + v32i64 = 62, // 32 x i64 - v1i128 = 61, // 1 x i128 + v1i128 = 63, // 1 x i128 - // Scalable integer types - nxv1i1 = 62, // n x 1 x i1 - nxv2i1 = 63, // n x 2 x i1 - nxv4i1 = 64, // n x 4 x i1 - nxv8i1 = 65, // n x 8 x i1 - nxv16i1 = 66, // n x 16 x i1 - nxv32i1 = 67, // n x 32 x i1 + FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, + LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - nxv1i8 = 68, // n x 1 x i8 - nxv2i8 = 69, // n x 2 x i8 - nxv4i8 = 70, // n x 4 x i8 - nxv8i8 = 71, // n x 8 x i8 - nxv16i8 = 72, // n x 16 x i8 
- nxv32i8 = 73, // n x 32 x i8 + v2f16 = 64, // 2 x f16 + v3f16 = 65, // 3 x f16 + v4f16 = 66, // 4 x f16 + v8f16 = 67, // 8 x f16 + v16f16 = 68, // 16 x f16 + v32f16 = 69, // 32 x f16 + v1f32 = 70, // 1 x f32 + v2f32 = 71, // 2 x f32 + v3f32 = 72, // 3 x f32 + v4f32 = 73, // 4 x f32 + v5f32 = 74, // 5 x f32 + v8f32 = 75, // 8 x f32 + v16f32 = 76, // 16 x f32 + v32f32 = 77, // 32 x f32 + v64f32 = 78, // 64 x f32 + v128f32 = 79, // 128 x f32 + v256f32 = 80, // 256 x f32 + v512f32 = 81, // 512 x f32 + v1024f32 = 82, // 1024 x f32 + v2048f32 = 83, // 2048 x f32 + v1f64 = 84, // 1 x f64 + v2f64 = 85, // 2 x f64 + v4f64 = 86, // 4 x f64 + v8f64 = 87, // 8 x f64 - nxv1i16 = 74, // n x 1 x i16 - nxv2i16 = 75, // n x 2 x i16 - nxv4i16 = 76, // n x 4 x i16 - nxv8i16 = 77, // n x 8 x i16 - nxv16i16 = 78, // n x 16 x i16 - nxv32i16 = 79, // n x 32 x i16 + FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16, + LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64, - nxv1i32 = 80, // n x 1 x i32 - nxv2i32 = 81, // n x 2 x i32 - nxv4i32 = 82, // n x 4 x i32 - nxv8i32 = 83, // n x 8 x i32 - nxv16i32 = 84, // n x 16 x i32 - nxv32i32 = 85, // n x 32 x i32 + FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, + LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64, - nxv1i64 = 86, // n x 1 x i64 - nxv2i64 = 87, // n x 2 x i64 - nxv4i64 = 88, // n x 4 x i64 - nxv8i64 = 89, // n x 8 x i64 - nxv16i64 = 90, // n x 16 x i64 - nxv32i64 = 91, // n x 32 x i64 + nxv1i1 = 88, // n x 1 x i1 + nxv2i1 = 89, // n x 2 x i1 + nxv4i1 = 90, // n x 4 x i1 + nxv8i1 = 91, // n x 8 x i1 + nxv16i1 = 92, // n x 16 x i1 + nxv32i1 = 93, // n x 32 x i1 - FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, - LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, + nxv1i8 = 94, // n x 1 x i8 + nxv2i8 = 95, // n x 2 x i8 + nxv4i8 = 96, // n x 4 x i8 + nxv8i8 = 97, // n x 8 x i8 + nxv16i8 = 98, // n x 16 x i8 + nxv32i8 = 99, // n x 32 x i8 - FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, - LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, + nxv1i16 = 100, // n x 1 x i16 + nxv2i16 = 101, // n x 2 
x i16 + nxv4i16 = 102, // n x 4 x i16 + nxv8i16 = 103, // n x 8 x i16 + nxv16i16 = 104, // n x 16 x i16 + nxv32i16 = 105, // n x 32 x i16 - v2f16 = 92, // 2 x f16 - v4f16 = 93, // 4 x f16 - v8f16 = 94, // 8 x f16 - v1f32 = 95, // 1 x f32 - v2f32 = 96, // 2 x f32 - v3f32 = 97, // 3 x f32 - v4f32 = 98, // 4 x f32 - v5f32 = 99, // 5 x f32 - v8f32 = 100, // 8 x f32 - v16f32 = 101, // 16 x f32 - v32f32 = 102, // 32 x f32 - v64f32 = 103, // 64 x f32 - v128f32 = 104, // 128 x f32 - v256f32 = 105, // 256 x f32 - v512f32 = 106, // 512 x f32 - v1024f32 = 107, // 1024 x f32 - v2048f32 = 108, // 2048 x f32 - v1f64 = 109, // 1 x f64 - v2f64 = 110, // 2 x f64 - v4f64 = 111, // 4 x f64 - v8f64 = 112, // 8 x f64 + nxv1i32 = 106, // n x 1 x i32 + nxv2i32 = 107, // n x 2 x i32 + nxv4i32 = 108, // n x 4 x i32 + nxv8i32 = 109, // n x 8 x i32 + nxv16i32 = 110, // n x 16 x i32 + nxv32i32 = 111, // n x 32 x i32 - nxv2f16 = 113, // n x 2 x f16 - nxv4f16 = 114, // n x 4 x f16 - nxv8f16 = 115, // n x 8 x f16 - nxv1f32 = 116, // n x 1 x f32 - nxv2f32 = 117, // n x 2 x f32 - nxv4f32 = 118, // n x 4 x f32 - nxv8f32 = 119, // n x 8 x f32 - nxv16f32 = 120, // n x 16 x f32 - nxv1f64 = 121, // n x 1 x f64 - nxv2f64 = 122, // n x 2 x f64 - nxv4f64 = 123, // n x 4 x f64 - nxv8f64 = 124, // n x 8 x f64 + nxv1i64 = 112, // n x 1 x i64 + nxv2i64 = 113, // n x 2 x i64 + nxv4i64 = 114, // n x 4 x i64 + nxv8i64 = 115, // n x 8 x i64 + nxv16i64 = 116, // n x 16 x i64 + nxv32i64 = 117, // n x 32 x i64 - FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = nxv8f64, + FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, + LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, - LAST_FP_SCALABLE_VALUETYPE = nxv8f64, + nxv2f16 = 118, // n x 2 x f16 + nxv4f16 = 119, // n x 4 x f16 + nxv8f16 = 120, // n x 8 x f16 + nxv1f32 = 121, // n x 1 x f32 + nxv2f32 = 122, // n x 2 x f32 + nxv4f32 = 123, // n x 4 x f32 + nxv8f32 = 124, // n x 8 x f32 + nxv16f32 = 125, // n x 16 x 
f32 + nxv1f64 = 126, // n x 1 x f64 + nxv2f64 = 127, // n x 2 x f64 + nxv4f64 = 128, // n x 4 x f64 + nxv8f64 = 129, // n x 8 x f64 + + FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16, + LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, + + FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1, + LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64, FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 125, // This is an X86 MMX value + x86mmx = 130, // This is an X86 MMX value - Glue = 126, // This glues nodes together during pre-RA sched + Glue = 131, // This glues nodes together during pre-RA sched - isVoid = 127, // This has no value + isVoid = 132, // This has no value - Untyped = 128, // This value takes a register, but has + Untyped = 133, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - exnref = 129, // WebAssembly's exnref type + exnref = 134, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 130, // This always remains at the end of the list. + LAST_VALUETYPE = 135, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. 
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -253,41 +264,6 @@ namespace llvm { SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE; - // A class to represent the number of elements in a vector - // - // For fixed-length vectors, the total number of elements is equal to 'Min' - // For scalable vectors, the total number of elements is a multiple of 'Min' - class ElementCount { - public: - unsigned Min; - bool Scalable; - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - - ElementCount& operator*=(unsigned RHS) { - Min *= RHS; - return *this; - } - - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - ElementCount& operator/=(unsigned RHS) { - Min /= RHS; - return *this; - } - - bool operator==(const ElementCount& RHS) { - return Min == RHS.Min && Scalable == RHS.Scalable; - } - }; - constexpr MVT() = default; constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} @@ -308,16 +284,20 @@ namespace llvm { bool isFloatingPoint() const { return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE && SimpleTy <= MVT::LAST_FP_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE)); + (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE)); } /// Return true if this is an integer or a vector integer type. 
bool isInteger() const { return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || - (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE)); + (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) || + (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE)); } /// Return true if this is an integer, not including vectors. @@ -335,10 +315,13 @@ namespace llvm { /// Return true if this is a vector value type where the /// runtime length is machine dependent bool isScalableVector() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); + return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE); + } + + bool isFixedLengthVector() const { + return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE); } /// Return true if this is a 16-bit vector type. @@ -373,17 +356,18 @@ namespace llvm { /// Return true if this is a 256-bit vector type. bool is256BitVector() const { - return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64); + return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 || + SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 || + SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 || + SimpleTy == MVT::v4i64 || SimpleTy == MVT::v256i1); } /// Return true if this is a 512-bit vector type. 
bool is512BitVector() const { - return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || - SimpleTy == MVT::v8i64); + return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 || + SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 || + SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 || + SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64); } /// Return true if this is a 1024-bit vector type. @@ -406,6 +390,15 @@ namespace llvm { SimpleTy==MVT::vAny || SimpleTy==MVT::iPTRAny); } + /// Return a VT for a vector type with the same element type but + /// half the number of elements. + MVT getHalfNumVectorElementsVT() const { + MVT EltVT = getVectorElementType(); + auto EltCnt = getVectorElementCount(); + assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + return getVectorVT(EltVT, EltCnt / 2); + } + /// Returns true if the given vector is a power of 2. bool isPow2VectorType() const { unsigned NElts = getVectorNumElements(); @@ -440,6 +433,7 @@ namespace llvm { case v32i1: case v64i1: case v128i1: + case v256i1: case v512i1: case v1024i1: case nxv1i1: @@ -465,6 +459,7 @@ namespace llvm { case nxv32i8: return i8; case v1i16: case v2i16: + case v3i16: case v4i16: case v8i16: case v16i16: @@ -511,8 +506,11 @@ namespace llvm { case nxv32i64: return i64; case v1i128: return i128; case v2f16: + case v3f16: case v4f16: case v8f16: + case v16f16: + case v32f16: case nxv2f16: case nxv4f16: case nxv8f16: return f16; @@ -558,6 +556,7 @@ namespace llvm { case v512i1: case v512i32: case v512f32: return 512; + case v256i1: case v256i8: case v256i32: case v256f32: return 256; @@ -576,6 +575,7 @@ namespace llvm { case v32i16: case v32i32: case v32i64: + case v32f16: case v32f32: case nxv32i1: case nxv32i8: @@ -587,6 +587,7 @@ namespace llvm { case v16i16: case v16i32: case v16i64: + case v16f16: case v16f32: case nxv16i1: case nxv16i8: @@ -628,7 +629,9 @@ 
namespace llvm { case nxv4f16: case nxv4f32: case nxv4f64: return 4; + case v3i16: case v3i32: + case v3f16: case v3f32: return 3; case v2i1: case v2i8: @@ -664,7 +667,7 @@ namespace llvm { } } - MVT::ElementCount getVectorElementCount() const { + ElementCount getVectorElementCount() const { return { getVectorNumElements(), isScalableVector() }; } @@ -721,6 +724,8 @@ namespace llvm { case nxv1i32: case nxv2f16: case nxv1f32: return 32; + case v3i16: + case v3f16: return 48; case x86mmx: case f64 : case i64 : @@ -763,10 +768,12 @@ namespace llvm { case nxv2f64: return 128; case v5i32: case v5f32: return 160; + case v256i1: case v32i8: case v16i16: case v8i32: case v4i64: + case v16f16: case v8f32: case v4f64: case nxv32i8: @@ -780,6 +787,7 @@ namespace llvm { case v32i16: case v16i32: case v8i64: + case v32f16: case v16f32: case v8f64: case nxv32i16: @@ -900,6 +908,7 @@ namespace llvm { if (NumElements == 32) return MVT::v32i1; if (NumElements == 64) return MVT::v64i1; if (NumElements == 128) return MVT::v128i1; + if (NumElements == 256) return MVT::v256i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; @@ -917,6 +926,7 @@ namespace llvm { case MVT::i16: if (NumElements == 1) return MVT::v1i16; if (NumElements == 2) return MVT::v2i16; + if (NumElements == 3) return MVT::v3i16; if (NumElements == 4) return MVT::v4i16; if (NumElements == 8) return MVT::v8i16; if (NumElements == 16) return MVT::v16i16; @@ -953,8 +963,11 @@ namespace llvm { break; case MVT::f16: if (NumElements == 2) return MVT::v2f16; + if (NumElements == 3) return MVT::v3f16; if (NumElements == 4) return MVT::v4f16; if (NumElements == 8) return MVT::v8f16; + if (NumElements == 16) return MVT::v16f16; + if (NumElements == 32) return MVT::v32f16; break; case MVT::f32: if (NumElements == 1) return MVT::v1f32; @@ -1054,7 +1067,7 @@ namespace llvm { return getVectorVT(VT, NumElements); } - static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { + static MVT 
getVectorVT(MVT VT, ElementCount EC) { if (EC.Scalable) return getScalableVectorVT(VT, EC.Min); return getVectorVT(VT, EC.Min); @@ -1108,26 +1121,40 @@ namespace llvm { (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1)); } - static mvt_range integer_vector_valuetypes() { + static mvt_range fixedlen_vector_valuetypes() { return mvt_range( - MVT::FIRST_INTEGER_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1)); + MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE + 1)); } - static mvt_range fp_vector_valuetypes() { + static mvt_range scalable_vector_valuetypes() { return mvt_range( - MVT::FIRST_FP_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); + MVT::FIRST_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_SCALABLE_VECTOR_VALUETYPE + 1)); + } + + static mvt_range integer_fixedlen_vector_valuetypes() { + return mvt_range( + MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE + 1)); + } + + static mvt_range fp_fixedlen_vector_valuetypes() { + return mvt_range( + MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE + 1)); } static mvt_range integer_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); + return mvt_range( + MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE + 1)); } static mvt_range fp_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); + return mvt_range( + MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE + 1)); } /// @} }; diff --git a/include/llvm/Support/MathExtras.h 
b/include/llvm/Support/MathExtras.h index 249139e824b..004a6f5f6eb 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -39,6 +39,7 @@ unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); #endif namespace llvm { + /// The behavior an operation has on an input of 0. enum ZeroBehavior { /// The returned value is undefined. @@ -49,6 +50,42 @@ enum ZeroBehavior { ZB_Width }; +/// Mathematical constants. +namespace numbers { +// TODO: Track C++20 std::numbers. +// TODO: Favor using the hexadecimal FP constants (requires C++17). +constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 + egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 + ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 + ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392 + log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) + log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) + pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 + inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 + sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 + inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 + sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193 + inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) + sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 + inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) + phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 +constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 + egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 + ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 + ln10f =
2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 + log2ef = 1.44269504F, // (0x1.715476P+0) + log10ef = .434294482F, // (0x1.bcb7b2P-2) + pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 + inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 + sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 + inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 + sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 + inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) + sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 + inv_sqrt3f = .577350269F, // (0x1.279a74P-1) + phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 +} // namespace numbers + namespace detail { template struct TrailingZerosCounter { static unsigned count(T Val, ZeroBehavior) { @@ -73,13 +110,13 @@ template struct TrailingZerosCounter { } }; -#if __GNUC__ >= 4 || defined(_MSC_VER) +#if defined(__GNUC__) || defined(_MSC_VER) template struct TrailingZerosCounter { static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; -#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_ctz) || defined(__GNUC__) return __builtin_ctz(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -95,7 +132,7 @@ template struct TrailingZerosCounter { if (ZB != ZB_Undefined && Val == 0) return 64; -#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_ctzll) || defined(__GNUC__) return __builtin_ctzll(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -142,13 +179,13 @@ template struct LeadingZerosCounter { } }; -#if __GNUC__ >= 4 || defined(_MSC_VER) +#if defined(__GNUC__) || defined(_MSC_VER) template struct LeadingZerosCounter { static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; -#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_clz) 
|| defined(__GNUC__) return __builtin_clz(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -164,7 +201,7 @@ template struct LeadingZerosCounter { if (ZB != ZB_Undefined && Val == 0) return 64; -#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_builtin(__builtin_clzll) || defined(__GNUC__) return __builtin_clzll(Val); #elif defined(_MSC_VER) unsigned long Index; @@ -486,7 +523,7 @@ template struct PopulationCounter { static unsigned count(T Value) { // Generic version, forward to 32 bits. static_assert(SizeOfT <= 4, "Not implemented!"); -#if __GNUC__ >= 4 +#if defined(__GNUC__) return __builtin_popcount(Value); #else uint32_t v = Value; @@ -499,7 +536,7 @@ template struct PopulationCounter { template struct PopulationCounter { static unsigned count(T Value) { -#if __GNUC__ >= 4 +#if defined(__GNUC__) return __builtin_popcountll(Value); #else uint64_t v = Value; @@ -523,6 +560,16 @@ inline unsigned countPopulation(T Value) { return detail::PopulationCounter::count(Value); } +/// Compile time Log2. +/// Valid only for positive powers of two. +template constexpr inline size_t CTLog2() { + static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), + "Value is not a valid power of 2"); + return 1 + CTLog2(); +} + +template <> constexpr inline size_t CTLog2<1>() { return 0; } + /// Return the log base 2 of the specified value. inline double Log2(double Value) { #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 @@ -620,25 +667,6 @@ constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { return (A | B) & (1 + ~(A | B)); } -/// Aligns \c Addr to \c Alignment bytes, rounding up. -/// -/// Alignment should be a power of two. This method rounds up, so -/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8. 
-inline uintptr_t alignAddr(const void *Addr, size_t Alignment) { - assert(Alignment && isPowerOf2_64((uint64_t)Alignment) && - "Alignment is not a power of two!"); - - assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr); - - return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1)); -} - -/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment -/// bytes, rounding up. -inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) { - return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; -} - /// Returns the next power of two (in 64-bits) that is strictly greater than A. /// Returns zero on overflow. inline uint64_t NextPowerOf2(uint64_t A) { @@ -704,19 +732,6 @@ inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { return alignTo(Numerator, Denominator) / Denominator; } -/// \c alignTo for contexts where a constant expression is required. -/// \sa alignTo -/// -/// \todo FIXME: remove when \c constexpr becomes really \c constexpr -template -struct AlignTo { - static_assert(Align != 0u, "Align must be non-zero"); - template - struct from_value { - static const uint64_t value = (Value + Align - 1) / Align * Align; - }; -}; - /// Returns the largest uint64_t less than or equal to \p Value and is /// \p Skew mod \p Align. \p Align must be non-zero inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { @@ -725,13 +740,6 @@ inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { return (Value - Skew) / Align * Align + Skew; } -/// Returns the offset to the next integer (mod 2**64) that is greater than -/// or equal to \p Value and is a multiple of \p Align. \p Align must be -/// non-zero. -inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) { - return alignTo(Value, Align) - Value; -} - /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. /// Requires 0 < B <= 32. 
template constexpr inline int32_t SignExtend32(uint32_t X) { @@ -853,6 +861,91 @@ SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. extern const float huge_valf; + + +/// Add two signed integers, computing the two's complement truncated result, +/// returning true if overflow occurred. +template +typename std::enable_if::value, T>::type +AddOverflow(T X, T Y, T &Result) { +#if __has_builtin(__builtin_add_overflow) + return __builtin_add_overflow(X, Y, &Result); +#else + // Perform the unsigned addition. + using U = typename std::make_unsigned::type; + const U UX = static_cast(X); + const U UY = static_cast(Y); + const U UResult = UX + UY; + + // Convert to signed. + Result = static_cast(UResult); + + // Adding two positive numbers should result in a positive number. + if (X > 0 && Y > 0) + return Result <= 0; + // Adding two negatives should result in a negative number. + if (X < 0 && Y < 0) + return Result >= 0; + return false; +#endif +} + +/// Subtract two signed integers, computing the two's complement truncated +/// result, returning true if an overflow occurred. +template +typename std::enable_if::value, T>::type +SubOverflow(T X, T Y, T &Result) { +#if __has_builtin(__builtin_sub_overflow) + return __builtin_sub_overflow(X, Y, &Result); +#else + // Perform the unsigned subtraction. + using U = typename std::make_unsigned::type; + const U UX = static_cast(X); + const U UY = static_cast(Y); + const U UResult = UX - UY; + + // Convert to signed. + Result = static_cast(UResult); + + // Subtracting a positive number from a negative results in a negative number. + if (X <= 0 && Y > 0) + return Result >= 0; + // Subtracting a negative number from a positive results in a positive number.
+ if (X >= 0 && Y < 0) + return Result <= 0; + return false; +#endif +} + + +/// Multiply two signed integers, computing the two's complement truncated +/// result, returning true if an overflow occurred. +template +typename std::enable_if::value, T>::type +MulOverflow(T X, T Y, T &Result) { + // Perform the unsigned multiplication on absolute values. + using U = typename std::make_unsigned::type; + const U UX = X < 0 ? (0 - static_cast(X)) : static_cast(X); + const U UY = Y < 0 ? (0 - static_cast(Y)) : static_cast(Y); + const U UResult = UX * UY; + + // Convert to signed. + const bool IsNegative = (X < 0) ^ (Y < 0); + Result = IsNegative ? (0 - UResult) : UResult; + + // If any of the args was 0, result is 0 and no overflow occurs. + if (UX == 0 || UY == 0) + return false; + + // UX and UY are in [1, 2^n], where n is the number of digits. + // Check how the max allowed absolute value (2^n for negative, 2^n - 1 for + // positive) divided by an argument compares to the other. + if (IsNegative) + return UX > (static_cast(std::numeric_limits::max()) + U(1)) / UY; + else + return UX > (static_cast(std::numeric_limits::max())) / UY; +} + } // End llvm namespace #endif diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h index c3abfc7a780..1d8a0d3c87c 100644 --- a/include/llvm/Support/Mutex.h +++ b/include/llvm/Support/Mutex.h @@ -13,97 +13,31 @@ #ifndef LLVM_SUPPORT_MUTEX_H #define LLVM_SUPPORT_MUTEX_H -#include "llvm/Config/llvm-config.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Threading.h" #include +#include namespace llvm { namespace sys { - /// Platform agnostic Mutex class. - class MutexImpl - { - /// @name Constructors - /// @{ - public: - - /// Initializes the lock but doesn't acquire it. if \p recursive is set - /// to false, the lock will not be recursive which makes it cheaper but - /// also more likely to deadlock (same thread can't acquire more than - /// once). - /// Default Constructor.
- explicit MutexImpl(bool recursive = true); - - /// Releases and removes the lock - /// Destructor - ~MutexImpl(); - - /// @} - /// @name Methods - /// @{ - public: - - /// Attempts to unconditionally acquire the lock. If the lock is held by - /// another thread, this method will wait until it can acquire the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock. - bool acquire(); - - /// Attempts to release the lock. If the lock is held by the current - /// thread, the lock is released allowing other threads to acquire the - /// lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock. - bool release(); - - /// Attempts to acquire the lock without blocking. If the lock is not - /// available, this function returns false quickly (without blocking). If - /// the lock is available, it is acquired. - /// @returns false if any kind of error occurs or the lock is not - /// available, true otherwise. - /// Try to acquire the lock. - bool tryacquire(); - - //@} - /// @name Platform Dependent Data - /// @{ - private: -#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 - void* data_; ///< We don't know what the data will be -#endif - - /// @} - /// @name Do Not Implement - /// @{ - private: - MutexImpl(const MutexImpl &) = delete; - void operator=(const MutexImpl &) = delete; - /// @} - }; - - /// SmartMutex - A mutex with a compile time constant parameter that /// indicates whether this mutex should become a no-op when we're not /// running in multithreaded mode. 
template class SmartMutex { - MutexImpl impl; - unsigned acquired; - bool recursive; - public: - explicit SmartMutex(bool rec = true) : - impl(rec), acquired(0), recursive(rec) { } + std::recursive_mutex impl; + unsigned acquired = 0; + public: bool lock() { if (!mt_only || llvm_is_multithreaded()) { - return impl.acquire(); + impl.lock(); + return true; } else { // Single-threaded debugging code. This would be racy in // multithreaded mode, but provides not sanity checks in single // threaded mode. - assert((recursive || acquired == 0) && "Lock already acquired!!"); ++acquired; return true; } @@ -111,13 +45,13 @@ namespace llvm bool unlock() { if (!mt_only || llvm_is_multithreaded()) { - return impl.release(); + impl.unlock(); + return true; } else { // Single-threaded debugging code. This would be racy in // multithreaded mode, but provides not sanity checks in single // threaded mode. - assert(((recursive && acquired) || (acquired == 1)) && - "Lock not acquired before release!"); + assert(acquired && "Lock not acquired before release!"); --acquired; return true; } @@ -125,31 +59,16 @@ namespace llvm bool try_lock() { if (!mt_only || llvm_is_multithreaded()) - return impl.tryacquire(); + return impl.try_lock(); else return true; } - - private: - SmartMutex(const SmartMutex & original); - void operator=(const SmartMutex &); }; /// Mutex - A standard, always enforced mutex. 
typedef SmartMutex Mutex; - template - class SmartScopedLock { - SmartMutex& mtx; - - public: - SmartScopedLock(SmartMutex& m) : mtx(m) { - mtx.lock(); - } - - ~SmartScopedLock() { - mtx.unlock(); - } - }; + template + using SmartScopedLock = std::lock_guard>; typedef SmartScopedLock ScopedLock; } diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h deleted file mode 100644 index d86ced14581..00000000000 --- a/include/llvm/Support/MutexGuard.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a guard for a block of code that ensures a Mutex is locked -// upon construction and released upon destruction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_MUTEXGUARD_H -#define LLVM_SUPPORT_MUTEXGUARD_H - -#include "llvm/Support/Mutex.h" - -namespace llvm { - /// Instances of this class acquire a given Mutex Lock when constructed and - /// hold that lock until destruction. The intention is to instantiate one of - /// these on the stack at the top of some scope to be assured that C++ - /// destruction of the object will always release the Mutex and thus avoid - /// a host of nasty multi-threading problems in the face of exceptions, etc. - /// Guard a section of code with a Mutex. - class MutexGuard { - sys::Mutex &M; - MutexGuard(const MutexGuard &) = delete; - void operator=(const MutexGuard &) = delete; - public: - MutexGuard(sys::Mutex &m) : M(m) { M.lock(); } - ~MutexGuard() { M.unlock(); } - /// holds - Returns true if this locker instance holds the specified lock. 
- /// This is mostly used in assertions to validate that the correct mutex - /// is held. - bool holds(const sys::Mutex& lock) const { return &M == &lock; } - }; -} - -#endif // LLVM_SUPPORT_MUTEXGUARD_H diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h index d84da92aab9..11dc0de0f35 100644 --- a/include/llvm/Support/OnDiskHashTable.h +++ b/include/llvm/Support/OnDiskHashTable.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H #define LLVM_SUPPORT_ONDISKHASHTABLE_H +#include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/EndianStream.h" @@ -207,7 +208,7 @@ public: // Pad with zeros so that we can start the hashtable at an aligned address. offset_type TableOff = Out.tell(); - uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type)); + uint64_t N = offsetToAlignment(TableOff, Align(alignof(offset_type))); TableOff += N; while (N--) LE.write(0); diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h index eab9b492c4a..3c0ed2c1112 100644 --- a/include/llvm/Support/Parallel.h +++ b/include/llvm/Support/Parallel.h @@ -18,14 +18,6 @@ #include #include -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#pragma warning(push) -#pragma warning(disable : 4530) -#include -#include -#pragma warning(pop) -#endif - namespace llvm { namespace parallel { @@ -84,23 +76,6 @@ public: void sync() const { L.sync(); } }; -#if defined(_MSC_VER) -template -void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp) { - concurrency::parallel_sort(Start, End, Comp); -} -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - concurrency::parallel_for_each(Begin, End, Fn); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - concurrency::parallel_for(Begin, End, Fn); -} - -#else const ptrdiff_t MinParallelSize = 1024; /// Inclusive median. 
@@ -188,8 +163,6 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { #endif -#endif - template using DefComparator = std::less::value_type>; diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h index 9cd57cbd65a..150bc7dbbce 100644 --- a/include/llvm/Support/RWMutex.h +++ b/include/llvm/Support/RWMutex.h @@ -16,161 +16,184 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" #include +#include +#include + +// std::shared_timed_mutex is only availble on macOS 10.12 and later. +#if defined(__APPLE__) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) +#if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200 +#define LLVM_USE_RW_MUTEX_IMPL +#endif +#endif namespace llvm { namespace sys { - /// Platform agnostic RWMutex class. - class RWMutexImpl - { - /// @name Constructors - /// @{ - public: +#if defined(LLVM_USE_RW_MUTEX_IMPL) +/// Platform agnostic RWMutex class. +class RWMutexImpl { + /// @name Constructors + /// @{ +public: + /// Initializes the lock but doesn't acquire it. + /// Default Constructor. + explicit RWMutexImpl(); - /// Initializes the lock but doesn't acquire it. - /// Default Constructor. - explicit RWMutexImpl(); + /// @} + /// @name Do Not Implement + /// @{ + RWMutexImpl(const RWMutexImpl &original) = delete; + RWMutexImpl &operator=(const RWMutexImpl &) = delete; + /// @} - /// @} - /// @name Do Not Implement - /// @{ - RWMutexImpl(const RWMutexImpl & original) = delete; - RWMutexImpl &operator=(const RWMutexImpl &) = delete; - /// @} + /// Releases and removes the lock + /// Destructor + ~RWMutexImpl(); - /// Releases and removes the lock - /// Destructor - ~RWMutexImpl(); + /// @} + /// @name Methods + /// @{ +public: + /// Attempts to unconditionally acquire the lock in reader mode. If the + /// lock is held by a writer, this method will wait until it can acquire + /// the lock. + /// @returns false if any kind of error occurs, true otherwise. 
+ /// Unconditionally acquire the lock in reader mode. + bool lock_shared(); - /// @} - /// @name Methods - /// @{ - public: + /// Attempts to release the lock in reader mode. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally release the lock in reader mode. + bool unlock_shared(); - /// Attempts to unconditionally acquire the lock in reader mode. If the - /// lock is held by a writer, this method will wait until it can acquire - /// the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock in reader mode. - bool reader_acquire(); + /// Attempts to unconditionally acquire the lock in reader mode. If the + /// lock is held by any readers, this method will wait until it can + /// acquire the lock. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally acquire the lock in writer mode. + bool lock(); - /// Attempts to release the lock in reader mode. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock in reader mode. - bool reader_release(); + /// Attempts to release the lock in writer mode. + /// @returns false if any kind of error occurs, true otherwise. + /// Unconditionally release the lock in write mode. + bool unlock(); - /// Attempts to unconditionally acquire the lock in reader mode. If the - /// lock is held by any readers, this method will wait until it can - /// acquire the lock. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally acquire the lock in writer mode. - bool writer_acquire(); - - /// Attempts to release the lock in writer mode. - /// @returns false if any kind of error occurs, true otherwise. - /// Unconditionally release the lock in write mode. 
- bool writer_release(); - - //@} - /// @name Platform Dependent Data - /// @{ - private: + //@} + /// @name Platform Dependent Data + /// @{ +private: #if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 - void* data_ = nullptr; ///< We don't know what the data will be + void *data_ = nullptr; ///< We don't know what the data will be +#endif +}; #endif - }; - /// SmartMutex - An R/W mutex with a compile time constant parameter that - /// indicates whether this mutex should become a no-op when we're not - /// running in multithreaded mode. - template - class SmartRWMutex { - RWMutexImpl impl; - unsigned readers = 0; - unsigned writers = 0; +/// SmartMutex - An R/W mutex with a compile time constant parameter that +/// indicates whether this mutex should become a no-op when we're not +/// running in multithreaded mode. +template class SmartRWMutex { + // shared_mutex (C++17) is more efficient than shared_timed_mutex (C++14) + // on Windows and always available on MSVC. +#if defined(_MSC_VER) || __cplusplus > 201402L + std::shared_mutex impl; +#else +#if !defined(LLVM_USE_RW_MUTEX_IMPL) + std::shared_timed_mutex impl; +#else + RWMutexImpl impl; +#endif +#endif + unsigned readers = 0; + unsigned writers = 0; - public: - explicit SmartRWMutex() = default; - SmartRWMutex(const SmartRWMutex & original) = delete; - SmartRWMutex &operator=(const SmartRWMutex &) = delete; +public: + bool lock_shared() { + if (!mt_only || llvm_is_multithreaded()) { + impl.lock_shared(); + return true; + } - bool lock_shared() { - if (!mt_only || llvm_is_multithreaded()) - return impl.reader_acquire(); + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + ++readers; + return true; + } - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. 
- ++readers; - return true; - } + bool unlock_shared() { + if (!mt_only || llvm_is_multithreaded()) { + impl.unlock_shared(); + return true; + } - bool unlock_shared() { - if (!mt_only || llvm_is_multithreaded()) - return impl.reader_release(); + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + assert(readers > 0 && "Reader lock not acquired before release!"); + --readers; + return true; + } - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert(readers > 0 && "Reader lock not acquired before release!"); - --readers; - return true; - } + bool lock() { + if (!mt_only || llvm_is_multithreaded()) { + impl.lock(); + return true; + } - bool lock() { - if (!mt_only || llvm_is_multithreaded()) - return impl.writer_acquire(); + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + assert(writers == 0 && "Writer lock already acquired!"); + ++writers; + return true; + } - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert(writers == 0 && "Writer lock already acquired!"); - ++writers; - return true; - } + bool unlock() { + if (!mt_only || llvm_is_multithreaded()) { + impl.unlock(); + return true; + } - bool unlock() { - if (!mt_only || llvm_is_multithreaded()) - return impl.writer_release(); + // Single-threaded debugging code. This would be racy in multithreaded + // mode, but provides not sanity checks in single threaded mode. + assert(writers == 1 && "Writer lock not acquired before release!"); + --writers; + return true; + } +}; - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. 
- assert(writers == 1 && "Writer lock not acquired before release!"); - --writers; - return true; - } - }; +typedef SmartRWMutex RWMutex; - typedef SmartRWMutex RWMutex; +/// ScopedReader - RAII acquisition of a reader lock +#if !defined(LLVM_USE_RW_MUTEX_IMPL) +template +using SmartScopedReader = const std::shared_lock>; +#else +template struct SmartScopedReader { + SmartRWMutex &mutex; - /// ScopedReader - RAII acquisition of a reader lock - template - struct SmartScopedReader { - SmartRWMutex& mutex; + explicit SmartScopedReader(SmartRWMutex &m) : mutex(m) { + mutex.lock_shared(); + } - explicit SmartScopedReader(SmartRWMutex& m) : mutex(m) { - mutex.lock_shared(); - } + ~SmartScopedReader() { mutex.unlock_shared(); } +}; +#endif +typedef SmartScopedReader ScopedReader; - ~SmartScopedReader() { - mutex.unlock_shared(); - } - }; +/// ScopedWriter - RAII acquisition of a writer lock +#if !defined(LLVM_USE_RW_MUTEX_IMPL) +template +using SmartScopedWriter = std::lock_guard>; +#else +template struct SmartScopedWriter { + SmartRWMutex &mutex; - typedef SmartScopedReader ScopedReader; + explicit SmartScopedWriter(SmartRWMutex &m) : mutex(m) { + mutex.lock(); + } - /// ScopedWriter - RAII acquisition of a writer lock - template - struct SmartScopedWriter { - SmartRWMutex& mutex; - - explicit SmartScopedWriter(SmartRWMutex& m) : mutex(m) { - mutex.lock(); - } - - ~SmartScopedWriter() { - mutex.unlock(); - } - }; - - typedef SmartScopedWriter ScopedWriter; + ~SmartScopedWriter() { mutex.unlock(); } +}; +#endif +typedef SmartScopedWriter ScopedWriter; } // end namespace sys } // end namespace llvm diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index 2d19b10fd89..b2620ab4cfc 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -44,6 +44,9 @@ namespace llvm { Regex(); /// Compiles the given regular expression \p Regex. 
+ /// + /// \param Regex - referenced string is no longer needed after this + /// constructor does finish. Only its compiled form is kept stored. Regex(StringRef Regex, unsigned Flags = NoFlags); Regex(const Regex &) = delete; Regex &operator=(Regex regex) { @@ -54,9 +57,10 @@ namespace llvm { Regex(Regex &®ex); ~Regex(); - /// isValid - returns the error encountered during regex compilation, or - /// matching, if any. + /// isValid - returns the error encountered during regex compilation, if + /// any. bool isValid(std::string &Error) const; + bool isValid() const { return !error; } /// getNumMatches - In a valid regex, return the number of parenthesized /// matches it contains. The number filled in by match will include this @@ -69,8 +73,12 @@ namespace llvm { /// with references to the matched group expressions (inside \p String), /// the first group is always the entire pattern. /// + /// \param Error - If non-null, any errors in the matching will be recorded + /// as a non-empty string. If there is no error, it will be an empty string. + /// /// This returns true on a successful match. - bool match(StringRef String, SmallVectorImpl *Matches = nullptr); + bool match(StringRef String, SmallVectorImpl *Matches = nullptr, + std::string *Error = nullptr) const; /// sub - Return the result of replacing the first match of the regex in /// \p String with the \p Repl string. Backreferences like "\0" in the @@ -81,9 +89,9 @@ namespace llvm { /// /// \param Error If non-null, any errors in the substitution (invalid /// backreferences, trailing backslashes) will be recorded as a non-empty - /// string. + /// string. If there is no error, it will be an empty string. std::string sub(StringRef Repl, StringRef String, - std::string *Error = nullptr); + std::string *Error = nullptr) const; /// If this function returns true, ^Str$ is an extended regular /// expression that matches Str and only Str. 
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 4d8aa5f1470..5bb6a254a47 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -115,7 +115,7 @@ namespace llvm { entry Entry; node Node; - static std::unique_ptr CtorFn() { return make_unique(); } + static std::unique_ptr CtorFn() { return std::make_unique(); } public: Add(StringRef Name, StringRef Desc) diff --git a/include/llvm/Support/SHA1.h b/include/llvm/Support/SHA1.h index 87fe94bbd5c..2cfbd217936 100644 --- a/include/llvm/Support/SHA1.h +++ b/include/llvm/Support/SHA1.h @@ -16,13 +16,13 @@ #define LLVM_SUPPORT_SHA1_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include #include namespace llvm { template class ArrayRef; -class StringRef; /// A class that wrap the SHA1 algorithm. class SHA1 { diff --git a/include/llvm/Support/ScalableSize.h b/include/llvm/Support/ScalableSize.h deleted file mode 100644 index 96bf043773a..00000000000 --- a/include/llvm/Support/ScalableSize.h +++ /dev/null @@ -1,43 +0,0 @@ -//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides a struct that can be used to query the size of IR types -// which may be scalable vectors. It provides convenience operators so that -// it can be used in much the same way as a single scalar value. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_SCALABLESIZE_H -#define LLVM_SUPPORT_SCALABLESIZE_H - -namespace llvm { - -class ElementCount { -public: - unsigned Min; // Minimum number of vector elements. 
- bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - bool operator==(const ElementCount& RHS) const { - return Min == RHS.Min && Scalable == RHS.Scalable; - } -}; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_SCALABLESIZE_H diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index a6b215a2431..a4f1fad22dd 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -84,6 +84,17 @@ namespace sys { /// function. Note also that the handler may be executed on a different /// thread on some platforms. void SetInfoSignalFunction(void (*Handler)()); + + /// Registers a function to be called when a "pipe" signal is delivered to + /// the process. + /// + /// The "pipe" signal typically indicates a failed write to a pipe (SIGPIPE). + /// The default installed handler calls `exit(EX_IOERR)`, causing the process + /// to immediately exit with an IO error exit code. + /// + /// This function is only applicable on POSIX systems. 
+ void SetPipeSignalFunction(void (*Handler)()); + } // End sys namespace } // End llvm namespace diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h index 06a447a27c2..6cec87006c0 100644 --- a/include/llvm/Support/SwapByteOrder.h +++ b/include/llvm/Support/SwapByteOrder.h @@ -22,9 +22,37 @@ #include #endif +#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) +#include +#elif defined(_AIX) +#include +#elif defined(__sun) +/* Solaris provides _BIG_ENDIAN/_LITTLE_ENDIAN selector in sys/types.h */ +#include +#define BIG_ENDIAN 4321 +#define LITTLE_ENDIAN 1234 +#if defined(_BIG_ENDIAN) +#define BYTE_ORDER BIG_ENDIAN +#else +#define BYTE_ORDER LITTLE_ENDIAN +#endif +#else +#if !defined(BYTE_ORDER) && !defined(_WIN32) +#include +#endif +#endif + namespace llvm { namespace sys { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN +constexpr bool IsBigEndianHost = true; +#else +constexpr bool IsBigEndianHost = false; +#endif + +static const bool IsLittleEndianHost = !IsBigEndianHost; + /// SwapByteOrder_16 - This function returns a byte-swapped representation of /// the 16-bit argument. inline uint16_t SwapByteOrder_16(uint16_t value) { @@ -39,10 +67,9 @@ inline uint16_t SwapByteOrder_16(uint16_t value) { #endif } -/// SwapByteOrder_32 - This function returns a byte-swapped representation of -/// the 32-bit argument. +/// This function returns a byte-swapped representation of the 32-bit argument. inline uint32_t SwapByteOrder_32(uint32_t value) { -#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC)) +#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC)) return __builtin_bswap32(value); #elif defined(_MSC_VER) && !defined(_DEBUG) return _byteswap_ulong(value); @@ -55,10 +82,9 @@ inline uint32_t SwapByteOrder_32(uint32_t value) { #endif } -/// SwapByteOrder_64 - This function returns a byte-swapped representation of -/// the 64-bit argument. 
+/// This function returns a byte-swapped representation of the 64-bit argument. inline uint64_t SwapByteOrder_64(uint64_t value) { -#if defined(__llvm__) || (LLVM_GNUC_PREREQ(4, 3, 0) && !defined(__ICC)) +#if defined(__llvm__) || (defined(__GNUC__) && !defined(__ICC)) return __builtin_bswap64(value); #elif defined(_MSC_VER) && !defined(_DEBUG) return _byteswap_uint64(value); diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def index 598c1064efd..11731ac3541 100644 --- a/include/llvm/Support/TargetOpcodes.def +++ b/include/llvm/Support/TargetOpcodes.def @@ -294,9 +294,21 @@ HANDLE_TARGET_OPCODE(G_SEXTLOAD) /// Generic zeroext load HANDLE_TARGET_OPCODE(G_ZEXTLOAD) +/// Generic indexed load (including anyext load) +HANDLE_TARGET_OPCODE(G_INDEXED_LOAD) + +/// Generic indexed signext load +HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD) + +/// Generic indexed zeroext load +HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD) + /// Generic store. HANDLE_TARGET_OPCODE(G_STORE) +/// Generic indexed store. +HANDLE_TARGET_OPCODE(G_INDEXED_STORE) + /// Generic atomic cmpxchg with internal success check. HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) @@ -315,6 +327,8 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB) // Generic atomic fence HANDLE_TARGET_OPCODE(G_FENCE) @@ -354,6 +368,7 @@ HANDLE_TARGET_OPCODE(G_VAARG) // Generic sign extend HANDLE_TARGET_OPCODE(G_SEXT) +HANDLE_TARGET_OPCODE(G_SEXT_INREG) // Generic zero extend HANDLE_TARGET_OPCODE(G_ZEXT) @@ -436,6 +451,9 @@ HANDLE_TARGET_OPCODE(G_FMUL) /// Generic FMA multiplication. Behaves like llvm fma intrinsic HANDLE_TARGET_OPCODE(G_FMA) +/// Generic FP multiply and add. Behaves as separate fmul and fadd. +HANDLE_TARGET_OPCODE(G_FMAD) + /// Generic FP division. 
HANDLE_TARGET_OPCODE(G_FDIV) @@ -557,6 +575,9 @@ HANDLE_TARGET_OPCODE(G_CTPOP) /// Generic byte swap. HANDLE_TARGET_OPCODE(G_BSWAP) +/// Generic bit reverse. +HANDLE_TARGET_OPCODE(G_BITREVERSE) + /// Floating point ceil. HANDLE_TARGET_OPCODE(G_FCEIL) @@ -587,12 +608,15 @@ HANDLE_TARGET_OPCODE(G_BLOCK_ADDR) /// Generic jump table address HANDLE_TARGET_OPCODE(G_JUMP_TABLE) +/// Generic dynamic stack allocation. +HANDLE_TARGET_OPCODE(G_DYN_STACKALLOC) + // TODO: Add more generic opcodes as we move along. /// Marker for the end of the generic opcode. /// This is used to check if an opcode is in the range of the /// generic opcodes. -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_JUMP_TABLE) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_DYN_STACKALLOC) /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific post-isel opcode values start here. diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index bf75650760d..f4bc26b858c 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -510,8 +510,8 @@ public: std::move(Emitter), RelaxAll); break; case Triple::XCOFF: - S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), + std::move(Emitter), RelaxAll); break; } if (ObjectTargetStreamerCtorFn) diff --git a/include/llvm/Support/TimeProfiler.h b/include/llvm/Support/TimeProfiler.h index 72b6f7180bd..8cc430d0bc7 100644 --- a/include/llvm/Support/TimeProfiler.h +++ b/include/llvm/Support/TimeProfiler.h @@ -19,7 +19,7 @@ extern TimeTraceProfiler *TimeTraceProfilerInstance; /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance /// variable to be the profiler instance. 
-void timeTraceProfilerInitialize(); +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity); /// Cleanup the time trace profiler, if it was initialized. void timeTraceProfilerCleanup(); diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h index 8cf4f7aed7f..49be89613c4 100644 --- a/include/llvm/Support/TrailingObjects.h +++ b/include/llvm/Support/TrailingObjects.h @@ -47,6 +47,7 @@ #define LLVM_SUPPORT_TRAILINGOBJECTS_H #include "llvm/Support/AlignOf.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" @@ -87,11 +88,6 @@ protected: template struct OverloadToken {}; }; -/// This helper template works-around MSVC 2013's lack of useful -/// alignas() support. The argument to alignas(), in MSVC, is -/// required to be a literal integer. But, you *can* use template -/// specialization to select between a bunch of different alignas() -/// expressions... template class TrailingObjectsAligner : public TrailingObjectsBase {}; template <> @@ -172,7 +168,7 @@ protected: if (requiresRealignment()) return reinterpret_cast( - llvm::alignAddr(Ptr, alignof(NextTy))); + alignAddr(Ptr, Align::Of())); else return reinterpret_cast(Ptr); } @@ -186,7 +182,7 @@ protected: Obj, TrailingObjectsBase::OverloadToken()); if (requiresRealignment()) - return reinterpret_cast(llvm::alignAddr(Ptr, alignof(NextTy))); + return reinterpret_cast(alignAddr(Ptr, Align::Of())); else return reinterpret_cast(Ptr); } @@ -254,9 +250,7 @@ class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl< // because BaseTy isn't complete at class instantiation time, but // will be by the time this function is instantiated. 
static void verifyTrailingObjectsAssertions() { -#ifdef LLVM_IS_FINAL - static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final."); -#endif + static_assert(std::is_final(), "BaseTy must be final."); } // These two methods are the base of the recursion for this method. @@ -369,7 +363,9 @@ public: template struct FixedSizeStorage { template struct with_counts { enum { Size = totalSizeToAlloc(Counts...) }; - typedef llvm::AlignedCharArray type; + struct type { + alignas(BaseTy) char buffer[Size]; + }; }; }; diff --git a/include/llvm/Support/TypeSize.h b/include/llvm/Support/TypeSize.h new file mode 100644 index 00000000000..711679cdcac --- /dev/null +++ b/include/llvm/Support/TypeSize.h @@ -0,0 +1,201 @@ +//===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a struct that can be used to query the size of IR types +// which may be scalable vectors. It provides convenience operators so that +// it can be used in much the same way as a single scalar value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_TYPESIZE_H +#define LLVM_SUPPORT_TYPESIZE_H + +#include +#include + +namespace llvm { + +class ElementCount { +public: + unsigned Min; // Minimum number of vector elements. + bool Scalable; // If true, NumElements is a multiple of 'Min' determined + // at runtime rather than compile time. 
+ + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + bool operator==(const ElementCount& RHS) const { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + bool operator!=(const ElementCount& RHS) const { + return !(*this == RHS); + } +}; + +// This class is used to represent the size of types. If the type is of fixed +// size, it will represent the exact size. If the type is a scalable vector, +// it will represent the known minimum size. +class TypeSize { + uint64_t MinSize; // The known minimum size. + bool IsScalable; // If true, then the runtime size is an integer multiple + // of MinSize. + +public: + constexpr TypeSize(uint64_t MinSize, bool Scalable) + : MinSize(MinSize), IsScalable(Scalable) {} + + static constexpr TypeSize Fixed(uint64_t Size) { + return TypeSize(Size, /*IsScalable=*/false); + } + + static constexpr TypeSize Scalable(uint64_t MinSize) { + return TypeSize(MinSize, /*IsScalable=*/true); + } + + // Scalable vector types with the same minimum size as a fixed size type are + // not guaranteed to be the same size at runtime, so they are never + // considered to be equal. + friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) { + return std::tie(LHS.MinSize, LHS.IsScalable) == + std::tie(RHS.MinSize, RHS.IsScalable); + } + + friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) { + return !(LHS == RHS); + } + + // For many cases, size ordering between scalable and fixed size types cannot + // be determined at compile time, so such comparisons aren't allowed. + // + // e.g. could be bigger than <4 x i32> with a runtime + // vscale >= 5, equal sized with a vscale of 4, and smaller with + // a vscale <= 3. + // + // If the scalable flags match, just perform the requested comparison + // between the minimum sizes. 
+ friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) { + assert(LHS.IsScalable == RHS.IsScalable && + "Ordering comparison of scalable and fixed types"); + + return LHS.MinSize < RHS.MinSize; + } + + friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) { + return RHS < LHS; + } + + friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) { + return !(RHS < LHS); + } + + friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) { + return !(LHS < RHS); + } + + // Convenience operators to obtain relative sizes independently of + // the scalable flag. + TypeSize operator*(unsigned RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(unsigned RHS) const { + return { MinSize / RHS, IsScalable }; + } + + // Return the minimum size with the assumption that the size is exact. + // Use in places where a scalable size doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors). + uint64_t getFixedSize() const { + assert(!IsScalable && "Request for a fixed size on a scalable object"); + return MinSize; + } + + // Return the known minimum size. Use in places where the scalable property + // doesn't matter (e.g. determining alignment) or in conjunction with the + // isScalable method below. + uint64_t getKnownMinSize() const { + return MinSize; + } + + // Return whether or not the size is scalable. + bool isScalable() const { + return IsScalable; + } + + // Casts to a uint64_t if this is a fixed-width size. + // + // NOTE: This interface is obsolete and will be removed in a future version + // of LLVM in favour of calling getFixedSize() directly. + operator uint64_t() const { + return getFixedSize(); + } + + // Additional convenience operators needed to avoid ambiguous parses. + // TODO: Make uint64_t the default operator? 
+ TypeSize operator*(uint64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int RHS) const { + return { MinSize * RHS, IsScalable }; + } + + TypeSize operator*(int64_t RHS) const { + return { MinSize * RHS, IsScalable }; + } + + friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { + return { LHS * RHS.MinSize, RHS.IsScalable }; + } + + TypeSize operator/(uint64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int RHS) const { + return { MinSize / RHS, IsScalable }; + } + + TypeSize operator/(int64_t RHS) const { + return { MinSize / RHS, IsScalable }; + } +}; + +/// Returns a TypeSize with a known minimum size that is the next integer +/// (mod 2**64) that is greater than or equal to \p Value and is a multiple +/// of \p Align. \p Align must be non-zero. 
+/// +/// Similar to the alignTo functions in MathExtras.h +inline TypeSize alignTo(TypeSize Size, uint64_t Align) { + assert(Align != 0u && "Align must be non-zero"); + return {(Size.getKnownMinSize() + Align - 1) / Align * Align, + Size.isScalable()}; +} + +} // end namespace llvm + +#endif // LLVM_SUPPORT_TypeSize_H diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h index 4b59f8a92b7..73d3603b74d 100644 --- a/include/llvm/Support/UnicodeCharRanges.h +++ b/include/llvm/Support/UnicodeCharRanges.h @@ -9,11 +9,8 @@ #define LLVM_SUPPORT_UNICODECHARRANGES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h deleted file mode 100644 index 0a887ad5965..00000000000 --- a/include/llvm/Support/UniqueLock.h +++ /dev/null @@ -1,68 +0,0 @@ -//===- Support/UniqueLock.h - Acquire/Release Mutex In Scope ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a guard for a block of code that ensures a Mutex is locked -// upon construction and released upon destruction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_UNIQUE_LOCK_H -#define LLVM_SUPPORT_UNIQUE_LOCK_H - -#include - -namespace llvm { - - /// A pared-down imitation of std::unique_lock from C++11. Contrary to the - /// name, it's really more of a wrapper for a lock. 
It may or may not have - /// an associated mutex, which is guaranteed to be locked upon creation - /// and unlocked after destruction. unique_lock can also unlock the mutex - /// and re-lock it freely during its lifetime. - /// Guard a section of code with a mutex. - template - class unique_lock { - MutexT *M = nullptr; - bool locked = false; - - public: - unique_lock() = default; - explicit unique_lock(MutexT &m) : M(&m), locked(true) { M->lock(); } - unique_lock(const unique_lock &) = delete; - unique_lock &operator=(const unique_lock &) = delete; - - void operator=(unique_lock &&o) { - if (owns_lock()) - M->unlock(); - M = o.M; - locked = o.locked; - o.M = nullptr; - o.locked = false; - } - - ~unique_lock() { if (owns_lock()) M->unlock(); } - - void lock() { - assert(!locked && "mutex already locked!"); - assert(M && "no associated mutex!"); - M->lock(); - locked = true; - } - - void unlock() { - assert(locked && "unlocking a mutex that isn't locked!"); - assert(M && "no associated mutex!"); - M->unlock(); - locked = false; - } - - bool owns_lock() { return locked; } - }; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_UNIQUE_LOCK_H diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h index 31c9e851dae..c844d9d194f 100644 --- a/include/llvm/Support/VirtualFileSystem.h +++ b/include/llvm/Support/VirtualFileSystem.h @@ -647,9 +647,19 @@ private: friend class VFSFromYamlDirIterImpl; friend class RedirectingFileSystemParser; + bool shouldUseExternalFS() const { + return ExternalFSValidWD && IsFallthrough; + } + /// The root(s) of the virtual file system. std::vector> Roots; + /// The current working directory of the file system. + std::string WorkingDirectory; + + /// Whether the current working directory is valid for the external FS. + bool ExternalFSValidWD = false; + /// The file system to use for external references. 
IntrusiveRefCntPtr ExternalFS; @@ -689,8 +699,7 @@ private: true; #endif - RedirectingFileSystem(IntrusiveRefCntPtr ExternalFS) - : ExternalFS(std::move(ExternalFS)) {} + RedirectingFileSystem(IntrusiveRefCntPtr ExternalFS); /// Looks up the path [Start, End) in \p From, possibly /// recursing into the contents of \p From if it is a directory. @@ -730,9 +739,10 @@ public: StringRef getExternalContentsPrefixDir() const; + void dump(raw_ostream &OS) const; + void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; - LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const; #endif }; diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h index bdd23b41594..8220131e5be 100644 --- a/include/llvm/Support/Win64EH.h +++ b/include/llvm/Support/Win64EH.h @@ -30,7 +30,9 @@ enum UnwindOpcodes { UOP_SetFPReg, UOP_SaveNonVol, UOP_SaveNonVolBig, - UOP_SaveXMM128 = 8, + UOP_Epilog, + UOP_SpareCode, + UOP_SaveXMM128, UOP_SaveXMM128Big, UOP_PushMachFrame, // The following set of unwind opcodes is for ARM64. 
They are documented at diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def index 1749be3b3ae..4ebf2d79cb8 100644 --- a/include/llvm/Support/X86TargetParser.def +++ b/include/llvm/Support/X86TargetParser.def @@ -112,6 +112,7 @@ X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62) X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63) X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE) X86_CPU_SUBTYPE ("cooperlake", INTEL_COREI7_COOPERLAKE) +X86_CPU_SUBTYPE ("tigerlake", INTEL_COREI7_TIGERLAKE) #undef X86_CPU_SUBTYPE_COMPAT #undef X86_CPU_SUBTYPE @@ -160,12 +161,13 @@ X86_FEATURE_COMPAT(32, FEATURE_GFNI, "gfni") X86_FEATURE_COMPAT(33, FEATURE_VPCLMULQDQ, "vpclmulqdq") X86_FEATURE_COMPAT(34, FEATURE_AVX512VNNI, "avx512vnni") X86_FEATURE_COMPAT(35, FEATURE_AVX512BITALG, "avx512bitalg") +X86_FEATURE_COMPAT(36, FEATURE_AVX512BF16, "avx512bf16") // Features below here are not in libgcc/compiler-rt. X86_FEATURE (64, FEATURE_MOVBE) X86_FEATURE (65, FEATURE_ADX) X86_FEATURE (66, FEATURE_EM64T) X86_FEATURE (67, FEATURE_CLFLUSHOPT) X86_FEATURE (68, FEATURE_SHA) -X86_FEATURE (69, FEATURE_AVX512BF16) +X86_FEATURE (69, FEATURE_AVX512VP2INTERSECT) #undef X86_FEATURE_COMPAT #undef X86_FEATURE diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 5181dc56d81..a3bfa7dc467 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -649,7 +649,8 @@ inline bool isBool(StringRef S) { inline QuotingType needsQuotes(StringRef S) { if (S.empty()) return QuotingType::Single; - if (isspace(S.front()) || isspace(S.back())) + if (isspace(static_cast(S.front())) || + isspace(static_cast(S.back()))) return QuotingType::Single; if (isNull(S)) return QuotingType::Single; @@ -748,7 +749,7 @@ public: IO(void *Ctxt = nullptr); virtual ~IO(); - virtual bool outputting() = 0; + virtual bool outputting() const = 0; virtual unsigned beginSequence() = 0; virtual bool preflightElement(unsigned, void *&) = 0; @@ -842,7 +843,7 @@ 
public: Val = Val | ConstVal; } - void *getContext(); + void *getContext() const; void setContext(void *); template void mapRequired(const char *Key, T &Val) { @@ -1402,7 +1403,7 @@ public: std::error_code error(); private: - bool outputting() override; + bool outputting() const override; bool mapTag(StringRef, bool) override; void beginMapping() override; void endMapping() override; @@ -1549,7 +1550,7 @@ public: /// anyway. void setWriteDefaultValues(bool Write) { WriteDefaultValues = Write; } - bool outputting() override; + bool outputting() const override; bool mapTag(StringRef, bool) override; void beginMapping() override; void endMapping() override; diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h index 4ecdb17376f..a72acd4fe00 100644 --- a/include/llvm/Support/circular_raw_ostream.h +++ b/include/llvm/Support/circular_raw_ostream.h @@ -122,6 +122,10 @@ namespace llvm { delete[] BufferArray; } + bool is_displayed() const override { + return TheStream->is_displayed(); + } + /// setStream - Tell the circular_raw_ostream to output a /// different stream. 
"Owns" tells circular_raw_ostream whether /// it should take responsibility for managing the underlying diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 48bb623b063..0debc5da7a6 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -72,7 +72,7 @@ private: public: // color order matches ANSI escape sequence, don't change - enum Colors { + enum class Colors { BLACK = 0, RED, GREEN, @@ -81,9 +81,21 @@ public: MAGENTA, CYAN, WHITE, - SAVEDCOLOR + SAVEDCOLOR, + RESET, }; + static const Colors BLACK = Colors::BLACK; + static const Colors RED = Colors::RED; + static const Colors GREEN = Colors::GREEN; + static const Colors YELLOW = Colors::YELLOW; + static const Colors BLUE = Colors::BLUE; + static const Colors MAGENTA = Colors::MAGENTA; + static const Colors CYAN = Colors::CYAN; + static const Colors WHITE = Colors::WHITE; + static const Colors SAVEDCOLOR = Colors::SAVEDCOLOR; + static const Colors RESET = Colors::RESET; + explicit raw_ostream(bool unbuffered = false) : BufferMode(unbuffered ? Unbuffered : InternalBuffer) { // Start out ready to flush. @@ -214,6 +226,9 @@ public: /// Output \p N in hexadecimal, without any prefix or padding. raw_ostream &write_hex(unsigned long long N); + // Change the foreground color of text. + raw_ostream &operator<<(Colors C); + /// Output a formatted UUID with dash separators. using uuid_t = uint8_t[16]; raw_ostream &write_uuid(const uuid_t UUID); @@ -277,6 +292,10 @@ public: /// This function determines if this stream is displayed and supports colors. virtual bool has_colors() const { return is_displayed(); } + // Enable or disable colors. Once disable_colors() is called, + // changeColor() has no effect until enable_colors() is called. 
+ virtual void enable_colors(bool /*enable*/) {} + //===--------------------------------------------------------------------===// // Subclass Interface //===--------------------------------------------------------------------===// @@ -365,8 +384,8 @@ public: class raw_fd_ostream : public raw_pwrite_stream { int FD; bool ShouldClose; - bool SupportsSeeking; + bool ColorEnabled = true; #ifdef _WIN32 /// True if this fd refers to a Windows console device. Mintty and other @@ -442,6 +461,8 @@ public: bool has_colors() const override; + void enable_colors(bool enable) override { ColorEnabled = enable; } + std::error_code error() const { return EC; } /// Return the value of the flag in this raw_fd_ostream indicating whether an diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index c8c6a76a90f..b7d48e8e1ad 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -17,11 +17,6 @@ #include #include -#ifndef __has_feature -#define LLVM_DEFINED_HAS_FEATURE -#define __has_feature(x) 0 -#endif - namespace llvm { @@ -194,17 +189,4 @@ class is_trivially_copyable : public std::true_type { } // end namespace llvm -// If the compiler supports detecting whether a class is final, define -// an LLVM_IS_FINAL macro. If it cannot be defined properly, this -// macro will be left undefined. 
-#if __cplusplus >= 201402L || defined(_MSC_VER) -#define LLVM_IS_FINAL(Ty) std::is_final() -#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0) -#define LLVM_IS_FINAL(Ty) __is_final(Ty) -#endif - -#ifdef LLVM_DEFINED_HAS_FEATURE -#undef __has_feature -#endif - #endif // LLVM_SUPPORT_TYPE_TRAITS_H diff --git a/include/llvm/TableGen/Automaton.td b/include/llvm/TableGen/Automaton.td new file mode 100644 index 00000000000..13ced2a0e78 --- /dev/null +++ b/include/llvm/TableGen/Automaton.td @@ -0,0 +1,95 @@ +//===- Automaton.td ----------------------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the key top-level classes needed to produce a reasonably +// generic finite-state automaton. +// +//===----------------------------------------------------------------------===// + +// Define a record inheriting from GenericAutomaton to generate a reasonably +// generic finite-state automaton over a set of actions and states. +// +// This automaton is defined by: +// 1) a state space (explicit, always bits<32>). +// 2) a set of input symbols (actions, explicit) and +// 3) a transition function from state + action -> state. +// +// A theoretical automaton is defined by : +// Q: A set of possible states. +// S: (sigma) The input alphabet. +// d: (delta) The transition function f(q in Q, s in S) -> q' in Q. +// F: The set of final (accepting) states. +// +// Because generating all possible states is tedious, we instead define the +// transition function only and crawl all reachable states starting from the +// initial state with all inputs under all transitions until termination. +// +// We define F = S, that is, all valid states are accepting. 
+// +// To ensure the generation of the automaton terminates, the state transitions +// are defined as a lattice (meaning every transitioned-to state is more +// specific than the transitioned-from state, for some definition of specificity). +// Concretely a transition may set one or more bits in the state that were +// previously zero to one. If any bit was not zero, the transition is invalid. +// +// Instead of defining all possible states (which would be cumbersome), the user +// provides a set of possible Transitions from state A, consuming an input +// symbol A to state B. The Transition object transforms state A to state B and +// acts as a predicate. This means the state space can be discovered by crawling +// all the possible transitions until none are valid. +// +// This automaton is considered to be nondeterministic, meaning that multiple +// transitions can occur from any (state, action) pair. The generated automaton +// is determinized, meaning that is executes in O(k) time where k is the input +// sequence length. +// +// In addition to a generated automaton that determines if a sequence of inputs +// is accepted or not, a table is emitted that allows determining a plausible +// sequence of states traversed to accept that input. +class GenericAutomaton { + // Name of a class that inherits from Transition. All records inheriting from + // this class will be considered when constructing the automaton. + string TransitionClass; + + // Names of fields within TransitionClass that define the action symbol. This + // defines the action as an N-tuple. + // + // Each symbol field can be of class, int, string or code type. + // If the type of a field is a class, the Record's name is used verbatim + // in C++ and the class name is used as the C++ type name. + // If the type of a field is a string, code or int, that is also used + // verbatim in C++. + // + // To override the C++ type name for field F, define a field called TypeOf_F. 
+ // This should be a string that will be used verbatim in C++. + // + // As an example, to define a 2-tuple with an enum and a string, one might: + // def MyTransition : Transition { + // MyEnum S1; + // int S2; + // } + // def MyAutomaton : GenericAutomaton }{ + // let TransitionClass = "Transition"; + // let SymbolFields = ["S1", "S2"]; + // let TypeOf_S1 = "MyEnumInCxxKind"; + // } + list SymbolFields; +} + +// All transitions inherit from Transition. +class Transition { + // A transition S' = T(S) is valid if, for every set bit in NewState, the + // corresponding bit in S is clear. That is: + // def T(S): + // S' = S | NewState + // return S' if S' != S else Failure + // + // The automaton generator uses this property to crawl the set of possible + // transitions from a starting state of 0b0. + bits<32> NewState; +} diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index 7c83b629862..cf990427f57 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -18,6 +18,7 @@ namespace llvm { +void PrintNote(const Twine &Msg); void PrintNote(ArrayRef NoteLoc, const Twine &Msg); void PrintWarning(ArrayRef WarningLoc, const Twine &Msg); diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index bf7f02208c2..73ed342a610 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -1263,7 +1263,14 @@ class FieldInit : public TypedInit { FieldInit(Init *R, StringInit *FN) : TypedInit(IK_FieldInit, R->getFieldType(FN)), Rec(R), FieldName(FN) { - assert(getType() && "FieldInit with non-record type!"); +#ifndef NDEBUG + if (!getType()) { + llvm::errs() << "In Record = " << Rec->getAsString() + << ", got FieldName = " << *FieldName + << " with non-record type!\n"; + llvm_unreachable("FieldInit with non-record type!"); + } +#endif } public: @@ -1323,6 +1330,7 @@ public: void Profile(FoldingSetNodeID &ID) const; Init *getOperator() const { return Val; } + Record 
*getOperatorAsDef(ArrayRef Loc) const; StringInit *getName() const { return ValName; } @@ -1680,10 +1688,10 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R); class RecordKeeper { friend class RecordRecTy; - using RecordMap = std::map>; + using RecordMap = std::map, std::less<>>; RecordMap Classes, Defs; FoldingSet RecordTypePool; - std::map ExtraGlobals; + std::map> ExtraGlobals; unsigned AnonCounter = 0; public: diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index 45718327b4a..4b49dfd4dd1 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -15,7 +15,9 @@ // Unary ops. //------------------------------------------------------------------------------ -class GenericInstruction : StandardPseudoInstruction; +class GenericInstruction : StandardPseudoInstruction { + let isPreISelOpcode = 1; +} // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. @@ -33,6 +35,20 @@ def G_SEXT : GenericInstruction { let hasSideEffects = 0; } +// Sign extend the a value from an arbitrary bit position, copying the sign bit +// into all bits above it. This is equivalent to a shl + ashr pair with an +// appropriate shift amount. $sz is an immediate (MachineOperand::isImm() +// returns true) to allow targets to have some bitwidths legal and others +// lowered. This opcode is particularly useful if the target has sign-extension +// instructions that are cheaper than the constituent shifts as the optimizer is +// able to make decisions on whether it's better to hang on to the G_SEXT_INREG +// or to lower it and optimize the individual shifts. +def G_SEXT_INREG : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, untyped_imm_0:$sz); + let hasSideEffects = 0; +} + // Zero extend the underlying scalar type of an operation, putting zero bits // into the newly-created space. 
def G_ZEXT : GenericInstruction { @@ -157,6 +173,12 @@ def G_BSWAP : GenericInstruction { let hasSideEffects = 0; } +def G_BITREVERSE : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = 0; +} + def G_ADDRSPACE_CAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); @@ -175,6 +197,12 @@ def G_JUMP_TABLE : GenericInstruction { let hasSideEffects = 0; } +def G_DYN_STACKALLOC : GenericInstruction { + let OutOperandList = (outs ptype0:$dst); + let InOperandList = (ins type1:$size, i32imm:$align); + let hasSideEffects = 1; +} + //------------------------------------------------------------------------------ // Binary ops. //------------------------------------------------------------------------------ @@ -598,6 +626,15 @@ def G_FMA : GenericInstruction { let isCommutable = 0; } +/// Generic FP multiply and add. Perform a * b + c, while getting the +/// same result as the separately rounded operations, unlike G_FMA. +def G_FMAD : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; + let isCommutable = 0; +} + // Generic FP division. def G_FDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); @@ -725,7 +762,11 @@ def G_INTRINSIC_ROUND : GenericInstruction { // Memory ops //------------------------------------------------------------------------------ -// Generic load. Expects a MachineMemOperand in addition to explicit operands. +// Generic load. Expects a MachineMemOperand in addition to explicit +// operands. If the result size is larger than the memory size, the +// high bits are undefined. If the result is a vector type and larger +// than the memory size, the high elements are undefined (i.e. 
this is +// not a per-element, vector anyextload) def G_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); @@ -749,6 +790,32 @@ def G_ZEXTLOAD : GenericInstruction { let mayLoad = 1; } +// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset. +// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed) +// then the value is loaded from $newaddr. +def G_INDEXED_LOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD. +def G_INDEXED_SEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + +// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD. +def G_INDEXED_ZEXTLOAD : GenericInstruction { + let OutOperandList = (outs type0:$dst, ptype1:$newaddr); + let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); + let hasSideEffects = 0; + let mayLoad = 1; +} + // Generic store. Expects a MachineMemOperand in addition to explicit operands. def G_STORE : GenericInstruction { let OutOperandList = (outs); @@ -757,6 +824,15 @@ def G_STORE : GenericInstruction { let mayStore = 1; } +// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour. +def G_INDEXED_STORE : GenericInstruction { + let OutOperandList = (outs ptype0:$newaddr); + let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset, + unknown:$am); + let hasSideEffects = 0; + let mayStore = 1; +} + // Generic atomic cmpxchg with internal success check. Expects a // MachineMemOperand in addition to explicit operands. 
def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { @@ -798,6 +874,8 @@ def G_ATOMICRMW_MAX : G_ATOMICRMW_OP; def G_ATOMICRMW_MIN : G_ATOMICRMW_OP; def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_FADD : G_ATOMICRMW_OP; +def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); @@ -947,9 +1025,12 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction { } // Generic shufflevector. +// +// The mask operand should be an IR Constant which exactly matches the +// corresponding mask for the IR shufflevector instruction. def G_SHUFFLE_VECTOR: GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type1:$v1, type1:$v2, type2:$mask); + let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask); let hasSideEffects = 0; } diff --git a/include/llvm/Target/GlobalISel/Combine.td b/include/llvm/Target/GlobalISel/Combine.td new file mode 100644 index 00000000000..dcac399fd69 --- /dev/null +++ b/include/llvm/Target/GlobalISel/Combine.td @@ -0,0 +1,103 @@ +//===- Combine.td - Combine rule definitions ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declare GlobalISel combine rules and provide mechanisms to opt-out. +// +//===----------------------------------------------------------------------===// + +// Common base class for GICombineRule and GICombineGroup. +class GICombine { + // See GICombineGroup. We only declare it here to make the tablegen pass + // simpler. + list Rules = ?; +} + +// A group of combine rules that can be added to a GICombiner or another group. +class GICombineGroup rules> : GICombine { + // The rules contained in this group. 
The rules in a group are flattened into + // a single list and sorted into whatever order is most efficient. However, + // they will never be re-ordered such that behaviour differs from the + // specified order. It is therefore possible to use the order of rules in this + // list to describe priorities. + let Rules = rules; +} + +// Declares a combiner helper class +class GICombinerHelper rules> + : GICombineGroup { + // The class name to use in the generated output. + string Classname = classname; + // The name of a run-time compiler option that will be generated to disable + // specific rules within this combiner. + string DisableRuleOption = ?; +} +class GICombineRule : GICombine { + /// Defines the external interface of the match rule. This includes: + /// * The names of the root nodes (requires at least one) + /// See GIDefKind for details. + dag Defs = defs; + + /// Defines the things which must be true for the pattern to match + /// See GIMatchKind for details. + dag Match = match; + + /// Defines the things which happen after the decision is made to apply a + /// combine rule. + /// See GIApplyKind for details. + dag Apply = apply; +} + +/// The operator at the root of a GICombineRule.Defs dag. +def defs; + +/// All arguments of the defs operator must be subclasses of GIDefKind or +/// sub-dags whose operator is GIDefKindWithArgs. +class GIDefKind; +class GIDefKindWithArgs; +/// Declare a root node. There must be at least one of these in every combine +/// rule. +/// TODO: The plan is to elide `root` definitions and determine it from the DAG +/// itself with an overide for situations where the usual determination +/// is incorrect. +def root : GIDefKind; + +/// The operator at the root of a GICombineRule.Match dag. 
+def match; +/// All arguments of the match operator must be either: +/// * A subclass of GIMatchKind +/// * A subclass of GIMatchKindWithArgs +/// * A MIR code block (deprecated) +/// The GIMatchKind and GIMatchKindWithArgs cases are described in more detail +/// in their definitions below. +/// For the Instruction case, these are collected into a DAG where operand names +/// that occur multiple times introduce edges. +class GIMatchKind; +class GIMatchKindWithArgs; + +/// The operator at the root of a GICombineRule.Apply dag. +def apply; +/// All arguments of the apply operator must be subclasses of GIApplyKind, or +/// sub-dags whose operator is GIApplyKindWithArgs, or an MIR block +/// (deprecated). +class GIApplyKind; +class GIApplyKindWithArgs; + +def copy_prop : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchCombineCopy(${d}); }]), + (apply [{ Helper.applyCombineCopy(${d}); }])>; +def trivial_combines : GICombineGroup<[copy_prop]>; + +// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of +// all_combines because it wasn't there. +def elide_br_by_inverting_cond : GICombineRule< + (defs root:$d), + (match [{ return Helper.matchElideBrByInvertingCond(${d}); }]), + (apply [{ Helper.applyElideBrByInvertingCond(${d}); }])>; + +def all_combines : GICombineGroup<[trivial_combines]>; diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 6cc58d6521d..b846d2252b8 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -27,6 +27,7 @@ class GINodeEquiv { // (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel // stores this information in the MachineMemoryOperand. bit CheckMMOIsNonAtomic = 0; + bit CheckMMOIsAtomic = 0; // SelectionDAG has one node for all loads and uses predicates to // differentiate them. GlobalISel on the other hand uses separate opcodes. 
@@ -34,6 +35,10 @@ class GINodeEquiv { // depending on the predicates on the node. Instruction IfSignExtend = ?; Instruction IfZeroExtend = ?; + + // SelectionDAG has one setcc for all compares. This differentiates + // for G_ICMP and G_FCMP. + Instruction IfFloatingPoint = ?; } // These are defined in the same order as the G_* instructions. @@ -46,6 +51,7 @@ def : GINodeEquiv; // G_PTRTOINT - SelectionDAG has no equivalent. def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -72,6 +78,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -85,6 +92,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -100,10 +108,15 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; // Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some // complications that tablegen must take care of. For example, Predicates such @@ -117,6 +130,11 @@ def : GINodeEquiv { let IfSignExtend = G_SEXTLOAD; let IfZeroExtend = G_ZEXTLOAD; } + +def : GINodeEquiv { + let IfFloatingPoint = G_FCMP; +} + // Broadly speaking G_STORE is equivalent to ISD::STORE but there are some // complications that tablegen must take care of. For example, predicates such // as isTruncStore require that this is not a perfect 1:1 mapping since a @@ -126,6 +144,11 @@ def : GINodeEquiv { // G_STORE with a non-atomic MachineMemOperand. 
def : GINodeEquiv { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 0; + let CheckMMOIsAtomic = 1; +} + def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; @@ -138,6 +161,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index d58662e128e..dd8679661b9 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -351,7 +351,11 @@ def interleave; // RegisterTuples instances can be used in other set operations to form // register classes and so on. This is the only way of using the generated // registers. -class RegisterTuples Indices, list Regs> { +// +// RegNames may be specified to supply asm names for the generated tuples. +// If used must have the same size as the list of produced registers. +class RegisterTuples Indices, list Regs, + list RegNames = []> { // SubRegs - N lists of registers to be zipped up. Super-registers are // synthesized from the first element of each SubRegs list, the second // element and so on. @@ -360,6 +364,9 @@ class RegisterTuples Indices, list Regs> { // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list SubRegIndices = Indices; + + // List of asm names for the generated tuple registers. + list RegAsmNames = RegNames; } @@ -436,6 +443,15 @@ class InstructionEncoding { bit hasCompleteDecoder = 1; } +// Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies +// an EncodingByHwMode, its Inst and Size members are ignored and Ts are used +// to encode and decode based on HwMode. +class EncodingByHwMode Ms = [], list Ts = []> + : HwModeSelect { + // The length of this list must be the same as the length of Ms. 
+ list Objects = Ts; +} + //===----------------------------------------------------------------------===// // Instruction set description - These classes correspond to the C++ classes in // the Target/TargetInstrInfo.h file. @@ -447,6 +463,10 @@ class Instruction : InstructionEncoding { dag InOperandList; // An dag containing the MI use operand list. string AsmString = ""; // The .s format to print the instruction with. + // Allows specifying a canonical InstructionEncoding by HwMode. If non-empty, + // the Inst member of this Instruction is ignored. + EncodingByHwMode EncodingInfos; + // Pattern - Set to the DAG pattern for this instruction, if we know of one, // otherwise, uninitialized. list Pattern; @@ -472,6 +492,10 @@ class Instruction : InstructionEncoding { // Added complexity passed onto matching pattern. int AddedComplexity = 0; + // Indicates if this is a pre-isel opcode that should be + // legalized/regbankselected/selected. + bit isPreISelOpcode = 0; + // These bits capture information about the high-level semantics of the // instruction. bit isReturn = 0; // Is this instruction a return instruction? @@ -834,6 +858,7 @@ def f64imm : Operand; class TypedOperand : Operand { let OperandType = Ty; bit IsPointer = 0; + bit IsImmediate = 0; } def type0 : TypedOperand<"OPERAND_GENERIC_0">; @@ -852,6 +877,12 @@ let IsPointer = 1 in { def ptype5 : TypedOperand<"OPERAND_GENERIC_5">; } +// untyped_imm is for operands where isImm() will be true. It currently has no +// special behaviour and is only used for clarity. +def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> { + let IsImmediate = 1; +} + /// zero_reg definition - Special node to stand for the zero register. 
/// def zero_reg; diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td index 1bc03cf8a49..7b1973cc382 100644 --- a/include/llvm/Target/TargetCallingConv.td +++ b/include/llvm/Target/TargetCallingConv.td @@ -152,6 +152,12 @@ class CCBitConvertToType : CCAction { ValueType DestTy = destTy; } +/// CCTruncToType - If applied, this truncates the specified current value to +/// the specified type. +class CCTruncToType : CCAction { + ValueType DestTy = destTy; +} + /// CCPassIndirect - If applied, this stores the value to stack and passes the pointer /// as normal argument. class CCPassIndirect : CCAction { diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td index b68ed045520..89e5abd947d 100644 --- a/include/llvm/Target/TargetItinerary.td +++ b/include/llvm/Target/TargetItinerary.td @@ -127,6 +127,17 @@ class ProcessorItineraries fu, list bp, list FU = fu; list BP = bp; list IID = iid; + // The packetizer automaton to use for this itinerary. By default all + // itineraries for a target are bundled up into the same automaton. This only + // works correctly when there are no conflicts in functional unit IDs between + // itineraries. For example, given two itineraries A<[SLOT_A]>, B<[SLOT_B]>, + // SLOT_A and SLOT_B will be assigned the same functional unit index, and + // the generated packetizer will confuse instructions referencing these slots. + // + // To avoid this, setting PacketizerNamespace to non-"" will cause this + // itinerary to be generated in a different automaton. The subtarget will need + // to declare a method "create##Namespace##DFAPacketizer()". 
+ string PacketizerNamespace = ""; } // NoItineraries - A marker that can be used by processors without schedule diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 3a2497bff11..d74341b23fb 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -191,7 +191,8 @@ public: } /// Get the target specific PC relative GOT entry relocation - virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index cdf9f8bfd5e..285c0ec0fb9 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -25,7 +25,7 @@ namespace llvm { class Function; class GlobalValue; -class MachineModuleInfo; +class MachineModuleInfoWrapperPass; class Mangler; class MCAsmInfo; class MCContext; @@ -284,12 +284,13 @@ public: /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not /// supported, or false on success. - /// \p MMI is an optional parameter that, if set to non-nullptr, + /// \p MMIWP is an optional parameter that, if set to non-nullptr, /// will be used to set the MachineModuloInfo for this PM. - virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, - raw_pwrite_stream *, CodeGenFileType, - bool /*DisableVerify*/ = true, - MachineModuleInfo *MMI = nullptr) { + virtual bool + addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, + raw_pwrite_stream *, CodeGenFileType, + bool /*DisableVerify*/ = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) { return true; } @@ -341,12 +342,13 @@ public: /// Add passes to the specified pass manager to get the specified file /// emitted. 
Typically this will involve several steps of code generation. - /// \p MMI is an optional parameter that, if set to non-nullptr, - /// will be used to set the MachineModuloInfofor this PM. - bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileType, - bool DisableVerify = true, - MachineModuleInfo *MMI = nullptr) override; + /// \p MMIWP is an optional parameter that, if set to non-nullptr, + /// will be used to set the MachineModuloInfo for this PM. + bool + addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, + bool DisableVerify = true, + MachineModuleInfoWrapperPass *MMIWP = nullptr) override; /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It @@ -365,7 +367,7 @@ public: /// Adds an AsmPrinter pass to the pipeline that prints assembly or /// machine code from the MI representation. bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, CodeGenFileType FileTYpe, + raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); /// True if the target uses physical regs at Prolog/Epilog insertion diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index a36d259df83..24f37e94da9 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -563,10 +563,10 @@ class RetireControlUnit { // Base class for Load/StoreQueue. It is used to identify processor resources // which describe load/store queues in the LS unit. 
-class MemoryQueue { - ProcResource QueueDescriptor = PR; +class MemoryQueue { + ProcResourceKind QueueDescriptor = PR; SchedMachineModel SchedModel = ?; } -class LoadQueue : MemoryQueue; -class StoreQueue : MemoryQueue; +class LoadQueue : MemoryQueue; +class StoreQueue : MemoryQueue; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index b913a054ac2..441f3d7d118 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -137,9 +137,12 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; -def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz + SDTCisInt<0>, SDTCisInt<1> +]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; @@ -239,6 +242,9 @@ def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; +def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction + SDTCisInt<0>, SDTCisVec<1> +]>; def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> @@ -393,6 +399,7 @@ def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; +def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; def 
sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>; @@ -401,11 +408,11 @@ def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>; def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; -def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; -def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; -def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; -def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>; -def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>; +def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; +def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; +def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; @@ -415,6 +422,12 @@ def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>; def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>; def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>; +def vecreduce_add : SDNode<"ISD::VECREDUCE_ADD", SDTVecReduce>; +def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>; +def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; +def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; +def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; + def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; @@ -493,12 +506,20 @@ def strict_flog2 : SDNode<"ISD::STRICT_FLOG2", SDTFPUnaryOp, [SDNPHasChain]>; def strict_frint : SDNode<"ISD::STRICT_FRINT", SDTFPUnaryOp, 
[SDNPHasChain]>; +def strict_lrint : SDNode<"ISD::STRICT_LRINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llrint : SDNode<"ISD::STRICT_LLRINT", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fceil : SDNode<"ISD::STRICT_FCEIL", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR", SDTFPUnaryOp, [SDNPHasChain]>; +def strict_lround : SDNode<"ISD::STRICT_LROUND", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_llround : SDNode<"ISD::STRICT_LLROUND", + SDTFPToIntOp, [SDNPHasChain]>; def strict_fround : SDNode<"ISD::STRICT_FROUND", SDTFPUnaryOp, [SDNPHasChain]>; def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC", @@ -513,6 +534,10 @@ def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND", SDTFPRoundOp, [SDNPHasChain]>; def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND", SDTFPExtendOp, [SDNPHasChain]>; +def strict_fp_to_sint : SDNode<"ISD::STRICT_FP_TO_SINT", + SDTFPToIntOp, [SDNPHasChain]>; +def strict_fp_to_uint : SDNode<"ISD::STRICT_FP_TO_UINT", + SDTFPToIntOp, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; @@ -638,16 +663,32 @@ def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; //===----------------------------------------------------------------------===// // Selection DAG Condition Codes -class CondCode; // ISD::CondCode enums -def SETOEQ : CondCode; def SETOGT : CondCode; -def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode; -def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode; -def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode; -def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode; - -def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode; -def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode; +class CondCode { + string ICmpPredicate = icmpName; + string FCmpPredicate = fcmpName; +} +// ISD::CondCode enums, 
and mapping to CmpInst::Predicate names +def SETOEQ : CondCode<"FCMP_OEQ">; +def SETOGT : CondCode<"FCMP_OGT">; +def SETOGE : CondCode<"FCMP_OGE">; +def SETOLT : CondCode<"FCMP_OLT">; +def SETOLE : CondCode<"FCMP_OLE">; +def SETONE : CondCode<"FCMP_ONE">; +def SETO : CondCode<"FCMP_ORD">; +def SETUO : CondCode<"FCMP_UNO">; +def SETUEQ : CondCode<"FCMP_UEQ">; +def SETUGT : CondCode<"FCMP_UGT", "ICMP_UGT">; +def SETUGE : CondCode<"FCMP_UGE", "ICMP_UGE">; +def SETULT : CondCode<"FCMP_ULT", "ICMP_ULT">; +def SETULE : CondCode<"FCMP_ULE", "ICMP_ULE">; +def SETUNE : CondCode<"FCMP_UNE">; +def SETEQ : CondCode<"", "ICMP_EQ">; +def SETGT : CondCode<"", "ICMP_SGT">; +def SETGE : CondCode<"", "ICMP_SGE">; +def SETLT : CondCode<"", "ICMP_SLT">; +def SETLE : CondCode<"", "ICMP_SLE">; +def SETNE : CondCode<"", "ICMP_NE">; //===----------------------------------------------------------------------===// // Selection DAG Node Transformation Functions. @@ -741,6 +782,10 @@ class PatFrags frags, code pred = [{}], // If this empty, accept any address space. list AddressSpaces = ?; + // cast(N)->getAlignment() >= + // If this is empty, accept any alignment. + int MinAlignment = ?; + // cast(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast(N)->getOrdering() == AtomicOrdering::Acquire @@ -766,8 +811,6 @@ class PatFrags frags, code pred = [{}], // cast(N)->getMemoryVT().getScalarType() == MVT::; // cast(N)->getMemoryVT().getScalarType() == MVT::; ValueType ScalarMemoryVT = ?; - - // TODO: Add alignment } // PatFrag - A version of PatFrags matching only a single fragment. @@ -813,6 +856,11 @@ class ImmLeaf : ImmLeaf; + // An ImmLeaf except that Imm is an APInt. This is useful when you need to // zero-extend the immediate instead of sign-extend it. 
// @@ -1111,6 +1159,16 @@ def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -1148,14 +1206,26 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), let IsStore = 1; let MemoryVT = f32; } +def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} -def nonvolatile_load : PatFrag<(ops node:$ptr), - (load node:$ptr), [{ - return !cast(N)->isVolatile(); +// TODO: Split these into volatile and unordered flavors to enable +// selectively legal optimizations for each. (See D66309) +def simple_load : PatFrag<(ops node:$ptr), + (load node:$ptr), [{ + return cast(N)->isSimple(); }]>; -def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return !cast(N)->isVolatile(); +def simple_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->isSimple(); }]>; // nontemporal store fragments. 
@@ -1277,6 +1347,12 @@ def any_flog2 : PatFrags<(ops node:$src), def any_frint : PatFrags<(ops node:$src), [(strict_frint node:$src), (frint node:$src)]>; +def any_lrint : PatFrags<(ops node:$src), + [(strict_lrint node:$src), + (lrint node:$src)]>; +def any_llrint : PatFrags<(ops node:$src), + [(strict_llrint node:$src), + (llrint node:$src)]>; def any_fnearbyint : PatFrags<(ops node:$src), [(strict_fnearbyint node:$src), (fnearbyint node:$src)]>; @@ -1286,6 +1362,12 @@ def any_fceil : PatFrags<(ops node:$src), def any_ffloor : PatFrags<(ops node:$src), [(strict_ffloor node:$src), (ffloor node:$src)]>; +def any_lround : PatFrags<(ops node:$src), + [(strict_lround node:$src), + (lround node:$src)]>; +def any_llround : PatFrags<(ops node:$src), + [(strict_llround node:$src), + (llround node:$src)]>; def any_fround : PatFrags<(ops node:$src), [(strict_fround node:$src), (fround node:$src)]>; @@ -1310,6 +1392,12 @@ def any_extloadf32 : PatFrags<(ops node:$ptr), def any_extloadf64 : PatFrags<(ops node:$ptr), [(strict_extloadf64 node:$ptr), (extloadf64 node:$ptr)]>; +def any_fp_to_sint : PatFrags<(ops node:$src), + [(strict_fp_to_sint node:$src), + (fp_to_sint node:$src)]>; +def any_fp_to_uint : PatFrags<(ops node:$src), + [(strict_fp_to_uint node:$src), + (fp_to_uint node:$src)]>; multiclass binary_atomic_op_ord { def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), @@ -1367,26 +1455,26 @@ multiclass ternary_atomic_op_ord { } } -multiclass binary_atomic_op { +multiclass binary_atomic_op { def _8 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i8; + let MemoryVT = !if(IsInt, i8, ?); } def _16 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i16; + let MemoryVT = !if(IsInt, i16, f16); } def _32 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i32; + let MemoryVT = !if(IsInt, i32, f32); } 
def _64 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i64; + let MemoryVT = !if(IsInt, i64, f64); } defm NAME#_8 : binary_atomic_op_ord; diff --git a/include/llvm/TextAPI/MachO/Architecture.h b/include/llvm/TextAPI/MachO/Architecture.h index 055baeb0c0f..3898cbada68 100644 --- a/include/llvm/TextAPI/MachO/Architecture.h +++ b/include/llvm/TextAPI/MachO/Architecture.h @@ -14,6 +14,7 @@ #define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -39,6 +40,9 @@ StringRef getArchitectureName(Architecture Arch); /// Convert an architecture slice to a CPU Type and Subtype pair. std::pair getCPUTypeFromArchitecture(Architecture Arch); +/// Convert a target to an architecture slice. +Architecture mapToArchitecture(const llvm::Triple &Target); + raw_ostream &operator<<(raw_ostream &OS, Architecture Arch); } // end namespace MachO. diff --git a/include/llvm/TextAPI/MachO/ArchitectureSet.h b/include/llvm/TextAPI/MachO/ArchitectureSet.h index d8dfc7f1af2..6e4ede6275b 100644 --- a/include/llvm/TextAPI/MachO/ArchitectureSet.h +++ b/include/llvm/TextAPI/MachO/ArchitectureSet.h @@ -59,6 +59,10 @@ public: ArchSetType rawValue() const { return ArchSet; } + bool hasX86() const { + return has(AK_i386) || has(AK_x86_64) || has(AK_x86_64h); + } + template class arch_iterator : public std::iterator { diff --git a/include/llvm/TextAPI/MachO/InterfaceFile.h b/include/llvm/TextAPI/MachO/InterfaceFile.h index e722449d52f..bd434e04b69 100644 --- a/include/llvm/TextAPI/MachO/InterfaceFile.h +++ b/include/llvm/TextAPI/MachO/InterfaceFile.h @@ -26,21 +26,13 @@ #include "llvm/TextAPI/MachO/Architecture.h" #include "llvm/TextAPI/MachO/ArchitectureSet.h" #include "llvm/TextAPI/MachO/PackedVersion.h" +#include "llvm/TextAPI/MachO/Platform.h" #include "llvm/TextAPI/MachO/Symbol.h" +#include "llvm/TextAPI/MachO/Target.h" namespace llvm { 
namespace MachO { -/// Defines the list of MachO platforms. -enum class PlatformKind : unsigned { - unknown, - macOS = MachO::PLATFORM_MACOS, - iOS = MachO::PLATFORM_IOS, - tvOS = MachO::PLATFORM_TVOS, - watchOS = MachO::PLATFORM_WATCHOS, - bridgeOS = MachO::PLATFORM_BRIDGEOS, -}; - /// Defines a list of Objective-C constraints. enum class ObjCConstraintType : unsigned { /// No constraint. @@ -75,6 +67,9 @@ enum FileType : unsigned { /// Text-based stub file (.tbd) version 3.0 TBD_V3 = 1U << 2, + /// Text-based stub file (.tbd) version 4.0 + TBD_V4 = 1U << 3, + All = ~0U, LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All), @@ -89,29 +84,42 @@ public: InterfaceFileRef(StringRef InstallName) : InstallName(InstallName) {} - InterfaceFileRef(StringRef InstallName, ArchitectureSet Archs) - : InstallName(InstallName), Architectures(Archs) {} + InterfaceFileRef(StringRef InstallName, const TargetList Targets) + : InstallName(InstallName), Targets(std::move(Targets)) {} StringRef getInstallName() const { return InstallName; }; - void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; } - ArchitectureSet getArchitectures() const { return Architectures; } - bool hasArchitecture(Architecture Arch) const { - return Architectures.has(Arch); + + void addTarget(const Target &Target); + template void addTargets(RangeT &&Targets) { + for (const auto &Target : Targets) + addTarget(Target(Target)); } + using const_target_iterator = TargetList::const_iterator; + using const_target_range = llvm::iterator_range; + const_target_range targets() const { return {Targets}; } + + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); + } + + PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); } + bool operator==(const InterfaceFileRef &O) const { - return std::tie(InstallName, Architectures) == - std::tie(O.InstallName, O.Architectures); + return std::tie(InstallName, Targets) == std::tie(O.InstallName, O.Targets); + } + + bool 
operator!=(const InterfaceFileRef &O) const { + return std::tie(InstallName, Targets) != std::tie(O.InstallName, O.Targets); } bool operator<(const InterfaceFileRef &O) const { - return std::tie(InstallName, Architectures) < - std::tie(O.InstallName, O.Architectures); + return std::tie(InstallName, Targets) < std::tie(O.InstallName, O.Targets); } private: std::string InstallName; - ArchitectureSet Architectures; + TargetList Targets; }; } // end namespace MachO. @@ -170,27 +178,43 @@ public: /// \return The file type. FileType getFileType() const { return FileKind; } - /// Set the platform. - void setPlatform(PlatformKind Platform_) { Platform = Platform_; } - - /// Get the platform. - PlatformKind getPlatform() const { return Platform; } - - /// Specify the set of supported architectures by this file. - void setArchitectures(ArchitectureSet Architectures_) { - Architectures = Architectures_; + /// Get the architectures. + /// + /// \return The applicable architectures. + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); } - /// Add the set of supported architectures by this file. - void addArchitectures(ArchitectureSet Architectures_) { - Architectures |= Architectures_; + /// Get the platforms. + /// + /// \return The applicable platforms. + PlatformSet getPlatforms() const { return mapToPlatformSet(Targets); } + + /// Set and add target. + /// + /// \param Target the target to add into. + void addTarget(const Target &Target); + + /// Set and add targets. + /// + /// Add the subset of llvm::triples that is supported by Tapi + /// + /// \param Targets the collection of targets. + template void addTargets(RangeT &&Targets) { + for (const auto &Target_ : Targets) + addTarget(Target(Target_)); } - /// Add supported architecture by this file.. 
- void addArch(Architecture Arch) { Architectures.set(Arch); } + using const_target_iterator = TargetList::const_iterator; + using const_target_range = llvm::iterator_range; + const_target_range targets() const { return {Targets}; } - /// Get the set of supported architectures. - ArchitectureSet getArchitectures() const { return Architectures; } + using const_filtered_target_iterator = + llvm::filter_iterator>; + using const_filtered_target_range = + llvm::iterator_range; + const_filtered_target_range targets(ArchitectureSet Archs) const; /// Set the install name of the library. void setInstallName(StringRef InstallName_) { InstallName = InstallName_; } @@ -244,11 +268,18 @@ public: /// Check if this file was generated during InstallAPI. bool isInstallAPI() const { return IsInstallAPI; } - /// Set the parent umbrella framework. - void setParentUmbrella(StringRef Parent) { ParentUmbrella = Parent; } + /// Set the parent umbrella frameworks. + /// \param Target_ The target applicable to Parent + /// \param Parent The name of Parent + void addParentUmbrella(const Target &Target_, StringRef Parent); + const std::vector> &umbrellas() const { + return ParentUmbrellas; + } /// Get the parent umbrella framework. - StringRef getParentUmbrella() const { return ParentUmbrella; } + const std::vector> getParentUmbrellas() const { + return ParentUmbrellas; + } /// Add an allowable client. /// @@ -257,9 +288,9 @@ public: /// that is being generated needs to match one of the allowable clients or the /// linker refuses to link this library. /// - /// \param Name The name of the client that is allowed to link this library. - /// \param Architectures The set of architecture for which this applies. - void addAllowableClient(StringRef Name, ArchitectureSet Architectures); + /// \param InstallName The name of the client that is allowed to link this library. + /// \param Target The target triple for which this applies. 
+ void addAllowableClient(StringRef InstallName, const Target &Target); /// Get the list of allowable clients. /// @@ -271,9 +302,8 @@ public: /// Add a re-exported library. /// /// \param InstallName The name of the library to re-export. - /// \param Architectures The set of architecture for which this applies. - void addReexportedLibrary(StringRef InstallName, - ArchitectureSet Architectures); + /// \param Target The target triple for which this applies. + void addReexportedLibrary(StringRef InstallName, const Target &Target); /// Get the list of re-exported libraries. /// @@ -282,27 +312,27 @@ public: return ReexportedLibraries; } - /// Add an architecture/UUID pair. + /// Add an Target/UUID pair. /// - /// \param Arch The architecture for which this applies. + /// \param Target The target triple for which this applies. /// \param UUID The UUID of the library for the specified architecture. - void addUUID(Architecture Arch, StringRef UUID); + void addUUID(const Target &Target, StringRef UUID); - /// Add an architecture/UUID pair. + /// Add an Target/UUID pair. /// - /// \param Arch The architecture for which this applies. + /// \param Target The target triple for which this applies. /// \param UUID The UUID of the library for the specified architecture. - void addUUID(Architecture Arch, uint8_t UUID[16]); + void addUUID(const Target &Target, uint8_t UUID[16]); - /// Get the list of architecture/UUID pairs. + /// Get the list of Target/UUID pairs. /// - /// \return Returns a list of architecture/UUID pairs. - const std::vector> &uuids() const { + /// \return Returns a list of Target/UUID pairs. + const std::vector> &uuids() const { return UUIDs; } /// Add a symbol to the symbols list or extend an existing one. 
- void addSymbol(SymbolKind Kind, StringRef Name, ArchitectureSet Architectures, + void addSymbol(SymbolKind Kind, StringRef Name, const TargetList &Targets, SymbolFlags Flags = SymbolFlags::None); using SymbolMapType = DenseMap; @@ -320,84 +350,35 @@ public: reference operator*() const { return I->second; } pointer operator->() const { return I->second; } }; + using const_symbol_range = iterator_range; - // Custom iterator to return only exported symbols. - struct const_export_iterator - : public iterator_adaptor_base< - const_export_iterator, const_symbol_iterator, - std::forward_iterator_tag, const Symbol *> { - const_symbol_iterator _end; - - void skipToNextSymbol() { - while (I != _end && I->isUndefined()) - ++I; - } - - const_export_iterator() = default; - template - const_export_iterator(U &&it, U &&end) - : iterator_adaptor_base(std::forward(it)), - _end(std::forward(end)) { - skipToNextSymbol(); - } - - const_export_iterator &operator++() { - ++I; - skipToNextSymbol(); - return *this; - } - - const_export_iterator operator++(int) { - const_export_iterator tmp(*this); - ++(*this); - return tmp; - } - }; - using const_export_range = llvm::iterator_range; - - // Custom iterator to return only undefined symbols. 
- struct const_undefined_iterator - : public iterator_adaptor_base< - const_undefined_iterator, const_symbol_iterator, - std::forward_iterator_tag, const Symbol *> { - const_symbol_iterator _end; - - void skipToNextSymbol() { - while (I != _end && !I->isUndefined()) - ++I; - } - - const_undefined_iterator() = default; - template - const_undefined_iterator(U &&it, U &&end) - : iterator_adaptor_base(std::forward(it)), - _end(std::forward(end)) { - skipToNextSymbol(); - } - - const_undefined_iterator &operator++() { - ++I; - skipToNextSymbol(); - return *this; - } - - const_undefined_iterator operator++(int) { - const_undefined_iterator tmp(*this); - ++(*this); - return tmp; - } - }; - using const_undefined_range = llvm::iterator_range; + using const_filtered_symbol_iterator = + filter_iterator>; + using const_filtered_symbol_range = + iterator_range; const_symbol_range symbols() const { return {Symbols.begin(), Symbols.end()}; } - const_export_range exports() const { - return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}}; + + const_filtered_symbol_range exports() const { + std::function fn = [](const Symbol *Symbol) { + return !Symbol->isUndefined(); + }; + return make_filter_range( + make_range({Symbols.begin()}, {Symbols.end()}), + fn); } - const_undefined_range undefineds() const { - return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}}; + + const_filtered_symbol_range undefineds() const { + std::function fn = [](const Symbol *Symbol) { + return Symbol->isUndefined(); + }; + return make_filter_range( + make_range({Symbols.begin()}, {Symbols.end()}), + fn); } private: @@ -411,10 +392,9 @@ private: return StringRef(reinterpret_cast(Ptr), String.size()); } + TargetList Targets; std::string Path; FileType FileKind; - PlatformKind Platform; - ArchitectureSet Architectures; std::string InstallName; PackedVersion CurrentVersion; PackedVersion CompatibilityVersion; @@ -423,10 +403,10 @@ private: bool IsAppExtensionSafe{false}; bool 
IsInstallAPI{false}; ObjCConstraintType ObjcConstraint = ObjCConstraintType::None; - std::string ParentUmbrella; + std::vector> ParentUmbrellas; std::vector AllowableClients; std::vector ReexportedLibraries; - std::vector> UUIDs; + std::vector> UUIDs; SymbolMapType Symbols; }; diff --git a/include/llvm/TextAPI/MachO/Platform.h b/include/llvm/TextAPI/MachO/Platform.h new file mode 100644 index 00000000000..a22aae9b7dc --- /dev/null +++ b/include/llvm/TextAPI/MachO/Platform.h @@ -0,0 +1,45 @@ +//===- llvm/TextAPI/MachO/Platform.h - Platform -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the Platforms supported by Tapi and helpers. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H +#define LLVM_TEXTAPI_MACHO_PLATFORM_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/BinaryFormat/MachO.h" + +namespace llvm { +namespace MachO { + +/// Defines the list of MachO platforms. +enum class PlatformKind : unsigned { + unknown, + macOS = MachO::PLATFORM_MACOS, + iOS = MachO::PLATFORM_IOS, + tvOS = MachO::PLATFORM_TVOS, + watchOS = MachO::PLATFORM_WATCHOS, + bridgeOS = MachO::PLATFORM_BRIDGEOS, + macCatalyst = MachO::PLATFORM_MACCATALYST, + iOSSimulator = MachO::PLATFORM_IOSSIMULATOR, + tvOSSimulator = MachO::PLATFORM_TVOSSIMULATOR, + watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR +}; + +using PlatformSet = SmallSet; + +PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim); +PlatformKind mapToPlatformKind(const Triple &Target); +PlatformSet mapToPlatformSet(ArrayRef Targets); +StringRef getPlatformName(PlatformKind Platform); + +} // end namespace MachO. +} // end namespace llvm. 
+ +#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H \ No newline at end of file diff --git a/include/llvm/TextAPI/MachO/Symbol.h b/include/llvm/TextAPI/MachO/Symbol.h index 3c7ff5e0f4e..1b1632c599c 100644 --- a/include/llvm/TextAPI/MachO/Symbol.h +++ b/include/llvm/TextAPI/MachO/Symbol.h @@ -14,6 +14,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TextAPI/MachO/ArchitectureSet.h" +#include "llvm/TextAPI/MachO/Target.h" namespace llvm { namespace MachO { @@ -37,7 +38,10 @@ enum class SymbolFlags : uint8_t { /// Undefined Undefined = 1U << 3, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined), + /// Rexported + Rexported = 1U << 4, + + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported), }; // clang-format on @@ -49,16 +53,18 @@ enum class SymbolKind : uint8_t { ObjectiveCInstanceVariable, }; +using TargetList = SmallVector; class Symbol { public: - constexpr Symbol(SymbolKind Kind, StringRef Name, - ArchitectureSet Architectures, SymbolFlags Flags) - : Name(Name), Architectures(Architectures), Kind(Kind), Flags(Flags) {} + Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags) + : Name(Name), Targets(std::move(Targets)), Kind(Kind), Flags(Flags) {} + void addTarget(Target target) { Targets.emplace_back(target); } SymbolKind getKind() const { return Kind; } StringRef getName() const { return Name; } - ArchitectureSet getArchitectures() const { return Architectures; } - void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; } + ArchitectureSet getArchitectures() const { + return mapToArchitectureSet(Targets); + } SymbolFlags getFlags() const { return Flags; } bool isWeakDefined() const { @@ -78,6 +84,21 @@ public: return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined; } + bool isReexported() const { + return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported; + } + + using const_target_iterator = TargetList::const_iterator; + using const_target_range = 
llvm::iterator_range; + const_target_range targets() const { return {Targets}; } + + using const_filtered_target_iterator = + llvm::filter_iterator>; + using const_filtered_target_range = + llvm::iterator_range; + const_filtered_target_range targets(ArchitectureSet architectures) const; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump(raw_ostream &OS) const; void dump() const { dump(llvm::errs()); } @@ -85,7 +106,7 @@ public: private: StringRef Name; - ArchitectureSet Architectures; + TargetList Targets; SymbolKind Kind; SymbolFlags Flags; }; diff --git a/include/llvm/TextAPI/MachO/Target.h b/include/llvm/TextAPI/MachO/Target.h new file mode 100644 index 00000000000..5fe44cb7d36 --- /dev/null +++ b/include/llvm/TextAPI/MachO/Target.h @@ -0,0 +1,68 @@ +//===- llvm/TextAPI/Target.h - TAPI Target ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TEXTAPI_MACHO_TARGET_H +#define LLVM_TEXTAPI_MACHO_TARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Error.h" +#include "llvm/TextAPI/MachO/Architecture.h" +#include "llvm/TextAPI/MachO/ArchitectureSet.h" +#include "llvm/TextAPI/MachO/Platform.h" + +namespace llvm { +namespace MachO { + +// This is similar to a llvm Triple, but the triple doesn't have all the +// information we need. For example there is no enum value for x86_64h. The +// only way to get that information is to parse the triple string. 
+class Target { +public: + Target() = default; + Target(Architecture Arch, PlatformKind Platform) + : Arch(Arch), Platform(Platform) {} + explicit Target(const llvm::Triple &Triple) + : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformKind(Triple)) {} + + static llvm::Expected create(StringRef Target); + + operator std::string() const; + + Architecture Arch; + PlatformKind Platform; +}; + +inline bool operator==(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) == std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator!=(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) != std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator<(const Target &LHS, const Target &RHS) { + return std::tie(LHS.Arch, LHS.Platform) < std::tie(RHS.Arch, RHS.Platform); +} + +inline bool operator==(const Target &LHS, const Architecture &RHS) { + return LHS.Arch == RHS; +} + +inline bool operator!=(const Target &LHS, const Architecture &RHS) { + return LHS.Arch != RHS; +} + +PlatformSet mapToPlatformSet(ArrayRef Targets); +ArchitectureSet mapToArchitectureSet(ArrayRef Targets); + +raw_ostream &operator<<(raw_ostream &OS, const Target &Target); + +} // namespace MachO +} // namespace llvm + +#endif // LLVM_TEXTAPI_MACHO_TARGET_H diff --git a/include/llvm/TextAPI/MachO/TextAPIReader.h b/include/llvm/TextAPI/MachO/TextAPIReader.h index 6d9c09de529..c551f0454e8 100644 --- a/include/llvm/TextAPI/MachO/TextAPIReader.h +++ b/include/llvm/TextAPI/MachO/TextAPIReader.h @@ -20,10 +20,7 @@ class InterfaceFile; class TextAPIReader { public: static Expected> - get(std::unique_ptr InputBuffer); - - static Expected> - getUnmanaged(llvm::MemoryBuffer *InputBuffer); + get(MemoryBufferRef InputBuffer); TextAPIReader() = delete; }; diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h index 5dbe21ac5e4..3dbe0fcd76e 100644 --- a/include/llvm/Transforms/IPO/Attributor.h +++ 
b/include/llvm/Transforms/IPO/Attributor.h @@ -60,13 +60,12 @@ // manifest their result in the IR for passes to come. // // Attribute manifestation is not mandatory. If desired, there is support to -// generate a single LLVM-IR attribute already in the AbstractAttribute base -// class. In the simplest case, a subclass overloads -// `AbstractAttribute::getManifestPosition()` and -// `AbstractAttribute::getAttrKind()` to return the appropriate values. The -// Attributor manifestation framework will then create and place a new attribute -// if it is allowed to do so (based on the abstract state). Other use cases can -// be achieved by overloading other abstract attribute methods. +// generate a single or multiple LLVM-IR attributes already in the helper struct +// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with +// a proper Attribute::AttrKind as template parameter. The Attributor +// manifestation framework will then create and place a new attribute if it is +// allowed to do so (based on the abstract state). Other use cases can be +// achieved by overloading AbstractAttribute or IRAttribute methods. 
// // // The "mechanics" of adding a new "abstract attribute": @@ -97,7 +96,13 @@ #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H -#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -105,6 +110,7 @@ namespace llvm { struct AbstractAttribute; struct InformationCache; +struct AAIsDead; class Function; @@ -120,6 +126,563 @@ ChangeStatus operator|(ChangeStatus l, ChangeStatus r); ChangeStatus operator&(ChangeStatus l, ChangeStatus r); ///} +/// Helper to describe and deal with positions in the LLVM-IR. +/// +/// A position in the IR is described by an anchor value and an "offset" that +/// could be the argument number, for call sites and arguments, or an indicator +/// of the "position kind". The kinds, specified in the Kind enum below, include +/// the locations in the attribute list, i.a., function scope and return value, +/// as well as a distinction between call sites and functions. Finally, there +/// are floating values that do not have a corresponding attribute list +/// position. +struct IRPosition { + virtual ~IRPosition() {} + + /// The positions we distinguish in the IR. + /// + /// The values are chosen such that the KindOrArgNo member has a value >= 1 + /// if it is an argument or call site argument while a value < 1 indicates the + /// respective kind of that value. + enum Kind : int { + IRP_INVALID = -6, ///< An invalid position. + IRP_FLOAT = -5, ///< A position that is not associated with a spot suitable + ///< for attributes. This could be any value or instruction. + IRP_RETURNED = -4, ///< An attribute for the function return value. 
+ IRP_CALL_SITE_RETURNED = -3, ///< An attribute for a call site return value. + IRP_FUNCTION = -2, ///< An attribute for a function (scope). + IRP_CALL_SITE = -1, ///< An attribute for a call site (function scope). + IRP_ARGUMENT = 0, ///< An attribute for a function argument. + IRP_CALL_SITE_ARGUMENT = 1, ///< An attribute for a call site argument. + }; + + /// Default constructor available to create invalid positions implicitly. All + /// other positions need to be created explicitly through the appropriate + /// static member function. + IRPosition() : AnchorVal(nullptr), KindOrArgNo(IRP_INVALID) { verify(); } + + /// Create a position describing the value of \p V. + static const IRPosition value(const Value &V) { + if (auto *Arg = dyn_cast(&V)) + return IRPosition::argument(*Arg); + if (auto *CB = dyn_cast(&V)) + return IRPosition::callsite_returned(*CB); + return IRPosition(const_cast(V), IRP_FLOAT); + } + + /// Create a position describing the function scope of \p F. + static const IRPosition function(const Function &F) { + return IRPosition(const_cast(F), IRP_FUNCTION); + } + + /// Create a position describing the returned value of \p F. + static const IRPosition returned(const Function &F) { + return IRPosition(const_cast(F), IRP_RETURNED); + } + + /// Create a position describing the argument \p Arg. + static const IRPosition argument(const Argument &Arg) { + return IRPosition(const_cast(Arg), Kind(Arg.getArgNo())); + } + + /// Create a position describing the function scope of \p CB. + static const IRPosition callsite_function(const CallBase &CB) { + return IRPosition(const_cast(CB), IRP_CALL_SITE); + } + + /// Create a position describing the returned value of \p CB. + static const IRPosition callsite_returned(const CallBase &CB) { + return IRPosition(const_cast(CB), IRP_CALL_SITE_RETURNED); + } + + /// Create a position describing the argument of \p CB at position \p ArgNo. 
+ static const IRPosition callsite_argument(const CallBase &CB, + unsigned ArgNo) { + return IRPosition(const_cast(CB), Kind(ArgNo)); + } + + /// Create a position describing the function scope of \p ICS. + static const IRPosition callsite_function(ImmutableCallSite ICS) { + return IRPosition::callsite_function(cast(*ICS.getInstruction())); + } + + /// Create a position describing the returned value of \p ICS. + static const IRPosition callsite_returned(ImmutableCallSite ICS) { + return IRPosition::callsite_returned(cast(*ICS.getInstruction())); + } + + /// Create a position describing the argument of \p ICS at position \p ArgNo. + static const IRPosition callsite_argument(ImmutableCallSite ICS, + unsigned ArgNo) { + return IRPosition::callsite_argument(cast(*ICS.getInstruction()), + ArgNo); + } + + /// Create a position describing the argument of \p ACS at position \p ArgNo. + static const IRPosition callsite_argument(AbstractCallSite ACS, + unsigned ArgNo) { + int CSArgNo = ACS.getCallArgOperandNo(ArgNo); + if (CSArgNo >= 0) + return IRPosition::callsite_argument( + cast(*ACS.getInstruction()), CSArgNo); + return IRPosition(); + } + + /// Create a position with function scope matching the "context" of \p IRP. + /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result + /// will be a call site position, otherwise the function position of the + /// associated function. + static const IRPosition function_scope(const IRPosition &IRP) { + if (IRP.isAnyCallSitePosition()) { + return IRPosition::callsite_function( + cast(IRP.getAnchorValue())); + } + assert(IRP.getAssociatedFunction()); + return IRPosition::function(*IRP.getAssociatedFunction()); + } + + bool operator==(const IRPosition &RHS) const { + return (AnchorVal == RHS.AnchorVal) && (KindOrArgNo == RHS.KindOrArgNo); + } + bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); } + + /// Return the value this abstract attribute is anchored with. 
+ /// + /// The anchor value might not be the associated value if the latter is not + /// sufficient to determine where arguments will be manifested. This is, so + /// far, only the case for call site arguments as the value is not sufficient + /// to pinpoint them. Instead, we can use the call site as an anchor. + /// + ///{ + Value &getAnchorValue() { + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an anchor value!"); + return *AnchorVal; + } + const Value &getAnchorValue() const { + return const_cast(this)->getAnchorValue(); + } + ///} + + /// Return the associated function, if any. + /// + ///{ + Function *getAssociatedFunction() { + if (auto *CB = dyn_cast(AnchorVal)) + return CB->getCalledFunction(); + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an anchor scope!"); + Value &V = getAnchorValue(); + if (isa(V)) + return &cast(V); + if (isa(V)) + return cast(V).getParent(); + if (isa(V)) + return cast(V).getFunction(); + return nullptr; + } + const Function *getAssociatedFunction() const { + return const_cast(this)->getAssociatedFunction(); + } + ///} + + /// Return the associated argument, if any. + /// + ///{ + Argument *getAssociatedArgument() { + if (auto *Arg = dyn_cast(&getAnchorValue())) + return Arg; + int ArgNo = getArgNo(); + if (ArgNo < 0) + return nullptr; + Function *AssociatedFn = getAssociatedFunction(); + if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo)) + return nullptr; + return AssociatedFn->arg_begin() + ArgNo; + } + const Argument *getAssociatedArgument() const { + return const_cast(this)->getAssociatedArgument(); + } + ///} + + /// Return true if the position refers to a function interface, that is the + /// function scope, the function return, or an argumnt. 
+ bool isFnInterfaceKind() const { + switch (getPositionKind()) { + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_ARGUMENT: + return true; + default: + return false; + } + } + + /// Return the Function surrounding the anchor value. + /// + ///{ + Function *getAnchorScope() { + Value &V = getAnchorValue(); + if (isa(V)) + return &cast(V); + if (isa(V)) + return cast(V).getParent(); + if (isa(V)) + return cast(V).getFunction(); + return nullptr; + } + const Function *getAnchorScope() const { + return const_cast(this)->getAnchorScope(); + } + ///} + + /// Return the context instruction, if any. + /// + ///{ + Instruction *getCtxI() { + Value &V = getAnchorValue(); + if (auto *I = dyn_cast(&V)) + return I; + if (auto *Arg = dyn_cast(&V)) + if (!Arg->getParent()->isDeclaration()) + return &Arg->getParent()->getEntryBlock().front(); + if (auto *F = dyn_cast(&V)) + if (!F->isDeclaration()) + return &(F->getEntryBlock().front()); + return nullptr; + } + const Instruction *getCtxI() const { + return const_cast(this)->getCtxI(); + } + ///} + + /// Return the value this abstract attribute is associated with. + /// + ///{ + Value &getAssociatedValue() { + assert(KindOrArgNo != IRP_INVALID && + "Invalid position does not have an associated value!"); + if (getArgNo() < 0 || isa(AnchorVal)) + return *AnchorVal; + assert(isa(AnchorVal) && "Expected a call base!"); + return *cast(AnchorVal)->getArgOperand(getArgNo()); + } + const Value &getAssociatedValue() const { + return const_cast(this)->getAssociatedValue(); + } + ///} + + /// Return the argument number of the associated value if it is an argument or + /// call site argument, otherwise a negative value. + int getArgNo() const { return KindOrArgNo; } + + /// Return the index in the attribute list for this position. 
+ unsigned getAttrIdx() const { + switch (getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + break; + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_CALL_SITE: + return AttributeList::FunctionIndex; + case IRPosition::IRP_RETURNED: + case IRPosition::IRP_CALL_SITE_RETURNED: + return AttributeList::ReturnIndex; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return KindOrArgNo + AttributeList::FirstArgIndex; + } + llvm_unreachable( + "There is no attribute index for a floating or invalid position!"); + } + + /// Return the associated position kind. + Kind getPositionKind() const { + if (getArgNo() >= 0) { + assert(((isa(getAnchorValue()) && + isa(getAssociatedValue())) || + isa(getAnchorValue())) && + "Expected argument or call base due to argument number!"); + if (isa(getAnchorValue())) + return IRP_CALL_SITE_ARGUMENT; + return IRP_ARGUMENT; + } + + assert(KindOrArgNo < 0 && + "Expected (call site) arguments to never reach this point!"); + return Kind(KindOrArgNo); + } + + /// TODO: Figure out if the attribute related helper functions should live + /// here or somewhere else. + + /// Return true if any kind in \p AKs existing in the IR at a position that + /// will affect this one. See also getAttrs(...). + /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, + /// e.g., the function position if this is an + /// argument position, should be ignored. + bool hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions = false) const; + + /// Return the attributes of any kind in \p AKs existing in the IR at a + /// position that will affect this one. While each position can only have a + /// single attribute of any kind in \p AKs, there are "subsuming" positions + /// that could have an attribute as well. This method returns all attributes + /// found in \p Attrs. 
+ void getAttrs(ArrayRef AKs, + SmallVectorImpl &Attrs) const; + + /// Return the attribute of kind \p AK existing in the IR at this position. + Attribute getAttr(Attribute::AttrKind AK) const { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return Attribute(); + + AttributeList AttrList; + if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue())) + AttrList = ICS.getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + if (AttrList.hasAttribute(getAttrIdx(), AK)) + return AttrList.getAttribute(getAttrIdx(), AK); + return Attribute(); + } + + /// Remove the attribute of kind \p AKs existing in the IR at this position. + void removeAttrs(ArrayRef AKs) { + if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) + return; + + AttributeList AttrList; + CallSite CS = CallSite(&getAnchorValue()); + if (CS) + AttrList = CS.getAttributes(); + else + AttrList = getAssociatedFunction()->getAttributes(); + + LLVMContext &Ctx = getAnchorValue().getContext(); + for (Attribute::AttrKind AK : AKs) + AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); + + if (CS) + CS.setAttributes(AttrList); + else + getAssociatedFunction()->setAttributes(AttrList); + } + + bool isAnyCallSitePosition() const { + switch (getPositionKind()) { + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return true; + default: + return false; + } + } + + /// Special DenseMap key values. + /// + ///{ + static const IRPosition EmptyKey; + static const IRPosition TombstoneKey; + ///} + +private: + /// Private constructor for special values only! + explicit IRPosition(int KindOrArgNo) + : AnchorVal(0), KindOrArgNo(KindOrArgNo) {} + + /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK. + explicit IRPosition(Value &AnchorVal, Kind PK) + : AnchorVal(&AnchorVal), KindOrArgNo(PK) { + verify(); + } + + /// Verify internal invariants. 
+ void verify(); + + /// The value this position is anchored at. + Value *AnchorVal; + + /// The argument number, if non-negative, or the position "kind". + int KindOrArgNo; +}; + +/// Helper that allows IRPosition as a key in a DenseMap. +template <> struct DenseMapInfo { + static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } + static inline IRPosition getTombstoneKey() { + return IRPosition::TombstoneKey; + } + static unsigned getHashValue(const IRPosition &IRP) { + return (DenseMapInfo::getHashValue(&IRP.getAnchorValue()) << 4) ^ + (unsigned(IRP.getArgNo())); + } + static bool isEqual(const IRPosition &LHS, const IRPosition &RHS) { + return LHS == RHS; + } +}; + +/// A visitor class for IR positions. +/// +/// Given a position P, the SubsumingPositionIterator allows to visit "subsuming +/// positions" wrt. attributes/information. Thus, if a piece of information +/// holds for a subsuming position, it also holds for the position P. +/// +/// The subsuming positions always include the initial position and then, +/// depending on the position kind, additionally the following ones: +/// - for IRP_RETURNED: +/// - the function (IRP_FUNCTION) +/// - for IRP_ARGUMENT: +/// - the function (IRP_FUNCTION) +/// - for IRP_CALL_SITE: +/// - the callee (IRP_FUNCTION), if known +/// - for IRP_CALL_SITE_RETURNED: +/// - the callee (IRP_RETURNED), if known +/// - the call site (IRP_FUNCTION) +/// - the callee (IRP_FUNCTION), if known +/// - for IRP_CALL_SITE_ARGUMENT: +/// - the argument of the callee (IRP_ARGUMENT), if known +/// - the callee (IRP_FUNCTION), if known +/// - the position the call site argument is associated with if it is not +/// anchored to the call site, e.g., if it is an arugment then the argument +/// (IRP_ARGUMENT) +class SubsumingPositionIterator { + SmallVector IRPositions; + using iterator = decltype(IRPositions)::iterator; + +public: + SubsumingPositionIterator(const IRPosition &IRP); + iterator begin() { return IRPositions.begin(); } 
+ iterator end() { return IRPositions.end(); } +}; + +/// Wrapper for FunctoinAnalysisManager. +struct AnalysisGetter { + template + typename Analysis::Result *getAnalysis(const Function &F) { + if (!MAM || !F.getParent()) + return nullptr; + auto &FAM = MAM->getResult( + const_cast(*F.getParent())) + .getManager(); + return &FAM.getResult(const_cast(F)); + } + + template + typename Analysis::Result *getAnalysis(const Module &M) { + if (!MAM) + return nullptr; + return &MAM->getResult(const_cast(M)); + } + AnalysisGetter(ModuleAnalysisManager &MAM) : MAM(&MAM) {} + AnalysisGetter() {} + +private: + ModuleAnalysisManager *MAM = nullptr; +}; + +/// Data structure to hold cached (LLVM-IR) information. +/// +/// All attributes are given an InformationCache object at creation time to +/// avoid inspection of the IR by all of them individually. This default +/// InformationCache will hold information required by 'default' attributes, +/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) +/// is called. +/// +/// If custom abstract attributes, registered manually through +/// Attributor::registerAA(...), need more information, especially if it is not +/// reusable, it is advised to inherit from the InformationCache and cast the +/// instance down in the abstract attributes. +struct InformationCache { + InformationCache(const Module &M, AnalysisGetter &AG) + : DL(M.getDataLayout()), Explorer(/* ExploreInterBlock */ true), AG(AG) { + + CallGraph *CG = AG.getAnalysis(M); + if (!CG) + return; + + DenseMap SccSize; + for (scc_iterator I = scc_begin(CG); !I.isAtEnd(); ++I) { + for (CallGraphNode *Node : *I) + SccSize[Node->getFunction()] = I->size(); + } + SccSizeOpt = std::move(SccSize); + } + + /// A map type from opcodes to instructions with this opcode. + using OpcodeInstMapTy = DenseMap>; + + /// Return the map that relates "interesting" opcodes with all instructions + /// with that opcode in \p F. 
+ OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) { + return FuncInstOpcodeMap[&F]; + } + + /// A vector type to hold instructions. + using InstructionVectorTy = std::vector; + + /// Return the instructions in \p F that may read or write memory. + InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) { + return FuncRWInstsMap[&F]; + } + + /// Return MustBeExecutedContextExplorer + MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() { + return Explorer; + } + + /// Return TargetLibraryInfo for function \p F. + TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { + return AG.getAnalysis(F); + } + + /// Return AliasAnalysis Result for function \p F. + AAResults *getAAResultsForFunction(const Function &F) { + return AG.getAnalysis(F); + } + + /// Return SCC size on call graph for function \p F. + unsigned getSccSize(const Function &F) { + if (!SccSizeOpt.hasValue()) + return 0; + return (SccSizeOpt.getValue())[&F]; + } + + /// Return datalayout used in the module. + const DataLayout &getDL() { return DL; } + +private: + /// A map type from functions to opcode to instruction maps. + using FuncInstOpcodeMapTy = DenseMap; + + /// A map type from functions to their read or write instructions. + using FuncRWInstsMapTy = DenseMap; + + /// A nested map that remembers all instructions in a function with a certain + /// instruction opcode (Instruction::getOpcode()). + FuncInstOpcodeMapTy FuncInstOpcodeMap; + + /// A map from functions to their instructions that may read or write memory. + FuncRWInstsMapTy FuncRWInstsMap; + + /// The datalayout used in the module. + const DataLayout &DL; + + /// MustBeExecutedContextExplorer + MustBeExecutedContextExplorer Explorer; + + /// Getters for analysis. + AnalysisGetter &AG; + + /// Cache result for scc size in the call graph + Optional> SccSizeOpt; + + /// Give the Attributor access to the members so + /// Attributor::identifyDefaultAbstractAttributes(...) 
can initialize them. + friend struct Attributor; +}; + /// The fixpoint analysis framework that orchestrates the attribute deduction. /// /// The Attributor provides a general abstract analysis framework (guided @@ -148,6 +711,18 @@ ChangeStatus operator&(ChangeStatus l, ChangeStatus r); /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. struct Attributor { + /// Constructor + /// + /// \param InfoCache Cache to hold various information accessible for + /// the abstract attributes. + /// \param DepRecomputeInterval Number of iterations until the dependences + /// between abstract attributes are recomputed. + /// \param Whitelist If not null, a set limiting the attribute opportunities. + Attributor(InformationCache &InfoCache, unsigned DepRecomputeInterval, + DenseSet *Whitelist = nullptr) + : InfoCache(InfoCache), DepRecomputeInterval(DepRecomputeInterval), + Whitelist(Whitelist) {} + ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); } /// Run the analyses until a fixpoint is reached or enforced (timeout). @@ -156,12 +731,13 @@ struct Attributor { /// as the Attributor is not destroyed (it owns the attributes now). /// /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. - ChangeStatus run(); + ChangeStatus run(Module &M); - /// Lookup an abstract attribute of type \p AAType anchored at value \p V and - /// argument number \p ArgNo. If no attribute is found and \p V is a call base - /// instruction, the called function is tried as a value next. Thus, the - /// returned abstract attribute might be anchored at the callee of \p V. + /// Lookup an abstract attribute of type \p AAType at position \p IRP. While + /// no abstract attribute is found equivalent positions are checked, see + /// SubsumingPositionIterator. Thus, the returned abstract attribute + /// might be anchored at a different position, e.g., the callee if \p IRP is a + /// call base. 
/// /// This method is the only (supported) way an abstract attribute can retrieve /// information from another abstract attribute. As an example, take an @@ -170,51 +746,29 @@ struct Attributor { /// most optimistic information for other abstract attributes in-flight, e.g. /// the one reasoning about the "captured" state for the argument or the one /// reasoning on the memory access behavior of the function as a whole. + /// + /// If the flag \p TrackDependence is set to false the dependence from + /// \p QueryingAA to the return abstract attribute is not automatically + /// recorded. This should only be used if the caller will record the + /// dependence explicitly if necessary, thus if it the returned abstract + /// attribute is used for reasoning. To record the dependences explicitly use + /// the `Attributor::recordDependence` method. template - const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V, - int ArgNo = -1) { - static_assert(std::is_base_of::value, - "Cannot query an attribute with a type not derived from " - "'AbstractAttribute'!"); - assert(AAType::ID != Attribute::None && - "Cannot lookup generic abstract attributes!"); + const AAType &getAAFor(const AbstractAttribute &QueryingAA, + const IRPosition &IRP, bool TrackDependence = true) { + return getOrCreateAAFor(IRP, &QueryingAA, TrackDependence); + } - // Determine the argument number automatically for llvm::Arguments if none - // is set. Do not override a given one as it could be a use of the argument - // in a call site. - if (ArgNo == -1) - if (auto *Arg = dyn_cast(&V)) - ArgNo = Arg->getArgNo(); - - // If a function was given together with an argument number, perform the - // lookup for the actual argument instead. Don't do it for variadic - // arguments. - if (ArgNo >= 0 && isa(&V) && - cast(&V)->arg_size() > (size_t)ArgNo) - return getAAFor( - QueryingAA, *(cast(&V)->arg_begin() + ArgNo), ArgNo); - - // Lookup the abstract attribute of type AAType. 
If found, return it after - // registering a dependence of QueryingAA on the one returned attribute. - const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo}); - if (AAType *AA = static_cast( - KindToAbstractAttributeMap.lookup(AAType::ID))) { - // Do not return an attribute with an invalid state. This minimizes checks - // at the calls sites and allows the fallback below to kick in. - if (AA->getState().isValidState()) { - QueryMap[AA].insert(&QueryingAA); - return AA; - } - } - - // If no abstract attribute was found and we look for a call site argument, - // defer to the actual argument instead. - ImmutableCallSite ICS(&V); - if (ICS && ICS.getCalledValue()) - return getAAFor(QueryingAA, *ICS.getCalledValue(), ArgNo); - - // No matching attribute found - return nullptr; + /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if + /// \p FromAA changes \p ToAA should be updated as well. + /// + /// This method should be used in conjunction with the `getAAFor` method and + /// with the TrackDependence flag passed to the method set to false. This can + /// be beneficial to avoid false dependences but it requires the users of + /// `getAAFor` to explicitly record true dependences through this method. + void recordDependence(const AbstractAttribute &FromAA, + const AbstractAttribute &ToAA) { + QueryMap[&FromAA].insert(const_cast(&ToAA)); } /// Introduce a new abstract attribute into the fixpoint analysis. @@ -222,126 +776,242 @@ struct Attributor { /// Note that ownership of the attribute is given to the Attributor. It will /// invoke delete for the Attributor on destruction of the Attributor. /// - /// Attributes are identified by - /// (1) their anchored value (see AA.getAnchoredValue()), - /// (2) their argument number (\p ArgNo, or Argument::getArgNo()), and - /// (3) their default attribute kind (see AAType::ID). 
- template <typename AAType> AAType &registerAA(AAType &AA, int ArgNo = -1) { + /// Attributes are identified by their IR position (AAType::getIRPosition()) + /// and the address of their static member (see AAType::ID). + template <typename AAType> AAType &registerAA(AAType &AA) { static_assert(std::is_base_of<AbstractAttribute, AAType>::value, "Cannot register an attribute with a type not derived from " "'AbstractAttribute'!"); - - // Determine the anchor value and the argument number which are used to - // lookup the attribute together with AAType::ID. If passed an argument, - // use its argument number but do not override a given one as it could be a - // use of the argument at a call site. - Value &AnchoredVal = AA.getAnchoredValue(); - if (ArgNo == -1) - if (auto *Arg = dyn_cast<Argument>(&AnchoredVal)) - ArgNo = Arg->getArgNo(); - // Put the attribute in the lookup map structure and the container we use to // keep track of all attributes. - AAMap[{&AnchoredVal, ArgNo}][AAType::ID] = &AA; + IRPosition &IRP = AA.getIRPosition(); + auto &KindToAbstractAttributeMap = AAMap[IRP]; + assert(!KindToAbstractAttributeMap.count(&AAType::ID) && + "Attribute already in map!"); + KindToAbstractAttributeMap[&AAType::ID] = &AA; AllAbstractAttributes.push_back(&AA); return AA; } + /// Return the internal information cache. + InformationCache &getInfoCache() { return InfoCache; } + /// Determine opportunities to derive 'default' attributes in \p F and create /// abstract attribute objects for them. /// /// \param F The function that is checked for attribute opportunities. - /// \param InfoCache A cache for information queryable by the new attributes. - /// \param Whitelist If not null, a set limiting the attribute opportunities. /// /// Note that abstract attribute instances are generally created even if the /// IR already contains the information they would deduce. The most important /// reason for this is the single interface, the one of the abstract attribute /// instance, which can be queried without the need to look at the IR in /// various places. 
- void identifyDefaultAbstractAttributes( - Function &F, InformationCache &InfoCache, - DenseSet *Whitelist = nullptr); + void identifyDefaultAbstractAttributes(Function &F); + + /// Initialize the information cache for queries regarding function \p F. + /// + /// This method needs to be called for all function that might be looked at + /// through the information cache interface *prior* to looking at them. + void initializeInformationCache(Function &F); + + /// Mark the internal function \p F as live. + /// + /// This will trigger the identification and initialization of attributes for + /// \p F. + void markLiveInternalFunction(const Function &F) { + assert(F.hasLocalLinkage() && + "Only local linkage is assumed dead initially."); + + identifyDefaultAbstractAttributes(const_cast(F)); + } + + /// Record that \p I is deleted after information was manifested. + void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } + + /// Record that \p BB is deleted after information was manifested. + void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); } + + /// Record that \p F is deleted after information was manifested. + void deleteAfterManifest(Function &F) { ToBeDeletedFunctions.insert(&F); } + + /// Return true if \p AA (or its context instruction) is assumed dead. + /// + /// If \p LivenessAA is not provided it is queried. + bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA); /// Check \p Pred on all function call sites. /// /// This method will evaluate \p Pred on call sites and return /// true if \p Pred holds in every call sites. However, this is only possible /// all call sites are known, hence the function has internal linkage. - bool checkForAllCallSites(Function &F, std::function &Pred, + bool checkForAllCallSites(const function_ref &Pred, + const AbstractAttribute &QueryingAA, bool RequireAllCallSites); + /// Check \p Pred on all values potentially returned by \p F. 
+ /// + /// This method will evaluate \p Pred on all values potentially returned by + /// the function associated with \p QueryingAA. The returned values are + /// matched with their respective return instructions. Returns true if \p Pred + /// holds on all of them. + bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred, + const AbstractAttribute &QueryingAA); + + /// Check \p Pred on all values potentially returned by the function + /// associated with \p QueryingAA. + /// + /// This is the context insensitive version of the method above. + bool checkForAllReturnedValues(const function_ref &Pred, + const AbstractAttribute &QueryingAA); + + /// Check \p Pred on all instructions with an opcode present in \p Opcodes. + /// + /// This method will evaluate \p Pred on all instructions with an opcode + /// present in \p Opcode and return true if \p Pred holds on all of them. + bool checkForAllInstructions(const function_ref &Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef &Opcodes); + + /// Check \p Pred on all call-like instructions (=CallBased derived). + /// + /// See checkForAllCallLikeInstructions(...) for more information. + bool + checkForAllCallLikeInstructions(const function_ref &Pred, + const AbstractAttribute &QueryingAA) { + return checkForAllInstructions(Pred, QueryingAA, + {(unsigned)Instruction::Invoke, + (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call}); + } + + /// Check \p Pred on all Read/Write instructions. + /// + /// This method will evaluate \p Pred on all instructions that read or write + /// to memory present in the information cache and return true if \p Pred + /// holds on all of them. + bool checkForAllReadWriteInstructions( + const llvm::function_ref &Pred, + AbstractAttribute &QueryingAA); + + /// Return the data layout associated with the anchor scope. + const DataLayout &getDataLayout() const { return InfoCache.DL; } + private: + /// Check \p Pred on all call sites of \p Fn. 
+ /// + /// This method will evaluate \p Pred on call sites and return + /// true if \p Pred holds in every call sites. However, this is only possible + /// all call sites are known, hence the function has internal linkage. + bool checkForAllCallSites(const function_ref &Pred, + const Function &Fn, bool RequireAllCallSites, + const AbstractAttribute *QueryingAA); + + /// The private version of getAAFor that allows to omit a querying abstract + /// attribute. See also the public getAAFor method. + template + const AAType &getOrCreateAAFor(const IRPosition &IRP, + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false) { + if (const AAType *AAPtr = + lookupAAFor(IRP, QueryingAA, TrackDependence)) + return *AAPtr; + + // No matching attribute found, create one. + // Use the static create method. + auto &AA = AAType::createForPosition(IRP, *this); + registerAA(AA); + + // For now we ignore naked and optnone functions. + bool Invalidate = Whitelist && !Whitelist->count(&AAType::ID); + if (const Function *Fn = IRP.getAnchorScope()) + Invalidate |= Fn->hasFnAttribute(Attribute::Naked) || + Fn->hasFnAttribute(Attribute::OptimizeNone); + + // Bootstrap the new attribute with an initial update to propagate + // information, e.g., function -> call site. If it is not on a given + // whitelist we will not perform updates at all. + if (Invalidate) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + + AA.initialize(*this); + AA.update(*this); + + if (TrackDependence && AA.getState().isValidState()) + QueryMap[&AA].insert(const_cast(QueryingAA)); + return AA; + } + + /// Return the attribute of \p AAType for \p IRP if existing. 
+ template + const AAType *lookupAAFor(const IRPosition &IRP, + const AbstractAttribute *QueryingAA = nullptr, + bool TrackDependence = false) { + static_assert(std::is_base_of::value, + "Cannot query an attribute with a type not derived from " + "'AbstractAttribute'!"); + assert((QueryingAA || !TrackDependence) && + "Cannot track dependences without a QueryingAA!"); + + // Lookup the abstract attribute of type AAType. If found, return it after + // registering a dependence of QueryingAA on the one returned attribute. + const auto &KindToAbstractAttributeMap = AAMap.lookup(IRP); + if (AAType *AA = static_cast( + KindToAbstractAttributeMap.lookup(&AAType::ID))) { + // Do not register a dependence on an attribute with an invalid state. + if (TrackDependence && AA->getState().isValidState()) + QueryMap[AA].insert(const_cast(QueryingAA)); + return AA; + } + return nullptr; + } + /// The set of all abstract attributes. ///{ using AAVector = SmallVector; AAVector AllAbstractAttributes; ///} - /// A nested map to lookup abstract attributes based on the anchored value and - /// an argument positions (or -1) on the outer level, and attribute kinds - /// (Attribute::AttrKind) on the inner level. + /// A nested map to lookup abstract attributes based on the argument position + /// on the outer level, and the addresses of the static member (AAType::ID) on + /// the inner level. ///{ - using KindToAbstractAttributeMap = DenseMap; - DenseMap, KindToAbstractAttributeMap> AAMap; + using KindToAbstractAttributeMap = + DenseMap; + DenseMap AAMap; ///} /// A map from abstract attributes to the ones that queried them through calls /// to the getAAFor<...>(...) method. ///{ using QueryMapTy = - DenseMap>; + MapVector>; QueryMapTy QueryMap; ///} -}; -/// Data structure to hold cached (LLVM-IR) information. -/// -/// All attributes are given an InformationCache object at creation time to -/// avoid inspection of the IR by all of them individually. 
This default -/// InformationCache will hold information required by 'default' attributes, -/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) -/// is called. -/// -/// If custom abstract attributes, registered manually through -/// Attributor::registerAA(...), need more information, especially if it is not -/// reusable, it is advised to inherit from the InformationCache and cast the -/// instance down in the abstract attributes. -struct InformationCache { - /// A map type from opcodes to instructions with this opcode. - using OpcodeInstMapTy = DenseMap>; + /// The information cache that holds pre-processed (LLVM-IR) information. + InformationCache &InfoCache; - /// Return the map that relates "interesting" opcodes with all instructions - /// with that opcode in \p F. - OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) { - return FuncInstOpcodeMap[&F]; - } + /// Number of iterations until the dependences between abstract attributes are + /// recomputed. + const unsigned DepRecomputeInterval; - /// A vector type to hold instructions. - using InstructionVectorTy = std::vector; + /// If not null, a set limiting the attribute opportunities. + const DenseSet *Whitelist; - /// Return the instructions in \p F that may read or write memory. - InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) { - return FuncRWInstsMap[&F]; - } + /// A set to remember the functions we already assume to be live and visited. + DenseSet VisitedFunctions; -private: - /// A map type from functions to opcode to instruction maps. - using FuncInstOpcodeMapTy = DenseMap; - - /// A map type from functions to their read or write instructions. - using FuncRWInstsMapTy = DenseMap; - - /// A nested map that remembers all instructions in a function with a certain - /// instruction opcode (Instruction::getOpcode()). - FuncInstOpcodeMapTy FuncInstOpcodeMap; - - /// A map from functions to their instructions that may read or write memory. 
- FuncRWInstsMapTy FuncRWInstsMap; - - /// Give the Attributor access to the members so - /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. - friend struct Attributor; + /// Functions, blocks, and instructions we delete after manifest is done. + /// + ///{ + SmallPtrSet ToBeDeletedFunctions; + SmallPtrSet ToBeDeletedBlocks; + SmallPtrSet ToBeDeletedInsts; + ///} }; /// An interface to query the internal state of an abstract attribute. @@ -375,13 +1045,17 @@ struct AbstractState { /// /// This will usually make the optimistically assumed state the known to be /// true state. - virtual void indicateOptimisticFixpoint() = 0; + /// + /// \returns ChangeStatus::UNCHANGED as the assumed value should not change. + virtual ChangeStatus indicateOptimisticFixpoint() = 0; /// Indicate that the abstract state should converge to the pessimistic state. /// /// This will usually revert the optimistically assumed state to the known to /// be true state. - virtual void indicatePessimisticFixpoint() = 0; + /// + /// \returns ChangeStatus::CHANGED as the assumed value may change. + virtual ChangeStatus indicatePessimisticFixpoint() = 0; }; /// Simple state with integers encoding. @@ -412,10 +1086,16 @@ struct IntegerState : public AbstractState { bool isAtFixpoint() const override { return Assumed == Known; } /// See AbstractState::indicateOptimisticFixpoint(...) - void indicateOptimisticFixpoint() override { Known = Assumed; } + ChangeStatus indicateOptimisticFixpoint() override { + Known = Assumed; + return ChangeStatus::UNCHANGED; + } /// See AbstractState::indicatePessimisticFixpoint(...) 
- void indicatePessimisticFixpoint() override { Assumed = Known; } + ChangeStatus indicatePessimisticFixpoint() override { + Assumed = Known; + return ChangeStatus::CHANGED; + } /// Return the known state encoding base_t getKnown() const { return Known; } @@ -448,6 +1128,12 @@ struct IntegerState : public AbstractState { return *this; } + /// Remove the bits in \p BitsEncoding from the "known bits". + IntegerState &removeKnownBits(base_t BitsEncoding) { + Known = (Known & ~BitsEncoding); + return *this; + } + /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones. IntegerState &intersectAssumedBits(base_t BitsEncoding) { // Make sure we never loose any "known bits". @@ -455,6 +1141,62 @@ struct IntegerState : public AbstractState { return *this; } + /// Take minimum of assumed and \p Value. + IntegerState &takeAssumedMinimum(base_t Value) { + // Make sure we never loose "known value". + Assumed = std::max(std::min(Assumed, Value), Known); + return *this; + } + + /// Take maximum of known and \p Value. + IntegerState &takeKnownMaximum(base_t Value) { + // Make sure we never loose "known value". + Assumed = std::max(Value, Assumed); + Known = std::max(Value, Known); + return *this; + } + + /// Equality for IntegerState. + bool operator==(const IntegerState &R) const { + return this->getAssumed() == R.getAssumed() && + this->getKnown() == R.getKnown(); + } + + /// Inequality for IntegerState. + bool operator!=(const IntegerState &R) const { return !(*this == R); } + + /// "Clamp" this state with \p R. The result is the minimum of the assumed + /// information but not less than what was known before. + /// + /// TODO: Consider replacing the operator with a call or using it only when + /// we can also take the maximum of the known information, thus when + /// \p R is not dependent on additional assumed state. + IntegerState operator^=(const IntegerState &R) { + takeAssumedMinimum(R.Assumed); + return *this; + } + + /// "Clamp" this state with \p R. 
The result is the maximum of the known + /// information but not more than what was assumed before. + IntegerState operator+=(const IntegerState &R) { + takeKnownMaximum(R.Known); + return *this; + } + + /// Make this the minimum, known and assumed, of this state and \p R. + IntegerState operator&=(const IntegerState &R) { + Known = std::min(Known, R.Known); + Assumed = std::min(Assumed, R.Assumed); + return *this; + } + + /// Make this the maximum, known and assumed, of this state and \p R. + IntegerState operator|=(const IntegerState &R) { + Known = std::max(Known, R.Known); + Assumed = std::max(Assumed, R.Assumed); + return *this; + } + private: /// The known state encoding in an integer of type base_t. base_t Known = getWorstState(); @@ -468,6 +1210,77 @@ struct BooleanState : public IntegerState { BooleanState() : IntegerState(1){}; }; +/// Helper struct necessary as the modular build fails if the virtual method +/// IRAttribute::manifest is defined in the Attributor.cpp. +struct IRAttributeManifest { + static ChangeStatus manifestAttrs(Attributor &A, IRPosition &IRP, + const ArrayRef &DeducedAttrs); +}; + +/// Helper to tie a abstract state implementation to an abstract attribute. +template +struct StateWrapper : public StateTy, public Base { + /// Provide static access to the type of the state. + using StateType = StateTy; + + /// See AbstractAttribute::getState(...). + StateType &getState() override { return *this; } + + /// See AbstractAttribute::getState(...). + const AbstractState &getState() const override { return *this; } +}; + +/// Helper class that provides common functionality to manifest IR attributes. +template +struct IRAttribute : public IRPosition, public Base { + IRAttribute(const IRPosition &IRP) : IRPosition(IRP) {} + ~IRAttribute() {} + + /// See AbstractAttribute::initialize(...). 
+ virtual void initialize(Attributor &A) override { + if (hasAttr(getAttrKind())) { + this->getState().indicateOptimisticFixpoint(); + return; + } + + const IRPosition &IRP = this->getIRPosition(); + bool IsFnInterface = IRP.isFnInterfaceKind(); + const Function *FnScope = IRP.getAnchorScope(); + // TODO: Not all attributes require an exact definition. Find a way to + // enable deduction for some but not all attributes in case the + // definition might be changed at runtime, see also + // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. + // TODO: We could always determine abstract attributes and if sufficient + // information was found we could duplicate the functions that do not + // have an exact definition. + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + this->getState().indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + SmallVector DeducedAttrs; + getDeducedAttributes(getAnchorValue().getContext(), DeducedAttrs); + return IRAttributeManifest::manifestAttrs(A, getIRPosition(), DeducedAttrs); + } + + /// Return the kind that identifies the abstract attribute implementation. + Attribute::AttrKind getAttrKind() const { return AK; } + + /// Return the deduced attributes in \p Attrs. + virtual void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const { + Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); + } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() override { return *this; } + const IRPosition &getIRPosition() const override { return *this; } + ///} +}; + /// Base struct for all "concrete attribute" deductions. /// /// The abstract attribute is a minimal interface that allows the Attributor to @@ -512,29 +1325,7 @@ struct BooleanState : public IntegerState { /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. 
struct AbstractAttribute { - - /// The positions attributes can be manifested in. - enum ManifestPosition { - MP_ARGUMENT, ///< An attribute for a function argument. - MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument. - MP_FUNCTION, ///< An attribute for a function as a whole. - MP_RETURNED, ///< An attribute for the function return value. - }; - - /// An abstract attribute associated with \p AssociatedVal and anchored at - /// \p AnchoredVal. - /// - /// \param AssociatedVal The value this abstract attribute is associated with. - /// \param AnchoredVal The value this abstract attributes is anchored at. - /// \param InfoCache Cached information accessible to the abstract attribute. - AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal, - InformationCache &InfoCache) - : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal), - InfoCache(InfoCache) {} - - /// An abstract attribute associated with and anchored at \p V. - AbstractAttribute(Value &V, InformationCache &InfoCache) - : AbstractAttribute(&V, V, InfoCache) {} + using StateType = AbstractState; /// Virtual destructor. virtual ~AbstractAttribute() {} @@ -550,47 +1341,11 @@ struct AbstractAttribute { virtual void initialize(Attributor &A) {} /// Return the internal abstract state for inspection. - virtual const AbstractState &getState() const = 0; + virtual StateType &getState() = 0; + virtual const StateType &getState() const = 0; - /// Return the value this abstract attribute is anchored with. - /// - /// The anchored value might not be the associated value if the latter is not - /// sufficient to determine where arguments will be manifested. This is mostly - /// the case for call site arguments as the value is not sufficient to - /// pinpoint them. Instead, we can use the call site as an anchor. 
- /// - ///{ - Value &getAnchoredValue() { return AnchoredVal; } - const Value &getAnchoredValue() const { return AnchoredVal; } - ///} - - /// Return the llvm::Function surrounding the anchored value. - /// - ///{ - Function &getAnchorScope(); - const Function &getAnchorScope() const; - ///} - - /// Return the value this abstract attribute is associated with. - /// - /// The abstract state usually represents this value. - /// - ///{ - virtual Value *getAssociatedValue() { return AssociatedVal; } - virtual const Value *getAssociatedValue() const { return AssociatedVal; } - ///} - - /// Return the position this abstract state is manifested in. - virtual ManifestPosition getManifestPosition() const = 0; - - /// Return the kind that identifies the abstract attribute implementation. - virtual Attribute::AttrKind getAttrKind() const = 0; - - /// Return the deduced attributes in \p Attrs. - virtual void getDeducedAttributes(SmallVectorImpl &Attrs) const { - LLVMContext &Ctx = AnchoredVal.getContext(); - Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); - } + /// Return an IR position, see struct IRPosition. + virtual const IRPosition &getIRPosition() const = 0; /// Helper functions, for debug purposes only. ///{ @@ -617,10 +1372,19 @@ protected: /// represented by the abstract attribute in the LLVM-IR. /// /// \Return CHANGED if the IR was altered, otherwise UNCHANGED. - virtual ChangeStatus manifest(Attributor &A); + virtual ChangeStatus manifest(Attributor &A) { + return ChangeStatus::UNCHANGED; + } - /// Return the internal abstract state for careful modification. - virtual AbstractState &getState() = 0; + /// Hook to enable custom statistic tracking, called after manifest that + /// resulted in a change if statistics are enabled. + /// + /// We require subclasses to provide an implementation so we remember to + /// add statistics for them. + virtual void trackStatistics() const = 0; + + /// Return an IR position, see struct IRPosition. 
+ virtual IRPosition &getIRPosition() = 0; /// The actual update/transfer function which has to be implemented by the /// derived classes. @@ -630,15 +1394,6 @@ protected: /// /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. virtual ChangeStatus updateImpl(Attributor &A) = 0; - - /// The value this abstract attribute is associated with. - Value *AssociatedVal; - - /// The value this abstract attribute is anchored at. - Value &AnchoredVal; - - /// The information cache accessible to this abstract attribute. - InformationCache &InfoCache; }; /// Forward declarations of output streams for debug purposes. @@ -646,8 +1401,10 @@ protected: ///{ raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA); raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S); -raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition); +raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind); +raw_ostream &operator<<(raw_ostream &OS, const IRPosition &); raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State); +raw_ostream &operator<<(raw_ostream &OS, const IntegerState &S); ///} struct AttributorPass : public PassInfoMixin { @@ -661,129 +1418,531 @@ Pass *createAttributorLegacyPass(); /// ---------------------------------------------------------------------------- /// An abstract attribute for the returned values of a function. -struct AAReturnedValues : public AbstractAttribute { - /// See AbstractAttribute::AbstractAttribute(...). - AAReturnedValues(Function &F, InformationCache &InfoCache) - : AbstractAttribute(F, InfoCache) {} +struct AAReturnedValues + : public IRAttribute { + AAReturnedValues(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return an assumed unique return value if a single candidate is found. If + /// there cannot be one, return a nullptr. If it is not clear yet, return the + /// Optional::NoneType. 
+ Optional getAssumedUniqueReturnValue(Attributor &A) const; /// Check \p Pred on all returned values. /// /// This method will evaluate \p Pred on returned values and return /// true if (1) all returned values are known, and (2) \p Pred returned true /// for all returned values. - virtual bool - checkForallReturnedValues(std::function &Pred) const = 0; + /// + /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts + /// method, this one will not filter dead return instructions. + virtual bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const = 0; - /// See AbstractAttribute::getAttrKind() - Attribute::AttrKind getAttrKind() const override { return ID; } + using iterator = + MapVector>::iterator; + using const_iterator = + MapVector>::const_iterator; + virtual llvm::iterator_range returned_values() = 0; + virtual llvm::iterator_range returned_values() const = 0; - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::Returned; + virtual size_t getNumReturnValues() const = 0; + virtual const SmallSetVector &getUnresolvedCalls() const = 0; + + /// Create an abstract attribute view for the position \p IRP. + static AAReturnedValues &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; -struct AANoUnwind : public AbstractAttribute { - /// An abstract interface for all nosync attributes. - AANoUnwind(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind()/ - Attribute::AttrKind getAttrKind() const override { return ID; } - - static constexpr Attribute::AttrKind ID = Attribute::NoUnwind; +struct AANoUnwind + : public IRAttribute> { + AANoUnwind(const IRPosition &IRP) : IRAttribute(IRP) {} /// Returns true if nounwind is assumed. 
- virtual bool isAssumedNoUnwind() const = 0; + bool isAssumedNoUnwind() const { return getAssumed(); } /// Returns true if nounwind is known. - virtual bool isKnownNoUnwind() const = 0; + bool isKnownNoUnwind() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; -struct AANoSync : public AbstractAttribute { - /// An abstract interface for all nosync attributes. - AANoSync(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { return ID; } - - static constexpr Attribute::AttrKind ID = - Attribute::AttrKind(Attribute::NoSync); +struct AANoSync + : public IRAttribute> { + AANoSync(const IRPosition &IRP) : IRAttribute(IRP) {} /// Returns true if "nosync" is assumed. - virtual bool isAssumedNoSync() const = 0; + bool isAssumedNoSync() const { return getAssumed(); } /// Returns true if "nosync" is known. - virtual bool isKnownNoSync() const = 0; + bool isKnownNoSync() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; /// An abstract interface for all nonnull attributes. -struct AANonNull : public AbstractAttribute { - - /// See AbstractAttribute::AbstractAttribute(...). - AANonNull(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::AbstractAttribute(...). 
- AANonNull(Value *AssociatedVal, Value &AnchoredValue, - InformationCache &InfoCache) - : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {} +struct AANonNull + : public IRAttribute> { + AANonNull(const IRPosition &IRP) : IRAttribute(IRP) {} /// Return true if we assume that the underlying value is nonnull. - virtual bool isAssumedNonNull() const = 0; + bool isAssumedNonNull() const { return getAssumed(); } /// Return true if we know that underlying value is nonnull. - virtual bool isKnownNonNull() const = 0; + bool isKnownNonNull() const { return getKnown(); } - /// See AbastractState::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { return ID; } + /// Create an abstract attribute view for the position \p IRP. + static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A); - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NonNull; + /// Unique ID (due to the unique address) + static const char ID; }; /// An abstract attribute for norecurse. -struct AANoRecurse : public AbstractAttribute { - - /// See AbstractAttribute::AbstractAttribute(...). - AANoRecurse(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind() - virtual Attribute::AttrKind getAttrKind() const override { - return Attribute::NoRecurse; - } - - /// Return true if "norecurse" is known. - virtual bool isKnownNoRecurse() const = 0; +struct AANoRecurse + : public IRAttribute> { + AANoRecurse(const IRPosition &IRP) : IRAttribute(IRP) {} /// Return true if "norecurse" is assumed. - virtual bool isAssumedNoRecurse() const = 0; + bool isAssumedNoRecurse() const { return getAssumed(); } - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NoRecurse; + /// Return true if "norecurse" is known. 
+ bool isKnownNoRecurse() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; /// An abstract attribute for willreturn. -struct AAWillReturn : public AbstractAttribute { - - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturn(Value &V, InformationCache &InfoCache) - : AbstractAttribute(V, InfoCache) {} - - /// See AbstractAttribute::getAttrKind() - virtual Attribute::AttrKind getAttrKind() const override { - return Attribute::WillReturn; - } - - /// Return true if "willreturn" is known. - virtual bool isKnownWillReturn() const = 0; +struct AAWillReturn + : public IRAttribute> { + AAWillReturn(const IRPosition &IRP) : IRAttribute(IRP) {} /// Return true if "willreturn" is assumed. - virtual bool isAssumedWillReturn() const = 0; + bool isAssumedWillReturn() const { return getAssumed(); } - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::WillReturn; + /// Return true if "willreturn" is known. + bool isKnownWillReturn() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; }; + +/// An abstract interface for all noalias attributes. +struct AANoAlias + : public IRAttribute> { + AANoAlias(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if we assume that the underlying value is alias. + bool isAssumedNoAlias() const { return getAssumed(); } + + /// Return true if we know that underlying value is noalias. + bool isKnownNoAlias() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. 
+ static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An AbstractAttribute for nofree. +struct AANoFree + : public IRAttribute> { + AANoFree(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if "nofree" is assumed. + bool isAssumedNoFree() const { return getAssumed(); } + + /// Return true if "nofree" is known. + bool isKnownNoFree() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An AbstractAttribute for noreturn. +struct AANoReturn + : public IRAttribute> { + AANoReturn(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if the underlying object is assumed to never return. + bool isAssumedNoReturn() const { return getAssumed(); } + + /// Return true if the underlying object is known to never return. + bool isKnownNoReturn() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for liveness abstract attribute. +struct AAIsDead : public StateWrapper, + public IRPosition { + AAIsDead(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Returns true if \p BB is assumed dead. + virtual bool isAssumedDead(const BasicBlock *BB) const = 0; + + /// Returns true if \p BB is known dead. + virtual bool isKnownDead(const BasicBlock *BB) const = 0; + + /// Returns true if \p I is assumed dead. + virtual bool isAssumedDead(const Instruction *I) const = 0; + + /// Returns true if \p I is known dead. 
+ virtual bool isKnownDead(const Instruction *I) const = 0; + + /// This method is used to check if at least one instruction in a collection + /// of instructions is live. + template bool isLiveInstSet(T begin, T end) const { + for (const auto &I : llvm::make_range(begin, end)) { + assert(I->getFunction() == getIRPosition().getAssociatedFunction() && + "Instruction must be in the same anchor scope function."); + + if (!isAssumedDead(I)) + return true; + } + + return false; + } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() override { return *this; } + const IRPosition &getIRPosition() const override { return *this; } + ///} + + /// Create an abstract attribute view for the position \p IRP. + static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// State for dereferenceable attribute +struct DerefState : AbstractState { + + /// State representing for dereferenceable bytes. + IntegerState DerefBytesState; + + /// State representing that whether the value is globaly dereferenceable. + BooleanState GlobalState; + + /// See AbstractState::isValidState() + bool isValidState() const override { return DerefBytesState.isValidState(); } + + /// See AbstractState::isAtFixpoint() + bool isAtFixpoint() const override { + return !isValidState() || + (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint()); + } + + /// See AbstractState::indicateOptimisticFixpoint(...) + ChangeStatus indicateOptimisticFixpoint() override { + DerefBytesState.indicateOptimisticFixpoint(); + GlobalState.indicateOptimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + /// See AbstractState::indicatePessimisticFixpoint(...) 
+ ChangeStatus indicatePessimisticFixpoint() override { + DerefBytesState.indicatePessimisticFixpoint(); + GlobalState.indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + /// Update known dereferenceable bytes. + void takeKnownDerefBytesMaximum(uint64_t Bytes) { + DerefBytesState.takeKnownMaximum(Bytes); + } + + /// Update assumed dereferenceable bytes. + void takeAssumedDerefBytesMinimum(uint64_t Bytes) { + DerefBytesState.takeAssumedMinimum(Bytes); + } + + /// Equality for DerefState. + bool operator==(const DerefState &R) { + return this->DerefBytesState == R.DerefBytesState && + this->GlobalState == R.GlobalState; + } + + /// Inequality for IntegerState. + bool operator!=(const DerefState &R) { return !(*this == R); } + + /// See IntegerState::operator^= + DerefState operator^=(const DerefState &R) { + DerefBytesState ^= R.DerefBytesState; + GlobalState ^= R.GlobalState; + return *this; + } + + /// See IntegerState::operator+= + DerefState operator+=(const DerefState &R) { + DerefBytesState += R.DerefBytesState; + GlobalState += R.GlobalState; + return *this; + } + + /// See IntegerState::operator&= + DerefState operator&=(const DerefState &R) { + DerefBytesState &= R.DerefBytesState; + GlobalState &= R.GlobalState; + return *this; + } + + /// See IntegerState::operator|= + DerefState operator|=(const DerefState &R) { + DerefBytesState |= R.DerefBytesState; + GlobalState |= R.GlobalState; + return *this; + } + +protected: + const AANonNull *NonNullAA = nullptr; +}; + +/// An abstract interface for all dereferenceable attribute. +struct AADereferenceable + : public IRAttribute> { + AADereferenceable(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return true if we assume that the underlying value is nonnull. + bool isAssumedNonNull() const { + return NonNullAA && NonNullAA->isAssumedNonNull(); + } + + /// Return true if we know that the underlying value is nonnull. 
+ bool isKnownNonNull() const { + return NonNullAA && NonNullAA->isKnownNonNull(); + } + + /// Return true if we assume that underlying value is + /// dereferenceable(_or_null) globally. + bool isAssumedGlobal() const { return GlobalState.getAssumed(); } + + /// Return true if we know that underlying value is + /// dereferenceable(_or_null) globally. + bool isKnownGlobal() const { return GlobalState.getKnown(); } + + /// Return assumed dereferenceable bytes. + uint32_t getAssumedDereferenceableBytes() const { + return DerefBytesState.getAssumed(); + } + + /// Return known dereferenceable bytes. + uint32_t getKnownDereferenceableBytes() const { + return DerefBytesState.getKnown(); + } + + /// Create an abstract attribute view for the position \p IRP. + static AADereferenceable &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all align attributes. +struct AAAlign + : public IRAttribute> { + AAAlign(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// Return assumed alignment. + unsigned getAssumedAlign() const { return getAssumed(); } + + /// Return known alignemnt. + unsigned getKnownAlign() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all nocapture attributes. +struct AANoCapture + : public IRAttribute> { + AANoCapture(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// State encoding bits. A set bit in the state means the property holds. + /// NO_CAPTURE is the best possible state, 0 the worst possible state. 
+ enum { + NOT_CAPTURED_IN_MEM = 1 << 0, + NOT_CAPTURED_IN_INT = 1 << 1, + NOT_CAPTURED_IN_RET = 1 << 2, + + /// If we do not capture the value in memory or through integers we can only + /// communicate it back as a derived pointer. + NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT, + + /// If we do not capture the value in memory, through integers, or as a + /// derived pointer we know it is not captured. + NO_CAPTURE = + NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET, + }; + + /// Return true if we know that the underlying value is not captured in its + /// respective scope. + bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); } + + /// Return true if we assume that the underlying value is not captured in its + /// respective scope. + bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); } + + /// Return true if we know that the underlying value is not captured in its + /// respective scope but we allow it to escape through a "return". + bool isKnownNoCaptureMaybeReturned() const { + return isKnown(NO_CAPTURE_MAYBE_RETURNED); + } + + /// Return true if we assume that the underlying value is not captured in its + /// respective scope but we allow it to escape through a "return". + bool isAssumedNoCaptureMaybeReturned() const { + return isAssumed(NO_CAPTURE_MAYBE_RETURNED); + } + + /// Create an abstract attribute view for the position \p IRP. + static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for value simplify abstract attribute. +struct AAValueSimplify : public StateWrapper, + public IRPosition { + AAValueSimplify(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Return an IR position, see struct IRPosition. 
+ /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + /// Return an assumed simplified value if a single candidate is found. If + /// there cannot be one, return original value. If it is not clear yet, return + /// the Optional::NoneType. + virtual Optional getAssumedSimplifiedValue(Attributor &A) const = 0; + + /// Create an abstract attribute view for the position \p IRP. + static AAValueSimplify &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +struct AAHeapToStack : public StateWrapper, + public IRPosition { + AAHeapToStack(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Returns true if HeapToStack conversion is assumed to be possible. + bool isAssumedHeapToStack() const { return getAssumed(); } + + /// Returns true if HeapToStack conversion is known to be possible. + bool isKnownHeapToStack() const { return getKnown(); } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + /// Create an abstract attribute view for the position \p IRP. + static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all memory related attributes. +struct AAMemoryBehavior + : public IRAttribute> { + AAMemoryBehavior(const IRPosition &IRP) : IRAttribute(IRP) {} + + /// State encoding bits. A set bit in the state means the property holds. + /// BEST_STATE is the best possible state, 0 the worst possible state. + enum { + NO_READS = 1 << 0, + NO_WRITES = 1 << 1, + NO_ACCESSES = NO_READS | NO_WRITES, + + BEST_STATE = NO_ACCESSES, + }; + + /// Return true if we know that the underlying value is not read or accessed + /// in its respective scope. 
+ bool isKnownReadNone() const { return isKnown(NO_ACCESSES); } + + /// Return true if we assume that the underlying value is not read or accessed + /// in its respective scope. + bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); } + + /// Return true if we know that the underlying value is not accessed + /// (=written) in its respective scope. + bool isKnownReadOnly() const { return isKnown(NO_WRITES); } + + /// Return true if we assume that the underlying value is not accessed + /// (=written) in its respective scope. + bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); } + + /// Return true if we know that the underlying value is not read in its + /// respective scope. + bool isKnownWriteOnly() const { return isKnown(NO_READS); } + + /// Return true if we assume that the underlying value is not read in its + /// respective scope. + bool isAssumedWriteOnly() const { return isAssumed(NO_READS); } + + /// Create an abstract attribute view for the position \p IRP. + static AAMemoryBehavior &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h index c434484d1ae..0a6851849e7 100644 --- a/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/include/llvm/Transforms/IPO/GlobalDCE.h @@ -43,11 +43,25 @@ private: /// Comdat -> Globals in that Comdat section. std::unordered_multimap ComdatMembers; + /// !type metadata -> set of (vtable, offset) pairs + DenseMap, 4>> + TypeIdMap; + + // Global variables which are vtables, and which we have enough information + // about to safely do dead virtual function elimination. 
+ SmallPtrSet VFESafeVTables; + void UpdateGVDependencies(GlobalValue &GV); void MarkLive(GlobalValue &GV, SmallVectorImpl *Updates = nullptr); bool RemoveUnusedGlobalValue(GlobalValue &GV); + // Dead virtual function elimination. + void AddVirtualFunctionDependencies(Module &M); + void ScanVTables(Module &M); + void ScanTypeCheckedLoadIntrinsics(Module &M); + void ScanVTableLoad(Function *Caller, Metadata *TypeId, uint64_t CallOffset); + void ComputeDependencies(Value *V, SmallPtrSetImpl &U); }; diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h index 73668844590..8c3049fbaac 100644 --- a/include/llvm/Transforms/IPO/HotColdSplitting.h +++ b/include/llvm/Transforms/IPO/HotColdSplitting.h @@ -17,6 +17,45 @@ namespace llvm { class Module; +class ProfileSummaryInfo; +class BlockFrequencyInfo; +class TargetTransformInfo; +class OptimizationRemarkEmitter; +class AssumptionCache; +class DominatorTree; +class CodeExtractorAnalysisCache; + +/// A sequence of basic blocks. +/// +/// A 0-sized SmallVector is slightly cheaper to move than a std::vector. 
+using BlockSequence = SmallVector; + +class HotColdSplitting { +public: + HotColdSplitting(ProfileSummaryInfo *ProfSI, + function_ref GBFI, + function_ref GTTI, + std::function *GORE, + function_ref LAC) + : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} + bool run(Module &M); + +private: + bool isFunctionCold(const Function &F) const; + bool shouldOutlineFrom(const Function &F) const; + bool outlineColdRegions(Function &F, bool HasProfileSummary); + Function *extractColdRegion(const BlockSequence &Region, + const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, + TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, + AssumptionCache *AC, unsigned Count); + ProfileSummaryInfo *PSI; + function_ref GetBFI; + function_ref GetTTI; + std::function *GetORE; + function_ref LookupAC; +}; /// Pass to outline cold regions. class HotColdSplittingPass : public PassInfoMixin { diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h index 39b23f5957d..3c2bb65b955 100644 --- a/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -193,6 +193,8 @@ struct ByteArrayBuilder { uint64_t &AllocByteOffset, uint8_t &AllocMask); }; +bool isJumpTableCanonical(Function *F); + } // end namespace lowertypetests class LowerTypeTestsPass : public PassInfoMixin { diff --git a/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 509fcc86706..22435e4ed1e 100644 --- a/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -16,8 +16,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/IPO/FunctionImport.h" #include #include +#include #include #include @@ -28,6 +30,7 @@ template class MutableArrayRef; class Function; class GlobalVariable; class ModuleSummaryIndex; +struct ValueInfo; namespace 
wholeprogramdevirt { @@ -228,6 +231,29 @@ struct WholeProgramDevirtPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; +struct VTableSlotSummary { + StringRef TypeID; + uint64_t ByteOffset; +}; + +/// Perform index-based whole program devirtualization on the \p Summary +/// index. Any devirtualized targets used by a type test in another module +/// are added to the \p ExportedGUIDs set. For any local devirtualized targets +/// only used within the defining module, the information necessary for +/// locating the corresponding WPD resolution is recorded for the ValueInfo +/// in case it is exported by cross module importing (in which case the +/// devirtualized target name will need adjustment). +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap); + +/// Call after cross-module importing to update the recorded single impl +/// devirt target names for any locals that were exported. +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + function_ref isExported, + std::map> &LocalWPDTargetsMap); + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 8b70d2926ae..fcad1e11895 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -181,10 +181,6 @@ struct SanitizerCoverageOptions { SanitizerCoverageOptions() = default; }; -// Insert SanitizerCoverage instrumentation. -ModulePass *createSanitizerCoverageModulePass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); - /// Calculate what to divide by to scale counts. 
/// /// Given the maximum count, calculate a divisor that will scale all the diff --git a/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 8f76d4a1ce5..2e0fae527b1 100644 --- a/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -39,13 +39,14 @@ public: : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - bool run(Module &M, const TargetLibraryInfo &TLI); + bool run(Module &M, + std::function GetTLI); private: InstrProfOptions Options; Module *M; Triple TT; - const TargetLibraryInfo *TLI; + std::function GetTLI; struct PerFunctionProfileData { uint32_t NumValueSites[IPVK_Last + 1]; GlobalVariable *RegionCounters = nullptr; diff --git a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index 0739d9e58a6..01a86ee3f1f 100644 --- a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -19,12 +19,11 @@ namespace llvm { struct MemorySanitizerOptions { - MemorySanitizerOptions() = default; - MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel) - : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {} - int TrackOrigins = 0; - bool Recover = false; - bool Kernel = false; + MemorySanitizerOptions() : MemorySanitizerOptions(0, false, false){}; + MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel); + bool Kernel; + int TrackOrigins; + bool Recover; }; // Insert MemorySanitizer instrumentation (detection of uninitialized reads) @@ -41,6 +40,7 @@ struct MemorySanitizerPass : public PassInfoMixin { MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: 
MemorySanitizerOptions Options; diff --git a/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h new file mode 100644 index 00000000000..85a43ff86f2 --- /dev/null +++ b/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -0,0 +1,47 @@ +//===--------- Definition of the SanitizerCoverage class --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the SanitizerCoverage class which is a port of the legacy +// SanitizerCoverage pass to use the new PassManager infrastructure. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Instrumentation.h" + +namespace llvm { + +/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The +/// pass instruments functions for coverage, adds initialization calls to the +/// module for trace PC guards and 8bit counters if they are requested, and +/// appends globals to llvm.compiler.used. +class ModuleSanitizerCoveragePass + : public PassInfoMixin { +public: + explicit ModuleSanitizerCoveragePass( + SanitizerCoverageOptions Options = SanitizerCoverageOptions()) + : Options(Options) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + SanitizerCoverageOptions Options; +}; + +// Insert SanitizerCoverage instrumentation. 
+ModulePass *createModuleSanitizerCoverageLegacyPassPass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); + +} // namespace llvm + +#endif diff --git a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index b4e7d9924ff..ce0e46745ab 100644 --- a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -27,6 +27,8 @@ FunctionPass *createThreadSanitizerLegacyPassPass(); /// yet, the pass inserts the declarations. Otherwise the existing globals are struct ThreadSanitizerPass : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; + } // namespace llvm #endif /* LLVM_TRANSFORMS_INSTRUMENTATION_THREADSANITIZER_H */ diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index f9360b5ee2c..f06230b6f36 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -308,7 +308,7 @@ FunctionPass *createGVNSinkPass(); // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // -FunctionPass *createMergedLoadStoreMotionPass(); +FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false); //===----------------------------------------------------------------------===// // @@ -395,6 +395,13 @@ extern char &InferAddressSpacesID; // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); +//===----------------------------------------------------------------------===// +// +// LowerConstantIntrinsicss - Expand any remaining llvm.objectsize and +// llvm.is.constant intrinsic calls, even for the unknown cases. 
+// +FunctionPass *createLowerConstantIntrinsicsPass(); + //===----------------------------------------------------------------------===// // // PartiallyInlineLibCalls - Tries to inline the fast path of library diff --git a/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/include/llvm/Transforms/Scalar/CallSiteSplitting.h index b6055639e8a..74cbf84b64b 100644 --- a/include/llvm/Transforms/Scalar/CallSiteSplitting.h +++ b/include/llvm/Transforms/Scalar/CallSiteSplitting.h @@ -9,13 +9,8 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H #define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H -#include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/Compiler.h" -#include namespace llvm { diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h index 6b0fc9c1dd0..39039b09324 100644 --- a/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -37,7 +37,9 @@ #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" @@ -154,21 +156,21 @@ private: /// Keeps track of constant candidates found in the function. using ConstCandVecType = std::vector; - using GVCandVecMapType = DenseMap; + using GVCandVecMapType = MapVector; ConstCandVecType ConstIntCandVec; GVCandVecMapType ConstGEPCandMap; /// These are the final constants we decided to hoist. using ConstInfoVecType = SmallVector; - using GVInfoVecMapType = DenseMap; + using GVInfoVecMapType = MapVector; ConstInfoVecType ConstIntInfoVec; GVInfoVecMapType ConstGEPInfoMap; /// Keep track of cast instructions we already cloned. 
- SmallDenseMap ClonedCastMap; + MapVector ClonedCastMap; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - SmallPtrSet + SetVector findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h index 06aeb832252..f04b98a19d8 100644 --- a/include/llvm/Transforms/Scalar/Float2Int.h +++ b/include/llvm/Transforms/Scalar/Float2Int.h @@ -17,6 +17,7 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" @@ -26,10 +27,11 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for old PM. - bool runImpl(Function &F); + bool runImpl(Function &F, const DominatorTree &DT); private: - void findRoots(Function &F, SmallPtrSet &Roots); + void findRoots(Function &F, const DominatorTree &DT, + SmallPtrSet &Roots); void seen(Instruction *I, ConstantRange R); ConstantRange badRange(); ConstantRange unknownRange(); diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h index 9fe00a9e7f2..8a64768af6b 100644 --- a/include/llvm/Transforms/Scalar/GVN.h +++ b/include/llvm/Transforms/Scalar/GVN.h @@ -120,6 +120,8 @@ public: uint32_t lookupOrAddCall(CallInst *C); uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, uint32_t Num, GVN &Gvn); + bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred, + const BasicBlock *PhiBlock, GVN &Gvn); std::pair assignExpNewValueNum(Expression &exp); bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); @@ -159,6 +161,7 @@ private: SetVector DeadBlocks; OptimizationRemarkEmitter *ORE; ImplicitControlFlowTracking *ICF; + LoopInfo *LI; ValueTable VN; @@ -175,7 
+178,7 @@ private: // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied // to the remaining instructions in the block. - SmallMapVector ReplaceWithConstMap; + SmallMapVector ReplaceOperandsWithMap; SmallVector InstrsToErase; // Map the block to reversed postorder traversal number. It is used to @@ -280,7 +283,7 @@ private: void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); - bool replaceOperandsWithConsts(Instruction *I) const; + bool replaceOperandsForInBlockEquality(Instruction *I) const; bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, bool DominatesByEdge); bool processFoldableCondBr(BranchInst *BI); diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h index 3dc4515f85a..1600d1af324 100644 --- a/include/llvm/Transforms/Scalar/GVNExpression.h +++ b/include/llvm/Transforms/Scalar/GVNExpression.h @@ -323,7 +323,7 @@ public: class LoadExpression final : public MemoryExpression { private: LoadInst *Load; - unsigned Alignment; + MaybeAlign Alignment; public: LoadExpression(unsigned NumOperands, LoadInst *L, @@ -333,7 +333,8 @@ public: LoadExpression(enum ExpressionType EType, unsigned NumOperands, LoadInst *L, const MemoryAccess *MemoryLeader) : MemoryExpression(NumOperands, EType, MemoryLeader), Load(L) { - Alignment = L ? 
L->getAlignment() : 0; + if (L) + Alignment = MaybeAlign(L->getAlignment()); } LoadExpression() = delete; @@ -348,8 +349,8 @@ public: LoadInst *getLoadInst() const { return Load; } void setLoadInst(LoadInst *L) { Load = L; } - unsigned getAlignment() const { return Alignment; } - void setAlignment(unsigned Align) { Alignment = Align; } + MaybeAlign getAlignment() const { return Alignment; } + void setAlignment(MaybeAlign Align) { Alignment = Align; } bool equals(const Expression &Other) const override; bool exactlyEquals(const Expression &Other) const override { diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h index 61ec58585fd..aed764855b2 100644 --- a/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -263,8 +263,10 @@ template class FunctionToLoopPassAdaptor : public PassInfoMixin> { public: - explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) - : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) { + explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, + bool DebugLogging = false) + : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging), + UseMemorySSA(UseMemorySSA) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); } @@ -293,7 +295,7 @@ public: return PA; // Get the analysis results needed by loop passes. - MemorySSA *MSSA = EnableMSSALoopDependency + MemorySSA *MSSA = UseMemorySSA ? (&AM.getResult(F).getMSSA()) : nullptr; LoopStandardAnalysisResults LAR = {AM.getResult(F), @@ -310,8 +312,10 @@ public: // LoopStandardAnalysisResults object. The loop analyses cached in this // manager have access to those analysis results and so it must invalidate // itself when they go away. 
- LoopAnalysisManager &LAM = - AM.getResult(F).getManager(); + auto &LAMFP = AM.getResult(F); + if (UseMemorySSA) + LAMFP.markMSSAUsed(); + LoopAnalysisManager &LAM = LAMFP.getManager(); // A postorder worklist of loops to process. SmallPriorityWorklist Worklist; @@ -382,7 +386,7 @@ public: PA.preserve(); PA.preserve(); PA.preserve(); - if (EnableMSSALoopDependency) + if (UseMemorySSA) PA.preserve(); // FIXME: What we really want to do here is preserve an AA category, but // that concept doesn't exist yet. @@ -397,14 +401,18 @@ private: LoopPassT Pass; FunctionPassManager LoopCanonicalizationFPM; + + bool UseMemorySSA = false; }; /// A function to deduce a loop pass type and wrap it in the templated /// adaptor. template FunctionToLoopPassAdaptor -createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) { - return FunctionToLoopPassAdaptor(std::move(Pass), DebugLogging); +createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, + bool DebugLogging = false) { + return FunctionToLoopPassAdaptor(std::move(Pass), UseMemorySSA, + DebugLogging); } /// Pass for printing a loop's contents as textual IR. diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h index a84d889a83a..afeb1f1da02 100644 --- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -62,6 +62,8 @@ struct LoopUnrollOptions { Optional AllowPeeling; Optional AllowRuntime; Optional AllowUpperBound; + Optional AllowProfileBasedPeeling; + Optional FullUnrollMaxCount; int OptLevel; /// If false, use a cost model to determine whether unrolling of a loop is @@ -110,6 +112,18 @@ struct LoopUnrollOptions { OptLevel = O; return *this; } + + // Enables or disables loop peeling basing on profile. + LoopUnrollOptions &setProfileBasedPeeling(int O) { + AllowProfileBasedPeeling = O; + return *this; + } + + // Sets the max full unroll count. 
+ LoopUnrollOptions &setFullUnrollMaxCount(unsigned O) { + FullUnrollMaxCount = O; + return *this; + } }; /// Loop unroll pass that will support both full and partial unrolling. diff --git a/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h new file mode 100644 index 00000000000..a5ad4a2192a --- /dev/null +++ b/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h @@ -0,0 +1,41 @@ +//===- LowerConstantIntrinsics.h - Lower constant int. pass -*- C++ -*-========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The header file for the LowerConstantIntrinsics pass as used by the new pass +/// manager. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H +#define LLVM_TRANSFORMS_SCALAR_LOWERCONSTANTINTRINSICS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct LowerConstantIntrinsicsPass : + PassInfoMixin { +public: + explicit LowerConstantIntrinsicsPass() {} + + /// Run the pass over the function. + /// + /// This will lower all remaining 'objectsize' and 'is.constant' + /// intrinsic calls in this function, even when the argument has no known + /// size or is not a constant respectively. The resulting constant is + /// propagated and conditional branches are resolved where possible. + /// This complements the Instruction Simplification and + /// Instruction Combination passes of the optimized pass chain.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} + +#endif diff --git a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h index 9071a56532f..c5f6d6e0e8b 100644 --- a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h +++ b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h @@ -27,12 +27,28 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class MergedLoadStoreMotionPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +struct MergedLoadStoreMotionOptions { + bool SplitFooterBB; + MergedLoadStoreMotionOptions(bool SplitFooterBB = false) + : SplitFooterBB(SplitFooterBB) {} + + MergedLoadStoreMotionOptions &splitFooterBB(bool SFBB) { + SplitFooterBB = SFBB; + return *this; + } }; +class MergedLoadStoreMotionPass + : public PassInfoMixin { + MergedLoadStoreMotionOptions Options; + +public: + MergedLoadStoreMotionPass() + : MergedLoadStoreMotionPass(MergedLoadStoreMotionOptions()) {} + MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions) + : Options(PassOptions) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif // LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h index 2db8d8ce309..d5b175eff0e 100644 --- a/include/llvm/Transforms/Scalar/Reassociate.h +++ b/include/llvm/Transforms/Scalar/Reassociate.h @@ -122,7 +122,9 @@ private: void EraseInst(Instruction *I); void RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts); void OptimizeInst(Instruction *I); - Instruction *canonicalizeNegConstExpr(Instruction *I); + Instruction *canonicalizeNegFPConstantsForOp(Instruction *I, Instruction *Op, + Value *OtherOp); + Instruction *canonicalizeNegFPConstants(Instruction *I); void BuildPairMap(ReversePostOrderTraversal &RPOT); }; diff --git 
a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h index 0ffd983eb3e..45e674a20a1 100644 --- a/include/llvm/Transforms/Scalar/SCCP.h +++ b/include/llvm/Transforms/Scalar/SCCP.h @@ -45,7 +45,8 @@ struct AnalysisResultsForFn { PostDominatorTree *PDT; }; -bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI, +bool runIPSCCP(Module &M, const DataLayout &DL, + std::function GetTLI, function_ref getAnalysis); } // end namespace llvm diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 4d861ffe9a3..698e57fd039 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -83,10 +83,16 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr); /// Attempts to merge a block into its predecessor, if possible. The return /// value indicates success or failure. +/// By default do not merge blocks if BB's predecessor has multiple successors. +/// If PredecessorWithTwoSuccessors = true, the blocks can only be merged +/// if BB's Pred has a branch to BB and to AnotherBB, and BB has a single +/// successor Sing. In this case the branch will be updated with Sing instead of +/// BB, and BB will still be merged into its predecessor and removed. bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, MemorySSAUpdater *MSSAU = nullptr, - MemoryDependenceResults *MemDep = nullptr); + MemoryDependenceResults *MemDep = nullptr, + bool PredecessorWithTwoSuccessors = false); /// Replace all uses of an instruction (specified by BI) with a value, then /// remove and delete the original instruction. @@ -222,7 +228,8 @@ BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, /// info is updated. 
BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, + const Twine &BBName = ""); /// This method introduces at least one new basic block into the function and /// moves some of the predecessors of BB to be predecessors of the new block. diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index 8421c31a36d..3d15b2a7bf2 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -30,17 +30,16 @@ namespace llvm { bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI); bool inferLibFuncAttributes(Module *M, StringRef Name, const TargetLibraryInfo &TLI); - /// Check whether the overloaded unary floating point function + /// Check whether the overloaded floating point function /// corresponding to \a Ty is available. - bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn); + bool hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn); - /// Get the name of the overloaded unary floating point function + /// Get the name of the overloaded floating point function /// corresponding to \a Ty. - StringRef getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn); + StringRef getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn); /// Return V if it is an i8*, otherwise cast it to i8*. Value *castToCStr(Value *V, IRBuilder<> &B); @@ -51,6 +50,11 @@ namespace llvm { Value *emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the strdup function to the builder, for the specified + /// pointer. 
Ptr is required to be some pointer type, and the return value has + /// 'i8*' type. + Value *emitStrDup(Value *Ptr, IRBuilder<> &B, const TargetLibraryInfo *TLI); + /// Emit a call to the strnlen function to the builder, for the specified /// pointer. Ptr is required to be some pointer type, MaxLen must be of size_t /// type, and the return value has 'intptr_t' type. @@ -164,6 +168,13 @@ namespace llvm { Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, IRBuilder<> &B, const AttributeList &Attrs); + /// Emit a call to the binary function DoubleFn, FloatFn or LongDoubleFn, + /// depending on the type of Op1. + Value *emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, LibFunc DoubleFn, + LibFunc FloatFn, LibFunc LongDoubleFn, + IRBuilder<> &B, const AttributeList &Attrs); + /// Emit a call to the putchar function. This assumes that Char is an integer. Value *emitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI); diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h index 471055921fa..bd98c902d1a 100644 --- a/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/IR/ValueHandle.h" #include namespace llvm { @@ -28,8 +29,10 @@ class Value; struct DivRemMapKey { bool SignedOp; - Value *Dividend; - Value *Divisor; + AssertingVH Dividend; + AssertingVH Divisor; + + DivRemMapKey() = default; DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor) : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} @@ -50,8 +53,10 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const DivRemMapKey &Val) { - return (unsigned)(reinterpret_cast(Val.Dividend) ^ - reinterpret_cast(Val.Divisor)) ^ + return (unsigned)(reinterpret_cast( + static_cast(Val.Dividend)) ^ +
reinterpret_cast( + static_cast(Val.Divisor))) ^ (unsigned)Val.SignedOp; } }; diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h index 9d79ee1633f..8a1ab796734 100644 --- a/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/include/llvm/Transforms/Utils/CodeExtractor.h @@ -22,6 +22,7 @@ namespace llvm { +class AllocaInst; class BasicBlock; class BlockFrequency; class BlockFrequencyInfo; @@ -36,6 +37,38 @@ class Module; class Type; class Value; +/// A cache for the CodeExtractor analysis. The operation \ref +/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this +/// object. This object should conservatively be considered invalid if any +/// other mutating operations on the IR occur. +/// +/// Constructing this object is O(n) in the size of the function. +class CodeExtractorAnalysisCache { + /// The allocas in the function. + SmallVector Allocas; + + /// Base memory addresses of load/store instructions, grouped by block. + DenseMap> BaseMemAddrs; + + /// Blocks which contain instructions which may have unknown side-effects + /// on memory. + DenseSet SideEffectingBlocks; + + void findSideEffectInfoForBlock(BasicBlock &BB); + +public: + CodeExtractorAnalysisCache(Function &F); + + /// Get the allocas in the function at the time the analysis was created. + /// Note that some of these allocas may no longer be present in the function, + /// due to \ref CodeExtractor::extractCodeRegion. + ArrayRef getAllocas() const { return Allocas; } + + /// Check whether \p BB contains an instruction thought to load from, store + /// to, or otherwise clobber the alloca \p Addr. + bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const; +}; + /// Utility class for extracting code into a new function. 
/// /// This utility provides a simple interface for extracting some sequence of @@ -104,13 +137,21 @@ class Value; /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); + + /// Verify that assumption cache isn't stale after a region is extracted. + /// Returns false when verifier finds errors. AssumptionCache is passed as + /// parameter to make this function stateless. + static bool verifyAssumptionCache(const Function& F, AssumptionCache *AC); /// Test whether this code extractor is eligible. /// /// Based on the blocks used when constructing the code extractor, /// determine whether it is eligible for extraction. - bool isEligible() const { return !Blocks.empty(); } + /// + /// Checks that varargs handling (with vastart and vaend) is only done in + /// the outlined blocks. + bool isEligible() const; /// Compute the set of input values and output values for the code. /// @@ -127,7 +168,9 @@ class Value; /// region. /// /// Returns true if it is safe to do the code motion. - bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + bool + isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *AllocaAddr) const; /// Find the set of allocas whose life ranges are contained within the /// outlined region. @@ -137,7 +180,8 @@ class Value; /// are used by the lifetime markers are also candidates for shrink- /// wrapping. The instructions that need to be sunk are collected in /// 'Allocas'. 
- void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + void findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const; /// Find or create a block within the outline region for placing hoisted @@ -158,8 +202,9 @@ class Value; Instruction *LifeEnd = nullptr; }; - LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr, - BasicBlock *ExitBlock) const; + LifetimeMarkerInfo + getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const; void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index ff516f23097..9fcb2f64d79 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -271,6 +271,15 @@ inline unsigned getKnownAlignment(Value *V, const DataLayout &DL, return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT); } +/// Create a call that matches the invoke \p II in terms of arguments, +/// attributes, debug information, etc. The call is not placed in a block and it +/// will not have a name. The invoke instruction is not removed, nor are the +/// uses replaced by the new call. +CallInst *createCallMatchingInvoke(InvokeInst *II); + +/// This function converts the specified invoke into a normal call. +void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr); + ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// @@ -403,8 +412,7 @@ void removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); /// Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed.
-bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, - DomTreeUpdater *DTU = nullptr, +bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr); /// Combine the metadata of two instructions so that K can replace J. Some @@ -424,6 +432,10 @@ void combineMetadata(Instruction *K, const Instruction *J, void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove); +/// Copy the metadata from the source instruction to the destination (the +/// replacement for the source instruction). +void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source); + /// Patch the replacement so that it is not more restrictive than the value /// being replaced. It assumes that the replacement does not get moved from /// its original position. diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 68bdded5cf9..d32f08717e9 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -215,6 +215,9 @@ makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef FollowupAttrs, /// Look for the loop attribute that disables all transformation heuristic. bool hasDisableAllTransformsHint(const Loop *L); +/// Look for the loop attribute that disables the LICM transformation heuristics. +bool hasDisableLICMTransformsHint(const Loop *L); + /// The mode sets how eager a transformation should be applied. enum TransformationMode { /// The pass can use heuristics to determine whether a transformation should @@ -252,6 +255,8 @@ TransformationMode hasLICMVersioningTransformation(Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. +/// If the string is already in loop metadata update value if it is +/// different. 
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); diff --git a/include/llvm/Transforms/Utils/MisExpect.h b/include/llvm/Transforms/Utils/MisExpect.h new file mode 100644 index 00000000000..1dbe8cb9593 --- /dev/null +++ b/include/llvm/Transforms/Utils/MisExpect.h @@ -0,0 +1,43 @@ +//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emitted. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +namespace misexpect { + +/// verifyMisExpect - compares PGO counters to the thresholds used for +/// llvm.expect and warns if the PGO counters are outside of the expected +/// range. +/// \param I The Instruction being checked +/// \param Weights A vector of profile weights for each target block +/// \param Ctx The current LLVM context +void verifyMisExpect(llvm::Instruction *I, + const llvm::SmallVector &Weights, + llvm::LLVMContext &Ctx); + +/// checkFrontendInstrumentation - verify if llvm.expect matches PGO profile +/// This function checks the frontend instrumentation in the backend when +/// lowering llvm.expect intrinsics.
It checks for existing metadata, and +/// then validates the use of llvm.expect against the assigned branch weights. +// +/// \param I the Instruction being checked +void checkFrontendInstrumentation(Instruction &I); + +} // namespace misexpect +} // namespace llvm diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h index da4a5dcc28c..7c7a8eb04a2 100644 --- a/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/include/llvm/Transforms/Utils/PredicateInfo.h @@ -229,10 +229,10 @@ protected: private: void buildPredicateInfo(); - void processAssume(IntrinsicInst *, BasicBlock *, SmallPtrSetImpl &); - void processBranch(BranchInst *, BasicBlock *, SmallPtrSetImpl &); - void processSwitch(SwitchInst *, BasicBlock *, SmallPtrSetImpl &); - void renameUses(SmallPtrSetImpl &); + void processAssume(IntrinsicInst *, BasicBlock *, SmallVectorImpl &); + void processBranch(BranchInst *, BasicBlock *, SmallVectorImpl &); + void processSwitch(SwitchInst *, BasicBlock *, SmallVectorImpl &); + void renameUses(SmallVectorImpl &); using ValueDFS = PredicateInfoClasses::ValueDFS; typedef SmallVectorImpl ValueDFSStack; void convertUsesToDFSOrdered(Value *, SmallVectorImpl &); @@ -240,7 +240,7 @@ private: bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const; void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &); ValueInfo &getOrCreateValueInfo(Value *); - void addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, + void addInfoFor(SmallVectorImpl &OpsToRename, Value *Op, PredicateBase *PB); const ValueInfo &getValueInfo(Value *) const; Function &F; diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 2572094ddac..88c2ef787ad 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -126,6 +126,12 @@ private: /// Erase an instruction from its parent with our eraser. 
void eraseFromParent(Instruction *I); + /// Replace an instruction with a value and erase it from its parent. + void substituteInParent(Instruction *I, Value *With) { + replaceAllUsesWith(I, With); + eraseFromParent(I); + } + Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B); public: @@ -154,6 +160,7 @@ private: Value *optimizeStrRChr(CallInst *CI, IRBuilder<> &B); Value *optimizeStrCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeStrNCmp(CallInst *CI, IRBuilder<> &B); + Value *optimizeStrNDup(CallInst *CI, IRBuilder<> &B); Value *optimizeStrCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeStpCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeStrNCpy(CallInst *CI, IRBuilder<> &B); @@ -164,14 +171,17 @@ private: Value *optimizeStrCSpn(CallInst *CI, IRBuilder<> &B); Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B); Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemRChr(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B); Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B); Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B); + Value *optimizeBCopy(CallInst *CI, IRBuilder<> &B); // Wrapper for all String/Memory Library Call Optimizations Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 593ca26feb9..02b81b4b7ee 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -114,8 +114,8 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const 
SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, - unsigned MaxTripCount, unsigned &TripMultiple, - unsigned LoopSize, + unsigned MaxTripCount, bool MaxOrZero, + unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound); @@ -132,7 +132,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling); + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + Optional UserFullUnrollMaxCount); unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 1952a210291..ff5bfc60958 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -22,7 +22,7 @@ namespace llvm { class Constant; class Function; -class GlobalAlias; +class GlobalIndirectSymbol; class GlobalVariable; class Instruction; class MDNode; @@ -120,7 +120,7 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { /// instance: /// - \a scheduleMapGlobalInitializer() /// - \a scheduleMapAppendingVariable() -/// - \a scheduleMapGlobalAliasee() +/// - \a scheduleMapGlobalIndirectSymbol() /// - \a scheduleRemapFunction() /// /// Sometimes a callback needs a different mapping context. 
Such a context can @@ -180,8 +180,9 @@ public: bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MappingContextID = 0); - void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MappingContextID = 0); + void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, + unsigned MappingContextID = 0); void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0); }; diff --git a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index b144006e262..d1e7acc877b 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -33,18 +33,6 @@ namespace llvm { -/// Create an analysis remark that explains why vectorization failed -/// -/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p -/// RemarkName is the identifier for the remark. If \p I is passed it is an -/// instruction that prevents vectorization. Otherwise \p TheLoop is used for -/// the location of the remark. \return the remark object that can be -/// streamed to. -OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, - StringRef RemarkName, - Loop *TheLoop, - Instruction *I = nullptr); - /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. /// This class keeps a number of loop annotations locally (as member variables) @@ -55,7 +43,8 @@ OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { - enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; + enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, + HK_PREDICATE }; /// Hint - associates name and validation with the hint value. 
struct Hint { @@ -81,6 +70,9 @@ class LoopVectorizeHints { /// Already Vectorized Hint IsVectorized; + /// Vector Predicate + Hint Predicate; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -109,6 +101,7 @@ public: unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } + unsigned getPredicate() const { return Predicate.Value; } enum ForceKind getForce() const { if ((ForceKind)Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) @@ -235,8 +228,8 @@ public: bool canVectorize(bool UseVPlanNativePath); /// Return true if we can vectorize this loop while folding its tail by - /// masking. - bool canFoldTailByMasking(); + /// masking, and mark all respective loads/stores for masking. + bool prepareToFoldTailByMasking(); /// Returns the primary induction variable. PHINode *getPrimaryInduction() { return PrimaryInduction; } @@ -362,9 +355,16 @@ private: bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed. \p SafePtrs is a list of addresses that are known to be legal - /// and we know that we can read from them without segfault. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); + /// executed, and record the loads/stores that require masking. If's that + /// guard loads can be ignored under "assume safety" unless \p PreserveGuards + /// is true. This can happen when we introduces guards for which the original + /// "unguarded-loads are safe" assumption does not hold. For example, the + /// vectorizer's fold-tail transformation changes the loop to execute beyond + /// its original trip-count, under a proper guard, which should be preserved. + /// \p SafePtrs is a list of addresses that are known to be legal and we know + /// that we can read from them without segfault. 
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + bool PreserveGuards = false); /// Updates the vectorization state by adding \p Phi to the inductions list. /// This can set \p Phi as the main induction of the loop if \p Phi is a @@ -382,14 +382,6 @@ private: return LAI ? &LAI->getSymbolicStrides() : nullptr; } - /// Reports a vectorization illegality: print \p DebugMsg for debugging - /// purposes along with the corresponding optimization remark \p RemarkName. - /// If \p I is passed it is an instruction that prevents vectorization. - /// Otherwise the loop is used for the location of the remark. - void reportVectorizationFailure(const StringRef DebugMsg, - const StringRef OREMsg, const StringRef ORETag, - Instruction *I = nullptr) const; - /// The loop that we evaluate. Loop *TheLoop; @@ -452,8 +444,8 @@ private: /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; - /// Allowed outside users. This holds the induction and reduction - /// vars which can be accessed from outside the loop. + /// Allowed outside users. This holds the variables that can be accessed from + /// outside the loop. SmallPtrSet AllowedExit; /// Can we assume the absence of NaNs. diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h index d1ec06afb02..d824e2903ef 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -155,6 +155,14 @@ struct LoopVectorizePass : public PassInfoMixin { bool processLoop(Loop *L); }; +/// Reports a vectorization failure: print \p DebugMsg for debugging +/// purposes along with the corresponding optimization remark \p RemarkName. +/// If \p I is passed, it is an instruction that prevents vectorization. +/// Otherwise, the loop \p TheLoop is used for the location of the remark. 
+void reportVectorizationFailure(const StringRef DebugMsg, + const StringRef OREMsg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index ac6afb761d4..32ccc8a4638 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -24,7 +24,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" -#include "llvm/IR/ValueHandle.h" namespace llvm { @@ -60,8 +59,8 @@ extern cl::opt RunSLPVectorization; struct SLPVectorizerPass : public PassInfoMixin { using StoreList = SmallVector; using StoreListMap = MapVector; - using WeakTrackingVHList = SmallVector; - using WeakTrackingVHListMap = MapVector; + using GEPList = SmallVector; + using GEPListMap = MapVector; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -131,7 +130,7 @@ private: /// Tries to vectorize constructs started from CmpInst, InsertValueInst or /// InsertElementInst instructions. - bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, + bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, BasicBlock *BB, slpvectorizer::BoUpSLP &R); /// Scan the basic block and look for patterns that are likely to start @@ -147,7 +146,7 @@ private: StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. 
- WeakTrackingVHListMap GEPs; + GEPListMap GEPs; }; } // end namespace llvm diff --git a/include/llvm/XRay/FDRRecordProducer.h b/include/llvm/XRay/FDRRecordProducer.h index b530a85bc7e..043d91568f4 100644 --- a/include/llvm/XRay/FDRRecordProducer.h +++ b/include/llvm/XRay/FDRRecordProducer.h @@ -27,7 +27,7 @@ public: class FileBasedRecordProducer : public RecordProducer { const XRayFileHeader &Header; DataExtractor &E; - uint32_t &OffsetPtr; + uint64_t &OffsetPtr; uint32_t CurrentBufferBytes = 0; // Helper function which gets the next record by speculatively reading through @@ -36,7 +36,7 @@ class FileBasedRecordProducer : public RecordProducer { public: FileBasedRecordProducer(const XRayFileHeader &FH, DataExtractor &DE, - uint32_t &OP) + uint64_t &OP) : Header(FH), E(DE), OffsetPtr(OP) {} /// This producer encapsulates the logic for loading a File-backed diff --git a/include/llvm/XRay/FDRRecords.h b/include/llvm/XRay/FDRRecords.h index a8ce74bd88f..e3e16f71e2f 100644 --- a/include/llvm/XRay/FDRRecords.h +++ b/include/llvm/XRay/FDRRecords.h @@ -417,16 +417,16 @@ public: class RecordInitializer : public RecordVisitor { DataExtractor &E; - uint32_t &OffsetPtr; + uint64_t &OffsetPtr; uint16_t Version; public: static constexpr uint16_t DefaultVersion = 5u; - explicit RecordInitializer(DataExtractor &DE, uint32_t &OP, uint16_t V) + explicit RecordInitializer(DataExtractor &DE, uint64_t &OP, uint16_t V) : RecordVisitor(), E(DE), OffsetPtr(OP), Version(V) {} - explicit RecordInitializer(DataExtractor &DE, uint32_t &OP) + explicit RecordInitializer(DataExtractor &DE, uint64_t &OP) : RecordInitializer(DE, OP, DefaultVersion) {} Error visit(BufferExtents &) override; diff --git a/include/llvm/XRay/FileHeaderReader.h b/include/llvm/XRay/FileHeaderReader.h index 1c9681cfd9a..30878f3e99e 100644 --- a/include/llvm/XRay/FileHeaderReader.h +++ b/include/llvm/XRay/FileHeaderReader.h @@ -24,7 +24,7 @@ namespace xray { /// Convenience function for loading the file header given a 
data extractor at a /// specified offset. Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint32_t &OffsetPtr); + uint64_t &OffsetPtr); } // namespace xray } // namespace llvm diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index 9c4668e1473..ecb3b37004f 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -253,6 +253,7 @@ module LLVM_IR { textual header "IR/DebugInfoFlags.def" textual header "IR/Instruction.def" textual header "IR/Metadata.def" + textual header "IR/FixedMetadataKinds.def" textual header "IR/Value.def" textual header "IR/RuntimeLibcalls.def" } @@ -331,6 +332,7 @@ module LLVM_TableGen { module LLVM_Transforms { requires cplusplus umbrella "Transforms" + module * { export * } } diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 32241e355eb..55dd9a4cda0 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -784,7 +784,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // previous object first, in this case replacing it with an empty one, before // registering new results. AAR.reset( - new AAResults(getAnalysis().getTLI())); + new AAResults(getAnalysis().getTLI(F))); // BasicAA is always available for function analyses. Also, we add it first // so that it can trump TBAA results when it proves MustAlias. @@ -840,7 +840,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR) { - AAResults AAR(P.getAnalysis().getTLI()); + AAResults AAR(P.getAnalysis().getTLI(F)); // Add in our explicitly constructed BasicAA results. 
if (!DisableBasicAA) diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index a6e5b9fab55..79fbcd464c1 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -119,6 +119,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { TotalMayAliasSetSize -= AS->size(); AliasSets.erase(AS); + // If we've removed the saturated alias set, set saturated marker back to + // nullptr and ensure this tracker is empty. + if (AS == AliasAnyAS) { + AliasAnyAS = nullptr; + assert(AliasSets.empty() && "Tracker not empty"); + } } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -690,8 +696,10 @@ void AliasSet::print(raw_ostream &OS) const { } void AliasSetTracker::print(raw_ostream &OS) const { - OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " - << PointerMap.size() << " pointer values.\n"; + OS << "Alias Set Tracker: " << AliasSets.size(); + if (AliasAnyAS) + OS << " (Saturated)"; + OS << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const AliasSet &AS : *this) AS.print(OS); OS << "\n"; diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index d46a8d8e306..af718526684 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeModuleDebugInfoPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); initializeMustExecutePrinterPass(Registry); + initializeMustBeExecutedContextPrinterPass(Registry); initializeObjCARCAAWrapperPassPass(Registry); initializeOptimizationRemarkEmitterWrapperPassPass(Registry); initializePhiValuesWrapperPassPass(Registry); diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index cf2f845dee0..129944743c5 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -130,7 +130,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) { if (AVI != 
AffectedValues.end()) AffectedValues.erase(AVI); } - remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }); + + AssumeHandles.erase( + remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }), + AssumeHandles.end()); } void AssumptionCache::AffectedValueCallbackVH::deleted() { @@ -140,7 +143,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() { // 'this' now dangles! } -void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { +void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) { auto &NAVV = getOrInsertAffectedValues(NV); auto AVI = AffectedValues.find(OV); if (AVI == AffectedValues.end()) @@ -149,6 +152,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { for (auto &A : AVI->second) if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) NAVV.push_back(A); + AffectedValues.erase(OV); } void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { @@ -157,7 +161,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { // Any assumptions that affected this value now affect the new value. - AC->copyAffectedValuesInCache(getValPtr(), NV); + AC->transferAffectedValuesInCache(getValPtr(), NV); // 'this' now might dangle! If the AffectedValues map was resized to add an // entry for NV then this object might have been destroyed in favor of some // copy in the grown map. @@ -252,7 +256,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { // Ok, build a new cache by scanning the function, insert it and the value // handle into our map, and return the newly populated cache. 
auto IP = AssumptionCaches.insert(std::make_pair( - FunctionCallbackVH(&F, this), llvm::make_unique(F))); + FunctionCallbackVH(&F, this), std::make_unique(F))); assert(IP.second && "Scanning function already in the map?"); return *IP.first->second; } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 3721c99883b..f3c30c258c1 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -233,6 +233,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } +/// Return the minimal extent from \p V to the end of the underlying object, +/// assuming the result is used in an aliasing query. E.g., we do use the query +/// location size and the fact that null pointers cannot alias here. +static uint64_t getMinimalExtentFrom(const Value &V, + const LocationSize &LocSize, + const DataLayout &DL, + bool NullIsValidLoc) { + // If we have dereferenceability information we know a lower bound for the + // extent as accesses for a lower offset would be valid. We need to exclude + // the "or null" part if null is a valid pointer. + bool CanBeNull; + uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull); + DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes; + // If queried with a precise location size, we assume that location size to be + // accessed, thus valid. + if (LocSize.isPrecise()) + DerefBytes = std::max(DerefBytes, LocSize.getValue()); + return DerefBytes; +} + /// Returns true if we can prove that the object specified by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, const TargetLibraryInfo &TLI, bool NullIsValidLoc) { @@ -481,7 +501,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // because it should be in sync with CaptureTracking. 
Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -1792,10 +1812,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. bool NullIsValidLocation = NullPointerIsDefined(&F); - if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI, - NullIsValidLocation)) || - (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI, - NullIsValidLocation))) + if ((isObjectSmallerThan( + O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation)) || + (isObjectSmallerThan( + O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -2053,8 +2075,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable(); auto *PVWP = getAnalysisIfAvailable(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), - ACT.getAssumptionCache(F), &DTWP.getDomTree(), + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, + TLIWP.getTLI(F), ACT.getAssumptionCache(F), + &DTWP.getDomTree(), LIWP ? &LIWP->getLoopInfo() : nullptr, PVWP ? 
&PVWP->getResult() : nullptr)); @@ -2071,8 +2094,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( - F.getParent()->getDataLayout(), - F, - P.getAnalysis().getTLI(), + F.getParent()->getDataLayout(), F, + P.getAnalysis().getTLI(F), P.getAnalysis().getAssumptionCache(F)); } diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 5eb95003f5d..a06ee096d54 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -118,6 +118,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// This is the probability for an ordered floating point comparison. +static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; +/// This is the probability for an unordered floating point comparison, it means +/// one or two of the operands are NaN. Usually it is used to test for an +/// exceptional case, so the result is unlikely. 
+static const uint32_t FPH_UNO_WEIGHT = 1; + /// Invoke-terminating normal branch taken weight /// /// This is the weight for branching to the normal destination of an invoke @@ -778,6 +785,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!FCmp) return false; + uint32_t TakenWeight = FPH_TAKEN_WEIGHT; + uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT; bool isProb; if (FCmp->isEquality()) { // f1 == f2 -> Unlikely @@ -786,9 +795,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { // !isnan -> Likely isProb = true; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { // isnan -> Unlikely isProb = false; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else { return false; } @@ -798,8 +811,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - BranchProbability TakenProb(FPH_TAKEN_WEIGHT, - FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); setEdgeProbability(BB, TakenIdx, TakenProb); setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; @@ -1014,7 +1026,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); - const TargetLibraryInfo &TLI = getAnalysis().getTLI(); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); BPI.calculate(F, LI, &TLI); return false; } diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 18b83d6838c..8215b4ecbb0 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, /// with multiple predecessors. 
bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges) { - assert(TI->isTerminator() && "Must be a terminator to have successors!"); assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges); +} + +bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, + bool AllowIdenticalEdges) { + assert(TI->isTerminator() && "Must be a terminator to have successors!"); if (TI->getNumSuccessors() == 1) return false; - const BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) && + "No edge between TI's block and Dest."); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); // If there is more than one predecessor, this is a critical edge... diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 619b675b58d..4f4103fefa2 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -99,7 +99,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { errs() << "Writing '" << Filename << "'..."; std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); if (!EC) WriteGraph(File, (const Function*)&F, CFGOnly); diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp index 690e514d4f5..fd90bd1521d 100644 --- a/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -88,9 +88,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-anders-aa" -CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} +CFLAndersAAResult::CFLAndersAAResult( + std::function GetTLI) + : GetTLI(std::move(GetTLI)) {} CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) - : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} + : AAResultBase(std::move(RHS)), 
GetTLI(std::move(RHS.GetTLI)) {} CFLAndersAAResult::~CFLAndersAAResult() = default; namespace { @@ -779,7 +781,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph, CFLAndersAAResult::FunctionInfo CFLAndersAAResult::buildInfoFrom(const Function &Fn) { CFLGraphBuilder GraphBuilder( - *this, TLI, + *this, GetTLI(const_cast(Fn)), // Cast away the constness here due to GraphBuilder's API requirement const_cast(Fn)); auto &Graph = GraphBuilder.getCFLGraph(); @@ -898,7 +900,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, AnalysisKey CFLAndersAA::Key; CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLAndersAAResult(AM.getResult(F)); + auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & { + return AM.getResult(F); + }; + return CFLAndersAAResult(GetTLI); } char CFLAndersAAWrapperPass::ID = 0; @@ -914,8 +919,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { } void CFLAndersAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis(); - Result.reset(new CFLAndersAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + Result.reset(new CFLAndersAAResult(GetTLI)); } void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp index 44b1834f70b..b87aa406539 100644 --- a/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -60,10 +60,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-steens-aa" -CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(), TLI(TLI) {} +CFLSteensAAResult::CFLSteensAAResult( + std::function GetTLI) + : AAResultBase(), GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) - : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} + : AAResultBase(std::move(Arg)), 
GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. @@ -181,7 +182,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( // Builds the graph + StratifiedSets for a function. CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { - CFLGraphBuilder GraphBuilder(*this, TLI, *Fn); + CFLGraphBuilder GraphBuilder(*this, GetTLI(*Fn), *Fn); StratifiedSetsBuilder SetBuilder; // Add all CFLGraph nodes and all Dereference edges to StratifiedSets @@ -331,7 +332,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, AnalysisKey CFLSteensAA::Key; CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLSteensAAResult(AM.getResult(F)); + auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { + return AM.getResult(F); + }; + return CFLSteensAAResult(GetTLI); } char CFLSteensAAWrapperPass::ID = 0; @@ -347,8 +351,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { } void CFLSteensAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis(); - Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + Result.reset(new CFLSteensAAResult(GetTLI)); } void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index ec5e94d499b..70aeb1a688e 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -29,7 +29,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(llvm::make_unique(nullptr)) { + CallsExternalNode(std::make_unique(nullptr)) { // Add every function to the call graph. 
for (Function &F : M) addToCallGraph(&F); @@ -150,7 +150,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - CGN = llvm::make_unique(const_cast(F)); + CGN = std::make_unique(const_cast(F)); return CGN.get(); } diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index adaa83a6c44..20e2f06540a 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {} bool CaptureTracker::shouldExplore(const Use *U) { return true; } +bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. + if (auto *GEP = dyn_cast(O)) + if (GEP->isInBounds()) + return true; + bool CanBeNull; + return O->getPointerDereferenceableBytes(DL, CanBeNull); +} + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) @@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // marked with nocapture do not capture. This means that places like // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also need to know about this property. 
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) { + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, + true)) { AddUses(Call); break; } @@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, AddUses(I); break; case Instruction::ICmp: { - if (auto *CPN = dyn_cast(I->getOperand(1))) { + unsigned Idx = (I->getOperand(0) == V) ? 0 : 1; + unsigned OtherIdx = 1 - Idx; + if (auto *CPN = dyn_cast(I->getOperand(OtherIdx))) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. @@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, if (isNoAliasCall(V->stripPointerCasts())) break; if (!I->getFunction()->nullPointerIsDefined()) { - auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation(); - // An inbounds GEP can either be a valid pointer (pointing into - // or to the end of an allocation), or be null in the default - // address space. So for an inbounds GEPs there is no way to let - // the pointer escape using clever GEP hacking because doing so - // would make the pointer point outside of the allocated object - // and thus make the GEP result a poison value. - if (auto *GEP = dyn_cast(O)) - if (GEP->isInBounds()) - break; - // Comparing a dereferenceable_or_null argument against null - // cannot lead to pointer escapes, because if it is not null it - // must be a valid (in-bounds) pointer. - bool CanBeNull; - if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull)) + auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); + // Comparing a dereferenceable_or_null pointer against null cannot + // lead to pointer escapes, because if it is not null it must be a + // valid (in-bounds) pointer. 
+ if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout())) break; } } // Comparison against value stored in global variable. Given the pointer // does not escape, its value cannot be guessed and stored separately in a // global variable. - unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0; - auto *LI = dyn_cast(I->getOperand(OtherIndex)); + auto *LI = dyn_cast(I->getOperand(OtherIdx)); if (LI && isa(LI->getPointerOperand())) break; // Otherwise, be conservative. There are crazy ways to capture pointers diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 20231ca78b4..8dbcf7034fd 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -93,6 +93,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { + assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && + "Invalid constantexpr bitcast!"); + // Catch the obvious splat cases. if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -521,8 +524,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return nullptr; C = FoldBitCast(C, MapTy->getPointerTo(AS), DL); - if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) - return FoldBitCast(Res, LoadTy, DL); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) { + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + // Materializing a zero can be done trivially without a bitcast + return Constant::getNullValue(LoadTy); + Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? 
DL.getIntPtrType(LoadTy) : LoadTy; + Res = FoldBitCast(Res, CastTy, DL); + if (LoadTy->isPtrOrPtrVectorTy()) { + // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + return Constant::getNullValue(LoadTy); + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) + // Be careful not to replace a load of an addrspace value with an inttoptr here + return nullptr; + Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy); + } + return Res; + } return nullptr; } @@ -544,7 +562,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType()); // If we're not accessing anything in this constant, the result is undefined. - if (Offset + BytesLoaded <= 0) + if (Offset <= -1 * static_cast(BytesLoaded)) return UndefValue::get(IntType); // If we're not accessing anything in this constant, the result is undefined. @@ -781,10 +799,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef Ops, } /// Strip the pointer casts, but preserve the address space information. 
-Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) { +Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) { assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); auto *OldPtrTy = cast(Ptr->getType()); - Ptr = Ptr->stripPointerCasts(); + Ptr = cast(Ptr->stripPointerCasts()); auto *NewPtrTy = cast(Ptr->getType()); ElemTy = NewPtrTy->getPointerElementType(); @@ -1038,7 +1056,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: return ConstantExpr::getExtractValue( - Ops[0], dyn_cast(InstOrCE)->getIndices()); + Ops[0], cast(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: @@ -1464,40 +1482,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (!F->hasName()) return false; - StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
+ StringRef Name = F->getName(); switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || - Name == "atan2" || Name == "acosf" || Name == "asinf" || - Name == "atanf" || Name == "atan2f"; + return Name == "acos" || Name == "acosf" || + Name == "asin" || Name == "asinf" || + Name == "atan" || Name == "atanf" || + Name == "atan2" || Name == "atan2f"; case 'c': - return Name == "ceil" || Name == "cos" || Name == "cosh" || - Name == "ceilf" || Name == "cosf" || Name == "coshf"; + return Name == "ceil" || Name == "ceilf" || + Name == "cos" || Name == "cosf" || + Name == "cosh" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; + return Name == "exp" || Name == "expf" || + Name == "exp2" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "floor" || Name == "fmod" || - Name == "fabsf" || Name == "floorf" || Name == "fmodf"; + return Name == "fabs" || Name == "fabsf" || + Name == "floor" || Name == "floorf" || + Name == "fmod" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10" || Name == "logf" || - Name == "log10f"; + return Name == "log" || Name == "logf" || + Name == "log2" || Name == "log2f" || + Name == "log10" || Name == "log10f"; + case 'n': + return Name == "nearbyint" || Name == "nearbyintf"; case 'p': return Name == "pow" || Name == "powf"; case 'r': - return Name == "round" || Name == "roundf"; + return Name == "rint" || Name == "rintf" || + Name == "round" || Name == "roundf"; case 's': - return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; + return Name == "sin" || Name == "sinf" || + Name == "sinh" || Name == "sinhf" || + Name == "sqrt" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + return Name == "tan" || Name == "tanf" || + Name == "tanh" || Name == "tanhf" || + Name == "trunc" || 
Name == "truncf"; case '_': - // Check for various function names that get used for the math functions // when the header files are preprocessed with the macro // __FINITE_MATH_ONLY__ enabled. @@ -1713,40 +1741,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; - if (IntrinsicID == Intrinsic::round) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToAway); - return ConstantFP::get(Ty->getContext(), V); + // Use internal versions of these intrinsics. + APFloat U = Op->getValueAPF(); + + if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::floor) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardNegative); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::round) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::ceil) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardPositive); - return ConstantFP::get(Ty->getContext(), V); + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); + } + + if (IntrinsicID == Intrinsic::floor) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::trunc) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardZero); - return ConstantFP::get(Ty->getContext(), V); + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::rint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); - } - - if (IntrinsicID == Intrinsic::nearbyint) { - APFloat V = 
Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::fabs) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); } /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1763,18 +1788,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, switch (IntrinsicID) { default: break; - case Intrinsic::fabs: - return ConstantFoldFP(fabs, V, Ty); - case Intrinsic::log2: - return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log: return ConstantFoldFP(log, V, Ty); + case Intrinsic::log2: + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log10: + // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log10, V, Ty); case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: - return ConstantFoldFP(exp2, V, Ty); + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1786,104 +1812,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!TLI) return nullptr; - char NameKeyChar = Name[0]; - if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') - NameKeyChar = Name[2]; - - switch (NameKeyChar) { - case 'a': - if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf)) || - (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || - (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acos_finite: + case LibFunc_acosf_finite: + if (TLI->has(Func)) return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf)) || - (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || - (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asin_finite: + case LibFunc_asinf_finite: + if (TLI->has(Func)) return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc_atan)) || - (Name == "atanf" && TLI->has(LibFunc_atanf))) + break; + case LibFunc_atan: + case LibFunc_atanf: + if (TLI->has(Func)) return ConstantFoldFP(atan, V, Ty); break; - case 'c': - if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || - (Name == "ceilf" && TLI->has(LibFunc_ceilf))) - return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc_cos)) || - (Name == "cosf" && TLI->has(LibFunc_cosf))) + case LibFunc_ceil: + case LibFunc_ceilf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_cos: + case LibFunc_cosf: + if (TLI->has(Func)) return 
ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf)) || - (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || - (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_cosh_finite: + case LibFunc_coshf_finite: + if (TLI->has(Func)) return ConstantFoldFP(cosh, V, Ty); break; - case 'e': - if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf)) || - (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || - (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_exp_finite: + case LibFunc_expf_finite: + if (TLI->has(Func)) return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || - (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || - (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) - // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a - // C99 library. + break; + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2_finite: + case LibFunc_exp2f_finite: + if (TLI->has(Func)) + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); break; - case 'f': - if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || - (Name == "fabsf" && TLI->has(LibFunc_fabsf))) - return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc_floor)) || - (Name == "floorf" && TLI->has(LibFunc_floorf))) - return ConstantFoldFP(floor, V, Ty); + case LibFunc_fabs: + case LibFunc_fabsf: + if (TLI->has(Func)) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || - (Name == "__log_finite" && V > 0 && - TLI->has(LibFunc_log_finite)) || - (Name == "__logf_finite" && V > 0 && - TLI->has(LibFunc_logf_finite))) + case LibFunc_floor: + case LibFunc_floorf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_log: + case LibFunc_logf: + case LibFunc_log_finite: + case LibFunc_logf_finite: + if (V > 0.0 && TLI->has(Func)) return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || - (Name == "__log10_finite" && V > 0 && - TLI->has(LibFunc_log10_finite)) || - (Name == "__log10f_finite" && V > 0 && - TLI->has(LibFunc_log10f_finite))) + break; + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2_finite: + case LibFunc_log2f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); + break; + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10_finite: + case LibFunc_log10f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, V, Ty); break; - case 'r': - if ((Name == "round" && TLI->has(LibFunc_round)) || - (Name == "roundf" && TLI->has(LibFunc_roundf))) - return ConstantFoldFP(round, V, Ty); + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_rint: + case LibFunc_rintf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 's': - if ((Name == "sin" && TLI->has(LibFunc_sin)) || - (Name == "sinf" && TLI->has(LibFunc_sinf))) + case LibFunc_round: + case LibFunc_roundf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_sin: + case LibFunc_sinf: + if (TLI->has(Func)) return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || - (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || - (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinh_finite: + case LibFunc_sinhf_finite: + if (TLI->has(Func)) return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) + break; + case LibFunc_sqrt: + case LibFunc_sqrtf: + if (V >= 0.0 && TLI->has(Func)) return ConstantFoldFP(sqrt, V, Ty); break; - case 't': - if ((Name == "tan" && TLI->has(LibFunc_tan)) || - (Name == "tanf" && TLI->has(LibFunc_tanf))) + case LibFunc_tan: + case LibFunc_tanf: + if (TLI->has(Func)) return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || - (Name == "tanhf" && TLI->has(LibFunc_tanhf))) + break; + case LibFunc_tanh: + case LibFunc_tanhf: + if (TLI->has(Func)) return ConstantFoldFP(tanh, V, Ty); break; - default: + case LibFunc_trunc: + case LibFunc_truncf: + if (TLI->has(Func)) { + 
U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); + } break; } return nullptr; @@ -2002,19 +2074,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, if (!TLI) return nullptr; - if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf)) || - (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || - (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || - (Name == "fmodf" && TLI->has(LibFunc_fmodf))) - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || - (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || - (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_pow_finite: + case LibFunc_powf_finite: + if (TLI->has(Func)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + break; + case LibFunc_fmod: + case LibFunc_fmodf: + if (TLI->has(Func)) { + APFloat V = Op1->getValueAPF(); + if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) + return ConstantFP::get(Ty->getContext(), V); + } + break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2_finite: + case LibFunc_atan2f_finite: + if (TLI->has(Func)) + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + break; + } } else if (auto *Op2C = dyn_cast(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) return ConstantFP::get(Ty->getContext(), @@ -2041,20 +2129,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, switch (IntrinsicID) { default: break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case 
Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X - undef -> { undef, false } + // undef - X -> { undef, false } + // X + undef -> { undef, false } + // undef + x -> { undef, false } + if (!C0 || !C1) { + return ConstantStruct::get( + cast(Ty), + {UndefValue::get(Ty->getStructElementType(0)), + Constant::getNullValue(Ty->getStructElementType(1))}); + } + LLVM_FALLTHROUGH; case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - // Even if both operands are undef, we cannot fold muls to undef - // in the general case. For example, on i2 there are no inputs - // that would produce { i2 -1, i1 true } as the result. + case Intrinsic::umul_with_overflow: { + // undef * X -> { 0, false } + // X * undef -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); - LLVM_FALLTHROUGH; - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: { - if (!C0 || !C1) - return UndefValue::get(Ty); APInt Res; bool Overflow; @@ -2194,13 +2289,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = Op1->getValueAPF(); - APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(), - Op3->getValueAPF(), - APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return ConstantFP::get(Ty->getContext(), V); - - return nullptr; + V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); } } } diff --git a/lib/Analysis/DDG.cpp b/lib/Analysis/DDG.cpp new file mode 100644 index 00000000000..b5c3c761ad9 --- /dev/null +++ b/lib/Analysis/DDG.cpp @@ -0,0 +1,203 @@ +//===- DDG.cpp - Data Dependence Graph -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation for the data dependence graph. +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/DDG.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "ddg" + +template class llvm::DGEdge; +template class llvm::DGNode; +template class llvm::DirectedGraph; + +//===--------------------------------------------------------------------===// +// DDGNode implementation +//===--------------------------------------------------------------------===// +DDGNode::~DDGNode() {} + +bool DDGNode::collectInstructions( + llvm::function_ref const &Pred, + InstructionListType &IList) const { + assert(IList.empty() && "Expected the IList to be empty on entry."); + if (isa(this)) { + for (auto *I : cast(this)->getInstructions()) + if (Pred(I)) + IList.push_back(I); + } else + llvm_unreachable("unimplemented type of node"); + return !IList.empty(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) { + const char *Out; + switch (K) { + case DDGNode::NodeKind::SingleInstruction: + Out = "single-instruction"; + break; + case DDGNode::NodeKind::MultiInstruction: + Out = "multi-instruction"; + break; + case DDGNode::NodeKind::Root: + Out = "root"; + break; + case DDGNode::NodeKind::Unknown: + Out = "??"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { + OS << "Node Address:" << &N << ":" << N.getKind() << "\n"; + if (isa(N)) { + OS << " Instructions:\n"; + for (auto *I : cast(N).getInstructions()) + OS.indent(2) << *I << "\n"; + } else if (!isa(N)) + llvm_unreachable("unimplemented type of node"); + + OS << (N.getEdges().empty() ? 
" Edges:none!\n" : " Edges:\n"); + for (auto &E : N.getEdges()) + OS.indent(2) << *E; + return OS; +} + +//===--------------------------------------------------------------------===// +// SimpleDDGNode implementation +//===--------------------------------------------------------------------===// + +SimpleDDGNode::SimpleDDGNode(Instruction &I) + : DDGNode(NodeKind::SingleInstruction), InstList() { + assert(InstList.empty() && "Expected empty list."); + InstList.push_back(&I); +} + +SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N) + : DDGNode(N), InstList(N.InstList) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N) + : DDGNode(std::move(N)), InstList(std::move(N.InstList)) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); } + +//===--------------------------------------------------------------------===// +// DDGEdge implementation +//===--------------------------------------------------------------------===// + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) { + const char *Out; + switch (K) { + case DDGEdge::EdgeKind::RegisterDefUse: + Out = "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence: + Out = "memory"; + break; + case DDGEdge::EdgeKind::Rooted: + Out = "rooted"; + break; + case DDGEdge::EdgeKind::Unknown: + Out = "??"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) { + OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// 
DataDependenceGraph implementation +//===--------------------------------------------------------------------===// +using BasicBlockListType = SmallVector; + +DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) + : DependenceGraphInfo(F.getName().str(), D) { + BasicBlockListType BBList; + for (auto &BB : F.getBasicBlockList()) + BBList.push_back(&BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D) + : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + + L.getHeader()->getName()) + .str(), + D) { + BasicBlockListType BBList; + for (BasicBlock *BB : L.blocks()) + BBList.push_back(BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::~DataDependenceGraph() { + for (auto *N : Nodes) { + for (auto *E : *N) + delete E; + delete N; + } +} + +bool DataDependenceGraph::addNode(DDGNode &N) { + if (!DDGBase::addNode(N)) + return false; + + // In general, if the root node is already created and linked, it is not safe + // to add new nodes since they may be unreachable by the root. + // TODO: Allow adding Pi-block nodes after root is created. Pi-blocks are an + // exception because they represent components that are already reachable by + // root. + assert(!Root && "Root node is already added. No more nodes can be added."); + if (isa(N)) + Root = &N; + + return true; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { + for (auto *Node : G) + OS << *Node << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// DDG as a loop pass. 
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + return std::make_unique(L, DI); +} +AnalysisKey DDGAnalysis::Key; + +PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n"; + OS << *AM.getResult(L, AR); + return PreservedAnalyses::all(); +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 75f269e84f9..0038c9fb9ce 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -254,7 +254,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, LoopIndependent(PossiblyLoopIndependent) { Consistent = true; if (CommonLevels) - DV = make_unique(CommonLevels); + DV = std::make_unique(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -3415,7 +3415,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return make_unique(Src, Dst); + return std::make_unique(Src, Dst); } assert(isLoadOrStore(Src) && "instruction is not load or store"); @@ -3430,7 +3430,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, case PartialAlias: // cannot analyse objects if we don't understand their aliasing. LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return make_unique(Src, Dst); + return std::make_unique(Src, Dst); case NoAlias: // If the objects noalias, they are distinct, accesses are independent. 
LLVM_DEBUG(dbgs() << "no alias\n"); @@ -3777,7 +3777,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, return nullptr; } - return make_unique(std::move(Result)); + return std::make_unique(std::move(Result)); } diff --git a/lib/Analysis/DependenceGraphBuilder.cpp b/lib/Analysis/DependenceGraphBuilder.cpp new file mode 100644 index 00000000000..ed1d8351b2f --- /dev/null +++ b/lib/Analysis/DependenceGraphBuilder.cpp @@ -0,0 +1,228 @@ +//===- DependenceGraphBuilder.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements common steps of the build algorithm for construction +// of dependence graphs such as DDG and PDG. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DDG.h" + +using namespace llvm; + +#define DEBUG_TYPE "dgb" + +STATISTIC(TotalGraphs, "Number of dependence graphs created."); +STATISTIC(TotalDefUseEdges, "Number of def-use edges created."); +STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created."); +STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created."); +STATISTIC(TotalConfusedEdges, + "Number of confused memory dependencies between two nodes."); +STATISTIC(TotalEdgeReversals, + "Number of times the source and sink of dependence was reversed to " + "expose cycles in the graph."); + +using InstructionListType = SmallVector; + +//===--------------------------------------------------------------------===// +// AbstractDependenceGraphBuilder implementation 
+//===--------------------------------------------------------------------===// + +template +void AbstractDependenceGraphBuilder::createFineGrainedNodes() { + ++TotalGraphs; + assert(IMap.empty() && "Expected empty instruction map at start"); + for (BasicBlock *BB : BBList) + for (Instruction &I : *BB) { + auto &NewNode = createFineGrainedNode(I); + IMap.insert(std::make_pair(&I, &NewNode)); + ++TotalFineGrainedNodes; + } +} + +template +void AbstractDependenceGraphBuilder::createAndConnectRootNode() { + // Create a root node that connects to every connected component of the graph. + // This is done to allow graph iterators to visit all the disjoint components + // of the graph, in a single walk. + // + // This algorithm works by going through each node of the graph and for each + // node N, do a DFS starting from N. A rooted edge is established between the + // root node and N (if N is not yet visited). All the nodes reachable from N + // are marked as visited and are skipped in the DFS of subsequent nodes. + // + // Note: This algorithm tries to limit the number of edges out of the root + // node to some extent, but there may be redundant edges created depending on + // the iteration order. For example for a graph {A -> B}, an edge from the + // root node is added to both nodes if B is visited before A. While it does + // not result in minimal number of edges, this approach saves compile-time + // while keeping the number of edges in check. 
+ auto &RootNode = createRootNode(); + df_iterator_default_set Visited; + for (auto *N : Graph) { + if (*N == RootNode) + continue; + for (auto I : depth_first_ext(N, Visited)) + if (I == N) + createRootedEdge(RootNode, *N); + } +} + +template void AbstractDependenceGraphBuilder::createDefUseEdges() { + for (NodeType *N : Graph) { + InstructionListType SrcIList; + N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); + + // Use a set to mark the targets that we link to N, so we don't add + // duplicate def-use edges when more than one instruction in a target node + // use results of instructions that are contained in N. + SmallPtrSet VisitedTargets; + + for (Instruction *II : SrcIList) { + for (User *U : II->users()) { + Instruction *UI = dyn_cast(U); + if (!UI) + continue; + NodeType *DstNode = nullptr; + if (IMap.find(UI) != IMap.end()) + DstNode = IMap.find(UI)->second; + + // In the case of loops, the scope of the subgraph is all the + // basic blocks (and instructions within them) belonging to the loop. We + // simply ignore all the edges coming from (or going into) instructions + // or basic blocks outside of this range. + if (!DstNode) { + LLVM_DEBUG( + dbgs() + << "skipped def-use edge since the sink" << *UI + << " is outside the range of instructions being considered.\n"); + continue; + } + + // Self dependencies are ignored because they are redundant and + // uninteresting. 
+ if (DstNode == N) { + LLVM_DEBUG(dbgs() + << "skipped def-use edge since the sink and the source (" + << N << ") are the same.\n"); + continue; + } + + if (VisitedTargets.insert(DstNode).second) { + createDefUseEdge(*N, *DstNode); + ++TotalDefUseEdges; + } + } + } + } +} + +template +void AbstractDependenceGraphBuilder::createMemoryDependencyEdges() { + using DGIterator = typename G::iterator; + auto isMemoryAccess = [](const Instruction *I) { + return I->mayReadOrWriteMemory(); + }; + for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { + InstructionListType SrcIList; + (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); + if (SrcIList.empty()) + continue; + + for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { + if (**SrcIt == **DstIt) + continue; + InstructionListType DstIList; + (*DstIt)->collectInstructions(isMemoryAccess, DstIList); + if (DstIList.empty()) + continue; + bool ForwardEdgeCreated = false; + bool BackwardEdgeCreated = false; + for (Instruction *ISrc : SrcIList) { + for (Instruction *IDst : DstIList) { + auto D = DI.depends(ISrc, IDst, true); + if (!D) + continue; + + // If we have a dependence with its left-most non-'=' direction + // being '>' we need to reverse the direction of the edge, because + // the source of the dependence cannot occur after the sink. For + // confused dependencies, we will create edges in both directions to + // represent the possibility of a cycle. 
+ + auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = BackwardEdgeCreated = true; + ++TotalConfusedEdges; + }; + + auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = true; + }; + + auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + BackwardEdgeCreated = true; + }; + + if (D->isConfused()) + createConfusedEdges(**SrcIt, **DstIt); + else if (D->isOrdered() && !D->isLoopIndependent()) { + bool ReversedEdge = false; + for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { + if (D->getDirection(Level) == Dependence::DVEntry::EQ) + continue; + else if (D->getDirection(Level) == Dependence::DVEntry::GT) { + createBackwardEdge(**SrcIt, **DstIt); + ReversedEdge = true; + ++TotalEdgeReversals; + break; + } else if (D->getDirection(Level) == Dependence::DVEntry::LT) + break; + else { + createConfusedEdges(**SrcIt, **DstIt); + break; + } + } + if (!ReversedEdge) + createForwardEdge(**SrcIt, **DstIt); + } else + createForwardEdge(**SrcIt, **DstIt); + + // Avoid creating duplicate edges. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + + // If we've created edges in both directions, there is no more + // unique edge that we can create between these two nodes, so we + // can exit early. 
+ if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + } + } +} + +template class llvm::AbstractDependenceGraphBuilder; +template class llvm::DependenceGraphInfo; diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 0ccd59ef2bf..3d1be1e1cce 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const { return DivergentValues.find(&V) != DivergentValues.end(); } +bool DivergenceAnalysis::isDivergentUse(const Use &U) const { + Value &V = *U.get(); + Instruction &I = *cast(U.getUser()); + return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); +} + void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; @@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { return DA.isDivergent(val); } +bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { + return DA.isDivergentUse(use); +} + void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; DA.print(OS, mod); diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 0d6c0ffb18a..efdf9706ba3 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -370,7 +370,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, // passing into the function. if (Call->isDataOperand(&U)) { // Detect calls to free. 
- if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) { + if (Call->isArgOperand(&U) && + isFreeCall(I, &GetTLI(*Call->getFunction()))) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -432,7 +433,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { Value *Ptr = GetUnderlyingObject(SI->getOperand(0), GV->getParent()->getDataLayout()); - if (!isAllocLikeFn(Ptr, &TLI)) + if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -576,6 +577,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We handle calls specially because the graph-relevant aspects are // handled above. if (auto *Call = dyn_cast(&I)) { + auto &TLI = GetTLI(*Node->getFunction()); if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. @@ -937,12 +939,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI)); } -GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, - const TargetLibraryInfo &TLI) - : AAResultBase(), DL(DL), TLI(TLI) {} +GlobalsAAResult::GlobalsAAResult( + const DataLayout &DL, + std::function GetTLI) + : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), + : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), IndirectGlobals(std::move(Arg.IndirectGlobals)), AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), @@ -957,10 +960,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) GlobalsAAResult::~GlobalsAAResult() {} -/*static*/ GlobalsAAResult -GlobalsAAResult::analyzeModule(Module &M, const 
TargetLibraryInfo &TLI, - CallGraph &CG) { - GlobalsAAResult Result(M.getDataLayout(), TLI); +/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule( + Module &M, std::function GetTLI, + CallGraph &CG) { + GlobalsAAResult Result(M.getDataLayout(), GetTLI); // Discover which functions aren't recursive, to feed into AnalyzeGlobals. Result.CollectSCCMembership(CG); @@ -977,8 +980,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, AnalysisKey GlobalsAA::Key; GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { - return GlobalsAAResult::analyzeModule(M, - AM.getResult(M), + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + return GlobalsAAResult::analyzeModule(M, GetTLI, AM.getResult(M)); } @@ -999,9 +1006,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { } bool GlobalsAAWrapperPass::runOnModule(Module &M) { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( - M, getAnalysis().getTLI(), - getAnalysis().getCallGraph()))); + M, GetTLI, getAnalysis().getCallGraph()))); return false; } diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp index ce285f82f72..6fb600114bc 100644 --- a/lib/Analysis/IVDescriptors.cpp +++ b/lib/Analysis/IVDescriptors.cpp @@ -300,7 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; - if (isa(ReduxDesc.getPatternInst())) + // FIXME: FMF is allowed on phi, but propagation is not handled correctly. 
+ if (isa(ReduxDesc.getPatternInst()) && !IsAPhi) FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 6ff840efcb6..68153de8219 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -53,7 +53,7 @@ static cl::opt "call callsite")); ICallPromotionAnalysis::ICallPromotionAnalysis() { - ValueDataArray = llvm::make_unique(MaxNumPromotions); + ValueDataArray = std::make_unique(MaxNumPromotions); } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 0dec146e046..89811ec0e37 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -436,7 +436,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (auto *AllocSize = dyn_cast_or_null(Size)) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); return Base::visitAlloca(I); } } @@ -444,7 +445,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); + AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); } // We will happily inline static alloca instructions. @@ -1070,8 +1072,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *SimpleV = nullptr; if (auto FI = dyn_cast(&I)) - SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, - CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); + SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, + CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? 
CLHS : LHS, CRHS ? CRHS : RHS, DL); @@ -1453,19 +1455,6 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // Maximum valid cost increased in this function. int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)CostUpperBound, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); - return false; - } - unsigned JumpTableSize = 0; unsigned NumCaseCluster = TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index e34bf6f4e43..cb898772170 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const SimplifyQuery &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -1371,7 +1371,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. 
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, - ICmpInst *UnsignedICmp, bool IsAnd) { + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q) { Value *X, *Y; ICmpInst::Predicate EqPred; @@ -1380,6 +1381,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + // Y = (A - B); + if (match(Y, m_Sub(m_Value(A), m_Value(B)))) { + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) && + ICmpInst::isUnsigned(UnsignedPred)) { + if (UnsignedICmp->getOperand(0) != A) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // A >=/<= B || (A - B) != 0 <--> true + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_ULE) && + EqPred == ICmpInst::ICMP_NE && !IsAnd) + return ConstantInt::getTrue(UnsignedICmp->getType()); + // A B && (A - B) == 0 <--> false + if ((UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_EQ && IsAnd) + return ConstantInt::getFalse(UnsignedICmp->getType()); + + // A B && (A - B) != 0 <--> A B + // A B || (A - B) != 0 <--> (A - B) != 0 + if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // A <=/>= B && (A - B) == 0 <--> (A - B) == 0 + // A <=/>= B || (A - B) == 0 <--> A <=/>= B + if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_UGE)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; + } + + // Given Y = (A - B) + // Y >= A && Y != 0 --> Y >= A iff B != 0 + // Y < A || Y == 0 --> Y < A iff B != 0 + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { + if (UnsignedICmp->getOperand(0) != Y) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && + EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd && + EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + } + } + if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && ICmpInst::isUnsigned(UnsignedPred)) ; @@ -1395,19 +1449,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) return IsAnd ? UnsignedICmp : ZeroICmp; - // X >= Y || Y != 0 --> true + // X <= Y && Y != 0 --> X <= Y iff X != 0 + // X <= Y || Y != 0 --> Y != 0 iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // X >= Y && Y == 0 --> Y == 0 // X >= Y || Y == 0 --> X >= Y - if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { - if (EqPred == ICmpInst::ICMP_NE) - return getTrue(UnsignedICmp->getType()); - return UnsignedICmp; - } + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ) + return IsAnd ? ZeroICmp : UnsignedICmp; + + // X > Y && Y == 0 --> Y == 0 iff X != 0 + // X > Y || Y == 0 --> X > Y iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) return getFalse(UnsignedICmp->getType()); + // X >= Y || Y != 0 --> true + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE && + !IsAnd) + return getTrue(UnsignedICmp->getType()); + return nullptr; } @@ -1587,10 +1655,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q)) return X; if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) @@ -1604,9 +1672,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1660,10 +1728,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q)) return X; if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) @@ -1677,9 +1745,9 @@ static 
Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1738,8 +1806,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, auto *ICmp0 = dyn_cast(Op0); auto *ICmp1 = dyn_cast(Op1); if (ICmp0 && ICmp1) - V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ) - : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ); + V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q) + : simplifyOrOfICmps(ICmp0, ICmp1, Q); auto *FCmp0 = dyn_cast(Op0); auto *FCmp1 = dyn_cast(Op1); @@ -1759,6 +1827,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, return nullptr; } +/// Check that the Op1 is in expected form, i.e.: +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1, + Value *X) { + auto *Extract = dyn_cast(Op1); + // We should only be extracting the overflow bit. + if (!Extract || !Extract->getIndices().equals(1)) + return false; + Value *Agg = Extract->getAggregateOperand(); + // This should be a multiplication-with-overflow intrinsic. + if (!match(Agg, m_CombineOr(m_Intrinsic(), + m_Intrinsic()))) + return false; + // One of its multipliers should be the value we checked for zero before. + if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)), + m_Argument<1>(m_Specific(X))))) + return false; + return true; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. 
+/// Look for: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %??? = and i1 %Op0, %Op1 +/// We can just return %Op1 +static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_NE) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the overflow bit. + return Op1; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %NotOp1 = xor i1 %Op1, true +/// %or = or i1 %Op0, %NotOp1 +/// We can just return %NotOp1 +static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, + Value *NotOp1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_EQ) + return nullptr; + // We expect the other hand of an 'or' to be a 'not'. + Value *Op1; + if (!match(NotOp1, m_Not(m_Value(Op1)))) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the inverted overflow bit. + return NotOp1; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. 
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1813,6 +1952,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; } + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0)) + return V; + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1987,6 +2134,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0)) + return V; + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) @@ -3529,6 +3684,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // %sel = select i1 %cmp, i32 -2147483648, i32 %add // // We can't replace %sel with %add unless we strip away the flags. + // TODO: This is an unusual limitation because better analysis results in + // worse simplification. InstCombine can do this fold more generally + // by dropping the flags. Remove this fold to save compile-time? 
if (isa(B)) if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B)) return nullptr; @@ -4324,14 +4482,16 @@ static Constant *propagateNaN(Constant *In) { return In; } -static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { - if (isa(Op0) || isa(Op1)) - return ConstantFP::getNaN(Op0->getType()); +/// Perform folds that are common to any floating-point operation. This implies +/// transforms based on undef/NaN because the operation itself makes no +/// difference to the result. +static Constant *simplifyFPOp(ArrayRef Ops) { + if (any_of(Ops, [](Value *V) { return isa(V); })) + return ConstantFP::getNaN(Ops[0]->getType()); - if (match(Op0, m_NaN())) - return propagateNaN(cast(Op0)); - if (match(Op1, m_NaN())) - return propagateNaN(cast(Op1)); + for (Value *V : Ops) + if (match(V, m_NaN())) + return propagateNaN(cast(V)); return nullptr; } @@ -4343,7 +4503,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fadd X, -0 ==> X @@ -4390,7 +4550,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fsub X, +0 ==> X @@ -4430,23 +4590,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -/// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) - return C; - - if (Constant *C = simplifyFPBinop(Op0, Op1)) +static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery 
&Q, unsigned MaxRecurse) { + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; + // fmul 1.0, X ==> X + if (match(Op0, m_FPOne())) + return Op1; + // fmul nnan nsz X, 0 ==> 0 if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); + // fmul nnan nsz 0, X ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op1->getType()); + // sqrt(X) * sqrt(X) --> X, if we can: // 1. Remove the intermediate rounding (reassociate). // 2. Ignore non-zero negative numbers because sqrt would produce NAN. @@ -4459,6 +4623,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; + + // Now apply simplifications that do not require rounding. 
+ return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse); +} + Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); @@ -4475,12 +4649,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } +Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit); +} + static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // X / 1.0 -> X @@ -4525,7 +4704,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -4564,8 +4743,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, /// Given the operand for a UnaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. +/// Try to use FastMathFlags when folding the result. 
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -4581,8 +4759,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); } -Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q) { +Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); } @@ -4634,11 +4812,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const SimplifyQuery &Q, - unsigned MaxRecurse) { +/// Try to use FastMathFlags when folding the result. +static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const SimplifyQuery &Q, + unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); @@ -4658,9 +4835,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } -Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. 
@@ -5009,6 +5186,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } return nullptr; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Op0 = Call->getArgOperand(0); + Value *Op1 = Call->getArgOperand(1); + Value *Op2 = Call->getArgOperand(2); + if (Value *V = simplifyFPOp({ Op0, Op1, Op2 })) + return V; + return nullptr; + } default: return nullptr; } @@ -5221,14 +5407,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, /// If we have a pre-simplified value in 'SimpleV', that is forcibly used to /// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of /// instructions to process and attempt to simplify it using -/// InstructionSimplify. +/// InstructionSimplify. Recursively visited users which could not be +/// simplified themselves are to the optional UnsimplifiedUsers set for +/// further processing by the caller. /// /// This routine returns 'true' only when *it* simplifies something. The passed /// in simplified value does not count toward this. -static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC) { +static bool replaceAndRecursivelySimplifyImpl( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + SmallSetVector *UnsimplifiedUsers = nullptr) { bool Simplified = false; SmallSetVector Worklist; const DataLayout &DL = I->getModule()->getDataLayout(); @@ -5258,8 +5446,11 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, // See if this instruction simplifies. 
SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); - if (!SimpleV) + if (!SimpleV) { + if (UnsimplifiedUsers) + UnsimplifiedUsers->insert(I); continue; + } Simplified = true; @@ -5285,16 +5476,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, nullptr); } -bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC) { +bool llvm::replaceAndRecursivelySimplify( + Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, + SmallSetVector *UnsimplifiedUsers) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC, + UnsimplifiedUsers); } namespace llvm { @@ -5302,7 +5494,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { auto *DTWP = P.getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *TLIWP = P.getAnalysisIfAvailable(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr; auto *ACWP = P.getAnalysisIfAvailable(); auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; return {F.getParent()->getDataLayout(), TLI, DT, AC}; diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp index f2592c26b37..e727de468a0 100644 --- a/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -55,8 +55,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis().getLoopInfo(); - TargetLibraryInfo &TLI = getAnalysis().getTLI(); - LBPI = llvm::make_unique(&F, &LI, &TLI); + TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); + LBPI = std::make_unique(&F, &LI, &TLI); return false; } diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index 797fcf51642..ef31c1e0ba8 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); } -LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { +LazyCallGraph::LazyCallGraph( + Module &M, function_ref GetTLI) { LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); for (Function &F : M) { @@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { // If this function is a known lib function to LLVM then we want to // synthesize reference edges to it to model the fact that LLVM can turn // arbitrary code into a library function call. - if (isKnownLibFunction(F, TLI)) + if (isKnownLibFunction(F, GetTLI(F))) LibFunctions.insert(&F); if (F.hasLocalLinkage()) @@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall( // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. 
- if (empty(MergeRange)) { + if (MergeRange.empty()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); return false; // No new cycle. @@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { } static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { - ptrdiff_t Size = size(C); - OS << " SCC with " << Size << " functions:\n"; + OS << " SCC with " << C.size() << " functions:\n"; for (LazyCallGraph::Node &N : C) OS << " " << N.getFunction().getName() << "\n"; } static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) { - ptrdiff_t Size = size(C); - OS << " RefSCC with " << Size << " call SCCs:\n"; + OS << " RefSCC with " << C.size() << " call SCCs:\n"; for (LazyCallGraph::SCC &InnerC : C) printSCC(OS, InnerC); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 542ff709d47..96722f32e35 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -188,7 +188,7 @@ namespace { else { auto It = ValueCache.find_as(Val); if (It == ValueCache.end()) { - ValueCache[Val] = make_unique(Val, this); + ValueCache[Val] = std::make_unique(Val, this); It = ValueCache.find_as(Val); assert(It != ValueCache.end() && "Val was just added to the map!"); } @@ -434,6 +434,8 @@ namespace { ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB); + bool solveBlockValueExtractValue(ValueLatticeElement &BBLV, + ExtractValueInst *EVI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -648,9 +650,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, return solveBlockValueBinaryOp(Res, BO, BB); if (auto *EVI = dyn_cast(BBI)) - if (auto *WO = dyn_cast(EVI->getAggregateOperand())) - if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) - 
return solveBlockValueOverflowIntrinsic(Res, WO, BB); + return solveBlockValueExtractValue(Res, EVI, BB); if (auto *II = dyn_cast(BBI)) return solveBlockValueIntrinsic(Res, II, BB); @@ -1135,6 +1135,33 @@ bool LazyValueInfoImpl::solveBlockValueIntrinsic( } } +bool LazyValueInfoImpl::solveBlockValueExtractValue( + ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) { + if (auto *WO = dyn_cast(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) + return solveBlockValueOverflowIntrinsic(BBLV, WO, BB); + + // Handle extractvalue of insertvalue to allow further simplification + // based on replaced with.overflow intrinsics. + if (Value *V = SimplifyExtractValueInst( + EVI->getAggregateOperand(), EVI->getIndices(), + EVI->getModule()->getDataLayout())) { + if (!hasBlockValue(V, BB)) { + if (pushBlockValue({ BB, V })) + return false; + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } + BBLV = getBlockValue(V, BB); + return true; + } + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown extractvalue).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; +} + static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest) { Value *LHS = ICI->getOperand(0); @@ -1575,7 +1602,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); Info.DT = DTWP ? 
&DTWP->getDomTree() : nullptr; - Info.TLI = &getAnalysis().getTLI(); + Info.TLI = &getAnalysis().getTLI(F); if (Info.PImpl) getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp index 52212e1c42a..7de9d2cbfdd 100644 --- a/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -93,8 +93,9 @@ namespace { class DivergencePropagator { public: DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet &DV) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + PostDominatorTree &PDT, DenseSet &DV, + DenseSet &DU) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -118,11 +119,14 @@ private: PostDominatorTree &PDT; std::vector Worklist; // Stack for DFS. DenseSet &DV; // Stores all divergent values. + DenseSet &DU; // Stores divergent uses of possibly uniform + // values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); + DU.clear(); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); @@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { // dominators of TI until it is outside the influence region. 
BasicBlock *InfluencedBB = ThisBB; while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); + for (auto &I : *InfluencedBB) { + if (!DV.count(&I)) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + } DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); if (IDomNode == nullptr) break; @@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { void DivergencePropagator::findUsersOutsideInfluenceRegion( Instruction &I, const DenseSet &InfluenceRegion) { - for (User *U : I.users()) { - Instruction *UserInst = cast(U); + for (Use &Use : I.uses()) { + Instruction *UserInst = cast(Use.getUser()); if (!InfluenceRegion.count(UserInst->getParent())) { + DU.insert(&Use); if (DV.insert(UserInst).second) Worklist.push_back(UserInst); } @@ -250,9 +257,8 @@ void DivergencePropagator::computeInfluenceRegion( void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. 
for (User *U : V->users()) { - Instruction *UserInst = cast(U); - if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) - Worklist.push_back(UserInst); + if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) + Worklist.push_back(U); } } @@ -320,6 +326,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { return false; DivergentValues.clear(); + DivergentUses.clear(); gpuDA = nullptr; auto &DT = getAnalysis().getDomTree(); @@ -328,11 +335,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { if (shouldUseGPUDivergenceAnalysis(F)) { // run the new GPU divergence analysis auto &LI = getAnalysis().getLoopInfo(); - gpuDA = llvm::make_unique(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique(F, DT, PDT, LI, TTI); } else { // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); + DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); } @@ -351,6 +358,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { return DivergentValues.count(V); } +bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { + if (gpuDA) { + return gpuDA->isDivergentUse(*U); + } + return DivergentValues.count(U->get()) || DivergentUses.count(U); +} + void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) return; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index d28b8a189d4..db18716c64c 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -205,7 +205,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 
31da4e9ec78..641e92eac78 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -12,6 +12,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -24,34 +27,30 @@ using namespace llvm; -static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, - const DataLayout &DL) { - APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL)); - - if (!BaseAlign) { - Type *Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return false; - BaseAlign = DL.getABITypeAlignment(Ty); - } - - APInt Alignment(Offset.getBitWidth(), Align); - - assert(Alignment.isPowerOf2() && "must be a power of 2!"); - return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) { + if (const MaybeAlign PA = Base->getPointerAlignment(DL)) + return *PA; + Type *const Ty = Base->getType()->getPointerElementType(); + if (!Ty->isSized()) + return None; + return Align(DL.getABITypeAlignment(Ty)); } -static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { - Type *Ty = Base->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(Base, Offset, Align, DL); +static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, + const DataLayout &DL) { + if (MaybeAlign BA = getBaseAlign(Base, DL)) { + const APInt APBaseAlign(Offset.getBitWidth(), BA->value()); + const APInt APAlign(Offset.getBitWidth(), Alignment.value()); + assert(APAlign.isPowerOf2() && "must be a power of 2!"); + return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1)); + } + return false; } /// Test if V is always a pointer to allocated and 
suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, + const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl &Visited) { // Already visited? Bail out, we've likely hit unreachable code. @@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer( // bitcast instructions are no-ops as far as dereferenceability is concerned. if (const BitCastOperator *BC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), V->getPointerDereferenceableBytes(DL, CheckForNonNull)); - if (KnownDerefBytes.getBoolValue()) { - if (KnownDerefBytes.uge(Size)) - if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) - return isAligned(V, Align, DL); - } + if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size)) + if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) { + // As we recursed through GEPs to get here, we've incrementally checked + // that each step advanced by a multiple of the alignment. If our base is + // properly aligned, then the original offset accessed must also be. + Type *Ty = V->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(V, Offset, Alignment, DL); + } // For GEPs, determine if the indexing lands within the allocated object. 
if (const GEPOperator *GEP = dyn_cast(V)) { @@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer( APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || - !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) + !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value())) + .isMinValue()) return false; // If the base pointer is dereferenceable for Offset+Size bytes, then the @@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer( // Offset and Size may have different bit widths if we have visited an // addrspacecast, so we can't do arithmetic directly on the APInt values. return isDereferenceableAndAlignedPointer( - Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), - DL, CtxI, DT, Visited); + Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, + CtxI, DT, Visited); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); if (const auto *Call = dyn_cast(V)) - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) - return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, - Visited); + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) + return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, + DT, Visited); // If we don't know, assume the worst. 
return false; } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + // Note: At the moment, Size can be zero. This ends up being interpreted as + // a query of whether [Base, V] is dereferenceable and V is aligned (since + // that's what the implementation happened to do). It's unclear if this is + // the desired semantic, but at least SelectionDAG does exercise this case. + SmallPtrSet Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, Visited); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, + MaybeAlign MA, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + if (!Ty->isSized()) + return false; + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. 
// Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (!Ty->isSized()) - return false; - - SmallPtrSet Visited; - return ::isDereferenceableAndAlignedPointer( - V, Align, - APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)), - DL, CtxI, DT, Visited); + const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); + APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()), + DL.getTypeStoreSize(Ty)); + return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, + DT); } bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } +bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT) { + auto &DL = LI->getModule()->getDataLayout(); + Value *Ptr = LI->getPointerOperand(); + + APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), + DL.getTypeStoreSize(LI->getType())); + const Align Alignment = DL.getValueOrABITypeAlignment( + MaybeAlign(LI->getAlignment()), LI->getType()); + + Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); + + // If given a uniform (i.e. non-varying) address, see if we can prove the + // access is safe within the loop w/o needing predication. + if (L->isLoopInvariant(Ptr)) + return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + HeaderFirstNonPHI, &DT); + + // Otherwise, check to see if we have a repeating access pattern where we can + // prove that all accesses are well aligned and dereferenceable. 
+ auto *AddRec = dyn_cast(SE.getSCEV(Ptr)); + if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) + return false; + auto* Step = dyn_cast(AddRec->getStepRecurrence(SE)); + if (!Step) + return false; + // TODO: generalize to access patterns which have gaps + if (Step->getAPInt() != EltSize) + return false; + + // TODO: If the symbolic trip count has a small bound (max count), we might + // be able to prove safety. + auto TC = SE.getSmallConstantTripCount(L); + if (!TC) + return false; + + const APInt AccessSize = TC * EltSize; + + auto *StartS = dyn_cast(AddRec->getStart()); + if (!StartS) + return false; + assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition"); + Value *Base = StartS->getValue(); + + // For the moment, restrict ourselves to the case where the access size is a + // multiple of the requested alignment and the base is aligned. + // TODO: generalize if a case found which warrants + if (EltSize.urem(Alignment.value()) != 0) + return false; + return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + HeaderFirstNonPHI, &DT); +} + /// Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -198,65 +259,26 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. 
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target - if (Align == 0) - Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); - assert(isPowerOf2_32(Align)); + const Align Alignment = + DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) return true; - int64_t ByteOffset = 0; - Value *Base = V; - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); - - if (ByteOffset < 0) // out of bounds - return false; - - Type *BaseType = nullptr; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalVariable *GV = dyn_cast(Base)) { - // Global variables are not necessarily safe to load from if they are - // interposed arbitrarily. Their size may change or they may be weak and - // require a test to determine if they were in fact provided. - if (!GV->isInterposable()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - PointerType *AddrTy = cast(V->getType()); - uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); - - // If we found a base allocated type from either an alloca or global variable, - // try to see if we are definitively within the allocated region. We need to - // know the size of the base type and the loaded type to do anything in this - // case. 
- if (BaseType && BaseType->isSized()) { - if (BaseAlign == 0) - BaseAlign = DL.getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - // Check if the load is within the bounds of the underlying object. - if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && - ((ByteOffset % Align) == 0)) - return true; - } - } - if (!ScanFrom) return false; + if (Size.getBitWidth() > 64) + return false; + const uint64_t LoadSize = Size.getZExtValue(); + // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored // from/to. If so, the previous load or store would have already trapped, @@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; Value *AccessedPtr; - unsigned AccessedAlign; + MaybeAlign MaybeAccessedAlign; if (LoadInst *LI = dyn_cast(BBI)) { // Ignore volatile loads. The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - AccessedAlign = LI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); } else if (StoreInst *SI = dyn_cast(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - AccessedAlign = SI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - if (AccessedAlign == 0) - AccessedAlign = DL.getABITypeAlignment(AccessedTy); - if (AccessedAlign < Align) + + const Align AccessedAlign = + DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); + if (AccessedAlign < Alignment) continue; // Handle trivial cases. 
- if (AccessedPtr == V) + if (AccessedPtr == V && + LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && @@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); + return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT); } /// DefMaxInstsToScan - the default number of maximum instructions diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 36bd9a8b7ea..3d8f77675f3 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1189,18 +1189,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast(PtrA->getType())->getElementType(); - APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + // Retrieve the address space again as pointer stripping now tracks through + // `addrspacecast`. + ASA = cast(PtrA->getType())->getAddressSpace(); + ASB = cast(PtrB->getType())->getAddressSpace(); + // Check that the address spaces match and that the pointers are valid. 
+ if (ASA != ASB) + return false; + + IdxWidth = DL.getIndexSizeInBits(ASA); + OffsetA = OffsetA.sextOrTrunc(IdxWidth); + OffsetB = OffsetB.sextOrTrunc(IdxWidth); + + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); + // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const SCEVConstant *OffsetDeltaC = dyn_cast(OffsetDeltaSCEV); - const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); + const APInt &OffsetDelta = cast(OffsetDeltaSCEV)->getAPInt(); + // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) @@ -1641,13 +1654,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { Visited.insert(*AI); - EquivalenceClasses::member_iterator OI = std::next(AI); + bool AIIsWrite = AI->getInt(); + // Check loads only against next equivalent class, but stores also against + // other stores in the same equivalence class - to the same address. + EquivalenceClasses::member_iterator OI = + (AIIsWrite ? AI : std::next(AI)); while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector::iterator I1 = Accesses[*AI].begin(), I1E = Accesses[*AI].end(); I1 != I1E; ++I1) - for (std::vector::iterator I2 = Accesses[*OI].begin(), - I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { + // Scan all accesses of another equivalence class, but only the next + // accesses of the same equivalent class. + for (std::vector::iterator + I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), + I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); + I2 != I2E; ++I2) { auto A = std::make_pair(&*AI, *I1); auto B = std::make_pair(&*OI, *I2); @@ -2078,7 +2099,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, DL = I->getDebugLoc(); } - Report = make_unique(DEBUG_TYPE, RemarkName, DL, + Report = std::make_unique(DEBUG_TYPE, RemarkName, DL, CodeRegion); return *Report; } @@ -2323,9 +2344,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) - : PSE(llvm::make_unique(*SE, *L)), - PtrRtChecking(llvm::make_unique(SE)), - DepChecker(llvm::make_unique(*PSE, L)), TheLoop(L), + : PSE(std::make_unique(*SE, *L)), + PtrRtChecking(std::make_unique(SE)), + DepChecker(std::make_unique(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { @@ -2380,7 +2401,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = llvm::make_unique(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique(L, SE, TLI, AA, DT, LI); return *LAI.get(); } @@ -2399,7 +2420,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; + TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp index a10a87ce113..02d40fb8d72 100644 --- a/lib/Analysis/LoopAnalysisManager.cpp +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // invalidation logic below to act on that. auto PAC = PA.getChecker(); bool invalidateMemorySSAAnalysis = false; - if (EnableMSSALoopDependency) + if (MSSAUsed) invalidateMemorySSAAnalysis = Inv.invalidate(F, PA); if (!(PAC.preserved() || PAC.preservedSet>()) || Inv.invalidate(F, PA) || diff --git a/lib/Analysis/LoopCacheAnalysis.cpp b/lib/Analysis/LoopCacheAnalysis.cpp new file mode 100644 index 00000000000..10d2fe07884 --- /dev/null +++ b/lib/Analysis/LoopCacheAnalysis.cpp @@ -0,0 +1,625 @@ +//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation for the loop cache analysis. +/// The implementation is largely based on the following paper: +/// +/// Compiler Optimizations for Improving Data Locality +/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng +/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf +/// +/// The general approach taken to estimate the number of cache lines used by the +/// memory references in an inner loop is: +/// 1. Partition memory references that exhibit temporal or spacial reuse +/// into reference groups. +/// 2. For each loop L in the a loop nest LN: +/// a. 
Compute the cost of the reference group +/// b. Compute the loop cost by summing up the reference groups costs +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopCacheAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-cache-cost" + +static cl::opt DefaultTripCount( + "default-trip-count", cl::init(100), cl::Hidden, + cl::desc("Use this to specify the default trip count of a loop")); + +// In this analysis two array references are considered to exhibit temporal +// reuse if they access either the same memory location, or a memory location +// with distance smaller than a configurable threshold. +static cl::opt TemporalReuseThreshold( + "temporal-reuse-threshold", cl::init(2), cl::Hidden, + cl::desc("Use this to specify the max. distance between array elements " + "accessed in a loop so that the elements are classified to have " + "temporal reuse")); + +/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a +/// nullptr if any loops in the loop vector supplied has more than one sibling. +/// The loop vector is expected to contain loops collected in breadth-first +/// order. +static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { + assert(!Loops.empty() && "Expecting a non-empy loop vector"); + + Loop *LastLoop = Loops.back(); + Loop *ParentLoop = LastLoop->getParentLoop(); + + if (ParentLoop == nullptr) { + assert(Loops.size() == 1 && "Expecting a single loop"); + return LastLoop; + } + + return (std::is_sorted(Loops.begin(), Loops.end(), + [](const Loop *L1, const Loop *L2) { + return L1->getLoopDepth() < L2->getLoopDepth(); + })) + ? 
LastLoop + : nullptr; +} + +static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, + const Loop &L, ScalarEvolution &SE) { + const SCEVAddRecExpr *AR = dyn_cast(&AccessFn); + if (!AR || !AR->isAffine()) + return false; + + assert(AR->getLoop() && "AR should have a loop"); + + // Check that start and increment are not add recurrences. + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + if (isa(Start) || isa(Step)) + return false; + + // Check that start and increment are both invariant in the loop. + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return AR->getStepRecurrence(SE) == &ElemSize; +} + +/// Compute the trip count for the given loop \p L. Return the SCEV expression +/// for the trip count or nullptr if it cannot be computed. +static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); + if (isa(BackedgeTakenCount) || + !isa(BackedgeTakenCount)) + return nullptr; + + return SE.getAddExpr(BackedgeTakenCount, + SE.getOne(BackedgeTakenCount->getType())); +} + +//===----------------------------------------------------------------------===// +// IndexedReference implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { + if (!R.IsValid) { + OS << R.StoreOrLoadInst; + OS << ", IsValid=false."; + return OS; + } + + OS << *R.BasePointer; + for (const SCEV *Subscript : R.Subscripts) + OS << "[" << *Subscript << "]"; + + OS << ", Sizes: "; + for (const SCEV *Size : R.Sizes) + OS << "[" << *Size << "]"; + + return OS; +} + +IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, + const LoopInfo &LI, ScalarEvolution &SE) + : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { + assert((isa(StoreOrLoadInst) || isa(StoreOrLoadInst)) && + "Expecting a load or store instruction"); + + IsValid = delinearize(LI); + if (IsValid) + LLVM_DEBUG(dbgs().indent(2) 
<< "Succesfully delinearized: " << *this + << "\n"); +} + +Optional IndexedReference::hasSpacialReuse(const IndexedReference &Other, + unsigned CLS, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different base pointers\n"); + return false; + } + + unsigned NumSubscripts = getNumSubscripts(); + if (NumSubscripts != Other.getNumSubscripts()) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different number of subscripts\n"); + return false; + } + + // all subscripts must be equal, except the leftmost one (the last one). + for (auto SubNum : seq(0, NumSubscripts - 1)) { + if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { + LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " + << "\n\t" << *getSubscript(SubNum) << "\n\t" + << *Other.getSubscript(SubNum) << "\n"); + return false; + } + } + + // the difference between the last subscripts must be less than the cache line + // size. 
+ const SCEV *LastSubscript = getLastSubscript(); + const SCEV *OtherLastSubscript = Other.getLastSubscript(); + const SCEVConstant *Diff = dyn_cast( + SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); + + if (Diff == nullptr) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse, difference between subscript:\n\t" + << *LastSubscript << "\n\t" << OtherLastSubscript + << "\nis not constant.\n"); + return None; + } + + bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); + + LLVM_DEBUG({ + if (InSameCacheLine) + dbgs().indent(2) << "Found spacial reuse.\n"; + else + dbgs().indent(2) << "No spacial reuse.\n"; + }); + + return InSameCacheLine; +} + +Optional IndexedReference::hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, + const Loop &L, + DependenceInfo &DI, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: different base pointer\n"); + return false; + } + + std::unique_ptr D = + DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); + + if (D == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); + return false; + } + + if (D->isLoopIndependent()) { + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; + } + + // Check the dependence distance at every loop level. There is temporal reuse + // if the distance at the given loop's depth is small (|d| <= MaxDistance) and + // it is zero at every other loop level. 
+ int LoopDepth = L.getLoopDepth(); + int Levels = D->getLevels(); + for (int Level = 1; Level <= Levels; ++Level) { + const SCEV *Distance = D->getDistance(Level); + const SCEVConstant *SCEVConst = dyn_cast_or_null(Distance); + + if (SCEVConst == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); + return None; + } + + const ConstantInt &CI = *SCEVConst->getValue(); + if (Level != LoopDepth && !CI.isZero()) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: distance is not zero at depth=" << Level + << "\n"); + return false; + } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { + LLVM_DEBUG( + dbgs().indent(2) + << "No temporal reuse: distance is greater than MaxDistance at depth=" + << Level << "\n"); + return false; + } + } + + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; +} + +CacheCostTy IndexedReference::computeRefCost(const Loop &L, + unsigned CLS) const { + assert(IsValid && "Expecting a valid reference"); + LLVM_DEBUG({ + dbgs().indent(2) << "Computing cache cost for:\n"; + dbgs().indent(4) << *this << "\n"; + }); + + // If the indexed reference is loop invariant the cost is one. + if (isLoopInvariant(L)) { + LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); + return 1; + } + + const SCEV *TripCount = computeTripCount(L, SE); + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + const SCEV *ElemSize = Sizes.back(); + TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); + } + LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); + + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS, otherwise the cost is TripCount. 
+ const SCEV *RefCost = TripCount; + + if (isConsecutive(L, CLS)) { + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); + RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) + << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" + << *RefCost << "\n"); + } else + LLVM_DEBUG(dbgs().indent(4) + << "Access is not consecutive: RefCost=TripCount=" << *RefCost + << "\n"); + + // Attempt to fold RefCost into a constant. + if (auto ConstantCost = dyn_cast(RefCost)) + return ConstantCost->getValue()->getSExtValue(); + + LLVM_DEBUG(dbgs().indent(4) + << "RefCost is not a constant! Setting to RefCost=InvalidCost " + "(invalid value).\n"); + + return CacheCost::InvalidCost; +} + +bool IndexedReference::delinearize(const LoopInfo &LI) { + assert(Subscripts.empty() && "Subscripts should be empty"); + assert(Sizes.empty() && "Sizes should be empty"); + assert(!IsValid && "Should be called once from the constructor"); + LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); + + const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); + const BasicBlock *BB = StoreOrLoadInst.getParent(); + + for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { + const SCEV *AccessFn = + SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); + + BasePointer = dyn_cast(SE.getPointerBase(AccessFn)); + if (BasePointer == nullptr) { + LLVM_DEBUG( + dbgs().indent(2) + << "ERROR: failed to delinearize, can't identify base pointer\n"); + return false; + } + + AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); + + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + + SE.delinearize(AccessFn, Subscripts, Sizes, + SE.getElementSize(&StoreOrLoadInst)); + + if 
(Subscripts.empty() || Sizes.empty() || + Subscripts.size() != Sizes.size()) { + // Attempt to determine whether we have a single dimensional array access. + // before giving up. + if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { + LLVM_DEBUG(dbgs().indent(2) + << "ERROR: failed to delinearize reference\n"); + Subscripts.clear(); + Sizes.clear(); + break; + } + + const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); + Subscripts.push_back(Div); + Sizes.push_back(ElemSize); + } + + return all_of(Subscripts, [&](const SCEV *Subscript) { + return isSimpleAddRecurrence(*Subscript, *L); + }); + } + + return false; +} + +bool IndexedReference::isLoopInvariant(const Loop &L) const { + Value *Addr = getPointerOperand(&StoreOrLoadInst); + assert(Addr != nullptr && "Expecting either a load or a store instruction"); + assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); + + if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) + return true; + + // The indexed reference is loop invariant if none of the coefficients use + // the loop induction variable. + bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { + return isCoeffForLoopZeroOrInvariant(*Subscript, L); + }); + + return allCoeffForLoopAreZero; +} + +bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { + // The indexed reference is 'consecutive' if the only coefficient that uses + // the loop induction variable is the last one... + const SCEV *LastSubscript = Subscripts.back(); + for (const SCEV *Subscript : Subscripts) { + if (Subscript == LastSubscript) + continue; + if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) + return false; + } + + // ...and the access stride is less than the cache line size. 
+ const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + + return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); +} + +const SCEV *IndexedReference::getLastCoefficient() const { + const SCEV *LastSubscript = getLastSubscript(); + assert(isa(LastSubscript) && + "Expecting a SCEV add recurrence expression"); + const SCEVAddRecExpr *AR = dyn_cast(LastSubscript); + return AR->getStepRecurrence(SE); +} + +bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const { + const SCEVAddRecExpr *AR = dyn_cast(&Subscript); + return (AR != nullptr) ? AR->getLoop() != &L + : SE.isLoopInvariant(&Subscript, &L); +} + +bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, + const Loop &L) const { + if (!isa(Subscript)) + return false; + + const SCEVAddRecExpr *AR = cast(&Subscript); + assert(AR->getLoop() && "AR should have a loop"); + + if (!AR->isAffine()) + return false; + + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return true; +} + +bool IndexedReference::isAliased(const IndexedReference &Other, + AliasAnalysis &AA) const { + const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); + const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); + return AA.isMustAlias(Loc1, Loc2); +} + +//===----------------------------------------------------------------------===// +// CacheCost implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { + for (const auto &LC : CC.LoopCosts) { + const Loop *L = LC.first; + OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; + } + return OS; +} + +CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, + ScalarEvolution &SE, 
TargetTransformInfo &TTI, + AliasAnalysis &AA, DependenceInfo &DI, + Optional TRT) + : Loops(Loops), TripCounts(), LoopCosts(), + TRT(TRT == None ? Optional(TemporalReuseThreshold) : TRT), + LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { + assert(!Loops.empty() && "Expecting a non-empty loop vector."); + + for (const Loop *L : Loops) { + unsigned TripCount = SE.getSmallConstantTripCount(L); + TripCount = (TripCount == 0) ? DefaultTripCount : TripCount; + TripCounts.push_back({L, TripCount}); + } + + calculateCacheFootprint(); +} + +std::unique_ptr +CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, + DependenceInfo &DI, Optional TRT) { + if (Root.getParentLoop()) { + LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); + return nullptr; + } + + LoopVectorTy Loops; + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); + + if (!getInnerMostLoop(Loops)) { + LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " + "than one innermost loop\n"); + return nullptr; + } + + return std::make_unique(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); +} + +void CacheCost::calculateCacheFootprint() { + LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); + ReferenceGroupsTy RefGroups; + if (!populateReferenceGroups(RefGroups)) + return; + + LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); + for (const Loop *L : Loops) { + assert((std::find_if(LoopCosts.begin(), LoopCosts.end(), + [L](const LoopCacheCostTy &LCC) { + return LCC.first == L; + }) == LoopCosts.end()) && + "Should not add duplicate element"); + CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); + LoopCosts.push_back(std::make_pair(L, LoopCost)); + } + + sortLoopCosts(); + RefGroups.clear(); +} + +bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { + assert(RefGroups.empty() && "Reference groups should be empty"); + + unsigned CLS = TTI.getCacheLineSize(); + Loop *InnerMostLoop = getInnerMostLoop(Loops); + 
assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); + + for (BasicBlock *BB : InnerMostLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (!isa(I) && !isa(I)) + continue; + + std::unique_ptr R(new IndexedReference(I, LI, SE)); + if (!R->isValid()) + continue; + + bool Added = false; + for (ReferenceGroupTy &RefGroup : RefGroups) { + const IndexedReference &Representative = *RefGroup.front().get(); + LLVM_DEBUG({ + dbgs() << "References:\n"; + dbgs().indent(2) << *R << "\n"; + dbgs().indent(2) << Representative << "\n"; + }); + + Optional HasTemporalReuse = + R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); + Optional HasSpacialReuse = + R->hasSpacialReuse(Representative, CLS, AA); + + if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || + (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { + RefGroup.push_back(std::move(R)); + Added = true; + break; + } + } + + if (!Added) { + ReferenceGroupTy RG; + RG.push_back(std::move(R)); + RefGroups.push_back(std::move(RG)); + } + } + } + + if (RefGroups.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; + int n = 1; + for (const ReferenceGroupTy &RG : RefGroups) { + dbgs().indent(2) << "RefGroup " << n << ":\n"; + for (const auto &IR : RG) + dbgs().indent(4) << *IR << "\n"; + n++; + } + dbgs() << "\n"; + }); + + return true; +} + +CacheCostTy +CacheCost::computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const { + if (!L.isLoopSimplifyForm()) + return InvalidCost; + + LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() + << "' as innermost loop.\n"); + + // Compute the product of the trip counts of each other loop in the nest. 
+ CacheCostTy TripCountsProduct = 1; + for (const auto &TC : TripCounts) { + if (TC.first == &L) + continue; + TripCountsProduct *= TC.second; + } + + CacheCostTy LoopCost = 0; + for (const ReferenceGroupTy &RG : RefGroups) { + CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); + LoopCost += RefGroupCost * TripCountsProduct; + } + + LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() + << "' has cost=" << LoopCost << "\n"); + + return LoopCost; +} + +CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const { + assert(!RG.empty() && "Reference group should have at least one member."); + + const IndexedReference *Representative = RG.front().get(); + return Representative->computeRefCost(L, TTI.getCacheLineSize()); +} + +//===----------------------------------------------------------------------===// +// LoopCachePrinterPass implementation +// +PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + + if (auto CC = CacheCost::getCacheCost(L, AR, DI)) + OS << *CC; + + return PreservedAnalyses::all(); +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index aa5da085980..dbab5db7dbc 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -359,6 +359,45 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, return SE.isLoopInvariant(IndDesc.getStep(), this); } +BranchInst *Loop::getLoopGuardBranch() const { + if (!isLoopSimplifyForm()) + return nullptr; + + BasicBlock *Preheader = getLoopPreheader(); + BasicBlock *Latch = getLoopLatch(); + assert(Preheader && Latch && + "Expecting a loop with valid preheader and latch"); + + // Loop should be in rotate form. 
+ if (!isLoopExiting(Latch)) + return nullptr; + + // Disallow loops with more than one unique exit block, as we do not verify + // that GuardOtherSucc post dominates all exit blocks. + BasicBlock *ExitFromLatch = getUniqueExitBlock(); + if (!ExitFromLatch) + return nullptr; + + BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor(); + if (!ExitFromLatchSucc) + return nullptr; + + BasicBlock *GuardBB = Preheader->getUniquePredecessor(); + if (!GuardBB) + return nullptr; + + assert(GuardBB->getTerminator() && "Expecting valid guard terminator"); + + BranchInst *GuardBI = dyn_cast(GuardBB->getTerminator()); + if (!GuardBI || GuardBI->isUnconditional()) + return nullptr; + + BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader) + ? GuardBI->getSuccessor(1) + : GuardBI->getSuccessor(0); + return (GuardOtherSucc == ExitFromLatchSucc) ? GuardBI : nullptr; +} + bool Loop::isCanonical(ScalarEvolution &SE) const { InductionDescriptor IndDesc; if (!getInductionDescriptor(SE, IndDesc)) diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp index 1728b5e9f6d..762623de41e 100644 --- a/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index 77ebf89d9a0..5cf516a538b 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -55,8 +55,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if 
(isDereferenceableAndAlignedPointer(PO, LI->getType(), - LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer( + PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) DerefAndAligned.insert(PO); } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 729dad46365..172c86eb464 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -180,6 +180,19 @@ static Optional getAllocationData(const Value *V, AllocType AllocTy, return None; } +static Optional +getAllocationData(const Value *V, AllocType AllocTy, + function_ref GetTLI, + bool LookThroughBitCast = false) { + bool IsNoBuiltinCall; + if (const Function *Callee = + getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (!IsNoBuiltinCall) + return getAllocationDataForFunction( + Callee, AllocTy, &GetTLI(const_cast(*Callee))); + return None; +} + static Optional getAllocationSize(const Value *V, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; @@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isAllocationFn( + const Value *V, function_ref GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue(); +} /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). 
@@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isMallocLikeFn( + const Value *V, function_ref GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). +bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue(); +} + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue(); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I, - const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? dyn_cast(I) : nullptr; +const CallInst *llvm::extractMallocCall( + const Value *I, + function_ref GetTLI) { + return isMallocLikeFn(I, GetTLI) ? 
dyn_cast(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, @@ -521,9 +560,9 @@ STATISTIC(ObjectVisitorArgument, STATISTIC(ObjectVisitorLoad, "Number of load instructions with unsolved size and offset"); -APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { - if (Options.RoundToAlign && Align) - return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) { + if (Options.RoundToAlign && Alignment) + return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment))); return Size; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index b25b655165d..884587e020b 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -183,7 +183,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, MemDepResult MemoryDependenceResults::getCallDependencyFrom( CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { - unsigned Limit = BlockScanLimit; + unsigned Limit = getDefaultBlockScanLimit(); // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -356,7 +356,7 @@ MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - if (!LI->getMetadata(LLVMContext::MD_invariant_group)) + if (!LI->hasMetadata(LLVMContext::MD_invariant_group)) return MemDepResult::getUnknown(); // Take the ptr operand after all casts and geps 0. This way we can search @@ -417,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // same pointer operand) we can assume that value pointed by pointer // operand didn't change. 
if ((isa(U) || isa(U)) && - U->getMetadata(LLVMContext::MD_invariant_group) != nullptr) + U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } } @@ -443,7 +443,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( OrderedBasicBlock *OBB) { bool isInvariantLoad = false; - unsigned DefaultLimit = BlockScanLimit; + unsigned DefaultLimit = getDefaultBlockScanLimit(); if (!Limit) Limit = &DefaultLimit; @@ -481,7 +481,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // Arguably, this logic should be pushed inside AliasAnalysis itself. if (isLoad && QueryInst) { LoadInst *LI = dyn_cast(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load)) isInvariantLoad = true; } @@ -1746,6 +1746,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const { AnalysisKey MemoryDependenceAnalysis::Key; +MemoryDependenceAnalysis::MemoryDependenceAnalysis() + : DefaultBlockScanLimit(BlockScanLimit) {} + MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult(F); @@ -1753,7 +1756,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); auto &PV = AM.getResult(F); - return MemoryDependenceResults(AA, AC, TLI, DT, PV); + return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit); } char MemoryDependenceWrapperPass::ID = 0; @@ -1807,15 +1810,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P } unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { - return BlockScanLimit; + return DefaultBlockScanLimit; } bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); - auto &TLI = 
getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); auto &PV = getAnalysis().getResult(); - MemDep.emplace(AA, AC, TLI, DT, PV); + MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit); return false; } diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 17f5d9b9f0a..cfb8b7e7dcb 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include #include @@ -83,7 +84,7 @@ bool llvm::VerifyMemorySSA = false; #endif /// Enables memory ssa as a dependency for loop passes in legacy pass manager. cl::opt llvm::EnableMSSALoopDependency( - "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + "enable-mssa-loop-dependency", cl::Hidden, cl::init(true), cl::desc("Enable MemorySSA dependency for loop pass manager")); static cl::opt @@ -284,6 +285,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, case Intrinsic::invariant_end: case Intrinsic::assume: return {false, NoAlias}; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("debuginfo shouldn't have associated defs!"); default: break; } @@ -369,7 +375,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. 
- return isa(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || + return isa(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) || AA.pointsToConstantMemory(MemoryLocation( cast(I)->getPointerOperand()))); } @@ -867,6 +873,7 @@ template class ClobberWalker { if (!DefChainEnd) for (auto *MA : def_chain(const_cast(Target))) DefChainEnd = MA; + assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry"); // If any of the terminated paths don't dominate the phi we'll try to // optimize, we need to figure out what they are and quit. @@ -1087,9 +1094,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, AccessList *Accesses = It->second.get(); auto *Phi = cast(&Accesses->front()); if (RenameAllUses) { - int PhiIndex = Phi->getBasicBlockIndex(BB); - assert(PhiIndex != -1 && "Incomplete phi during partial rename"); - Phi->setIncomingValue(PhiIndex, IncomingVal); + bool ReplacementDone = false; + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + if (Phi->getIncomingBlock(I) == BB) { + Phi->setIncomingValue(I, IncomingVal); + ReplacementDone = true; + } + (void) ReplacementDone; + assert(ReplacementDone && "Incomplete phi during partial rename"); } else Phi->addIncoming(IncomingVal, BB); } @@ -1237,7 +1249,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique(); + Res.first->second = std::make_unique(); return Res.first->second.get(); } @@ -1245,7 +1257,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique(); + Res.first->second = std::make_unique(); return Res.first->second.get(); } @@ -1554,10 +1566,10 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { if (!WalkerBase) WalkerBase = - llvm::make_unique>(this, 
AA, DT); + std::make_unique>(this, AA, DT); Walker = - llvm::make_unique>(this, WalkerBase.get()); + std::make_unique>(this, WalkerBase.get()); return Walker.get(); } @@ -1567,10 +1579,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { if (!WalkerBase) WalkerBase = - llvm::make_unique>(this, AA, DT); + std::make_unique>(this, AA, DT); SkipWalker = - llvm::make_unique>(this, WalkerBase.get()); + std::make_unique>(this, WalkerBase.get()); return SkipWalker.get(); } @@ -1687,13 +1699,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, MemoryAccess *Definition, - const MemoryUseOrDef *Template) { + const MemoryUseOrDef *Template, + bool CreationMustSucceed) { assert(!isa(I) && "Cannot create a defined access for a PHI"); MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); - assert( - NewAccess != nullptr && - "Tried to create a memory access for a non-memory touching instruction"); - NewAccess->setDefiningAccess(Definition); + if (CreationMustSucceed) + assert(NewAccess != nullptr && "Tried to create a memory access for a " + "non-memory touching instruction"); + if (NewAccess) + NewAccess->setDefiningAccess(Definition); return NewAccess; } @@ -1717,13 +1731,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, AliasAnalysisType *AAP, const MemoryUseOrDef *Template) { // The assume intrinsic has a control dependency which we model by claiming - // that it writes arbitrarily. Ignore that fake memory dependency here. + // that it writes arbitrarily. Debuginfo intrinsics may be considered + // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory + // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. 
if (IntrinsicInst *II = dyn_cast(I)) if (II->getIntrinsicID() == Intrinsic::assume) return nullptr; + // Using a nonstandard AA pipelines might leave us with unexpected modref + // results for I, so add a check to not model instructions that may not read + // from or write to memory. This is necessary for correctness. + if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) + return nullptr; + bool Def, Use; if (Template) { Def = dyn_cast_or_null(Template) != nullptr; @@ -1850,6 +1872,7 @@ void MemorySSA::verifyMemorySSA() const { verifyDomination(F); verifyOrdering(F); verifyDominationNumbers(F); + verifyPrevDefInPhis(F); // Previously, the verification used to also verify that the clobberingAccess // cached by MemorySSA is the same as the clobberingAccess found at a later // query to AA. This does not hold true in general due to the current fragility @@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const { // example, see test4 added in D51960. } +void MemorySSA::verifyPrevDefInPhis(Function &F) const { +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) + for (const BasicBlock &BB : F) { + if (MemoryPhi *Phi = getMemoryAccess(&BB)) { + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + auto *Pred = Phi->getIncomingBlock(I); + auto *IncAcc = Phi->getIncomingValue(I); + // If Pred has no unreachable predecessors, get last def looking at + // IDoms. If, while walkings IDoms, any of these has an unreachable + // predecessor, then the incoming def can be any access. + if (auto *DTNode = DT->getNode(Pred)) { + while (DTNode) { + if (auto *DefList = getBlockDefs(DTNode->getBlock())) { + auto *LastAcc = &*(--DefList->end()); + assert(LastAcc == IncAcc && + "Incorrect incoming access into phi."); + break; + } + DTNode = DTNode->getIDom(); + } + } else { + // If Pred has unreachable predecessors, but has at least a Def, the + // incoming access can be the last Def in Pred, or it could have been + // optimized to LoE. 
After an update, though, the LoE may have been + // replaced by another access, so IncAcc may be any access. + // If Pred has unreachable predecessors and no Defs, incoming access + // should be LoE; However, after an update, it may be any access. + } + } + } + } +#endif +} + /// Verify that all of the blocks we believe to have valid domination numbers /// actually have valid domination numbers. void MemorySSA::verifyDominationNumbers(const Function &F) const { @@ -2005,7 +2062,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { /// accesses and verifying that, for each use, it appears in the /// appropriate def's use list void MemorySSA::verifyDefUses(Function &F) const { -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) for (BasicBlock &B : F) { // Phi nodes are attached to basic blocks if (MemoryPhi *Phi = getMemoryAccess(&B)) { @@ -2212,7 +2269,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AA = AM.getResult(F); - return MemorySSAAnalysis::Result(llvm::make_unique(F, &AA, &DT)); + return MemorySSAAnalysis::Result(std::make_unique(F, &AA, &DT)); } bool MemorySSAAnalysis::Result::invalidate( diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index 4c1feee7fd9..f2d56b05d96 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. auto Cached = CachedPreviousDef.find(BB); - if (Cached != CachedPreviousDef.end()) { + if (Cached != CachedPreviousDef.end()) return Cached->second; - } - if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If this method is called from an unreachable block, return LoE. 
+ if (!MSSA->DT->isReachableFromEntry(BB)) + return MSSA->getLiveOnEntryDef(); + + if (BasicBlock *Pred = BB->getUniquePredecessor()) { + VisitedBlocks.insert(BB); // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); @@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. - for (auto *Pred : predecessors(BB)) - if (MSSA->DT->isReachableFromEntry(Pred)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); - else + bool UniqueIncomingAccess = true; + MemoryAccess *SingleAccess = nullptr; + for (auto *Pred : predecessors(BB)) { + if (MSSA->DT->isReachableFromEntry(Pred)) { + auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); + if (!SingleAccess) + SingleAccess = IncomingAccess; + else if (IncomingAccess != SingleAccess) + UniqueIncomingAccess = false; + PhiOps.push_back(IncomingAccess); + } else PhiOps.push_back(MSSA->getLiveOnEntryDef()); + } // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi) { + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { + // A concrete Phi only exists if we created an empty one to break a cycle. 
+ if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); + removeMemoryAccess(Phi); + } + Result = SingleAccess; + } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { TrackingVH Res(Phi); SmallVector, 8> Uses; std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses)); - for (auto &U : Uses) { - if (MemoryPhi *UsePhi = dyn_cast(&*U)) { - auto OperRange = UsePhi->operands(); - tryRemoveTrivialPhi(UsePhi, OperRange); - } - } + for (auto &U : Uses) + if (MemoryPhi *UsePhi = dyn_cast(&*U)) + tryRemoveTrivialPhi(UsePhi); return Res; } @@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { // argument. // IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c) // We recursively try to remove them. +MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) { + assert(Phi && "Can only remove concrete Phi."); + auto OperRange = Phi->operands(); + return tryRemoveTrivialPhi(Phi, OperRange); +} template MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands) { @@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, return recursePhi(Same); } -void MemorySSAUpdater::insertUse(MemoryUse *MU) { +void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); - // Unlike for defs, there is no extra work to do. Because uses do not create - // new may-defs, there are only two cases: - // + + // In cases without unreachable blocks, because uses do not create new + // may-defs, there are only two cases: // 1. There was a def already below us, and therefore, we should not have // created a phi node because it was already needed for the def. // // 2. 
There is no def below us, and therefore, there is no extra renaming work // to do. + + // In cases with unreachable blocks, where the unnecessary Phis were + // optimized out, adding the Use may re-insert those Phis. Hence, when + // inserting Uses outside of the MSSA creation process, and new Phis were + // added, rename all uses if we are asked. + + if (!RenameUses && !InsertedPHIs.empty()) { + auto *Defs = MSSA->getBlockDefs(MU->getBlock()); + (void)Defs; + assert((!Defs || (++Defs->begin() == Defs->end())) && + "Block may have only a Phi or no defs"); + } + + if (RenameUses && InsertedPHIs.size()) { + SmallPtrSet Visited; + BasicBlock *StartBlock = MU->getBlock(); + + if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) { + MemoryAccess *FirstDef = &*Defs->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MU->getBlock(), FirstDef, Visited); + } + // We just inserted a phi into this block, so the incoming value will + // become the phi anyway, so it does not matter what we pass. + for (auto &MP : InsertedPHIs) + if (MemoryPhi *Phi = cast_or_null(MP)) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. @@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // See if we had a local def, and if not, go hunting. 
MemoryAccess *DefBefore = getPreviousDef(MD); - bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); + bool DefBeforeSameBlock = false; + if (DefBefore->getBlock() == MD->getBlock() && + !(isa(DefBefore) && + std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) != + InsertedPHIs.end())) + DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there // before. // We now define that def's memorydefs and memoryphis if (DefBeforeSameBlock) { - for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); - UI != UE;) { - Use &U = *UI++; + DefBefore->replaceUsesWithIf(MD, [MD](Use &U) { // Leave the MemoryUses alone. // Also make sure we skip ourselves to avoid self references. - if (isa(U.getUser()) || U.getUser() == MD) - continue; + User *Usr = U.getUser(); + return !isa(Usr) && Usr != MD; // Defs are automatically unoptimized when the user is set to MD below, // because the isOptimized() call will fail to find the same ID. - U.set(MD); - } + }); } // and that def is now our defining access. MD->setDefiningAccess(DefBefore); - // Remember the index where we may insert new phis below. - unsigned NewPhiIndex = InsertedPHIs.size(); - SmallVector FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); + + // Remember the index where we may insert new phis. + unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. Because every may-def is the same, any phis/etc we would create, it @@ -302,44 +358,52 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // If this is the first def in the block and this insert is in an arbitrary // place, compute IDF and place phis. 
+ SmallPtrSet DefiningBlocks; + + // If this is the last Def in the block, also compute IDF based on MD, since + // this may a new Def added, and we may need additional Phis. auto Iter = MD->getDefsIterator(); ++Iter; auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); - if (Iter == IterEnd) { - ForwardIDFCalculator IDFs(*MSSA->DT); - SmallVector IDFBlocks; - SmallPtrSet DefiningBlocks; + if (Iter == IterEnd) DefiningBlocks.insert(MD->getBlock()); - IDFs.setDefiningBlocks(DefiningBlocks); - IDFs.calculate(IDFBlocks); - SmallVector, 4> NewInsertedPHIs; - for (auto *BBIDF : IDFBlocks) - if (!MSSA->getMemoryAccess(BBIDF)) { - auto *MPhi = MSSA->createMemoryPhi(BBIDF); - NewInsertedPHIs.push_back(MPhi); - // Add the phis created into the IDF blocks to NonOptPhis, so they are - // not optimized out as trivial by the call to getPreviousDefFromEnd - // below. Once they are complete, all these Phis are added to the - // FixupList, and removed from NonOptPhis inside fixupDefs(). - NonOptPhis.insert(MPhi); - } - for (auto &MPhi : NewInsertedPHIs) { - auto *BBIDF = MPhi->getBlock(); - for (auto *Pred : predecessors(BBIDF)) { - DenseMap> CachedPreviousDef; - MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), - Pred); - } + for (const auto &VH : InsertedPHIs) + if (const auto *RealPHI = cast_or_null(VH)) + DefiningBlocks.insert(RealPHI->getBlock()); + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector IDFBlocks; + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) { + auto *MPhi = MSSA->getMemoryAccess(BBIDF); + if (!MPhi) { + MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); } + // Add the phis created into the IDF blocks to NonOptPhis, so they are not + // optimized out as trivial by the call to getPreviousDefFromEnd below. + // Once they are complete, all these Phis are added to the FixupList, and + // removed from NonOptPhis inside fixupDefs(). 
Existing Phis in IDF may + // need fixing as well, and potentially be trivial before this insertion, + // hence add all IDF Phis. See PR43044. + NonOptPhis.insert(MPhi); + } + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); + } + } - // Re-take the index where we're adding the new phis, because the above - // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs. - NewPhiIndex = InsertedPHIs.size(); - for (auto &MPhi : NewInsertedPHIs) { - InsertedPHIs.push_back(&*MPhi); - FixupList.push_back(&*MPhi); - } + // Re-take the index where we're adding the new phis, because the above call + // to getPreviousDefFromEnd, may have inserted into InsertedPHIs. + NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); } FixupList.push_back(MD); @@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl &Vars) { void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) { if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) { MPhi->unorderedDeleteIncomingBlock(From); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); } } @@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, Found = true; return false; }); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); } } +static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, + const ValueToValueMapTy &VMap, + PhiToDefMap &MPhiMap, + bool CloneWasSimplified, + MemorySSA *MSSA) { + MemoryAccess *InsnDefining = MA; + if (MemoryDef *DefMUD = dyn_cast(InsnDefining)) { + if (!MSSA->isLiveOnEntryDef(DefMUD)) { + Instruction *DefMUDI = DefMUD->getMemoryInst(); + assert(DefMUDI && "Found MemoryUseOrDef with no 
Instruction."); + if (Instruction *NewDefMUDI = + cast_or_null(VMap.lookup(DefMUDI))) { + InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); + if (!CloneWasSimplified) + assert(InsnDefining && "Defining instruction cannot be nullptr."); + else if (!InsnDefining || isa(InsnDefining)) { + // The clone was simplified, it's no longer a MemoryDef, look up. + auto DefIt = DefMUD->getDefsIterator(); + // Since simplified clones only occur in single block cloning, a + // previous definition must exist, otherwise NewDefMUDI would not + // have been found in VMap. + assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && + "Previous def must exist"); + InsnDefining = getNewDefiningAccessForClone( + &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + } + } + } + } else { + MemoryPhi *DefPhi = cast(InsnDefining); + if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) + InsnDefining = NewDefPhi; + } + assert(InsnDefining && "Defining instruction cannot be nullptr."); + return InsnDefining; +} + void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, bool CloneWasSimplified) { - auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * { - MemoryAccess *InsnDefining = MA; - if (MemoryUseOrDef *DefMUD = dyn_cast(InsnDefining)) { - if (!MSSA->isLiveOnEntryDef(DefMUD)) { - Instruction *DefMUDI = DefMUD->getMemoryInst(); - assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewDefMUDI = - cast_or_null(VMap.lookup(DefMUDI))) - InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - } - } else { - MemoryPhi *DefPhi = cast(InsnDefining); - if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) - InsnDefining = NewDefPhi; - } - assert(InsnDefining && "Defining instruction cannot be nullptr."); - return InsnDefining; - }; - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); if (!Acc) return; @@ -519,9 +599,13 @@ void 
MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, if (Instruction *NewInsn = dyn_cast_or_null(VMap.lookup(Insn))) { MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( - NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), - CloneWasSimplified ? nullptr : MUD); - MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); + NewInsn, + getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, + MPhiMap, CloneWasSimplified, MSSA), + /*Template=*/CloneWasSimplified ? nullptr : MUD, + /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + if (NewUseOrDef) + MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } } } @@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock( // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be // replaced with the unique value. - if (HasUniqueIncomingValue) - removeMemoryAccess(NewMPhi); + tryRemoveTrivialPhi(NewMPhi); } void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, @@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef Updates, } else { // Single predecessor, BB cannot be dead. GetLastDef of Pred. assert(Count == 1 && Pred && "Single predecessor expected."); + // BB can be unreachable though, return LoE if that is the case. + if (!DT.getNode(BB)) + return MSSA->getLiveOnEntryDef(); BB = Pred; } }; @@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef Updates, for (; UI != E;) { Use &U = *UI; ++UI; - MemoryAccess *Usr = dyn_cast(U.getUser()); + MemoryAccess *Usr = cast(U.getUser()); if (MemoryPhi *UsrPhi = dyn_cast(Usr)) { BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U); if (!DT.dominates(DominatingBlock, DominatedBlock)) @@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, // Now reinsert it into the IR and do whatever fixups needed. 
if (auto *MD = dyn_cast(What)) - insertDef(MD); + insertDef(MD, /*RenameUses=*/true); else - insertUse(cast(What)); + insertUse(cast(What), /*RenameUses=*/true); // Clear dangling pointers. We added all MemoryPhi users, but not all // of them are removed by fixupDefs(). @@ -1084,25 +1170,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To, if (!Accs) return; + assert(Start->getParent() == To && "Incorrect Start instruction"); MemoryAccess *FirstInNew = nullptr; for (Instruction &I : make_range(Start->getIterator(), To->end())) if ((FirstInNew = MSSA->getMemoryAccess(&I))) break; - if (!FirstInNew) - return; + if (FirstInNew) { + auto *MUD = cast(FirstInNew); + do { + auto NextIt = ++MUD->getIterator(); + MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) + ? nullptr + : cast(&*NextIt); + MSSA->moveTo(MUD, To, MemorySSA::End); + // Moving MUD from Accs in the moveTo above, may delete Accs, so we need + // to retrieve it again. + Accs = MSSA->getWritableBlockAccesses(From); + MUD = NextMUD; + } while (MUD); + } - auto *MUD = cast(FirstInNew); - do { - auto NextIt = ++MUD->getIterator(); - MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) - ? nullptr - : cast(&*NextIt); - MSSA->moveTo(MUD, To, MemorySSA::End); - // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to - // retrieve it again. - Accs = MSSA->getWritableBlockAccesses(From); - MUD = NextMUD; - } while (MUD); + // If all accesses were moved and only a trivial Phi remains, we try to remove + // that Phi. This is needed when From is going to be deleted. 
+ auto *Defs = MSSA->getWritableBlockDefs(From); + if (Defs && !Defs->empty()) + if (auto *Phi = dyn_cast(&*Defs->begin())) + tryRemoveTrivialPhi(Phi); } void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, @@ -1118,7 +1211,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start) { - assert(From->getSinglePredecessor() == To && + assert(From->getUniquePredecessor() == To && "From block is expected to have a single predecessor (To)."); moveAllAccesses(From, To, Start); for (BasicBlock *Succ : successors(From)) @@ -1173,8 +1266,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( return false; }); Phi->addIncoming(NewPhi, New); - if (onlySingleValue(NewPhi)) - removeMemoryAccess(NewPhi); + tryRemoveTrivialPhi(NewPhi); } } @@ -1239,10 +1331,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { unsigned PhisSize = PhisToOptimize.size(); while (PhisSize-- > 0) if (MemoryPhi *MP = - cast_or_null(PhisToOptimize.pop_back_val())) { - auto OperRange = MP->operands(); - tryRemoveTrivialPhi(MP, OperRange); - } + cast_or_null(PhisToOptimize.pop_back_val())) + tryRemoveTrivialPhi(MP); } } @@ -1256,8 +1346,7 @@ void MemorySSAUpdater::removeBlocks( if (!DeadBlocks.count(Succ)) if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) { MP->unorderedDeleteIncomingBlock(BB); - if (MP->getNumIncomingValues() == 1) - removeMemoryAccess(MP); + tryRemoveTrivialPhi(MP); } // Drop all references of all accesses in BB if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB)) @@ -1281,10 +1370,8 @@ void MemorySSAUpdater::removeBlocks( void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef UpdatedPHIs) { for (auto &VH : UpdatedPHIs) - if (auto *MPhi = cast_or_null(VH)) { - auto OperRange = MPhi->operands(); - tryRemoveTrivialPhi(MPhi, OperRange); - } + if (auto *MPhi = cast_or_null(VH)) + 
tryRemoveTrivialPhi(MPhi); } void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index e25eb290a66..8232bf07caf 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -319,7 +319,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto *CalledValue = CS.getCalledValue(); auto *CalledFunction = CS.getCalledFunction(); if (CalledValue && !CalledFunction) { - CalledValue = CalledValue->stripPointerCastsNoFollowAliases(); + CalledValue = CalledValue->stripPointerCasts(); // Stripping pointer casts can reveal a called function. CalledFunction = dyn_cast(CalledValue); } @@ -467,7 +467,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. // Don't try to import functions with noinline attribute. F.getAttributes().hasFnAttribute(Attribute::NoInline)}; - auto FuncSummary = llvm::make_unique( + auto FuncSummary = std::make_unique( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), @@ -598,7 +598,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); - auto GVarSummary = llvm::make_unique(Flags, VarFlags, + auto GVarSummary = std::make_unique(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); @@ -616,7 +616,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), 
A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); auto *Aliasee = A.getBaseObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); @@ -696,7 +696,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Create the appropriate summary type. if (Function *F = dyn_cast(GV)) { std::unique_ptr Summary = - llvm::make_unique( + std::make_unique( GVFlags, /*InstCount=*/0, FunctionSummary::FFlags{ F->hasFnAttribute(Attribute::ReadNone), @@ -714,7 +714,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr Summary = - llvm::make_unique( + std::make_unique( GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); @@ -741,7 +741,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( else if (F.hasProfileData()) { LoopInfo LI{DT}; BranchProbabilityInfo BPI{F, LI}; - BFIPtr = llvm::make_unique(F, BPI, LI); + BFIPtr = std::make_unique(F, BPI, LI); BFI = BFIPtr.get(); } @@ -813,7 +813,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (!ModuleSummaryDotFile.empty()) { std::error_code EC; - raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); if (EC) report_fatal_error(Twine("Failed to open dot file ") + ModuleSummaryDotFile + ": " + EC.message() + "\n"); diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp index b616cd6f762..44527773115 100644 --- a/lib/Analysis/MustExecute.cpp +++ b/lib/Analysis/MustExecute.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MustExecute.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/CFG.h" #include 
"llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -19,8 +21,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +#define DEBUG_TYPE "must-execute" + const DenseMap & LoopSafetyInfo::getBlockColors() const { return BlockColors; @@ -306,6 +311,17 @@ namespace { } bool runOnFunction(Function &F) override; }; + struct MustBeExecutedContextPrinter : public ModulePass { + static char ID; + + MustBeExecutedContextPrinter() : ModulePass(ID) { + initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnModule(Module &M) override; + }; } char MustExecutePrinter::ID = 0; @@ -320,6 +336,36 @@ FunctionPass *llvm::createMustExecutePrinter() { return new MustExecutePrinter(); } +char MustBeExecutedContextPrinter::ID = 0; +INITIALIZE_PASS_BEGIN( + MustBeExecutedContextPrinter, "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", false, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(MustBeExecutedContextPrinter, + "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", + false, true) + +ModulePass *llvm::createMustBeExecutedContextPrinter() { + return new MustBeExecutedContextPrinter(); +} + +bool MustBeExecutedContextPrinter::runOnModule(Module &M) { + MustBeExecutedContextExplorer Explorer(true); + for (Function &F : M) { + for (Instruction &I : instructions(F)) { + dbgs() << "-- Explore context of: " << I << "\n"; + for (const Instruction *CI : Explorer.range(&I)) + dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI + << "\n"; + } + } + + return false; +} + static 
bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { // TODO: merge these two routines. For the moment, we display the best // result obtained by *either* implementation. This is a bit unfair since no @@ -396,3 +442,75 @@ bool MustExecutePrinter::runOnFunction(Function &F) { return false; } + +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n"); + + // If we explore only inside a given basic block we stop at terminators. + if (!ExploreInterBlock && PP->isTerminator()) { + LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n"); + return nullptr; + } + + // If we do not traverse the call graph we check if we can make progress in + // the current function. First, check if the instruction is guaranteed to + // transfer execution to the successor. + bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP); + if (!TransfersExecution) + return nullptr; + + // If this is not a terminator we know that there is a single instruction + // after this one that is executed next if control is transfered. If not, + // we can try to go back to a call site we entered earlier. If none exists, we + // do not know any instruction that has to be executd next. + if (!PP->isTerminator()) { + const Instruction *NextPP = PP->getNextNode(); + LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n"); + return NextPP; + } + + // Finally, we have to handle terminators, trivial ones first. + assert(PP->isTerminator() && "Expected a terminator!"); + + // A terminator without a successor is not handled yet. + if (PP->getNumSuccessors() == 0) { + LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n"); + return nullptr; + } + + // A terminator with a single successor, we will continue at the beginning of + // that one. 
+ if (PP->getNumSuccessors() == 1) { + LLVM_DEBUG( + dbgs() << "\tUnconditional terminator, continue with successor\n"); + return &PP->getSuccessor(0)->front(); + } + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + +MustBeExecutedIterator::MustBeExecutedIterator( + MustBeExecutedContextExplorer &Explorer, const Instruction *I) + : Explorer(Explorer), CurInst(I) { + reset(I); +} + +void MustBeExecutedIterator::reset(const Instruction *I) { + CurInst = I; + Visited.clear(); + Visited.insert(I); +} + +const Instruction *MustBeExecutedIterator::advance() { + assert(CurInst && "Cannot advance an end iterator!"); + const Instruction *Next = + Explorer.getMustBeExecutedNextInstruction(*this, CurInst); + if (Next && !Visited.insert(Next).second) + Next = nullptr; + return Next; +} diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp index 72c40a0be23..07a5619a35b 100644 --- a/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -39,7 +39,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) BPI.calculate(*F, LI); // Finally compute BFI. 
- OwnedBFI = llvm::make_unique(*F, BPI, LI); + OwnedBFI = std::make_unique(*F, BPI, LI); BFI = OwnedBFI.get(); } @@ -97,7 +97,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { else BFI = nullptr; - ORE = llvm::make_unique(&Fn, BFI); + ORE = std::make_unique(&Fn, BFI); return false; } diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp index 458c0a7de6c..e947e5e388a 100644 --- a/lib/Analysis/OrderedInstructions.cpp +++ b/lib/Analysis/OrderedInstructions.cpp @@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA, const BasicBlock *IBB = InstA->getParent(); auto OBB = OBBMap.find(IBB); if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, make_unique(IBB)}).first; + OBB = OBBMap.insert({IBB, std::make_unique(IBB)}).first; return OBB->second->dominates(InstA, InstB); } diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index dce19d6d546..b99b7571502 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -45,6 +45,13 @@ static cl::opt ProfileSummaryHugeWorkingSetSizeThreshold( " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); +static cl::opt ProfileSummaryLargeWorkingSetSizeThreshold( + "profile-summary-large-working-set-size-threshold", cl::Hidden, + cl::init(12500), cl::ZeroOrMore, + cl::desc("The code working set size is considered large if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + // The next two options override the counts derived from summary computation and // are useful for debugging purposes. static cl::opt ProfileSummaryHotCount( @@ -186,6 +193,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } +// Like isFunctionHotInCallGraph but for a given cutoff. 
+bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return true; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa(I) || isa(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + return true; + } + for (const auto &BB : *F) + if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return true; + return false; +} + /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). @@ -222,6 +254,23 @@ void ProfileSummaryInfo::computeThresholds() { "Cold count threshold cannot exceed hot count threshold!"); HasHugeWorkingSetSize = HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; +} + +Optional ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { + if (!computeSummary()) + return None; + auto iter = ThresholdCache.find(PercentileCutoff); + if (iter != ThresholdCache.end()) { + return iter->second; + } + auto &DetailedSummary = Summary->getDetailedSummary(); + auto &Entry = + getEntryForPercentile(DetailedSummary, PercentileCutoff); + uint64_t CountThreshold = Entry.MinCount; + ThresholdCache[PercentileCutoff] = CountThreshold; + return CountThreshold; } bool ProfileSummaryInfo::hasHugeWorkingSetSize() { @@ -230,6 +279,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } +bool 
ProfileSummaryInfo::hasLargeWorkingSetSize() { + if (!HasLargeWorkingSetSize) + computeThresholds(); + return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); +} + bool ProfileSummaryInfo::isHotCount(uint64_t C) { if (!HotCountThreshold) computeThresholds(); @@ -242,6 +297,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { + auto CountThreshold = computeThreshold(PercentileCutoff); + return CountThreshold && C >= CountThreshold.getValue(); +} + uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { if (!HotCountThreshold) computeThresholds(); @@ -265,6 +325,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, return Count && isColdCount(*Count); } +bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(BB); + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); +} + bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index bc2cfd6fcc4..5ce0a1adeaa 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -148,6 +148,7 @@ STATISTIC(NumBruteForceTripCountsComputed, static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::ZeroOrMore, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), @@ -157,6 +158,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, static cl::opt VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt VerifySCEVStrict( + "verify-scev-strict", cl::Hidden, + 
cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " @@ -1707,7 +1711,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -2051,7 +2055,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -3421,7 +3425,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } - // It's tempting to want to call getMaxBackedgeTakenCount count here and + // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck @@ -4991,7 +4995,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, // overflow. 
if (auto *BEInst = dyn_cast(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); return PHISCEV; } @@ -5596,6 +5600,22 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConservativeResult.intersectWith(X, RangeType)); } + if (const SCEVSMinExpr *SMin = dyn_cast(S)) { + ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint); + for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i) + X = X.smin(getRangeRef(SMin->getOperand(i), SignHint)); + return setRange(SMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + + if (const SCEVUMinExpr *UMin = dyn_cast(S)) { + ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint); + for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i) + X = X.umin(getRangeRef(UMin->getOperand(i), SignHint)); + return setRange(UMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); @@ -5654,7 +5674,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // TODO: non-affine addrec if (AddRec->isAffine()) { - const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { auto RangeFromAffine = getRangeForAffineAR( @@ -6523,7 +6543,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = - dyn_cast(getMaxBackedgeTakenCount(L)); + dyn_cast(getConstantMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } @@ -6599,7 +6619,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop 
*L) { /// Similar to getBackedgeTakenCount, except return the least SCEV value that is /// known never to be less than the actual backedge taken count. -const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getConstantMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).getMax(this); } @@ -9833,6 +9853,10 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). + // X - X = 0. + if (More == Less) + return APInt(getTypeSizeInBits(More->getType()), 0); + if (isa(Less) && isa(More)) { const auto *LAR = cast(Less); const auto *MAR = cast(More); @@ -10314,10 +10338,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, return false; } +static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // zext x u<= sext x, sext x s<= zext x + switch (Pred) { + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: { + // If operand >=s 0 then ZExt == SExt. If operand (LHS); + const SCEVZeroExtendExpr *ZExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: { + // If operand >=s 0 then ZExt == SExt. 
If operand (LHS); + const SCEVSignExtendExpr *SExt = dyn_cast(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + default: + break; + }; + return false; +} + bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || + isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); @@ -11434,8 +11491,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - if (!isa(SE->getMaxBackedgeTakenCount(L))) { - OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (!isa(SE->getConstantMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { @@ -11901,14 +11958,14 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - auto *ConstantDelta = - dyn_cast(SE2.getMinusSCEV(CurBECount, NewBECount)); + const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - if (ConstantDelta && ConstantDelta->getAPInt() != 0) { - dbgs() << "Trip Count Changed!\n"; + // Unless VerifySCEVStrict is set, we only compare constant deltas. 
+ if ((VerifySCEVStrict || isa(Delta)) && !Delta->isZero()) { + dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; - dbgs() << "Delta: " << *ConstantDelta << "\n"; + dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } @@ -11959,7 +12016,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { SE.reset(new ScalarEvolution( - F, getAnalysis().getTLI(), + F, getAnalysis().getTLI(F), getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), getAnalysis().getLoopInfo())); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index e8a95d35482..bceec921188 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// division. If so, update S with Factor divided out and return true. /// S need not be evenly divisible if a reasonable remainder can be /// computed. -/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made -/// unnecessary; in its place, just signed-divide Ops[i] by the scale and -/// check to see if the divide was folded. static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, const DataLayout &DL) { @@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - if (!CanonicalMode) return expandAddRecExprLiterally(S); + // In canonical mode we compute the addrec as an expression of a canonical IV + // using evaluateAtIteration and expand the resulting SCEV expression. This + // way we avoid introducing new IVs to carry on the computation of the addrec + // throughout the loop. 
+ // + // For nested addrecs evaluateAtIteration might need a canonical IV of a + // type wider than the addrec itself. Emitting a canonical IV of the + // proper type might produce non-legal types, for example expanding an i64 + // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall + // back to non-canonical mode for nested addrecs. + if (!CanonicalMode || (S->getNumOperands() > 2)) + return expandAddRecExprLiterally(S); Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); @@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; if (!match(BB->getTerminator(), m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) + m_BasicBlock(), m_BasicBlock()))) continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp index 4cf235db86e..1b363869895 100644 --- a/lib/Analysis/StackSafetyAnalysis.cpp +++ b/lib/Analysis/StackSafetyAnalysis.cpp @@ -333,8 +333,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
- const GlobalValue *Callee = dyn_cast( - CS.getCalledValue()->stripPointerCastsNoFollowAliases()); + const GlobalValue *Callee = + dyn_cast(CS.getCalledValue()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp index 3cf248a3114..8447dc87069 100644 --- a/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/lib/Analysis/SyncDependenceAnalysis.cpp @@ -218,9 +218,11 @@ struct DivergencePropagator { template std::unique_ptr computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { + SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { assert(JoinBlocks); + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "") << "\n" ); + // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { DefMap.emplace(SuccBlock, SuccBlock); @@ -228,13 +230,19 @@ struct DivergencePropagator { if (ParentLoop && !ParentLoop->contains(SuccBlock)) { // immediate loop exit from node. ReachedLoopExits.insert(SuccBlock); - continue; } else { // regular successor PendingUpdates.insert(SuccBlock); } } + LLVM_DEBUG( + dbgs() << "SDA: rpo order:\n"; + for (const auto * RpoBlock : FuncRPOT) { + dbgs() << "- " << RpoBlock->getName() << "\n"; + } + ); + auto ItBeginRPO = FuncRPOT.begin(); // skip until term (TODO RPOT won't let us start at @term directly) @@ -245,16 +253,18 @@ struct DivergencePropagator { // propagate definitions at the immediate successors of the node in RPO auto ItBlockRPO = ItBeginRPO; - while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) { + while ((++ItBlockRPO != ItEndRPO) && + !PendingUpdates.empty()) { const auto *Block = *ItBlockRPO; + LLVM_DEBUG(dbgs() << "SDA::joins. 
visiting " << Block->getName() << "\n"); - // skip @block if not pending update + // skip Block if not pending update auto ItPending = PendingUpdates.find(Block); if (ItPending == PendingUpdates.end()) continue; PendingUpdates.erase(ItPending); - // propagate definition at @block to its successors + // propagate definition at Block to its successors auto ItDef = DefMap.find(Block); const auto *DefBlock = ItDef->second; assert(DefBlock); @@ -278,6 +288,8 @@ struct DivergencePropagator { } } + LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); + // We need to know the definition at the parent loop header to decide // whether the definition at the header is different from the definition at // the loop exits, which would indicate a divergent loop exits. @@ -292,24 +304,17 @@ struct DivergencePropagator { // | // proper exit from both loops // - // D post-dominates B as it is the only proper exit from the "A loop". - // If C has a divergent branch, propagation will therefore stop at D. - // That implies that B will never receive a definition. - // But that definition can only be the same as at D (D itself in thise case) - // because all paths to anywhere have to pass through D. - // - const BasicBlock *ParentLoopHeader = - ParentLoop ? ParentLoop->getHeader() : nullptr; - if (ParentLoop && ParentLoop->contains(PdBoundBlock)) { - DefMap[ParentLoopHeader] = DefMap[PdBoundBlock]; - } - // analyze reached loop exits if (!ReachedLoopExits.empty()) { + const BasicBlock *ParentLoopHeader = + ParentLoop ? ParentLoop->getHeader() : nullptr; + assert(ParentLoop); - const auto *HeaderDefBlock = DefMap[ParentLoopHeader]; + auto ItHeaderDef = DefMap.find(ParentLoopHeader); + const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? 
nullptr : ItHeaderDef->second; + LLVM_DEBUG(printDefs(dbgs())); - assert(HeaderDefBlock && "no definition in header of carrying loop"); + assert(HeaderDefBlock && "no definition at header of carrying loop"); for (const auto *ExitBlock : ReachedLoopExits) { auto ItExitDef = DefMap.find(ExitBlock); @@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { return *ItCached->second; } - // dont propagte beyond the immediate post dom of the loop - const auto *PdNode = PDT.getNode(const_cast(Loop.getHeader())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - while (PdBoundBlock && Loop.contains(PdBoundBlock)) { - IpdNode = IpdNode->getIDom(); - PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - } - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints( - *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); + *Loop.getHeader(), LoopExits, Loop.getParentLoop()); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { if (ItCached != CachedBranchJoins.end()) return *ItCached->second; - // dont propagate beyond the immediate post dominator of the branch - const auto *PdNode = PDT.getNode(const_cast(Term.getParent())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? 
IpdNode->getBlock() : nullptr; - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index ef139d3257d..23096969805 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -28,7 +28,8 @@ static cl::opt ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", "Intel SVML library"))); -StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = + { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" }; @@ -58,14 +59,14 @@ static bool hasBcmp(const Triple &TT) { return TT.isGNUEnvironment() || TT.isMusl(); // Both NetBSD and OpenBSD are planning to remove the function. Windows does // not have it. - return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin(); + return TT.isOSFreeBSD() || TT.isOSSolaris(); } /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef StandardNames) { + ArrayRef StandardNames) { // Verify that the StandardNames array is in alphabetical order. 
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), [](StringRef LHS, StringRef RHS) { @@ -104,19 +105,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc_ldexp); - TLI.setUnavailable(LibFunc_ldexpf); - TLI.setUnavailable(LibFunc_ldexpl); - TLI.setUnavailable(LibFunc_exp10); - TLI.setUnavailable(LibFunc_exp10f); - TLI.setUnavailable(LibFunc_exp10l); - TLI.setUnavailable(LibFunc_log10); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_log10l); - } + T.getArch() == Triple::amdgcn) + TLI.disableAllFunctions(); - // There are no library implementations of mempcy and memset for AMD gpus and + // There are no library implementations of memcpy and memset for AMD gpus and // these can be difficult to lower in the backend. if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { @@ -623,19 +615,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) { return GlobalValue::dropLLVMManglingEscape(funcName); } -bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc &F) const { - StringRef const *Start = &StandardNames[0]; - StringRef const *End = &StandardNames[NumLibFuncs]; - +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) return false; - StringRef const *I = std::lower_bound( - Start, End, funcName, [](StringRef LHS, StringRef RHS) { - return LHS < RHS; - }); + const auto *Start = std::begin(StandardNames); + const auto *End = std::end(StandardNames); + const auto *I = std::lower_bound(Start, End, funcName); if (I != End && *I == funcName) { F = (LibFunc)(I - Start); return true; @@ -1481,6 +1468,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return false; } case LibFunc::NumLibFuncs: + case LibFunc::NotLibFunc: 
break; } @@ -1599,14 +1587,6 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, return I->ScalarFnName; } -TargetLibraryInfo TargetLibraryAnalysis::run(Module &M, - ModuleAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); -} - TargetLibraryInfo TargetLibraryAnalysis::run(Function &F, FunctionAnalysisManager &) { if (PresetInfoImpl) diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index eb04c34453f..c9c294873ea 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -59,11 +60,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); - I != IE; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. if (!L->isLoopLatch(BB)) { @@ -97,13 +94,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. 
bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); - PI != PIE; ++PI) { - if (!L->contains(*PI)) + for (BasicBlock *Pred : predecessors(L->getHeader())) { + if (!L->contains(Pred)) continue; - if (!DT.dominates(*I, *PI)) { + if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } @@ -127,7 +122,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. - ExitBlock = *I; + ExitBlock = BB; ExitCount = EC; break; } @@ -227,6 +222,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } +bool TargetTransformInfo::collectFlatAddressOperands( + SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const { + return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); +} + +bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } @@ -283,21 +288,22 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { - return TTIImpl->isLegalMaskedStore(DataType); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { - return TTIImpl->isLegalMaskedLoad(DataType); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, - unsigned Alignment) const { + Align Alignment) const { return 
TTIImpl->isLegalNTStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalNTLoad(Type *DataType, - unsigned Alignment) const { +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } @@ -359,14 +365,6 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -unsigned TargetTransformInfo::getJumpBufAlignment() const { - return TTIImpl->getJumpBufAlignment(); -} - -unsigned TargetTransformInfo::getJumpBufSize() const { - return TTIImpl->getJumpBufSize(); -} - bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } @@ -470,8 +468,16 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return Cost; } -unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return TTIImpl->getNumberOfRegisters(Vector); +unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { + return TTIImpl->getNumberOfRegisters(ClassID); +} + +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { + return TTIImpl->getRegisterClassForType(Vector, Ty); +} + +const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { + return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { @@ -1276,6 +1282,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } + case Instruction::ExtractValue: + return 0; // Model all ExtractValue nodes as free. 
case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast(I); Type *Ty = Shuffle->getType(); diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp index 9311dfbc6eb..072d291f3f9 100644 --- a/lib/Analysis/TypeMetadataUtils.cpp +++ b/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if (auto *C = dyn_cast(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if (Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/lib/Analysis/VFABIDemangling.cpp b/lib/Analysis/VFABIDemangling.cpp new file mode 100644 index 00000000000..6fd8ae63f5f --- /dev/null +++ b/lib/Analysis/VFABIDemangling.cpp @@ -0,0 +1,418 @@ +//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +namespace { +/// Utilities for the Vector Function ABI name parser. + +/// Return types for the parser functions. +enum class ParseRet { + OK, // Found. + None, // Not found. + Error // Syntax error. +}; + +/// Extracts the `<isa>` information from the mangled string, and +/// sets the `ISA` accordingly. +ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) { + if (MangledName.empty()) + return ParseRet::Error; + + ISA = StringSwitch(MangledName.take_front(1)) + .Case("n", VFISAKind::AdvancedSIMD) + .Case("s", VFISAKind::SVE) + .Case("b", VFISAKind::SSE) + .Case("c", VFISAKind::AVX) + .Case("d", VFISAKind::AVX2) + .Case("e", VFISAKind::AVX512) + .Default(VFISAKind::Unknown); + + MangledName = MangledName.drop_front(1); + + return ParseRet::OK; +} + +/// Extracts the `<mask>` information from the mangled string, and +/// sets `IsMasked` accordingly. On success, the `<mask>` token is +/// removed from the input string `MangledName`. +ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) { + if (MangledName.consume_front("M")) { + IsMasked = true; + return ParseRet::OK; + } + + if (MangledName.consume_front("N")) { + IsMasked = false; + return ParseRet::OK; + } + + return ParseRet::Error; +} + +/// Extracts the `<vlen>` information from the mangled string, and +/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable +/// vector length. On success, the `<vlen>` token is removed from +/// the input string `ParseString`. 
+/// +ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { + if (ParseString.consume_front("x")) { + VF = 0; + IsScalable = true; + return ParseRet::OK; + } + + if (ParseString.consumeInteger(10, VF)) + return ParseRet::Error; + + IsScalable = false; + return ParseRet::OK; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `Pos` to +/// <number>, and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "ls", "Rs", "Us" or +/// "Ls". +ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &Pos, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// <token> is one of "ls", "Rs", "Us" or "Ls". +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `StepOrPos` to +/// <number>, and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. 
+ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + ParseRet Ret; + + // "ls" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Rs" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs"); + if (Ret != ParseRet::None) + return Ret; + + // "Ls" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Us" + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us"); + if (Ret != ParseRet::None) + return Ret; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `LinearStep` to +/// <number>, and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "l", "R", "U" or +/// "L". +ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString, + VFParamKind &PKind, int &LinearStep, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + const bool Negate = ParseString.consume_front("n"); + if (ParseString.consumeInteger(10, LinearStep)) + LinearStep = 1; + if (Negate) + LinearStep *= -1; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// ["l" | "R" | "U" | "L"] {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `StepOrPos` to +/// <number>, and returns success. On a syntax error, it returns a +/// parsing error. 
If nothing is parsed, it returns None. +ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + // "l" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") == + ParseRet::OK) + return ParseRet::OK; + + // "R" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") == + ParseRet::OK) + return ParseRet::OK; + + // "L" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") == + ParseRet::OK) + return ParseRet::OK; + + // "U" {"n"} + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") == + ParseRet::OK) + return ParseRet::OK; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// "u" <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `Pos` to +/// <number>, and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) { + // "u" + const char *UniformToken = "u"; + if (ParseString.consume_front(UniformToken)) { + PKind = VFABI::getVFParamKindFromString(UniformToken); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + + return ParseRet::OK; + } + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// of valid parameters at the beginning of the string +/// `ParseString`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the corresponding enum value, sets `StepOrPos` +/// accordingly, and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. 
+ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, + int &StepOrPos) { + if (ParseString.consume_front("v")) { + PKind = VFParamKind::Vector; + StepOrPos = 0; + return ParseRet::OK; + } + + const ParseRet HasLinearRuntime = + tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos); + if (HasLinearRuntime != ParseRet::None) + return HasLinearRuntime; + + const ParseRet HasLinearCompileTime = + tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos); + if (HasLinearCompileTime != ParseRet::None) + return HasLinearCompileTime; + + const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos); + if (HasUniform != ParseRet::None) + return HasUniform; + + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// of a valid 'aligned' clause. The function should be invoked +/// after parsing a parameter via `tryParseParameter`. +/// +/// On success, it removes the parsed alignment token from `ParseString`, +/// sets `Alignment` to the parsed value (which must be a power of two), +/// and returns success. On a syntax error, it returns a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { + uint64_t Val; + // "a" + if (ParseString.consume_front("a")) { + if (ParseString.consumeInteger(10, Val)) + return ParseRet::Error; + + if (!isPowerOf2_64(Val)) + return ParseRet::Error; + + Alignment = Align(Val); + + return ParseRet::OK; + } + + return ParseRet::None; +} +} // namespace + +// Format of the ABI name: +// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] +Optional VFABI::tryDemangleForVFABI(StringRef MangledName) { + // Assume there is no custom name <redirection>, and therefore the + // vector name consists of + // _ZGV<isa><mask><vlen><parameters>_<scalarname>. + StringRef VectorName = MangledName; + + // Parse the fixed size part of the mangled name + if (!MangledName.consume_front("_ZGV")) + return None; + + // Extract ISA. An unknown ISA is also supported, so we accept all + // values. 
+ VFISAKind ISA; + if (tryParseISA(MangledName, ISA) != ParseRet::OK) + return None; + + // Extract . + bool IsMasked; + if (tryParseMask(MangledName, IsMasked) != ParseRet::OK) + return None; + + // Parse the variable size, starting from . + unsigned VF; + bool IsScalable; + if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK) + return None; + + // Parse the . + ParseRet ParamFound; + SmallVector Parameters; + do { + const unsigned ParameterPos = Parameters.size(); + VFParamKind PKind; + int StepOrPos; + ParamFound = tryParseParameter(MangledName, PKind, StepOrPos); + + // Bail off if there is a parsing error in the parsing of the parameter. + if (ParamFound == ParseRet::Error) + return None; + + if (ParamFound == ParseRet::OK) { + Align Alignment; + // Look for the alignment token "a ". + const ParseRet AlignFound = tryParseAlign(MangledName, Alignment); + // Bail off if there is a syntax error in the align token. + if (AlignFound == ParseRet::Error) + return None; + + // Add the parameter. + Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment}); + } + } while (ParamFound == ParseRet::OK); + + // A valid MangledName mus have at least one valid entry in the + // . + if (Parameters.empty()) + return None; + + // Check for the and the optional , which + // are separated from the prefix with "_" + if (!MangledName.consume_front("_")) + return None; + + // The rest of the string must be in the format: + // [()] + const StringRef ScalarName = + MangledName.take_while([](char In) { return In != '('; }); + + if (ScalarName.empty()) + return None; + + // Reduce MangledName to [()]. + MangledName = MangledName.ltrim(ScalarName); + // Find the optional custom name redirection. + if (MangledName.consume_front("(")) { + if (!MangledName.consume_back(")")) + return None; + // Update the vector variant with the one specified by the user. + VectorName = MangledName; + // If the vector name is missing, bail out. 
+ if (VectorName.empty()) + return None; + } + + // When is "M", we need to add a parameter that is used as + // global predicate for the function. + if (IsMasked) { + const unsigned Pos = Parameters.size(); + Parameters.push_back({Pos, VFParamKind::GlobalPredicate}); + } + + // Asserts for parameters of type `VFParamKind::GlobalPredicate`, as + // prescribed by the Vector Function ABI specifications supported by + // this parser: + // 1. Uniqueness. + // 2. Must be the last in the parameter list. + const auto NGlobalPreds = std::count_if( + Parameters.begin(), Parameters.end(), [](const VFParameter PK) { + return PK.ParamKind == VFParamKind::GlobalPredicate; + }); + assert(NGlobalPreds < 2 && "Cannot have more than one global predicate."); + if (NGlobalPreds) + assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && + "The global predicate must be the last parameter"); + + const VFShape Shape({VF, IsScalable, ISA, Parameters}); + return VFInfo({Shape, ScalarName, VectorName}); +} + +VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { + const VFParamKind ParamKind = StringSwitch(Token) + .Case("v", VFParamKind::Vector) + .Case("l", VFParamKind::OMP_Linear) + .Case("R", VFParamKind::OMP_LinearRef) + .Case("L", VFParamKind::OMP_LinearVal) + .Case("U", VFParamKind::OMP_LinearUVal) + .Case("ls", VFParamKind::OMP_LinearPos) + .Case("Ls", VFParamKind::OMP_LinearValPos) + .Case("Rs", VFParamKind::OMP_LinearRefPos) + .Case("Us", VFParamKind::OMP_LinearUValPos) + .Case("u", VFParamKind::OMP_Uniform) + .Default(VFParamKind::Unknown); + + if (ParamKind != VFParamKind::Unknown) + return ParamKind; + + // This function should never be invoked with an invalid input. 
+ llvm_unreachable("This fuction should be invoken only on parameters" + " that have a textual representation in the mangled name" + " of the Vector Function ABI"); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c70906dcc62..bbf38999183 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -558,12 +558,18 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return true; } + // Don't let an assume affect itself - this would cause the problems + // `isEphemeralValueOf` is trying to prevent, and it would also make + // the loop below go out of bounds. + if (Inv == CxtI) + return false; + // The context comes first, but they're both in the same block. Make sure // there is nothing in between that might interrupt the control flow. for (BasicBlock::const_iterator I = std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; return !isEphemeralValueOf(Inv, CxtI); @@ -1049,7 +1055,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::Select: { - const Value *LHS, *RHS; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); @@ -1095,7 +1101,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. 
- if (Q.IIQ.hasNoSignedWrap(cast(RHS))) + if (match(RHS, m_Neg(m_Specific(LHS))) && + Q.IIQ.hasNoSignedWrap(cast(RHS))) MaxHighZeros = 1; } @@ -1366,7 +1373,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, else if (LR == I) L = LL; else - break; + continue; // Check for recurrence with L and R flipped. // Ok, we have a PHI of the form L op= R. Check for low // zero bits. computeKnownBits(R, Known2, Depth + 1, Q); @@ -1714,9 +1721,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { - unsigned Align = V->getPointerAlignment(Q.DL); + const MaybeAlign Align = V->getPointerAlignment(Q.DL); if (Align) - Known.Zero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align->value())); } // computeKnownBitsFromAssume strictly refines Known. @@ -2066,7 +2073,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (const auto *Call = dyn_cast(V)) { if (Call->isReturnNonNull()) return true; - if (const auto *RP = getArgumentAliasingToReturnedPointer(Call)) + if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isKnownNonZero(RP, Depth, Q); } } @@ -2300,7 +2307,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, cast(Select)->getOpcode() == Instruction::Select && "Input should be a Select!"); - const Value *LHS, *RHS, *LHS2, *RHS2; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; if (SPF != SPF_SMAX && SPF != SPF_SMIN) return false; @@ -2308,6 +2315,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, if (!match(RHS, m_APInt(CLow))) return false; + const Value *LHS2 = nullptr, *RHS2 = nullptr; SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; if (getInverseMinMaxFlavor(SPF) != SPF2) return false; @@ -2384,253 
+2392,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (Depth == MaxDepth) return 1; // Limit search depth. - const Operator *U = dyn_cast(V); - switch (Operator::getOpcode(V)) { - default: break; - case Instruction::SExt: - Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; + if (auto *U = dyn_cast(V)) { + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; - case Instruction::SDiv: { - const APInt *Denominator; - // sdiv X, C -> adds log(C) sign bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { + case Instruction::SDiv: { + const APInt *Denominator; + // sdiv X, C -> adds log(C) sign bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Ignore non-positive denominator. - if (!Denominator->isStrictlyPositive()) - break; + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - // Calculate the incoming numerator bits. - unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + // Calculate the incoming numerator bits. + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // Add floor(log(C)) bits to the numerator bits. - return std::min(TyBits, NumBits + Denominator->logBase2()); - } - break; - } - - case Instruction::SRem: { - const APInt *Denominator; - // srem X, C -> we know that the result is within [-C+1,C) when C is a - // positive constant. This let us put a lower bound on the number of sign - // bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { - - // Ignore non-positive denominator. - if (!Denominator->isStrictlyPositive()) - break; - - // Calculate the incoming numerator bits. SRem by a positive constant - // can't lower the number of sign bits. 
- unsigned NumrBits = - ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - // Calculate the leading sign bit constraints by examining the - // denominator. Given that the denominator is positive, there are two - // cases: - // - // 1. the numerator is positive. The result range is [0,C) and [0,C) u< - // (1 << ceilLogBase2(C)). - // - // 2. the numerator is negative. Then the result range is (-C,0] and - // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). - // - // Thus a lower bound on the number of sign bits is `TyBits - - // ceilLogBase2(C)`. - - unsigned ResBits = TyBits - Denominator->ceilLogBase2(); - return std::max(NumrBits, ResBits); - } - break; - } - - case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // ashr X, C -> adds C sign bits. Vectors too. - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - if (ShAmt->uge(TyBits)) - break; // Bad shift. - unsigned ShAmtLimited = ShAmt->getZExtValue(); - Tmp += ShAmtLimited; - if (Tmp > TyBits) Tmp = TyBits; - } - return Tmp; - } - case Instruction::Shl: { - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - // shl destroys sign bits. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (ShAmt->uge(TyBits) || // Bad shift. - ShAmt->uge(Tmp)) break; // Shifted all sign bits out. - Tmp2 = ShAmt->getZExtValue(); - return Tmp - Tmp2; - } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: // NOT is handled here. - // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - FirstAnswer = std::min(Tmp, Tmp2); - // We computed what we know about the sign bits as our first - // answer. Now proceed to the generic code that uses - // computeKnownBits, and pick whichever answer is better. 
- } - break; - - case Instruction::Select: { - // If we have a clamp pattern, we know that the number of sign bits will be - // the minimum of the clamp min/max range. - const Value *X; - const APInt *CLow, *CHigh; - if (isSignedMinMaxClamp(U, X, CLow, CHigh)) - return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); - - Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp == 1) break; - Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); - return std::min(Tmp, Tmp2); - } - - case Instruction::Add: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - - // Special case decrementing a value (ADD X, -1): - if (const auto *CRHS = dyn_cast(U->getOperand(1))) - if (CRHS->isAllOnesValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (Known.isNonNegative()) - return Tmp; + // Add floor(log(C)) bits to the numerator bits. + return std::min(TyBits, NumBits + Denominator->logBase2()); } - - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - return std::min(Tmp, Tmp2)-1; - - case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - - // Handle NEG. - if (const auto *CLHS = dyn_cast(U->getOperand(0))) - if (CLHS->isNullValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. 
- if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If the input is known to be positive (the sign bit is known clear), - // the output of the NEG has the same number of sign bits as the input. - if (Known.isNonNegative()) - return Tmp2; - - // Otherwise, we treat this like a SUB. - } - - // Sub can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - return std::min(Tmp, Tmp2)-1; - - case Instruction::Mul: { - // The output of the Mul can be at most twice the valid bits in the inputs. - unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (SignBitsOp0 == 1) break; - unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (SignBitsOp1 == 1) break; - unsigned OutValidBits = - (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); - return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; - } - - case Instruction::PHI: { - const PHINode *PN = cast(U); - unsigned NumIncomingValues = PN->getNumIncomingValues(); - // Don't analyze large in-degree PHIs. - if (NumIncomingValues > 4) break; - // Unreachable blocks may have zero-operand PHI nodes. - if (NumIncomingValues == 0) break; - - // Take the minimum of all incoming values. This can't infinitely loop - // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); - for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { - if (Tmp == 1) return Tmp; - Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); - } - return Tmp; - } - - case Instruction::Trunc: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. - break; - - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. 
But at least we might find information - // valid for all elements of the vector (for example if vector is sign - // extended, shifted, etc). - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - case Instruction::ShuffleVector: { - // TODO: This is copied almost directly from the SelectionDAG version of - // ComputeNumSignBits. It would be better if we could share common - // code. If not, make sure that changes are translated to the DAG. - - // Collect the minimum number of sign bits that are shared by every vector - // element referenced by the shuffle. - auto *Shuf = cast(U); - int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); - int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - for (int i = 0; i != NumMaskElts; ++i) { - int M = Shuf->getMaskValue(i); - assert(M < NumElts * 2 && "Invalid shuffle mask constant"); - // For undef elements, we don't know anything about the common state of - // the shuffle result. - if (M == -1) - return 1; - if (M < NumElts) - DemandedLHS.setBit(M % NumElts); - else - DemandedRHS.setBit(M % NumElts); - } - Tmp = std::numeric_limits::max(); - if (!!DemandedLHS) - Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); - if (!!DemandedRHS) { - Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); - Tmp = std::min(Tmp, Tmp2); - } - // If we don't know anything, early out and try computeKnownBits fall-back. - if (Tmp == 1) break; - assert(Tmp <= V->getType()->getScalarSizeInBits() && - "Failed to determine minimum sign bits"); - return Tmp; - } + } + + case Instruction::SRem: { + const APInt *Denominator; + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { + + // Ignore non-positive denominator. 
+ if (!Denominator->isStrictlyPositive()) + break; + + // Calculate the incoming numerator bits. SRem by a positive constant + // can't lower the number of sign bits. + unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + // Calculate the leading sign bit constraints by examining the + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); + return std::max(NumrBits, ResBits); + } + break; + } + + case Instruction::AShr: { + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + if (ShAmt->uge(TyBits)) + break; // Bad shift. + unsigned ShAmtLimited = ShAmt->getZExtValue(); + Tmp += ShAmtLimited; + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; + } + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (ShAmt->uge(TyBits) || // Bad shift. + ShAmt->uge(Tmp)) break; // Shifted all sign bits out. + Tmp2 = ShAmt->getZExtValue(); + return Tmp - Tmp2; + } + break; + } + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. 
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. + } + break; + + case Instruction::Select: { + // If we have a clamp pattern, we know that the number of sign bits will + // be the minimum of the clamp min/max range. + const Value *X; + const APInt *CLow, *CHigh; + if (isSignedMinMaxClamp(U, X, CLow, CHigh)) + return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); + + Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp == 1) break; + Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); + return std::min(Tmp, Tmp2); + } + + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + + // Special case decrementing a value (ADD X, -1): + if (const auto *CRHS = dyn_cast(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); + + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known.isNonNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + return std::min(Tmp, Tmp2) - 1; + + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + + // Handle NEG. 
+ if (const auto *CLHS = dyn_cast(U->getOperand(0))) + if (CLHS->isNullValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the + // input. + if (Known.isNonNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + return std::min(Tmp, Tmp2) - 1; + + case Instruction::Mul: { + // The output of the Mul can be at most twice the valid bits in the + // inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (SignBitsOp0 == 1) break; + unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (SignBitsOp1 == 1) break; + unsigned OutValidBits = + (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); + return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; + } + + case Instruction::PHI: { + const PHINode *PN = cast(U); + unsigned NumIncomingValues = PN->getNumIncomingValues(); + // Don't analyze large in-degree PHIs. + if (NumIncomingValues > 4) break; + // Unreachable blocks may have zero-operand PHI nodes. + if (NumIncomingValues == 0) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. 
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); + for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + } + return Tmp; + } + + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an + // important case for targets like X86. + break; + + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and + // skip tracking the specific element. But at least we might find + // information valid for all elements of the vector (for example if vector + // is sign extended, shifted, etc). + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + case Instruction::ShuffleVector: { + // TODO: This is copied almost directly from the SelectionDAG version of + // ComputeNumSignBits. It would be better if we could share common + // code. If not, make sure that changes are translated to the DAG. + + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + auto *Shuf = cast(U); + int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); + int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + for (int i = 0; i != NumMaskElts; ++i) { + int M = Shuf->getMaskValue(i); + assert(M < NumElts * 2 && "Invalid shuffle mask constant"); + // For undef elements, we don't know anything about the common state of + // the shuffle result. 
+ if (M == -1) + return 1; + if (M < NumElts) + DemandedLHS.setBit(M % NumElts); + else + DemandedRHS.setBit(M % NumElts); + } + Tmp = std::numeric_limits::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits + // fall-back. + if (Tmp == 1) + break; + assert(Tmp <= V->getType()->getScalarSizeInBits() && + "Failed to determine minimum sign bits"); + return Tmp; + } + } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2655,8 +2666,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { - const unsigned MaxDepth = 6; - assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); @@ -3651,23 +3660,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { return Len == ~0ULL ? 1 : Len; } -const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) { +const Value * +llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness) { assert(Call && "getArgumentAliasingToReturnedPointer only works on nonnull calls"); if (const Value *RV = Call->getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. 
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + Call, MustPreserveNullness)) return Call->getArgOperand(0); return nullptr; } bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call) { + const CallBase *Call, bool MustPreserveNullness) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group || Call->getIntrinsicID() == Intrinsic::aarch64_irg || - Call->getIntrinsicID() == Intrinsic::aarch64_tagp; + Call->getIntrinsicID() == Intrinsic::aarch64_tagp || + (!MustPreserveNullness && + Call->getIntrinsicID() == Intrinsic::ptrmask); } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3725,7 +3739,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -3865,6 +3879,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } +bool llvm::mustSuppressSpeculation(const LoadInst &LI) { + if (!LI.isUnordered()) + return true; + const Function &F = *LI.getFunction(); + // Speculative load may create a race that did not exist in the source. + return F.hasFnAttribute(Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. 
+ F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress); +} + + bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { @@ -3909,17 +3935,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } case Instruction::Load: { const LoadInst *LI = cast(Inst); - if (!LI->isUnordered() || - // Speculative load may create a race that did not exist in the source. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) + if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getType(), LI->getAlignment(), - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast(Inst); @@ -4221,22 +4242,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // A memory operation returns normally if it isn't volatile. A volatile - // operation is allowed to trap. - // - // An atomic operation isn't guaranteed to return in a reasonable amount of - // time because it's possible for another thread to interfere with it for an + // Note: An atomic operation isn't guaranteed to return in a reasonable amount + // of time because it's possible for another thread to interfere with it for an // arbitrary length of time, but programs aren't allowed to rely on that. 
- if (const LoadInst *LI = dyn_cast(I)) - return !LI->isVolatile(); - if (const StoreInst *SI = dyn_cast(I)) - return !SI->isVolatile(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) - return !CXI->isVolatile(); - if (const AtomicRMWInst *RMWI = dyn_cast(I)) - return !RMWI->isVolatile(); - if (const MemIntrinsic *MII = dyn_cast(I)) - return !MII->isVolatile(); // If there is no successor, then execution can't transfer to it. if (const auto *CRI = dyn_cast(I)) @@ -4277,10 +4285,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. - return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || - match(I, m_Intrinsic()) || - match(I, m_Intrinsic()) || - match(I, m_Intrinsic()); + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); } // Other instructions return normally. @@ -4572,12 +4577,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, // TODO: Allow FP min/max with nnan/nsz. 
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); - Value *A, *B; + Value *A = nullptr, *B = nullptr; SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; - Value *C, *D; + Value *C = nullptr, *D = nullptr; SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -5627,8 +5632,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, } static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, - APInt &Upper) { - const Value *LHS, *RHS; + APInt &Upper, const InstrInfoQuery &IIQ) { + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); if (R.Flavor == SPF_UNKNOWN) return; @@ -5640,7 +5645,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, // then the result of abs(X) is [0..SIGNED_MAX], // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. Lower = APInt::getNullValue(BitWidth); - if (cast(RHS)->hasNoSignedWrap()) + if (match(RHS, m_Neg(m_Specific(LHS))) && + IIQ.hasNoSignedWrap(cast(RHS))) Upper = APInt::getSignedMaxValue(BitWidth) + 1; else Upper = APInt::getSignedMinValue(BitWidth) + 1; @@ -5694,7 +5700,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { else if (auto *II = dyn_cast(V)) setLimitsForIntrinsic(*II, Lower, Upper); else if (auto *SI = dyn_cast(V)) - setLimitsForSelectPattern(*SI, Lower, Upper); + setLimitsForSelectPattern(*SI, Lower, Upper, IIQ); ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); @@ -5704,3 +5710,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { return CR; } + +static Optional +getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { + // Skip over the first indices. 
+ gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. + int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); + if (!OpC) + return None; + if (OpC->isZero()) + continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (StructType *STy = GTI.getStructTypeOrNull()) { + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size * OpC->getSExtValue(); + } + + return Offset; +} + +Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + + // Handle the trivial case first. + if (Ptr1 == Ptr2) { + return 0; + } + + const GEPOperator *GEP1 = dyn_cast(Ptr1); + const GEPOperator *GEP2 = dyn_cast(Ptr2); + + // If one pointer is a GEP see if the GEP is a constant offset from the base, + // as in "P" and "gep P, 1". + // Also do this iteratively to handle the the following case: + // Ptr_t1 = GEP Ptr1, c1 + // Ptr_t2 = GEP Ptr_t1, c2 + // Ptr2 = GEP Ptr_t2, c3 + // where we will return c1+c2+c3. + // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base + // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases + // are the same, and return the difference between offsets. 
+ auto getOffsetFromBase = [&DL](const GEPOperator *GEP, + const Value *Ptr) -> Optional { + const GEPOperator *GEP_T = GEP; + int64_t OffsetVal = 0; + bool HasSameBase = false; + while (GEP_T) { + auto Offset = getOffsetFromIndex(GEP_T, 1, DL); + if (!Offset) + return None; + OffsetVal += *Offset; + auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); + if (Op0 == Ptr) { + HasSameBase = true; + break; + } + GEP_T = dyn_cast(Op0); + } + if (!HasSameBase) + return None; + return OffsetVal; + }; + + if (GEP1) { + auto Offset = getOffsetFromBase(GEP1, Ptr2); + if (Offset) + return -*Offset; + } + if (GEP2) { + auto Offset = getOffsetFromBase(GEP2, Ptr1); + if (Offset) + return Offset; + } + + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. After that base, they may have some number of common (and + // potentially variable) indices. After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return None; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!Offset1 || !Offset2) + return None; + return *Offset2 - *Offset1; +} diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 986756eb262..600f57ab9d7 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -56,6 +56,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: case Intrinsic::sqrt: // Begin floating-point. 
case Intrinsic::sin: case Intrinsic::cos: @@ -98,6 +99,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: return (ScalarOpdIdx == 2); default: return false; @@ -830,15 +832,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses( /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - PointerType *PtrTy = dyn_cast(Ptr->getType()); + PointerType *PtrTy = cast(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. - unsigned Align = getLoadStoreAlignment(&I); - if (!Align) - Align = DL.getABITypeAlignment(PtrTy->getElementType()); + MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); } } @@ -925,7 +927,7 @@ void InterleavedAccessInfo::analyzeInterleaving( if (!Group) { LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); - Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); + Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); @@ -964,6 +966,10 @@ void InterleavedAccessInfo::analyzeInterleaving( // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup *StoreGroup = getInterleaveGroup(A); + + LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " + "dependence between " << *A << " and "<< *B << '\n'); + StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -1028,7 +1034,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Group->getIndex(B) + DistanceToB / static_cast(DesB.Size); // Try to insert A into B's group. 
- if (Group->insertMember(A, IndexA, DesA.Align)) { + if (Group->insertMember(A, IndexA, DesA.Alignment)) { LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 72d2357c293..5292b0e6274 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -622,6 +622,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); KEYWORD(amdgpu_kernel); + KEYWORD(tailcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 87dff6468f2..594537307d0 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1122,7 +1122,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (ParseToken(lltok::StringConstant, "expected partition string")) return true; } else if (Lex.getKind() == lltok::kw_align) { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; GV->setAlignment(Alignment); } else if (Lex.getKind() == lltok::MetadataVar) { @@ -1229,12 +1229,13 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // As a hack, we allow function alignment to be initially parsed as an // attribute on a function declaration/definition or added to an attribute // group and later moved to the alignment field. 
- unsigned Alignment; + MaybeAlign Alignment; if (inAttrGrp) { Lex.Lex(); - if (ParseToken(lltok::equal, "expected '=' here") || - ParseUInt32(Alignment)) + uint32_t Value = 0; + if (ParseToken(lltok::equal, "expected '=' here") || ParseUInt32(Value)) return true; + Alignment = Align(Value); } else { if (ParseOptionalAlignment(Alignment)) return true; @@ -1603,7 +1604,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { continue; } case lltok::kw_align: { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; B.addAlignmentAttr(Alignment); @@ -1720,7 +1721,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { continue; } case lltok::kw_align: { - unsigned Alignment; + MaybeAlign Alignment; if (ParseOptionalAlignment(Alignment)) return true; B.addAlignmentAttr(Alignment); @@ -1955,6 +1956,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' /// ::= 'amdgpu_kernel' +/// ::= 'tailcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -2000,6 +2002,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; + case lltok::kw_tailcc: CC = CallingConv::Tail; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); @@ -2067,16 +2070,19 @@ bool LLParser::ParseOptionalFunctionMetadata(Function &F) { /// ParseOptionalAlignment /// ::= /* empty */ /// ::= 'align' 4 -bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { - Alignment = 0; +bool LLParser::ParseOptionalAlignment(MaybeAlign &Alignment) { + Alignment = None; if (!EatIfPresent(lltok::kw_align)) return false; LocTy AlignLoc = Lex.getLoc(); - if (ParseUInt32(Alignment)) return true; - if (!isPowerOf2_32(Alignment)) + uint32_t Value = 0; + if (ParseUInt32(Value)) + return 
true; + if (!isPowerOf2_32(Value)) return Error(AlignLoc, "alignment is not a power of two"); - if (Alignment > Value::MaximumAlignment) + if (Value > Value::MaximumAlignment) return Error(AlignLoc, "huge alignments are not supported yet"); + Alignment = Align(Value); return false; } @@ -2113,7 +2119,7 @@ bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, /// /// This returns with AteExtraComma set to true if it ate an excess comma at the /// end. -bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, +bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma) { AteExtraComma = false; while (EatIfPresent(lltok::comma)) { @@ -2551,6 +2557,7 @@ bool LLParser::ParseOptionalOperandBundles( /// bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, bool &isVarArg){ + unsigned CurValID = 0; isVarArg = false; assert(Lex.getKind() == lltok::lparen); Lex.Lex(); // eat the (. @@ -2575,6 +2582,12 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (Lex.getKind() == lltok::LocalVar) { Name = Lex.getStrVal(); Lex.Lex(); + } else if (Lex.getKind() == lltok::LocalVarID) { + if (Lex.getUIntVal() != CurValID) + return Error(TypeLoc, "argument expected to be numbered '%" + + Twine(CurValID) + "'"); + ++CurValID; + Lex.Lex(); } if (!FunctionType::isValidArgumentType(ArgTy)) @@ -2602,6 +2615,13 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, Name = Lex.getStrVal(); Lex.Lex(); } else { + if (Lex.getKind() == lltok::LocalVarID) { + if (Lex.getUIntVal() != CurValID) + return Error(TypeLoc, "argument expected to be numbered '%" + + Twine(CurValID) + "'"); + Lex.Lex(); + } + ++CurValID; Name = ""; } @@ -3093,7 +3113,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of struct constant")) return true; - ID.ConstantStructElts = make_unique(Elts.size()); + ID.ConstantStructElts = std::make_unique(Elts.size()); ID.UIntVal = Elts.size(); 
memcpy(ID.ConstantStructElts.get(), Elts.data(), Elts.size() * sizeof(Elts[0])); @@ -3115,7 +3135,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (isPackedStruct) { - ID.ConstantStructElts = make_unique(Elts.size()); + ID.ConstantStructElts = std::make_unique(Elts.size()); memcpy(ID.ConstantStructElts.get(), Elts.data(), Elts.size() * sizeof(Elts[0])); ID.UIntVal = Elts.size(); @@ -5354,7 +5374,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { LocTy BuiltinLoc; std::string Section; std::string Partition; - unsigned Alignment; + MaybeAlign Alignment; std::string GC; GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; unsigned AddrSpace = 0; @@ -5471,7 +5491,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setCallingConv(CC); Fn->setAttributes(PAL); Fn->setUnnamedAddr(UnnamedAddr); - Fn->setAlignment(Alignment); + Fn->setAlignment(MaybeAlign(Alignment)); Fn->setSection(Section); Fn->setPartition(Partition); Fn->setComdat(C); @@ -5788,7 +5808,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS); case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS); case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS); - case lltok::kw_phi: return ParsePHI(Inst, PFS); + case lltok::kw_phi: { + FastMathFlags FMF = EatFastMathFlagsIfPresent(); + int Res = ParsePHI(Inst, PFS); + if (Res != 0) + return Res; + if (FMF.any()) { + if (!Inst->getType()->isFPOrFPVectorTy()) + return Error(Loc, "fast-math-flags specified for phi without " + "floating-point scalar or vector return type"); + Inst->setFastMathFlags(FMF); + } + return 0; + } case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS); // Call. 
case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None); @@ -6837,7 +6869,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { Value *Size = nullptr; LocTy SizeLoc, TyLoc, ASLoc; - unsigned Alignment = 0; + MaybeAlign Alignment; unsigned AddrSpace = 0; Type *Ty = nullptr; @@ -6885,7 +6917,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { if (Size && !Size->getType()->isIntegerTy()) return Error(SizeLoc, "element count must have integer type"); - AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, Alignment); + AllocaInst *AI = + new AllocaInst(Ty, AddrSpace, Size, Alignment ? Alignment->value() : 0); AI->setUsedWithInAlloca(IsInAlloca); AI->setSwiftError(IsSwiftError); Inst = AI; @@ -6898,7 +6931,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; - unsigned Alignment = 0; + MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; @@ -6947,7 +6980,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { /// 'singlethread'? AtomicOrdering (',' 'align' i32)? 
int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; - unsigned Alignment = 0; + MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; @@ -8074,7 +8107,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID, if (ParseToken(lltok::rparen, "expected ')' here")) return true; - auto FS = llvm::make_unique( + auto FS = std::make_unique( GVFlags, InstCount, FFlags, /*EntryCount=*/0, std::move(Refs), std::move(Calls), std::move(TypeIdInfo.TypeTests), std::move(TypeIdInfo.TypeTestAssumeVCalls), @@ -8134,7 +8167,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID, return true; auto GS = - llvm::make_unique(GVFlags, GVarFlags, std::move(Refs)); + std::make_unique(GVFlags, GVarFlags, std::move(Refs)); GS->setModulePath(ModulePath); GS->setVTableFuncs(std::move(VTableFuncs)); @@ -8175,7 +8208,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID, if (ParseToken(lltok::rparen, "expected ')' here")) return true; - auto AS = llvm::make_unique(GVFlags); + auto AS = std::make_unique(GVFlags); AS->setModulePath(ModulePath); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 610e2e26200..abc423b4e3c 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -281,14 +281,14 @@ namespace llvm { void ParseOptionalVisibility(unsigned &Res); void ParseOptionalDLLStorageClass(unsigned &Res); bool ParseOptionalCallingConv(unsigned &CC); - bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalAlignment(MaybeAlign &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); bool ParseScope(SyncScope::ID &SSID); bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); - bool 
ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); + bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); bool ParseOptionalCommaInAlloca(bool &IsInAlloca); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0e9ba4db474..f49feb2dc14 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -168,6 +168,7 @@ enum Kind { kw_amdgpu_ps, kw_amdgpu_cs, kw_amdgpu_kernel, + kw_tailcc, // Attributes: kw_attributes, diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index b13c6237f41..b7f552a6fcc 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -42,7 +42,7 @@ llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots, bool UpgradeDebugInfo, StringRef DataLayoutString) { std::unique_ptr M = - make_unique(F.getBufferIdentifier(), Context); + std::make_unique(F.getBufferIdentifier(), Context); if (parseAssemblyInto(F, M.get(), nullptr, Err, Slots, UpgradeDebugInfo, DataLayoutString)) @@ -71,9 +71,9 @@ ParsedModuleAndIndex llvm::parseAssemblyWithIndex( MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, SlotMapping *Slots, bool UpgradeDebugInfo, StringRef DataLayoutString) { std::unique_ptr M = - make_unique(F.getBufferIdentifier(), Context); + std::make_unique(F.getBufferIdentifier(), Context); std::unique_ptr Index = - make_unique(/*HaveGVs=*/true); + std::make_unique(/*HaveGVs=*/true); if (parseAssemblyInto(F, M.get(), Index.get(), Err, Slots, UpgradeDebugInfo, DataLayoutString)) @@ -123,7 +123,7 @@ static bool parseSummaryIndexAssemblyInto(MemoryBufferRef F, std::unique_ptr llvm::parseSummaryIndexAssembly(MemoryBufferRef F, SMDiagnostic &Err) { std::unique_ptr Index = - make_unique(/*HaveGVs=*/false); + std::make_unique(/*HaveGVs=*/false); if (parseSummaryIndexAssemblyInto(F, *Index, Err)) return nullptr; diff --git 
a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp index eb6bd33ce58..d06cccdf0df 100644 --- a/lib/BinaryFormat/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -22,7 +22,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; #include "llvm/BinaryFormat/Dwarf.def" @@ -31,7 +31,7 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ .Case("DW_TAG_" #NAME, DW_TAG_##NAME) #include "llvm/BinaryFormat/Dwarf.def" .Default(DW_TAG_invalid); @@ -41,7 +41,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { switch (Tag) { default: return 0; -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return VERSION; #include "llvm/BinaryFormat/Dwarf.def" @@ -52,7 +52,7 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { switch (Tag) { default: return 0; -#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR, KIND) \ case DW_TAG_##NAME: \ return DWARF_VENDOR_##VENDOR; #include "llvm/BinaryFormat/Dwarf.def" @@ -149,6 +149,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { return "DW_OP_LLVM_fragment"; case DW_OP_LLVM_tag_offset: return "DW_OP_LLVM_tag_offset"; + case DW_OP_LLVM_entry_value: + return "DW_OP_LLVM_entry_value"; } } @@ -160,6 +162,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { .Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert) .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset) + .Case("DW_OP_LLVM_entry_value", DW_OP_LLVM_entry_value) .Default(0); } @@ 
-472,6 +475,17 @@ StringRef llvm::dwarf::RangeListEncodingString(unsigned Encoding) { } } +StringRef llvm::dwarf::LocListEncodingString(unsigned Encoding) { + switch (Encoding) { + default: + return StringRef(); +#define HANDLE_DW_LLE(ID, NAME) \ + case DW_LLE_##NAME: \ + return "DW_LLE_" #NAME; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::CallFrameString(unsigned Encoding, Triple::ArchType Arch) { assert(Arch != llvm::Triple::ArchType::UnknownArch); diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp index 7dfe23690a5..bbcbbabeded 100644 --- a/lib/BinaryFormat/Magic.cpp +++ b/lib/BinaryFormat/Magic.cpp @@ -210,6 +210,11 @@ file_magic llvm::identify_magic(StringRef Magic) { return file_magic::coff_object; break; + case 0x2d: // YAML '-' + if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:")) + return file_magic::tapi_file; + break; + default: break; } diff --git a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index 9c30d563a31..e70caa83c8c 100644 --- a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -434,6 +434,13 @@ static Expected ReadSignature(BitstreamCursor &Stream) { return std::move(Err); if (Signature[2] == 'A' && Signature[3] == 'G') return ClangSerializedDiagnosticsBitstream; + } else if (Signature[0] == 'R' && Signature[1] == 'M') { + if (Error Err = tryRead(Signature[2], 8)) + return std::move(Err); + if (Error Err = tryRead(Signature[3], 8)) + return std::move(Err); + if (Signature[2] == 'R' && Signature[3] == 'K') + return LLVMBitstreamRemarks; } else { if (Error Err = tryRead(Signature[2], 4)) return std::move(Err); @@ -627,6 +634,9 @@ void BitcodeAnalyzer::printStats(BCDumpOptions O, case ClangSerializedDiagnosticsBitstream: O.OS << "Clang Serialized Diagnostics\n"; break; + case LLVMBitstreamRemarks: + O.OS << "LLVM Remarks\n"; + break; } O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; O.OS << "\n"; diff 
--git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 29dc7f61639..15eead1de31 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -722,7 +722,7 @@ private: /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. - Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); + Error parseAlignmentValue(uint64_t Exponent, MaybeAlign &Alignment); Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false); @@ -1063,7 +1063,7 @@ static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) { switch (Val) { default: return -1; - case bitc::UNOP_NEG: + case bitc::UNOP_FNEG: return IsFP ? Instruction::FNeg : -1; } } @@ -1544,12 +1544,12 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { } Error BitcodeReader::parseAlignmentValue(uint64_t Exponent, - unsigned &Alignment) { + MaybeAlign &Alignment) { // Note: Alignment in bitcode files is incremented by 1, so that zero // can be used for default alignment. if (Exponent > Value::MaxAlignmentExponent + 1) return error("Invalid alignment value"); - Alignment = (1 << static_cast(Exponent)) >> 1; + Alignment = decodeMaybeAlign(Exponent); return Error::success(); } @@ -2377,6 +2377,8 @@ Error BitcodeReader::parseConstants() { CurTy = flattenPointerTypes(CurFullTy); continue; // Skip the ValueList manipulation. 
case bitc::CST_CODE_NULL: // NULL + if (CurTy->isVoidTy() || CurTy->isFunctionTy() || CurTy->isLabelTy()) + return error("Invalid type for a constant null value"); V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] @@ -3110,7 +3112,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - unsigned Alignment; + MaybeAlign Alignment; if (Error Err = parseAlignmentValue(Record[4], Alignment)) return Err; std::string Section; @@ -3241,7 +3243,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { Context, getPointerElementFlatType(PTy))); } - unsigned Alignment; + MaybeAlign Alignment; if (Error Err = parseAlignmentValue(Record[5], Alignment)) return Err; Func->setAlignment(Alignment); @@ -3646,6 +3648,11 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, break; } Record.clear(); + + // Upgrade data layout string. + std::string DL = llvm::UpgradeDataLayoutString( + TheModule->getDataLayoutStr(), TheModule->getTargetTriple()); + TheModule->setDataLayout(DL); } } @@ -4622,31 +4629,48 @@ Error BitcodeReader::parseFunctionBody(Function *F) { InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] - if (Record.size() < 1 || ((Record.size()-1)&1)) + if (Record.size() < 1) return error("Invalid record"); + // The first record specifies the type. FullTy = getFullyStructuredTypeByID(Record[0]); Type *Ty = flattenPointerTypes(FullTy); if (!Ty) return error("Invalid record"); - PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); + // Phi arguments are pairs of records of [value, basic block]. + // There is an optional final record for fast-math-flags if this phi has a + // floating-point type. 
+ size_t NumArgs = (Record.size() - 1) / 2; + if ((Record.size() - 1) % 2 == 1 && !Ty->isFPOrFPVectorTy()) + return error("Invalid record"); + + PHINode *PN = PHINode::Create(Ty, NumArgs); InstructionList.push_back(PN); - for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + for (unsigned i = 0; i != NumArgs; i++) { Value *V; // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. if (UseRelativeIDs) - V = getValueSigned(Record, 1+i, NextValueNo, Ty); + V = getValueSigned(Record, i * 2 + 1, NextValueNo, Ty); else - V = getValue(Record, 1+i, NextValueNo, Ty); - BasicBlock *BB = getBasicBlock(Record[2+i]); + V = getValue(Record, i * 2 + 1, NextValueNo, Ty); + BasicBlock *BB = getBasicBlock(Record[i * 2 + 2]); if (!V || !BB) return error("Invalid record"); PN->addIncoming(V, BB); } I = PN; + + // If there are an even number of records, the final record must be FMF. + if (Record.size() % 2 == 0) { + assert(isa(I) && "Unexpected phi type"); + FastMathFlags FMF = getDecodedFastMathFlags(Record[Record.size() - 1]); + if (FMF.any()) + I->setFastMathFlags(FMF); + } + break; } @@ -4726,7 +4750,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) { return Err; } @@ -4737,7 +4761,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { const DataLayout &DL = TheModule->getDataLayout(); unsigned AS = DL.getAllocaAddrSpace(); - AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align); + AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align ? 
Align->value() : 0); AI->setUsedWithInAlloca(InAlloca); AI->setSwiftError(SwiftError); I = AI; @@ -4765,7 +4789,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType())) return Err; - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align); @@ -4802,7 +4826,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align, Ordering, SSID); @@ -4824,10 +4848,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return Err; - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align); + I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align); InstructionList.push_back(I); break; } @@ -4857,10 +4881,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); - unsigned Align; + MaybeAlign Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); + I = new StoreInst(Val, Ptr, Record[OpNum + 1], Align, Ordering, SSID); InstructionList.push_back(I); break; } @@ -5312,7 +5336,7 @@ Error BitcodeReader::materializeModule() { UpgradeModuleFlags(*TheModule); - UpgradeRetainReleaseMarker(*TheModule); + UpgradeARCRuntime(*TheModule); return Error::success(); } @@ -5874,7 +5898,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { ArrayRef(Record).slice(CallGraphEdgeStartIndex), 
IsOldProfileFormat, HasProfile, HasRelBF); setSpecialRefs(Refs, NumRORefs, NumWORefs); - auto FS = llvm::make_unique( + auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0, std::move(Refs), std::move(Calls), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), @@ -5900,7 +5924,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { uint64_t RawFlags = Record[1]; unsigned AliaseeID = Record[2]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single @@ -5934,7 +5958,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Refs = makeRefList(ArrayRef(Record).slice(RefArrayStart)); auto FS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); FS->setModulePath(getThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -5961,7 +5985,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { VTableFuncs.push_back({Callee, Offset}); } auto VS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); VS->setModulePath(getThisModule()->first()); VS->setVTableFuncs(VTableFuncs); auto GUID = getValueInfoFromValueId(ValueID); @@ -6019,7 +6043,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; setSpecialRefs(Refs, NumRORefs, NumWORefs); - auto FS = llvm::make_unique( + auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount, std::move(Refs), std::move(Edges), 
std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), @@ -6046,7 +6070,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { uint64_t RawFlags = Record[2]; unsigned AliaseeValueId = Record[3]; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); - auto AS = llvm::make_unique(Flags); + auto AS = std::make_unique(Flags); LastSeenSummary = AS.get(); AS->setModulePath(ModuleIdMap[ModuleId]); @@ -6075,7 +6099,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { std::vector Refs = makeRefList(ArrayRef(Record).slice(RefArrayStart)); auto FS = - llvm::make_unique(Flags, GVF, std::move(Refs)); + std::make_unique(Flags, GVF, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; @@ -6438,7 +6462,7 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, Context); std::unique_ptr M = - llvm::make_unique(ModuleIdentifier, Context); + std::make_unique(ModuleIdentifier, Context); M->setMaterializer(R); // Delay parsing Metadata if ShouldLazyLoadMetadata is true. 
@@ -6485,7 +6509,7 @@ Expected> BitcodeModule::getSummary() { if (Error JumpFailed = Stream.JumpToBit(ModuleBit)) return std::move(JumpFailed); - auto Index = llvm::make_unique(/*HaveGVs=*/false); + auto Index = std::make_unique(/*HaveGVs=*/false); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, ModuleIdentifier, 0); diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index 108f7118958..4da51dda8b7 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -515,7 +515,7 @@ class MetadataLoader::MetadataLoaderImpl { GV.getMetadata(LLVMContext::MD_dbg, MDs); GV.eraseMetadata(LLVMContext::MD_dbg); for (auto *MD : MDs) - if (auto *DGV = dyn_cast_or_null(MD)) { + if (auto *DGV = dyn_cast(MD)) { auto *DGVE = DIGlobalVariableExpression::getDistinct( Context, DGV, DIExpression::get(Context, {})); GV.addMetadata(LLVMContext::MD_dbg, *DGVE); @@ -987,7 +987,7 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata( assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); // Lookup first if the metadata hasn't already been loaded. 
if (auto *MD = MetadataList.lookup(ID)) { - auto *N = dyn_cast_or_null(MD); + auto *N = cast(MD); if (!N->isTemporary()) return; } @@ -2133,7 +2133,7 @@ MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, bool IsImporting, std::function getTypeByID) - : Pimpl(llvm::make_unique( + : Pimpl(std::make_unique( Stream, TheModule, ValueList, std::move(getTypeByID), IsImporting)) {} Error MetadataLoader::parseMetadata(bool ModuleLevel) { diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 76ca89147e5..be59c1f9283 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -19,7 +19,7 @@ using namespace llvm; int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OF_None); if (EC) return -1; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5c7b970a3a7..deb4019ea8b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -86,7 +86,7 @@ static cl::opt cl::desc("Number of metadatas above which we emit an index " "to enable lazy-loading")); -cl::opt WriteRelBFToSummary( +static cl::opt WriteRelBFToSummary( "write-relbf-to-summary", cl::Hidden, cl::init(false), cl::desc("Write relative block frequency to function summary ")); @@ -520,7 +520,7 @@ static unsigned getEncodedCastOpcode(unsigned Opcode) { static unsigned getEncodedUnaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); - case Instruction::FNeg: return bitc::UNOP_NEG; + case Instruction::FNeg: return bitc::UNOP_FNEG; } } @@ -2880,6 +2880,11 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, pushValueSigned(PN.getIncomingValue(i), InstID, Vals64); Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } + + uint64_t Flags = 
getOptimizationFlags(&I); + if (Flags != 0) + Vals64.push_back(Flags); + // Emit a Vals64 vector and exit. Stream.EmitRecord(Code, Vals64, AbbrevToUse); Vals64.clear(); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 444f618d8b8..f64b775a8b7 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -232,7 +232,7 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, if (!MO.isReg() || !MO.isImplicit()) return false; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) return false; @@ -252,7 +252,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs( if (!MO.isReg()) continue; if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { - const unsigned Reg = MO.getReg(); + const Register Reg = MO.getReg(); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) PassthruRegs.insert(*SubRegs); @@ -365,7 +365,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); @@ -375,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" @@ -418,7 +418,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // 
Ignore KILLs and passthru registers for liveness... if (MI.isKill() || (PassthruRegs.count(Reg) != 0)) @@ -471,7 +471,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" @@ -506,7 +506,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (FirstReg != 0) { @@ -790,7 +790,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( CriticalPathSU = SU; } } - + assert(CriticalPathSU && "Failed to find SUnit critical path"); CriticalPathMI = CriticalPathSU->getInstr(); } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index d158e70b86a..4f24f077d12 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V, NoopInput = Op; } else if (isa(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { - DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + DataBits = std::min((uint64_t)DataBits, + I->getType()->getPrimitiveSizeInBits().getFixedSize()); NoopInput = Op; } else if (auto CS = ImmutableCallSite(I)) { const Value *ReturnedOp = CS.getReturnedArgOperand(); @@ -523,7 +524,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. 
if (!Ret && - (!TM.Options.GuaranteedTailCallOpt || !isa(Term))) + ((!TM.Options.GuaranteedTailCallOpt && + CS.getCallingConv() != CallingConv::Tail) || !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a @@ -536,9 +538,11 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { // Debug info intrinsics do not get in the way of tail call optimization. if (isa(BBI)) continue; - // A lifetime end intrinsic should not stop tail call optimization. + // A lifetime end or assume intrinsic should not stop tail call + // optimization. if (const IntrinsicInst *II = dyn_cast(BBI)) - if (II->getIntrinsicID() == Intrinsic::lifetime_end) + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::assume) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !isSafeToSpeculativelyExecute(&*BBI)) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 54f6cc2d557..73c53d6c4af 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -91,10 +91,12 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" @@ -159,30 +161,30 @@ static gcp_map_type &getGCMap(void *&P) { return *(gcp_map_type*)P; } -/// getGVAlignmentLog2 - Return the alignment to use for the specified global -/// value in log2 form. This rounds up to the preferred alignment if possible -/// and legal. 
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, - unsigned InBits = 0) { - unsigned NumBits = 0; +/// getGVAlignment - Return the alignment to use for the specified global +/// value. This rounds up to the preferred alignment if possible and legal. +Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign) { + Align Alignment; if (const GlobalVariable *GVar = dyn_cast(GV)) - NumBits = DL.getPreferredAlignmentLog(GVar); + Alignment = Align(DL.getPreferredAlignment(GVar)); - // If InBits is specified, round it to it. - if (InBits > NumBits) - NumBits = InBits; + // If InAlign is specified, round it to it. + if (InAlign > Alignment) + Alignment = InAlign; // If the GV has a specified alignment, take it into account. - if (GV->getAlignment() == 0) - return NumBits; + const MaybeAlign GVAlign(GV->getAlignment()); + if (!GVAlign) + return Alignment; - unsigned GVAlign = Log2_32(GV->getAlignment()); + assert(GVAlign && "GVAlign must be set"); // If the GVAlign is larger than NumBits, or if we are required to obey // NumBits because the GV has an assigned section, obey it. - if (GVAlign > NumBits || GV->hasSection()) - NumBits = GVAlign; - return NumBits; + if (*GVAlign > Alignment || GV->hasSection()) + Alignment = *GVAlign; + return Alignment; } AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr Streamer) @@ -248,13 +250,14 @@ const MCSection *AsmPrinter::getCurrentSection() const { void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { - MMI = getAnalysisIfAvailable(); + auto *MMIWP = getAnalysisIfAvailable(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; // Initialize TargetLoweringObjectFile. 
const_cast(getObjFileLowering()) @@ -311,7 +314,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { - Handlers.emplace_back(llvm::make_unique(this), + Handlers.emplace_back(std::make_unique(this), DbgTimerName, DbgTimerDescription, CodeViewLineTablesGroupName, CodeViewLineTablesGroupDescription); @@ -380,7 +383,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (mdconst::extract_or_null( MMI->getModule()->getModuleFlag("cfguardtable"))) - Handlers.emplace_back(llvm::make_unique(this), CFGuardName, + Handlers.emplace_back(std::make_unique(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); @@ -425,7 +428,10 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: + return; case GlobalValue::InternalLinkage: + if (MAI->hasDotLGloblDirective()) + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal); return; case GlobalValue::AppendingLinkage: case GlobalValue::AvailableExternallyLinkage: @@ -501,7 +507,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, DL); + const Align Alignment = getGVAlignment(GV, DL); for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerDescription, @@ -513,12 +519,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common symbols if (GVKind.isCommon()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
- unsigned Align = 1 << AlignLog; - if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) - Align = 0; - // .comm _foo, 42, 4 - OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? Alignment.value() : 0); return; } @@ -531,10 +536,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { TheSection->isVirtualSection()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - unsigned Align = 1 << AlignLog; EmitLinkage(GV, GVSym); // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value()); return; } @@ -544,7 +548,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { getObjFileLowering().getBSSSection() == TheSection) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - unsigned Align = 1 << AlignLog; // Use .lcomm only if it supports user-specified alignment. // Otherwise, while it would still be correct to use .lcomm in some @@ -554,17 +557,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Prefer to simply fall back to .local / .comm in this case. if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 - OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align); + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value()); return; } - if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) - Align = 0; - // .local _foo OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? 
Alignment.value() : 0); return; } @@ -585,11 +588,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isThreadBSS()) { TheSection = getObjFileLowering().getTLSBSSSection(); - OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); } else if (GVKind.isThreadData()) { OutStreamer->SwitchSection(TheSection); - EmitAlignment(AlignLog, GV); + EmitAlignment(Alignment, GV); OutStreamer->EmitLabel(MangSym); EmitGlobalConstant(GV->getParent()->getDataLayout(), @@ -625,7 +628,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); EmitLinkage(GV, EmittedInitSym); - EmitAlignment(AlignLog, GV); + EmitAlignment(Alignment, GV); OutStreamer->EmitLabel(EmittedInitSym); @@ -664,6 +667,10 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); EmitVisibility(CurrentFnSym, F.getVisibility()); + if (MAI->needsFunctionDescriptors() && + F.getLinkage() != GlobalValue::InternalLinkage) + EmitLinkage(&F, CurrentFnDescSym); + EmitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) EmitAlignment(MF->getAlignment(), &F); @@ -699,8 +706,13 @@ void AsmPrinter::EmitFunctionHeader() { } } - // Emit the CurrentFnSym. This is a virtual function to allow targets to - // do their wild and crazy things as required. + // Emit the function descriptor. This is a virtual function to allow targets + // to emit their specific function descriptor. + if (MAI->needsFunctionDescriptors()) + EmitFunctionDescriptor(); + + // Emit the CurrentFnSym. This is a virtual function to allow targets to do + // their wild and crazy things as required. 
EmitFunctionEntryLabel(); // If the function had address-taken blocks that got deleted, then we have @@ -783,7 +795,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { - unsigned RegNo = MI->getOperand(0).getReg(); + Register RegNo = MI->getOperand(0).getReg(); SmallString<128> Str; raw_svector_ostream OS(Str); @@ -910,7 +922,8 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { OS << "DEBUG_LABEL: "; const DILabel *V = MI->getDebugLabel(); - if (auto *SP = dyn_cast(V->getScope())) { + if (auto *SP = dyn_cast( + V->getScope()->getNonLexicalBlockFileScope())) { StringRef Name = SP->getName(); if (!Name.empty()) OS << Name << ":"; @@ -1024,7 +1037,7 @@ void AsmPrinter::EmitFunctionBody() { // Get MachineDominatorTree or compute it on the fly if it's unavailable MDT = getAnalysisIfAvailable(); if (!MDT) { - OwnedMDT = make_unique(); + OwnedMDT = std::make_unique(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); } @@ -1032,7 +1045,7 @@ void AsmPrinter::EmitFunctionBody() { // Get MachineLoopInfo or compute it on the fly if it's unavailable MLI = getAnalysisIfAvailable(); if (!MLI) { - OwnedMLI = make_unique(); + OwnedMLI = std::make_unique(); OwnedMLI->getBase().analyze(MDT->getBase()); MLI = OwnedMLI.get(); } @@ -1052,9 +1065,13 @@ void AsmPrinter::EmitFunctionBody() { ++NumInstsInFunction; } - // If there is a pre-instruction symbol, emit a label for it here. + // If there is a pre-instruction symbol, emit a label for it here. If the + // instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. 
if (MCSymbol *S = MI.getPreInstrSymbol()) - OutStreamer->EmitLabel(S); + if (S->isUndefined()) + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1107,9 +1124,13 @@ void AsmPrinter::EmitFunctionBody() { break; } - // If there is a post-instruction symbol, emit a label for it here. + // If there is a post-instruction symbol, emit a label for it here. If + // the instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. if (MCSymbol *S = MI.getPostInstrSymbol()) - OutStreamer->EmitLabel(S); + if (S->isUndefined()) + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1313,11 +1334,10 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, // Set the symbol type to function if the alias has a function type. // This affects codegen when the aliasee is not a function. - if (IsFunction) { - OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); - if (isa(GIS)) - OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction); - } + if (IsFunction) + OutStreamer->EmitSymbolAttribute(Name, isa(GIS) + ? MCSA_ELF_TypeIndFunction + : MCSA_ELF_TypeFunction); EmitVisibility(Name, GIS.getVisibility()); @@ -1349,60 +1369,28 @@ void AsmPrinter::emitRemarksSection(Module &M) { RemarkStreamer *RS = M.getContext().getRemarkStreamer(); if (!RS) return; - const remarks::Serializer &Serializer = RS->getSerializer(); + remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer(); + + Optional> Filename; + if (Optional FilenameRef = RS->getFilename()) { + Filename = *FilenameRef; + sys::fs::make_absolute(*Filename); + assert(!Filename->empty() && "The filename can't be empty."); + } + + std::string Buf; + raw_string_ostream OS(Buf); + std::unique_ptr MetaSerializer = + Filename ? 
RemarkSerializer.metaSerializer(OS, StringRef(*Filename)) + : RemarkSerializer.metaSerializer(OS); + MetaSerializer->emit(); // Switch to the right section: .remarks/__remarks. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); OutStreamer->SwitchSection(RemarksSection); - // Emit the magic number. - OutStreamer->EmitBytes(remarks::Magic); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); - - // Emit the version number: little-endian uint64_t. - // The version number is located at the offset 0x0 in the section. - std::array Version; - support::endian::write64le(Version.data(), remarks::Version); - OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size())); - - // Emit the string table in the section. - // Note: we need to use the streamer here to emit it in the section. We can't - // just use the serialize function with a raw_ostream because of the way - // MCStreamers work. - uint64_t StrTabSize = - Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0; - // Emit the total size of the string table (the size itself excluded): - // little-endian uint64_t. - // The total size is located after the version number. - // Note: even if no string table is used, emit 0. - std::array StrTabSizeBuf; - support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); - OutStreamer->EmitBinaryData( - StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size())); - - if (const Optional &StrTab = Serializer.StrTab) { - std::vector StrTabStrings = StrTab->serialize(); - // Emit a list of null-terminated strings. - // Note: the order is important here: the ID used in the remarks corresponds - // to the position of the string in the section. - for (StringRef Str : StrTabStrings) { - OutStreamer->EmitBytes(Str); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); - } - } - - // Emit the null-terminated absolute path to the remark file. 
- // The path is located at the offset 0x4 in the section. - StringRef FilenameRef = RS->getFilename(); - SmallString<128> Filename = FilenameRef; - sys::fs::make_absolute(Filename); - assert(!Filename.empty() && "The filename can't be empty."); - OutStreamer->EmitBytes(Filename); - // Explicitly emit a '\0'. - OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1); + OutStreamer->EmitBinaryData(OS.str()); } bool AsmPrinter::doFinalization(Module &M) { @@ -1455,7 +1443,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer->SwitchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); - EmitAlignment(Log2_32(DL.getPointerSize())); + EmitAlignment(Align(DL.getPointerSize())); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1482,7 +1470,7 @@ bool AsmPrinter::doFinalization(Module &M) { COFF::IMAGE_SCN_LNK_COMDAT, SectionKind::getReadOnly(), Stub.first->getName(), COFF::IMAGE_COMDAT_SELECT_ANY)); - EmitAlignment(Log2_32(DL.getPointerSize())); + EmitAlignment(Align(DL.getPointerSize())); OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global); OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1607,8 +1595,7 @@ bool AsmPrinter::doFinalization(Module &M) { "expected llvm.used to be an array type"); if (const auto *A = cast(LU->getInitializer())) { for (const Value *Op : A->operands()) { - const auto *GV = - cast(Op->stripPointerCastsNoFollowAliases()); + const auto *GV = cast(Op->stripPointerCasts()); // Global symbols with internal or private linkage are not visible to // the linker, and thus would cause an error when the linker tried to // preserve the symbol due to the `/include:` directive. @@ -1679,8 +1666,27 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; + // Get the function symbol. 
- CurrentFnSym = getSymbol(&MF.getFunction()); + if (MAI->needsFunctionDescriptors()) { + assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only" + " supported on AIX."); + assert(CurrentFnDescSym && "The function descriptor symbol needs to be" + " initalized first."); + + // Get the function entry point symbol. + CurrentFnSym = + OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); + + const Function &F = MF.getFunction(); + MCSectionXCOFF *FnEntryPointSec = + cast(getObjFileLowering().SectionForGlobal(&F, TM)); + // Set the containing csect. + cast(CurrentFnSym)->setContainingCsect(FnEntryPointSec); + } else { + CurrentFnSym = getSymbol(&MF.getFunction()); + } + CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; CurExceptionSym = nullptr; @@ -1765,7 +1771,7 @@ void AsmPrinter::EmitConstantPool() { if (CurSection != CPSections[i].S) { OutStreamer->SwitchSection(CPSections[i].S); - EmitAlignment(Log2_32(CPSections[i].Alignment)); + EmitAlignment(Align(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; } @@ -1812,7 +1818,7 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); + EmitAlignment(Align(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. 
@@ -2025,10 +2031,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, } // Emit the function pointers in the target-specific order - unsigned Align = Log2_32(DL.getPointerPrefAlignment()); llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); + const Align Align = DL.getPointerPrefAlignment(); for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); const MCSymbol *KeySym = nullptr; @@ -2149,23 +2155,20 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of -// two boundary. For example, if you pass in 3 here, you will get an 8 -// byte alignment. If a global value is specified, and if that global has +// two boundary. If a global value is specified, and if that global has // an explicit alignment requested, it will override the alignment request // if required for correctness. -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { +void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); + Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment); - if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + if (Alignment == Align::None()) + return; // 1-byte aligned: no need to emit alignment. 
- assert(NumBits < - static_cast(std::numeric_limits::digits) && - "undefined behavior"); if (getCurrentSection()->getKind().isText()) - OutStreamer->EmitCodeAlignment(1u << NumBits); + OutStreamer->EmitCodeAlignment(Alignment.value()); else - OutStreamer->EmitValueToAlignment(1u << NumBits); + OutStreamer->EmitValueToAlignment(Alignment.value()); } //===----------------------------------------------------------------------===// @@ -2481,6 +2484,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL, } static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { + assert(ET && "Unknown float type"); APInt API = APF.bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -2488,11 +2492,7 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { if (AP.isVerbose()) { SmallString<8> StrVal; APF.toString(StrVal); - - if (ET) - ET->print(AP.OutStreamer->GetCommentOS()); - else - AP.OutStreamer->GetCommentOS() << "Printing Type"; + ET->print(AP.OutStreamer->GetCommentOS()); AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; } @@ -2670,7 +2670,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, const GlobalValue *FinalGV = dyn_cast(GV->getOperand(0)); const MCSymbol *FinalSym = AP.getSymbol(FinalGV); *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( - FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); // Update GOT equivalent usage information --NumUses; @@ -2930,7 +2930,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. 
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { // End the previous funclet and start a new one. if (MBB.isEHFuncletEntry()) { for (const HandlerInfo &HI : Handlers) { @@ -2940,8 +2940,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { } // Emit an alignment directive for this block, if needed. - if (unsigned Align = MBB.getAlignment()) - EmitAlignment(Align); + const Align Alignment = MBB.getAlignment(); + if (Alignment != Align::None()) + EmitAlignment(Alignment); MCCodePaddingContext Context; setupCodePaddingContext(MBB, Context); OutStreamer->EmitCodePaddingBasicBlockStart(Context); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 7721e996aca..420df26a2b8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -72,7 +72,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, const MDNode *LocMDNode) const { if (!DiagInfo) { - DiagInfo = make_unique(); + DiagInfo = std::make_unique(); MCContext &Context = MMI->getContext(); Context.setInlineSourceManager(&DiagInfo->SrcMgr); @@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); } else if (MI->getOperand(OpNo).isMBB()) { const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); Sym->print(OS, AP->MAI); diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index db2ff458eb2..09f7496cd4e 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -73,18 +73,18 @@ class 
HashingByteStreamer final : public ByteStreamer { class BufferByteStreamer final : public ByteStreamer { private: SmallVectorImpl &Buffer; - SmallVectorImpl &Comments; + std::vector &Comments; +public: /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. If false, comments passed in to /// the emit methods will be ignored. - bool GenerateComments; + const bool GenerateComments; -public: BufferByteStreamer(SmallVectorImpl &Buffer, - SmallVectorImpl &Comments, - bool GenerateComments) - : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {} + std::vector &Comments, bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) { + } void EmitInt8(uint8_t Byte, const Twine &Comment) override { Buffer.push_back(Byte); if (GenerateComments) diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 932959c311f..c6457f3626d 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -98,7 +98,8 @@ using namespace llvm::codeview; namespace { class CVMCAdapter : public CodeViewRecordStreamer { public: - CVMCAdapter(MCStreamer &OS) : OS(&OS) {} + CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable) + : OS(&OS), TypeTable(TypeTable) {} void EmitBytes(StringRef Data) { OS->EmitBytes(Data); } @@ -110,8 +111,24 @@ public: void AddComment(const Twine &T) { OS->AddComment(T); } + void AddRawComment(const Twine &T) { OS->emitRawComment(T); } + + bool isVerboseAsm() { return OS->isVerboseAsm(); } + + std::string getTypeName(TypeIndex TI) { + std::string TypeName; + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = TypeTable.getTypeName(TI); + } + return TypeName; + } + private: MCStreamer *OS = nullptr; + TypeCollection &TypeTable; }; } // namespace @@ -617,13 +634,6 @@ emitNullTerminatedSymbolName(MCStreamer &OS, 
StringRef S, OS.EmitBytes(NullTerminatedString); } -static StringRef getTypeLeafName(TypeLeafKind TypeKind) { - for (const EnumEntry &EE : getTypeLeafNames()) - if (EE.Value == TypeKind) - return EE.Name; - return ""; -} - void CodeViewDebug::emitTypeInformation() { if (TypeTable.empty()) return; @@ -632,30 +642,11 @@ void CodeViewDebug::emitTypeInformation() { OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); emitCodeViewMagicVersion(); - SmallString<8> CommentPrefix; - if (OS.isVerboseAsm()) { - CommentPrefix += '\t'; - CommentPrefix += Asm->MAI->getCommentString(); - CommentPrefix += ' '; - } - TypeTableCollection Table(TypeTable.records()); - SmallString<512> CommentBlock; - raw_svector_ostream CommentOS(CommentBlock); - std::unique_ptr SP; - std::unique_ptr TDV; TypeVisitorCallbackPipeline Pipeline; - if (OS.isVerboseAsm()) { - // To construct block comment describing the type record for readability. - SP = llvm::make_unique(CommentOS); - SP->setPrefix(CommentPrefix); - TDV = llvm::make_unique(Table, SP.get(), false); - Pipeline.addCallbackToPipeline(*TDV); - } - // To emit type record using Codeview MCStreamer adapter - CVMCAdapter CVMCOS(OS); + CVMCAdapter CVMCOS(OS, Table); TypeRecordMapping typeMapping(CVMCOS); Pipeline.addCallbackToPipeline(typeMapping); @@ -664,17 +655,6 @@ void CodeViewDebug::emitTypeInformation() { // This will fail if the record data is invalid. 
CVType Record = Table.getType(*B); - CommentBlock.clear(); - - auto RecordLen = Record.length(); - auto RecordKind = Record.kind(); - if (OS.isVerboseAsm()) - CVMCOS.AddComment("Record length"); - CVMCOS.EmitIntValue(RecordLen - 2, 2); - if (OS.isVerboseAsm()) - CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind)); - CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind)); - Error E = codeview::visitTypeRecord(Record, *B, Pipeline); if (E) { @@ -682,13 +662,6 @@ void CodeViewDebug::emitTypeInformation() { llvm_unreachable("produced malformed type record"); } - if (OS.isVerboseAsm()) { - // emitRawComment will insert its own tab and comment string before - // the first line, so strip off our first one. It also prints its own - // newline. - OS.emitRawComment( - CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } B = Table.getNext(*B); } } @@ -1135,7 +1108,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, if (!BeginLabel->isDefined() || !EndLabel->isDefined()) continue; - DIType *DITy = std::get<2>(HeapAllocSite); + const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); @@ -1363,7 +1336,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); const Function &GV = MF->getFunction(); - auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique()}); + auto Insertion = FnDebugInfo.insert({&GV, std::make_unique()}); assert(Insertion.second && "function already has info"); CurFn = Insertion.first->second.get(); CurFn->FuncId = NextFuncId++; @@ -2633,17 +2606,6 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI, emitLocalVariable(FI, L); } -/// Only call this on endian-specific types like ulittle16_t and little32_t, or -/// structs composed of them. 
-template -static void copyBytesForDefRange(SmallString<20> &BytePrefix, - SymbolKind SymKind, const T &DefRangeHeader) { - BytePrefix.resize(2 + sizeof(T)); - ulittle16_t SymKindLE = ulittle16_t(SymKind); - memcpy(&BytePrefix[0], &SymKindLE, 2); - memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T)); -} - void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var) { // LocalSym record, see SymbolRecord.h for more info. @@ -2692,8 +2654,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, (bool(Flags & LocalSymFlags::IsParameter) ? (EncFP == FI.EncodedParamFramePtrReg) : (EncFP == FI.EncodedLocalFramePtrReg))) { - little32_t FPOffset = little32_t(Offset); - copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset); + DefRangeFramePointerRelHeader DRHdr; + DRHdr.Offset = Offset; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2701,28 +2664,27 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, (DefRange.StructOffset << DefRangeRegisterRelSym::OffsetInParentShift); } - DefRangeRegisterRelSym::Header DRHdr; + DefRangeRegisterRelHeader DRHdr; DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); if (DefRange.IsSubfield) { - DefRangeSubfieldRegisterSym::Header DRHdr; + DefRangeSubfieldRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } else { - DefRangeRegisterSym::Header DRHdr; + DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - 
copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr); + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); } } - OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); } } @@ -2896,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { CurFn = nullptr; } +// Usable locations are valid with non-zero line numbers. A line number of zero +// corresponds to optimized code that doesn't have a distinct source location. +// In this case, we try to use the previous or next source location depending on +// the context. +static bool isUsableDebugLoc(DebugLoc DL) { + return DL && DL.getLine() != 0; +} + void CodeViewDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); @@ -2907,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { // If the first instruction of a new MBB has no location, find the first // instruction with a location and use that. DebugLoc DL = MI->getDebugLoc(); - if (!DL && MI->getParent() != PrevInstBB) { + if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) { for (const auto &NextMI : *MI->getParent()) { if (NextMI.isDebugInstr()) continue; DL = NextMI.getDebugLoc(); - if (DL) + if (isUsableDebugLoc(DL)) break; } + // FIXME: Handle the case where the BB has no valid locations. This would + // probably require doing a real dataflow analysis. } PrevInstBB = MI->getParent(); // If we still don't have a debug location, don't record a location. - if (!DL) + if (!isUsableDebugLoc(DL)) return; maybeRecordLocation(DL, Asm->MF); @@ -3026,7 +2998,7 @@ void CodeViewDebug::collectGlobalVariableInfo() { auto Insertion = ScopeGlobals.insert( {Scope, std::unique_ptr()}); if (Insertion.second) - Insertion.first->second = llvm::make_unique(); + Insertion.first->second = std::make_unique(); VariableList = Insertion.first->second.get(); } else if (GV->hasComdat()) // Emit this global variable into a COMDAT section. 
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index ce57b789d7f..7ffd77926cf 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -148,7 +148,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector ChildBlocks; std::vector> Annotations; - std::vector> HeapAllocSites; + std::vector> + HeapAllocSites; const MCSymbol *Begin = nullptr; const MCSymbol *End = nullptr; diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index ddd60575b6c..7f9d6c618ad 100644 --- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -41,7 +41,7 @@ using EntryIndex = DbgValueHistoryMap::EntryIndex; static Register isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); assert(MI.getNumOperands() == 4); - // If the location of variable is an entry value (DW_OP_entry_value) + // If the location of variable is an entry value (DW_OP_LLVM_entry_value) // do not consider it as a register location. if (MI.getDebugExpression()->isEntryValue()) return 0; @@ -177,13 +177,13 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, IndicesToErase.push_back(Index); Entry.endEntry(NewIndex); } - if (unsigned Reg = isDescribedByReg(DV)) + if (Register Reg = isDescribedByReg(DV)) TrackedRegs[Reg] |= !Overlaps; } // If the new debug value is described by a register, add tracking of // that register if it is not already tracked. 
- if (unsigned NewReg = isDescribedByReg(DV)) { + if (Register NewReg = isDescribedByReg(DV)) { if (!TrackedRegs.count(NewReg)) addRegDescribedVar(RegVars, NewReg, Var); LiveEntries[Var].insert(NewIndex); @@ -234,7 +234,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgLabelInstrMap &DbgLabels) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FrameReg = TRI->getFrameRegister(*MF); + Register FrameReg = TRI->getFrameRegister(*MF); RegDescribedVarsMap RegVars; DbgValueEntriesMap LiveEntries; for (const auto &MBB : *MF) { @@ -275,7 +275,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. - if (TRI->isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, MI); // If this is a register def operand, it may end a debug value @@ -296,7 +296,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, // Don't consider SP to be clobbered by register masks. 
for (auto It : RegVars) { unsigned int Reg = It.first; - if (Reg != SP && TRI->isPhysicalRegister(Reg) && + if (Reg != SP && Register::isPhysicalRegister(Reg) && MO.clobbersPhysReg(Reg)) RegsToClobber.push_back(Reg); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 789291771b5..0db86b09d19 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -38,21 +38,18 @@ public: : CU(CU), EntryOffset(EntryOffset) {} }; struct Entry { - const MCSymbol *BeginSym; - const MCSymbol *EndSym; + const MCSymbol *Begin; + const MCSymbol *End; size_t ByteOffset; size_t CommentOffset; - Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, - size_t CommentOffset) - : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), - CommentOffset(CommentOffset) {} }; private: SmallVector Lists; SmallVector Entries; SmallString<256> DWARFBytes; - SmallVector Comments; + std::vector Comments; + MCSymbol *Sym; /// Only verbose textual output needs comments. This will be set to /// true for that case, and false otherwise. @@ -63,6 +60,12 @@ public: size_t getNumLists() const { return Lists.size(); } const List &getList(size_t LI) const { return Lists[LI]; } ArrayRef getLists() const { return Lists; } + MCSymbol *getSym() const { + return Sym; + } + void setSym(MCSymbol *Sym) { + this->Sym = Sym; + } class ListBuilder; class EntryBuilder; @@ -93,7 +96,7 @@ private: /// Until the next call, bytes added to the stream will be added to this /// entry. void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { - Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()}); } /// Finalize a .debug_loc entry, deleting if it's empty. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9548ad9918c..a61c98ec1c1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -208,7 +208,7 @@ void DwarfCompileUnit::addLocationAttribute( if (!Loc) { addToAccelTable = true; Loc = new (DIEValueAllocator) DIELoc; - DwarfExpr = llvm::make_unique(*Asm, *this, *Loc); + DwarfExpr = std::make_unique(*Asm, *this, *Loc); } if (Expr) { @@ -326,14 +326,13 @@ void DwarfCompileUnit::addRange(RangeSpan Range) { // emitted into and the subprogram was contained within. If these are the // same then extend our current range, otherwise add this as a new range. if (CURanges.empty() || !SameAsPrevCU || - (&CURanges.back().getEnd()->getSection() != - &Range.getEnd()->getSection())) { + (&CURanges.back().End->getSection() != + &Range.End->getSection())) { CURanges.push_back(Range); - DD->addSectionLabel(Range.getStart()); return; } - CURanges.back().setEnd(Range.getEnd()); + CURanges.back().End = Range.End; } void DwarfCompileUnit::initStmtList() { @@ -399,7 +398,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { } else { const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - if (RI->isPhysicalRegister(Location.getReg())) + if (Register::isPhysicalRegister(Location.getReg())) addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); } } @@ -468,14 +467,6 @@ void DwarfCompileUnit::constructScopeDIE( void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector Range) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable - // label. emitDIE() will handle emitting it appropriately. - const MCSymbol *RangeSectionSym = - DD->getDwarfVersion() >= 5 - ? 
TLOF.getDwarfRnglistsSection()->getBeginSymbol() - : TLOF.getDwarfRangesSection()->getBeginSymbol(); HasRangeLists = true; @@ -494,12 +485,17 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, // (DW_RLE_startx_endx etc.). if (DD->getDwarfVersion() >= 5) addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index); - else if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); - else - addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), - RangeSectionSym); + else { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + } } void DwarfCompileUnit::attachRangesOrLowHighPC( @@ -507,7 +503,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( if (Ranges.size() == 1 || !DD->useRangesSection()) { const RangeSpan &Front = Ranges.front(); const RangeSpan &Back = Ranges.back(); - attachLowHighPC(Die, Front.getStart(), Back.getEnd()); + attachLowHighPC(Die, Front.Begin, Back.End); } else addScopeRangeList(Die, std::move(Ranges)); } @@ -517,8 +513,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC( SmallVector List; List.reserve(Ranges.size()); for (const InsnRange &R : Ranges) - List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), - DD->getLabelAfterInsn(R.second))); + List.push_back( + {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)}); attachRangesOrLowHighPC(Die, std::move(List)); } @@ -647,8 +643,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector Ops; - Ops.push_back(dwarf::DW_OP_plus_uconst); - Ops.push_back(Offset); + 
DIExpression::appendOffset(Ops, Offset); // According to // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf // cuda-gdb requires DW_AT_address_class for all variables to be able to @@ -892,32 +887,117 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } -DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - const DISubprogram &CalleeSP, - bool IsTail, - const MCExpr *PCOffset) { +/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom. +static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) { + return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); +} + +dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Tag; + switch (Tag) { + case dwarf::DW_TAG_call_site: + return dwarf::DW_TAG_GNU_call_site; + case dwarf::DW_TAG_call_site_parameter: + return dwarf::DW_TAG_GNU_call_site_parameter; + default: + llvm_unreachable("DWARF5 tag with no GNU analog"); + } +} + +dwarf::Attribute +DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Attr; + switch (Attr) { + case dwarf::DW_AT_call_all_calls: + return dwarf::DW_AT_GNU_all_call_sites; + case dwarf::DW_AT_call_target: + return dwarf::DW_AT_GNU_call_site_target; + case dwarf::DW_AT_call_origin: + return dwarf::DW_AT_abstract_origin; + case dwarf::DW_AT_call_pc: + return dwarf::DW_AT_low_pc; + case dwarf::DW_AT_call_value: + return dwarf::DW_AT_GNU_call_site_value; + case dwarf::DW_AT_call_tail_call: + return dwarf::DW_AT_GNU_tail_call; + default: + llvm_unreachable("DWARF5 attribute with no GNU analog"); + } +} + +dwarf::LocationAtom +DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Loc; + switch (Loc) { + case 
dwarf::DW_OP_entry_value: + return dwarf::DW_OP_GNU_entry_value; + default: + llvm_unreachable("DWARF5 location atom with no GNU analog"); + } +} + +DIE &DwarfCompileUnit::constructCallSiteEntryDIE( + DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, + const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) { // Insert a call site entry DIE within ScopeDIE. - DIE &CallSiteDIE = - createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr); + DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), + ScopeDIE, nullptr); - // For the purposes of showing tail call frames in backtraces, a key piece of - // information is DW_AT_call_origin, a pointer to the callee DIE. - DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); - addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE); - - if (IsTail) { - // Attach DW_AT_call_tail_call to tail calls for standards compliance. - addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call); + if (CallReg) { + // Indirect call. + addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), + MachineLocation(CallReg)); } else { - // Attach the return PC to allow the debugger to disambiguate call paths - // from one function to another. + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); + addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), + *CalleeDIE); + } + + if (IsTail) + // Attach DW_AT_call_tail_call to tail calls for standards compliance. + addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call)); + + // Attach the return PC to allow the debugger to disambiguate call paths + // from one function to another. 
+ if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) { + assert(PCAddr && "Missing PC information for a call"); + addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr); + } else if (!IsTail || DD->tuneForGDB()) { assert(PCOffset && "Missing return PC information for a call"); addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); } + return CallSiteDIE; } +void DwarfCompileUnit::constructCallSiteParmEntryDIEs( + DIE &CallSiteDIE, SmallVector &Params) { + for (const auto &Param : Params) { + unsigned Register = Param.getRegister(); + auto CallSiteDieParam = + DIE::get(DIEValueAllocator, + getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter)); + insertDIE(CallSiteDieParam); + addAddress(*CallSiteDieParam, dwarf::DW_AT_location, + MachineLocation(Register)); + + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DwarfExpr.setCallSiteParamValueFlag(); + + DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr); + + addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value), + DwarfExpr.finalize()); + + CallSiteDIE.addChild(CallSiteDieParam); + } +} + DIE *DwarfCompileUnit::constructImportedEntityDIE( const DIImportedEntity *Module) { DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); @@ -997,11 +1077,11 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node, assert(Scope && Scope->isAbstractScope()); auto &Entity = getAbstractEntities()[Node]; if (isa(Node)) { - Entity = llvm::make_unique( + Entity = std::make_unique( cast(Node), nullptr /* IA */);; DU->addScopeVariable(Scope, cast(Entity.get())); } else if (isa(Node)) { - Entity = llvm::make_unique( + Entity = std::make_unique( cast(Node), nullptr /* IA */); DU->addScopeLabel(Scope, cast(Entity.get())); } @@ -1081,16 +1161,8 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie())); } -/// addVariableAddress - Add 
DW_AT_location attribute for a -/// DbgVariable based on provided MachineLocation. void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { - // addBlockByrefAddress is obsolete and will be removed soon. - // The clang frontend always generates block byref variables with a - // complex expression that encodes exactly what addBlockByrefAddress - // would do. - assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) && - "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else @@ -1133,7 +1205,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index ea980dfda17..1b7ea2673ac 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -227,12 +227,35 @@ public: void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + /// This takes a DWARF 5 tag and returns it or a GNU analog. + dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const; + + /// This takes a DWARF 5 attribute and returns it or a GNU analog. + dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const; + + /// This takes a DWARF 5 location atom and either returns it or a GNU analog. + dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; + /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeSP. \p IsTail specifies whether the call is - /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the + /// callee described by \p CalleeSP. 
+ /// \p IsTail specifies whether the call is a tail call. + /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after + /// the call instruction. + /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be + /// non-zero for non-tail calls (in the case of non-gdb tuning, since for + /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the /// function-local offset to PC value after the call instruction. - DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP, - bool IsTail, const MCExpr *PCOffset); + /// \p CallReg is a register location for an indirect call. For direct calls + /// the \p CallReg is set to 0. + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, + const MCExpr *PCOffset, unsigned CallReg); + /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params + /// were collected by the \ref collectCallSiteParameters. + /// Note: The order of parameters does not matter, since debuggers recognize + /// call site parameters by the DW_AT_location attribute. + void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, + SmallVector &Params); /// Construct import_module DIE. 
DIE *constructImportedEntityDIE(const DIImportedEntity *Module); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 71bb2b0858c..c505e77e5ac 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -39,6 +40,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" @@ -83,6 +85,8 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" +STATISTIC(NumCSParams, "Number of dbg call site params created"); + static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -166,26 +170,26 @@ static const char *const DbgTimerDescription = "DWARF Debug Writer"; static constexpr unsigned ULEB128PadSize = 4; void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { - BS.EmitInt8( + getActiveStreamer().EmitInt8( Op, Comment ? 
Twine(Comment) + " " + dwarf::OperationEncodingString(Op) : dwarf::OperationEncodingString(Op)); } void DebugLocDwarfExpression::emitSigned(int64_t Value) { - BS.EmitSLEB128(Value, Twine(Value)); + getActiveStreamer().EmitSLEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { - BS.EmitULEB128(Value, Twine(Value)); + getActiveStreamer().EmitULEB128(Value, Twine(Value)); } void DebugLocDwarfExpression::emitData1(uint8_t Value) { - BS.EmitInt8(Value, Twine(Value)); + getActiveStreamer().EmitInt8(Value, Twine(Value)); } void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); - BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); + getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); } bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, @@ -194,54 +198,34 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, return false; } -bool DbgVariable::isBlockByrefVariable() const { - assert(getVariable() && "Invalid complex DbgVariable!"); - return getVariable()->getType()->isBlockByrefStruct(); +void DebugLocDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + if (!TmpBuf) + TmpBuf = std::make_unique(OutBS.GenerateComments); + IsBuffering = true; +} + +void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DebugLocDwarfExpression::getTemporaryBufferSize() { + return TmpBuf ? TmpBuf->Bytes.size() : 0; +} + +void DebugLocDwarfExpression::commitTemporaryBuffer() { + if (!TmpBuf) + return; + for (auto Byte : enumerate(TmpBuf->Bytes)) { + const char *Comment = (Byte.index() < TmpBuf->Comments.size()) + ? 
TmpBuf->Comments[Byte.index()].c_str() + : ""; + OutBS.EmitInt8(Byte.value(), Comment); + } + TmpBuf->Bytes.clear(); + TmpBuf->Comments.clear(); } const DIType *DbgVariable::getType() const { - DIType *Ty = getVariable()->getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Ty->isBlockByrefStruct()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. 
*/ - DIType *subType = Ty; - uint16_t tag = Ty->getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) - subType = cast(Ty)->getBaseType(); - - auto Elements = cast(subType)->getElements(); - for (unsigned i = 0, N = Elements.size(); i < N; ++i) { - auto *DT = cast(Elements[i]); - if (getName() == DT->getName()) - return DT->getBaseType(); - } - } - return Ty; + return getVariable()->getType(); } /// Get .debug_loc entry for the instruction range starting at MI. @@ -275,7 +259,7 @@ void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) { assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at"); - ValueLoc = llvm::make_unique(getDebugLocValue(DbgValue)); + ValueLoc = std::make_unique(getDebugLocValue(DbgValue)); if (auto *E = DbgValue->getDebugExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); @@ -551,6 +535,157 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +/// Try to interpret values loaded into registers that forward parameters +/// for \p CallMI. Store parameters with interpreted value into \p Params. +static void collectCallSiteParameters(const MachineInstr *CallMI, + ParamSet &Params) { + auto *MF = CallMI->getMF(); + auto CalleesMap = MF->getCallSitesInfo(); + auto CallFwdRegsInfo = CalleesMap.find(CallMI); + + // There is no information for the call instruction. + if (CallFwdRegsInfo == CalleesMap.end()) + return; + + auto *MBB = CallMI->getParent(); + const auto &TRI = MF->getSubtarget().getRegisterInfo(); + const auto &TII = MF->getSubtarget().getInstrInfo(); + const auto &TLI = MF->getSubtarget().getTargetLowering(); + + // Skip the call instruction. + auto I = std::next(CallMI->getReverseIterator()); + + DenseSet ForwardedRegWorklist; + // Add all the forwarding registers into the ForwardedRegWorklist. 
+ for (auto ArgReg : CallFwdRegsInfo->second) { + bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second; + assert(InsertedReg && "Single register used to forward two arguments?"); + (void)InsertedReg; + } + + // We erase, from the ForwardedRegWorklist, those forwarding registers for + // which we successfully describe a loaded value (by using + // the describeLoadedValue()). For those remaining arguments in the working + // list, for which we do not describe a loaded value by + // the describeLoadedValue(), we try to generate an entry value expression + // for their call site value desctipion, if the call is within the entry MBB. + // The RegsForEntryValues maps a forwarding register into the register holding + // the entry value. + // TODO: Handle situations when call site parameter value can be described + // as the entry value within basic blocks other then the first one. + bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); + DenseMap RegsForEntryValues; + + // If the MI is an instruction defining one or more parameters' forwarding + // registers, add those defines. We can currently only describe forwarded + // registers that are explicitly defined, but keep track of implicit defines + // also to remove those registers from the work list. 
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, + SmallVectorImpl &Explicit, + SmallVectorImpl &Implicit) { + if (MI.isDebugInstr()) + return; + + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && + Register::isPhysicalRegister(MO.getReg())) { + for (auto FwdReg : ForwardedRegWorklist) { + if (TRI->regsOverlap(FwdReg, MO.getReg())) { + if (MO.isImplicit()) + Implicit.push_back(FwdReg); + else + Explicit.push_back(FwdReg); + break; + } + } + } + } + }; + + auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) { + unsigned FwdReg = Reg; + if (ShouldTryEmitEntryVals) { + auto EntryValReg = RegsForEntryValues.find(Reg); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + } + + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + }; + + // Search for a loading value in forwaring registers. + for (; I != MBB->rend(); ++I) { + // If the next instruction is a call we can not interpret parameter's + // forwarding registers or we finished the interpretation of all parameters. + if (I->isCall()) + return; + + if (ForwardedRegWorklist.empty()) + return; + + SmallVector ExplicitFwdRegDefs; + SmallVector ImplicitFwdRegDefs; + getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs); + if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty()) + continue; + + // If the MI clobbers more then one forwarding register we must remove + // all of them from the working list. + for (auto Reg : concat(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) + ForwardedRegWorklist.erase(Reg); + + // The describeLoadedValue() hook currently does not have any information + // about which register it should describe in case of multiple defines, so + // for now we only handle instructions where a forwarded register is (at + // least partially) defined by the instruction's single explicit define. 
+ if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty()) + continue; + unsigned Reg = ExplicitFwdRegDefs[0]; + + if (auto ParamValue = TII->describeLoadedValue(*I)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + DbgValueLoc DbgLocVal(ParamValue->second, Val); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + DbgValueLoc DbgLocVal(ParamValue->second, + MachineLocation(RegLoc, + /*IsIndirect=*/IsSPorFP)); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ShouldTryEmitEntryVals) { + ForwardedRegWorklist.insert(RegLoc); + RegsForEntryValues[RegLoc] = Reg; + } + } + } + } + + // Emit the call site parameter's value as an entry value. + if (ShouldTryEmitEntryVals) { + // Create an expression where the register's entry value is used. + DIExpression *EntryExpr = DIExpression::get( + MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); + for (auto RegEntry : ForwardedRegWorklist) { + unsigned FwdReg = RegEntry; + auto EntryValReg = RegsForEntryValues.find(RegEntry); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + + DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry)); + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + } + } +} + void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF) { @@ -563,10 +698,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls // because one of its requirements is not met: call site entries for // optimized-out calls are elided. 
- CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls); + CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls)); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); assert(TII && "TargetInstrInfo not found: cannot label tail calls"); + bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB(); // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { @@ -581,30 +717,66 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, return; // If this is a direct call, find the callee's subprogram. + // In the case of an indirect call find the register that holds + // the callee. const MachineOperand &CalleeOp = MI.getOperand(0); - if (!CalleeOp.isGlobal()) - continue; - const Function *CalleeDecl = dyn_cast(CalleeOp.getGlobal()); - if (!CalleeDecl || !CalleeDecl->getSubprogram()) + if (!CalleeOp.isGlobal() && !CalleeOp.isReg()) continue; + unsigned CallReg = 0; + const DISubprogram *CalleeSP = nullptr; + const Function *CalleeDecl = nullptr; + if (CalleeOp.isReg()) { + CallReg = CalleeOp.getReg(); + if (!CallReg) + continue; + } else { + CalleeDecl = dyn_cast(CalleeOp.getGlobal()); + if (!CalleeDecl || !CalleeDecl->getSubprogram()) + continue; + CalleeSP = CalleeDecl->getSubprogram(); + } + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). - // TODO: Add support for indirect calls. bool IsTail = TII->isTailCall(MI); - // For tail calls, no return PC information is needed. For regular calls, - // the return PC is needed to disambiguate paths in the call graph which - // could lead to some target function. + // For tail calls, for non-gdb tuning, no return PC information is needed. + // For regular calls (and tail calls in GDB tuning), the return PC + // is needed to disambiguate paths in the call graph which could lead to + // some target function. const MCExpr *PCOffset = - IsTail ? 
nullptr : getFunctionLocalOffsetAfterInsn(&MI); + (IsTail && !tuneForGDB()) ? nullptr + : getFunctionLocalOffsetAfterInsn(&MI); + + // Address of a call-like instruction for a normal call or a jump-like + // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + const MCSymbol *PCAddr = + ApplyGNUExtensions ? const_cast(getLabelAfterInsn(&MI)) + : nullptr; + + assert((IsTail || PCOffset || PCAddr) && + "Call without return PC information"); - assert((IsTail || PCOffset) && "Call without return PC information"); LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " - << CalleeDecl->getName() << (IsTail ? " [tail]" : "") - << "\n"); - CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(), - IsTail, PCOffset); + << (CalleeDecl ? CalleeDecl->getName() + : StringRef(MF.getSubtarget() + .getRegisterInfo() + ->getName(CallReg))) + << (IsTail ? " [IsTail]" : "") << "\n"); + + DIE &CallSiteDIE = + CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, + PCOffset, CallReg); + + // GDB and LLDB support call site parameter debug info. + if (Asm->TM.Options.EnableDebugEntryValues && + (tuneForGDB() || tuneForLLDB())) { + ParamSet Params; + // Try to interpret values of call site parameters. + collectCallSiteParameters(&MI, Params); + CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params); + } } } } @@ -680,7 +852,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CompilationDir = DIUnit->getDirectory(); - auto OwnedUnit = llvm::make_unique( + auto OwnedUnit = std::make_unique( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); @@ -793,8 +965,6 @@ void DwarfDebug::beginModule() { DwarfFile &Holder = useSplitDwarf() ? 
SkeletonHolder : InfoHolder; Holder.setRnglistsTableBaseSym( Asm->createTempSymbol("rnglists_table_base")); - Holder.setLoclistsTableBaseSym( - Asm->createTempSymbol("loclists_table_base")); if (useSplitDwarf()) InfoHolder.setRnglistsTableBaseSym( @@ -907,7 +1077,7 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) { + if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { finishUnitAttributes(TheCU.getCUNode(), TheCU); TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, Asm->TM.Options.MCOptions.SplitDwarfFile); @@ -951,7 +1121,7 @@ void DwarfDebug::finalizeModuleInfo() { // 2.17.3). U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); else - U.setBaseAddress(TheCU.getRanges().front().getStart()); + U.setBaseAddress(TheCU.getRanges().front().Begin); U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } @@ -959,15 +1129,19 @@ void DwarfDebug::finalizeModuleInfo() { // is a bit pessimistic under LTO. 
if (!AddrPool.isEmpty() && (getDwarfVersion() >= 5 || - (SkCU && !empty(TheCU.getUnitDie().children())))) + (SkCU && !TheCU.getUnitDie().children().empty()))) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); - if (!DebugLocs.getLists().empty() && !useSplitDwarf()) - U.addLoclistsBase(); + if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + } } auto *CUNode = cast(P.first); @@ -1105,7 +1279,7 @@ void DwarfDebug::collectVariableInfoFromMFTable( continue; ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); - auto RegVar = llvm::make_unique( + auto RegVar = std::make_unique( cast(Var.first), Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (DbgVariable *DbgVar = MFVars.lookup(Var)) @@ -1316,13 +1490,13 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode()); if (isa(Node)) { ConcreteEntities.push_back( - llvm::make_unique(cast(Node), + std::make_unique(cast(Node), Location)); InfoHolder.addScopeVariable(&Scope, cast(ConcreteEntities.back().get())); } else if (isa(Node)) { ConcreteEntities.push_back( - llvm::make_unique(cast(Node), + std::make_unique(cast(Node), Location, Sym)); InfoHolder.addScopeLabel(&Scope, cast(ConcreteEntities.back().get())); @@ -1419,11 +1593,14 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, LexicalScope *Scope = nullptr; const DILabel *Label = cast(IL.first); + // The scope could have an extra lexical block file. + const DILocalScope *LocalScope = + Label->getScope()->getNonLexicalBlockFileScope(); // Get inlined DILocation if it is inlined label. 
if (const DILocation *IA = IL.second) - Scope = LScopes.findInlinedScope(Label->getScope(), IA); + Scope = LScopes.findInlinedScope(LocalScope, IA); else - Scope = LScopes.findLexicalScope(Label->getScope()); + Scope = LScopes.findLexicalScope(LocalScope); // If label scope is not found then skip this label. if (!Scope) continue; @@ -1607,6 +1784,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(), + Asm->getFunctionBegin())); + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function @@ -1654,7 +1834,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { collectEntityInfo(TheCU, SP, Processed); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); + TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()}); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram @@ -1836,9 +2016,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_enumeration_type: return dwarf::PubIndexEntryDescriptor( - dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus - ? dwarf::GIEL_STATIC - : dwarf::GIEL_EXTERNAL); + dwarf::GIEK_TYPE, + dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage()) + ? 
dwarf::GIEL_EXTERNAL + : dwarf::GIEL_STATIC); case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_subrange_type: @@ -1967,7 +2148,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); using Encoding = DWARFExpression::Operation::Encoding; - uint32_t Offset = 0; + uint64_t Offset = 0; for (auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); @@ -1990,7 +2171,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, if (Comment != End) Comment++; } else { - for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J) + for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); } Offset = Op.getOperandEndOffset(I); @@ -2020,7 +2201,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (DIExpr->isEntryValue()) { DwarfExpr.setEntryValueFlag(); - DwarfExpr.addEntryValueExpression(Cursor); + DwarfExpr.beginEntryValueExpression(Cursor); } const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); @@ -2083,7 +2264,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, } // Emit the common part of the DWARF 5 range/locations list tables header. -static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder, +static void emitListsTableHeaderStart(AsmPrinter *Asm, MCSymbol *TableStart, MCSymbol *TableEnd) { // Build the table header, which starts with the length field. 
@@ -2108,7 +2289,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, const DwarfFile &Holder) { MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(Holder.getRangeLists().size()); @@ -2125,94 +2306,147 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, // designates the end of the table for the caller to emit when the table is // complete. static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, - const DwarfFile &Holder) { + const DwarfDebug &DD) { MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); - emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); + + const auto &DebugLocs = DD.getDebugLocs(); // FIXME: Generate the offsets table and use DW_FORM_loclistx with the // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. Asm->OutStreamer->AddComment("Offset entry count"); Asm->emitInt32(0); - Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym()); + Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); return TableEnd; } +template +static void emitRangeList( + DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R, + const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair, + unsigned StartxLength, unsigned EndOfList, + StringRef (*StringifyEnum)(unsigned), + bool ShouldUseBaseAddress, + PayloadEmitter EmitPayload) { + + auto Size = Asm->MAI->getCodePointerSize(); + bool UseDwarf5 = DD.getDwarfVersion() >= 5; + + // Emit our symbol so we can find the beginning of the range. 
+ Asm->OutStreamer->EmitLabel(Sym); + + // Gather all the ranges that apply to the same section so they can share + // a base address entry. + MapVector> SectionRanges; + + for (const auto &Range : R) + SectionRanges[&Range.Begin->getSection()].push_back(&Range); + + const MCSymbol *CUBase = CU.getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : SectionRanges) { + auto *Base = CUBase; + if (!Base && ShouldUseBaseAddress) { + const MCSymbol *Begin = P.second.front()->Begin; + const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection()); + if (!UseDwarf5) { + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (NewBase != Begin || P.second.size() > 1) { + // Only use a base address if + // * the existing pool address doesn't match (NewBase != Begin) + // * or, there's more than one entry to share the base address + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx)); + Asm->emitInt8(BaseAddressx); + Asm->OutStreamer->AddComment(" base address index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + } + } else if (BaseIsSet && !UseDwarf5) { + BaseIsSet = false; + assert(!Base); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->Begin; + const MCSymbol *End = RS->End; + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + if (Base) { + if (UseDwarf5) { + // Emit offset_pair when we have a base. 
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair)); + Asm->emitInt8(OffsetPair); + Asm->OutStreamer->AddComment(" starting offset"); + Asm->EmitLabelDifferenceAsULEB128(Begin, Base); + Asm->OutStreamer->AddComment(" ending offset"); + Asm->EmitLabelDifferenceAsULEB128(End, Base); + } else { + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } + } else if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(StartxLength)); + Asm->emitInt8(StartxLength); + Asm->OutStreamer->AddComment(" start index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); + Asm->OutStreamer->AddComment(" length"); + Asm->EmitLabelDifferenceAsULEB128(End, Begin); + } else { + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); + } + EmitPayload(*RS); + } + } + + if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(EndOfList)); + Asm->emitInt8(EndOfList); + } else { + // Terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } +} + +static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { + emitRangeList( + DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, + dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, + dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, + llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); +} + // Emit locations into the .debug_loc/.debug_rnglists section. void DwarfDebug::emitDebugLoc() { if (DebugLocs.getLists().empty()) return; - bool IsLocLists = getDwarfVersion() >= 5; MCSymbol *TableEnd = nullptr; - if (IsLocLists) { + if (getDwarfVersion() >= 5) { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLoclistsSection()); - TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? 
SkeletonHolder - : InfoHolder); + TableEnd = emitLoclistsTableHeader(Asm, *this); } else { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); } - unsigned char Size = Asm->MAI->getCodePointerSize(); - for (const auto &List : DebugLocs.getLists()) { - Asm->OutStreamer->EmitLabel(List.Label); - - const DwarfCompileUnit *CU = List.CU; - const MCSymbol *Base = CU->getBaseAddress(); - for (const auto &Entry : DebugLocs.getEntries(List)) { - if (Base) { - // Set up the range. This range is relative to the entry point of the - // compile unit. This is a hard coded 0 for low_pc when we're emitting - // ranges, or the DW_AT_low_pc on the compile unit otherwise. - if (IsLocLists) { - Asm->OutStreamer->AddComment("DW_LLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1); - Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base); - Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base); - } else { - Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); - Asm->EmitLabelDifference(Entry.EndSym, Base, Size); - } - - emitDebugLocEntryLocation(Entry, CU); - continue; - } - - // We have no base address. - if (IsLocLists) { - // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or - // DW_LLE_startx_length in case if there is only a single range. - // That should reduce the size of the debug data emited. - // For now just use the DW_LLE_startx_length for all cases. 
- Asm->OutStreamer->AddComment("DW_LLE_startx_length"); - Asm->emitInt8(dwarf::DW_LLE_startx_length); - Asm->OutStreamer->AddComment(" start idx"); - Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym)); - Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym); - } else { - Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); - Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); - } - - emitDebugLocEntryLocation(Entry, CU); - } - - if (IsLocLists) { - // .debug_loclists section ends with DW_LLE_end_of_list. - Asm->OutStreamer->AddComment("DW_LLE_end_of_list"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1); - } else { - // Terminate the .debug_loc list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } - } + for (const auto &List : DebugLocs.getLists()) + emitLocList(*this, Asm, List); if (TableEnd) Asm->OutStreamer->EmitLabel(TableEnd); @@ -2232,9 +2466,9 @@ void DwarfDebug::emitDebugLocDWO() { // Ideally/in v5, this could use SectionLabels to reuse existing addresses // in the address pool to minimize object size/relocations. Asm->emitInt8(dwarf::DW_LLE_startx_length); - unsigned idx = AddrPool.getIndex(Entry.BeginSym); + unsigned idx = AddrPool.getIndex(Entry.Begin); Asm->EmitULEB128(idx); - Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); + Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); emitDebugLocEntryLocation(Entry, List.CU); } @@ -2360,7 +2594,7 @@ void DwarfDebug::emitDebugARanges() { // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. unsigned Padding = - OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); + offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; @@ -2405,93 +2639,13 @@ void DwarfDebug::emitDebugARanges() { /// Emit a single range list. We handle both DWARF v5 and earlier. 
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - - auto DwarfVersion = DD.getDwarfVersion(); - // Emit our symbol so we can find the beginning of the range. - Asm->OutStreamer->EmitLabel(List.getSym()); - // Gather all the ranges that apply to the same section so they can share - // a base address entry. - MapVector> SectionRanges; - // Size for our labels. - auto Size = Asm->MAI->getCodePointerSize(); - - for (const RangeSpan &Range : List.getRanges()) - SectionRanges[&Range.getStart()->getSection()].push_back(&Range); - - const DwarfCompileUnit &CU = List.getCU(); - const MCSymbol *CUBase = CU.getBaseAddress(); - bool BaseIsSet = false; - for (const auto &P : SectionRanges) { - // Don't bother with a base address entry if there's only one range in - // this section in this range list - for example ranges for a CU will - // usually consist of single regions from each of many sections - // (-ffunction-sections, or just C++ inline functions) except under LTO - // or optnone where there may be holes in a single CU's section - // contributions. - auto *Base = CUBase; - if (!Base && (P.second.size() > 1 || DwarfVersion < 5) && - (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) { - BaseIsSet = true; - // FIXME/use care: This may not be a useful base address if it's not - // the lowest address/range in this object. 
- Base = P.second.front()->getStart(); - if (DwarfVersion >= 5) { - Base = DD.getSectionLabel(&Base->getSection()); - Asm->OutStreamer->AddComment("DW_RLE_base_addressx"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1); - Asm->OutStreamer->AddComment(" base address index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); - } else { - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->AddComment(" base address"); - Asm->OutStreamer->EmitSymbolValue(Base, Size); - } - } else if (BaseIsSet && DwarfVersion < 5) { - BaseIsSet = false; - assert(!Base); - Asm->OutStreamer->EmitIntValue(-1, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } - - for (const auto *RS : P.second) { - const MCSymbol *Begin = RS->getStart(); - const MCSymbol *End = RS->getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - if (Base) { - if (DwarfVersion >= 5) { - // Emit DW_RLE_offset_pair when we have a base. - Asm->OutStreamer->AddComment("DW_RLE_offset_pair"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1); - Asm->OutStreamer->AddComment(" starting offset"); - Asm->EmitLabelDifferenceAsULEB128(Begin, Base); - Asm->OutStreamer->AddComment(" ending offset"); - Asm->EmitLabelDifferenceAsULEB128(End, Base); - } else { - Asm->EmitLabelDifference(Begin, Base, Size); - Asm->EmitLabelDifference(End, Base, Size); - } - } else if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_startx_length"); - Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1); - Asm->OutStreamer->AddComment(" start index"); - Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); - Asm->OutStreamer->AddComment(" length"); - Asm->EmitLabelDifferenceAsULEB128(End, Begin); - } else { - Asm->OutStreamer->EmitSymbolValue(Begin, Size); - Asm->OutStreamer->EmitSymbolValue(End, Size); - } - } - } - if (DwarfVersion >= 5) { - Asm->OutStreamer->AddComment("DW_RLE_end_of_list"); - 
Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1); - } else { - // Terminate the list with two 0 values. - Asm->OutStreamer->EmitIntValue(0, Size); - Asm->OutStreamer->EmitIntValue(0, Size); - } + emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, + dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, + llvm::dwarf::RangeListEncodingString, + List.getCU().getCUNode()->getRangesBaseAddress() || + DD.getDwarfVersion() >= 5, + [](auto) {}); } static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, @@ -2637,7 +2791,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - auto OwnedUnit = llvm::make_unique( + auto OwnedUnit = std::make_unique( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); @@ -2737,7 +2891,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = llvm::make_unique(CU, Asm, this, &InfoHolder, + auto OwnedUnit = std::make_unique(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); @@ -2879,10 +3033,6 @@ uint16_t DwarfDebug::getDwarfVersion() const { return Asm->OutStreamer->getContext().getDwarfVersion(); } -void DwarfDebug::addSectionLabel(const MCSymbol *Sym) { - SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym)); -} - const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { return SectionLabels.find(S)->second; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 3ac474e2bdd..c8c511f67c2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -153,7 +153,7 @@ public: 
assert(!ValueLoc && "Already initialized?"); assert(!Value.getExpression()->isFragment() && "Fragments not supported."); - ValueLoc = llvm::make_unique(Value); + ValueLoc = std::make_unique(Value); if (auto *E = ValueLoc->getExpression()) if (E->getNumElements()) FrameIndexExprs.push_back({0, E}); @@ -216,7 +216,6 @@ public: return !FrameIndexExprs.empty(); } - bool isBlockByrefVariable() const; const DIType *getType() const; static bool classof(const DbgEntity *N) { @@ -254,6 +253,25 @@ public: } }; +/// Used for tracking debug info about call site parameters. +class DbgCallSiteParam { +private: + unsigned Register; ///< Parameter register at the callee entry point. + DbgValueLoc Value; ///< Corresponding location for the parameter value at + ///< the call site. +public: + DbgCallSiteParam(unsigned Reg, DbgValueLoc Val) + : Register(Reg), Value(Val) { + assert(Reg && "Parameter register cannot be undef"); + } + + unsigned getRegister() const { return Register; } + DbgValueLoc getValue() const { return Value; } +}; + +/// Collection used for storing debug call site parameters. +using ParamSet = SmallVector; + /// Helper used to pair up a symbol and its DWARF compile unit. 
struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 2858afaa1cf..1c5a244d7c5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/ErrorHandling.h" @@ -97,7 +98,7 @@ void DwarfExpression::addAnd(unsigned Mask) { bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned MaxSize) { - if (!TRI.isPhysicalRegister(MachineReg)) { + if (!llvm::Register::isPhysicalRegister(MachineReg)) { if (isFrameRegister(TRI, MachineReg)) { DwarfRegs.push_back({-1, 0, nullptr}); return true; @@ -241,15 +242,22 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return false; } - // Handle simple register locations. - if (!isMemoryLocation() && !HasComplexExpression) { + // Handle simple register locations. If we are supposed to emit + // a call site parameter expression and if that expression is just a register + // location, emit it with addBReg and offset 0, because we should emit a DWARF + // expression representing a value, rather than a location. 
+ if (!isMemoryLocation() && !HasComplexExpression && + (!isParameterValue() || isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); addOpPiece(Reg.Size); } - if (isEntryValue() && DwarfVersion >= 4) + if (isEntryValue()) + finalizeEntryValue(); + + if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) emitOp(dwarf::DW_OP_stack_value); DwarfRegs.clear(); @@ -275,19 +283,27 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // Pattern-match combinations for which more efficient representations exist. // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { - SignedOffset = Op->getArg(0); - ExprCursor.take(); + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast(std::numeric_limits::max()); + if (Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.take(); + } } // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] // If Reg is a subregister we need to mask it out before subtracting. if (Op && Op->getOp() == dwarf::DW_OP_constu) { + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast(std::numeric_limits::max()); auto N = ExprCursor.peekNext(); - if (N && (N->getOp() == dwarf::DW_OP_plus || - (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? 
-Offset : Offset; + if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.consume(2); + } else if (N && N->getOp() == dwarf::DW_OP_minus && + !SubRegisterSizeInBits && Offset <= IntMax + 1) { + SignedOffset = -static_cast(Offset); ExprCursor.consume(2); } } @@ -300,17 +316,34 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } -void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) { +void DwarfExpression::beginEntryValueExpression( + DIExpressionCursor &ExprCursor) { auto Op = ExprCursor.take(); - assert(Op && Op->getOp() == dwarf::DW_OP_entry_value); + (void)Op; + assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value); assert(!isMemoryLocation() && "We don't support entry values of memory locations yet"); + assert(!IsEmittingEntryValue && "Already emitting entry value?"); + assert(Op->getArg(0) == 1 && + "Can currently only emit entry values covering a single operation"); - if (DwarfVersion >= 5) - emitOp(dwarf::DW_OP_entry_value); - else - emitOp(dwarf::DW_OP_GNU_entry_value); - emitUnsigned(Op->getArg(0)); + emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); + IsEmittingEntryValue = true; + enableTemporaryBuffer(); +} + +void DwarfExpression::finalizeEntryValue() { + assert(IsEmittingEntryValue && "Entry value not open?"); + disableTemporaryBuffer(); + + // Emit the entry value's size operand. + unsigned Size = getTemporaryBufferSize(); + emitUnsigned(Size); + + // Emit the entry value's DWARF block operand. + commitTemporaryBuffer(); + + IsEmittingEntryValue = false; } /// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". 
@@ -340,7 +373,17 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, while (ExprCursor) { auto Op = ExprCursor.take(); - switch (Op->getOp()) { + uint64_t OpNum = Op->getOp(); + + if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) { + emitOp(OpNum); + continue; + } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) { + addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0)); + continue; + } + + switch (OpNum) { case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); unsigned FragmentOffset = Op->getArg(0); @@ -389,10 +432,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: - emitOp(Op->getOp()); + emitOp(OpNum); break; case dwarf::DW_OP_deref: assert(!isRegisterLocation()); + // For more detailed explanation see llvm.org/PR43343. + assert(!isParameterValue() && "Parameter entry values should not be " + "dereferenced due to safety reasons."); if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. @@ -458,12 +504,21 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, case dwarf::DW_OP_LLVM_tag_offset: TagOffset = Op->getArg(0); break; + case dwarf::DW_OP_regx: + emitOp(dwarf::DW_OP_regx); + emitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_bregx: + emitOp(dwarf::DW_OP_bregx); + emitUnsigned(Op->getArg(0)); + emitSigned(Op->getArg(1)); + break; default: llvm_unreachable("unhandled opcode found in expression"); } } - if (isImplicitLocation()) + if (isImplicitLocation() && !isParameterValue()) // Turn this into an implicit location description. 
addStackValue(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index ec2ef6e575f..1ad46669f9b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -26,7 +27,6 @@ namespace llvm { class AsmPrinter; class APInt; -class ByteStreamer; class DwarfCompileUnit; class DIELoc; class TargetRegisterInfo; @@ -95,6 +95,13 @@ public: /// Base class containing the logic for constructing DWARF expressions /// independently of whether they are emitted into a DIE or into a .debug_loc /// entry. +/// +/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size +/// of a succeeding DWARF block before the latter is emitted to the output. +/// To handle such cases, data can conditionally be emitted to a temporary +/// buffer, which can later on be committed to the main output. The size of the +/// temporary buffer is queryable, allowing for the size of the data to be +/// emitted before the data is committed. class DwarfExpression { protected: /// Holds information about all subregisters comprising a register location. @@ -104,6 +111,9 @@ protected: const char *Comment; }; + /// Whether we are currently emitting an entry value operation. + bool IsEmittingEntryValue = false; + DwarfCompileUnit &CU; /// The register location, if any. @@ -120,7 +130,7 @@ protected: enum { Unknown = 0, Register, Memory, Implicit }; /// The flags of location description being produced. 
- enum { EntryValue = 1 }; + enum { EntryValue = 1, CallSiteParamValue }; unsigned LocationKind : 3; unsigned LocationFlags : 2; @@ -147,6 +157,10 @@ public: return LocationFlags & EntryValue; } + bool isParameterValue() { + return LocationFlags & CallSiteParamValue; + } + Optional TagOffset; protected: @@ -174,6 +188,22 @@ protected: virtual void emitBaseTypeRef(uint64_t Idx) = 0; + /// Start emitting data to the temporary buffer. The data stored in the + /// temporary buffer can be committed to the main output using + /// commitTemporaryBuffer(). + virtual void enableTemporaryBuffer() = 0; + + /// Disable emission to the temporary buffer. This does not commit data + /// in the temporary buffer to the main output. + virtual void disableTemporaryBuffer() = 0; + + /// Return the emitted size, in number of bytes, for the data stored in the + /// temporary buffer. + virtual unsigned getTemporaryBufferSize() = 0; + + /// Commit the data stored in the temporary buffer to the main output. + virtual void commitTemporaryBuffer() = 0; + /// Emit a normalized unsigned constant. void emitConstu(uint64_t Value); @@ -233,6 +263,10 @@ protected: /// expression. See PR21176 for more details. void addStackValue(); + /// Finalize an entry value by emitting its size operand, and committing the + /// DWARF block which has been emitted to the temporary buffer. + void finalizeEntryValue(); + ~DwarfExpression() = default; public: @@ -264,6 +298,11 @@ public: LocationFlags |= EntryValue; } + /// Lock this down to become a call site parameter location. + void setCallSiteParamValueFlag() { + LocationFlags |= CallSiteParamValue; + } + /// Emit a machine register location. As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. 
@@ -278,8 +317,11 @@ public: DIExpressionCursor &Expr, unsigned MachineReg, unsigned FragmentOffsetInBits = 0); - /// Emit entry value dwarf operation. - void addEntryValueExpression(DIExpressionCursor &ExprCursor); + /// Begin emission of an entry value dwarf operation. The entry value's + /// first operand is the size of the DWARF block (its second operand), + /// which needs to be calculated at time of emission, so we don't emit + /// any operands here. + void beginEntryValueExpression(DIExpressionCursor &ExprCursor); /// Emit all remaining operations in the DIExpressionCursor. /// @@ -299,31 +341,62 @@ public: /// DwarfExpression implementation for .debug_loc entries. class DebugLocDwarfExpression final : public DwarfExpression { - ByteStreamer &BS; + + struct TempBuffer { + SmallString<32> Bytes; + std::vector Comments; + BufferByteStreamer BS; + + TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {} + }; + + std::unique_ptr TmpBuf; + BufferByteStreamer &OutBS; + bool IsBuffering = false; + + /// Return the byte streamer that currently is being emitted to. + ByteStreamer &getActiveStreamer() { return IsBuffering ? 
TmpBuf->BS : OutBS; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; - public: - DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU) - : DwarfExpression(DwarfVersion, CU), BS(BS) {} + DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, + DwarfCompileUnit &CU) + : DwarfExpression(DwarfVersion, CU), OutBS(BS) {} }; /// DwarfExpression implementation for singular DW_AT_location. class DIEDwarfExpression final : public DwarfExpression { -const AsmPrinter &AP; - DIELoc &DIE; + const AsmPrinter &AP; + DIELoc &OutDIE; + DIELoc TmpDIE; + bool IsBuffering = false; + + /// Return the DIE that currently is being emitted to. + DIELoc &getActiveDIE() { return IsBuffering ? 
TmpDIE : OutDIE; } void emitOp(uint8_t Op, const char *Comment = nullptr) override; void emitSigned(int64_t Value) override; void emitUnsigned(uint64_t Value) override; void emitData1(uint8_t Value) override; void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) override; public: @@ -331,7 +404,7 @@ public: DIELoc *finalize() { DwarfExpression::finalize(); - return &DIE; + return &OutDIE; } }; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 244678ce9dc..35fa51fb24c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -32,15 +32,9 @@ class LexicalScope; class MCSection; // Data structure to hold a range for range lists. -class RangeSpan { -public: - RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} - const MCSymbol *getStart() const { return Start; } - const MCSymbol *getEnd() const { return End; } - void setEnd(const MCSymbol *E) { End = E; } - -private: - const MCSymbol *Start, *End; +struct RangeSpan { + const MCSymbol *Begin; + const MCSymbol *End; }; class RangeSpanList { @@ -86,10 +80,6 @@ class DwarfFile { /// The table is shared by all units. MCSymbol *RnglistsTableBaseSym = nullptr; - /// DWARF v5: The symbol that designates the base of the locations list table. - /// The table is shared by all units. - MCSymbol *LoclistsTableBaseSym = nullptr; - /// The variables of a lexical scope. struct ScopeVars { /// We need to sort Args by ArgNo and check for duplicates. 
This could also @@ -167,9 +157,6 @@ public: MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } - MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; } - void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; } - /// \returns false if the variable was merged with a previous one. bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 991ab94b50a..37c68c08579 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -47,31 +47,42 @@ using namespace llvm; #define DEBUG_TYPE "dwarfdebug" DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, - DwarfCompileUnit &CU, - DIELoc &DIE) - : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), - DIE(DIE) {} + DwarfCompileUnit &CU, DIELoc &DIE) + : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {} void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Op); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op); } void DIEDwarfExpression::emitSigned(int64_t Value) { - CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); + CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value); } void DIEDwarfExpression::emitUnsigned(uint64_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_udata, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value); } void DIEDwarfExpression::emitData1(uint8_t Value) { - CU.addUInt(DIE, dwarf::DW_FORM_data1, Value); + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value); } void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) { - CU.addBaseTypeRef(DIE, Idx); + CU.addBaseTypeRef(getActiveDIE(), Idx); } +void DIEDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + IsBuffering = true; +} + +void 
DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DIEDwarfExpression::getTemporaryBufferSize() { + return TmpDIE.ComputeSize(&AP); +} + +void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } + bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); @@ -205,6 +216,10 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { MDNodeToDieMap.insert(std::make_pair(Desc, D)); } +void DwarfUnit::insertDIE(DIE *D) { + MDNodeToDieMap.insert(std::make_pair(nullptr, D)); +} + void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, @@ -718,7 +733,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const { return ""; // FIXME: Decide whether to implement this for non-C++ languages. - if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage())) return ""; std::string CS; @@ -942,6 +957,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); + if (CTy->getExportSymbols()) + addFlag(Buffer, dwarf::DW_AT_export_symbols); + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. 
// Rust uses DW_AT_containing_type to link a vtable to the type @@ -1696,15 +1714,6 @@ void DwarfUnit::addRnglistsBase() { TLOF.getDwarfRnglistsSection()->getBeginSymbol()); } -void DwarfUnit::addLoclistsBase() { - assert(DD->getDwarfVersion() >= 5 && - "DW_AT_loclists_base requires DWARF version 5 or later"); - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base, - DU->getLoclistsTableBaseSym(), - TLOF.getDwarfLoclistsSection()->getBeginSymbol()); -} - void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { addFlag(D, dwarf::DW_AT_declaration); StringRef Name = CTy->getName(); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 56c934a35ae..46c52a1faf4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -127,6 +127,8 @@ public: /// the mappings are kept in DwarfDebug. void insertDIE(const DINode *Desc, DIE *D); + void insertDIE(DIE *D); + /// Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); @@ -214,15 +216,6 @@ public: /// Add thrown types. void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); - // FIXME: Should be reformulated in terms of addComplexAddress. - /// Start with the address based on the location provided, and generate the - /// DWARF information necessary to find the actual Block variable (navigating - /// the Block struct) based on the starting location. Add the DWARF - /// information to the die. Obsolete, please use addComplexAddress instead. - void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location); - /// Add a new type attribute to the specified entity. /// /// This takes and attribute parameter because DW_AT_friend attributes are @@ -279,9 +272,6 @@ public: /// Add the DW_AT_rnglists_base attribute to the unit DIE. 
void addRnglistsBase(); - /// Add the DW_AT_loclists_base attribute to the unit DIE. - void addLoclistsBase(); - virtual DwarfCompileUnit &getCU() = 0; void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 99e3687b36b..31dfaaac836 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -426,7 +426,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // EHABI). In this case LSDASection will be NULL. if (LSDASection) Asm->OutStreamer->SwitchSection(LSDASection); - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); // Emit the LSDA. MCSymbol *GCCETSym = @@ -602,11 +602,11 @@ MCSymbol *EHStreamer::emitExceptionTable() { } if (HaveTTData) { - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); emitTypeInfos(TTypeEncoding, TTBaseLabel); } - Asm->EmitAlignment(2); + Asm->EmitAlignment(Align(4)); return GCCETSym; } diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 39392b79e96..3849644d158 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, **/ // Align to address width. - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); // Emit PointCount. OS.AddComment("safe point count"); diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3145cc90dc7..b4eda5fa8c5 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, report_fatal_error(" Too much descriptor for ocaml GC"); } AP.emitInt16(NumDescriptors); - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? 
Align(4) : Align(8)); for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), IE = Info.funcinfo_end(); @@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.emitInt16(K->StackOffset); } - AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); } } } diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 155e91ce61a..0398675577c 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -982,8 +982,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { OS.EmitValueToAlignment(4); OS.EmitLabel(LSDALabel); - const Function *Per = - dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + const auto *Per = cast(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index dc7eaf6a5fe..27b298dcf6a 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -382,7 +382,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); - NewLI->setAlignment(LI->getAlignment()); + NewLI->setAlignment(MaybeAlign(LI->getAlignment())); NewLI->setVolatile(LI->isVolatile()); NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); @@ -469,7 +469,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); - NewSI->setAlignment(SI->getAlignment()); + NewSI->setAlignment(MaybeAlign(SI->getAlignment())); NewSI->setVolatile(SI->isVolatile()); NewSI->setAtomic(SI->getOrdering(), 
SI->getSyncScopeID()); LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); @@ -1376,7 +1376,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop( Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); // Atomics require at least natural alignment. - InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8); + InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8)); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1711,7 +1711,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'expected' argument, if present. if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); - AllocaCASExpected->setAlignment(AllocaAlignment); + AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); AllocaCASExpected_i8 = @@ -1730,7 +1730,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); - AllocaValue->setAlignment(AllocaAlignment); + AllocaValue->setAlignment(MaybeAlign(AllocaAlignment)); AllocaValue_i8 = Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); @@ -1742,7 +1742,7 @@ bool AtomicExpand::expandAtomicOpToLibcall( // 'ret' argument. 
if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); - AllocaResult->setAlignment(AllocaAlignment); + AllocaResult->setAlignment(MaybeAlign(AllocaAlignment)); unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fb54b5d6c8d..455916eeb82 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -129,9 +129,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { getAnalysis()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis()); - return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), - MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + return Folder.OptimizeFunction( + MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, @@ -161,6 +162,11 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Avoid matching if this pointer gets reused. TriedMerging.erase(MBB); + // Update call site info. + std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) { + if (MI.isCall(MachineInstr::IgnoreBundle)) + MF->eraseCallSiteInfo(&MI); + }); // Remove the block. MF->erase(MBB); EHScopeMembership.erase(MBB); @@ -1306,6 +1312,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { /// result in infinite loops. static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { + assert(MBB1 && MBB2 && "Unknown MachineBasicBlock"); + // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. 
This allows us to // optimize branches that branch to either a return block or an assert block @@ -1843,7 +1851,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, template static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, Container &Set) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Set.insert(*AI); } else { @@ -1871,7 +1879,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { @@ -1909,7 +1917,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, return Loc; if (!MO.isReg() || MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (Uses.count(Reg)) { @@ -1937,14 +1945,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } @@ -2010,7 +2018,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -2060,13 +2068,13 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) 
continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (!AllDefsSet.count(Reg)) { continue; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) ActiveDefsSet.erase(*AI); } else { @@ -2078,8 +2086,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg || Register::isVirtualRegister(Reg)) continue; addRegAndItsAliases(Reg, TRI, ActiveDefsSet); addRegAndItsAliases(Reg, TRI, AllDefsSet); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 3ad6266d4f3..6efdc9efa96 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -64,19 +64,18 @@ class BranchRelaxation : public MachineFunctionPass { /// Compute the offset immediately following this block. \p MBB is the next /// block. unsigned postOffset(const MachineBasicBlock &MBB) const { - unsigned PO = Offset + Size; - unsigned Align = MBB.getAlignment(); - if (Align == 0) + const unsigned PO = Offset + Size; + const Align Alignment = MBB.getAlignment(); + if (Alignment == 1) return PO; - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); - if (Align <= ParentAlign) - return PO + OffsetToAlignment(PO, AlignAmt); + const Align ParentAlign = MBB.getParent()->getAlignment(); + if (Alignment <= ParentAlign) + return PO + offsetToAlignment(PO, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. 
- return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt); + return PO + Alignment.value() + offsetToAlignment(PO, Alignment); } }; @@ -128,9 +127,8 @@ void BranchRelaxation::verify() { #ifndef NDEBUG unsigned PrevNum = MF->begin()->getNumber(); for (MachineBasicBlock &MBB : *MF) { - unsigned Align = MBB.getAlignment(); - unsigned Num = MBB.getNumber(); - assert(BlockInfo[Num].Offset % (1u << Align) == 0); + const unsigned Num = MBB.getNumber(); + assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset)); assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); assert(BlockInfo[Num].Size == computeBlockSize(MBB)); PrevNum = Num; @@ -143,7 +141,7 @@ void BranchRelaxation::verify() { LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) << format("size=%#x\n", BBI.Size); } } diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp index cc4b2caa9be..709164e5f17 100644 --- a/lib/CodeGen/BreakFalseDeps.cpp +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -9,12 +9,11 @@ /// \file Break False Dependency pass. /// /// Some instructions have false dependencies which cause unnecessary stalls. -/// For exmaple, instructions that only write part of a register, and implicitly -/// need to read the other parts of the register. This may cause unwanted +/// For example, instructions may write part of a register and implicitly +/// need to read the other parts of the register. This may cause unwanted /// stalls preventing otherwise unrelated instructions from executing in /// parallel in an out-of-order CPU. -/// This pass is aimed at identifying and avoiding these depepndencies when -/// possible. +/// This pass is aimed at identifying and avoiding these dependencies. 
// //===----------------------------------------------------------------------===// @@ -24,6 +23,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -109,7 +109,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, MachineOperand &MO = MI->getOperand(OpIdx); assert(MO.isUndef() && "Expected undef machine operand"); - unsigned OriginalReg = MO.getReg(); + Register OriginalReg = MO.getReg(); // Update only undef operands that have reg units that are mapped to one root. for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { @@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - unsigned reg = MI->getOperand(OpIdx).getReg(); + Register reg = MI->getOperand(OpIdx).getReg(); unsigned Clearance = RDA->getClearance(MI, reg); LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -178,6 +178,7 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { assert(!MI->isDebugInstr() && "Won't process debug values"); // Break dependence on undef uses. Do this before updating LiveRegs below. + // This can remove a false dependence with no additional instructions. unsigned OpNum; unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { @@ -189,6 +190,11 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) { UndefReads.push_back(std::make_pair(MI, OpNum)); } + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); @@ -209,6 +215,11 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { if (UndefReads.empty()) return; + // The code below allows the target to create a new instruction to break the + // dependence. That opposes the goal of minimizing size, so bail out now. + if (MF->getFunction().hasMinSize()) + return; + // Collect this block's live out register units. LiveRegSet.init(*TRI); // We do not need to care about pristine registers as they are just preserved diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 7164fdfb788..bf97aaee366 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -40,7 +40,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineRegisterInfo &MRI = MF.getRegInfo(); VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); @@ -48,10 +48,11 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, } // Return the preferred allocation register for reg, given a COPY instruction. -static unsigned copyHint(const MachineInstr *mi, unsigned reg, +static Register copyHint(const MachineInstr *mi, unsigned reg, const TargetRegisterInfo &tri, const MachineRegisterInfo &mri) { - unsigned sub, hreg, hsub; + unsigned sub, hsub; + Register hreg; if (mi->getOperand(0).getReg() == reg) { sub = mi->getOperand(0).getSubReg(); hreg = mi->getOperand(1).getReg(); @@ -65,11 +66,11 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, if (!hreg) return 0; - if (TargetRegisterInfo::isVirtualRegister(hreg)) - return sub == hsub ? hreg : 0; + if (Register::isVirtualRegister(hreg)) + return sub == hsub ? 
hreg : Register(); const TargetRegisterClass *rc = mri.getRegClass(reg); - unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); if (rc->contains(CopiedPReg)) return CopiedPReg; @@ -112,7 +113,7 @@ static bool isRematerializable(const LiveInterval &LI, // If the original (pre-splitting) registers match this // copy came from a split. - if (!TargetRegisterInfo::isVirtualRegister(Reg) || + if (!Register::isVirtualRegister(Reg) || VRM->getOriginal(Reg) != Original) return false; @@ -243,7 +244,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // Get allocation hints from copies. if (!mi->isCopy()) continue; - unsigned hint = copyHint(mi, li.reg, tri, mri); + Register hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; // Force hweight onto the stack so that x86 doesn't add hidden precision, @@ -251,8 +252,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) - CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint))); + if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint)) + CopyHints.insert( + CopyHint(hint, hweight, Register::isPhysicalRegister(hint))); } Hint.clear(); diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 497fcb14784..a397039180a 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,6 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. 
StackOffset = 0; - MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -41,20 +40,21 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, /// Allocate space on the stack large enough to pass an argument by value. /// The size and alignment information of the argument is encoded in /// its parameter attribute. -void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, int MinSize, + int MinAlignment, ISD::ArgFlagsTy ArgFlags) { + Align MinAlign(MinAlignment); + Align Alignment(ArgFlags.getByValAlign()); unsigned Size = ArgFlags.getByValSize(); if (MinSize > (int)Size) Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - ensureMaxAlignment(Align); - MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + if (MinAlign > Alignment) + Alignment = MinAlign; + ensureMaxAlignment(Alignment); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, + Alignment.value()); Size = unsigned(alignTo(Size, MinAlign)); - unsigned Offset = AllocateStack(Size, Align); + unsigned Offset = AllocateStack(Size, Alignment.value()); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -90,13 +90,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function argument #" + Twine(i)); } } @@ -122,13 
+117,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << EVT(VT).getEVTString() << '\n'; -#endif - llvm_unreachable(nullptr); - } + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + report_fatal_error("unable to allocate function return #" + Twine(i)); } } @@ -209,7 +199,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; - unsigned SavedMaxStackArgAlign = MaxStackArgAlign; + Align SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index c37ed57781d..ad9525f927e 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); + initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); initializeEarlyTailDuplicatePass(Registry); initializeExpandMemCmpPassPass(Registry); @@ -53,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMIRCanonicalizerPass(Registry); + initializeMIRNamerPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); @@ -63,10 +65,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineFunctionPrinterPassPass(Registry); 
initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); - initializeMachineModuleInfoPass(Registry); + initializeMachineModuleInfoWrapperPassPass(Registry); initializeMachineOptimizationRemarkEmitterPassPass(Registry); initializeMachineOutlinerPass(Registry); initializeMachinePipelinerPass(Registry); + initializeModuloScheduleTestPass(Registry); initializeMachinePostDominatorTreePass(Registry); initializeMachineRegionInfoPassPass(Registry); initializeMachineSchedulerPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 52b4bbea012..fa4432ea23e 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -344,7 +344,7 @@ class TypePromotionTransaction; // Get the DominatorTree, building if necessary. DominatorTree &getDT(Function &F) { if (!DT) - DT = llvm::make_unique(F); + DT = std::make_unique(F); return *DT; } @@ -424,7 +424,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); } - TLInfo = &getAnalysis().getTLI(); + TLInfo = &getAnalysis().getTLI(F); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -1524,7 +1524,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL) { BasicBlock *UserBB = User->getParent(); DenseMap InsertedTruncs; - TruncInst *TruncI = dyn_cast(User); + auto *TruncI = cast(User); bool MadeChange = false; for (Value::user_iterator TruncUI = TruncI->user_begin(), @@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, TheUse = InsertedShift; } - // If we removed all uses, nuke the shift. + // If we removed all uses, or there are none, nuke the shift. 
if (ShiftI->use_empty()) { salvageDebugInfo(*ShiftI); ShiftI->eraseFromParent(); + MadeChange = true; } return MadeChange; @@ -1811,7 +1812,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { AllocaInst *AI; if ((AI = dyn_cast(Val)) && AI->getAlignment() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(PrefAlign); + AI->setAlignment(MaybeAlign(PrefAlign)); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -1821,7 +1822,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GV->getPointerAlignment(*DL) < PrefAlign && DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) - GV->setAlignment(PrefAlign); + GV->setAlignment(MaybeAlign(PrefAlign)); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -1867,24 +1868,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { }); return true; } - case Intrinsic::objectsize: { - // Lower all uses of llvm.objectsize.* - Value *RetVal = - lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); - - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } - case Intrinsic::is_constant: { - // If is_constant hasn't folded away yet, lower it to false now. 
- Constant *RetVal = ConstantInt::get(II->getType(), 0); - resetIteratorIfInvalidatedWhileCalling(BB, [&]() { - replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); - }); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); @@ -2024,17 +2011,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. const Function *F = BB->getParent(); - SmallVector TailCalls; + SmallVector TailCallBBs; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { // Look through bitcasts. Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts(); CallInst *CI = dyn_cast(IncomingVal); + BasicBlock *PredBB = PN->getIncomingBlock(I); // Make sure the phi value is indeed produced by the tail call. 
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + if (CI && CI->hasOneUse() && CI->getParent() == PredBB && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(PredBB); } } else { SmallPtrSet VisitedBBs; @@ -2052,24 +2040,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT CallInst *CI = dyn_cast(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCalls.push_back(CI); + TailCallBBs.push_back(*PI); } } bool Changed = false; - for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { - CallInst *CI = TailCalls[i]; - CallSite CS(CI); - + for (auto const &TailCallBB : TailCallBBs) { // Make sure the call instruction is followed by an unconditional branch to // the return block. - BasicBlock *CallBB = CI->getParent(); - BranchInst *BI = dyn_cast(CallBB->getTerminator()); + BranchInst *BI = dyn_cast(TailCallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; - // Duplicate the return into CallBB. - (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); + // Duplicate the return into TailCallBB. 
+ (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB); ModifiedDT = Changed = true; ++NumRetsDup; } @@ -2683,26 +2667,26 @@ private: void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, Value *NewVal) { - Actions.push_back(llvm::make_unique( + Actions.push_back(std::make_unique( Inst, Idx, NewVal)); } void TypePromotionTransaction::eraseInstruction(Instruction *Inst, Value *NewVal) { Actions.push_back( - llvm::make_unique( + std::make_unique( Inst, RemovedInsts, NewVal)); } void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, Value *New) { Actions.push_back( - llvm::make_unique(Inst, New)); + std::make_unique(Inst, New)); } void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { Actions.push_back( - llvm::make_unique(Inst, NewTy)); + std::make_unique(Inst, NewTy)); } Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, @@ -2732,7 +2716,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst, void TypePromotionTransaction::moveBefore(Instruction *Inst, Instruction *Before) { Actions.push_back( - llvm::make_unique( + std::make_unique( Inst, Before)); } @@ -3048,7 +3032,7 @@ public: To = dyn_cast(OldReplacement); OldReplacement = Get(From); } - assert(Get(To) == To && "Replacement PHI node is already replaced."); + assert(To && Get(To) == To && "Replacement PHI node is already replaced."); Put(From, To); From->replaceAllUsesWith(To); AllPhiNodes.erase(From); @@ -3334,7 +3318,7 @@ private: // So the values are different and does not match. So we need them to // match. (But we register no more than one match per PHI node, so that // we won't later try to replace them twice.) - if (!MatchedPHIs.insert(FirstPhi).second) + if (MatchedPHIs.insert(FirstPhi).second) Matcher.insert({ FirstPhi, SecondPhi }); // But me must check it. WorkList.push_back({ FirstPhi, SecondPhi }); @@ -3412,11 +3396,10 @@ private: Select->setFalseValue(ST.Get(Map[FalseValue])); } else { // Must be a Phi node then. 
- PHINode *PHI = cast(V); - auto *CurrentPhi = dyn_cast(Current); + auto *PHI = cast(V); // Fill the Phi node with values from predecessors. for (auto B : predecessors(PHI->getParent())) { - Value *PV = CurrentPhi->getIncomingValueForBlock(B); + Value *PV = cast(Current)->getIncomingValueForBlock(B); assert(Map.find(PV) != Map.end() && "No predecessor Value!"); PHI->addIncoming(ST.Get(Map[PV]), B); } @@ -3785,13 +3768,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // poisoned value regular value // It should be OK since undef covers valid value. if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { - const Instruction *ExtInst = - dyn_cast(*Inst->user_begin()); + const auto *ExtInst = cast(*Inst->user_begin()); if (ExtInst->hasOneUse()) { - const Instruction *AndInst = - dyn_cast(*ExtInst->user_begin()); + const auto *AndInst = dyn_cast(*ExtInst->user_begin()); if (AndInst && AndInst->getOpcode() == Instruction::And) { - const ConstantInt *Cst = dyn_cast(AndInst->getOperand(1)); + const auto *Cst = dyn_cast(AndInst->getOperand(1)); if (Cst && Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) return true; @@ -4793,8 +4774,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << " for " << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || - (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) { + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode @@ -5816,7 +5797,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { return false; IRBuilder<> Builder(Load->getNextNode()); - auto *NewAnd = dyn_cast( + auto *NewAnd = cast( Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); // Mark this instruction as "inserted by CGP", so that other // optimizations don't touch it. @@ -6193,35 +6174,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { // OpsToSink can contain multiple uses in a use chain (e.g. // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating - // uses must come first, which means they are sunk first, temporarily creating - // invalid IR. This will be fixed once their dominated users are sunk and - // updated. + // uses must come first, so we process the ops in reverse order so as to not + // create invalid IR. BasicBlock *TargetBB = I->getParent(); bool Changed = false; SmallVector ToReplace; - for (Use *U : OpsToSink) { + for (Use *U : reverse(OpsToSink)) { auto *UI = cast(U->get()); if (UI->getParent() == TargetBB || isa(UI)) continue; ToReplace.push_back(U); } - SmallPtrSet MaybeDead; + SetVector MaybeDead; + DenseMap NewInstructions; + Instruction *InsertPoint = I; for (Use *U : ToReplace) { auto *UI = cast(U->get()); Instruction *NI = UI->clone(); + NewInstructions[UI] = NI; MaybeDead.insert(UI); LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); - NI->insertBefore(I); + NI->insertBefore(InsertPoint); + InsertPoint = NI; InsertedInsts.insert(NI); - U->set(NI); + + // Update the use for the new instruction, making sure that we update the + // sunk instruction uses, if it is part of a chain that has already been + // sunk. + Instruction *OldI = cast(U->getUser()); + if (NewInstructions.count(OldI)) + NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); + else + U->set(NI); Changed = true; } // Remove instructions that are dead after sinking. 
- for (auto *I : MaybeDead) - if (!I->hasNUsesOrMore(1)) + for (auto *I : MaybeDead) { + if (!I->hasNUsesOrMore(1)) { + LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n"); I->eraseFromParent(); + } + } return Changed; } @@ -7106,7 +7101,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { for (auto &I : reverse(BB)) { if (makeBitReverse(I, *DL, *TLI)) { MadeBitReverse = MadeChange = true; - ModifiedDT = true; break; } } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 4144c243a34..702e7e244bc 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = nullptr; @@ -272,7 +272,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; @@ -303,7 +303,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isUse()) continue; @@ -457,6 +457,7 @@ BreakAntiDependencies(const std::vector &SUnits, if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } + assert(Max && "Failed to find bottom of the critical path"); #ifndef NDEBUG { @@ -612,7 +613,7 @@ BreakAntiDependencies(const std::vector &SUnits, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = 
MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index b99be5d7a87..a169c3cb16b 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,6 +23,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -71,39 +73,13 @@ static DFAInput getDFAInsnInput(const std::vector &InsnClass) { // -------------------------------------------------------------------- -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, - const DFAStateInput (*SIT)[2], - const unsigned *SET): - InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { - // Make sure DFA types are large enough for the number of terms & resources. - static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); - static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); -} - -// Read the DFA transition table and update CachedTable. -// -// Format of the transition tables: -// DFAStateInputTable[][2] = pairs of for all valid -// transitions -// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable -// for the ith state -// -void DFAPacketizer::ReadTable(unsigned int state) { - unsigned ThisState = DFAStateEntryTable[state]; - unsigned NextStateInTable = DFAStateEntryTable[state+1]; - // Early exit in case CachedTable has already contains this - // state's transitions. 
- if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) - return; - - for (unsigned i = ThisState; i < NextStateInTable; i++) - CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = - DFAStateInputTable[i][1]; -} +// Make sure DFA types are large enough for the number of terms & resources. +static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= + (8 * sizeof(DFAInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); +static_assert( + (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); // Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { @@ -129,9 +105,7 @@ DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - return CachedTable.count(StateTrans) != 0; + return A.canAdd(InsnInput); } // Reserve the resources occupied by a MCInstrDesc and change the current @@ -139,10 +113,7 @@ bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); - UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); - ReadTable(CurrentState); - assert(CachedTable.count(StateTrans) != 0); - CurrentState = CachedTable[StateTrans]; + A.add(InsnInput); } // Check if the resources occupied by a machine instruction are available @@ -159,19 +130,33 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) { reserveResources(&MID); } +unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) { + ArrayRef NfaPaths = A.getNfaPaths(); + assert(!NfaPaths.empty() && "Invalid bundle!"); + const 
NfaPath &RS = NfaPaths.front(); + + // RS stores the cumulative resources used up to and including the I'th + // instruction. The 0th instruction is the base case. + if (InstIdx == 0) + return RS[0]; + // Return the difference between the cumulative resources used by InstIdx and + // its predecessor. + return RS[InstIdx] ^ RS[InstIdx - 1]; +} + namespace llvm { // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { private: - AliasAnalysis *AA; + AAResults *AA; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); // Actual scheduling work. void schedule() override; @@ -189,7 +174,7 @@ protected: DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA) + AAResults *AA) : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } @@ -207,9 +192,10 @@ void DefaultVLIWScheduler::schedule() { } VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, - MachineLoopInfo &mli, AliasAnalysis *aa) + MachineLoopInfo &mli, AAResults *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + ResourceTracker->setTrackResources(true); VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } @@ -224,8 +210,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, LLVM_DEBUG({ if (!CurrentPacketMIs.empty()) { dbgs() << "Finalizing packet:\n"; - for (MachineInstr *MI : CurrentPacketMIs) - dbgs() << " * " << *MI; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } } }); if (CurrentPacketMIs.size() > 1) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp 
b/lib/CodeGen/DeadMachineInstructionElim.cpp index 049ce706330..9a537c859a6 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -75,8 +75,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Don't delete live physreg defs, or any reserved register defs. if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; @@ -140,8 +140,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. 
@@ -159,8 +159,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) LivePhysRegs.set(*AI); } diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index fe78acf4d80..6d5306c1dc0 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -154,7 +154,7 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC, const MachineOperand &MO) { assert(lowersToCopies(MI)); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); if (DstRC == SrcRC) return false; @@ -194,8 +194,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes) { if (!MO.readsReg()) return; - unsigned MOReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!Register::isVirtualRegister(MOReg)) return; unsigned MOSubReg = MO.getSubReg(); @@ -203,7 +203,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); - unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + unsigned MORegIdx = Register::virtReg2Index(MOReg); VRegInfo &MORegInfo = VRegInfos[MORegIdx]; LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; // Any change at all? 
@@ -219,7 +219,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes) { for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); addUsedLanesOnOperand(MO, UsedOnMO); @@ -230,8 +230,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes, const MachineOperand &MO) const { unsigned OpNum = MI.getOperandNo(&MO); - assert(lowersToCopies(MI) && DefinedByCopy[ - TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]); + assert(lowersToCopies(MI) && + DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]); switch (MI.getOpcode()) { case TargetOpcode::COPY: @@ -250,7 +250,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, return MO2UsedLanes; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LaneBitmask MO1UsedLanes; if (RC->CoveredBySubRegs) @@ -285,10 +285,10 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, if (MI.getOpcode() == TargetOpcode::PATCHPOINT) return; const MachineOperand &Def = *MI.defs().begin(); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return; @@ -360,7 +360,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { if (lowersToCopies(DefMI)) { // Start optimisatically with no used or defined lanes for copy // 
instructions. The following dataflow analysis will add more bits. - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); DefinedByCopy.set(RegIdx); PutInWorklist(RegIdx); @@ -377,17 +377,17 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { for (const MachineOperand &MO : DefMI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; LaneBitmask MODefinedLanes; - if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (Register::isPhysicalRegister(MOReg)) { MODefinedLanes = LaneBitmask::getAll(); } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { MODefinedLanes = LaneBitmask::getAll(); } else { - assert(TargetRegisterInfo::isVirtualRegister(MOReg)); + assert(Register::isVirtualRegister(MOReg)); if (MRI->hasOneDef(MOReg)) { const MachineOperand &MODef = *MRI->def_begin(MOReg); const MachineInstr &MODefMI = *MODef.getParent(); @@ -428,10 +428,10 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { if (lowersToCopies(UseMI)) { assert(UseMI.getDesc().getNumDefs() == 1); const MachineOperand &Def = *UseMI.defs().begin(); - unsigned DefReg = Def.getReg(); + Register DefReg = Def.getReg(); // The used lanes of COPY-like instruction operands are determined by the // following dataflow analysis. - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { // But ignore copies across incompatible register classes. 
bool CrossCopy = false; if (lowersToCopies(UseMI)) { @@ -470,10 +470,10 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (!lowersToCopies(MI)) return false; const MachineOperand &Def = MI.getOperand(0); - unsigned DefReg = Def.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = Def.getReg(); + if (!Register::isVirtualRegister(DefReg)) return false; - unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + unsigned DefRegIdx = Register::virtReg2Index(DefReg); if (!DefinedByCopy.test(DefRegIdx)) return false; @@ -482,8 +482,8 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, if (UsedLanes.any()) return false; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) { const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); } @@ -494,7 +494,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { // First pass: Populate defs/uses of vregs with initial values unsigned NumVirtRegs = MRI->getNumVirtRegs(); for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Determine used/defined lanes and add copy instructions to worklist. VRegInfo &Info = VRegInfos[RegIdx]; @@ -508,7 +508,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { Worklist.pop_front(); WorklistMembers.reset(RegIdx); VRegInfo &Info = VRegInfos[RegIdx]; - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); // Transfer UsedLanes to operands of DefMI (backwards dataflow). 
MachineOperand &Def = *MRI->def_begin(Reg); @@ -522,7 +522,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + unsigned Reg = Register::index2VirtReg(RegIdx); const VRegInfo &Info = VRegInfos[RegIdx]; dbgs() << printReg(Reg, nullptr) << " Used: " << PrintLaneMask(Info.UsedLanes) @@ -536,10 +536,10 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; - unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + unsigned RegIdx = Register::virtReg2Index(Reg); const VRegInfo &RegInfo = VRegInfos[RegIdx]; if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { LLVM_DEBUG(dbgs() diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 0a83760befa..e5694218b5c 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" @@ -140,6 +141,18 @@ private: /// speculated. bool canSpeculateInstrs(MachineBasicBlock *MBB); + /// Return true if all non-terminator instructions in MBB can be safely + /// predicated. + bool canPredicateInstrs(MachineBasicBlock *MBB); + + /// Scan through instruction dependencies and update InsertAfter array. + /// Return false if any dependency is incompatible with if conversion. 
+ bool InstrDependenciesAllowIfConv(MachineInstr *I); + + /// Predicate all instructions of the basic block with current condition + /// except for terminators. Reverse the condition if ReversePredicate is set. + void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); + /// Find a valid insertion point in Head. bool findInsertionPoint(); @@ -163,11 +176,14 @@ public: /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, /// initialize the internal state, and return true. - bool canConvertIf(MachineBasicBlock *MBB); + /// If predicate is set try to predicate the block otherwise try to + /// speculatively execute it. + bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); /// convertIf - If-convert the last block passed to canConvertIf(), assuming /// it is possible. Add any erased blocks to RemovedBlocks. - void convertIf(SmallVectorImpl &RemovedBlocks); + void convertIf(SmallVectorImpl &RemovedBlocks, + bool Predicate = false); }; } // end anonymous namespace @@ -225,37 +241,112 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { } // Check for any dependencies on Head instructions. - for (const MachineOperand &MO : I->operands()) { - if (MO.isRegMask()) { - LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); - return false; - } - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); + if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} - // Remember clobbered regunits. - if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) - for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) - ClobberedRegUnits.set(*Units); +/// Check that there is no dependencies preventing if conversion. +/// +/// If instruction uses any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. 
+bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask()) { + LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); - if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (!DefMI || DefMI->getParent() != Head) - continue; - if (InsertAfter.insert(DefMI).second) - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " - << *DefMI); - if (DefMI->isTerminator()) { - LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); - return false; - } + // Remember clobbered regunits. + if (MO.isDef() && Register::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO.readsReg() || !Register::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI).second) + LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " + << *DefMI); + if (DefMI->isTerminator()) { + LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; } } return true; } +/// canPredicateInstrs - Returns true if all the instructions in MBB can safely +/// be predicates. The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. 
+ if (!MBB->livein_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); + return false; + } + + // Check that instruction is predicable and that it is not already + // predicated. + if (!TII->isPredicable(*I) || TII->isPredicated(*I)) { + return false; + } + + // Check for any dependencies on Head instructions. + if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} + +// Apply predicate to all instructions in the machine block. +void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { + auto Condition = Cond; + if (ReversePredicate) + TII->reverseBranchCondition(Condition); + // Terminators don't need to be predicated as they will be removed. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + TII->PredicateInstruction(*I, Condition); + } +} /// Find an insertion point in Head for the speculated instructions. The /// insertion point must be: @@ -288,8 +379,8 @@ bool SSAIfConv::findInsertionPoint() { // We're ignoring regmask operands. That is conservatively correct. 
if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // I clobbers Reg, so it isn't live before I. if (MO.isDef()) @@ -337,7 +428,7 @@ bool SSAIfConv::findInsertionPoint() { /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is /// a potential candidate for if-conversion. Fill out the internal state. /// -bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { Head = MBB; TBB = FBB = Tail = nullptr; @@ -378,8 +469,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { } // This is a triangle or a diamond. - // If Tail doesn't have any phis, there must be side effects. - if (Tail->empty() || !Tail->front().isPHI()) { + // Skip if we cannot predicate and there are no phis skip as there must be + // side effects that can only be handled with predication. + if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { LLVM_DEBUG(dbgs() << "No phis in tail.\n"); return false; } @@ -423,8 +515,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (PI.PHI->getOperand(i+1).getMBB() == FPred) PI.FReg = PI.PHI->getOperand(i).getReg(); } - assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); - assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); // Get target information. if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, @@ -437,10 +529,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Check that the conditional instructions can be speculated. 
InsertAfter.clear(); ClobberedRegUnits.reset(); - if (TBB != Tail && !canSpeculateInstrs(TBB)) - return false; - if (FBB != Tail && !canSpeculateInstrs(FBB)) - return false; + if (Predicate) { + if (TBB != Tail && !canPredicateInstrs(TBB)) + return false; + if (FBB != Tail && !canPredicateInstrs(FBB)) + return false; + } else { + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + } // Try to find a valid insertion point for the speculated instructions in the // head basic block. @@ -467,7 +566,7 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - unsigned DstReg = PI.PHI->getOperand(0).getReg(); + Register DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); @@ -494,7 +593,7 @@ void SSAIfConv::rewritePHIOperands() { // equal. DstReg = PI.TReg; } else { - unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + Register PHIDst = PI.PHI->getOperand(0).getReg(); DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); @@ -521,7 +620,8 @@ void SSAIfConv::rewritePHIOperands() { /// /// Any basic blocks erased will be added to RemovedBlocks. /// -void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { +void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks, + bool Predicate) { assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); // Update statistics. @@ -531,11 +631,16 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { ++NumDiamondsConv; // Move all instructions into Head, except for the terminators. 
- if (TBB != Tail) + if (TBB != Tail) { + if (Predicate) + PredicateBlock(TBB, /*ReversePredicate=*/false); Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) + } + if (FBB != Tail) { + if (Predicate) + PredicateBlock(FBB, /*ReversePredicate=*/true); Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - + } // Are there extra Tail predecessors? bool ExtraPreds = Tail->pred_size() != 2; if (ExtraPreds) @@ -587,7 +692,6 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { LLVM_DEBUG(dbgs() << *Head); } - //===----------------------------------------------------------------------===// // EarlyIfConverter Pass //===----------------------------------------------------------------------===// @@ -613,8 +717,6 @@ public: private: bool tryConvertIf(MachineBasicBlock*); - void updateDomTree(ArrayRef Removed); - void updateLoops(ArrayRef Removed); void invalidateTraces(); bool shouldConvertIf(); }; @@ -642,32 +744,36 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +namespace { /// Update the dominator tree after if-conversion erased some blocks. -void EarlyIfConverter::updateDomTree(ArrayRef Removed) { +void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, + ArrayRef Removed) { // convertIf can remove TBB, FBB, and Tail can be merged into Head. // TBB and FBB should not dominate any blocks. // Tail children should be transferred to Head. 
MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + for (auto B : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(B); assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); } - DomTree->eraseNode(Removed[i]); + DomTree->eraseNode(B); } } /// Update LoopInfo after if-conversion. -void EarlyIfConverter::updateLoops(ArrayRef Removed) { +void updateLoops(MachineLoopInfo *Loops, + ArrayRef Removed) { if (!Loops) return; // If-conversion doesn't change loop structure, and it doesn't mess with back // edges, so updating LoopInfo is simply removing the dead blocks. - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); + for (auto B : Removed) + Loops->removeBlock(B); } +} // namespace /// Invalidate MachineTraceMetrics before if-conversion. 
void EarlyIfConverter::invalidateTraces() { @@ -783,8 +889,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { SmallVector RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); } return Changed; } @@ -822,3 +928,132 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +//===----------------------------------------------------------------------===// +// EarlyIfPredicator Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfPredicator : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + TargetSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfPredicator() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return "Early If-predicator"; } + +protected: + bool tryConvertIf(MachineBasicBlock *); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +#undef DEBUG_TYPE +#define DEBUG_TYPE "early-if-predicator" + +char EarlyIfPredicator::ID = 0; +char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, + false) + +void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Apply the target heuristic to decide if the transformation is 
profitable. +bool EarlyIfPredicator::shouldConvertIf() { + if (IfConv.isTriangle()) { + MachineBasicBlock &IfBlock = + (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; + + unsigned ExtraPredCost = 0; + unsigned Cycles = 0; + for (MachineInstr &I : IfBlock) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + Cycles += NumCycles - 1; + ExtraPredCost += TII->getPredicationCost(I); + } + + return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost, + BranchProbability::getUnknown()); + } + unsigned TExtra = 0; + unsigned FExtra = 0; + unsigned TCycle = 0; + unsigned FCycle = 0; + for (MachineInstr &I : *IfConv.TBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + TCycle += NumCycles - 1; + TExtra += TII->getPredicationCost(I); + } + for (MachineInstr &I : *IfConv.FBB) { + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); + if (NumCycles > 1) + FCycle += NumCycles - 1; + FExtra += TII->getPredicationCost(I); + } + return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB, + FCycle, FExtra, + BranchProbability::getUnknown()); +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { + // If-convert MBB and update analyses. 
+ SmallVector RemovedBlocks; + IfConv.convertIf(RemovedBlocks, /*Predicate*/ true); + Changed = true; + updateDomTree(DomTree, IfConv, RemovedBlocks); + updateLoops(Loops, RemovedBlocks); + } + return Changed; +} + +bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" + << "********** Function: " << MF.getName() << '\n'); + if (skipFunction(MF.getFunction())) + return false; + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + MRI = &MF.getRegInfo(); + SchedModel.init(&STI); + DomTree = &getAnalysis(); + Loops = getAnalysisIfAvailable(); + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. 
+ for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) + Changed = true; + + return Changed; +} diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp index a2dd5eee33b..2cca05ea6f5 100644 --- a/lib/CodeGen/ExecutionDomainFix.cpp +++ b/lib/CodeGen/ExecutionDomainFix.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index b425482e6ad..9916f2de041 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -795,7 +795,7 @@ public: TPC->getTM().getSubtargetImpl(F)->getTargetLowering(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); auto PA = runImpl(F, TLI, TTI, TL); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 0ab70aff7dc..1fc57fac148 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -79,17 +79,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned InsReg = MI->getOperand(2).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register InsReg = MI->getOperand(2).getReg(); assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?"); unsigned SubIdx = MI->getOperand(3).getImm(); assert(SubIdx != 0 && "Invalid index for insert_subreg"); - unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); + Register DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Insert destination must be in a physical 
register"); - assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + assert(Register::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 9c53550eaa9..c1d22ef8919 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -72,7 +72,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(llvm::make_unique(F, *S)); + Functions.push_back(std::make_unique(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 90571d090bf..0dc0a5bce74 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -249,7 +249,7 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -310,7 +310,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis().getFunctionInfo(MF.getFunction()); - MMI = &getAnalysis(); + MMI = &getAnalysis().getMMI(); TII = MF.getSubtarget().getInstrInfo(); // Find the size of the stack frame. 
There may be no correct static frame diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp index 4518dbee1a9..7d9d812d34b 100644 --- a/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_GEP: return true; } return false; @@ -65,9 +66,9 @@ std::unique_ptr llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { std::unique_ptr Config; if (Level == CodeGenOpt::None) - Config = make_unique(); + Config = std::make_unique(); else - Config = make_unique(); + Config = std::make_unique(); return Config; } @@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!MO.isDef()) addNodeIDRegNum(Reg); LLT Ty = MRI.getType(Reg); diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 461bc6038c2..51a74793f02 100644 --- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, return buildConstant(DstOps[0], Cst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } bool CanCopy = checkCopyToDefsPossible(DstOps); if (!canPerformCSEForOpc(Opc)) diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp 
b/lib/CodeGen/GlobalISel/CallLowering.cpp index a5d8205a34a..cdad92f7db4 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -11,14 +11,16 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "call-lowering" @@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, ArrayRef> ArgRegs, Register SwiftErrorVReg, std::function GetCalleeReg) const { + CallLoweringInfo Info; auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. - SmallVector OrigArgs; unsigned i = 0; unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); - // We don't currently support swiftself args. 
- if (OrigArg.Flags.isSwiftSelf()) - return false; - OrigArgs.push_back(OrigArg); + Info.OrigArgs.push_back(OrigArg); ++i; } - MachineOperand Callee = MachineOperand::CreateImm(0); if (const Function *F = CS.getCalledFunction()) - Callee = MachineOperand::CreateGA(F, 0); + Info.Callee = MachineOperand::CreateGA(F, 0); else - Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; - if (!OrigRet.Ty->isVoidTy()) - setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); + Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; + if (!Info.OrigRet.Ty->isVoidTy()) + setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS); - return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs, - SwiftErrorVReg); + Info.KnownCallees = + CS.getInstruction()->getMetadata(LLVMContext::MD_callees); + Info.CallConv = CS.getCallingConv(); + Info.SwiftErrorVReg = SwiftErrorVReg; + Info.IsMustTailCall = CS.isMustTailCall(); + Info.IsTailCall = CS.isTailCall() && + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + Info.IsVarArg = CS.getFunctionType()->isVarArg(); + return lowerCall(MIRBuilder, Info); } template void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const { + auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Arg.Flags.setZExt(); + Flags.setZExt(); if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Arg.Flags.setSExt(); + Flags.setSExt(); if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Arg.Flags.setInReg(); + Flags.setInReg(); if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Arg.Flags.setSRet(); + Flags.setSRet(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Arg.Flags.setSwiftSelf(); + Flags.setSwiftSelf(); if (Attrs.hasAttribute(OpIdx, 
Attribute::SwiftError)) - Arg.Flags.setSwiftError(); + Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Arg.Flags.setByVal(); + Flags.setByVal(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Arg.Flags.setInAlloca(); + Flags.setInAlloca(); - if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca()) { Type *ElementTy = cast(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. @@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Arg.Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Arg.Flags.setNest(); - Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setNest(); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); } template void @@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef DstRegs, Register SrcReg, } bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, bool CallLowering::handleAssignments(CCState &CCInfo, SmallVectorImpl &ArgLocs, MachineIRBuilder &MIRBuilder, - ArrayRef Args, + SmallVectorImpl &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -180,14 +186,99 @@ bool 
CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { - // Try to use the register type if we couldn't assign the VT. - if (!Handler.isArgumentHandler() || !CurVT.isValid()) + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) { + if (!CurVT.isValid()) return false; - CurVT = TLI->getRegisterTypeForCallingConv( + MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + if (NumParts > 1) { + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + } + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + } else { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. 
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (Part == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } + } else { + // Handling an outgoing arg that might need to be split. + if (NumParts < 2) + return false; // Don't know how to deal with this type combination. + + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. 
+ Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; + } + } } } @@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo, continue; } - assert(Args[i].Regs.size() == 1 && - "Can't handle multiple virtual regs yet"); - // FIXME: Pack registers if we have more than one. Register ArgReg = Args[i].Regs[0]; + MVT OrigVT = MVT::getVT(Args[i].Ty); + MVT VAVT = VA.getValVT(); if (VA.isRegLoc()) { - MVT OrigVT = MVT::getVT(Args[i].Ty); - MVT VAVT = VA.getValVT(); - if (Handler.isArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) - return false; // Can't handle this type of arg yet. + if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { + // Expected to be multiple regs for a single incoming arg. + unsigned NumArgRegs = Args[i].Regs.size(); + if (NumArgRegs < 2) + return false; + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += NumArgRegs - 1; + // Merge the split registers into the expected larger result vreg + // of the original call. 
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + continue; + } const LLT VATy(VAVT); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); @@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } else { MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } + } else if (!Handler.isIncomingArgumentHandler()) { + assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + // This is an outgoing argument that might have been split. + for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += Args[i].Regs.size() - 1; } else { Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } } else if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (Args[i].Regs.size() > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet"); + return false; + } MVT VT = MVT::getVT(Args[i].Ty); unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() : alignTo(VT.getSizeInBits(), 8) / 8; @@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo, return true; } +bool CallLowering::analyzeArgInfo(CCState &CCState, + SmallVectorImpl &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { + for (unsigned i = 0, e = Args.size(); i < e; ++i) { + MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; + if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { + // Bail out on anything we can't handle. 
+ LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() + << " (arg number = " << i << "\n"); + return false; + } + } + return true; +} + +bool CallLowering::resultsCompatible(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { + const Function &F = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = F.getCallingConv(); + + if (CallerCC == CalleeCC) + return true; + + SmallVector ArgLocs1; + CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + SmallVector ArgLocs2; + CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + // We need the argument locations to match up exactly. If there's more in + // one than the other, then we are done. + if (ArgLocs1.size() != ArgLocs2.size()) + return false; + + // Make sure that each location is passed in exactly the same way. + for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { + const CCValAssign &Loc1 = ArgLocs1[i]; + const CCValAssign &Loc2 = ArgLocs2[i]; + + // We need both of them to be the same. So if one is a register and one + // isn't, we're done. + if (Loc1.isRegLoc() != Loc2.isRegLoc()) + return false; + + if (Loc1.isRegLoc()) { + // If they don't have the same register location, we're done. + if (Loc1.getLocReg() != Loc2.getLocReg()) + return false; + + // They matched, so we can move to the next ArgLoc. + continue; + } + + // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. 
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) + return false; + } + + return true; +} + Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA) { LLT LocTy{VA.getLocVT()}; diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 31cb1dbbc9b..b4562a5c660 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -27,6 +27,18 @@ using namespace llvm; +namespace llvm { +cl::OptionCategory GICombinerOptionCategory( + "GlobalISel Combiner", + "Control the rules which are enabled. These options all take a comma " + "separated list of rules to disable and may be specified by number " + "or number range (e.g. 1-10)." +#ifndef NDEBUG + " They may also be specified by name." +#endif +); +} // end namespace llvm + namespace { /// This class acts as the glue the joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the @@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, return false; Builder = - CSEInfo ? make_unique() : make_unique(); + CSEInfo ? 
std::make_unique() : std::make_unique(); MRI = &MF.getRegInfo(); Builder->setMF(MF); if (CSEInfo) diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9cbf3dd83ff..854769d283f 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -8,19 +8,36 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gi-combiner" using namespace llvm; +// Option to allow testing of the combiner while no targets know about indexed +// addressing. 
+static cl::opt + ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), + cl::desc("Force all indexed operations to be " + "legal for the GlobalISel combiner")); + + CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, - MachineIRBuilder &B) - : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} + MachineIRBuilder &B, GISelKnownBits *KB, + MachineDominatorTree *MDT) + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), + KB(KB), MDT(MDT) { + (void)this->KB; +} void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { @@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); // Simple Copy Propagation. 
@@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); } +bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { + bool IsUndef = false; + SmallVector Ops; + if (matchCombineConcatVectors(MI, IsUndef, Ops)) { + applyCombineConcatVectors(MI, IsUndef, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && + "Invalid instruction"); + IsUndef = true; + MachineInstr *Undef = nullptr; + + // Walk over all the operands of concat vectors and check if they are + // build_vector themselves or undef. + // Then collect their operands in Ops. + for (const MachineOperand &MO : MI.operands()) { + // Skip the instruction definition. + if (MO.isDef()) + continue; + Register Reg = MO.getReg(); + MachineInstr *Def = MRI.getVRegDef(Reg); + assert(Def && "Operand not defined"); + switch (Def->getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + IsUndef = false; + // Remember the operands of the build_vector to fold + // them into the yet-to-build flattened concat vectors. + for (const MachineOperand &BuildVecMO : Def->operands()) { + // Skip the definition. + if (BuildVecMO.isDef()) + continue; + Ops.push_back(BuildVecMO.getReg()); + } + break; + case TargetOpcode::G_IMPLICIT_DEF: { + LLT OpType = MRI.getType(Reg); + // Keep one undef value for all the undef operands. 
+ if (!Undef) { + Builder.setInsertPt(*MI.getParent(), MI); + Undef = Builder.buildUndef(OpType.getScalarType()); + } + assert(MRI.getType(Undef->getOperand(0).getReg()) == + OpType.getScalarType() && + "All undefs should have the same type"); + // Break the undef vector in as many scalar elements as needed + // for the flattening. + for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); + EltIdx != EltEnd; ++EltIdx) + Ops.push_back(Undef->getOperand(0).getReg()); + break; + } + default: + return false; + } + } + return true; +} +void CombinerHelper::applyCombineConcatVectors( + MachineInstr &MI, bool IsUndef, const ArrayRef Ops) { + // We determined that the concat_vectors can be flatten. + // Generate the flattened build_vector. + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + // Note: IsUndef is sort of redundant. We could have determine it by + // checking that at all Ops are undef. Alternatively, we could have + // generate a build_vector of undefs and rely on another combine to + // clean that up. For now, given we already gather this information + // in tryCombineConcatVectors, just save compile time and issue the + // right thing. 
+ if (IsUndef) + Builder.buildUndef(NewDstReg); + else + Builder.buildBuildVector(NewDstReg, Ops); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + +bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { + SmallVector Ops; + if (matchCombineShuffleVector(MI, Ops)) { + applyCombineShuffleVector(MI, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Invalid instruction kind"); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcType = MRI.getType(Src1); + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); + + // If the resulting vector is smaller than the size of the source + // vectors being concatenated, we won't be able to replace the + // shuffle vector into a concat_vectors. + // + // Note: We may still be able to produce a concat_vectors fed by + // extract_vector_elt and so on. It is less clear that would + // be better though, so don't bother for now. + if (DstNumElts < 2 * SrcNumElts) + return false; + + // Check that the shuffle mask can be broken evenly between the + // different sources. + if (DstNumElts % SrcNumElts != 0) + return false; + + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = DstNumElts / SrcNumElts; + SmallVector ConcatSrcs(NumConcat, -1); + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + for (unsigned i = 0; i != DstNumElts; ++i) { + int Idx = Mask[i]; + // Undef value. + if (Idx < 0) + continue; + // Ensure the indices in each SrcType sized piece are sequential and that + // the same source is used for the whole piece. 
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) + return false; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. + // Collect the different operands for that. + Register UndefReg; + Register Src2 = MI.getOperand(2).getReg(); + for (auto Src : ConcatSrcs) { + if (Src < 0) { + if (!UndefReg) { + Builder.setInsertPt(*MI.getParent(), MI); + UndefReg = Builder.buildUndef(SrcType).getReg(0); + } + Ops.push_back(UndefReg); + } else if (Src == 0) + Ops.push_back(Src1); + else + Ops.push_back(Src2); + } + return true; +} + +void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef Ops) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + Builder.buildConcatVectors(NewDstReg, Ops); + + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, // up the type and extend so that it uses the preferred use. 
if (UseMI->getOpcode() == Preferred.ExtendOpcode || UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { - unsigned UseDstReg = UseMI->getOperand(0).getReg(); + Register UseDstReg = UseMI->getOperand(0).getReg(); MachineOperand &UseSrcMO = UseMI->getOperand(1); const LLT &UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { @@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::matchCombineBr(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); +bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { + assert(DefMI.getParent() == UseMI.getParent()); + if (&DefMI == &UseMI) + return false; + + // Loop through the basic block until we find one of the instructions. + MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); + for (; &*I != &DefMI && &*I != &UseMI; ++I) + return &*I == &DefMI; + + llvm_unreachable("Block must contain instructions"); +} + +bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { + if (MDT) + return MDT->dominates(&DefMI, &UseMI); + else if (DefMI.getParent() != UseMI.getParent()) + return false; + + return isPredecessor(DefMI, UseMI); +} + +bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Base = MI.getOperand(1).getReg(); + MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); + if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return false; + + LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); + + for (auto &Use : 
MRI.use_instructions(Base)) { + if (Use.getOpcode() != TargetOpcode::G_GEP) + continue; + + Offset = Use.getOperand(2).getReg(); + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " + << Use); + continue; + } + + // Make sure the offset calculation is before the potentially indexed op. + // FIXME: we really care about dependency here. The offset calculation might + // be movable. + MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); + if (!OffsetDef || !dominates(*OffsetDef, MI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " + << Use); + continue; + } + + // FIXME: check whether all uses of Base are load/store with foldable + // addressing modes. If so, using the normal addr-modes is better than + // forming an indexed one. + + bool MemOpDominatesAddrUses = true; + for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, GEPUse)) { + MemOpDominatesAddrUses = false; + break; + } + } + + if (!MemOpDominatesAddrUses) { + LLVM_DEBUG( + dbgs() << " Ignoring candidate as memop does not dominate uses: " + << Use); + continue; + } + + LLVM_DEBUG(dbgs() << " Found match: " << Use); + Addr = Use.getOperand(0).getReg(); + return true; + } + + return false; +} + +bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Addr = MI.getOperand(1).getReg(); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + if (!AddrDef || MRI.hasOneUse(Addr)) + return false; + + Base = AddrDef->getOperand(1).getReg(); + 
Offset = AddrDef->getOperand(2).getReg(); + + LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); + + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { + LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); + return false; + } + + MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); + if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); + return false; + } + + if (MI.getOpcode() == TargetOpcode::G_STORE) { + // Would require a copy. + if (Base == MI.getOperand(0).getReg()) { + LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); + return false; + } + + // We're expecting one use of Addr in MI, but it could also be the + // value stored, which isn't actually dominated by the instruction. + if (MI.getOperand(0).getReg() == Addr) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); + return false; + } + } + + // FIXME: check whether all uses of the base pointer are constant GEPs. That + // might allow us to end base's liveness here by adjusting the constant. 
+ + for (auto &UseMI : MRI.use_instructions(Addr)) { + if (!dominates(MI, UseMI)) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); + return false; + } + } + + return true; +} + +bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && + Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) + return false; + + bool IsStore = Opcode == TargetOpcode::G_STORE; + Register Addr, Base, Offset; + bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); + if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + return false; + + + unsigned NewOpcode; + switch (Opcode) { + case TargetOpcode::G_LOAD: + NewOpcode = TargetOpcode::G_INDEXED_LOAD; + break; + case TargetOpcode::G_SEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; + break; + case TargetOpcode::G_ZEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; + break; + case TargetOpcode::G_STORE: + NewOpcode = TargetOpcode::G_INDEXED_STORE; + break; + default: + llvm_unreachable("Unknown load/store opcode"); + } + + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); + MachineIRBuilder MIRBuilder(MI); + auto MIB = MIRBuilder.buildInstr(NewOpcode); + if (IsStore) { + MIB.addDef(Addr); + MIB.addUse(MI.getOperand(0).getReg()); + } else { + MIB.addDef(MI.getOperand(0).getReg()); + MIB.addDef(Addr); + } + + MIB.addUse(Base); + MIB.addUse(Offset); + MIB.addImm(IsPre); + MI.eraseFromParent(); + AddrDef.eraseFromParent(); + + LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); + return true; +} + +bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::G_BR) + return false; + // Try to match the following: // bb1: // %c(s32) = G_ICMP pred, %a, %b @@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) { return true; } -bool CombinerHelper::tryCombineBr(MachineInstr &MI) { 
- if (!matchCombineBr(MI)) +bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { + if (!matchElideBrByInvertingCond(MI)) return false; + applyElideBrByInvertingCond(MI); + return true; +} + +void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); @@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) { BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); MI.eraseFromParent(); +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering( + std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, const TargetLowering &TLI) { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. 
+ if (SrcAlign != 0 && SrcAlign < DstAlign) + return false; + + LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, + ZeroMemset, MemcpyStrSrc, FuncAttributes); + + if (Ty == LLT()) { + // Use the largest scalar type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + Ty = LLT::scalar(64); + while (DstAlign && DstAlign < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) + Ty = LLT::scalar(Ty.getSizeInBytes()); + assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); + // FIXME: check for the largest legal type we can load/store to. + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned TySize = Ty.getSizeInBytes(); + while (TySize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + LLT NewTy = Ty; + // FIXME: check for mem op safety and legality of the types. Not all of + // SDAGisms map cleanly to GISel concepts. + if (NewTy.isVector()) + NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); + NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + unsigned NewTySize = NewTy.getSizeInBytes(); + assert(NewTySize > 0 && "Could not find appropriate type"); + + // If the new LLT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). 
+ MVT VT = getMVTForLLT(Ty); + if (NumMemOps && AllowOverlap && NewTySize < Size && + TLI.allowsMisalignedMemoryAccesses( + VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + Fast) + TySize = Size; + else { + Ty = NewTy; + TySize = NewTySize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(Ty); + Size -= TySize; + } + return true; } +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. +static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = *MIB.getMRI(); + unsigned NumBits = Ty.getScalarSizeInBits(); + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + if (!Ty.isVector() && ValVRegAndVal) { + unsigned KnownVal = ValVRegAndVal->Value; + APInt Scalar = APInt(8, KnownVal); + APInt SplatVal = APInt::getSplat(NumBits, Scalar); + return MIB.buildConstant(Ty, SplatVal).getReg(0); + } + // FIXME: for vector types create a G_BUILD_VECTOR. + if (Ty.isVector()) + return Register(); + + // Extend the byte value to the larger type, and then multiply by a magic + // value 0x010101... in order to replicate it across every byte. 
+ LLT ExtType = Ty.getScalarType(); + auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); + if (NumBits > 8) { + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + auto MagicMI = MIB.buildConstant(ExtType, Magic); + Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); + } + + assert(ExtType == Ty && "Vector memset value type not supported yet"); + return Val; +} + +bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned Align, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memset length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); + std::vector MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0, + /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. 
+ Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + if (NewAlign > Align) { + Align = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Align) + MFI.setObjectAlignment(FI, Align); + } + } + + MachineIRBuilder MIB(MI); + // Find the largest store and generate the bit pattern for it. + LLT LargestTy = MemOps[0]; + for (unsigned i = 1; i < MemOps.size(); i++) + if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) + LargestTy = MemOps[i]; + + // The memset stored value is always defined as an s8, so in order to make it + // work with larger store types we need to repeat the bit pattern across the + // wider type. + Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + + if (!MemSetValue) + return false; + + // Generate the stores. For each store type in the list, we generate the + // matching store of that type to the destination address. + LLT PtrTy = MRI.getType(Dst); + unsigned DstOff = 0; + unsigned Size = KnownLen; + for (unsigned I = 0; I < MemOps.size(); I++) { + LLT Ty = MemOps[I]; + unsigned TySize = Ty.getSizeInBytes(); + if (TySize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(I == MemOps.size() - 1 && I != 0); + DstOff -= TySize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
+ Register Value = MemSetValue; + if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { + MVT VT = getMVTForLLT(Ty); + MVT LargestVT = getMVTForLLT(LargestTy); + if (!LargestTy.isVector() && !Ty.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); + else + Value = getMemsetValue(Val, Ty, MIB); + if (!Value) + return false; + } + + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); + + Register Ptr = Dst; + if (DstOff != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + + MIB.buildStore(Value, Ptr, *StoreMMO); + DstOff += Ty.getSizeInBytes(); + Size -= TySize; + } + + MI.eraseFromParent(); + return true; +} + + +bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memcpy length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + // FIXME: infer better src pointer alignment like SelectionDAG does here. + // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining + // if the memcpy is in a tail call position. 
+ + unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); + std::vector MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Now we need to emit a pair of load and stores for each of the types we've + // collected. I.e. for each type, generate a load from the source pointer of + // that type width, and then generate a corresponding store to the dest buffer + // of that value loaded. This can result in a sequence of loads and stores + // of mixed types, depending on what the target specifies as good types to use. 
+ unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + unsigned Size = KnownLen; + for (auto CopyTy : MemOps) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + if (CopyTy.getSizeInBytes() > Size) + CurrOffset -= CopyTy.getSizeInBytes() - Size; + + // Construct MMOs for the accesses. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + Register Offset; + if (CurrOffset != 0) { + Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + .getReg(0); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); + + // Create the store. + Register StorePtr = + CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + MIB.buildStore(LdVal, StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + Size -= CopyTy.getSizeInBytes(); + } + + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memmove length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); + std::vector MemOps; + 
+ const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due + // to a bug in its findOptimalMemOpLowering implementation. For now do the + // same thing here. + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Memmove requires that we perform the loads first before issuing the stores. + // Apart from that, this loop is pretty much doing the same thing as the + // memcpy codegen function. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + SmallVector LoadVals; + for (auto CopyTy : MemOps) { + // Construct MMO for the load. 
+ auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); + CurrOffset += CopyTy.getSizeInBytes(); + } + + CurrOffset = 0; + for (unsigned I = 0; I < MemOps.size(); ++I) { + LLT CopyTy = MemOps[I]; + // Now store the values loaded. + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + Register StorePtr = Dst; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + // This combine is fairly complex so it's not written with a separate + // matcher function. + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); + assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || + ID == Intrinsic::memset) && + "Expected a memcpy like intrinsic"); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + // Don't try to optimize volatile. 
+ if (IsVolatile) + return false; + + unsigned DstAlign = MemOp->getBaseAlignment(); + unsigned SrcAlign = 0; + Register Dst = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + Register Len = MI.getOperand(3).getReg(); + + if (ID != Intrinsic::memset) { + assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); + MemOp = *(++MMOIt); + SrcAlign = MemOp->getBaseAlignment(); + } + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return false; // Leave it to the legalizer to lower it to a libcall. + unsigned KnownLen = LenVRegAndVal->Value; + + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + if (MaxLen && KnownLen > MaxLen) + return false; + + if (ID == Intrinsic::memcpy) + return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memmove) + return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memset) + return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; - return tryCombineExtendingLoads(MI); + if (tryCombineExtendingLoads(MI)) + return true; + if (tryCombineIndexedLoadStore(MI)) + return true; + return false; } diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp new file mode 100644 index 00000000000..be8efa8795f --- /dev/null +++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -0,0 +1,383 @@ +//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. +// +//===------------------ +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +#define DEBUG_TYPE "gisel-known-bits" + +using namespace llvm; + +char llvm::GISelKnownBitsAnalysis::ID = 0; + +INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) +INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) + +GISelKnownBits::GISelKnownBits(MachineFunction &MF) + : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), + DL(MF.getFunction().getParent()->getDataLayout()) {} + +Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); + // TODO: How to handle cases with Base + Offset? 
+} + +MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { + if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + int FrameIdx = MI.getOperand(1).getIndex(); + return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); + } + return None; +} + +void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + const MachineInstr &MI = *MRI.getVRegDef(R); + computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); +} + +void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment) { + if (Alignment) + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2(Alignment)); +} + +KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + return getKnownBits(MI.getOperand(0).getReg()); +} + +KnownBits GISelKnownBits::getKnownBits(Register R) { + KnownBits Known; + LLT Ty = MRI.getType(R); + APInt DemandedElts = + Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + computeKnownBitsImpl(R, Known, DemandedElts); + return Known; +} + +bool GISelKnownBits::signBitIsZero(Register R) { + LLT Ty = MRI.getType(R); + unsigned BitWidth = Ty.getScalarSizeInBits(); + return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); +} + +APInt GISelKnownBits::getKnownZeroes(Register R) { + return getKnownBits(R).Zero; +} + +APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } + +void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint (i.e. it has a register class constraint instead). 
This is + // unlikely to occur except by looking through copies but it is possible for + // the initial register being queried to be in this state. + if (!DstTy.isValid()) { + Known = KnownBits(); + return; + } + + unsigned BitWidth = DstTy.getSizeInBits(); + Known = KnownBits(BitWidth); // Don't know anything + + if (DstTy.isVector()) + return; // TODO: Handle vectors. + + if (Depth == getMaxDepth()) + return; + + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + KnownBits Known2; + + switch (Opcode) { + default: + TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, + Depth); + break; + case TargetOpcode::COPY: { + MachineOperand Dst = MI.getOperand(0); + MachineOperand Src = MI.getOperand(1); + // Look through trivial copies but don't look through trivial copies of the + // form `%1:(s32) = OP %0:gpr32`, because known-bits analysis is currently + // unable to determine the bit width of a register class. + // + // We can't use NoSubRegister by name as it's defined by each target but + // it's always defined to be 0 by tablegen. + if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() && + Src.getSubReg() == 0 /*NoSubRegister*/ && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth); + } + break; + } + case TargetOpcode::G_CONSTANT: { + auto CstVal = getConstantVRegVal(R, MRI); + if (!CstVal) + break; + Known.One = *CstVal; + Known.Zero = ~Known.One; + break; + } + case TargetOpcode::G_FRAME_INDEX: { + computeKnownBitsForFrameIndex(R, Known, DemandedElts); + break; + } + case TargetOpcode::G_SUB: { + // If low bits are known to be zero in both operands, then we know they are + // going to be 0 in the result. Both addition and complement operations + // preserve the low zero bits. 
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + if (KnownZeroLow == 0) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + break; + } + case TargetOpcode::G_XOR: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; + break; + } + case TargetOpcode::G_GEP: { + // G_GEP is like G_ADD. FIXME: Is this true for all targets? + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) + break; + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. 
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + if (KnownZeroHigh > 1) + Known.Zero.setHighBits(KnownZeroHigh - 1); + break; + } + case TargetOpcode::G_AND: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + Known.One &= Known2.One; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + Known.Zero |= Known2.Zero; + break; + } + case TargetOpcode::G_OR: { + // A result bit is known one if it is known one in either the LHS or the RHS. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + Known.Zero &= Known2.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + Known.One |= Known2.One; + break; + } + case TargetOpcode::G_MUL: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. 
+ unsigned TrailZ = + Known.countMinTrailingZeros() + Known2.countMinTrailingZeros(); + unsigned LeadZ = + std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), + BitWidth) - + BitWidth; + + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + break; + } + case TargetOpcode::G_SELECT: { + computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + if (TL.getBooleanContents(DstTy.isVector(), + Opcode == TargetOpcode::G_FCMP) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + case TargetOpcode::G_SEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. 
+ Known = Known.sext(BitWidth); + break; + } + case TargetOpcode::G_ANYEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case TargetOpcode::G_LOAD: { + if (MI.hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } + } + break; + } + case TargetOpcode::G_ZEXTLOAD: { + // Everything above the retrieved bits is zero + if (MI.hasOneMemOperand()) + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + break; + } + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + KnownBits RHSKnown; + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + if (!RHSKnown.isConstant()) { + LLVM_DEBUG( + MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI); + break; + } + uint64_t Shift = RHSKnown.getConstant().getZExtValue(); + LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + + switch (Opcode) { + case TargetOpcode::G_ASHR: + Known.Zero = Known.Zero.ashr(Shift); + Known.One = Known.One.ashr(Shift); + break; + case TargetOpcode::G_LSHR: + Known.Zero = Known.Zero.lshr(Shift); + Known.One = Known.One.lshr(Shift); + Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift); + break; + case TargetOpcode::G_SHL: + Known.Zero = Known.Zero.shl(Shift); + Known.One = Known.One.shl(Shift); + Known.Zero.setBits(0, Shift); + break; + } + break; + } + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_PTRTOINT: + // Fall through and handle them the same as zext/trunc. 
+ LLVM_FALLTHROUGH; + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_TRUNC: { + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + unsigned SrcBitWidth = SrcTy.isPointer() + ? DL.getIndexSizeInBits(SrcTy.getAddressSpace()) + : SrcTy.getSizeInBits(); + assert(SrcBitWidth && "SrcBitWidth can't be zero"); + Known = Known.zextOrTrunc(SrcBitWidth, true); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known = Known.zextOrTrunc(BitWidth, true); + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + break; + } + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" + << Depth << "] Computed for: " << MI << "[" << Depth + << "] Known: 0x" + << (Known.Zero | Known.One).toString(16, false) << "\n" + << "[" << Depth << "] Zero: 0x" + << Known.Zero.toString(16, false) << "\n" + << "[" << Depth << "] One: 0x" + << Known.One.toString(16, false) << "\n"); +} + +void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { + return false; +} diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6e99bdbd826..45cef4aca88 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - 
const CmpInst *CI = dyn_cast(&U); + auto *CI = dyn_cast(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); @@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getNullValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { + assert(CI && "Instruction should be CmpInst"); MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, MachineInstr::copyFlagsFromInstruction(*CI)); } @@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); } else { - assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE && - "Can only handle ULE ranges"); + assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && + "Can only handle SLE ranges"); const APInt& Low = cast(CB.CmpLHS)->getValue(); const APInt& High = cast(CB.CmpRHS)->getValue(); @@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, if (cast(CB.CmpLHS)->isMinValue(true)) { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = - MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0); + MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0); } else { const LLT &CmpTy = MRI->getType(CmpOpReg); auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); @@ -728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. 
- Pred = CmpInst::ICMP_ULE; + Pred = CmpInst::ICMP_SLE; LHS = I->Low; MHS = Cond; RHS = I->High; @@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - + const MDNode *Ranges = + Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); @@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { unsigned BaseAlign = getMemOpAlignment(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); - - BaseReg = NewBaseReg; + BaseReg = + MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); Offset = 0; } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) { - Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); - IdxReg = NewIdxReg; - } + if (MRI->getType(IdxReg) != OffsetTy) + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. 
Register GepOffsetReg; if (ElementSize != 1) { - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); + GepOffsetReg = + MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); } else GepOffsetReg = IdxReg; - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); - BaseReg = NewBaseReg; + BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } @@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U, return true; } -bool IRTranslator::translateMemfunc(const CallInst &CI, +bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID) { + Intrinsic::ID ID) { // If the source is undef, then just emit a nop. - if (isa(CI.getArgOperand(1))) { - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memset: - return true; - default: - break; - } + if (isa(CI.getArgOperand(1))) + return true; + + ArrayRef Res; + auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true); + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) + ICall.addUse(getOrCreateVReg(**AI)); + + unsigned DstAlign = 0, SrcAlign = 0; + unsigned IsVol = + cast(CI.getArgOperand(CI.getNumArgOperands() - 1)) + ->getZExtValue(); + + if (auto *MCI = dyn_cast(&CI)) { + DstAlign = std::max(MCI->getDestAlignment(), 1); + SrcAlign = std::max(MCI->getSourceAlignment(), 1); + } else if (auto *MMI = dyn_cast(&CI)) { + DstAlign = std::max(MMI->getDestAlignment(), 1); + SrcAlign = std::max(MMI->getSourceAlignment(), 1); + } else { + auto *MSI = cast(&CI); + DstAlign = std::max(MSI->getDestAlignment(), 1); } - LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); - Type *DstTy = CI.getArgOperand(0)->getType(); - if 
(cast(DstTy)->getAddressSpace() != 0 || - SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) - return false; + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); - SmallVector Args; - for (int i = 0; i < 3; ++i) { - const auto &Arg = CI.getArgOperand(i); - Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); - } + // Create mem operands to store the alignment and volatile info. + auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(0)), + MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + if (ID != Intrinsic::memset) + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(1)), + MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); - const char *Callee; - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: { - Type *SrcTy = CI.getArgOperand(1)->getType(); - if(cast(SrcTy)->getAddressSpace() != 0) - return false; - Callee = ID == Intrinsic::memcpy ? 
"memcpy" : "memmove"; - break; - } - case Intrinsic::memset: - Callee = "memset"; - break; - default: - return false; - } - - return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), - MachineOperand::CreateES(Callee), - CallLowering::ArgInfo({0}, CI.getType()), Args); + return true; } void IRTranslator::getStackGuard(Register DstReg, @@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg, MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + DL->getPointerABIAlignment(0).value()); MIB.setMemRefs({MemRef}); } @@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { break; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; + case Intrinsic::bitreverse: + return TargetOpcode::G_BITREVERSE; case Intrinsic::ceil: return TargetOpcode::G_FCEIL; case Intrinsic::cos: @@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); + MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { - Register Reg = getOrCreateVReg(*V); - // FIXME: This does not handle register-indirect values at offset 0. The - // direct/indirect thing shouldn't really be handled by something as - // implicit as reg+noreg vs reg+imm in the first palce, but it seems - // pretty baked in right now. - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + for (Register Reg : getOrCreateVRegs(*V)) { + // FIXME: This does not handle register-indirect values at offset 0. 
The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first place, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + } } return true; } @@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: - return translateMemfunc(CI, MIRBuilder, ID); + return translateMemFunc(CI, MIRBuilder, ID); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MIRBuilder.buildConstant(Reg, TypeID); return true; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - const ConstantInt *Min = cast(CI.getArgOperand(1)); + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); - MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); - return true; - } case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- MIRBuilder.buildConstant(getOrCreateVReg(CI), 0); - return true; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::stackguard: getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; @@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } +bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder) { + const Instruction &I = *CS.getInstruction(); + ArrayRef Res = getOrCreateVRegs(I); + + SmallVector, 8> Args; + Register SwiftInVReg = 0; + Register SwiftErrorVReg = 0; + for (auto &Arg : CS.args()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &I, &MIRBuilder.getMBB(), Arg)); + Args.emplace_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + continue; + } + Args.push_back(getOrCreateVRegs(*Arg)); + } + + // We don't set HasCalls on MFI here yet because call lowering may decide to + // optimize into tail calls. Instead, we defer that to selection where a final + // scan is done to check if any instructions are calls. + bool Success = + CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CS.getCalledValue()); }); + + // Check if we just inserted a tail call. 
+ if (Success) { + assert(!HasTailCall && "Can't tail call return twice from block?"); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt())); + } + + return Success; +} + bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast(TII->getIntrinsicID(F)); } - if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - ArrayRef Res = getOrCreateVRegs(CI); - - SmallVector, 8> Args; - Register SwiftInVReg = 0; - Register SwiftErrorVReg = 0; - for (auto &Arg: CI.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &CI, &MIRBuilder.getMBB(), Arg)); - Args.emplace_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg); - continue; - } - Args.push_back(getOrCreateVRegs(*Arg)); - } - - MF->getFrameInfo().setHasCalls(true); - bool Success = - CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); - - return Success; - } + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) + return translateCallSite(&CI, MIRBuilder); assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : CI.arg_operands()) { + for (auto &Arg : enumerate(CI.arg_operands())) { // Some intrinsics take metadata parameters. Reject them. 
- if (isa(Arg)) + if (isa(Arg.value())) return false; - ArrayRef VRegs = getOrCreateVRegs(*Arg); - if (VRegs.size() > 1) - return false; - MIB.addUse(VRegs[0]); + + // If this is required to be an immediate, don't materialize it in a + // register. + if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (ConstantInt *CI = dyn_cast(Arg.value())) { + // imm arguments are more convenient than cimm (and realistically + // probably sufficient), so use them. + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + MIB.addImm(CI->getSExtValue()); + } else { + MIB.addFPImm(cast(Arg.value())); + } + } else { + ArrayRef VRegs = getOrCreateVRegs(*Arg.value()); + if (VRegs.size() > 1) + return false; + MIB.addUse(VRegs[0]); + } } // Add a MachineMemOperand if it is a target mem intrinsic. @@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. 
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - unsigned Align = Info.align; - if (Align == 0) - Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + MaybeAlign Align = Info.align; + if (!Align) + Align = MaybeAlign( + DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()))); uint64_t Size = Info.memVT.getStoreSize(); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Align)); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value())); } return true; @@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - ArrayRef Res; - if (!I.getType()->isVoidTy()) - Res = getOrCreateVRegs(I); - SmallVector, 8> Args; - Register SwiftErrorVReg = 0; - Register SwiftInVReg = 0; - for (auto &Arg : I.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &I, &MIRBuilder.getMBB(), Arg)); - Args.push_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); - continue; - } - - Args.push_back(getOrCreateVRegs(*Arg)); - } - - if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*I.getCalledValue()); })) + if (!translateCallSite(&I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U, Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); Register TySize = - getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); + 
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - LLT PtrTy = getLLTForType(*AI.getType(), *DL); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - - Register SPTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildCopy(SPTmp, SPReg); - - Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); - - // Handle alignment. We have to realign if the allocation granule was smaller - // than stack alignment, or the specific alloca requires more than stack - // alignment. unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); - Align = std::max(Align, StackAlign); - if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) { - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); - AllocTmp = AlignedAlloc; - } + if (Align <= StackAlign) + Align = 0; - MIRBuilder.buildCopy(SPReg, AllocTmp); - MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp); + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); + + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align); MF->getFrameInfo().CreateVariableSizedObject(Align ? 
Align : 1, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); @@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U, .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addUse(getOrCreateVReg(*U.getOperand(2))); + .addShuffleMask(cast(U.getOperand(2))); return true; } @@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, unsigned Opcode = 0; switch (I.getOperation()) { default: - llvm_unreachable("Unknown atomicrmw op"); return false; case AtomicRMWInst::Xchg: Opcode = TargetOpcode::G_ATOMICRMW_XCHG; @@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::UMin: Opcode = TargetOpcode::G_ATOMICRMW_UMIN; break; + case AtomicRMWInst::FAdd: + Opcode = TargetOpcode::G_ATOMICRMW_FADD; + break; + case AtomicRMWInst::FSub: + Opcode = TargetOpcode::G_ATOMICRMW_FSUB; + break; } MIRBuilder.buildAtomicRMW( @@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() { FuncInfo.clear(); } +/// Returns true if a BasicBlock \p BB within a variadic function contains a +/// variadic musttail call. +static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { + if (!IsVarArg) + return false; + + // Walk the block backwards, because tail calls usually only appear at the end + // of a block. 
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { + const auto *CI = dyn_cast(&I); + return CI && CI->isMustTailCall(); + }); +} + bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; const Function &F = MF->getFunction(); @@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { : TPC->isGISelCSEEnabled(); if (EnableCSE) { - EntryBuilder = make_unique(CurMF); + EntryBuilder = std::make_unique(CurMF); CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); - CurBuilder = make_unique(CurMF); + CurBuilder = std::make_unique(CurMF); CurBuilder->setCSEInfo(CSEInfo); } else { - EntryBuilder = make_unique(); - CurBuilder = make_unique(); + EntryBuilder = std::make_unique(); + CurBuilder = std::make_unique(); } CLI = MF->getSubtarget().getCallLowering(); CurBuilder->setMF(*MF); EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); - ORE = llvm::make_unique(&F); + ORE = std::make_unique(&F); FuncInfo.MF = MF; FuncInfo.BPI = nullptr; const auto &TLI = *MF->getSubtarget().getTargetLowering(); const TargetMachine &TM = MF->getTarget(); - SL = make_unique(this, FuncInfo); + SL = std::make_unique(this, FuncInfo); SL->init(TLI, TM, *DL); EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { SwiftError.setFunction(CurMF); SwiftError.createEntriesInEntryBlock(DbgLoc); + bool IsVarArg = F.isVarArg(); + bool HasMustTailInVarArgFn = false; + // Create all blocks, in IR order, to preserve the layout. 
for (const BasicBlock &BB: F) { auto *&MBB = BBToMBB[&BB]; @@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + + if (!HasMustTailInVarArgFn) + HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); } + MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn); + // Make our arguments/constants entry block fallthrough to the IR entry block. EntryBB->addSuccessor(&getMBB(F.front())); @@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - // We don't currently support translating swifterror or swiftself functions. - for (auto &Arg : F.args()) { - if (Arg.hasSwiftSelfAttr()) { - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - F.getSubprogram(), &F.getEntryBlock()); - R << "unable to lower arguments due to swiftself: " - << ore::NV("Prototype", F.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); - return false; - } - } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Set the insertion point of all the following translations to // the end of this basic block. CurBuilder->setMBB(MBB); - + HasTailCall = false; for (const Instruction &Inst : *BB) { + // If we translated a tail call in the last step, then we know + // everything after the call is either a return, or something that is + // handled by the call itself. (E.g. a lifetime marker or assume + // intrinsic.) In this case, we should stop translating the block and + // move on. 
+ if (HasTailCall) + break; #ifndef NDEBUG Verifier.setCurrentInst(&Inst); #endif // ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 70694fe6b6c..7c4fd2d140d 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,11 +12,14 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) @@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + GISelKnownBits &KB = getAnalysis().get(MF); const TargetPassConfig &TPC = getAnalysis(); - const InstructionSelector *ISel = 
MF.getSubtarget().getInstructionSelector(); + InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); + ISel->setupMF(MF, KB, CoverageInfo); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!ISel->select(MI, CoverageInfo)) { + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); @@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { --MII; if (MI.getOpcode() != TargetOpcode::COPY) continue; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(SrcReg) && + Register::isVirtualRegister(DstReg)) { auto SrcRC = MRI.getRegClass(SrcReg); auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { @@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // that the size of the now-constrained vreg is unchanged and that it has a // register class. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) @@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + // Determine if there are any calls in this machine function. 
Ported from + // SelectionDAG. + MachineFrameInfo &MFI = MF.getFrameInfo(); + for (const auto &MBB : MF) { + if (MFI.hasCalls() && MF.hasInlineAsm()) + break; + + for (const auto &MI : MBB) { + if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm()) + MFI.setHasCalls(true); + if (MI.isInlineAsm()) + MF.setHasInlineAsm(true); + } + } + + LLVM_DEBUG({ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; for (auto RuleID : CoverageInfo.covered()) diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2ad35b3a72c..28143b30d4e 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index b5b26bff34b..1593e21fe07 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { : TPC.isGISelCSEEnabled(); if (EnableCSE) { - MIRBuilder = make_unique(); + MIRBuilder = std::make_unique(); CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); } else - MIRBuilder = make_unique(); + MIRBuilder = std::make_unique(); // This observer keeps the worklist updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); // We want both WorkListObserver as well as CSEInfo to observe all changes. 
@@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; + auto stopLegalizing = [&](MachineInstr &MI) { + Helper.MIRBuilder.stopObservingChanges(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + }; bool Changed = false; + SmallVector RetryList; do { + assert(RetryList.empty() && "Expected no instructions in RetryList"); + unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); @@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); + // Move illegal artifacts to RetryList instead of aborting because + // legalizing InstList may generate artifacts that allow + // ArtifactCombiner to combine away them. + if (isArtifact(MI)) { + RetryList.push_back(&MI); + continue; + } + stopLegalizing(MI); return false; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } + // Try to combine the instructions in RetryList again if there + // are new artifacts. If not, stop legalizing. 
+ if (!RetryList.empty()) { + if (ArtifactList.size() > NumArtifacts) { + while (!RetryList.empty()) + ArtifactList.insert(RetryList.pop_back_val()); + } else { + MachineInstr *MI = *RetryList.begin(); + stopLegalizing(*MI); + return false; + } + } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f5cf7fc9bd9..21512e54387 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +static LLT getGCDType(LLT OrigTy, LLT TargetTy) { + if (OrigTy.isVector() && TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy.getElementType()); + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigTy.getElementType()); + } + + if (OrigTy.isVector() && !TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy); + return TargetTy; + } + + assert(!OrigTy.isVector() && !TargetTy.isVector()); + + int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), + TargetTy.getSizeInBits()); + return LLT::scalar(GCD); +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef PartRegs, @@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: - 
assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::SDIV_I32; + case 64: + return RTLIB::SDIV_I64; + case 128: + return RTLIB::SDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_UDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::UDIV_I32; + case 64: + return RTLIB::UDIV_I64; + case 128: + return RTLIB::UDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_SREM: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; @@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +/// True if an instruction is in tail position in its caller. Intended for +/// legalizing libcalls as tail calls when possible. +static bool isLibCallInTailPosition(MachineInstr &MI) { + const Function &F = MI.getParent()->getParent()->getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. + AttributeList CallerAttrs = F.getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) + .hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. 
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + return false; + + // Only tail call if the following instruction is a standard return. + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MachineInstr *Next = MI.getNextNode(); + if (!Next || TII.isTailCall(*Next) || !Next->isReturn()) + return false; + + return true; +} + LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, @@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), Result, Args)) + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = Result; + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; return LegalizerHelper::Legalized; @@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Args); } +LegalizerHelper::LegalizeResult +llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + SmallVector Args; + // Add all the args, except for the last which is an imm denoting 'tail'. + for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) { + Register Reg = MI.getOperand(i).getReg(); + + // Need derive an IR type for call lowering. 
+ LLT OpLLT = MRI.getType(Reg); + Type *OpTy = nullptr; + if (OpLLT.isPointer()) + OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); + else + OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); + Args.push_back({Reg, OpTy}); + } + + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID(); + RTLIB::Libcall RTLibcall; + switch (ID) { + case Intrinsic::memcpy: + RTLibcall = RTLIB::MEMCPY; + break; + case Intrinsic::memset: + RTLibcall = RTLIB::MEMSET; + break; + case Intrinsic::memmove: + RTLibcall = RTLIB::MEMMOVE; + break; + default: + return LegalizerHelper::UnableToLegalize; + } + const char *Name = TLI.getLibcallName(RTLibcall); + + MIRBuilder.setInstr(MI); + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 && + isLibCallInTailPosition(MI); + + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) + return LegalizerHelper::UnableToLegalize; + + if (Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + // We must have a return following the call to get past + // isLibCallInTailPosition. + assert(MI.getNextNode() && MI.getNextNode()->isReturn() && + "Expected instr following MI to be a return?"); + + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. 
+ MI.getNextNode()->eraseFromParent(); + } + + return LegalizerHelper::Legalized; +} + static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType) { auto ToMVT = MVT::getVT(ToType); @@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // FIXME: support the general case where the requested NarrowTy may not be + // the same as the source type. E.g. s128 = sext(s32) + if ((SrcTy.getSizeInBits() != SizeOp0 / 2) || + SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + // Shift the sign bit of the low register through the high register. + auto ShiftAmt = + MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1); + auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ZEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + uint64_t SizeOp1 = SrcTy.getSizeInBits(); + if (SizeOp0 % SizeOp1 != 0) + return UnableToLegalize; + + // Generate a merge where the bottom bits are taken from the source, and + // zero everything else. 
+ Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0); + unsigned NumParts = SizeOp0 / SizeOp1; + SmallVector Srcs = {MI.getOperand(1).getReg()}; + for (unsigned Part = 1; Part < NumParts; ++Part) + Srcs.push_back(ZeroReg); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_TRUNC: { + if (TypeIdx != 1) + return UnableToLegalize; + + uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + if (NarrowTy.getSizeInBits() * 2 != SizeOp1) { + LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0)); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildConstant(CarryIn, 0); - + Register CarryIn; for (int i = 0; i < NumParts; ++i) { Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], - Src2Regs[i], CarryIn); + if (i == 0) + MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); + else { + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + } DstRegs.push_back(DstReg); CarryIn = CarryOut; @@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (unsigned j = 1; j < MI.getNumOperands(); j += 2) MIB.addUse(SrcRegs[j / 
2][i]).add(MI.getOperand(j + 1)); } - MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); @@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, CmpInst::Predicate Pred = static_cast(MI.getOperand(1).getPredicate()); + LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); @@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); } else { - const LLT s1 = LLT::scalar(1); - MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( - ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); } Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT_INREG: { + if (TypeIdx != 0) + return UnableToLegalize; + + if (!MI.getOperand(2).isImm()) + return UnableToLegalize; + int64_t SizeInBits = MI.getOperand(2).getImm(); + + // So long as the new type has more bits than the bits we're extending we + // don't need to break it apart. 
+ if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + Observer.changingInstr(MI); + // We don't lose any non-extension bits by truncating the src and + // sign-extending the dst. + MachineOperand &MO1 = MI.getOperand(1); + auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg()); + MO1.setReg(TruncMIB->getOperand(0).getReg()); + + MachineOperand &MO2 = MI.getOperand(0); + Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt}); + MO2.setReg(DstExt); + Observer.changedInstr(MI); + return Legalized; + } + + // Break it apart. Components below the extension point are unmodified. The + // component containing the extension point becomes a narrower SEXT_INREG. + // Components above it are ashr'd from the component containing the + // extension point. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector DstRegs; + // List the registers where the source will be split. + SmallVector SrcRegs; + + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); + + SrcRegs.push_back(SrcReg); + } + + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); + + Register AshrCstReg = + MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) + ->getOperand(0) + .getReg(); + Register FullExtensionReg = 0; + Register PartialExtensionReg = 0; + + // Do the operation on each small part. 
+ for (int i = 0; i < NumParts; ++i) { + if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + DstRegs.push_back(SrcRegs[i]); + else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + assert(PartialExtensionReg && + "Expected to visit partial extension before full"); + if (FullExtensionReg) { + DstRegs.push_back(FullExtensionReg); + continue; + } + DstRegs.push_back(MIRBuilder + .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, + {PartialExtensionReg, AshrCstReg}) + ->getOperand(0) + .getReg()); + FullExtensionReg = DstRegs.back(); + } else { + DstRegs.push_back( + MIRBuilder + .buildInstr( + TargetOpcode::G_SEXT_INREG, {NarrowTy}, + {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) + ->getOperand(0) + .getReg()); + PartialExtensionReg = DstRegs.back(); + } + } + + // Gather the destination registers into the final destination. + Register DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } @@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + Register NextResult = I + 1 == NumOps && WideTy == DstTy ? 
DstReg : MRI.createGenericVirtualRegister(WideTy); auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); @@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (WideSize > DstSize) MIRBuilder.buildTrunc(DstReg, ResultReg); + else if (DstTy.isPointer()) + MIRBuilder.buildIntToPtr(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; @@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_BITREVERSE: { + Observer.changingInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + MI.getOperand(0).setReg(DstExt); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits); + auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt); + MIRBuilder.buildTrunc(DstReg, Shift); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SITOFP: if (TypeIdx != 1) return UnableToLegalize; @@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMA: + case 
TargetOpcode::G_FMAD: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FCANONICALIZE: @@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SEXT_INREG: + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + return lowerSADDO_SSUBO(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FMAD: + return lowerFMad(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); - if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { - // In the case of G_LOAD, this was a non-extending load already and we're - // about to lower to the same instruction. - if (MI.getOpcode() == TargetOpcode::G_LOAD) + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. 
+ + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 load (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = + MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } MIRBuilder.buildLoad(DstReg, PtrReg, MMO); MI.eraseFromParent(); return Legalized; @@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, 
unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } + case TargetOpcode::G_STORE: { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the GEP and truncating stores. 
+ LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerUITOFP(MI, TypeIdx, Ty); case G_SITOFP: return lowerSITOFP(MI, TypeIdx, Ty); + case G_FPTOUI: + return lowerFPTOUI(MI, TypeIdx, Ty); case G_SMIN: case G_SMAX: case G_UMIN: @@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_UNMERGE_VALUES: + return lowerUnmergeValues(MI); + case TargetOpcode::G_SEXT_INREG: { + assert(MI.getOperand(2).isImm() && "Expected immediate"); + int64_t SizeInBits = MI.getOperand(2).getImm(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register TmpRes = MRI.createGenericVirtualRegister(DstTy); + + auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); + MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); + MI.eraseFromParent(); + return Legalized; + } + case G_SHUFFLE_VECTOR: + return 
lowerShuffleVector(MI); + case G_DYN_STACKALLOC: + return lowerDynStackAlloc(MI); + case G_EXTRACT: + return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -2282,6 +2729,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + const int NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // TODO: Create sequence of extracts. + if (DstTy == NarrowTy) + return UnableToLegalize; + + LLT GCDTy = getGCDType(SrcTy, NarrowTy); + if (DstTy == GCDTy) { + // This would just be a copy of the same unmerge. + // TODO: Create extracts, pad with undef and create intermediate merges. + return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = NumDst / NumUnmerge; + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) + MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * 
NumConcat, SrcTy); + + SmallVector ConcatOps; + SmallVector SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. + // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FDIV: case G_FREM: case G_FMA: + case G_FMAD: case G_FPOW: case G_FEXP: case G_FEXP2: @@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSIN: case G_FSQRT: case G_BSWAP: + case G_BITREVERSE: case G_SDIV: case G_SMIN: case G_SMAX: @@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); case G_PHI: return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case 
G_UNMERGE_VALUES: + return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, switch (MI.getOpcode()) { case TargetOpcode::G_SHL: { // Short: ShAmt < NewBitSize - auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt); - auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); - auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. @@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, ResultRegs[1] = Hi.getReg(0); break; } - case TargetOpcode::G_LSHR: { - // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); - - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); - - // Long: ShAmt >= NewBitSize - auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. - auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. 
- - auto Lo = MIRBuilder.buildSelect( - HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); - auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); - - ResultRegs[0] = Lo.getReg(0); - ResultRegs[1] = Hi.getReg(0); - break; - } + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: { // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt}); - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize - - // Sign of Hi part. - auto HiL = MIRBuilder.buildAShr( - HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); - - auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + MachineInstrBuilder HiL; + if (MI.getOpcode() == TargetOpcode::G_LSHR) { + HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. + } else { + auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1); + HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part. + } + auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, + {InH, AmtExcess}); // Lo from Hi part. 
auto Lo = MIRBuilder.buildSelect( HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); @@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_STORE: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: @@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UNMERGE_VALUES: { + if (TypeIdx != 1) + return UnableToLegalize; + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + int NumDst = MI.getNumOperands() - 1; + moreElementsVectorSrc(MI, MoreTy, NumDst); + + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + for (int I = 0; I != NumDst; ++I) + MIB.addDef(MI.getOperand(I).getReg()); + + int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); + for (int I = NumDst; I != NewNumDst; ++I) + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + + MIB.addUse(MI.getOperand(NumDst).getReg()); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); default: @@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + 
Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + if (SrcTy != S64 && SrcTy != S32) + return UnableToLegalize; + if (DstTy != S32 && DstTy != S64) + return UnableToLegalize; + + // FPTOSI gives same result as FPTOUI for positive signed integers. + // FPTOUI needs to deal with fp values that convert to unsigned integers + // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. + + APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); + APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() + : APFloat::IEEEdouble(), + APInt::getNullValue(SrcTy.getSizeInBits())); + TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); + + MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); + + MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); + // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on + // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold); + MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); + MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); + MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + + MachineInstrBuilder FCMP = + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); + + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { + // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), + Flags); + MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { + const unsigned NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + + + // Expand scalarizing unmerge as bitcast to integer and shift. 
+ if (!DstTy.isVector() && SrcTy.isVector() && + SrcTy.getElementType() == DstTy) { + LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); + Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); + + MIRBuilder.buildTrunc(Dst0Reg, Cast); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(1).getReg(); + Register Src1Reg = MI.getOperand(2).getReg(); + LLT Src0Ty = MRI.getType(Src0Reg); + LLT DstTy = MRI.getType(DstReg); + LLT IdxTy = LLT::scalar(32); + + const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); + + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + + if (DstTy.isScalar()) { + if (Src0Ty.isVector()) + return UnableToLegalize; + + // This is just a SELECT. + assert(Mask.size() == 1 && "Expected a single mask element"); + Register Val; + if (Mask[0] < 0 || Mask[0] > 1) + Val = MIRBuilder.buildUndef(DstTy).getReg(0); + else + Val = Mask[0] == 0 ? Src0Reg : Src1Reg; + MIRBuilder.buildCopy(DstReg, Val); + MI.eraseFromParent(); + return Legalized; + } + + Register Undef; + SmallVector BuildVec; + LLT EltTy = DstTy.getElementType(); + + for (int Idx : Mask) { + if (Idx < 0) { + if (!Undef.isValid()) + Undef = MIRBuilder.buildUndef(EltTy).getReg(0); + BuildVec.push_back(Undef); + continue; + } + + if (Src0Ty.isScalar()) { + BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); + } else { + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? 
Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); + } + } + + MIRBuilder.buildBuildVector(DstReg, BuildVec); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + unsigned Align = MI.getOperand(2).getImm(); + + const auto &MF = *MI.getMF(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + + LLT PtrTy = MRI.getType(Dst); + LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); + SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); + + // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't + // have to generate an extra instruction to negate the alloc and then use + // G_GEP to add the negative offset. 
+ auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); + if (Align) { + APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); + AlignMask.negate(); + auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask); + Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); + } + + SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + MIRBuilder.buildCopy(SPReg, SPTmp); + MIRBuilder.buildCopy(Dst, SPTmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtract(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Offset = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy.isScalar() && + (SrcTy.isScalar() || + (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { + LLT SrcIntTy = SrcTy; + if (!SrcTy.isScalar()) { + SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + } + + if (Offset == 0) + MIRBuilder.buildTrunc(Dst, Src); + else { + auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); + MIRBuilder.buildTrunc(Dst, Shr); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = 
MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO; + + LLT Ty = MRI.getType(Dst0); + LLT BoolTy = MRI.getType(Dst1); + + if (IsAdd) + MIRBuilder.buildAdd(Dst0, LHS, RHS); + else + MIRBuilder.buildSub(Dst0, LHS, RHS); + + // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow. + + auto Zero = MIRBuilder.buildConstant(Ty, 0); + + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + auto ResultLowerThanLHS = + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS); + auto ConditionRHS = MIRBuilder.buildICmp( + IsAdd ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero); + + MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS); + MI.eraseFromParent(); + return Legalized; +} diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 6e1de95b327..70045512fae 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { return true; } const bool AllCovered = (FirstUncovered >= NumTypeIdxs); - LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + if (NumTypeIdxs > 0) + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} + +bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumImmIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); return AllCovered; #else @@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias << "\n"); OpcodeIdx = getOpcodeIdxForOpcode(Alias); - LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " - << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); } @@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) @@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const { ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) : Acc; }); + const unsigned NumImmIdxs = std::accumulate( + MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + [](unsigned Acc, const MCOperandInfo &OpInfo) { + return OpInfo.isGenericImm() + ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc) + : Acc; + }); LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode << "): " << NumTypeIdxs << " type ind" - << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); + << (NumTypeIdxs == 1 ? "ex" : "ices") << ", " + << NumImmIdxs << " imm ind" + << (NumImmIdxs == 1 ? 
"ex" : "ices") << "\n"); const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) FailedOpcodes.push_back(Opcode); + else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs)) + FailedOpcodes.push_back(Opcode); } if (!FailedOpcodes.empty()) { errs() << "The following opcodes have ill-defined legalization rules:"; diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index 3592409710a..f882ecbf5db 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. @@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // Check if all the users of MI are local. // We are going to invalidation the list of use operands, so we // can't use range iterator. @@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedMI); // Set a new register for the definition. - unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = @@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { // many users, but this case may be better served by regalloc improvements. 
for (MachineInstr *MI : LocalizedInstrs) { - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); // All of the user MIs of this reg. SmallPtrSet Users; @@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); - return Changed |= localizeIntraBlock(LocalizedInstrs); + Changed |= localizeIntraBlock(LocalizedInstrs); + return Changed; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b7a73326b85..df770f6664c 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return insertInstr(BuildMI(getMF(), getDL(), getTII().get(TargetOpcode::DBG_VALUE), - /*IsIndirect*/ true, Reg, Variable, Expr)); + /*IsIndirect*/ false, Reg, Variable, DIExpr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. 
+ const DIExpression *DIExpr = cast(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) - .addImm(0) + .addReg(0) .addMetadata(Variable) - .addMetadata(Expr); + .addMetadata(DIExpr); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { @@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { return MIB.addMetadata(Label); } +MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res, + const SrcOp &Size, + unsigned Align) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type"); + auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC); + Res.addDefToMIB(*getMRI(), MIB); + Size.addSrcToMIB(MIB); + MIB.addImm(Align); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res, int Idx) { assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); @@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - auto MIB = buildInstr(TargetOpcode::G_GEP); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - Op1.addSrcToMIB(MIB); - return MIB; + return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); } Optional @@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional Flags) { 
- return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional Flags) { - return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } MachineInstrBuilder @@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, - Register OldValRes, - Register Addr, - Register Val, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( + unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT ValTy = getMRI()->getType(Val); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT ValTy = Val.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(ValTy.isValid() && "invalid operand type"); assert(OldValResTy == ValTy && "type mismatch"); + assert(MMO.isAtomic() && "not atomic mem operand"); #endif - return buildInstr(Opcode) - .addDef(OldValRes) - .addUse(Addr) - .addUse(Val) - .addMemOperand(&MMO); + auto MIB = buildInstr(Opcode); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + Val.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder @@ -864,6 +883,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr, MMO); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) 
{ + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val, + MMO); +} + MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) @@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "input operands do not cover output register"); if (SrcOps.size() == 1) return buildCast(DstOps[0], SrcOps[0]); - if (DstOps[0].getLLTTy(*getMRI()).isVector()) - return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + if (DstOps[0].getLLTTy(*getMRI()).isVector()) { + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); + } break; } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 42be88fcf94..f0e35c65c53 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = nullptr; } MIRBuilder.setMF(MF); - MORE = llvm::make_unique(MF, MBFI); + MORE = std::make_unique(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ -139,7 +139,7 @@ bool RegBankSelect::repairReg( "need new vreg for each breakdown"); // An empty range of new register means no repairing. 
- assert(!empty(NewVRegs) && "We should not have to repair"); + assert(!NewVRegs.empty() && "We should not have to repair"); MachineInstr *MI; if (ValMapping.NumBreakDowns == 1) { @@ -154,7 +154,7 @@ bool RegBankSelect::repairReg( std::swap(Src, Dst); assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && + Register::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right @@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit( // Check if this is a physical or virtual register. Register Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // We are going to split every outgoing edges. // Check that this is possible. // FIXME: The machine representation is currently broken @@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { // iterator before hand. MachineInstr &MI = *MII++; - // Ignore target-specific instructions: they should use proper regclasses. - if (isTargetSpecificOpcode(MI.getOpcode())) + // Ignore target-specific post-isel instructions: they should use proper + // regclasses. 
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; if (!assignInstr(MI)) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4e41f338934..fc9c802693a 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "registerbank" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 159422e3887..3fcc55286be 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); @@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterClass & RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Reg must be a physreg"); + assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); if (RegRCIt != PhysRegMinimalRCs.end()) return *RegRCIt->second; @@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, ++NumPartialMappingsCreated; auto &PartMapping = MapOfPartialMappings[Hash]; - PartMapping = llvm::make_unique(StartIdx, Length, RegBank); + PartMapping = std::make_unique(StartIdx, Length, 
RegBank); return *PartMapping; } @@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, ++NumValueMappingsCreated; auto &ValMapping = MapOfValueMappings[Hash]; - ValMapping = llvm::make_unique(BreakDown, NumBreakDowns); + ValMapping = std::make_unique(BreakDown, NumBreakDowns); return *ValMapping; } @@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { // mapping, because we use the pointer of the ValueMapping // to hash and we expect them to uniquely identify an instance // of value mapping. - Res = llvm::make_unique(std::distance(Begin, End)); + Res = std::make_unique(std::distance(Begin, End)); unsigned Idx = 0; for (Iterator It = Begin; It != End; ++It, ++Idx) { const ValueMapping *ValMap = *It; @@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl( ++NumInstructionMappingsCreated; auto &InstrMapping = MapOfInstructionMappings[Hash]; - InstrMapping = llvm::make_unique( + InstrMapping = std::make_unique( ID, Cost, OperandsMapping, NumOperands); return *InstrMapping; } @@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (empty(NewRegs)) { + if (NewRegs.empty()) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } @@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. 
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 766ea1d60ba..45618d7992a 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass( const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible @@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass( MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); // Some of the target independent instructions, like COPY, may not impose any @@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); assert(MO.isReg() && "Unsupported non-reg operand"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Physical registers don't need to be constrained. - if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Register operands with a value of 0 (e.g. 
predicate operands) don't need @@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI.use_nodbg_empty(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg)) return false; } return true; @@ -219,11 +216,33 @@ Optional llvm::getConstantVRegVal(unsigned VReg, } Optional llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool HandleFConstant) { SmallVector, 4> SeenOpcodes; MachineInstr *MI; - while ((MI = MRI.getVRegDef(VReg)) && - MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { + return Opcode == TargetOpcode::G_CONSTANT || + (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); + }; + auto GetImmediateValue = [HandleFConstant, + &MRI](const MachineInstr &MI) -> Optional { + const MachineOperand &CstVal = MI.getOperand(1); + if (!CstVal.isImm() && !CstVal.isCImm() && + (!HandleFConstant || !CstVal.isFPImm())) + return None; + if (!CstVal.isFPImm()) { + unsigned BitWidth = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + return Val; + } + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + }; + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -235,7 +254,7 @@ Optional llvm::getConstantVRegValWithLookThrough( break; case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VReg)) + if (Register::isPhysicalRegister(VReg)) return None; break; case TargetOpcode::G_INTTOPTR: @@ -245,16 +264,13 @@ Optional llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || - (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) + if (!MI || !IsConstantOpcode(MI->getOpcode())) return None; - const MachineOperand &CstVal = MI->getOperand(1); - unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); + Optional MaybeVal = GetImmediateValue(*MI); + if (!MaybeVal) + return None; + APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair OpcodeAndSize = SeenOpcodes.pop_back_val(); switch (OpcodeAndSize.first) { @@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, if (!DstTy.isValid()) return nullptr; while (DefMI->getOpcode() == TargetOpcode::COPY) { - unsigned SrcReg = DefMI->getOperand(1).getReg(); + Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; @@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return false; } +Optional llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + if (MaybeOp1Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_SEXT_INREG: + return C1.trunc(Imm).sext(C1.getBitWidth()); + } + } + return None; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 09201c2e7ba..d4fa45fcb40 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -456,14 +456,14 @@ bool 
GlobalMerge::doMerge(const SmallVectorImpl &Globals, bool HasExternal = false; StringRef FirstExternalName; - unsigned MaxAlign = 1; + Align MaxAlign; unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); // Make sure we use the same alignment AsmPrinter would use. - unsigned Align = DL.getPreferredAlignment(Globals[j]); - unsigned Padding = alignTo(MergedSize, Align) - MergedSize; + Align Alignment(DL.getPreferredAlignment(Globals[j])); + unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize; MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { @@ -478,7 +478,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, Inits.push_back(Globals[j]->getInitializer()); StructIdxs.push_back(CurIdx++); - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); if (Globals[j]->hasExternalLinkage() && !HasExternal) { HasExternal = true; diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index 5f57cabbe86..6a0f98d2e2b 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -183,7 +183,7 @@ bool HardwareLoops::runOnFunction(Function &F) { TTI = &getAnalysis().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); - LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + LibInfo = TLIP ? 
&TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); AC = &getAnalysis().getAssumptionCache(F); M = F.getParent(); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index b17a253fe23..d9caa566069 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -285,14 +285,113 @@ namespace { Prediction); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, - unsigned TCycle, unsigned TExtra, - MachineBasicBlock &FBB, - unsigned FCycle, unsigned FExtra, - BranchProbability Prediction) const { - return TCycle > 0 && FCycle > 0 && - TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction); + bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo, + MachineBasicBlock &CommBB, unsigned Dups, + BranchProbability Prediction, bool Forked) const { + const MachineFunction &MF = *TBBInfo.BB->getParent(); + if (MF.getFunction().hasMinSize()) { + MachineBasicBlock::iterator TIB = TBBInfo.BB->begin(); + MachineBasicBlock::iterator FIB = FBBInfo.BB->begin(); + MachineBasicBlock::iterator TIE = TBBInfo.BB->end(); + MachineBasicBlock::iterator FIE = FBBInfo.BB->end(); + + unsigned Dups1, Dups2; + if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2, + *TBBInfo.BB, *FBBInfo.BB, + /*SkipUnconditionalBranches*/ true)) + llvm_unreachable("should already have been checked by ValidDiamond"); + + unsigned BranchBytes = 0; + unsigned CommonBytes = 0; + + // Count common instructions at the start of the true and false blocks. + for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + + // Count instructions at the end of the true and false blocks, after + // the ones we plan to predicate. 
Analyzable branches will be removed + // (unless this is a forked diamond), and all other instructions are + // common between the two blocks. + for (auto &I : make_range(TIE, TBBInfo.BB->end())) { + if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : make_range(FIE, FBBInfo.BB->end())) { + if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } else { + LLVM_DEBUG(dbgs() << "Common inst: " << I); + CommonBytes += TII->getInstSizeInBytes(I); + } + } + for (auto &I : CommBB.terminators()) { + if (I.isBranch()) { + LLVM_DEBUG(dbgs() << "Saving branch: " << I); + BranchBytes += TII->predictBranchSizeForIfCvt(I); + } + } + + // The common instructions in one branch will be eliminated, halving + // their code size. + CommonBytes /= 2; + + // Count the instructions which we need to predicate. + unsigned NumPredicatedInstructions = 0; + for (auto &I : make_range(TIB, TIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + for (auto &I : make_range(FIB, FIE)) { + if (!I.isDebugInstr()) { + LLVM_DEBUG(dbgs() << "Predicating: " << I); + NumPredicatedInstructions++; + } + } + + // Even though we're optimising for size at the expense of performance, + // avoid creating really long predicated blocks. + if (NumPredicatedInstructions > 15) + return false; + + // Some targets (e.g. Thumb2) need to insert extra instructions to + // start predicated blocks. 
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions( + MF, NumPredicatedInstructions); + + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes + << ", CommonBytes=" << CommonBytes + << ", NumPredicatedInstructions=" + << NumPredicatedInstructions + << ", ExtraPredicateBytes=" << ExtraPredicateBytes + << ")\n"); + return (BranchBytes + CommonBytes) > ExtraPredicateBytes; + } else { + unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups; + unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups; + bool Res = TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt( + *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB, + FCycle, FBBInfo.ExtraCost2, Prediction); + LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle + << ", FCycle=" << FCycle + << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra=" + << FBBInfo.ExtraCost2 << ") = " << Res << "\n"); + return Res; + } } /// Returns true if Block ends without a terminator. @@ -356,8 +455,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. BranchFolder BF(true, false, MBFI, *MBPI); - BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + BFChange = BF.OptimizeFunction( + MF, TII, ST.getRegisterInfo(), + MMIWP ? &MMIWP->getMMI() : nullptr); } LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" @@ -496,8 +597,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false, false, MBFI, *MBPI); - BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable()); + auto *MMIWP = getAnalysisIfAvailable(); + BF.OptimizeFunction( + MF, TII, MF.getSubtarget().getRegisterInfo(), + MMIWP ? 
&MMIWP->getMMI() : nullptr); } MadeChange |= BFChange; @@ -569,6 +672,9 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, BranchProbability Prediction) const { Dups = 0; + if (TrueBBI.BB == FalseBBI.BB) + return false; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -835,6 +941,8 @@ bool IfConverter::ValidForkedDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; @@ -892,6 +1000,8 @@ bool IfConverter::ValidDiamond( TrueBBICalc.BB = TrueBBI.BB; FalseBBICalc.BB = FalseBBI.BB; + TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable; + FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable; if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc)) return false; // The size is used to decide whether to if-convert, and the shared portions @@ -912,6 +1022,12 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) { BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + if (!BBI.IsBrAnalyzable) { + BBI.TrueBB = nullptr; + BBI.FalseBB = nullptr; + BBI.BrCond.clear(); + } + SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); BBI.IsBrReversible = (RevCond.size() == 0) || !TII->reverseBranchCondition(RevCond); @@ -1173,13 +1289,9 @@ void IfConverter::AnalyzeBlock( if (CanRevCond) { BBInfo TrueBBICalc, FalseBBICalc; - auto feasibleDiamond = [&]() { - bool MeetsSize = MeetIfcvtSizeLimit( - *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) + - TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2, - *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) + - FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2, - Prediction); + auto feasibleDiamond = [&](bool Forked) { + bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB, 
+ Dups + Dups2, Prediction, Forked); bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond, /* IsTriangle */ false, /* RevCond */ false, /* hasCommonTail */ true); @@ -1191,7 +1303,7 @@ void IfConverter::AnalyzeBlock( if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(false)) { // Diamond: // EBB // / \_ @@ -1200,14 +1312,14 @@ void IfConverter::AnalyzeBlock( // \ / // TailBB // Note TailBB can be empty. - Tokens.push_back(llvm::make_unique( + Tokens.push_back(std::make_unique( BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; } } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2, TrueBBICalc, FalseBBICalc)) { - if (feasibleDiamond()) { + if (feasibleDiamond(true)) { // ForkedDiamond: // if TBB and FBB have a common tail that includes their conditional // branch instructions, then we can If Convert this pattern. 
@@ -1218,7 +1330,7 @@ void IfConverter::AnalyzeBlock( // / \ / \ // FalseBB TrueBB FalseBB // - Tokens.push_back(llvm::make_unique( + Tokens.push_back(std::make_unique( BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2, (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred)); Enqueued = true; @@ -1238,7 +1350,7 @@ void IfConverter::AnalyzeBlock( // | / // FBB Tokens.push_back( - llvm::make_unique(BBI, ICTriangle, TNeedSub, Dups)); + std::make_unique(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } @@ -1247,7 +1359,7 @@ void IfConverter::AnalyzeBlock( TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back( - llvm::make_unique(BBI, ICTriangleRev, TNeedSub, Dups)); + std::make_unique(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } @@ -1263,7 +1375,7 @@ void IfConverter::AnalyzeBlock( // | // FBB Tokens.push_back( - llvm::make_unique(BBI, ICSimple, TNeedSub, Dups)); + std::make_unique(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; } @@ -1275,7 +1387,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { - Tokens.push_back(llvm::make_unique(BBI, ICTriangleFalse, + Tokens.push_back(std::make_unique(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } @@ -1287,7 +1399,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back( - llvm::make_unique(BBI, ICTriangleFRev, FNeedSub, Dups)); + std::make_unique(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } @@ -1297,7 +1409,7 @@ void IfConverter::AnalyzeBlock( FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back( - llvm::make_unique(BBI, ICSimpleFalse, FNeedSub, Dups)); + std::make_unique(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; } } @@ -1730,6 
+1842,11 @@ bool IfConverter::IfConvertDiamondCommon( ++i; } while (NumDups1 != 0) { + // Since this instruction is going to be deleted, update call + // site info state if the instruction is call instruction. + if (DI2->isCall(MachineInstr::IgnoreBundle)) + MBB2.getParent()->eraseCallSiteInfo(&*DI2); + ++DI2; if (DI2 == MBB2.end()) break; @@ -1758,14 +1875,27 @@ bool IfConverter::IfConvertDiamondCommon( if (!BBI1->IsBrAnalyzable) verifySameBranchInstructions(&MBB1, &MBB2); #endif - BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB); - // Remove duplicated instructions. + // Remove duplicated instructions from the tail of MBB1: any branch + // instructions, and the common instructions counted by NumDups2. DI1 = MBB1.end(); + while (DI1 != MBB1.begin()) { + MachineBasicBlock::iterator Prev = std::prev(DI1); + if (!Prev->isBranch() && !Prev->isDebugInstr()) + break; + DI1 = Prev; + } for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't // run off the head of the list. assert(DI1 != MBB1.begin()); + --DI1; + + // Since this instruction is going to be deleted, update call + // site info state if the instruction is call instruction. 
+ if (DI1->isCall(MachineInstr::IgnoreBundle)) + MBB1.getParent()->eraseCallSiteInfo(&*DI1); + // skip dbg_value instructions if (!DI1->isDebugInstr()) ++i; @@ -1815,7 +1945,7 @@ bool IfConverter::IfConvertDiamondCommon( for (const MachineOperand &MO : FI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1983,7 +2113,7 @@ static bool MaySpeculate(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef() && !LaterRedefs.count(Reg)) @@ -2050,6 +2180,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, break; MachineInstr *MI = MF.CloneMachineInstr(&I); + // Make a copy of the call site info. + if (MI->isCall(MachineInstr::IgnoreBundle)) + MF.copyCallSiteInfo(&I,MI); + ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; unsigned ExtraPredCost = TII->getPredicationCost(I); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 1e82ea65961..b7dcaec9010 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -278,12 +278,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A, if (!(MOA.isReg() && MOA.getReg())) continue; - unsigned RegA = MOA.getReg(); + Register RegA = MOA.getReg(); for (auto MOB : B->operands()) { if (!(MOB.isReg() && MOB.getReg())) continue; - unsigned RegB = MOB.getReg(); + Register RegB = MOB.getReg(); if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef())) return false; @@ -517,7 +517,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we must ensure that there are no instructions between the 'test' and // conditional jump that modify %rax. 
- const unsigned PointerReg = MBP.LHS.getReg(); + const Register PointerReg = MBP.LHS.getReg(); assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block"); @@ -689,7 +689,7 @@ void ImplicitNullChecks::rewriteNullChecks( for (const MachineOperand &MO : FaultingInstr->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || MBB->isLiveIn(Reg)) continue; MBB->addLiveIn(Reg); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 41ae8061a91..2408f18678e 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "Spiller.h" #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" @@ -26,6 +25,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -346,8 +346,7 @@ void InlineSpiller::collectRegsToSpill() { } bool InlineSpiller::isSibling(unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) && - VRM.getOriginal(Reg) == Original; + return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; } /// It is beneficial to spill to earlier place in the same BB in case @@ -377,7 +376,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); #endif - unsigned SrcReg = CopyMI.getOperand(1).getReg(); + Register SrcReg = CopyMI.getOperand(1).getReg(); LiveInterval &SrcLI = LIS.getInterval(SrcReg); VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); LiveQueryResult SrcQ = SrcLI.Query(Idx); @@ -845,9 +844,8 @@ foldMemoryOperand(ArrayRef> Ops, for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) 
{ if (!MO->isReg()) continue; - unsigned Reg = MO->getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || - MRI.isReserved(Reg)) { + Register Reg = MO->getReg(); + if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) { continue; } // Skip non-Defs, including undef uses and internal reads. @@ -869,7 +867,7 @@ foldMemoryOperand(ArrayRef> Ops, --NumSpills; LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. @@ -1111,8 +1109,8 @@ void InlineSpiller::spillAll() { void InlineSpiller::spill(LiveRangeEdit &edit) { ++NumSpilledRanges; Edit = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); + assert(!Register::isStackSlot(edit.getReg()) && + "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); StackSlot = VRM.getStackSlot(Original); @@ -1147,7 +1145,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, // save a copy of LiveInterval in StackSlotToOrigLI because the original // LiveInterval may be cleared after all its references are spilled. 
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) { - auto LI = llvm::make_unique(OrigLI.reg, OrigLI.weight); + auto LI = std::make_unique(OrigLI.reg, OrigLI.weight); LI->assign(OrigLI, Allocator); StackSlotToOrigLI[StackSlot] = std::move(LI); } @@ -1459,7 +1457,7 @@ void HoistSpillHelper::hoistAllSpills() { LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); unsigned Original = VRM.getPreSplitReg(Reg); if (!MRI.def_empty(Reg)) Virt2SiblingsMap[Original].insert(Reg); diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp index 9525da849e2..770c4952d16 100644 --- a/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -940,8 +940,8 @@ public: /// \param V input value /// \param Result result polynomial static void computePolynomial(Value &V, Polynomial &Result) { - if (isa(&V)) - computePolynomialBinOp(*dyn_cast(&V), Result); + if (auto *BO = dyn_cast(&V)) + computePolynomialBinOp(*BO, Result); else Result = Polynomial(&V); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 886ae7e94ad..1c362aec6e6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -96,14 +96,15 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) { /// addPassesToX helper drives creation and initialization of TargetPassConfig. static TargetPassConfig * addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM, - bool DisableVerify, MachineModuleInfo &MMI) { + bool DisableVerify, + MachineModuleInfoWrapperPass &MMIWP) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM.createPassConfig(PM); // Set PassConfig options provided by TargetMachine. 
PassConfig->setDisableVerify(DisableVerify); PM.add(PassConfig); - PM.add(&MMI); + PM.add(&MMIWP); if (PassConfig->addISelPasses()) return nullptr; @@ -139,7 +140,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, std::unique_ptr MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); - auto FOut = llvm::make_unique(Out); + auto FOut = std::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), @@ -186,17 +187,15 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, return false; } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, - bool DisableVerify, - MachineModuleInfo *MMI) { +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, bool DisableVerify, + MachineModuleInfoWrapperPass *MMIWP) { // Add common CodeGen passes. - if (!MMI) - MMI = new MachineModuleInfo(this); + if (!MMIWP) + MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; @@ -206,12 +205,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // testing to be meaningful, we need to ensure that the symbols created // are MCSymbolXCOFF variants, which requires that // the TargetLoweringObjectFile instance has been initialized. 
- MCContext &Ctx = MMI->getContext(); + MCContext &Ctx = MMIWP->getMMI().getContext(); const_cast(*this->getObjFileLowering()) .Initialize(Ctx, *this); } PM.add(createPrintMIRPass(Out)); - } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext())) + } else if (addAsmPrinter(PM, Out, DwoOut, FileType, + MMIWP->getMMI().getContext())) return true; PM.add(createFreeMachineFunctionPass()); @@ -227,15 +227,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - MachineModuleInfo *MMI = new MachineModuleInfo(this); + MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this); TargetPassConfig *PassConfig = - addPassesToGenerateCode(*this, PM, DisableVerify, *MMI); + addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP); if (!PassConfig) return true; assert(TargetPassConfig::willCompleteCodeGenPipeline() && "Cannot emit MC with limited codegen pipeline"); - Ctx = &MMI->getContext(); + Ctx = &MMIWP->getMMI().getContext(); if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 200ac0ba15b..cef5085ae07 100644 --- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -73,18 +73,18 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { if (!MDT) { LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n"); - OwnedMDT = make_unique(); + OwnedMDT = std::make_unique(); OwnedMDT->getBase().recalculate(*MF); MDT = OwnedMDT.get(); } // Generate LoopInfo from it. 
- OwnedMLI = make_unique(); + OwnedMLI = std::make_unique(); OwnedMLI->getBase().analyze(MDT->getBase()); MLI = OwnedMLI.get(); } - OwnedMBFI = make_unique(); + OwnedMBFI = std::make_unique(); OwnedMBFI->calculate(*MF, MBPI, *MLI); return *OwnedMBFI.get(); } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 503821537ed..ac3ef0e709f 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index a669e64692b..f1b237d83e8 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -7,14 +7,23 @@ //===----------------------------------------------------------------------===// /// /// This pass implements a data flow analysis that propagates debug location -/// information by inserting additional DBG_VALUE instructions into the machine -/// instruction stream. The pass internally builds debug location liveness -/// ranges to determine the points where additional DBG_VALUEs need to be -/// inserted. +/// information by inserting additional DBG_VALUE insts into the machine +/// instruction stream. Before running, each DBG_VALUE inst corresponds to a +/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a +/// variable location for the current basic block (see SourceLevelDebugging.rst). /// /// This is a separate pass from DbgValueHistoryCalculator to facilitate /// testing and improve modularity. /// +/// Each variable location is represented by a VarLoc object that identifies the +/// source variable, its current machine-location, and the DBG_VALUE inst that +/// specifies the location. 
Each VarLoc is indexed in the (function-scope) +/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly +/// on machine locations, the dataflow analysis in this pass identifies +/// locations by their index in the VarLocMap, meaning all the variable +/// locations in a block can be described by a sparse vector of VarLocMap +/// indexes. +/// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -68,6 +77,7 @@ using namespace llvm; #define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); +STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed"); // If @MI is a DBG_VALUE with debug value described by a defined // register, returns the number of this register. In the other case, returns 0. @@ -179,8 +189,16 @@ private: } }; + /// Identity of the variable at this location. const DebugVariable Var; - const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. + + /// The expression applied to this location. + const DIExpression *Expr; + + /// DBG_VALUE to clone var/expr information from if this location + /// is moved. + const MachineInstr &MI; + mutable UserValueScopes UVS; enum VarLocKind { InvalidKind = 0, @@ -201,9 +219,9 @@ private: const ConstantInt *CImm; } Loc; - VarLoc(const MachineInstr &MI, LexicalScopes &LS, - VarLocKind K = InvalidKind) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){ + VarLoc(const MachineInstr &MI, LexicalScopes &LS) + : Var(MI), Expr(MI.getDebugExpression()), MI(MI), + UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); @@ -225,17 +243,78 @@ private: "entry values must be register locations"); } - /// The constructor for spill locations. 
- VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset, - LexicalScopes &LS) - : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) { - assert(MI.isDebugValue() && "not a DBG_VALUE"); - assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); - Kind = SpillLocKind; - Loc.SpillLocation = {SpillBase, SpillOffset}; + /// Take the variable and machine-location in DBG_VALUE MI, and build an + /// entry location using the given expression. + static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + VL.Kind = EntryValueKind; + VL.Expr = EntryExpr; + return VL; } - // Is the Loc field a constant or constant object? + /// Copy the register location in DBG_VALUE MI, updating the register to + /// be NewReg. + static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Loc.RegNo = NewReg; + return VL; + } + + /// Take the variable described by DBG_VALUE MI, and create a VarLoc + /// locating it in the specified spill location. + static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase, + int SpillOffset, LexicalScopes &LS) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = SpillLocKind; + VL.Loc.SpillLocation = {SpillBase, SpillOffset}; + return VL; + } + + /// Create a DBG_VALUE representing this VarLoc in the given function. + /// Copies variable-specific information such as DILocalVariable and + /// inlining information from the original DBG_VALUE instruction, which may + /// have been several transfers ago. 
+ MachineInstr *BuildDbgValue(MachineFunction &MF) const { + const DebugLoc &DbgLoc = MI.getDebugLoc(); + bool Indirect = MI.isIndirectDebugValue(); + const auto &IID = MI.getDesc(); + const DILocalVariable *Var = MI.getDebugVariable(); + const DIExpression *DIExpr = MI.getDebugExpression(); + + switch (Kind) { + case EntryValueKind: + // An entry value is a register location -- but with an updated + // expression. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + case RegisterKind: + // Register locations are like the source DBG_VALUE, but with the + // register number from this VarLoc. + return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr); + case SpillLocKind: { + // Spills are indirect DBG_VALUEs, with a base register and offset. + // Use the original DBG_VALUEs expression to build the spilt location + // on top of. FIXME: spill locations created before this pass runs + // are not recognized, and not handled here. + auto *SpillExpr = DIExpression::prepend( + DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset); + unsigned Base = Loc.SpillLocation.SpillBase; + return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr); + } + case ImmediateKind: { + MachineOperand MO = MI.getOperand(0); + return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); + } + case InvalidKind: + llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + } + llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); + } + + /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } /// If this variable is described by a register, return it, @@ -251,18 +330,42 @@ private: bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { MI.dump(); } + // TRI can be null. 
+ void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const { + dbgs() << "VarLoc("; + switch (Kind) { + case RegisterKind: + case EntryValueKind: + dbgs() << printReg(Loc.RegNo, TRI); + break; + case SpillLocKind: + dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI); + dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]"; + break; + case ImmediateKind: + dbgs() << Loc.Immediate; + break; + case InvalidKind: + llvm_unreachable("Invalid VarLoc in dump method"); + } + + dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + if (Var.getInlinedAt()) + dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; + else + dbgs() << "(null))\n"; + } #endif bool operator==(const VarLoc &Other) const { return Kind == Other.Kind && Var == Other.Var && - Loc.Hash == Other.Loc.Hash; + Loc.Hash == Other.Loc.Hash && Expr == Other.Expr; } /// This operator guarantees that VarLocs are sorted by Variable first. bool operator<(const VarLoc &Other) const { - return std::tie(Var, Kind, Loc.Hash) < - std::tie(Other.Var, Other.Kind, Other.Loc.Hash); + return std::tie(Var, Kind, Loc.Hash, Expr) < + std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr); } }; @@ -271,8 +374,8 @@ private: using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap; struct TransferDebugPair { - MachineInstr *TransferInst; - MachineInstr *DebugInst; + MachineInstr *TransferInst; /// Instruction where this transfer occurs. + unsigned LocationID; /// Location number for the transfer dest. }; using TransferMap = SmallVector; @@ -320,6 +423,14 @@ private: Vars.insert({Var, VarLocID}); } + /// Insert a set of ranges. + void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { + for (unsigned Id : ToLoad) { + const VarLoc &Var = Map[Id]; + insert(Id, Var.Var); + } + } + /// Empty the set. 
void clear() { VarLocs.clear(); @@ -333,8 +444,18 @@ private: } }; - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF, - unsigned &Reg); + /// Tests whether this instruction is a spill to a stack location. + bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + + /// Decide if @MI is a spill instruction and return true if it is. We use 2 + /// criteria to make this decision: + /// - Is this instruction a store to a spill slot? + /// - Is there a register operand that is both used and killed? + /// TODO: Store optimization can fold spills into other stores (including + /// other spills). We do not handle this yet (more than one memory operand). + bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, + unsigned &Reg); + /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. Optional isRestoreInstruction(const MachineInstr &MI, @@ -361,13 +482,13 @@ private: void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals); - bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); + bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); - bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, + void process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, OverlapMap &OverlapFragments, + OverlapMap &OverlapFragments, VarToFragments &SeenFragments); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, @@ -376,7 +497,12 @@ private: bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet &Visited, - SmallPtrSetImpl &ArtificialBlocks); 
+ SmallPtrSetImpl &ArtificialBlocks, + VarLocInMBB &PendingInLocs); + + /// Create DBG_VALUE insts for inlocs that have been propagated but + /// had their instruction creation deferred. + void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); bool ExtendRanges(MachineFunction &MF); @@ -518,7 +644,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, const VarLoc &VL = VarLocIDs[VLL]; Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; - VL.dump(); + VL.dump(TRI, Out); } } Out << "\n"; @@ -567,11 +693,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, ID = VarLocIDs.insert(VL); OpenRanges.insert(ID, VL.Var); } else if (MI.hasOneMemOperand()) { - // It's a stack spill -- fetch spill base and offset. - VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS); - ID = VarLocIDs.insert(VL); - OpenRanges.insert(ID, VL.Var); + llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { // This must be an undefined location. We should leave OpenRanges closed. 
assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && @@ -585,7 +707,6 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, TransferMap &Transfers, DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { - MachineFunction *MF = MI.getParent()->getParent(); for (unsigned ID : KillSet) { if (!VarLocIDs[ID].Var.getVar()->isParameter()) continue; @@ -600,20 +721,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI, auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; DIExpression *NewExpr = DIExpression::prepend( ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - MachineInstr *EntryValDbgMI = - BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(), - ParamDebugInstr->isIndirectDebugValue(), - ParamDebugInstr->getOperand(0).getReg(), - ParamDebugInstr->getDebugVariable(), NewExpr); - if (ParamDebugInstr->isIndirectDebugValue()) - EntryValDbgMI->getOperand(1).setImm( - ParamDebugInstr->getOperand(1).getImm()); + VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); - Transfers.push_back({&MI, EntryValDbgMI}); - VarLoc VL(*EntryValDbgMI, LS); - unsigned EntryValLocID = VarLocIDs.insert(VL); - OpenRanges.insert(EntryValLocID, VL.Var); + unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValLocID}); + OpenRanges.insert(EntryValLocID, EntryLoc.Var); } } @@ -627,21 +740,19 @@ void LiveDebugValues::insertTransferDebugPair( VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind, unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - MachineFunction *MF = MI.getParent()->getParent(); - MachineInstr *NewDebugInstr; auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) { + &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. 
DebugVariable V(*DebugInstr); OpenRanges.erase(V); + // Record the new location as an open range, and a postponed transfer + // inserting a DBG_VALUE for this location. OpenRanges.insert(LocId, VL.Var); - // The newly created DBG_VALUE instruction NewDebugInstr must be inserted - // after MI. Keep track of the pairing. - TransferDebugPair MIP = {&MI, NewDebugInstr}; + TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; @@ -653,37 +764,25 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a copy of a debug value"); // Create a DBG_VALUE instruction to describe the Var in its new // register location. - NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), - DebugInstr->isIndirectDebugValue(), NewReg, - DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for register copy:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferSpill: { // Create a DBG_VALUE instruction to describe the Var in its spilled // location. 
VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI); - auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(), - DIExpression::ApplyOffset, - SpillLocation.SpillOffset); - NewDebugInstr = BuildMI( - *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true, - SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr); - VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase, - SpillLocation.SpillOffset, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase, + SpillLocation.SpillOffset, LS); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for spill:"; + VL.dump(TRI); + }); return; } case TransferKind::TransferRestore: { @@ -691,15 +790,14 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a restore of a debug value"); MachineFunction *MF = MI.getMF(); DIBuilder DIB(*const_cast(MF->getFunction()).getParent()); - NewDebugInstr = - BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false, - NewReg, DebugInstr->getDebugVariable(), DIB.createExpression()); - VarLoc VL(*NewDebugInstr, LS); - ProcessVarLoc(VL, NewDebugInstr); - LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: "; - NewDebugInstr->print(dbgs(), /*IsStandalone*/false, - /*SkipOpers*/false, /*SkipDebugLoc*/false, - /*AddNewLine*/true, TII)); + // DebugInstr refers to the pre-spill location, therefore we can reuse + // its expression. 
+ VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); + ProcessVarLoc(VL); + LLVM_DEBUG({ + dbgs() << "Creating VarLoc for restore:"; + VL.dump(TRI); + }); return; } } @@ -719,7 +817,7 @@ void LiveDebugValues::transferRegisterDef( // instructions never clobber SP, because some backends (e.g., AArch64) // never list SP in the regmask. if (MO.isReg() && MO.isDef() && MO.getReg() && - TRI->isPhysicalRegister(MO.getReg()) && + Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) @@ -748,16 +846,8 @@ void LiveDebugValues::transferRegisterDef( } } -/// Decide if @MI is a spill instruction and return true if it is. We use 2 -/// criteria to make this decision: -/// - Is this instruction a store to a spill slot? -/// - Is there a register operand that is both used and killed? -/// TODO: Store optimization can fold spills into other stores (including -/// other spills). We do not handle this yet (more than one memory operand). bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF, unsigned &Reg) { - SmallVector Accesses; - + MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) return false; @@ -766,6 +856,14 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, return false; // This is not a spill instruction, since no valid size was // returned from either function. 
+ return true; +} + +bool LiveDebugValues::isLocationSpill(const MachineInstr &MI, + MachineFunction *MF, unsigned &Reg) { + if (!isSpillInstruction(MI, MF)) + return false; + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { if (!MO.isReg() || !MO.isUse()) { Reg = 0; @@ -834,7 +932,37 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump();); - if (isSpillInstruction(MI, MF, Reg)) { + // First, if there are any DBG_VALUEs pointing at a spill slot that is + // written to, then close the variable location. The value in memory + // will have changed. + VarLocSet KillSet; + if (isSpillInstruction(MI, MF)) { + Loc = extractSpillBaseRegAndOffset(MI); + for (unsigned ID : OpenRanges.getVarLocs()) { + const VarLoc &VL = VarLocIDs[ID]; + if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) { + // This location is overwritten by the current instruction -- terminate + // the open range, and insert an explicit DBG_VALUE $noreg. + // + // Doing this at a later stage would require re-interpreting all + // DBG_VALUes and DIExpressions to identify whether they point at + // memory, and then analysing all memory writes to see if they + // overwrite that memory, which is expensive. + // + // At this stage, we already know which DBG_VALUEs are for spills and + // where they are located; it's best to fix handle overwrites now. + KillSet.set(ID); + VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0); + unsigned UndefLocID = VarLocIDs.insert(UndefVL); + Transfers.push_back({&MI, UndefLocID}); + } + } + OpenRanges.erase(KillSet, VarLocIDs); + } + + // Try to recognise spill and restore instructions that may create a new + // variable location. 
+ if (isLocationSpill(MI, MF, Reg)) { TKind = TransferKind::TransferSpill; LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump();); LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) @@ -854,6 +982,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); } else if (TKind == TransferKind::TransferRestore && + VarLocIDs[ID].Kind == VarLoc::SpillLocKind && VarLocIDs[ID].Loc.SpillLocation == *Loc) { LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); @@ -885,8 +1014,8 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, return false; }; - unsigned SrcReg = SrcRegOp->getReg(); - unsigned DestReg = DestRegOp->getReg(); + Register SrcReg = SrcRegOp->getReg(); + Register DestReg = DestRegOp->getReg(); // We want to recognize instructions where destination register is callee // saved register. If register that could be clobbered by the call is @@ -906,26 +1035,20 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, } /// Terminate all open ranges at the end of the current basic block. -bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, - OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, - const VarLocMap &VarLocIDs) { +bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, + OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, + const VarLocMap &VarLocIDs) { bool Changed = false; - const MachineBasicBlock *CurMBB = MI.getParent(); - if (!(MI.isTerminator() || (&MI == &CurMBB->back()))) - return false; - - if (OpenRanges.empty()) - return false; LLVM_DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { // Copy OpenRanges to OutLocs, if not already present. 
dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": "; - VarLocIDs[ID].dump(); + VarLocIDs[ID].dump(TRI); }); VarLocSet &VLS = OutLocs[CurMBB]; - Changed = VLS |= OpenRanges.getVarLocs(); + Changed = VLS != OpenRanges.getVarLocs(); // New OutLocs set may be different due to spill, restore or register // copy instruction processing. if (Changed) @@ -995,26 +1118,17 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, } /// This routine creates OpenRanges and OutLocs. -bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, +void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals, - bool transferChanges, + TransferMap &Transfers, + DebugParamMap &DebugEntryVals, OverlapMap &OverlapFragments, VarToFragments &SeenFragments) { - bool Changed = false; transferDebugValue(MI, OpenRanges, VarLocIDs); transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals); - if (transferChanges) { - transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); - transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); - } else { - // Build up a map of overlapping fragments on the first run through. 
- if (MI.isDebugValue()) - accumulateFragmentMap(MI, SeenFragments, OverlapFragments); - } - Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs); - return Changed; + transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); + transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } /// This routine joins the analysis results of all incoming edges in @MBB by @@ -1024,7 +1138,8 @@ bool LiveDebugValues::join( MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, const VarLocMap &VarLocIDs, SmallPtrSet &Visited, - SmallPtrSetImpl &ArtificialBlocks) { + SmallPtrSetImpl &ArtificialBlocks, + VarLocInMBB &PendingInLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -1034,9 +1149,11 @@ bool LiveDebugValues::join( // can be joined. int NumVisited = 0; for (auto p : MBB.predecessors()) { - // Ignore unvisited predecessor blocks. As we are processing - // the blocks in reverse post-order any unvisited block can - // be considered to not remove any incoming values. + // Ignore backedges if we have not visited the predecessor yet. As the + // predecessor hasn't yet had locations propagated into it, most locations + // will not yet be valid, so treat them as all being uninitialized and + // potentially valid. If a location guessed to be correct here is + // invalidated later, we will remove it when we revisit this block. if (!Visited.count(p)) { LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber() << "\n"); @@ -1086,44 +1203,59 @@ bool LiveDebugValues::join( // is the entry block which has no predecessor. assert((NumVisited || MBB.pred_empty()) && "Should have processed at least one predecessor"); - if (InLocsT.empty()) - return false; VarLocSet &ILS = InLocs[&MBB]; + VarLocSet &Pending = PendingInLocs[&MBB]; - // Insert DBG_VALUE instructions, if not already inserted. 
+ // New locations will have DBG_VALUE insts inserted at the start of the + // block, after location propagation has finished. Record the insertions + // that we need to perform in the Pending set. VarLocSet Diff = InLocsT; Diff.intersectWithComplement(ILS); for (auto ID : Diff) { - // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a - // new range is started for the var from the mbb's beginning by inserting - // a new DBG_VALUE. process() will end this range however appropriate. - const VarLoc &DiffIt = VarLocIDs[ID]; - const MachineInstr *DebugInstr = &DiffIt.MI; - MachineInstr *MI = nullptr; - if (DiffIt.isConstant()) { - MachineOperand MO(DebugInstr->getOperand(0)); - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), false, MO, - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - } else { - MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(), - DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(), - DebugInstr->getOperand(0).getReg(), - DebugInstr->getDebugVariable(), - DebugInstr->getDebugExpression()); - if (DebugInstr->isIndirectDebugValue()) - MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm()); - } - LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + Pending.set(ID); ILS.set(ID); ++NumInserted; Changed = true; } + + // We may have lost locations by learning about a predecessor that either + // loses or moves a variable. Find any locations in ILS that are not in the + // new in-locations, and delete those. + VarLocSet Removed = ILS; + Removed.intersectWithComplement(InLocsT); + for (auto ID : Removed) { + Pending.reset(ID); + ILS.reset(ID); + ++NumRemoved; + Changed = true; + } + return Changed; } +void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, + VarLocMap &VarLocIDs) { + // PendingInLocs records all locations propagated into blocks, which have + // not had DBG_VALUE insts created. Go through and create those insts now. 
+ for (auto &Iter : PendingInLocs) { + // Map is keyed on a constant pointer, unwrap it so we can insert insts. + auto &MBB = const_cast(*Iter.first); + VarLocSet &Pending = Iter.second; + + for (unsigned ID : Pending) { + // The ID location is live-in to MBB -- work out what kind of machine + // location it is and create a DBG_VALUE. + const VarLoc &DiffIt = VarLocIDs[ID]; + MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); + MBB.insert(MBB.instr_begin(), MI); + + (void)MI; + LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump();); + } + } +} + /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { @@ -1140,6 +1272,9 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. TransferMap Transfers; // DBG_VALUEs associated with spills. + VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but + // that we have deferred creating DBG_VALUE insts + // for immediately. VarToFragments SeenFragments; @@ -1156,8 +1291,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { std::greater> Pending; - enum : bool { dontTransferChanges = false, transferChanges = true }; - // Besides parameter's modification, check whether a DBG_VALUE is inlined // in order to deduce whether the variable that it tracks comes from // a different function. If that is the case we can't track its entry value. 
@@ -1169,7 +1302,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - unsigned FP = TRI->getFrameRegister(MF); + Register FP = TRI->getFrameRegister(MF); auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; }; @@ -1195,23 +1328,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { !MI.getDebugExpression()->isFragment()) DebugEntryVals[MI.getDebugVariable()] = &MI; - // Initialize every mbb with OutLocs. - // We are not looking at any spill instructions during the initial pass - // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE - // instructions for spills of registers that are known to be user variables - // within the BB in which the spill occurs. + // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { + PendingInLocs[&MBB] = VarLocSet(); + for (auto &MI : MBB) { - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals, - dontTransferChanges, OverlapFragments, SeenFragments); + if (MI.isDebugValue()) + accumulateFragmentMap(MI, SeenFragments, OverlapFragments); } - // Add any entry DBG_VALUE instructions necessitated by parameter - // clobbering. 
- for (auto &TR : Transfers) { - MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); - } - Transfers.clear(); } auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool { @@ -1248,26 +1372,21 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = - join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks); - Visited.insert(MBB); + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, + ArtificialBlocks, PendingInLocs); + MBBJoined |= Visited.insert(MBB).second; if (MBBJoined) { MBBJoined = false; Changed = true; // Now that we have started to extend ranges across BBs we need to // examine spill instructions to see whether they spill registers that // correspond to user variables. + // First load any pending inlocs. + OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - DebugEntryVals, transferChanges, OverlapFragments, - SeenFragments); - - // Add any DBG_VALUE instructions necessitated by spills. - for (auto &TR : Transfers) - MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), - TR.DebugInst); - Transfers.clear(); + DebugEntryVals, OverlapFragments, SeenFragments); + OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after propagating", dbgs())); @@ -1289,6 +1408,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { assert(Pending.empty() && "Pending should be empty"); } + // Add any DBG_VALUE instructions created by location transfers. 
+ for (auto &TR : Transfers) { + MachineBasicBlock *MBB = TR.TransferInst->getParent(); + const VarLoc &VL = VarLocIDs[TR.LocationID]; + MachineInstr *MI = VL.BuildDbgValue(MF); + MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI); + } + Transfers.clear(); + + // Deferred inlocs will not have had any DBG_VALUE insts created; do + // that now. + flushPendingLocs(PendingInLocs, VarLocIDs); + LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); return Changed; @@ -1308,7 +1440,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->determineCalleeSaves(MF, CalleeSavedRegs, - make_unique().get()); + std::make_unique().get()); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 656ec7d4bdf..2dd462fc72b 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U }; /// usage of the location. class DbgValueLocation { public: - DbgValueLocation(unsigned LocNo, bool WasIndirect) - : LocNo(LocNo), WasIndirect(WasIndirect) { + DbgValueLocation(unsigned LocNo) + : LocNo(LocNo) { static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); assert(locNo() == LocNo && "location truncation"); } - DbgValueLocation() : LocNo(0), WasIndirect(0) {} + DbgValueLocation() : LocNo(0) {} unsigned locNo() const { // Fix up the undef location number, which gets truncated. return LocNo == INT_MAX ? 
UndefLocNo : LocNo; } - bool wasIndirect() const { return WasIndirect; } bool isUndef() const { return locNo() == UndefLocNo; } DbgValueLocation changeLocNo(unsigned NewLocNo) const { - return DbgValueLocation(NewLocNo, WasIndirect); + return DbgValueLocation(NewLocNo); } friend inline bool operator==(const DbgValueLocation &LHS, const DbgValueLocation &RHS) { - return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + return LHS.LocNo == RHS.LocNo; } friend inline bool operator!=(const DbgValueLocation &LHS, @@ -129,8 +128,7 @@ public: } private: - unsigned LocNo : 31; - unsigned WasIndirect : 1; + unsigned LocNo; }; /// Map of where a user value is live, and its location. @@ -144,22 +142,51 @@ namespace { class LDVImpl; +/// A UserValue is uniquely identified by the source variable it refers to +/// (Variable), the expression describing how to get the value (Expression) and +/// the specific usage (InlinedAt). InlinedAt differentiates both between +/// inline and non-inline functions, and multiple inlined instances in the same +/// scope. FIXME: The only part of the Expression which matters for UserValue +/// identification is the fragment part. +class UserValueIdentity { +private: + /// The debug info variable we are part of. + const DILocalVariable *Variable; + /// Any complex address expression. + const DIExpression *Expression; + /// Function usage identification. + const DILocation *InlinedAt; + +public: + UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) + : Variable(Var), Expression(Expr), InlinedAt(IA) {} + + bool match(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) const { + // FIXME: The fragment should be part of the identity, but not + // other things in the expression like stack values. 
+ return Var == Variable && Expr == Expression && IA == InlinedAt; + } + + bool match(const UserValueIdentity &Other) const { + return match(Other.Variable, Other.Expression, Other.InlinedAt); + } + + unsigned hash_value() const { + return hash_combine(Variable, Expression, InlinedAt); + } +}; + /// A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register /// holds part of a user variable. The part is identified by a byte offset. -/// -/// UserValues are grouped into equivalence classes for easier searching. Two -/// user values are related if they refer to the same variable, or if they are -/// held by the same virtual register. The equivalence class is the transitive -/// closure of that relation. class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. const DIExpression *Expression; ///< Any complex address expression. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. - UserValue *leader; ///< Equivalence class leader. - UserValue *next = nullptr; ///< Next value in equivalence class, or null. /// Numbered locations referenced by locmap. SmallVector locations; @@ -180,49 +207,15 @@ class UserValue { LiveIntervals &LIS); public: + UserValue(const UserValue &) = delete; + /// Create a new UserValue. UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), dl(std::move(L)), leader(this), - locInts(alloc) {} + : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {} - /// Get the leader of this value's equivalence class. - UserValue *getLeader() { - UserValue *l = leader; - while (l != l->leader) - l = l->leader; - return leader = l; - } - - /// Return the next UserValue in the equivalence class. - UserValue *getNext() const { return next; } - - /// Does this UserValue match the parameters? 
- bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) const { - // FIXME: The fragment should be part of the equivalence class, but not - // other things in the expression like stack values. - return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; - } - - /// Merge equivalence classes. - static UserValue *merge(UserValue *L1, UserValue *L2) { - L2 = L2->getLeader(); - if (!L1) - return L2; - L1 = L1->getLeader(); - if (L1 == L2) - return L1; - // Splice L2 before L1's members. - UserValue *End = L2; - while (End->next) { - End->leader = L1; - End = End->next; - } - End->leader = L1; - End->next = L1->next; - L1->next = L2; - return L1; + UserValueIdentity getId() { + return UserValueIdentity(Variable, Expression, dl->getInlinedAt()); } /// Return the location number that matches Loc. @@ -261,8 +254,8 @@ public: void mapVirtRegs(LDVImpl *LDV); /// Add a definition point to this value. - void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { - DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); + void addDef(SlotIndex Idx, const MachineOperand &LocMO) { + DbgValueLocation Loc(getLocationNo(LocMO)); // Add a singular (Idx,Idx) -> Loc mapping. LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) @@ -297,11 +290,10 @@ public: /// /// \param LI Scan for copies of the value in LI->reg. /// \param LocNo Location number of LI->reg. - /// \param WasIndirect Indicates if the original use of LI->reg was indirect /// \param Kills Points where the range of LocNo could be extended. /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here. 
void addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, unsigned LocNo, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS); @@ -335,7 +327,29 @@ public: void print(raw_ostream &, const TargetRegisterInfo *); }; +} // namespace +namespace llvm { +template <> struct DenseMapInfo { + static UserValueIdentity getEmptyKey() { + auto Key = DenseMapInfo::getEmptyKey(); + return UserValueIdentity(Key, nullptr, nullptr); + } + static UserValueIdentity getTombstoneKey() { + auto Key = DenseMapInfo::getTombstoneKey(); + return UserValueIdentity(Key, nullptr, nullptr); + } + static unsigned getHashValue(const UserValueIdentity &Val) { + return Val.hash_value(); + } + static bool isEqual(const UserValueIdentity &LHS, + const UserValueIdentity &RHS) { + return LHS.match(RHS); + } +}; +} // namespace llvm + +namespace { /// A user label is a part of a debug info user label. class UserLabel { const DILabel *Label; ///< The debug info label we are part of. @@ -387,20 +401,20 @@ class LDVImpl { /// All allocated UserLabel instances. SmallVector, 2> userLabels; - /// Map virtual register to eq class leader. - using VRMap = DenseMap; - VRMap virtRegToEqClass; + /// Map virtual register to UserValues which use it. + using VRMap = DenseMap>; + VRMap VirtRegToUserVals; - /// Map user variable to eq class leader. - using UVMap = DenseMap; - UVMap userVarMap; + /// Map unique UserValue identity to UserValue. + using UVMap = DenseMap; + UVMap UserVarMap; /// Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL); - /// Find the EC leader for VirtReg or null. - UserValue *lookupVirtReg(unsigned VirtReg); + /// Find the UserValues for VirtReg or null. + SmallVectorImpl *lookupVirtReg(unsigned VirtReg); /// Add DBG_VALUE instruction to our maps. 
/// @@ -440,8 +454,8 @@ public: MF = nullptr; userValues.clear(); userLabels.clear(); - virtRegToEqClass.clear(); - userVarMap.clear(); + VirtRegToUserVals.clear(); + UserVarMap.clear(); // Make sure we call emitDebugValues if the machine function was modified. assert((!ModifiedMF || EmitDone) && "Dbg values are not emitted in LDV"); @@ -449,8 +463,8 @@ public: ModifiedMF = false; } - /// Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); + /// Map virtual register to a UserValue. + void mapVirtReg(unsigned VirtReg, UserValue *UV); /// Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef NewRegs); @@ -521,8 +535,6 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << "undef"; else { OS << I.value().locNo(); - if (I.value().wasIndirect()) - OS << " ind"; } } for (unsigned i = 0, e = locations.size(); i != e; ++i) { @@ -554,37 +566,33 @@ void LDVImpl::print(raw_ostream &OS) { void UserValue::mapVirtRegs(LDVImpl *LDV) { for (unsigned i = 0, e = locations.size(); i != e; ++i) if (locations[i].isReg() && - TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + Register::isVirtualRegister(locations[i].getReg())) LDV->mapVirtReg(locations[i].getReg(), this); } UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL) { - UserValue *&Leader = userVarMap[Var]; - if (Leader) { - UserValue *UV = Leader->getLeader(); - Leader = UV; - for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, DL->getInlinedAt())) - return UV; - } + auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt()); + UserValue *&UVEntry = UserVarMap[Ident]; - userValues.push_back( - llvm::make_unique(Var, Expr, DL, allocator)); - UserValue *UV = userValues.back().get(); - Leader = UserValue::merge(Leader, UV); - return UV; + if (UVEntry) + return UVEntry; + + userValues.push_back(std::make_unique(Var, Expr, DL, allocator)); + return 
UVEntry = userValues.back().get(); } -void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs"); - UserValue *&Leader = virtRegToEqClass[VirtReg]; - Leader = UserValue::merge(Leader, EC); +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) { + assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs"); + assert(UserVarMap.find(UV->getId()) != UserVarMap.end() && + "UserValue should exist in UserVarMap"); + VirtRegToUserVals[VirtReg].push_back(UV); } -UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { - if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) - return UV->getLeader(); +SmallVectorImpl *LDVImpl::lookupVirtReg(unsigned VirtReg) { + VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg); + if (Itr != VirtRegToUserVals.end()) + return &Itr->getSecond(); return nullptr; } @@ -606,8 +614,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { // could be removed or replaced by asserts. bool Discard = false; if (MI.getOperand(0).isReg() && - TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { - const unsigned Reg = MI.getOperand(0).getReg(); + Register::isVirtualRegister(MI.getOperand(0).getReg())) { + const Register Reg = MI.getOperand(0).getReg(); if (!LIS->hasInterval(Reg)) { // The DBG_VALUE is described by a virtual register that does not have a // live interval. Discard the DBG_VALUE. @@ -631,19 +639,18 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { } // Get or create the UserValue for (variable,offset) here. 
- bool IsIndirect = MI.getOperand(1).isImm(); - if (IsIndirect) - assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); + assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before " + "LiveDebugVariables"); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc()); if (!Discard) - UV->addDef(Idx, MI.getOperand(0), IsIndirect); + UV->addDef(Idx, MI.getOperand(0)); else { MachineOperand MO = MachineOperand::CreateReg(0U, false); MO.setIsDebug(); - UV->addDef(Idx, MO, false); + UV->addDef(Idx, MO); } return true; } @@ -666,7 +673,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) { } } if (!Found) - userLabels.push_back(llvm::make_unique(Label, DL, Idx)); + userLabels.push_back(std::make_unique(Label, DL, Idx)); return true; } @@ -751,14 +758,14 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, } void UserValue::addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, bool WasIndirect, + LiveInterval *LI, unsigned LocNo, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. - if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + if (!Register::isVirtualRegister(LI->reg)) return; // Collect all the (vreg, valno) pairs that are copies of LI. @@ -768,13 +775,13 @@ void UserValue::addDefsFromCopies( // Copies of the full value. if (MO.getSubReg() || !MI->isCopy()) continue; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); // Don't follow copies to physregs. These are usually setting up call // arguments, and the argument registers are always call clobbered. We are // better off in the source register which could be a callee-saved register, // or it could be spilled. 
- if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + if (!Register::isVirtualRegister(DstReg)) continue; // Is LocNo extended to reach this copy? If not, another def may be blocking @@ -815,7 +822,7 @@ void UserValue::addDefsFromCopies( MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - DbgValueLocation NewLoc(LocNo, WasIndirect); + DbgValueLocation NewLoc(LocNo); I.insert(Idx, Idx.getNextSlot(), NewLoc); NewDefs.push_back(std::make_pair(Idx, NewLoc)); break; @@ -845,7 +852,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, } // Register locations are constrained to where the register value is live. - if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) { + if (Register::isVirtualRegister(LocMO.getReg())) { LiveInterval *LI = nullptr; const VNInfo *VNI = nullptr; if (LIS.hasInterval(LocMO.getReg())) { @@ -863,8 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // sub-register in that regclass). For now, simply skip handling copies if // a sub-register is involved. if (LI && !LocMO.getSubReg()) - addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, - LIS); + addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS); continue; } @@ -1123,16 +1129,18 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef NewRegs, void LDVImpl::splitRegister(unsigned OldReg, ArrayRef NewRegs) { bool DidChange = false; - for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); + if (auto *UserVals = lookupVirtReg(OldReg)) + for (auto *UV : *UserVals) + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; // Map all of the new virtual registers. 
- UserValue *UV = lookupVirtReg(OldReg); - for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i], UV); + if (auto *UserVals = lookupVirtReg(OldReg)) + for (auto *UV : *UserVals) + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: @@ -1161,10 +1169,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF, MachineOperand Loc = locations[I]; // Only virtual registers are rewritten. if (Loc.isReg() && Loc.getReg() && - TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { - unsigned VirtReg = Loc.getReg(); + Register::isVirtualRegister(Loc.getReg())) { + Register VirtReg = Loc.getReg(); if (VRM.isAssignedReg(VirtReg) && - TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + Register::isPhysicalRegister(VRM.getPhys(VirtReg))) { // This can create a %noreg operand in rare cases when the sub-register // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. @@ -1258,7 +1266,7 @@ findNextInsertLocation(MachineBasicBlock *MBB, const TargetRegisterInfo &TRI) { if (!LocMO.isReg()) return MBB->instr_end(); - unsigned Reg = LocMO.getReg(); + Register Reg = LocMO.getReg(); // Find the next instruction in the MBB that define the register Reg. while (I != MBB->end() && !I->isTerminator()) { @@ -1302,21 +1310,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. 
const DIExpression *Expr = Expression; - uint8_t DIExprFlags = DIExpression::ApplyOffset; - bool IsIndirect = Loc.wasIndirect(); - if (Spilled) { - if (IsIndirect) - DIExprFlags |= DIExpression::DerefAfter; - Expr = - DIExpression::prepend(Expr, DIExprFlags, SpillOffset); - IsIndirect = true; - } + if (Spilled) + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset); assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); do { BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, MO, Variable, Expr); + Spilled, MO, Variable, Expr); // Continue and insert DBG_VALUES after every redefinition of register // associated with the debug value within the range diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 70b2a77fe80..54ac46f2e7c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -886,7 +886,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, const TargetRegisterInfo &TRI) { // Phys reg should not be tracked at subreg level. // Same for noreg (Reg == 0). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg) + if (!Register::isVirtualRegister(Reg) || !Reg) return; // Remove the values that don't define those lanes. SmallVector ToBeRemoved; @@ -917,7 +917,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, for (VNInfo *VNI : ToBeRemoved) SR.removeValNo(VNI); - assert(!SR.empty() && "At least one value should be defined by this mask"); + // If the subrange is empty at this point, the MIR is invalid. Do not assert + // and let the verifier catch this case. 
} void LiveInterval::refineSubRanges( @@ -967,7 +968,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const { - assert(TargetRegisterInfo::isVirtualRegister(reg)); + assert(Register::isVirtualRegister(reg)); LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg); assert((VRegMask & LaneMask).any()); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp index aa85569063b..2989930ad09 100644 --- a/lib/CodeGen/LiveIntervals.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervals.h" -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -22,6 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -108,7 +108,7 @@ LiveIntervals::~LiveIntervals() { void LiveIntervals::releaseMemory() { // Free the live intervals themselves. for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) - delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)]; + delete VirtRegIntervals[Register::index2VirtReg(i)]; VirtRegIntervals.clear(); RegMaskSlots.clear(); RegMaskBits.clear(); @@ -161,7 +161,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { // Dump the virtregs. 
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (hasInterval(Reg)) OS << getInterval(Reg) << '\n'; } @@ -186,7 +186,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F; + float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F; return new LiveInterval(reg, Weight); } @@ -201,7 +201,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { void LiveIntervals::computeVirtRegs() { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; createAndComputeVirtRegInterval(Reg); @@ -441,8 +441,8 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) { LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n'); - assert(TargetRegisterInfo::isVirtualRegister(li->reg) - && "Can only shrink virtual registers"); + assert(Register::isVirtualRegister(li->reg) && + "Can only shrink virtual registers"); // Shrink subregister live ranges. bool NeedsCleanup = false; @@ -541,8 +541,8 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n'); - assert(TargetRegisterInfo::isVirtualRegister(Reg) - && "Can only shrink virtual registers"); + assert(Register::isVirtualRegister(Reg) && + "Can only shrink virtual registers"); // Find all the values used, including PHI kills. 
ShrinkToUsesWorkList WorkList; @@ -688,7 +688,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; const LiveInterval &LI = getInterval(Reg); @@ -986,10 +986,10 @@ public: MO.setIsKill(false); } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); @@ -1023,7 +1023,7 @@ private: return; LLVM_DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { dbgs() << printReg(Reg); if (LaneMask.any()) dbgs() << " L" << PrintLaneMask(LaneMask); @@ -1288,6 +1288,20 @@ private: const SlotIndex SplitPos = NewIdxDef; OldIdxVNI = OldIdxIn->valno; + SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end; + LiveRange::iterator Prev = std::prev(OldIdxIn); + if (OldIdxIn != LR.begin() && + SlotIndex::isEarlierInstr(NewIdx, Prev->end)) { + // If the segment before OldIdx read a value defined earlier than + // NewIdx, the moved instruction also reads and forwards that + // value. Extend the lifetime of the new def point. + + // Extend to where the previous range started, unless there is + // another redef first. + NewDefEndPoint = std::min(OldIdxIn->start, + std::next(NewIdxOut)->start); + } + // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut. OldIdxOut->valno->def = OldIdxIn->start; *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end, @@ -1305,7 +1319,8 @@ private: // There is no gap between NewSegment and its predecessor. 
*NewSegment = LiveRange::Segment(Next->start, SplitPos, Next->valno); - *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI); + + *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI); Next->valno->def = SplitPos; } else { // There is a gap between NewSegment and its predecessor @@ -1384,7 +1399,7 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg, LaneBitmask LaneMask) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { SlotIndex LastUse = Before; for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { if (MO.isUndef()) @@ -1429,7 +1444,7 @@ private: // Check if MII uses Reg. for (MIBundleOperands MO(*MII); MO.isValid(); ++MO) if (MO->isReg() && !MO->isUndef() && - TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + Register::isPhysicalRegister(MO->getReg()) && TRI.hasRegUnit(MO->getReg(), Reg)) return Idx.getRegSlot(); } @@ -1439,7 +1454,10 @@ private: }; void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) { - assert(!MI.isBundled() && "Can't handle bundled instructions yet."); + // It is fine to move a bundle as a whole, but not an individual instruction + // inside it. 
+ assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) && + "Cannot move instruction in bundle"); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); @@ -1582,8 +1600,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), MOE = MI.operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && - TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { createAndComputeVirtRegInterval(MOI->getReg()); } @@ -1591,7 +1608,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, } for (unsigned Reg : OrigRegs) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; LiveInterval &LI = getInterval(Reg); @@ -1642,7 +1659,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, unsigned Reg = LI.reg; const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); for (unsigned I = 1; I < NumComp; ++I) { - unsigned NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = createEmptyInterval(NewVReg); SplitLIs.push_back(&NewLI); } diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index cd3d248ac87..c2a1cc7c649 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -46,8 +46,8 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) { if (O->isReg()) { if (!O->isDef() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -60,8 +60,8 @@ void LivePhysRegs::addUses(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); 
++O) { if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -86,8 +86,8 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg() && !O->isDebug()) { - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to @@ -292,10 +292,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { if (!MO->isReg() || !MO->isDef() || MO->isDebug()) continue; - unsigned Reg = MO->getReg(); + Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsDead(IsNotLive); @@ -309,10 +309,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { if (!MO->isReg() || !MO->readsReg() || MO->isDebug()) continue; - unsigned Reg = MO->getReg(); + Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsKill(IsNotLive); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d670f28df6b..24b57be0da0 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -372,8 +372,7 @@ bool 
LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, report_fatal_error("Use not jointly dominated by defs."); } - if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && - !MBB->isLiveIn(PhysReg)) { + if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) { MBB->getParent()->verify(); const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); errs() << "The register " << printReg(PhysReg, TRI) diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 882e562ba95..34bac082bcd 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -32,7 +32,7 @@ void LiveRangeEdit::Delegate::anchor() { } LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges) { - unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); @@ -52,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, } unsigned LiveRangeEdit::createFrom(unsigned OldReg) { - unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } @@ -114,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, continue; // We can't remat physreg uses, unless it is a constant. 
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { if (MRI.isConstantPhysReg(MO.getReg())) continue; return false; @@ -232,7 +232,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, LLVM_DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); if (UseMI->isCall()) - UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI); + UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); @@ -308,8 +308,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MOI->getReg(); + if (!Register::isVirtualRegister(Reg)) { // Check if MI reads any unreserved physregs. if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; @@ -349,7 +349,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // Remove all operands that aren't physregs. for (unsigned i = MI->getNumOperands(); i; --i) { const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) continue; MI->RemoveOperand(i-1); } diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp index 8818f1ce0ad..cbf112ee2bd 100644 --- a/lib/CodeGen/LiveRangeShrink.cpp +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -172,10 +172,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDead() || MO.isDebug()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Do not move the instruction if it def/uses a physical register, // unless it is a constant physical register or a noreg. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (!Reg || MRI.isConstantPhysReg(Reg)) continue; Insert = nullptr; diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index ce99e5535c2..72c79e5f8a7 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { } void LiveRegMatrix::unassign(LiveInterval &VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg.reg); + Register PhysReg = VRM->getPhys(VirtReg.reg); LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " << printReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index 6afb7fb7aa1..97763def1f4 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -47,8 +47,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { if (O->isReg()) { if (!O->isDef() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; removeReg(Reg); } else if (O->isRegMask()) @@ -59,8 +59,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->readsReg() || O->isDebug()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; addReg(Reg); } @@ -70,8 +70,8 @@ void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (!O->isDef() && !O->readsReg()) continue; diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp index f55977d7272..8df84ebf4f0 100644 --- a/lib/CodeGen/LiveStacks.cpp +++ b/lib/CodeGen/LiveStacks.cpp @@ -58,9 +58,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { assert(Slot >= 0 && "Spill slot indice must be >= 0"); SS2IntervalMap::iterator I = S2IMap.find(Slot); if (I == S2IMap.end()) { - I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot), - std::forward_as_tuple( - TargetRegisterInfo::index2StackSlot(Slot), 0.0F)) + I = S2IMap + .emplace( + std::piecewise_construct, std::forward_as_tuple(Slot), + std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F)) .first; S2RCMap.insert(std::make_pair(Slot, RC)); } else { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index aaff982ef1b..9bd55c6f750 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -26,6 +26,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -82,7 +83,7 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. 
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { - assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && + assert(Register::isVirtualRegister(RegIdx) && "getVarInfo: not a virtual register!"); VirtRegInfo.grow(RegIdx); return VirtRegInfo[RegIdx]; @@ -214,7 +215,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, MachineOperand &MO = LastDef->getOperand(i); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) continue; - unsigned DefReg = MO.getReg(); + Register DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) @@ -519,10 +520,9 @@ void LiveVariables::runOnInstr(MachineInstr &MI, } if (!MO.isReg() || MO.getReg() == 0) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MO.isUse()) { - if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && - MRI->isReserved(MOReg))) + if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg))) MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); @@ -530,8 +530,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, assert(MO.isDef()); // FIXME: We should not remove any dead flags. However the MIPS RDDSP // instruction needs it at the moment: http://llvm.org/PR27116. - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - !MRI->isReserved(MOReg)) + if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg)) MO.setIsDead(false); DefRegs.push_back(MOReg); } @@ -541,7 +540,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, // Process all uses. for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + if (Register::isVirtualRegister(MOReg)) HandleVirtRegUse(MOReg, MBB, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); @@ -554,7 +553,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, // Process all defs. 
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + if (Register::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, &MI, Defs); @@ -566,7 +565,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Mark live-in registers as live-in. SmallVector Defs; for (const auto &LI : MBB->liveins()) { - assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && + assert(Register::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); HandlePhysRegDef(LI.PhysReg, nullptr, Defs); } @@ -654,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { - const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const unsigned Reg = Register::index2VirtReg(i); for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); @@ -692,8 +691,8 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isKill()) { MO.setIsKill(false); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { bool removed = getVarInfo(Reg).removeKill(MI); assert(removed && "kill not in register's VarInfo?"); (void)removed; @@ -783,7 +782,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, for (; BBI != BBE; ++BBI) { for (MachineInstr::mop_iterator I = BBI->operands_begin(), E = BBI->operands_end(); I != E; ++I) { - if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isReg() && 
Register::isVirtualRegister(I->getReg())) { if (I->isDef()) Defs.insert(I->getReg()); else if (I->isKill()) @@ -794,7 +793,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // Update info for all live variables for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); // If the Defs is defined in the successor it can't be live in BB. if (Defs.count(Reg)) diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index b14d76a585f..2392d4d00b5 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -261,7 +261,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Remember how big this blob of stack space is MFI.setLocalFrameSize(Offset); - MFI.setLocalFrameMaxAlign(MaxAlign); + MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign)); } static inline bool @@ -351,6 +351,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { assert(MFI.isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); + // We need to keep the references to the stack protector slot through frame + // index operands so that it gets resolved by PEI rather than this pass. + // This avoids accesses to the stack protector through virtual base + // registers, and forces PEI to address it using fp/sp/bp. 
+ if (MFI.hasStackProtectorIndex() && + FrameIdx == MFI.getStackProtectorIndex()) + continue; + LLVM_DEBUG(dbgs() << "Considering: " << MI); unsigned idx = 0; diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index c8cf6abda4f..ed48365b010 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -142,7 +142,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { assert(EmuTlsTmplVar && "Failed to create emualted TLS initializer"); EmuTlsTmplVar->setConstant(true); EmuTlsTmplVar->setInitializer(const_cast(InitValue)); - EmuTlsTmplVar->setAlignment(GVAlignment); + EmuTlsTmplVar->setAlignment(Align(GVAlignment)); copyLinkageVisibility(M, GV, EmuTlsTmplVar); } @@ -155,9 +155,8 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) { ArrayRef ElementValueArray(ElementValues, 4); EmuTlsVar->setInitializer( ConstantStruct::get(EmuTlsVarType, ElementValueArray)); - unsigned MaxAlignment = std::max( - DL.getABITypeAlignment(WordType), - DL.getABITypeAlignment(VoidPtrType)); + Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType), + DL.getABITypeAlignment(VoidPtrType))); EmuTlsVar->setAlignment(MaxAlignment); return true; } diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index f49bc854e23..c9bb5461aa3 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -23,12 +23,14 @@ // //===----------------------------------------------------------------------===// +#include "MIRVRegNamerUtils.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -71,28 +73,6 @@ public: } // end anonymous namespace -enum VRType { RSE_Reg = 0, RSE_FrameIndex, 
RSE_NewCandidate }; -class TypedVReg { - VRType type; - unsigned reg; - -public: - TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} - TypedVReg(VRType type) : type(type), reg(~0U) { - assert(type != RSE_Reg && "Expected a non-register type."); - } - - bool isReg() const { return type == RSE_Reg; } - bool isFrameIndex() const { return type == RSE_FrameIndex; } - bool isCandidate() const { return type == RSE_NewCandidate; } - - VRType getType() const { return type; } - unsigned getReg() const { - assert(this->isReg() && "Expected a virtual or physical register."); - return reg; - } -}; - char MIRCanonicalizer::ID; char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; @@ -190,7 +170,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, if (!MO.isReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) @@ -207,7 +187,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, continue; MachineOperand &MO = II->getOperand(0); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isDef()) continue; @@ -220,7 +200,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, } if (II->getOperand(i).isReg()) { - if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg())) + if (!Register::isVirtualRegister(II->getOperand(i).getReg())) if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == PhysRegDefs.end()) { continue; @@ -340,12 +320,12 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { if (!MI->getOperand(1).isReg()) continue; - const unsigned Dst = MI->getOperand(0).getReg(); - const unsigned Src = MI->getOperand(1).getReg(); + const Register Dst = MI->getOperand(0).getReg(); + const Register Src = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Dst)) + if 
(!Register::isVirtualRegister(Dst)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Src)) + if (!Register::isVirtualRegister(Src)) continue; // Not folding COPY instructions if regbankselect has not set the RCs. // Why are we only considering Register Classes? Because the verifier @@ -370,258 +350,6 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) { return Changed; } -/// Here we find our candidates. What makes an interesting candidate? -/// An candidate for a canonicalization tree root is normally any kind of -/// instruction that causes side effects such as a store to memory or a copy to -/// a physical register or a return instruction. We use these as an expression -/// tree root that we walk inorder to build a canonical walk which should result -/// in canoncal vreg renaming. -static std::vector populateCandidates(MachineBasicBlock *MBB) { - std::vector Candidates; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - MachineInstr *MI = &*II; - - bool DoesMISideEffect = false; - - if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { - const unsigned Dst = MI->getOperand(0).getReg(); - DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); - - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) - break; - DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); - } - } - - if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) - continue; - - LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); - Candidates.push_back(MI); - } - - return Candidates; -} - -static void doCandidateWalk(std::vector &VRegs, - std::queue &RegQueue, - std::vector &VisitedMIs, - const MachineBasicBlock *MBB) { - - const MachineFunction &MF = *MBB->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - while (!RegQueue.empty()) { - - auto TReg = RegQueue.front(); - RegQueue.pop(); - - if 
(TReg.isFrameIndex()) { - LLVM_DEBUG(dbgs() << "Popping frame index.\n";); - VRegs.push_back(TypedVReg(RSE_FrameIndex)); - continue; - } - - assert(TReg.isReg() && "Expected vreg or physreg."); - unsigned Reg = TReg.getReg(); - - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - LLVM_DEBUG({ - dbgs() << "Popping vreg "; - MRI.def_begin(Reg)->dump(); - dbgs() << "\n"; - }); - - if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { - return TR.isReg() && TR.getReg() == Reg; - })) { - VRegs.push_back(TypedVReg(Reg)); - } - } else { - LLVM_DEBUG(dbgs() << "Popping physreg.\n";); - VRegs.push_back(TypedVReg(Reg)); - continue; - } - - for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { - MachineInstr *Def = RI->getParent(); - - if (Def->getParent() != MBB) - continue; - - if (llvm::any_of(VisitedMIs, - [&](const MachineInstr *VMI) { return Def == VMI; })) { - break; - } - - LLVM_DEBUG({ - dbgs() << "\n========================\n"; - dbgs() << "Visited MI: "; - Def->dump(); - dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; - dbgs() << "\n========================\n"; - }); - VisitedMIs.push_back(Def); - for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { - - MachineOperand &MO = Def->getOperand(I); - if (MO.isFI()) { - LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); - RegQueue.push(TypedVReg(RSE_FrameIndex)); - } - - if (!MO.isReg()) - continue; - RegQueue.push(TypedVReg(MO.getReg())); - } - } - } -} - -namespace { -class NamedVRegCursor { - MachineRegisterInfo &MRI; - unsigned virtualVRegNumber; - -public: - NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {} - - void SkipVRegs() { - unsigned VRegGapIndex = 1; - if (!virtualVRegNumber) { - VRegGapIndex = 0; - virtualVRegNumber = MRI.createIncompleteVirtualRegister(); - } - const unsigned VR_GAP = (++VRegGapIndex * 1000); - - unsigned I = virtualVRegNumber; - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; - } - - 
unsigned getVirtualVReg() const { return virtualVRegNumber; } - - unsigned incrementVirtualVReg(unsigned incr = 1) { - virtualVRegNumber += incr; - return virtualVRegNumber; - } - - unsigned createVirtualRegister(unsigned VReg) { - if (!virtualVRegNumber) - SkipVRegs(); - std::string S; - raw_string_ostream OS(S); - OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); - OS.flush(); - virtualVRegNumber++; - if (auto RC = MRI.getRegClassOrNull(VReg)) - return MRI.createVirtualRegister(RC, OS.str()); - return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); - } -}; -} // namespace - -static std::map -GetVRegRenameMap(const std::vector &VRegs, - const std::vector &renamedInOtherBB, - MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { - std::map VRegRenameMap; - bool FirstCandidate = true; - - for (auto &vreg : VRegs) { - if (vreg.isFrameIndex()) { - // We skip one vreg for any frame index because there is a good chance - // (especially when comparing SelectionDAG to GlobalISel generated MIR) - // that in the other file we are just getting an incoming vreg that comes - // from a copy from a frame index. So it's safe to skip by one. - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); - continue; - } else if (vreg.isCandidate()) { - - // After the first candidate, for every subsequent candidate, we skip mod - // 10 registers so that the candidates are more likely to start at the - // same vreg number making it more likely that the canonical walk from the - // candidate insruction. We don't need to skip from the first candidate of - // the BasicBlock because we already skip ahead several vregs for each BB. 
- unsigned LastRenameReg = NVC.getVirtualVReg(); - if (FirstCandidate) - NVC.incrementVirtualVReg(LastRenameReg % 10); - FirstCandidate = false; - continue; - } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG({ - dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; - }); - continue; - } - - auto Reg = vreg.getReg(); - if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - LLVM_DEBUG(dbgs() << "Vreg " << Reg - << " already renamed in other BB.\n";); - continue; - } - - auto Rename = NVC.createVirtualRegister(Reg); - - if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - LLVM_DEBUG(dbgs() << "Mapping vreg ";); - if (MRI.reg_begin(Reg) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Reg;); - } - LLVM_DEBUG(dbgs() << " to ";); - if (MRI.reg_begin(Rename) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Rename;); - } - LLVM_DEBUG(dbgs() << "\n";); - - VRegRenameMap.insert(std::pair(Reg, Rename)); - } - } - - return VRegRenameMap; -} - -static bool doVRegRenaming(std::vector &RenamedInOtherBB, - const std::map &VRegRenameMap, - MachineRegisterInfo &MRI) { - bool Changed = false; - for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { - - auto VReg = I->first; - auto Rename = I->second; - - RenamedInOtherBB.push_back(Rename); - - std::vector RenameMOs; - for (auto &MO : MRI.reg_operands(VReg)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - Changed = true; - MO->setReg(Rename); - - if (!MO->isDef()) - MO->setIsKill(false); - } - } - - return Changed; -} - static bool doDefKillClear(MachineBasicBlock *MBB) { bool Changed = false; @@ -646,9 +374,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { static bool runOnBasicBlock(MachineBasicBlock *MBB, 
std::vector &bbNames, - std::vector &renamedInOtherBB, - unsigned &basicBlockNum, unsigned &VRegGapIndex, - NamedVRegCursor &NVC) { + unsigned &basicBlockNum, NamedVRegCursor &NVC) { if (CanonicalizeBasicBlockNumber != ~0U) { if (CanonicalizeBasicBlockNumber != basicBlockNum++) @@ -687,74 +413,20 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - std::vector Candidates = populateCandidates(MBB); - std::vector VisitedMIs; - llvm::copy(Candidates, std::back_inserter(VisitedMIs)); - - std::vector VRegs; - for (auto candidate : Candidates) { - VRegs.push_back(TypedVReg(RSE_NewCandidate)); - - std::queue RegQueue; - - // Here we walk the vreg operands of a non-root node along our walk. - // The root nodes are the original candidates (stores normally). - // These are normally not the root nodes (except for the case of copies to - // physical registers). - for (unsigned i = 1; i < candidate->getNumOperands(); i++) { - if (candidate->mayStore() || candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); - RegQueue.push(TypedVReg(MO.getReg())); - } - - // Here we walk the root candidates. We start from the 0th operand because - // the root is normally a store to a vreg. - for (unsigned i = 0; i < candidate->getNumOperands(); i++) { - - if (!candidate->mayStore() && !candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - - // TODO: Do we want to only add vregs here? - if (!MO.isReg() && !MO.isFI()) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - - RegQueue.push(MO.isReg() ? 
TypedVReg(MO.getReg()) - : TypedVReg(RSE_FrameIndex)); - } - - doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); - } - - // If we have populated no vregs to rename then bail. - // The rest of this function does the vreg remaping. - if (VRegs.size() == 0) - return Changed; - - auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); - Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= NVC.renameVRegs(MBB); // Here we renumber the def vregs for the idempotent instructions from the top // of the MachineBasicBlock so that they are named in the order that we sorted // them alphabetically. Eventually we wont need SkipVRegs because we will use // named vregs instead. if (IdempotentInstCount) - NVC.SkipVRegs(); + NVC.skipVRegs(); auto MII = MBB->begin(); for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { MachineInstr &MI = *MII++; Changed = true; - unsigned vRegToRename = MI.getOperand(0).getReg(); + Register vRegToRename = MI.getOperand(0).getReg(); auto Rename = NVC.createVirtualRegister(vRegToRename); std::vector RenameMOs; @@ -799,9 +471,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { << "\n\n================================================\n\n";); std::vector BBNames; - std::vector RenamedInOtherBB; - unsigned GapIdx = 0; unsigned BBNum = 0; bool Changed = false; @@ -809,8 +479,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); NamedVRegCursor NVC(MRI); for (auto MBB : RPOList) - Changed |= - runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC); + Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); return Changed; } diff --git a/lib/CodeGen/MIRNamerPass.cpp b/lib/CodeGen/MIRNamerPass.cpp new file mode 100644 index 00000000000..9d719f3917c --- /dev/null +++ b/lib/CodeGen/MIRNamerPass.cpp @@ -0,0 +1,77 @@ +//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===// +// +// Part 
of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to rename virtual register operands with the goal +// of making it easier to author easy-to-read tests for MIR. This pass reuses +// the vreg renamer used by MIRCanonicalizerPass. +// +// Basic Usage: +// +// llc -o - -run-pass mir-namer example.mir +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +extern char &MIRNamerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-namer" + +namespace { + +class MIRNamer : public MachineFunctionPass { +public: + static char ID; + MIRNamer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename virtual register operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + bool Changed = false; + + if (MF.empty()) + return Changed; + + NamedVRegCursor NVC(MF.getRegInfo()); + + ReversePostOrderTraversal RPOT(&*MF.begin()); + for (auto &MBB : RPOT) + Changed |= NVC.renameVRegs(MBB); + + return Changed; + } +}; + +} // end anonymous namespace + +char MIRNamer::ID; + +char &llvm::MIRNamerID = MIRNamer::ID; + +INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false, + false) + +INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false, 
+ false) diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 4899bd3f581..ad5c617623f 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -249,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("successors", MIToken::kw_successors) .Case("floatpred", MIToken::kw_floatpred) .Case("intpred", MIToken::kw_intpred) + .Case("shufflemask", MIToken::kw_shufflemask) .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) .Case("unknown-size", MIToken::kw_unknown_size) diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 0fe3f9f706d..200f9d026cc 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -117,6 +117,7 @@ struct MIToken { kw_successors, kw_floatpred, kw_intpred, + kw_shufflemask, kw_pre_instr_symbol, kw_post_instr_symbol, kw_unknown_size, @@ -146,6 +147,7 @@ struct MIToken { IntegerLiteral, FloatingPointLiteral, HexLiteral, + VectorLiteral, VirtualRegister, ConstantPoolItem, JumpTableIndex, diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c0b800a0b87..6498acc9fa5 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -451,6 +451,7 @@ public: bool parseBlockAddressOperand(MachineOperand &Dest); bool parseIntrinsicOperand(MachineOperand &Dest); bool parsePredicateOperand(MachineOperand &Dest); + bool parseShuffleMaskOperand(MachineOperand &Dest); bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); @@ -640,7 +641,7 @@ bool MIParser::parseBasicBlockDefinition( return error(Loc, Twine("redefinition of machine basic block with id #") + Twine(ID)); if (Alignment) - MBB->setAlignment(Alignment); + MBB->setAlignment(Align(Alignment)); if 
(HasAddressTaken) MBB->setHasAddressTaken(); MBB->setIsEHPad(IsLandingPad); @@ -1078,7 +1079,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) { static std::string getRegisterName(const TargetRegisterInfo *TRI, unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + assert(Register::isPhysicalRegister(Reg) && "expected phys reg"); return StringRef(TRI->getName(Reg)).lower(); } @@ -1408,11 +1409,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (Token.is(MIToken::dot)) { if (parseSubRegisterIndex(SubReg)) return true; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("subregister index expects a virtual register"); } if (Token.is(MIToken::colon)) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("register class specification expects a virtual register"); lex(); if (parseRegisterClassOrBank(*RegInfo)) @@ -1436,12 +1437,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast(nullptr)); MRI.setType(Reg, Ty); } } } else if (consumeIfPresent(MIToken::lparen)) { // Virtual registers may have a tpe with GlobalISel. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return error("unexpected type on physical register"); LLT Ty; @@ -1454,8 +1456,9 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty) return error("inconsistent type for generic virtual register"); + MRI.setRegClassOrRegBank(Reg, static_cast(nullptr)); MRI.setType(Reg, Ty); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { // Generic virtual registers must have a type. 
// If we end up here this means the type hasn't been specified and // this is bad! @@ -2285,6 +2288,49 @@ bool MIParser::parsePredicateOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_shufflemask)); + + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected syntax shufflemask(, ...)"); + + SmallVector ShufMask; + LLVMContext &Ctx = MF.getFunction().getContext(); + Type *I32Ty = Type::getInt32Ty(Ctx); + + bool AllZero = true; + bool AllUndef = true; + + do { + if (Token.is(MIToken::kw_undef)) { + ShufMask.push_back(UndefValue::get(I32Ty)); + AllZero = false; + } else if (Token.is(MIToken::IntegerLiteral)) { + AllUndef = false; + const APSInt &Int = Token.integerValue(); + if (!Int.isNullValue()) + AllZero = false; + ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + } else + return error("expected integer constant"); + + lex(); + } while (consumeIfPresent(MIToken::comma)); + + if (expectAndConsume(MIToken::rparen)) + return error("shufflemask should be terminated by ')'."); + + if (AllZero || AllUndef) { + VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); + Constant *C = AllZero ? 
Constant::getNullValue(VT) : UndefValue::get(VT); + Dest = MachineOperand::CreateShuffleMask(C); + } else + Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); + + return false; +} + bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { assert(Token.is(MIToken::kw_target_index)); lex(); @@ -2432,6 +2478,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_floatpred: case MIToken::kw_intpred: return parsePredicateOperand(Dest); + case MIToken::kw_shufflemask: + return parseShuffleMaskOperand(Dest); case MIToken::Error: return true; case MIToken::Identifier: diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index b242934def8..55fac93d899 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -216,7 +216,7 @@ std::unique_ptr MIRParserImpl::parseIRModule() { return nullptr; // Create an empty module when the MIR file is empty. NoMIRDocuments = true; - return llvm::make_unique(Filename, Context); + return std::make_unique(Filename, Context); } std::unique_ptr M; @@ -236,7 +236,7 @@ std::unique_ptr MIRParserImpl::parseIRModule() { NoMIRDocuments = true; } else { // Create an new, empty module. 
- M = llvm::make_unique(Filename, Context); + M = std::make_unique(Filename, Context); NoLLVMIR = true; } return M; @@ -306,7 +306,7 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { static bool isSSA(const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg)) return false; } @@ -355,10 +355,10 @@ bool MIRParserImpl::initializeCallSiteInfo( if (MILoc.Offset >= CallB->size()) return error(Twine(MF.getName()) + Twine(" call instruction offset out of range.") + - "Unable to reference instruction at bb: " + + " Unable to reference instruction at bb: " + Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset)); - auto CallI = std::next(CallB->begin(), MILoc.Offset); - if (!CallI->isCall()) + auto CallI = std::next(CallB->instr_begin(), MILoc.Offset); + if (!CallI->isCall(MachineInstr::IgnoreBundle)) return error(Twine(MF.getName()) + Twine(" call site info should reference call " "instruction. 
Instruction at bb:") + @@ -393,7 +393,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, } if (YamlMF.Alignment) - MF.setAlignment(YamlMF.Alignment); + MF.setAlignment(Align(YamlMF.Alignment)); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); @@ -949,6 +949,6 @@ llvm::createMIRParser(std::unique_ptr Contents, "Can't read MIR with a Context that discards named Values"))); return nullptr; } - return llvm::make_unique( - llvm::make_unique(std::move(Contents), Filename, Context)); + return std::make_unique( + std::make_unique(std::move(Contents), Filename, Context)); } diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 0a95a0ced0f..1a4e21ac06a 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -197,7 +197,7 @@ void MIRPrinter::print(const MachineFunction &MF) { yaml::MachineFunction YamlMF; YamlMF.Name = MF.getName(); - YamlMF.Alignment = MF.getAlignment(); + YamlMF.Alignment = MF.getAlignment().value(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); @@ -290,7 +290,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, // Print the virtual register definitions. for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; if (RegInfo.getVRegName(Reg) != "") @@ -473,10 +473,11 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF, yaml::CallSiteInfo::MachineInstrLoc CallLocation; // Prepare instruction position. - MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator(); + MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator(); CallLocation.BlockNum = CallI->getParent()->getNumber(); // Get call instruction offset from the beginning of block. 
- CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI); + CallLocation.Offset = + std::distance(CallI->getParent()->instr_begin(), CallI); YmlCS.CallLocation = CallLocation; // Construct call arguments and theirs forwarding register info. for (auto ArgReg : CSInfo.second) { @@ -628,9 +629,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS << "landing-pad"; HasAttributes = true; } - if (MBB.getAlignment()) { + if (MBB.getAlignment() != Align::None()) { OS << (HasAttributes ? ", " : " ("); - OS << "align " << MBB.getAlignment(); + OS << "align " << MBB.getAlignment().value(); HasAttributes = true; } if (HasAttributes) @@ -842,7 +843,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, case MachineOperand::MO_CFIIndex: case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_BlockAddress: { + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ShuffleMask: { unsigned TiedOperandIdx = 0; if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); diff --git a/lib/CodeGen/MIRVRegNamerUtils.cpp b/lib/CodeGen/MIRVRegNamerUtils.cpp new file mode 100644 index 00000000000..6629000f468 --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -0,0 +1,348 @@ +//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "mir-vregnamer-utils" + +namespace { + +// TypedVReg and VRType are used to tell the renamer what to do at points in a +// sequence of values to be renamed. 
A TypedVReg can either contain +// an actual VReg, a FrameIndex, or it could just be a barrier for the next +// candidate (side-effecting instruction). This tells the renamer to increment +// to the next vreg name, or to skip modulo some skip-gap value. +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType Type; + Register Reg; + +public: + TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {} + TypedVReg(VRType Type) : Type(Type), Reg(~0U) { + assert(Type != RSE_Reg && "Expected a non-Register Type."); + } + + bool isReg() const { return Type == RSE_Reg; } + bool isFrameIndex() const { return Type == RSE_FrameIndex; } + bool isCandidate() const { return Type == RSE_NewCandidate; } + + VRType getType() const { return Type; } + Register getReg() const { + assert(this->isReg() && "Expected a virtual or physical Register."); + return Reg; + } +}; + +/// Here we find our candidates. What makes an interesting candidate? +/// A candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk in order to build a canonical walk which should +/// result in canonical vreg renaming. 
+std::vector populateCandidates(MachineBasicBlock *MBB) { + std::vector Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const Register Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !Register::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) + break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +void doCandidateWalk(std::vector &VRegs, + std::queue &RegQueue, + std::vector &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + LLVM_DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + Register Reg = TReg.getReg(); + + if (Register::isVirtualRegister(Reg)) { + LLVM_DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + LLVM_DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + + for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { 
return Def == VMI; })) { + break; + } + + LLVM_DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +std::map +getVRegRenameMap(const std::vector &VRegs, + const std::vector &renamedInOtherBB, + MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { + std::map VRegRenameMap; + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number making it more likely that the canonical walk from the + // candidate insruction. We don't need to skip from the first candidate of + // the BasicBlock because we already skip ahead several vregs for each BB. 
+ unsigned LastRenameReg = NVC.getVirtualVReg(); + if (FirstCandidate) + NVC.incrementVirtualVReg(LastRenameReg % 10); + FirstCandidate = false; + continue; + } else if (!Register::isVirtualRegister(vreg.getReg())) { + unsigned LastRenameReg = NVC.incrementVirtualVReg(); + (void)LastRenameReg; + LLVM_DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + LLVM_DEBUG(dbgs() << "Vreg " << Reg + << " already renamed in other BB.\n";); + continue; + } + + auto Rename = NVC.createVirtualRegister(Reg); + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + LLVM_DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Reg;); + } + LLVM_DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + LLVM_DEBUG(dbgs() << Rename;); + } + LLVM_DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +bool doVRegRenaming(std::vector &renamedInOtherBB, + const std::map &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + renamedInOtherBB.push_back(Rename); + + std::vector RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + MO->setIsKill(false); + } + } + + return Changed; +} + +bool renameVRegs(MachineBasicBlock *MBB, + std::vector &renamedInOtherBB, + NamedVRegCursor &NVC) { + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + 
std::vector Candidates = populateCandidates(MBB); + std::vector VisitedMIs; + llvm::copy(Candidates, std::back_inserter(VisitedMIs)); + + std::vector VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers). + for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) + : TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. + // The rest of this function does the vreg remaping. 
+ if (VRegs.size() == 0) + return Changed; + + auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + return Changed; +} +} // anonymous namespace + +void NamedVRegCursor::skipVRegs() { + unsigned VRegGapIndex = 1; + if (!virtualVRegNumber) { + VRegGapIndex = 0; + virtualVRegNumber = MRI.createIncompleteVirtualRegister(); + } + const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize); + + unsigned I = virtualVRegNumber; + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + + virtualVRegNumber = E; +} + +unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) { + if (!virtualVRegNumber) + skipVRegs(); + std::string S; + raw_string_ostream OS(S); + OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); + OS.flush(); + virtualVRegNumber++; + if (auto RC = MRI.getRegClassOrNull(VReg)) + return MRI.createVirtualRegister(RC, OS.str()); + return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); +} + +bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) { + return ::renameVRegs(MBB, RenamedInOtherBB, *this); +} diff --git a/lib/CodeGen/MIRVRegNamerUtils.h b/lib/CodeGen/MIRVRegNamerUtils.h new file mode 100644 index 00000000000..c5b52a96853 --- /dev/null +++ b/lib/CodeGen/MIRVRegNamerUtils.h @@ -0,0 +1,91 @@ + +//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of these utilities is to abstract out parts of the MIRCanon pass +// that are responsible for renaming virtual registers with the purpose of +// sharing code with a MIRVRegNamer pass that could be the analog of the +// opt -instnamer pass. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H +#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" + +#include + +namespace llvm { + +/// NamedVRegCursor - The cursor is an object that keeps track of what the next +/// vreg name should be. It does book keeping to determine when to skip the +/// index value and by how much, or if the next vreg name should be an increment +/// from the previous. +class NamedVRegCursor { + MachineRegisterInfo &MRI; + + /// virtualVRegNumber - Book keeping of the last vreg position. + unsigned virtualVRegNumber; + + /// SkipGapSize - Used to calculate a modulo amount to skip by after every + /// sequence of instructions starting from a given side-effecting + /// MachineInstruction for a given MachineBasicBlock. The general idea is that + /// for a given program compiled with two different opt pipelines, there + /// shouldn't be greater than SkipGapSize difference in how many vregs are in + /// play between the two and for every def-use graph of vregs we rename we + /// will round up to the next SkipGapSize'th number so that we have a high + /// change of landing on the same name for two given matching side-effects + /// for the two compilation outcomes. + const unsigned SkipGapSize; + + /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs. + std::vector RenamedInOtherBB; + +public: + NamedVRegCursor() = delete; + /// 1000 for the SkipGapSize was a good heuristic at the time of the writing + /// of the MIRCanonicalizerPass. Adjust as needed. 
+ NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000) + : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {} + + /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to + /// produce the next vreg name. + void skipVRegs(); + + unsigned getVirtualVReg() const { return virtualVRegNumber; } + + /// incrementVirtualVReg - This increments an index value that us used to + /// create a new vreg name. This is not a Register. + unsigned incrementVirtualVReg(unsigned incr = 1) { + virtualVRegNumber += incr; + return virtualVRegNumber; + } + + /// createVirtualRegister - Given an existing vreg, create a named vreg to + /// take its place. + unsigned createVirtualRegister(unsigned VReg); + + /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting + /// instructions, walk the def-use from each side-effecting root (in sorted + /// root order) and rename the encountered vregs in the def-use graph in a + /// canonical ordering. This method maintains book keeping for which vregs + /// were already renamed in RenamedInOtherBB. 
+ // @return changed + bool renameVRegs(MachineBasicBlock *MBB); +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4d29e883d87..854bef3aab0 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -39,6 +39,12 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt PrintSlotIndexes( + "print-slotindexes", + cl::desc("When printing machine IR, annotate instructions and blocks with " + "SlotIndexes when available"), + cl::init(true), cl::Hidden); + MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; @@ -291,7 +297,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, return; } - if (Indexes) + if (Indexes && PrintSlotIndexes) OS << Indexes->getMBBStartIdx(this) << '\t'; OS << "bb." << getNumber(); @@ -320,9 +326,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "landing-pad"; HasAttributes = true; } - if (getAlignment()) { + if (getAlignment() != Align::None()) { OS << (HasAttributes ? 
", " : " ("); - OS << "align " << getAlignment(); + OS << "align " << Log2(getAlignment()); HasAttributes = true; } if (HasAttributes) @@ -402,7 +408,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsInBundle = false; for (const MachineInstr &MI : instrs()) { - if (Indexes) { + if (Indexes && PrintSlotIndexes) { if (Indexes->hasIndex(MI)) OS << Indexes->getInstructionIndex(MI); OS << '\t'; @@ -484,9 +490,9 @@ void MachineBasicBlock::sortUniqueLiveIns() { } unsigned -MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { +MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) { assert(getParent() && "MBB must be inserted in function"); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(PhysReg.isPhysical() && "Expected physreg"); assert(RC && "Register class is required"); assert((isEHPad() || this == &getParent()->front()) && "Only the entry block and landing pads can have physreg live ins"); @@ -500,14 +506,14 @@ MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { if (LiveIn) for (;I != E && I->isCopy(); ++I) if (I->getOperand(1).getReg() == PhysReg) { - unsigned VirtReg = I->getOperand(0).getReg(); + Register VirtReg = I->getOperand(0).getReg(); if (!MRI.constrainRegClass(VirtReg, RC)) llvm_unreachable("Incompatible live-in register class."); return VirtReg; } // No luck, create a virtual register. - unsigned VirtReg = MRI.createVirtualRegister(RC); + Register VirtReg = MRI.createVirtualRegister(RC); BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) .addReg(PhysReg, RegState::Kill); if (!LiveIn) @@ -772,7 +778,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { while (!FromMBB->succ_empty()) { MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If probability list is empty it means we don't use it (disabled optimization). 
+ // If probability list is empty it means we don't use it (disabled + // optimization). if (!FromMBB->Probs.empty()) { auto Prob = *FromMBB->Probs.begin(); addSuccessor(Succ, Prob); @@ -798,13 +805,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. - for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), - ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) - for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { - MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == FromMBB) - MO.setMBB(this); - } + Succ->replacePhiUsesWith(FromMBB, this); } normalizeSuccProbs(); } @@ -907,8 +908,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0 || !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; - unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || + Register Reg = OI->getReg(); + if (Register::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(*MI)) { KilledRegs.push_back(Reg); LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -928,7 +929,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, if (!OI->isReg() || OI->getReg() == 0) continue; - unsigned Reg = OI->getReg(); + Register Reg = OI->getReg(); if (!is_contained(UsedRegs, Reg)) UsedRegs.push_back(Reg); } @@ -979,13 +980,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, } } - // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::instr_iterator - i = Succ->instr_begin(),e = Succ->instr_end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == this) - i->getOperand(ni+1).setMBB(NMBB); + // Fix PHI nodes in Succ so they refer to NMBB instead of this. 
+ Succ->replacePhiUsesWith(this, NMBB); // Inherit live-ins from the successor for (const auto &LI : Succ->liveins()) @@ -1000,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I); break; @@ -1033,7 +1029,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { if (I->getOperand(ni+1).getMBB() == NMBB) { MachineOperand &MO = I->getOperand(ni); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); PHISrcRegs.insert(Reg); if (MO.isUndef()) continue; @@ -1049,7 +1045,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineRegisterInfo *MRI = &getParent()->getRegInfo(); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) continue; @@ -1217,6 +1213,16 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } +void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + for (MachineInstr &MI : phis()) + for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == Old) + MO.setMBB(New); + } +} + /// Various pieces of code can cause excess edges in the CFG to be inserted. If /// we have proven that MBB can only branch to DestA and DestB, remove any other /// MBB successors from the CFG. DestA and DestB can be null. 
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 639b588766a..ac19bc0bd8e 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -79,16 +79,17 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); -static cl::opt AlignAllBlock("align-all-blocks", - cl::desc("Force the alignment of all " - "blocks in the function."), - cl::init(0), cl::Hidden); +static cl::opt AlignAllBlock( + "align-all-blocks", + cl::desc("Force the alignment of all blocks in the function in log2 format " + "(e.g 4 means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static cl::opt AlignAllNonFallThruBlocks( "align-all-nofallthru-blocks", - cl::desc("Force the alignment of all " - "blocks that have no fall-through predecessors (i.e. don't add " - "nops that are executed)."), + cl::desc("Force the alignment of all blocks that have no fall-through " + "predecessors (i.e. don't add nops that are executed). In log2 " + "format (e.g 4 means align on 16B boundaries)."), cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. @@ -2763,8 +2764,8 @@ void MachineBlockPlacement::alignBlocks() { if (!L) continue; - unsigned Align = TLI->getPrefLoopAlignment(L); - if (!Align) + const Align Align = TLI->getPrefLoopAlignment(L); + if (Align == 1) continue; // Don't care about loop alignment. 
// If the block is cold relative to the function entry don't waste space @@ -2981,7 +2982,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { F = &MF; MBPI = &getAnalysis(); - MBFI = llvm::make_unique( + MBFI = std::make_unique( getAnalysis()); MLI = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); @@ -3038,8 +3039,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); + auto *MMIWP = getAnalysisIfAvailable(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable(), MLI, + MMIWP ? &MMIWP->getMMI() : nullptr, MLI, /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); @@ -3062,14 +3064,14 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. for (MachineBasicBlock &MBB : MF) - MBB.setAlignment(AlignAllBlock); + MBB.setAlignment(Align(1ULL << AlignAllBlock)); else if (AlignAllNonFallThruBlocks) { // Align all of the blocks that have no fall-through predecessors to a // specific alignment. 
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { auto LayoutPred = std::prev(MBI); if (!LayoutPred->isSuccessor(&*MBI)) - MBI->setAlignment(AlignAllNonFallThruBlocks); + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); } } if (ViewBlockLayoutWithBFI != GVDT_None && diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 2df6d40d929..d9bd32b2fba 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -66,6 +67,7 @@ namespace { AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + MachineBlockFrequencyInfo *MBFI; public: static char ID; // Pass identification @@ -83,6 +85,8 @@ namespace { AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } void releaseMemory() override { @@ -133,6 +137,11 @@ namespace { bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's profitable to move common computations of MBB + /// and MBB1 to CandidateBB. 
+ bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1); }; } // end anonymous namespace @@ -158,15 +167,15 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; - unsigned SrcReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) continue; if (DefMI->getOperand(0).getSubReg()) continue; @@ -189,14 +198,16 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << *MI); - // Update matching debug values. - DefMI->changeDebugValuesDefReg(SrcReg); - // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); // Coalesce single use copies. if (OnlyOneUse) { + // If (and only if) we've eliminated all uses of the copy, also + // copy-propagate to any debug-users of MI, or they'll be left using + // an undefined value. + DefMI->changeDebugValuesDefReg(SrcReg); + DefMI->eraseFromParent(); ++NumCoalesces; } @@ -271,10 +282,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. 
if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) @@ -290,10 +301,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineOperand &MO = MOP.value(); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) continue; // Check against PhysRefs even if the def is "dead". if (PhysRefs.count(Reg)) @@ -367,8 +378,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(MOReg)) + Register MOReg = MO.getReg(); + if (Register::isVirtualRegister(MOReg)) continue; if (PhysRefs.count(MOReg)) return false; @@ -424,8 +435,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // If CSReg is used at all uses of Reg, CSE should not increase register // pressure of CSReg. bool MayIncreasePressure = true; - if (TargetRegisterInfo::isVirtualRegister(CSReg) && - TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet CSUses; for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { @@ -453,8 +463,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // of the redundant computation are copies, do not cse. 
bool HasVRegUse = false; for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isUse() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; } @@ -586,8 +595,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned OldReg = MO.getReg(); - unsigned NewReg = CSMI->getOperand(i).getReg(); + Register OldReg = MO.getReg(); + Register NewReg = CSMI->getOperand(i).getReg(); // Go through implicit defs of CSMI and MI, if a def is not dead at MI, // we should make sure it is not dead at CSMI. @@ -604,8 +613,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { continue; } - assert(TargetRegisterInfo::isVirtualRegister(OldReg) && - TargetRegisterInfo::isVirtualRegister(NewReg) && + assert(Register::isVirtualRegister(OldReg) && + Register::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) { @@ -769,11 +778,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { return false; for (auto def : MI->defs()) - if (!TRI->isVirtualRegister(def.getReg())) + if (!Register::isVirtualRegister(def.getReg())) return false; for (auto use : MI->uses()) - if (use.isReg() && !TRI->isVirtualRegister(use.getReg())) + if (use.isReg() && !Register::isVirtualRegister(use.getReg())) return false; return true; @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (!CMBB->isLegalToHoistInto()) continue; + if (!isProfitableToHoistInto(CMBB, MBB, MBB1)) + continue; + // Two instrs are partial redundant if their basic blocks are reachable // from one to another but one doesn't dominate another. 
if (CMBB != MBB1) { @@ -812,8 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, assert(MI->getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); - unsigned VReg = MI->getOperand(0).getReg(); - unsigned NewReg = MRI->cloneVirtualRegister(VReg); + Register VReg = MI->getOperand(0).getReg(); + Register NewReg = MRI->cloneVirtualRegister(VReg); if (!isProfitableToCSE(NewReg, VReg, CMBB, MI)) continue; MachineInstr &NewMI = @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { return Changed; } +bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { + if (CandidateBB->getParent()->getFunction().hasMinSize()) + return true; + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); + assert(DT->dominates(CandidateBB, MBB1) && + "CandidateBB should dominate MBB1"); + return MBFI->getBlockFreq(CandidateBB) <= + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); + MBFI = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 0584ec0bd2b..e9f462fd1b3 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -137,7 +137,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { MachineInstr *DefInstr = nullptr; // We need a virtual register definition. 
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) DefInstr = MRI->getUniqueVRegDef(MO.getReg()); // PHI's have no depth etc. if (DefInstr && DefInstr->isPHI()) @@ -168,7 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, unsigned IDepth = 0; for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isUse()) continue; @@ -223,7 +223,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, for (const MachineOperand &MO : NewRoot->operands()) { // Check for virtual register operand. - if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) continue; if (!MO.isDef()) continue; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9fc12ac89e1..ebe76e31dca 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -119,8 +119,8 @@ public: void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) { assert(MI->isCopy() && "Tracking non-copy?"); - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); // Remember Def is defined by the copy. for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI) @@ -163,8 +163,8 @@ public: // Check that the available copy isn't clobbered by any regmasks between // itself and the destination. 
- unsigned AvailSrc = AvailCopy->getOperand(1).getReg(); - unsigned AvailDef = AvailCopy->getOperand(0).getReg(); + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); for (const MachineInstr &MI : make_range(AvailCopy->getIterator(), DestCopy.getIterator())) for (const MachineOperand &MO : MI.operands()) @@ -205,8 +205,11 @@ public: } private: + typedef enum { DebugUse = false, RegularUse = true } DebugType; + void ClobberRegister(unsigned Reg); - void ReadRegister(unsigned Reg); + void ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT); void CopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); @@ -217,6 +220,9 @@ private: /// Candidates for deletion. SmallSetVector MaybeDeadCopies; + /// Multimap tracking debug users in current BB + DenseMap> CopyDbgUsers; + CopyTracker Tracker; bool Changed; @@ -231,13 +237,19 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) -void MachineCopyPropagation::ReadRegister(unsigned Reg) { +void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader, + DebugType DT) { // If 'Reg' is defined by a copy, the copy is no longer a candidate - // for elimination. + // for elimination. If a copy is "read" by a debug user, record the user + // for propagation. 
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) { - LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); - MaybeDeadCopies.remove(Copy); + if (DT == RegularUse) { + LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump()); + MaybeDeadCopies.remove(Copy); + } else { + CopyDbgUsers[Copy].push_back(&Reader); + } } } } @@ -250,8 +262,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) { /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, unsigned Def, const TargetRegisterInfo *TRI) { - unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg(); - unsigned PreviousDef = PreviousCopy.getOperand(0).getReg(); + Register PreviousSrc = PreviousCopy.getOperand(1).getReg(); + Register PreviousDef = PreviousCopy.getOperand(0).getReg(); if (Src == PreviousSrc) { assert(Def == PreviousDef); return true; @@ -288,7 +300,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. assert(Copy.isCopy()); - unsigned CopyDef = Copy.getOperand(0).getReg(); + Register CopyDef = Copy.getOperand(0).getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -307,7 +319,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - unsigned CopySrcReg = Copy.getOperand(1).getReg(); + Register CopySrcReg = Copy.getOperand(1).getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. 
@@ -398,9 +410,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!Copy) continue; - unsigned CopyDstReg = Copy->getOperand(0).getReg(); + Register CopyDstReg = Copy->getOperand(0).getReg(); const MachineOperand &CopySrc = Copy->getOperand(1); - unsigned CopySrcReg = CopySrc.getReg(); + Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. if (MOUse.getReg() != CopyDstReg) { @@ -456,11 +468,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Analyze copies (which don't overlap themselves). if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), MI->getOperand(1).getReg())) { - unsigned Def = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); - assert(!TargetRegisterInfo::isVirtualRegister(Def) && - !TargetRegisterInfo::isVirtualRegister(Src) && + assert(!Register::isVirtualRegister(Def) && + !Register::isVirtualRegister(Src) && "MachineCopyPropagation should be run after register allocation!"); // The two copies cancel out and the source of the first copy @@ -488,14 +500,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, the previous copy cannot be // eliminated. 
- ReadRegister(Src); + ReadRegister(Src, *MI, RegularUse); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); } LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); @@ -515,7 +527,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; Tracker.clobberRegister(Reg, *TRI); @@ -529,12 +541,12 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Clobber any earlyclobber regs first. for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isEarlyClobber()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If we have a tied earlyclobber, that means it is also read by this // instruction, so we need to make sure we don't remove it as dead // later. if (MO.isTied()) - ReadRegister(Reg); + ReadRegister(Reg, *MI, RegularUse); Tracker.clobberRegister(Reg, *TRI); } @@ -548,18 +560,18 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { RegMask = &MO; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(!TargetRegisterInfo::isVirtualRegister(Reg) && + assert(!Register::isVirtualRegister(Reg) && "MachineCopyPropagation should be run after register allocation!"); if (MO.isDef() && !MO.isEarlyClobber()) { Defs.push_back(Reg); continue; - } else if (!MO.isDebug() && MO.readsReg()) - ReadRegister(Reg); + } else if (MO.readsReg()) + ReadRegister(Reg, *MI, MO.isDebug() ? 
DebugUse : RegularUse); } // The instruction has a register mask operand which means that it clobbers @@ -571,7 +583,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - unsigned Reg = MaybeDead->getOperand(0).getReg(); + Register Reg = MaybeDead->getOperand(0).getReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -609,9 +621,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDead->dump()); assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); - // Update matching debug values. + // Update matching debug values, if any. assert(MaybeDead->isCopy()); - MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg()); + unsigned SrcReg = MaybeDead->getOperand(1).getReg(); + MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]); MaybeDead->eraseFromParent(); Changed = true; @@ -620,6 +633,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); Tracker.clear(); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 1dfba8638c2..706c706d752 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -18,12 +18,15 @@ using namespace llvm; +namespace llvm { // Always verify dominfo if expensive checking is enabled. 
#ifdef EXPENSIVE_CHECKS -static bool VerifyMachineDomInfo = true; +bool VerifyMachineDomInfo = true; #else -static bool VerifyMachineDomInfo = false; +bool VerifyMachineDomInfo = false; #endif +} // namespace llvm + static cl::opt VerifyMachineDomInfoX( "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden, cl::desc("Verify machine dominator info (time consuming)")); @@ -64,21 +67,11 @@ void MachineDominatorTree::releaseMemory() { } void MachineDominatorTree::verifyAnalysis() const { - if (DT && VerifyMachineDomInfo) { - MachineFunction &F = *getRoot()->getParent(); - - DomTreeBase OtherDT; - OtherDT.recalculate(F); - if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || - DT->compare(OtherDT)) { - errs() << "MachineDominatorTree for function " << F.getName() - << " is not up to date!\nComputed:\n"; - DT->print(errs()); - errs() << "\nActual:\n"; - OtherDT.print(errs()); + if (DT && VerifyMachineDomInfo) + if (!DT->verify(DomTreeT::VerificationLevel::Basic)) { + errs() << "MachineDominatorTree verification failed\n"; abort(); } - } } void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index bae3a4333bd..604f5145b1a 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -28,25 +28,26 @@ using namespace llvm; -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { +void MachineFrameInfo::ensureMaxAlignment(Align Alignment) { if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; + assert(Alignment <= StackAlignment && + "For targets without stack realignment, Alignment is out of limit!"); + if (MaxAlignment < Alignment) + MaxAlignment = Alignment; } /// Clamp the alignment if requested and emit a warning. 
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign +static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment, + Align StackAlignment) { + if (!ShouldClamp || Alignment <= StackAlignment) + return Alignment; + LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value() + << " exceeds the stack alignment " << StackAlignment.value() << " when stack realignment is off" << '\n'); - return StackAlign; + return StackAlignment; } -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, +int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment, bool IsSpillSlot, const AllocaInst *Alloca, uint8_t StackID) { @@ -61,8 +62,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, return Index; } -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) { Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -70,7 +70,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, return Index; } -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, +int MachineFrameInfo::CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); @@ -88,7 +88,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. Note that unlike the non-fixed case, if the // stack needs realignment, we can't assume that the stack will in fact be // aligned. 
- unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -100,7 +101,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable) { - unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align Alignment = + commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, @@ -232,7 +234,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ OS << "variable sized"; else OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; + OS << ", align=" << SO.Alignment.value(); if (i < NumFixedObjects) OS << ", fixed"; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 4df5ce2dced..7d2ee230ca9 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -78,10 +78,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" -static cl::opt -AlignAllFunctions("align-all-functions", - cl::desc("Force the alignment of all functions."), - cl::init(0), cl::Hidden); +static cl::opt AlignAllFunctions( + "align-all-functions", + cl::desc("Force the alignment of all functions in log2 format (e.g. 
4 " + "means align on 16B boundaries)."), + cl::init(0), cl::Hidden); static const char *getPropertyName(MachineFunctionProperties::Property Prop) { using P = MachineFunctionProperties::Property; @@ -181,7 +182,7 @@ void MachineFunction::init() { STI->getTargetLowering()->getPrefFunctionAlignment()); if (AlignAllFunctions) - Alignment = AlignAllFunctions; + Alignment = Align(1ULL << AlignAllFunctions); JumpTableInfo = nullptr; @@ -200,7 +201,7 @@ void MachineFunction::init() { "Target-incompatible DataLayout attached\n"); PSVManager = - llvm::make_unique(*(getSubtarget(). + std::make_unique(*(getSubtarget(). getInstrInfo())); } @@ -823,30 +824,47 @@ try_next:; return FilterID; } -void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) { +void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, + const MDNode *MD) { MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); I->setPreInstrSymbol(*this, BeginLabel); I->setPostInstrSymbol(*this, EndLabel); - DIType *DI = dyn_cast(MD); + const DIType *DI = dyn_cast(MD); CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); } -void MachineFunction::updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New) { - if (!Target.Options.EnableDebugEntryValues || Old == New) - return; +void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); - assert(Old->isCall() && (!New || New->isCall()) && - "Call site info referes only to call instructions!"); - CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old); + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); if (CSIt == CallSitesInfo.end()) return; + CallSiteInfo CSInfo = std::move(CSIt->second); CallSitesInfo.erase(CSIt); - if (New) - CallSitesInfo[New] = CSInfo; + CallSitesInfo[New] = CSInfo; +} + +void 
MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) { + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI); + if (CSIt == CallSitesInfo.end()) + return; + CallSitesInfo.erase(CSIt); +} + +void MachineFunction::copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New) { + assert(New->isCall() && "Call site info refers only to call instructions!"); + + CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old); + if (CSIt == CallSitesInfo.end()) + return; + + CallSiteInfo CSInfo = CSIt->second; + CallSitesInfo[New] = CSInfo; } /// \} @@ -881,13 +899,13 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(0); + return TD.getPointerABIAlignment(0).value(); case MachineJumpTableInfo::EK_GPRel64BlockAddress: - return TD.getABIIntegerTypeAlignment(64); + return TD.getABIIntegerTypeAlignment(64).value(); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: - return TD.getABIIntegerTypeAlignment(32); + return TD.getABIIntegerTypeAlignment(32).value(); case MachineJumpTableInfo::EK_Inline: return 1; } diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 0da4cf3fc90..03149aa7db4 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -41,7 +41,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { if (F.hasAvailableExternallyLinkage()) return false; - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); @@ -101,8 +101,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) { } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); + 
AU.addRequired(); + AU.addPreserved(); // MachineFunctionPass preserves all LLVM IR passes, but there's no // high-level way to express this. Instead, just list a bunch of diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e5c398a2d10..fec20b2b1a0 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -636,8 +636,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) || - !TargetRegisterInfo::isVirtualRegister(OMO.getReg())) + if (!Register::isVirtualRegister(MO.getReg()) || + !Register::isVirtualRegister(OMO.getReg())) if (!MO.isIdenticalTo(OMO)) return false; } else { @@ -692,8 +692,8 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Reg.isVirtual()) continue; MRI.markUsesInDebugValueAsUndef(Reg); } @@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const { return cast(getOperand(3).getMetadata()); } +bool MachineInstr::isDebugEntryValue() const { + return isDebugValue() && getDebugExpression()->isEntryValue(); +} + const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -873,7 +877,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle) const { // Check every operands inside the bundle if we have // been asked to. 
@@ -890,7 +894,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { assert(CurRC && "Invalid initial register class"); // Check if Reg is constrained by some of its use/def from MI. @@ -933,7 +937,7 @@ unsigned MachineInstr::getBundleSize() const { /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). -bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { +bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg) @@ -946,12 +950,12 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const { /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. int MachineInstr::findRegisterUseOperandIdx( - unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { + Register Reg, bool isKill, const TargetRegisterInfo *TRI) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg))) @@ -965,7 +969,7 @@ int MachineInstr::findRegisterUseOperandIdx( /// indicating if this instruction reads or writes Reg. This also considers /// partial defines. 
std::pair -MachineInstr::readsWritesVirtualRegister(unsigned Reg, +MachineInstr::readsWritesVirtualRegister(Register Reg, SmallVectorImpl *Ops) const { bool PartDef = false; // Partial redefine. bool FullDef = false; // Full define. @@ -994,9 +998,9 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it /// also checks if there is a def of a super-register. int -MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, +MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, const TargetRegisterInfo *TRI) const { - bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhys = Register::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); // Accept regmask operands when Overlap is set. @@ -1005,10 +1009,9 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, return i; if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); bool Found = (MOReg == Reg); - if (!Found && TRI && isPhys && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) { if (Overlap) Found = TRI->regsOverlap(MOReg, Reg); else @@ -1142,10 +1145,10 @@ void MachineInstr::clearKillInfo() { } } -void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, +void MachineInstr::substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); for (MachineOperand &MO : operands()) { @@ -1165,7 +1168,7 @@ void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, /// isSafeToMove - Return true if it is safe to move this instruction. 
If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. -bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { +bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. // // Treat volatile loads as stores. This is not strictly necessary for @@ -1194,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { return true; } -bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, +bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const { const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1206,7 +1209,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other, return false; // Let the target decide if memory accesses cannot possibly overlap. - if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) + if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; // FIXME: Need to handle multiple memory operands to support all targets. @@ -1312,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. 
if (!mayLoad()) return false; @@ -1364,7 +1367,7 @@ unsigned MachineInstr::isConstantValuePHI() const { assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); - unsigned Reg = getOperand(1).getReg(); + Register Reg = getOperand(1).getReg(); for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) if (getOperand(i).getReg() != Reg) return 0; @@ -1726,7 +1729,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MFI = &MF->getFrameInfo(); Context = &MF->getFunction().getContext(); } else { - CtxPtr = llvm::make_unique(); + CtxPtr = std::make_unique(); Context = CtxPtr.get(); } @@ -1780,10 +1783,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '\n'; } -bool MachineInstr::addRegisterKilled(unsigned IncomingReg, +bool MachineInstr::addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = Register::isPhysicalRegister(IncomingReg); bool hasAliases = isPhysReg && MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; @@ -1799,7 +1802,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isDebug()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1814,8 +1817,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, MO.setIsKill(); Found = true; } - } else if (hasAliases && MO.isKill() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) { // A super-register kill already exists. 
if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; @@ -1847,23 +1849,23 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } -void MachineInstr::clearRegisterKills(unsigned Reg, +void MachineInstr::clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!Register::isPhysicalRegister(Reg)) RegInfo = nullptr; for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; - unsigned OpReg = MO.getReg(); + Register OpReg = MO.getReg(); if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg) MO.setIsKill(false); } } -bool MachineInstr::addRegisterDead(unsigned Reg, +bool MachineInstr::addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); + bool isPhysReg = Register::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; @@ -1872,7 +1874,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; @@ -1880,7 +1882,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Register::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. 
if (RegInfo->isSuperRegister(Reg, MOReg)) return true; @@ -1913,7 +1915,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } -void MachineInstr::clearRegisterDeads(unsigned Reg) { +void MachineInstr::clearRegisterDeads(Register Reg) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) continue; @@ -1921,7 +1923,7 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { +void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; @@ -1929,9 +1931,9 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { } } -void MachineInstr::addRegisterDefined(unsigned Reg, +void MachineInstr::addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo); if (MO) return; @@ -1947,7 +1949,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; for (MachineOperand &MO : operands()) { @@ -1956,18 +1958,19 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, continue; } if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; // If there are no uses, including partial uses, the def is dead. 
if (llvm::none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } // This is a call with a register mask operand. // Mask clobbers are always dead, so add defs for the non-dead defines. if (HasRegMask) - for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); I != E; ++I) addRegisterDefined(*I, &TRI); } @@ -1979,8 +1982,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.isDef() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. HashComponents.push_back(hash_value(MO)); @@ -2012,7 +2014,7 @@ void MachineInstr::emitError(StringRef Msg) const { MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr) { assert(isa(Variable) && "not a variable"); assert(cast(Expr)->isValid() && "not an expression"); @@ -2048,7 +2050,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - bool IsIndirect, unsigned Reg, + bool IsIndirect, Register Reg, const MDNode *Variable, const MDNode *Expr) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr); @@ -2118,10 +2120,24 @@ void MachineInstr::collectDebugValues( } } -void MachineInstr::changeDebugValuesDefReg(unsigned Reg) { +void MachineInstr::changeDebugValuesDefReg(Register 
Reg) { // Collect matching debug values. SmallVector DbgValues; - collectDebugValues(DbgValues); + + if (!getOperand(0).isReg()) + return; + + unsigned DefReg = getOperand(0).getReg(); + auto *MRI = getRegInfo(); + for (auto &MO : MRI->use_operands(DefReg)) { + auto *DI = MO.getParent(); + if (!DI->isDebugValue()) + continue; + if (DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() == DefReg){ + DbgValues.push_back(DI); + } + } // Propagate Reg to debug value instructions. for (auto *DBI : DbgValues) diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 32e266e9401..feb849ced35 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -154,10 +154,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, continue; } - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { MO.setIsInternalRead(); if (MO.isKill()) @@ -177,7 +177,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { MachineOperand &MO = *Defs[i]; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -194,7 +194,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, DeadDefSet.erase(Reg); } - if (!MO.isDead()) { + if (!MO.isDead() && Register::isPhysicalRegister(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg).second) @@ -316,7 +316,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); @@ -329,8 +329,8 @@ 
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); - if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + Register MOReg = MO.getReg(); + if (!MOReg || !Register::isPhysicalRegister(MOReg)) continue; if (!TRI->regsOverlap(MOReg, Reg)) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 1107e609c25..6a898ff6ef8 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -153,7 +153,6 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -424,10 +423,10 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(Register::isPhysicalRegister(Reg) && "Not expecting virtual register!"); if (!MO.isDef()) { @@ -526,7 +525,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : TI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -554,7 +553,7 @@ void MachineLICMBase::HoistRegionPostRA() { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously @@ -852,8 +851,8 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // FIXME: It seems bad to use 
RegSeen only for some of these calculations. @@ -922,12 +921,12 @@ static bool isInvariantStore(const MachineInstr &MI, // Check that all register operands are caller-preserved physical registers. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If operand is a virtual register, check if it comes from a copy of a // physical register. - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return false; if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF())) return false; @@ -955,17 +954,17 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineFunction *MF = MI.getMF(); // Check that we are copying a constant physical register. - unsigned CopySrcReg = MI.getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + Register CopySrcReg = MI.getOperand(1).getReg(); + if (Register::isVirtualRegister(CopySrcReg)) return false; if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF)) return false; - unsigned CopyDstReg = MI.getOperand(0).getReg(); + Register CopyDstReg = MI.getOperand(0).getReg(); // Check if any of the uses of the copy are invariant stores. - assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) && - "copy dst is not a virtual reg"); + assert(Register::isVirtualRegister(CopyDstReg) && + "copy dst is not a virtual reg"); for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) @@ -1010,11 +1009,11 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -1061,8 +1060,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. @@ -1104,7 +1103,7 @@ bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, const MachineOperand &MO = UseMI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MOReg != Reg) continue; @@ -1132,8 +1131,8 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { if (!DefMO.isReg() || !DefMO.isDef()) continue; --NumDefs; - unsigned Reg = DefMO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = DefMO.getReg(); + if (Register::isPhysicalRegister(Reg)) continue; if (!TII->hasLowDefLatency(SchedModel, MI, i)) @@ -1225,8 +1224,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); @@ -1304,7 +1303,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { MachineFunction &MF = *MI->getMF(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're 
unfolding. Create a temporary register and do the unfold. - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg, @@ -1378,20 +1377,20 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, // Physical registers may not differ here. assert((!MO.isReg() || MO.getReg() == 0 || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + !Register::isPhysicalRegister(MO.getReg()) || MO.getReg() == Dup->getOperand(i).getReg()) && "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + !Register::isPhysicalRegister(MO.getReg())) Defs.push_back(i); } SmallVector OrigRCs; for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Idx = Defs[i]; - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); OrigRCs.push_back(MRI->getRegClass(DupReg)); if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { @@ -1403,8 +1402,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI, } for (unsigned Idx : Defs) { - unsigned Reg = MI->getOperand(Idx).getReg(); - unsigned DupReg = Dup->getOperand(Idx).getReg(); + Register Reg = MI->getOperand(Idx).getReg(); + Register DupReg = Dup->getOperand(Idx).getReg(); MRI->replaceRegWith(Reg, DupReg); MRI->clearKillFlags(DupReg); } diff --git a/lib/CodeGen/MachineLoopUtils.cpp b/lib/CodeGen/MachineLoopUtils.cpp new file mode 100644 index 00000000000..e074b76082f --- /dev/null +++ b/lib/CodeGen/MachineLoopUtils.cpp @@ -0,0 +1,132 @@ +//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +using namespace llvm; + +namespace { +// MI's parent and BB are clones of each other. Find the equivalent copy of MI +// in BB. +MachineInstr &findEquivalentInstruction(MachineInstr &MI, + MachineBasicBlock *BB) { + MachineBasicBlock *PB = MI.getParent(); + unsigned Offset = std::distance(PB->instr_begin(), MachineBasicBlock::instr_iterator(MI)); + return *std::next(BB->instr_begin(), Offset); +} +} // namespace + +MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { + MachineFunction &MF = *Loop->getParent(); + MachineBasicBlock *Preheader = *Loop->pred_begin(); + if (Preheader == Loop) + Preheader = *std::next(Loop->pred_begin()); + MachineBasicBlock *Exit = *Loop->succ_begin(); + if (Exit == Loop) + Exit = *std::next(Loop->succ_begin()); + + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock()); + if (Direction == LPD_Front) + MF.insert(Loop->getIterator(), NewBB); + else + MF.insert(std::next(Loop->getIterator()), NewBB); + + // FIXME: Add DenseMapInfo trait for Register so we can use it as a key. + DenseMap Remaps; + auto InsertPt = NewBB->end(); + for (MachineInstr &MI : *Loop) { + MachineInstr *NewMI = MF.CloneMachineInstr(&MI); + NewBB->insert(InsertPt, NewMI); + for (MachineOperand &MO : NewMI->defs()) { + Register OrigR = MO.getReg(); + if (OrigR.isPhysical()) + continue; + Register &R = Remaps[OrigR]; + R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + MO.setReg(R); + + if (Direction == LPD_Back) { + // Replace all uses outside the original loop with the new register. 
+ // FIXME: is the use_iterator stable enough to mutate register uses + // while iterating? + SmallVector Uses; + for (auto &Use : MRI.use_operands(OrigR)) + if (Use.getParent()->getParent() != Loop) + Uses.push_back(&Use); + for (auto *Use : Uses) { + MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + Use->setReg(R); + } + } + } + } + + for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I) + for (MachineOperand &MO : I->uses()) + if (MO.isReg() && Remaps.count(MO.getReg())) + MO.setReg(Remaps[MO.getReg()]); + + for (auto I = NewBB->begin(); I->isPHI(); ++I) { + MachineInstr &MI = *I; + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (MI.getOperand(2).getMBB() != Preheader) + std::swap(LoopRegIdx, InitRegIdx); + MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop); + assert(OrigPhi.isPHI()); + if (Direction == LPD_Front) { + // When peeling front, we are only left with the initial value from the + // preheader. + Register R = MI.getOperand(LoopRegIdx).getReg(); + if (Remaps.count(R)) + R = Remaps[R]; + OrigPhi.getOperand(InitRegIdx).setReg(R); + MI.RemoveOperand(LoopRegIdx + 1); + MI.RemoveOperand(LoopRegIdx + 0); + } else { + // When peeling back, the initial value is the loop-carried value from + // the original loop. 
+ Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg(); + MI.getOperand(LoopRegIdx).setReg(LoopReg); + MI.RemoveOperand(InitRegIdx + 1); + MI.RemoveOperand(InitRegIdx + 0); + } + } + + DebugLoc DL; + if (Direction == LPD_Front) { + Preheader->replaceSuccessor(Loop, NewBB); + NewBB->addSuccessor(Loop); + Loop->replacePhiUsesWith(Preheader, NewBB); + if (TII->removeBranch(*Preheader) > 0) + TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL); + TII->removeBranch(*NewBB); + TII->insertBranch(*NewBB, Loop, nullptr, {}, DL); + } else { + Loop->replaceSuccessor(Exit, NewBB); + Exit->replacePhiUsesWith(Loop, NewBB); + NewBB->addSuccessor(Exit); + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond); + (void)CanAnalyzeBr; + assert(CanAnalyzeBr && "Must be able to analyze the loop branch!"); + TII->removeBranch(*Loop); + TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB, + FBB == Exit ? NewBB : FBB, Cond, DL); + if (TII->removeBranch(*NewBB) > 0) + TII->insertBranch(*NewBB, Exit, nullptr, {}, DL); + } + + return NewBB; +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index aadcd731979..e0b4e9cac22 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -36,11 +36,6 @@ using namespace llvm; using namespace llvm::dwarf; -// Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) -char MachineModuleInfo::ID = 0; - // Out of line virtual method. 
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; @@ -121,7 +116,7 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol()); + Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); return Entry.Symbols; } @@ -193,27 +188,15 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast(getValPtr()), cast(V2)); } -MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) - : ImmutablePass(ID), TM(*TM), - Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), - TM->getObjFileLowering(), nullptr, false) { - initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); -} - -MachineModuleInfo::~MachineModuleInfo() = default; - -bool MachineModuleInfo::doInitialization(Module &M) { +void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; UsesMSVCFloatingPoint = UsesMorestackAddr = false; HasSplitStack = HasNosplitStack = false; AddrLabelSymbols = nullptr; - TheModule = &M; - DbgInfoAvailable = !llvm::empty(M.debug_compile_units()); - return false; } -bool MachineModuleInfo::doFinalization(Module &M) { +void MachineModuleInfo::finalize() { Personalities.clear(); delete AddrLabelSymbols; @@ -223,10 +206,30 @@ bool MachineModuleInfo::doFinalization(Module &M) { delete ObjFileMMI; ObjFileMMI = nullptr; - - return false; } +MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) + : TM(std::move(MMI.TM)), + Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), + MMI.TM.getObjFileLowering(), nullptr, nullptr, false) { + ObjFileMMI = MMI.ObjFileMMI; + CurCallSite = MMI.CurCallSite; + UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; + UsesMorestackAddr = MMI.UsesMorestackAddr; + HasSplitStack = MMI.HasSplitStack; + HasNosplitStack = MMI.HasNosplitStack; + AddrLabelSymbols = MMI.AddrLabelSymbols; + 
TheModule = MMI.TheModule; +} + +MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) + : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), + TM->getObjFileLowering(), nullptr, nullptr, false) { + initialize(); +} + +MachineModuleInfo::~MachineModuleInfo() { finalize(); } + //===- Address of Block Management ----------------------------------------===// ArrayRef @@ -305,12 +308,13 @@ public: FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } bool runOnFunction(Function &F) override { - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = + getAnalysis().getMMI(); MMI.deleteMachineFunctionFor(F); return true; } @@ -327,3 +331,36 @@ char FreeMachineFunction::ID; FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } + +MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass( + const LLVMTargetMachine *TM) + : ImmutablePass(ID), MMI(TM) { + initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +// Handle the Pass registration stuff necessary to use DataLayout's. 
+INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", + "Machine Module Information", false, false) +char MachineModuleInfoWrapperPass::ID = 0; + +bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { + MMI.initialize(); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return false; +} + +bool MachineModuleInfoWrapperPass::doFinalization(Module &M) { + MMI.finalize(); + return false; +} + +AnalysisKey MachineModuleAnalysis::Key; + +MachineModuleInfo MachineModuleAnalysis::run(Module &M, + ModuleAnalysisManager &) { + MachineModuleInfo MMI(TM); + MMI.TheModule = &M; + MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + return MMI; +} diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp index 4fa4ea7f6cf..8b19501ec3c 100644 --- a/lib/CodeGen/MachineOperand.cpp +++ b/lib/CodeGen/MachineOperand.cpp @@ -49,7 +49,7 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) { getMFIfAvailable(const_cast(MO))); } -void MachineOperand::setReg(unsigned Reg) { +void MachineOperand::setReg(Register Reg) { if (getReg() == Reg) return; // No change. 
@@ -71,9 +71,9 @@ void MachineOperand::setReg(unsigned Reg) { SmallContents.RegNo = Reg; } -void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, +void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); if (SubIdx && getSubReg()) SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); setReg(Reg); @@ -81,8 +81,8 @@ void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, setSubReg(SubIdx); } -void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); +void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) { + assert(Reg.isPhysical()); if (getSubReg()) { Reg = TRI.getSubReg(Reg, getSubReg()); // Note that getSubReg() may return 0 if the sub-register doesn't exist. @@ -114,7 +114,7 @@ void MachineOperand::setIsDef(bool Val) { bool MachineOperand::isRenamable() const { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "isRenamable should only be checked on physical registers"); if (!IsRenamable) return false; @@ -132,7 +132,7 @@ bool MachineOperand::isRenamable() const { void MachineOperand::setIsRenamable(bool Val) { assert(isReg() && "Wrong MachineOperand accessor"); - assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + assert(Register::isPhysicalRegister(getReg()) && "setIsRenamable should only be called on physical registers"); IsRenamable = Val; } @@ -169,7 +169,7 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { } void MachineOperand::ChangeToES(const char *SymName, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an external symbol"); @@ -182,7 +182,7 @@ void MachineOperand::ChangeToES(const char 
*SymName, } void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a global address"); @@ -215,7 +215,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) { } void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into a FrameIndex"); @@ -230,7 +230,7 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. -void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, +void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { MachineRegisterInfo *RegInfo = nullptr; @@ -333,6 +333,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); + case MachineOperand::MO_ShuffleMask: + return getShuffleMask() == Other.getShuffleMask(); } llvm_unreachable("Invalid machine operand type"); } @@ -381,6 +383,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + case MachineOperand::MO_ShuffleMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask()); } llvm_unreachable("Invalid machine operand type"); } @@ -425,12 +429,10 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, return; } - int Reg = TRI->getLLVMRegNum(DwarfReg, true); - if 
(Reg == -1) { + if (Optional Reg = TRI->getLLVMRegNum(DwarfReg, true)) + OS << printReg(*Reg, TRI); + else OS << ""; - return; - } - OS << printReg(Reg, TRI); } static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, @@ -746,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printTargetFlags(OS, *this); switch (getType()) { case MachineOperand::MO_Register: { - unsigned Reg = getReg(); + Register Reg = getReg(); if (isImplicit()) OS << (isDef() ? "implicit-def " : "implicit "); else if (PrintDef && isDef()) @@ -762,13 +764,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "undef "; if (isEarlyClobber()) OS << "early-clobber "; - if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) + if (Register::isPhysicalRegister(getReg()) && isRenamable()) OS << "renamable "; // isDebug() is exactly true for register operands of a DBG_VALUE. So we // simply infer it when parsing and do not need to print it. const MachineRegisterInfo *MRI = nullptr; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { MRI = &MF->getRegInfo(); } @@ -783,7 +785,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << ".subreg" << SubReg; } // Print the register class / bank. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (const MachineFunction *MF = getMFIfAvailable(*this)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { @@ -936,6 +938,20 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << ')'; break; } + case MachineOperand::MO_ShuffleMask: + OS << "shufflemask("; + const Constant* C = getShuffleMask(); + const int NumElts = C->getType()->getVectorNumElements(); + + StringRef Separator; + for (int I = 0; I != NumElts; ++I) { + OS << Separator; + C->getAggregateElement(I)->printAsOperand(OS, false, MST); + Separator = ", "; + } + + OS << ')'; + break; } } @@ -963,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, return false; return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); + BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size), + DL); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -1049,17 +1066,6 @@ uint64_t MachineMemOperand::getAlignment() const { return MinAlign(getBaseAlignment(), getOffset()); } -void MachineMemOperand::print(raw_ostream &OS) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST); -} - -void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - SmallVector SSNs; - LLVMContext Ctx; - print(OS, MST, SSNs, Ctx, nullptr, nullptr); -} - void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl &SSNs, const LLVMContext &Context, diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 27db9106b33..b82403ae1b8 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -76,7 +76,7 @@ bool 
MachineOptimizationRemarkEmitterPass::runOnMachineFunction( else MBFI = nullptr; - ORE = llvm::make_unique(MF, MBFI); + ORE = std::make_unique(MF, MBFI); return false; } diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 80a235aeaa5..8cd66825a58 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -846,8 +846,8 @@ struct MachineOutliner : public ModulePass { StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.setPreservesAll(); ModulePass::getAnalysisUsage(AU); } @@ -1128,7 +1128,7 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); @@ -1260,7 +1260,7 @@ bool MachineOutliner::outline(Module &M, true /* isImp = true */)); } if (MI.isCall()) - MI.getMF()->updateCallSiteInfo(&MI); + MI.getMF()->eraseCallSiteInfo(&MI); }; // Copy over the defs in the outlined range. // First inst in outlined range <-- Anything that's defined in this @@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; + // Disable outlining from noreturn functions right now. Noreturn requires + // special handling for the case where what we are outlining could be a + // tail call. + if (F.hasFnAttribute(Attribute::NoReturn)) + continue; + // There's something in F. Check if it has a MachineFunction associated with // it. 
MachineFunction *MF = MMI.getMachineFunction(F); @@ -1421,7 +1427,7 @@ bool MachineOutliner::runOnModule(Module &M) { if (M.empty()) return false; - MachineModuleInfo &MMI = getAnalysis(); + MachineModuleInfo &MMI = getAnalysis().getMMI(); // If the user passed -enable-machine-outliner=always or // -enable-machine-outliner, the pass will run on all functions in the module. diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 54df522d371..89c9f6093a9 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -56,6 +56,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" @@ -153,6 +154,17 @@ static cl::opt SwpShowResMask("pipeliner-show-mask", cl::Hidden, static cl::opt SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false)); +static cl::opt EmitTestAnnotations( + "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), + cl::desc("Instead of emitting the pipelined code, annotate instructions " + "with the generated schedule for feeding into the " + "-modulo-schedule-test pass")); + +static cl::opt ExperimentalCodeGen( + "pipeliner-experimental-cg", cl::Hidden, cl::init(false), + cl::desc( + "Use the experimental peeling code generator for software pipelining")); + namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. 
@@ -314,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) { + if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { LLVM_DEBUG( dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n"); NumFailLoop++; @@ -349,7 +361,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { // If the operand uses a subregister, replace it with a new register // without subregisters, and generate a copy to the new register. - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); MachineBasicBlock::iterator At = PredB.getFirstTerminator(); const DebugLoc &DL = PredB.findDebugLoc(At); @@ -515,14 +527,49 @@ void SwingSchedulerDAG::schedule() { return; } - generatePipelinedLoop(Schedule); + // Generate the schedule as a ModuloSchedule. + DenseMap Cycles, Stages; + std::vector OrderedInsts; + for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle(); + ++Cycle) { + for (SUnit *SU : Schedule.getInstructions(Cycle)) { + OrderedInsts.push_back(SU->getInstr()); + Cycles[SU->getInstr()] = Cycle; + Stages[SU->getInstr()] = Schedule.stageScheduled(SU); + } + } + DenseMap> NewInstrChanges; + for (auto &KV : NewMIs) { + Cycles[KV.first] = Cycles[KV.second]; + Stages[KV.first] = Stages[KV.second]; + NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)]; + } + + ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles), + std::move(Stages)); + if (EmitTestAnnotations) { + assert(NewInstrChanges.empty() && + "Cannot serialize a schedule with InstrChanges!"); + ModuloScheduleTestAnnotater MSTI(MF, MS); + MSTI.annotate(); + return; + } + // The experimental code generator can't work if there are InstChanges. 
+ if (ExperimentalCodeGen && NewInstrChanges.empty()) { + PeelingModuloScheduleExpander MSE(MF, MS, &LIS); + MSE.expand(); + } else { + ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges)); + MSE.expand(); + MSE.cleanup(); + } ++NumPipelined; } /// Clean up after the software pipeliner runs. void SwingSchedulerDAG::finishBlock() { - for (MachineInstr *I : NewMIs) - MF.DeleteMachineInstr(I); + for (auto &KV : NewMIs) + MF.DeleteMachineInstr(KV.second); NewMIs.clear(); // Call the superclass. @@ -546,14 +593,6 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); } -/// Return the Phi register value that comes from the incoming block. -static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { - for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) - if (Phi.getOperand(i + 1).getMBB() != LoopBB) - return Phi.getOperand(i).getReg(); - return 0; -} - /// Return the Phi register value that comes the loop block. static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) @@ -658,7 +697,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) { if (BaseOp1->isIdenticalTo(*BaseOp2) && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) && "What happened to the chain edge?"); SDep Dep(Load, SDep::Barrier); Dep.setLatency(1); @@ -730,7 +769,7 @@ void SwingSchedulerDAG::updatePhiDependences() { MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; - unsigned Reg = MOI->getReg(); + Register Reg = MOI->getReg(); if (MOI->isDef()) { // If the register is used by a Phi, then create an anti dependence. 
for (MachineRegisterInfo::use_instr_iterator @@ -809,7 +848,7 @@ void SwingSchedulerDAG::changeDependences() { continue; // Get the MI and SUnit for the instruction that defines the original base. - unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg(); + Register OrigBase = I.getInstr()->getOperand(BasePos).getReg(); MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase); if (!DefMI) continue; @@ -958,7 +997,7 @@ struct FuncUnitSorter { unsigned F1 = 0, F2 = 0; unsigned MFUs1 = minFuncUnits(IS1, F1); unsigned MFUs2 = minFuncUnits(IS2, F2); - if (MFUs1 == 1 && MFUs2 == 1) + if (MFUs1 == MFUs2) return Resources.lookup(F1) < Resources.lookup(F2); return MFUs1 > MFUs2; } @@ -1514,8 +1553,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, continue; for (const MachineOperand &MO : MI->operands()) if (MO.isReg() && MO.isUse()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) Uses.insert(Reg); else if (MRI.isAllocatable(Reg)) for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) @@ -1525,8 +1564,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, for (SUnit *SU : NS) for (const MachineOperand &MO : SU->getInstr()->operands()) if (MO.isReg() && MO.isDef() && !MO.isDead()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (!Uses.count(Reg)) LiveOutRegs.push_back(RegisterMaskPair(Reg, LaneBitmask::getNone())); @@ -2012,836 +2051,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { return scheduleFound && Schedule.getMaxStageCount() > 0; } -/// Given a schedule for the loop, generate a new version of the loop, -/// and replace the old version. 
This function generates a prolog -/// that contains the initial iterations in the pipeline, and kernel -/// loop, and the epilogue that contains the code for the final -/// iterations. -void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) { - // Create a new basic block for the kernel and add it to the CFG. - MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - - unsigned MaxStageCount = Schedule.getMaxStageCount(); - - // Remember the registers that are used in different stages. The index is - // the iteration, or stage, that the instruction is scheduled in. This is - // a map between register names in the original block and the names created - // in each stage of the pipelined loop. - ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; - InstrMapTy InstrMap; - - SmallVector PrologBBs; - - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - // Generate the prolog instructions that set up the pipeline. - generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs); - MF.insert(BB->getIterator(), KernelBB); - - // Rearrange the instructions to generate the new, pipelined loop, - // and update register names as needed. - for (int Cycle = Schedule.getFirstCycle(), - LastCycle = Schedule.getFinalCycle(); - Cycle <= LastCycle; ++Cycle) { - std::deque &CycleInstrs = Schedule.getInstructions(Cycle); - // This inner loop schedules each instruction in the cycle. 
- for (SUnit *CI : CycleInstrs) { - if (CI->getInstr()->isPHI()) - continue; - unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr())); - MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum); - updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = CI->getInstr(); - } - } - - // Copy any terminator instructions to the new kernel, and update - // names as needed. - for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), - E = BB->instr_end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); - updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap); - KernelBB->push_back(NewMI); - InstrMap[NewMI] = &*I; - } - - KernelBB->transferSuccessors(BB); - KernelBB->replaceSuccessor(BB, KernelBB); - - generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, - VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap, - InstrMap, MaxStageCount, MaxStageCount, false); - - LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); - - SmallVector EpilogBBs; - // Generate the epilog instructions to complete the pipeline. - generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs, - PrologBBs); - - // We need this step because the register allocation doesn't handle some - // situations well, so we insert copies to help out. - splitLifetimes(KernelBB, EpilogBBs, Schedule); - - // Remove dead instructions due to loop induction variables. - removeDeadInstructions(KernelBB, EpilogBBs); - - // Add branches between prolog and epilog blocks. - addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap); - - // Remove the original loop since it's no longer referenced. - for (auto &I : *BB) - LIS.RemoveMachineInstrFromMaps(I); - BB->clear(); - BB->eraseFromParent(); - - delete[] VRMap; -} - -/// Generate the pipeline prolog code. 
-void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &PrologBBs) { - MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader(); - assert(PreheaderBB != nullptr && - "Need to add code to handle loops w/o preheader"); - MachineBasicBlock *PredBB = PreheaderBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which will be generated in the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - for (unsigned i = 0; i < LastStage; ++i) { - // Create and insert the prolog basic block prior to the original loop - // basic block. The original loop is removed later. - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); - PrologBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - NewBB->transferSuccessors(PredBB); - PredBB->addSuccessor(NewBB); - PredBB = NewBB; - - // Generate instructions for each appropriate stage. Process instructions - // in original program order. - for (int StageNum = i; StageNum >= 0; --StageNum) { - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstTerminator(); - BBI != BBE; ++BBI) { - if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) { - if (BBI->isPHI()) - continue; - MachineInstr *NewMI = - cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule); - updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule, - VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = &*BBI; - } - } - } - rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); - LLVM_DEBUG({ - dbgs() << "prolog:\n"; - NewBB->dump(); - }); - } - - PredBB->replaceSuccessor(BB, KernelBB); - - // Check if we need to remove the branch from the preheader to the original - // loop, and replace it with a branch to the new loop. 
- unsigned numBranches = TII->removeBranch(*PreheaderBB); - if (numBranches) { - SmallVector Cond; - TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc()); - } -} - -/// Generate the pipeline epilog code. The epilog code finishes the iterations -/// that were started in either the prolog or the kernel. We create a basic -/// block for each stage that needs to complete. -void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, - MBBVectorTy &PrologBBs) { - // We need to change the branch from the kernel to the first epilog block, so - // this call to analyze branch uses the kernel rather than the original BB. - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector Cond; - bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); - assert(!checkBranch && "generateEpilog must be able to analyze the branch"); - if (checkBranch) - return; - - MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); - if (*LoopExitI == KernelBB) - ++LoopExitI; - assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); - MachineBasicBlock *LoopExitBB = *LoopExitI; - - MachineBasicBlock *PredBB = KernelBB; - MachineBasicBlock *EpilogStart = LoopExitBB; - InstrMapTy InstrMap; - - // Generate a basic block for each stage, not including the last stage, - // which was generated for the kernel. Each basic block may contain - // instructions from multiple stages/iterations. - int EpilogStage = LastStage + 1; - for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { - MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); - EpilogBBs.push_back(NewBB); - MF.insert(BB->getIterator(), NewBB); - - PredBB->replaceSuccessor(LoopExitBB, NewBB); - NewBB->addSuccessor(LoopExitBB); - - if (EpilogStart == LoopExitBB) - EpilogStart = NewBB; - - // Add instructions to the epilog depending on the current block. 
- // Process instructions in original program order. - for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { - for (auto &BBI : *BB) { - if (BBI.isPHI()) - continue; - MachineInstr *In = &BBI; - if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { - // Instructions with memoperands in the epilog are updated with - // conservative values. - MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); - updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap); - NewBB->push_back(NewMI); - InstrMap[NewMI] = In; - } - } - } - generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, - VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap, - InstrMap, LastStage, EpilogStage, i == 1); - PredBB = NewBB; - - LLVM_DEBUG({ - dbgs() << "epilog:\n"; - NewBB->dump(); - }); - } - - // Fix any Phi nodes in the loop exit block. - for (MachineInstr &MI : *LoopExitBB) { - if (!MI.isPHI()) - break; - for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { - MachineOperand &MO = MI.getOperand(i); - if (MO.getMBB() == BB) - MO.setMBB(PredBB); - } - } - - // Create a branch to the new epilog from the kernel. - // Remove the original branch and add a new branch to the epilog. - TII->removeBranch(*KernelBB); - TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); - // Add a branch to the loop exit. - if (EpilogBBs.size() > 0) { - MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); - SmallVector Cond1; - TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); - } -} - -/// Replace all uses of FromReg that appear outside the specified -/// basic block with ToReg. 
-static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, - MachineBasicBlock *MBB, - MachineRegisterInfo &MRI, - LiveIntervals &LIS) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), - E = MRI.use_end(); - I != E;) { - MachineOperand &O = *I; - ++I; - if (O.getParent()->getParent() != MBB) - O.setReg(ToReg); - } - if (!LIS.hasInterval(ToReg)) - LIS.createEmptyInterval(ToReg); -} - -/// Return true if the register has a use that occurs outside the -/// specified loop. -static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, - MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), - E = MRI.use_end(); - I != E; ++I) - if (I->getParent()->getParent() != BB) - return true; - return false; -} - -/// Generate Phis for the specific block in the generated pipelined code. -/// This function looks at the Phis from the original code to guide the -/// creation of new Phis. -void SwingSchedulerDAG::generateExistingPhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number for the initial value of the Phi, which - // comes from the prolog. The prolog to use depends on to which kernel/ - // epilog that we're adding the Phi. 
- unsigned PrologStage = 0; - unsigned PrevStage = 0; - bool InKernel = (LastStageNum == CurStageNum); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - (CurStageNum - LastStageNum); - PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstNonPHI(); - BBI != BBE; ++BBI) { - unsigned Def = BBI->getOperand(0).getReg(); - - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*BBI, BB, InitVal, LoopVal); - - unsigned PhiOp1 = 0; - // The Phi value from the loop body typically is defined in the loop, but - // not always. So, we need to check if the value is defined in the loop. - unsigned PhiOp2 = LoopVal; - if (VRMap[LastStageNum].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum][LoopVal]; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - int LoopValStage = - Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum); - if (NumStages == 0) { - // We don't need to generate a Phi anymore, but we need to rename any uses - // of the Phi value. - unsigned NewReg = VRMap[PrevStage][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, - Def, InitVal, NewReg); - if (VRMap[CurStageNum].count(LoopVal)) - VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; - } - // Adjust the number of Phis needed depending on the number of prologs left, - // and the distance from where the Phi is first scheduled. The number of - // Phis cannot exceed the number of prolog stages. Each stage can - // potentially define two values. - unsigned MaxPhis = PrologStage + 2; - if (!InKernel && (int)PrologStage <= LoopValStage) - MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); - unsigned NumPhis = std::min(NumStages, MaxPhis); - - unsigned NewReg = 0; - unsigned AccessStage = (LoopValStage != -1) ? 
LoopValStage : StageScheduled; - // In the epilog, we may need to look back one stage to get the correct - // Phi name because the epilog and prolog blocks execute the same stage. - // The correct name is from the previous block only when the Phi has - // been completely scheduled prior to the epilog, and Phi value is not - // needed in multiple stages. - int StageDiff = 0; - if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && - NumPhis == 1) - StageDiff = 1; - // Adjust the computations below when the phi and the loop definition - // are scheduled in different stages. - if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiff = StageScheduled - LoopValStage; - for (unsigned np = 0; np < NumPhis; ++np) { - // If the Phi hasn't been scheduled, then use the initial Phi operand - // value. Otherwise, use the scheduled version of the instruction. This - // is a little complicated when a Phi references another Phi. - if (np > PrologStage || StageScheduled >= (int)LastStageNum) - PhiOp1 = InitVal; - // Check if the Phi has already been scheduled in a prolog stage. - else if (PrologStage >= AccessStage + StageDiff + np && - VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) - PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; - // Check if the Phi has already been scheduled, but the loop instruction - // is either another Phi, or doesn't occur in the loop. - else if (PrologStage >= AccessStage + StageDiff + np) { - // If the Phi references another Phi, we need to examine the other - // Phi to get the correct value. 
- PhiOp1 = LoopVal; - MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); - int Indirects = 1; - while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { - int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1)); - if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) - PhiOp1 = getInitPhiReg(*InstOp1, BB); - else - PhiOp1 = getLoopPhiReg(*InstOp1, BB); - InstOp1 = MRI.getVRegDef(PhiOp1); - int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1)); - int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); - if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && - VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { - PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; - break; - } - ++Indirects; - } - } else - PhiOp1 = InitVal; - // If this references a generated Phi in the kernel, get the Phi operand - // from the incoming block. - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - - MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); - bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); - // In the epilog, a map lookup is needed to get the value from the kernel, - // or previous epilog block. How is does this depends on if the - // instruction is scheduled in the previous block. - if (!InKernel) { - int StageDiffAdj = 0; - if (LoopValStage != -1 && StageScheduled > LoopValStage) - StageDiffAdj = StageScheduled - LoopValStage; - // Use the loop value defined in the kernel, unless the kernel - // contains the last definition of the Phi. - if (np == 0 && PrevStage == LastStageNum && - (StageScheduled != 0 || LoopValStage != 0) && - VRMap[PrevStage - StageDiffAdj].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; - // Use the value defined by the Phi. We add one because we switch - // from looking at the loop value to the Phi definition. 
- else if (np > 0 && PrevStage == LastStageNum && - VRMap[PrevStage - np + 1].count(Def)) - PhiOp2 = VRMap[PrevStage - np + 1][Def]; - // Use the loop value defined in the kernel. - else if (static_cast(LoopValStage) > PrologStage + 1 && - VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) - PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; - // Use the value defined by the Phi, unless we're generating the first - // epilog and the Phi refers to a Phi in a different stage. - else if (VRMap[PrevStage - np].count(Def) && - (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled))) - PhiOp2 = VRMap[PrevStage - np][Def]; - } - - // Check if we can reuse an existing Phi. This occurs when a Phi - // references another Phi, and the other Phi is scheduled in an - // earlier stage. We can try to reuse an existing Phi up until the last - // stage of the current Phi. - if (LoopDefIsPhi) { - if (static_cast(PrologStage - np) >= StageScheduled) { - int LVNumStages = Schedule.getStagesForPhi(LoopVal); - int StageDiff = (StageScheduled - LoopValStage); - LVNumStages -= StageDiff; - // Make sure the loop value Phi has been processed already. - if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { - NewReg = PhiOp2; - unsigned ReuseStage = CurStageNum; - if (Schedule.isLoopCarried(this, *PhiInst)) - ReuseStage -= LVNumStages; - // Check if the Phi to reuse has been generated yet. If not, then - // there is nothing to reuse. - if (VRMap[ReuseStage - np].count(LoopVal)) { - NewReg = VRMap[ReuseStage - np][LoopVal]; - - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - // Update the map with the new Phi name. 
- VRMap[CurStageNum - np][Def] = NewReg; - PhiOp2 = NewReg; - if (VRMap[LastStageNum - np - 1].count(LoopVal)) - PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; - - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - continue; - } - } - } - if (InKernel && StageDiff > 0 && - VRMap[CurStageNum - StageDiff - np].count(LoopVal)) - PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; - } - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // We define the Phis after creating the new pipelined code, so - // we need to rename the Phi values in scheduled instructions. - - unsigned PrevReg = 0; - if (InKernel && VRMap[PrevStage - np].count(LoopVal)) - PrevReg = VRMap[PrevStage - np][LoopVal]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - Def, NewReg, PrevReg); - // If the Phi has been scheduled, use the new name for rewriting. - if (VRMap[CurStageNum - np].count(Def)) { - unsigned R = VRMap[CurStageNum - np][Def]; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI, - R, NewReg); - } - - // Check if we need to rename any uses that occurs after the loop. The - // register to replace depends on whether the Phi is scheduled in the - // epilog. - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - - // In the kernel, a dependent Phi uses the value from this Phi. - if (InKernel) - PhiOp2 = NewReg; - - // Update the map with the new Phi name. 
- VRMap[CurStageNum - np][Def] = NewReg; - } - - while (NumPhis++ < NumStages) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis, - &*BBI, Def, NewReg, 0); - } - - // Check if we need to rename a Phi that has been eliminated due to - // scheduling. - if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) - replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); - } -} - -/// Generate Phis for the specified block in the generated pipelined code. -/// These are new Phis needed because the definition is scheduled after the -/// use in the pipelined sequence. -void SwingSchedulerDAG::generatePhis( - MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, - bool IsLast) { - // Compute the stage number that contains the initial Phi value, and - // the Phi from the previous stage. - unsigned PrologStage = 0; - unsigned PrevStage = 0; - unsigned StageDiff = CurStageNum - LastStageNum; - bool InKernel = (StageDiff == 0); - if (InKernel) { - PrologStage = LastStageNum - 1; - PrevStage = CurStageNum; - } else { - PrologStage = LastStageNum - StageDiff; - PrevStage = LastStageNum + StageDiff - 1; - } - - for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), - BBE = BB->instr_end(); - BBI != BBE; ++BBI) { - for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = BBI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI)); - assert(StageScheduled != -1 && "Expecting scheduled instruction."); - unsigned Def = MO.getReg(); - unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum); - // An instruction scheduled in stage 0 and is used after the loop - // requires a phi in the epilog for the last definition 
from either - // the kernel or prolog. - if (!InKernel && NumPhis == 0 && StageScheduled == 0 && - hasUseAfterLoop(Def, BB, MRI)) - NumPhis = 1; - if (!InKernel && (unsigned)StageScheduled > PrologStage) - continue; - - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); - // The number of Phis can't exceed the number of prolog stages. The - // prolog stage number is zero based. - if (NumPhis > PrologStage + 1 - StageScheduled) - NumPhis = PrologStage + 1 - StageScheduled; - for (unsigned np = 0; np < NumPhis; ++np) { - unsigned PhiOp1 = VRMap[PrologStage][Def]; - if (np <= PrologStage) - PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); - } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; - - const TargetRegisterClass *RC = MRI.getRegClass(Def); - unsigned NewReg = MRI.createVirtualRegister(RC); - - MachineInstrBuilder NewPhi = - BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), - TII->get(TargetOpcode::PHI), NewReg); - NewPhi.addReg(PhiOp1).addMBB(BB1); - NewPhi.addReg(PhiOp2).addMBB(BB2); - if (np == 0) - InstrMap[NewPhi] = &*BBI; - - // Rewrite uses and update the map. The actions depend upon whether - // we generating code for the kernel or epilog blocks. 
- if (InKernel) { - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp1, NewReg); - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, PhiOp2, NewReg); - - PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; - } else { - VRMap[CurStageNum - np][Def] = NewReg; - if (np == NumPhis - 1) - rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, - &*BBI, Def, NewReg); - } - if (IsLast && np == NumPhis - 1) - replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); - } - } - } -} - -/// Remove instructions that generate values with no uses. -/// Typically, these are induction variable operations that generate values -/// used in the loop itself. A dead instruction has a definition with -/// no uses, or uses that occur in the original loop only. -void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs) { - // For each epilog block, check that the value defined by each instruction - // is used. If not, delete it. - for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), - MBE = EpilogBBs.rend(); - MBB != MBE; ++MBB) - for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), - ME = (*MBB)->instr_rend(); - MI != ME;) { - // From DeadMachineInstructionElem. Don't delete inline assembly. - if (MI->isInlineAsm()) { - ++MI; - continue; - } - bool SawStore = false; - // Check if it's safe to remove the instruction due to side effects. - // We can, and want to, remove Phis here. - if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) { - ++MI; - continue; - } - bool used = true; - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - unsigned reg = MOI->getReg(); - // Assume physical registers are used, unless they are marked dead. 
- if (TargetRegisterInfo::isPhysicalRegister(reg)) { - used = !MOI->isDead(); - if (used) - break; - continue; - } - unsigned realUses = 0; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - // Check if there are any uses that occur only in the original - // loop. If so, that's not a real use. - if (UI->getParent()->getParent() != BB) { - realUses++; - used = true; - break; - } - } - if (realUses > 0) - break; - used = false; - } - if (!used) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI++->eraseFromParent(); - continue; - } - ++MI; - } - // In the kernel block, check if we can remove a Phi that generates a value - // used in an instruction removed in the epilog block. - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBE = KernelBB->getFirstNonPHI(); - BBI != BBE;) { - MachineInstr *MI = &*BBI; - ++BBI; - unsigned reg = MI->getOperand(0).getReg(); - if (MRI.use_begin(reg) == MRI.use_end()) { - LIS.RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); - } - } -} - -/// For loop carried definitions, we split the lifetime of a virtual register -/// that has uses past the definition in the next iteration. A copy with a new -/// virtual register is inserted before the definition, which helps with -/// generating a better register assignment. -/// -/// v1 = phi(a, v2) v1 = phi(a, v2) -/// v2 = phi(b, v3) v2 = phi(b, v3) -/// v3 = .. v4 = copy v1 -/// .. = V1 v3 = .. -/// .. = v4 -void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (auto &PHI : KernelBB->phis()) { - unsigned Def = PHI.getOperand(0).getReg(); - // Check for any Phi definition that used as an operand of another Phi - // in the same block. 
- for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), - E = MRI.use_instr_end(); - I != E; ++I) { - if (I->isPHI() && I->getParent() == KernelBB) { - // Get the loop carried definition. - unsigned LCDef = getLoopPhiReg(PHI, KernelBB); - if (!LCDef) - continue; - MachineInstr *MI = MRI.getVRegDef(LCDef); - if (!MI || MI->getParent() != KernelBB || MI->isPHI()) - continue; - // Search through the rest of the block looking for uses of the Phi - // definition. If one occurs, then split the lifetime. - unsigned SplitReg = 0; - for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI), - KernelBB->instr_end())) - if (BBJ.readsRegister(Def)) { - // We split the lifetime when we find the first use. - if (SplitReg == 0) { - SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def)); - BuildMI(*KernelBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), SplitReg) - .addReg(Def); - } - BBJ.substituteRegister(Def, SplitReg, 0, *TRI); - } - if (!SplitReg) - continue; - // Search through each of the epilog blocks for any uses to be renamed. - for (auto &Epilog : EpilogBBs) - for (auto &I : *Epilog) - if (I.readsRegister(Def)) - I.substituteRegister(Def, SplitReg, 0, *TRI); - break; - } - } - } -} - -/// Remove the incoming block from the Phis in a basic block. -static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { - for (MachineInstr &MI : *BB) { - if (!MI.isPHI()) - break; - for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) - if (MI.getOperand(i + 1).getMBB() == Incoming) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); - break; - } - } -} - -/// Create branches from each prolog basic block to the appropriate epilog -/// block. These edges are needed if the loop ends before reaching the -/// kernel. 
-void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB, - MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap) { - assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch"); - MachineInstr *IndVar = Pass.LI.LoopInductionVar; - MachineInstr *Cmp = Pass.LI.LoopCompare; - MachineBasicBlock *LastPro = KernelBB; - MachineBasicBlock *LastEpi = KernelBB; - - // Start from the blocks connected to the kernel and work "out" - // to the first prolog and the last epilog blocks. - SmallVector PrevInsts; - unsigned MaxIter = PrologBBs.size() - 1; - unsigned LC = UINT_MAX; - unsigned LCMin = UINT_MAX; - for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) { - // Add branches to the prolog that go to the corresponding - // epilog, and the fall-thru prolog/kernel block. - MachineBasicBlock *Prolog = PrologBBs[j]; - MachineBasicBlock *Epilog = EpilogBBs[i]; - // We've executed one iteration, so decrement the loop count and check for - // the loop end. - SmallVector Cond; - // Check if the LOOP0 has already been removed. If so, then there is no need - // to reduce the trip count. - if (LC != 0) - LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond, - PrevInsts, j, MaxIter); - - // Record the value of the first trip count, which is used to determine if - // branches and blocks can be removed for constant trip counts. - if (LCMin == UINT_MAX) - LCMin = LC; - - unsigned numAdded = 0; - if (TargetRegisterInfo::isVirtualRegister(LC)) { - Prolog->addSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); - } else if (j >= LCMin) { - Prolog->addSuccessor(Epilog); - Prolog->removeSuccessor(LastPro); - LastEpi->removeSuccessor(Epilog); - numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc()); - removePhis(Epilog, LastEpi); - // Remove the blocks that are no longer referenced. 
- if (LastPro != LastEpi) { - LastEpi->clear(); - LastEpi->eraseFromParent(); - } - LastPro->clear(); - LastPro->eraseFromParent(); - } else { - numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc()); - removePhis(Epilog, Prolog); - } - LastPro = Prolog; - LastEpi = Epilog; - for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(), - E = Prolog->instr_rend(); - I != E && numAdded > 0; ++I, --numAdded) - updateInstruction(&*I, false, j, 0, Schedule, VRMap); - } -} - /// Return true if we can compute the amount the instruction changes /// during each iteration. Set Delta to the amount of the change. bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { @@ -2854,7 +2063,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { if (!BaseOp->isReg()) return false; - unsigned BaseReg = BaseOp->getReg(); + Register BaseReg = BaseOp->getReg(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Check if there is a Phi. If so, get the definition in the loop. @@ -2874,261 +2083,6 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) { return true; } -/// Update the memory operand with a new offset when the pipeliner -/// generates a new copy of the instruction that refers to a -/// different memory location. -void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI, - MachineInstr &OldMI, unsigned Num) { - if (Num == 0) - return; - // If the instruction has memory operands, then adjust the offset - // when the instruction appears in different stages. - if (NewMI.memoperands_empty()) - return; - SmallVector NewMMOs; - for (MachineMemOperand *MMO : NewMI.memoperands()) { - // TODO: Figure out whether isAtomic is really necessary (see D57601). 
- if (MMO->isVolatile() || MMO->isAtomic() || - (MMO->isInvariant() && MMO->isDereferenceable()) || - (!MMO->getValue())) { - NewMMOs.push_back(MMO); - continue; - } - unsigned Delta; - if (Num != UINT_MAX && computeDelta(OldMI, Delta)) { - int64_t AdjOffset = Delta * Num; - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize())); - } else { - NewMMOs.push_back( - MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize)); - } - } - NewMI.setMemRefs(MF, NewMMOs); -} - -/// Clone the instruction for the new pipelined loop and update the -/// memory operands, if needed. -MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - // Check for tied operands in inline asm instructions. This should be handled - // elsewhere, but I'm not sure of the best solution. - if (OldMI->isInlineAsm()) - for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { - const auto &MO = OldMI->getOperand(i); - if (MO.isReg() && MO.isUse()) - break; - unsigned UseIdx; - if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) - NewMI->tieOperands(i, UseIdx); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Clone the instruction for the new pipelined loop. If needed, this -/// function updates the instruction using the values saved in the -/// InstrChanges structure. 
-MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI, - unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule) { - MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - DenseMap>::iterator It = - InstrChanges.find(getSUnit(OldMI)); - if (It != InstrChanges.end()) { - std::pair RegAndOffset = It->second; - unsigned BasePos, OffsetPos; - if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos)) - return nullptr; - int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm(); - MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first); - if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum) - NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum); - NewMI->getOperand(OffsetPos).setImm(NewOffset); - } - updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); - return NewMI; -} - -/// Update the machine instruction with new virtual registers. This -/// function may change the defintions and/or uses. -void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, - unsigned InstrStageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - unsigned reg = MO.getReg(); - if (MO.isDef()) { - // Create a new virtual register for the definition. - const TargetRegisterClass *RC = MRI.getRegClass(reg); - unsigned NewReg = MRI.createVirtualRegister(RC); - MO.setReg(NewReg); - VRMap[CurStageNum][reg] = NewReg; - if (LastDef) - replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS); - } else if (MO.isUse()) { - MachineInstr *Def = MRI.getVRegDef(reg); - // Compute the stage that contains the last definition for instruction. 
- int DefStageNum = Schedule.stageScheduled(getSUnit(Def)); - unsigned StageNum = CurStageNum; - if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) { - // Compute the difference in stages between the defintion and the use. - unsigned StageDiff = (InstrStageNum - DefStageNum); - // Make an adjustment to get the last definition. - StageNum -= StageDiff; - } - if (VRMap[StageNum].count(reg)) - MO.setReg(VRMap[StageNum][reg]); - } - } -} - -/// Return the instruction in the loop that defines the register. -/// If the definition is a Phi, then follow the Phi operand to -/// the instruction in the loop. -MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { - SmallPtrSet Visited; - MachineInstr *Def = MRI.getVRegDef(Reg); - while (Def->isPHI()) { - if (!Visited.insert(Def).second) - break; - for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) - if (Def->getOperand(i + 1).getMBB() == BB) { - Def = MRI.getVRegDef(Def->getOperand(i).getReg()); - break; - } - } - return Def; -} - -/// Return the new name for the value from the previous stage. -unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage, - unsigned LoopVal, unsigned LoopStage, - ValueMapTy *VRMap, - MachineBasicBlock *BB) { - unsigned PrevVal = 0; - if (StageNum > PhiStage) { - MachineInstr *LoopInst = MRI.getVRegDef(LoopVal); - if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal)) - // The name is defined in the previous stage. - PrevVal = VRMap[StageNum - 1][LoopVal]; - else if (VRMap[StageNum].count(LoopVal)) - // The previous name is defined in the current stage when the instruction - // order is swapped. - PrevVal = VRMap[StageNum][LoopVal]; - else if (!LoopInst->isPHI() || LoopInst->getParent() != BB) - // The loop value hasn't yet been scheduled. - PrevVal = LoopVal; - else if (StageNum == PhiStage + 1) - // The loop value is another phi, which has not been scheduled. 
- PrevVal = getInitPhiReg(*LoopInst, BB); - else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB) - // The loop value is another phi, which has been scheduled. - PrevVal = - getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB), - LoopStage, VRMap, BB); - } - return PrevVal; -} - -/// Rewrite the Phi values in the specified block to use the mappings -/// from the initial operand. Once the Phi is scheduled, we switch -/// to using the loop value instead of the Phi value, so those names -/// do not need to be rewritten. -void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, - unsigned StageNum, - SMSchedule &Schedule, - ValueMapTy *VRMap, - InstrMapTy &InstrMap) { - for (auto &PHI : BB->phis()) { - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(PHI, BB, InitVal, LoopVal); - unsigned PhiDef = PHI.getOperand(0).getReg(); - - unsigned PhiStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); - unsigned LoopStage = - (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal))); - unsigned NumPhis = Schedule.getStagesForPhi(PhiDef); - if (NumPhis > StageNum) - NumPhis = StageNum; - for (unsigned np = 0; np <= NumPhis; ++np) { - unsigned NewVal = - getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); - if (!NewVal) - NewVal = InitVal; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI, - PhiDef, NewVal); - } - } -} - -/// Rewrite a previously scheduled instruction to use the register value -/// from the new instruction. Make sure the instruction occurs in the -/// basic block, and we don't change the uses in the new instruction. 
-void SwingSchedulerDAG::rewriteScheduledInstr( - MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap, - unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, - unsigned NewReg, unsigned PrevReg) { - bool InProlog = (CurStageNum < Schedule.getMaxStageCount()); - int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum; - // Rewrite uses that have been scheduled already to use the new - // Phi register. - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg), - EI = MRI.use_end(); - UI != EI;) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - ++UI; - if (UseMI->getParent() != BB) - continue; - if (UseMI->isPHI()) { - if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg) - continue; - if (getLoopPhiReg(*UseMI, BB) != OldReg) - continue; - } - InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI); - assert(OrigInstr != InstrMap.end() && "Instruction not scheduled."); - SUnit *OrigMISU = getSUnit(OrigInstr->second); - int StageSched = Schedule.stageScheduled(OrigMISU); - int CycleSched = Schedule.cycleScheduled(OrigMISU); - unsigned ReplaceReg = 0; - // This is the stage for the scheduled instruction. - if (StagePhi == StageSched && Phi->isPHI()) { - int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi)); - if (PrevReg && InProlog) - ReplaceReg = PrevReg; - else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) && - (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI())) - ReplaceReg = PrevReg; - else - ReplaceReg = NewReg; - } - // The scheduled instruction occurs before the scheduled Phi, and the - // Phi is not loop carried. 
- if (!InProlog && StagePhi + 1 == StageSched && - !Schedule.isLoopCarried(this, *Phi)) - ReplaceReg = NewReg; - if (StagePhi > StageSched && Phi->isPHI()) - ReplaceReg = NewReg; - if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) - ReplaceReg = NewReg; - if (ReplaceReg) { - MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); - UseOp.setReg(ReplaceReg); - } - } -} - /// Check if we can change the instruction to use an offset value from the /// previous iteration. If so, return true and set the base and offset values /// so that we can rewrite the load, if necessary. @@ -3147,7 +2101,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, unsigned BasePosLd, OffsetPosLd; if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd)) return false; - unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); + Register BaseReg = MI->getOperand(BasePosLd).getReg(); // Look for the Phi instruction. MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); @@ -3202,7 +2156,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, unsigned BasePos, OffsetPos; if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) return; - unsigned BaseReg = MI->getOperand(BasePos).getReg(); + Register BaseReg = MI->getOperand(BasePos).getReg(); MachineInstr *LoopDef = findDefInLoop(BaseReg); int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef)); @@ -3221,11 +2175,29 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } } +/// Return the instruction in the loop that defines the register. +/// If the definition is a Phi, then follow the Phi operand to +/// the instruction in the loop. 
+MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) { + SmallPtrSet Visited; + MachineInstr *Def = MRI.getVRegDef(Reg); + while (Def->isPHI()) { + if (!Visited.insert(Def).second) + break; + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) + if (Def->getOperand(i + 1).getMBB() == BB) { + Def = MRI.getVRegDef(Def->getOperand(i).getReg()); + break; + } + } + return Def; +} + /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a valu /// that may be used or defined by the source in a subsequent iteration. @@ -3499,10 +2471,10 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, ++I, ++Pos) { for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned BasePos, OffsetPos; if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) if (MI->getOperand(BasePos).getReg() == Reg) @@ -3676,7 +2648,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) if (SI.isAssignedRegDep()) - if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg())) + if (Register::isPhysicalRegister(SI.getReg())) if (stageScheduled(SI.getSUnit()) != StageDef) return false; } @@ -3810,7 +2782,7 @@ void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque &Instrs) { NewMI->getOperand(OffsetPos).setImm(NewOffset); SU->setInstr(NewMI); MISUnitMap[NewMI] = SU; - NewMIs.insert(NewMI); + NewMIs[MI] = NewMI; } } OverlapReg = 0; @@ -3847,40 +2819,6 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { ScheduledInstrs[cycle].push_front(*I); } } - // Iterate over the definitions in each instruction, and 
compute the - // stage difference for each use. Keep the maximum value. - for (auto &I : InstrToCycle) { - int DefStage = stageScheduled(I.first); - MachineInstr *MI = I.first->getInstr(); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &Op = MI->getOperand(i); - if (!Op.isReg() || !Op.isDef()) - continue; - - unsigned Reg = Op.getReg(); - unsigned MaxDiff = 0; - bool PhiIsSwapped = false; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), - EI = MRI.use_end(); - UI != EI; ++UI) { - MachineOperand &UseOp = *UI; - MachineInstr *UseMI = UseOp.getParent(); - SUnit *SUnitUse = SSD->getSUnit(UseMI); - int UseStage = stageScheduled(SUnitUse); - unsigned Diff = 0; - if (UseStage != -1 && UseStage >= DefStage) - Diff = UseStage - DefStage; - if (MI->isPHI()) { - if (isLoopCarried(SSD, *MI)) - ++Diff; - else - PhiIsSwapped = true; - } - MaxDiff = std::max(Diff, MaxDiff); - } - RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); - } - } // Erase all the elements in the later stages. Only one iteration should // remain in the scheduled list, and it contains all the instructions. 
@@ -4085,4 +3023,3 @@ void ResourceManager::clearResources() { return DFAResources->clearResources(); std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0); } - diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp index 7f220ed1fd8..f4daff667e8 100644 --- a/lib/CodeGen/MachinePostDominators.cpp +++ b/lib/CodeGen/MachinePostDominators.cpp @@ -17,7 +17,9 @@ using namespace llvm; namespace llvm { template class DominatorTreeBase; // PostDomTreeBase -} + +extern bool VerifyMachineDomInfo; +} // namespace llvm char MachinePostDominatorTree::ID = 0; @@ -25,33 +27,52 @@ char MachinePostDominatorTree::ID = 0; INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", "MachinePostDominator Tree Construction", true, true) -MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { +MachinePostDominatorTree::MachinePostDominatorTree() + : MachineFunctionPass(ID), PDT(nullptr) { initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new PostDomTreeBase(); } -FunctionPass * -MachinePostDominatorTree::createMachinePostDominatorTreePass() { +FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() { return new MachinePostDominatorTree(); } -bool -MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { - DT->recalculate(F); +bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + PDT = std::make_unique(); + PDT->recalculate(F); return false; } -MachinePostDominatorTree::~MachinePostDominatorTree() { - delete DT; -} - -void -MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { +void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } -void -MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const { - DT->print(OS); +MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator( + ArrayRef Blocks) 
const { + assert(!Blocks.empty()); + + MachineBasicBlock *NCD = Blocks.front(); + for (MachineBasicBlock *BB : Blocks.drop_front()) { + NCD = PDT->findNearestCommonDominator(NCD, BB); + + // Stop when the root is reached. + if (PDT->isVirtualRoot(PDT->getNode(NCD))) + return nullptr; + } + + return NCD; +} + +void MachinePostDominatorTree::verifyAnalysis() const { + if (PDT && VerifyMachineDomInfo) + if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) { + errs() << "MachinePostDominatorTree verification failed\n"; + + abort(); + } +} + +void MachinePostDominatorTree::print(llvm::raw_ostream &OS, + const Module *M) const { + PDT->print(OS); } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f0fd0405d69..b88d4ea462e 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -144,7 +144,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { } unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + unsigned Reg = Register::index2VirtReg(getNumVirtRegs()); VRegInfo.grow(Reg); RegAllocHints.grow(Reg); insertVRegByName(Name, Reg); @@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); } void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (!VRegInfo[Reg].second) continue; verifyUseList(Reg); @@ -255,7 +255,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + verifyUseList(Register::index2VirtReg(i)); for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif @@ 
-386,7 +386,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; ++I; - if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (Register::isPhysicalRegister(ToReg)) { O.substPhysReg(ToReg, *TRI); } else { O.setReg(ToReg); @@ -498,7 +498,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); return TRC.getLaneMask(); } @@ -517,7 +517,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { } bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const TargetRegisterInfo *TRI = getTargetRegisterInfo(); if (TRI->isConstantPhysReg(PhysReg)) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index e8b42047b49..258a5f9e048 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -95,7 +95,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { - unsigned SrcReg = I->getOperand(i).getReg(); + Register SrcReg = I->getOperand(i).getReg(); MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); if (AVals[SrcBB] != SrcReg) { Same = false; @@ -118,7 +118,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode, const TargetRegisterClass *RC, MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), 
NewVR); } @@ -292,7 +292,7 @@ public: MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - BB, BB->getFirstTerminator(), + BB, BB->getFirstNonPHI(), Updater->VRC, Updater->MRI, Updater->TII); return NewDef->getOperand(0).getReg(); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index ae1170ad1be..f0721ea3b76 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -82,6 +82,10 @@ cl::opt DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); +cl::opt VerifyScheduling( + "verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + } // end namespace llvm #ifndef NDEBUG @@ -122,9 +126,6 @@ static cl::opt EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true)); -static cl::opt VerifyScheduling("verify-misched", cl::Hidden, - cl::desc("Verify machine instrs before and after machine scheduling")); - // DAG subtrees must have at least this many nodes. 
static const unsigned MinSubtreeSize = 8; @@ -198,6 +199,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) @@ -210,7 +212,7 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -234,7 +236,7 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -933,8 +935,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { if (TrackLaneMasks && !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; // Ignore re-defs. @@ -985,7 +987,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, "ShouldTrackLaneMasks requires ShouldTrackPressure"); } -// Setup the register pressure trackers for the top scheduled top and bottom +// Setup the register pressure trackers for the top scheduled and bottom // scheduled regions. void ScheduleDAGMILive::initRegPressure() { VRegUses.clear(); @@ -1095,7 +1097,7 @@ void ScheduleDAGMILive::updatePressureDiffs( for (const RegisterMaskPair &P : LiveUses) { unsigned Reg = P.RegUnit; /// FIXME: Currently assuming single-use physregs. 
- if (!TRI->isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; if (ShouldTrackLaneMasks) { @@ -1319,8 +1321,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // Visit each live out vreg def to find def/use pairs that cross iterations. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { unsigned Reg = P.RegUnit; - if (!TRI->isVirtualRegister(Reg)) - continue; + if (!Register::isVirtualRegister(Reg)) + continue; const LiveInterval &LI = LIS->getInterval(Reg); const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); if (!DefVNI) @@ -1538,14 +1540,14 @@ namespace llvm { std::unique_ptr createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique(TII, TRI) + return EnableMemOpCluster ? std::make_unique(TII, TRI) : nullptr; } std::unique_ptr createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return EnableMemOpCluster ? llvm::make_unique(TII, TRI) + return EnableMemOpCluster ? std::make_unique(TII, TRI) : nullptr; } @@ -1657,7 +1659,7 @@ namespace llvm { std::unique_ptr createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return llvm::make_unique(TII, TRI); + return std::make_unique(TII, TRI); } } // end namespace llvm @@ -1687,13 +1689,13 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check for pure vreg copies. 
const MachineOperand &SrcOp = Copy->getOperand(1); - unsigned SrcReg = SrcOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) + Register SrcReg = SrcOp.getReg(); + if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) return; const MachineOperand &DstOp = Copy->getOperand(0); - unsigned DstReg = DstOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead()) + Register DstReg = DstOp.getReg(); + if (!Register::isVirtualRegister(DstReg) || DstOp.isDead()) return; // Check if either the dest or source is local. If it's live across a back @@ -2914,14 +2916,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) { unsigned UnscheduledOper = isTop ? 0 : 1; // If we have already scheduled the physreg produce/consumer, immediately // schedule the copy. - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(ScheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) return 1; // If the physreg is at the boundary, defer it. Otherwise schedule it // immediately to free the dependent. We can hoist the copy later. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(UnscheduledOper).getReg())) + if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) return AtBoundary ? -1 : 1; } @@ -2931,7 +2931,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) { // physical registers. bool DoBias = true; for (const MachineOperand &Op : MI->defs()) { - if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) { + if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { DoBias = false; break; } @@ -3259,7 +3259,8 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { // Find already scheduled copies with a single physreg dependence and move // them just above the scheduled instruction. 
for (SDep &Dep : Deps) { - if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) + if (Dep.getKind() != SDep::Data || + !Register::isPhysicalRegister(Dep.getReg())) continue; SUnit *DepSU = Dep.getSUnit(); if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) @@ -3298,7 +3299,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// default scheduler if the target does not set a default. ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new ScheduleDAGMILive(C, llvm::make_unique(C)); + new ScheduleDAGMILive(C, std::make_unique(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate @@ -3450,7 +3451,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, llvm::make_unique(C), + return new ScheduleDAGMI(C, std::make_unique(C), /*RemoveKillFlags=*/true); } @@ -3561,10 +3562,10 @@ public: } // end anonymous namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique(true)); + return new ScheduleDAGMILive(C, std::make_unique(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique(false)); + return new ScheduleDAGMILive(C, std::make_unique(false)); } static MachineSchedRegistry ILPMaxRegistry( @@ -3658,7 +3659,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); return new ScheduleDAGMILive( - C, llvm::make_unique(Alternate, TopDown)); + C, std::make_unique(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 41db2c88ce5..27a2e7023f2 
100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -36,8 +36,9 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -114,15 +115,12 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -195,11 +193,10 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, if (!MI.isCopy()) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !TargetRegisterInfo::isVirtualRegister(DstReg) || - !MRI->hasOneNonDBGUse(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(SrcReg) || + !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); @@ -233,8 +230,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Only makes sense for vregs"); + assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. 
if (MRI->use_nodbg_empty(Reg)) @@ -416,13 +412,13 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // If this instruction is the only user of a virtual register, @@ -615,10 +611,10 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, @@ -817,8 +813,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (Reg == 0 || !Register::isPhysicalRegister(Reg)) + continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } @@ -958,8 +955,9 @@ private: /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; - /// Track DBG_VALUEs of (unmodified) register units. - DenseMap> SeenDbgInstrs; + /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an + /// entry in this map for each unit it touches. 
+ DenseMap> SeenDbgInstrs; /// Sink Copy instructions unused in the same block close to their uses in /// successors. @@ -1030,7 +1028,7 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, const TargetRegisterInfo *TRI) { for (auto U : UsedOpsInCopy) { MachineOperand &MO = MI->getOperand(U); - unsigned SrcReg = MO.getReg(); + Register SrcReg = MO.getReg(); if (!UsedRegUnits.available(SrcReg)) { MachineBasicBlock::iterator NI = std::next(MI->getIterator()); for (MachineInstr &UI : make_range(NI, CurBB.end())) { @@ -1053,7 +1051,7 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { - unsigned Reg = MI->getOperand(U).getReg(); + Register Reg = MI->getOperand(U).getReg(); if (!SuccBB->isLiveIn(Reg)) SuccBB->addLiveIn(Reg); } @@ -1069,7 +1067,7 @@ static bool hasRegisterDependency(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { @@ -1094,6 +1092,14 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } +static SmallSet getRegUnits(unsigned Reg, + const TargetRegisterInfo *TRI) { + SmallSet RegUnits; + for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) + RegUnits.insert(*RI); + return RegUnits; +} + bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1130,15 +1136,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // for DBG_VALUEs later, record them when they're encountered. 
if (MI->isDebugValue()) { auto &MO = MI->getOperand(0); - if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) continue; - // Record debug use of this register. - SeenDbgInstrs[MO.getReg()].push_back(MI); + // Record debug use of each reg unit. + SmallSet Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + SeenDbgInstrs[Reg].push_back(MI); } continue; } @@ -1177,15 +1185,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); - // Collect DBG_VALUEs that must sink with this copy. + // Collect DBG_VALUEs that must sink with this copy. We've previously + // recorded which reg units that DBG_VALUEs read, if this instruction + // writes any of those units then the corresponding DBG_VALUEs must sink. + SetVector DbgValsToSinkSet; SmallVector DbgValsToSink; for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned reg = MO.getReg(); - for (auto *MI : SeenDbgInstrs.lookup(reg)) - DbgValsToSink.push_back(MI); + + SmallSet Units = getRegUnits(MO.getReg(), TRI); + for (unsigned Reg : Units) + for (auto *MI : SeenDbgInstrs.lookup(Reg)) + DbgValsToSinkSet.insert(MI); } + DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(), + DbgValsToSinkSet.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. 
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f9505df4e7f..66a3bc2f8cc 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -634,7 +634,7 @@ struct DataDep { /// Create a DataDep from an SSA form virtual register. DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + assert(Register::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); DefMI = DefI->getParent(); @@ -660,10 +660,10 @@ static bool getDataDeps(const MachineInstr &UseMI, const MachineOperand &MO = *I; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { HasPhysRegs = true; continue; } @@ -687,7 +687,7 @@ static void getPHIDeps(const MachineInstr &UseMI, assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI"); for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) { if (UseMI.getOperand(i + 1).getMBB() == Pred) { - unsigned Reg = UseMI.getOperand(i).getReg(); + Register Reg = UseMI.getOperand(i).getReg(); Deps.push_back(DataDep(MRI, Reg, i)); return; } @@ -708,8 +708,8 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, const MachineOperand &MO = *MI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; // Track live defs and kills for updating RegUnits. 
if (MO.isDef()) { @@ -765,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; for (const LiveInReg &LIR : TBI.LiveIns) { - if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + if (!Register::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. @@ -902,8 +902,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, const MachineOperand &MO = *MOI; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (MO.readsReg()) ReadOps.push_back(MI.getOperandNo(MOI)); @@ -930,7 +930,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height, // Now we know the height of MI. Update any regunits read. for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { - unsigned Reg = MI.getOperand(ReadOps[i]).getReg(); + Register Reg = MI.getOperand(ReadOps[i]).getReg(); for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; // Set the height to the highest reader of the unit. @@ -979,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); unsigned Reg = DefMI->getOperand(DefOp).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); // Reg is live-in to all blocks in Trace that follow DefMBB. 
@@ -1026,7 +1026,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; for (LiveInReg &LI : TBI.LiveIns) { - if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + if (Register::isVirtualRegister(LI.Reg)) { // For virtual registers, the def latency is included. unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; if (Height < LI.Height) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0ad792ac62c..969743edca5 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -22,7 +22,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "LiveRangeCalc.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -37,6 +36,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -122,7 +122,7 @@ namespace { // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) RV.push_back(*SubRegs); } @@ -159,7 +159,7 @@ namespace { // Add register to vregsPassed if it belongs there. Return true if // anything changed. bool addPassed(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) return false; @@ -178,7 +178,7 @@ namespace { // Add register to vregsRequired if it belongs there. Return true if // anything changed. 
bool addRequired(unsigned Reg) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (regsLiveOut.count(Reg)) return false; @@ -552,7 +552,7 @@ void MachineVerifier::report_context_vreg(unsigned VReg) const { } void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { - if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { + if (Register::isVirtualRegister(VRegOrUnit)) { report_context_vreg(VRegOrUnit); } else { errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; @@ -797,7 +797,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.clear(); if (MRI->tracksLiveness()) { for (const auto &LI : MBB->liveins()) { - if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { + if (!Register::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } @@ -957,7 +957,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { // Generic opcodes must not have physical register operands. 
for (unsigned I = 0; I < MI->getNumOperands(); ++I) { const MachineOperand *MO = &MI->getOperand(I); - if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg())) + if (MO->isReg() && Register::isPhysicalRegister(MO->getReg())) report("Generic instruction cannot have physical register", MO, I); } @@ -1368,7 +1368,108 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } } + switch (IntrID) { + case Intrinsic::memcpy: + if (MI->getNumOperands() != 5) + report("Expected memcpy intrinsic to have 5 operands", MI); + break; + case Intrinsic::memmove: + if (MI->getNumOperands() != 5) + report("Expected memmove intrinsic to have 5 operands", MI); + break; + case Intrinsic::memset: + if (MI->getNumOperands() != 5) + report("Expected memset intrinsic to have 5 operands", MI); + break; + } + break; + } + case TargetOpcode::G_SEXT_INREG: { + if (!MI->getOperand(2).isImm()) { + report("G_SEXT_INREG expects an immediate operand #2", MI); + break; + } + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + verifyVectorElementMatch(DstTy, SrcTy, MI); + + int64_t Imm = MI->getOperand(2).getImm(); + if (Imm <= 0) + report("G_SEXT_INREG size must be >= 1", MI); + if (Imm >= SrcTy.getScalarSizeInBits()) + report("G_SEXT_INREG size must be less than source bit width", MI); + break; + } + case TargetOpcode::G_SHUFFLE_VECTOR: { + const MachineOperand &MaskOp = MI->getOperand(3); + if (!MaskOp.isShuffleMask()) { + report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI); + break; + } + + const Constant *Mask = MaskOp.getShuffleMask(); + auto *MaskVT = dyn_cast(Mask->getType()); + if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) { + report("Invalid shufflemask constant type", MI); + break; + } + + if (!Mask->getAggregateElement(0u)) { + report("Invalid shufflemask constant type", MI); + break; + } + + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT 
Src0Ty = MRI->getType(MI->getOperand(1).getReg()); + LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg()); + + if (Src0Ty != Src1Ty) + report("Source operands must be the same type", MI); + + if (Src0Ty.getScalarType() != DstTy.getScalarType()) + report("G_SHUFFLE_VECTOR cannot change element type", MI); + + // Don't check that all operands are vector because scalars are used in + // place of 1 element vectors. + int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; + int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; + + SmallVector MaskIdxes; + ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes); + + if (static_cast(MaskIdxes.size()) != DstNumElts) + report("Wrong result type for shufflemask", MI); + + for (int Idx : MaskIdxes) { + if (Idx < 0) + continue; + + if (Idx >= 2 * SrcNumElts) + report("Out of bounds shuffle index", MI); + } + + break; + } + case TargetOpcode::G_DYN_STACKALLOC: { + const MachineOperand &DstOp = MI->getOperand(0); + const MachineOperand &AllocOp = MI->getOperand(1); + const MachineOperand &AlignOp = MI->getOperand(2); + + if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) { + report("dst operand 0 must be a pointer type", MI); + break; + } + + if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) { + report("src operand 1 must be a scalar reg type", MI); + break; + } + + if (!AlignOp.isImm()) { + report("src operand 2 must be an immediate type", MI); + break; + } break; } default: @@ -1525,11 +1626,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); - else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + else if (Register::isPhysicalRegister(MO->getReg())) { const MachineOperand &MOTied = MI->getOperand(TiedTo); if (!MOTied.isReg()) report("Tied counterpart must be a register", 
&MOTied, TiedTo); - else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + else if (Register::isPhysicalRegister(MOTied.getReg()) && MO->getReg() != MOTied.getReg()) report("Tied physical registers must match.", &MOTied, TiedTo); } @@ -1543,7 +1644,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { switch (MO->getType()) { case MachineOperand::MO_Register: { - const unsigned Reg = MO->getReg(); + const Register Reg = MO->getReg(); if (!Reg) return; if (MRI->tracksLiveness() && !MI->isDebugValue()) @@ -1581,7 +1682,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. unsigned SubIdx = MO->getSubReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (SubIdx) { report("Illegal subregister index for physical register", MO, MONum); return; @@ -1817,7 +1918,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, if (MO->isDead()) { LiveQueryResult LRQ = LR.Query(DefIdx); if (!LRQ.isDeadDef()) { - assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) && + assert(Register::isVirtualRegister(VRegOrUnit) && "Expecting a virtual register."); // A dead subreg def only tells us that the specific subreg is dead. There // could be other non-dead defs of other subregs, or we could have other @@ -1845,8 +1946,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. 
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { + if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) { LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); if (!is_contained(VI.Kills, MI)) report("Kill missing from LiveVariables", MO, MONum); @@ -1856,7 +1956,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { if (MRI->isReservedRegUnit(*Units)) continue; @@ -1865,7 +1965,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } } - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); @@ -1900,7 +2000,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Use of a dead register. if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. 
@@ -1922,7 +2022,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!MOP.isReg() || !MOP.isImplicit()) continue; - if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg())) + if (!Register::isPhysicalRegister(MOP.getReg())) continue; for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); @@ -1960,7 +2060,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsDefined, Reg); // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + if (MRI->isSSA() && Register::isVirtualRegister(Reg) && std::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); @@ -1969,7 +2069,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI); DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg); @@ -2007,7 +2107,7 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { while (!regMasks.empty()) { const uint32_t *Mask = regMasks.pop_back_val(); for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) - if (TargetRegisterInfo::isPhysicalRegister(*I) && + if (Register::isPhysicalRegister(*I) && MachineOperand::clobbersPhysReg(Mask, *I)) regsDead.push_back(*I); } @@ -2119,8 +2219,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() || MODef.isEarlyClobber() || MODef.isDebug()) report("Unexpected flag on PHI operand", &MODef, 0); - unsigned DefReg = MODef.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + Register DefReg = MODef.getReg(); + if 
(!Register::isVirtualRegister(DefReg)) report("Expected first PHI operand to be a virtual register", &MODef, 0); for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { @@ -2212,7 +2312,7 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; @@ -2238,7 +2338,7 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. if (MRI->reg_nodbg_empty(Reg)) @@ -2315,11 +2415,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || !MOI->isDef()) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (MOI->getReg() != Reg) continue; } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + if (!Register::isPhysicalRegister(MOI->getReg()) || !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } @@ -2402,7 +2502,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, return; // RegUnit intervals are allowed dead phis. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() && S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; @@ -2446,7 +2546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; @@ -2519,8 +2619,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, while (true) { assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isEHPad()) { + if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -2580,7 +2679,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { unsigned Reg = LI.reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); LaneBitmask Mask; diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 2db1e86905a..d21eae222af 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -176,7 +176,7 @@ std::unique_ptr llvm::createMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique(shouldScheduleAdjacent, true); + return std::make_unique(shouldScheduleAdjacent, true); return nullptr; } @@ -184,6 +184,6 @@ std::unique_ptr llvm::createBranchMacroFusionDAGMutation( ShouldSchedulePredTy shouldScheduleAdjacent) { if(EnableMacroFusion) - return llvm::make_unique(shouldScheduleAdjacent, false); + return 
std::make_unique(shouldScheduleAdjacent, false); return nullptr; } diff --git a/lib/CodeGen/ModuloSchedule.cpp b/lib/CodeGen/ModuloSchedule.cpp new file mode 100644 index 00000000000..7ce3c586180 --- /dev/null +++ b/lib/CodeGen/ModuloSchedule.cpp @@ -0,0 +1,2022 @@ +//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ModuloSchedule.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "pipeliner" +using namespace llvm; + +void ModuloSchedule::print(raw_ostream &OS) { + for (MachineInstr *MI : ScheduledInstrs) + OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI; +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleExpander implementation +//===----------------------------------------------------------------------===// + +/// Return the register values for the operands of a Phi instruction. +/// This function assume the instruction is a Phi. 
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, + unsigned &InitVal, unsigned &LoopVal) { + assert(Phi.isPHI() && "Expecting a Phi."); + + InitVal = 0; + LoopVal = 0; + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != Loop) + InitVal = Phi.getOperand(i).getReg(); + else + LoopVal = Phi.getOperand(i).getReg(); + + assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure."); +} + +/// Return the Phi register value that comes from the incoming block. +static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() != LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +/// Return the Phi register value that comes the loop block. +static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { + for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) + if (Phi.getOperand(i + 1).getMBB() == LoopBB) + return Phi.getOperand(i).getReg(); + return 0; +} + +void ModuloScheduleExpander::expand() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = *BB->pred_begin(); + if (Preheader == BB) + Preheader = *std::next(BB->pred_begin()); + + // Iterate over the definitions in each instruction, and compute the + // stage difference for each use. Keep the maximum value. 
+ for (MachineInstr *MI : Schedule.getInstructions()) { + int DefStage = Schedule.getStage(MI); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isDef()) + continue; + + Register Reg = Op.getReg(); + unsigned MaxDiff = 0; + bool PhiIsSwapped = false; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg), + EI = MRI.use_end(); + UI != EI; ++UI) { + MachineOperand &UseOp = *UI; + MachineInstr *UseMI = UseOp.getParent(); + int UseStage = Schedule.getStage(UseMI); + unsigned Diff = 0; + if (UseStage != -1 && UseStage >= DefStage) + Diff = UseStage - DefStage; + if (MI->isPHI()) { + if (isLoopCarried(*MI)) + ++Diff; + else + PhiIsSwapped = true; + } + MaxDiff = std::max(Diff, MaxDiff); + } + RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped); + } + } + + generatePipelinedLoop(); +} + +void ModuloScheduleExpander::generatePipelinedLoop() { + LoopInfo = TII->analyzeLoopForPipelining(BB); + assert(LoopInfo && "Must be able to analyze loop!"); + + // Create a new basic block for the kernel and add it to the CFG. + MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + + unsigned MaxStageCount = Schedule.getNumStages() - 1; + + // Remember the registers that are used in different stages. The index is + // the iteration, or stage, that the instruction is scheduled in. This is + // a map between register names in the original block and the names created + // in each stage of the pipelined loop. + ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; + + SmallVector PrologBBs; + + // Generate the prolog instructions that set up the pipeline. + generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs); + MF.insert(BB->getIterator(), KernelBB); + + // Rearrange the instructions to generate the new, pipelined loop, + // and update register names as needed. 
+ for (MachineInstr *CI : Schedule.getInstructions()) { + if (CI->isPHI()) + continue; + unsigned StageNum = Schedule.getStage(CI); + MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum); + updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = CI; + } + + // Copy any terminator instructions to the new kernel, and update + // names as needed. + for (MachineBasicBlock::iterator I = BB->getFirstTerminator(), + E = BB->instr_end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + updateInstruction(NewMI, false, MaxStageCount, 0, VRMap); + KernelBB->push_back(NewMI); + InstrMap[NewMI] = &*I; + } + + NewKernel = KernelBB; + KernelBB->transferSuccessors(BB); + KernelBB->replaceSuccessor(BB, KernelBB); + + generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, + InstrMap, MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, + MaxStageCount, MaxStageCount, false); + + LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); + + SmallVector EpilogBBs; + // Generate the epilog instructions to complete the pipeline. + generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); + + // We need this step because the register allocation doesn't handle some + // situations well, so we insert copies to help out. + splitLifetimes(KernelBB, EpilogBBs); + + // Remove dead instructions due to loop induction variables. + removeDeadInstructions(KernelBB, EpilogBBs); + + // Add branches between prolog and epilog blocks. + addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); + + delete[] VRMap; +} + +void ModuloScheduleExpander::cleanup() { + // Remove the original loop since it's no longer referenced. + for (auto &I : *BB) + LIS.RemoveMachineInstrFromMaps(I); + BB->clear(); + BB->eraseFromParent(); +} + +/// Generate the pipeline prolog code. 
+void ModuloScheduleExpander::generateProlog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &PrologBBs) { + MachineBasicBlock *PredBB = Preheader; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which will be generated in the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + for (unsigned i = 0; i < LastStage; ++i) { + // Create and insert the prolog basic block prior to the original loop + // basic block. The original loop is removed later. + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock()); + PrologBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + NewBB->transferSuccessors(PredBB); + PredBB->addSuccessor(NewBB); + PredBB = NewBB; + + // Generate instructions for each appropriate stage. Process instructions + // in original program order. + for (int StageNum = i; StageNum >= 0; --StageNum) { + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstTerminator(); + BBI != BBE; ++BBI) { + if (Schedule.getStage(&*BBI) == StageNum) { + if (BBI->isPHI()) + continue; + MachineInstr *NewMI = + cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum); + updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = &*BBI; + } + } + } + rewritePhiValues(NewBB, i, VRMap, InstrMap); + LLVM_DEBUG({ + dbgs() << "prolog:\n"; + NewBB->dump(); + }); + } + + PredBB->replaceSuccessor(BB, KernelBB); + + // Check if we need to remove the branch from the preheader to the original + // loop, and replace it with a branch to the new loop. + unsigned numBranches = TII->removeBranch(*Preheader); + if (numBranches) { + SmallVector Cond; + TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc()); + } +} + +/// Generate the pipeline epilog code. The epilog code finishes the iterations +/// that were started in either the prolog or the kernel. 
We create a basic +/// block for each stage that needs to complete. +void ModuloScheduleExpander::generateEpilog(unsigned LastStage, + MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, + MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { + // We need to change the branch from the kernel to the first epilog block, so + // this call to analyze branch uses the kernel rather than the original BB. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector Cond; + bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond); + assert(!checkBranch && "generateEpilog must be able to analyze the branch"); + if (checkBranch) + return; + + MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin(); + if (*LoopExitI == KernelBB) + ++LoopExitI; + assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor"); + MachineBasicBlock *LoopExitBB = *LoopExitI; + + MachineBasicBlock *PredBB = KernelBB; + MachineBasicBlock *EpilogStart = LoopExitBB; + InstrMapTy InstrMap; + + // Generate a basic block for each stage, not including the last stage, + // which was generated for the kernel. Each basic block may contain + // instructions from multiple stages/iterations. + int EpilogStage = LastStage + 1; + for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) { + MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(); + EpilogBBs.push_back(NewBB); + MF.insert(BB->getIterator(), NewBB); + + PredBB->replaceSuccessor(LoopExitBB, NewBB); + NewBB->addSuccessor(LoopExitBB); + + if (EpilogStart == LoopExitBB) + EpilogStart = NewBB; + + // Add instructions to the epilog depending on the current block. + // Process instructions in original program order. + for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) { + for (auto &BBI : *BB) { + if (BBI.isPHI()) + continue; + MachineInstr *In = &BBI; + if ((unsigned)Schedule.getStage(In) == StageNum) { + // Instructions with memoperands in the epilog are updated with + // conservative values. 
+ MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0); + updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap); + NewBB->push_back(NewMI); + InstrMap[NewMI] = In; + } + } + } + generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, + InstrMap, LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, + LastStage, EpilogStage, i == 1); + PredBB = NewBB; + + LLVM_DEBUG({ + dbgs() << "epilog:\n"; + NewBB->dump(); + }); + } + + // Fix any Phi nodes in the loop exit block. + LoopExitBB->replacePhiUsesWith(BB, PredBB); + + // Create a branch to the new epilog from the kernel. + // Remove the original branch and add a new branch to the epilog. + TII->removeBranch(*KernelBB); + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + // Add a branch to the loop exit. + if (EpilogBBs.size() > 0) { + MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); + SmallVector Cond1; + TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc()); + } +} + +/// Replace all uses of FromReg that appear outside the specified +/// basic block with ToReg. +static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, + MachineBasicBlock *MBB, + MachineRegisterInfo &MRI, + LiveIntervals &LIS) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg), + E = MRI.use_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (O.getParent()->getParent() != MBB) + O.setReg(ToReg); + } + if (!LIS.hasInterval(ToReg)) + LIS.createEmptyInterval(ToReg); +} + +/// Return true if the register has a use that occurs outside the +/// specified loop. +static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, + MachineRegisterInfo &MRI) { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg), + E = MRI.use_end(); + I != E; ++I) + if (I->getParent()->getParent() != BB) + return true; + return false; +} + +/// Generate Phis for the specific block in the generated pipelined code. 
+/// This function looks at the Phis from the original code to guide the +/// creation of new Phis. +void ModuloScheduleExpander::generateExistingPhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + // Compute the stage number for the initial value of the Phi, which + // comes from the prolog. The prolog to use depends on to which kernel/ + // epilog that we're adding the Phi. + unsigned PrologStage = 0; + unsigned PrevStage = 0; + bool InKernel = (LastStageNum == CurStageNum); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - (CurStageNum - LastStageNum); + PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->instr_begin(), + BBE = BB->getFirstNonPHI(); + BBI != BBE; ++BBI) { + Register Def = BBI->getOperand(0).getReg(); + + unsigned InitVal = 0; + unsigned LoopVal = 0; + getPhiRegs(*BBI, BB, InitVal, LoopVal); + + unsigned PhiOp1 = 0; + // The Phi value from the loop body typically is defined in the loop, but + // not always. So, we need to check if the value is defined in the loop. + unsigned PhiOp2 = LoopVal; + if (VRMap[LastStageNum].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum][LoopVal]; + + int StageScheduled = Schedule.getStage(&*BBI); + int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal)); + unsigned NumStages = getStagesForReg(Def, CurStageNum); + if (NumStages == 0) { + // We don't need to generate a Phi anymore, but we need to rename any uses + // of the Phi value. 
+ unsigned NewReg = VRMap[PrevStage][LoopVal]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def, + InitVal, NewReg); + if (VRMap[CurStageNum].count(LoopVal)) + VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal]; + } + // Adjust the number of Phis needed depending on the number of prologs left, + // and the distance from where the Phi is first scheduled. The number of + // Phis cannot exceed the number of prolog stages. Each stage can + // potentially define two values. + unsigned MaxPhis = PrologStage + 2; + if (!InKernel && (int)PrologStage <= LoopValStage) + MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); + unsigned NumPhis = std::min(NumStages, MaxPhis); + + unsigned NewReg = 0; + unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled; + // In the epilog, we may need to look back one stage to get the correct + // Phi name because the epilog and prolog blocks execute the same stage. + // The correct name is from the previous block only when the Phi has + // been completely scheduled prior to the epilog, and Phi value is not + // needed in multiple stages. + int StageDiff = 0; + if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 && + NumPhis == 1) + StageDiff = 1; + // Adjust the computations below when the phi and the loop definition + // are scheduled in different stages. + if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiff = StageScheduled - LoopValStage; + for (unsigned np = 0; np < NumPhis; ++np) { + // If the Phi hasn't been scheduled, then use the initial Phi operand + // value. Otherwise, use the scheduled version of the instruction. This + // is a little complicated when a Phi references another Phi. + if (np > PrologStage || StageScheduled >= (int)LastStageNum) + PhiOp1 = InitVal; + // Check if the Phi has already been scheduled in a prolog stage. 
+ else if (PrologStage >= AccessStage + StageDiff + np && + VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0) + PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal]; + // Check if the Phi has already been scheduled, but the loop instruction + // is either another Phi, or doesn't occur in the loop. + else if (PrologStage >= AccessStage + StageDiff + np) { + // If the Phi references another Phi, we need to examine the other + // Phi to get the correct value. + PhiOp1 = LoopVal; + MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1); + int Indirects = 1; + while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) { + int PhiStage = Schedule.getStage(InstOp1); + if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects) + PhiOp1 = getInitPhiReg(*InstOp1, BB); + else + PhiOp1 = getLoopPhiReg(*InstOp1, BB); + InstOp1 = MRI.getVRegDef(PhiOp1); + int PhiOpStage = Schedule.getStage(InstOp1); + int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0); + if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np && + VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) { + PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1]; + break; + } + ++Indirects; + } + } else + PhiOp1 = InitVal; + // If this references a generated Phi in the kernel, get the Phi operand + // from the incoming block. + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + + MachineInstr *PhiInst = MRI.getVRegDef(LoopVal); + bool LoopDefIsPhi = PhiInst && PhiInst->isPHI(); + // In the epilog, a map lookup is needed to get the value from the kernel, + // or previous epilog block. How is does this depends on if the + // instruction is scheduled in the previous block. 
+ if (!InKernel) { + int StageDiffAdj = 0; + if (LoopValStage != -1 && StageScheduled > LoopValStage) + StageDiffAdj = StageScheduled - LoopValStage; + // Use the loop value defined in the kernel, unless the kernel + // contains the last definition of the Phi. + if (np == 0 && PrevStage == LastStageNum && + (StageScheduled != 0 || LoopValStage != 0) && + VRMap[PrevStage - StageDiffAdj].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal]; + // Use the value defined by the Phi. We add one because we switch + // from looking at the loop value to the Phi definition. + else if (np > 0 && PrevStage == LastStageNum && + VRMap[PrevStage - np + 1].count(Def)) + PhiOp2 = VRMap[PrevStage - np + 1][Def]; + // Use the loop value defined in the kernel. + else if (static_cast(LoopValStage) > PrologStage + 1 && + VRMap[PrevStage - StageDiffAdj - np].count(LoopVal)) + PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal]; + // Use the value defined by the Phi, unless we're generating the first + // epilog and the Phi refers to a Phi in a different stage. + else if (VRMap[PrevStage - np].count(Def) && + (!LoopDefIsPhi || (PrevStage != LastStageNum) || + (LoopValStage == StageScheduled))) + PhiOp2 = VRMap[PrevStage - np][Def]; + } + + // Check if we can reuse an existing Phi. This occurs when a Phi + // references another Phi, and the other Phi is scheduled in an + // earlier stage. We can try to reuse an existing Phi up until the last + // stage of the current Phi. + if (LoopDefIsPhi) { + if (static_cast(PrologStage - np) >= StageScheduled) { + int LVNumStages = getStagesForPhi(LoopVal); + int StageDiff = (StageScheduled - LoopValStage); + LVNumStages -= StageDiff; + // Make sure the loop value Phi has been processed already. + if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) { + NewReg = PhiOp2; + unsigned ReuseStage = CurStageNum; + if (isLoopCarried(*PhiInst)) + ReuseStage -= LVNumStages; + // Check if the Phi to reuse has been generated yet. 
If not, then + // there is nothing to reuse. + if (VRMap[ReuseStage - np].count(LoopVal)) { + NewReg = VRMap[ReuseStage - np][LoopVal]; + + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, + Def, NewReg); + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + PhiOp2 = NewReg; + if (VRMap[LastStageNum - np - 1].count(LoopVal)) + PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal]; + + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + continue; + } + } + } + if (InKernel && StageDiff > 0 && + VRMap[CurStageNum - StageDiff - np].count(LoopVal)) + PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal]; + } + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // We define the Phis after creating the new pipelined code, so + // we need to rename the Phi values in scheduled instructions. + + unsigned PrevReg = 0; + if (InKernel && VRMap[PrevStage - np].count(LoopVal)) + PrevReg = VRMap[PrevStage - np][LoopVal]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, + NewReg, PrevReg); + // If the Phi has been scheduled, use the new name for rewriting. + if (VRMap[CurStageNum - np].count(Def)) { + unsigned R = VRMap[CurStageNum - np][Def]; + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R, + NewReg); + } + + // Check if we need to rename any uses that occurs after the loop. The + // register to replace depends on whether the Phi is scheduled in the + // epilog. + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + + // In the kernel, a dependent Phi uses the value from this Phi. 
+ if (InKernel) + PhiOp2 = NewReg; + + // Update the map with the new Phi name. + VRMap[CurStageNum - np][Def] = NewReg; + } + + while (NumPhis++ < NumStages) { + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def, + NewReg, 0); + } + + // Check if we need to rename a Phi that has been eliminated due to + // scheduling. + if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal)) + replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS); + } +} + +/// Generate Phis for the specified block in the generated pipelined code. +/// These are new Phis needed because the definition is scheduled after the +/// use in the pipelined sequence. +void ModuloScheduleExpander::generatePhis( + MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + // Compute the stage number that contains the initial Phi value, and + // the Phi from the previous stage. 
+ unsigned PrologStage = 0; + unsigned PrevStage = 0; + unsigned StageDiff = CurStageNum - LastStageNum; + bool InKernel = (StageDiff == 0); + if (InKernel) { + PrologStage = LastStageNum - 1; + PrevStage = CurStageNum; + } else { + PrologStage = LastStageNum - StageDiff; + PrevStage = LastStageNum + StageDiff - 1; + } + + for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(), + BBE = BB->instr_end(); + BBI != BBE; ++BBI) { + for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BBI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || + !Register::isVirtualRegister(MO.getReg())) + continue; + + int StageScheduled = Schedule.getStage(&*BBI); + assert(StageScheduled != -1 && "Expecting scheduled instruction."); + Register Def = MO.getReg(); + unsigned NumPhis = getStagesForReg(Def, CurStageNum); + // An instruction scheduled in stage 0 and is used after the loop + // requires a phi in the epilog for the last definition from either + // the kernel or prolog. + if (!InKernel && NumPhis == 0 && StageScheduled == 0 && + hasUseAfterLoop(Def, BB, MRI)) + NumPhis = 1; + if (!InKernel && (unsigned)StageScheduled > PrologStage) + continue; + + unsigned PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + // The number of Phis can't exceed the number of prolog stages. The + // prolog stage number is zero based. 
+ if (NumPhis > PrologStage + 1 - StageScheduled) + NumPhis = PrologStage + 1 - StageScheduled; + for (unsigned np = 0; np < NumPhis; ++np) { + unsigned PhiOp1 = VRMap[PrologStage][Def]; + if (np <= PrologStage) + PhiOp1 = VRMap[PrologStage - np][Def]; + if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { + if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) + PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); + if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) + PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + } + if (!InKernel) + PhiOp2 = VRMap[PrevStage - np][Def]; + + const TargetRegisterClass *RC = MRI.getRegClass(Def); + Register NewReg = MRI.createVirtualRegister(RC); + + MachineInstrBuilder NewPhi = + BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NewReg); + NewPhi.addReg(PhiOp1).addMBB(BB1); + NewPhi.addReg(PhiOp2).addMBB(BB2); + if (np == 0) + InstrMap[NewPhi] = &*BBI; + + // Rewrite uses and update the map. The actions depend upon whether + // we generating code for the kernel or epilog blocks. + if (InKernel) { + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1, + NewReg); + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2, + NewReg); + + PhiOp2 = NewReg; + VRMap[PrevStage - np - 1][Def] = NewReg; + } else { + VRMap[CurStageNum - np][Def] = NewReg; + if (np == NumPhis - 1) + rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, + NewReg); + } + if (IsLast && np == NumPhis - 1) + replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS); + } + } + } +} + +/// Remove instructions that generate values with no uses. +/// Typically, these are induction variable operations that generate values +/// used in the loop itself. A dead instruction has a definition with +/// no uses, or uses that occur in the original loop only. 
+void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
+                                                    MBBVectorTy &EpilogBBs) {
+  // For each epilog block, check that the value defined by each instruction
+  // is used. If not, delete it.
+  // Blocks are visited last-to-first and instructions bottom-up. The
+  // instruction iterator is advanced by hand in every path because the
+  // current instruction may be erased.
+  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+                                     MBE = EpilogBBs.rend();
+       MBB != MBE; ++MBB)
+    for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+                                                   ME = (*MBB)->instr_rend();
+         MI != ME;) {
+      // From DeadMachineInstructionElem. Don't delete inline assembly.
+      if (MI->isInlineAsm()) {
+        ++MI;
+        continue;
+      }
+      bool SawStore = false;
+      // Check if it's safe to remove the instruction due to side effects.
+      // We can, and want to, remove Phis here.
+      if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+        ++MI;
+        continue;
+      }
+      // 'used' stays true when the instruction has no register definitions.
+      bool used = true;
+      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+                                      MOE = MI->operands_end();
+           MOI != MOE; ++MOI) {
+        if (!MOI->isReg() || !MOI->isDef())
+          continue;
+        Register reg = MOI->getReg();
+        // Assume physical registers are used, unless they are marked dead.
+        if (Register::isPhysicalRegister(reg)) {
+          used = !MOI->isDead();
+          if (used)
+            break;
+          continue;
+        }
+        unsigned realUses = 0;
+        for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+                                               EI = MRI.use_end();
+             UI != EI; ++UI) {
+          // Check if there are any uses that occur only in the original
+          // loop. If so, that's not a real use. Only a use whose parent
+          // block differs from BB counts; the first one found is enough.
+          if (UI->getParent()->getParent() != BB) {
+            realUses++;
+            used = true;
+            break;
+          }
+        }
+        if (realUses > 0)
+          break;
+        used = false;
+      }
+      if (!used) {
+        LIS.RemoveMachineInstrFromMaps(*MI);
+        // Post-increment so MI already refers to the next instruction when
+        // the current one is erased.
+        MI++->eraseFromParent();
+        continue;
+      }
+      ++MI;
+    }
+  // In the kernel block, check if we can remove a Phi that generates a value
+  // used in an instruction removed in the epilog block.
+  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+                                   BBE = KernelBB->getFirstNonPHI();
+       BBI != BBE;) {
+    MachineInstr *MI = &*BBI;
+    // Step past MI first; it may be erased below.
+    ++BBI;
+    Register reg = MI->getOperand(0).getReg();
+    if (MRI.use_begin(reg) == MRI.use_end()) {
+      LIS.RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+  }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+///   v1 = phi(a, v2)     v1 = phi(a, v2)
+///   v2 = phi(b, v3)     v2 = phi(b, v3)
+///   v3 = ..             v4 = copy v1
+///   .. = v1             v3 = ..
+///                       .. = v4
+///
+/// Uses of the phi value are renamed in \p KernelBB from the loop-carried
+/// definition onwards, and in every block of \p EpilogBBs.
+void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
+                                            MBBVectorTy &EpilogBBs) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  for (auto &PHI : KernelBB->phis()) {
+    Register Def = PHI.getOperand(0).getReg();
+    // Check for any Phi definition that is used as an operand of another Phi
+    // in the same block.
+    for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+                                                 E = MRI.use_instr_end();
+         I != E; ++I) {
+      if (I->isPHI() && I->getParent() == KernelBB) {
+        // Get the loop carried definition.
+        unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
+        if (!LCDef)
+          continue;
+        MachineInstr *MI = MRI.getVRegDef(LCDef);
+        // Only a non-phi definition inside the kernel is a valid split point.
+        if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+          continue;
+        // Search through the rest of the block looking for uses of the Phi
+        // definition. If one occurs, then split the lifetime.
+        unsigned SplitReg = 0;
+        for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+                                    KernelBB->instr_end()))
+          if (BBJ.readsRegister(Def)) {
+            // We split the lifetime when we find the first use. The COPY is
+            // inserted immediately before MI.
+            if (SplitReg == 0) {
+              SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+              BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+                      TII->get(TargetOpcode::COPY), SplitReg)
+                  .addReg(Def);
+            }
+            BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+          }
+        if (!SplitReg)
+          continue;
+        // Search through each of the epilog blocks for any uses to be renamed.
+        for (auto &Epilog : EpilogBBs)
+          for (auto &I : *Epilog)
+            if (I.readsRegister(Def))
+              I.substituteRegister(Def, SplitReg, 0, *TRI);
+        // The lifetime of Def has been split; stop scanning its other uses.
+        break;
+      }
+    }
+  }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
+/// The scan stops at the first non-phi instruction of \p BB. At most one
+/// (value, block) operand pair is removed from each phi.
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+  for (MachineInstr &MI : *BB) {
+    if (!MI.isPHI())
+      break;
+    // Operand 0 is the definition; the remaining operands are walked in
+    // pairs, with the block at index i + 1.
+    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+      if (MI.getOperand(i + 1).getMBB() == Incoming) {
+        // Remove the higher index first so that index i is still correct.
+        MI.RemoveOperand(i + 1);
+        MI.RemoveOperand(i);
+        break;
+      }
+  }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
+///
+/// Prologs are visited from the one nearest the kernel outwards and paired
+/// with epilogs in the opposite order (PrologBBs[j] with EpilogBBs[i]).
+/// NOTE(review): \p PreheaderBB is not referenced anywhere in this body.
+void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
+                                         MBBVectorTy &PrologBBs,
+                                         MachineBasicBlock *KernelBB,
+                                         MBBVectorTy &EpilogBBs,
+                                         ValueMapTy *VRMap) {
+  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+  MachineBasicBlock *LastPro = KernelBB;
+  MachineBasicBlock *LastEpi = KernelBB;
+
+  // Start from the blocks connected to the kernel and work "out"
+  // to the first prolog and the last epilog blocks.
+  // NOTE(review): PrevInsts is never used below. The template arguments of
+  // this and the other SmallVector/Optional declarations are missing in this
+  // copy of the patch - presumably lost in extraction; restore them from
+  // upstream before applying.
+  SmallVector PrevInsts;
+  unsigned MaxIter = PrologBBs.size() - 1;
+  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+    // Add branches to the prolog that go to the corresponding
+    // epilog, and the fall-thru prolog/kernel block.
+    MachineBasicBlock *Prolog = PrologBBs[j];
+    MachineBasicBlock *Epilog = EpilogBBs[i];
+
+    SmallVector Cond;
+    Optional StaticallyGreater =
+        LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
+    unsigned numAdded = 0;
+    if (!StaticallyGreater.hasValue()) {
+      // No static answer: emit a two-way branch using Cond, to Epilog or to
+      // LastPro.
+      Prolog->addSuccessor(Epilog);
+      numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+    } else if (*StaticallyGreater == false) {
+      // Statically false: Prolog always goes to Epilog, so the blocks that
+      // were previously reached through LastPro/LastEpi are unlinked.
+      Prolog->addSuccessor(Epilog);
+      Prolog->removeSuccessor(LastPro);
+      LastEpi->removeSuccessor(Epilog);
+      numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+      removePhis(Epilog, LastEpi);
+      // Remove the blocks that are no longer referenced.
+      if (LastPro != LastEpi) {
+        LastEpi->clear();
+        LastEpi->eraseFromParent();
+      }
+      if (LastPro == KernelBB) {
+        // The kernel itself is about to be erased.
+        LoopInfo->disposed();
+        NewKernel = nullptr;
+      }
+      LastPro->clear();
+      LastPro->eraseFromParent();
+    } else {
+      // Statically true: Prolog only continues to LastPro, so it is not an
+      // incoming block of Epilog.
+      numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+      removePhis(Epilog, Prolog);
+    }
+    LastPro = Prolog;
+    LastEpi = Epilog;
+    // Remap the registers of the numAdded instructions just inserted at the
+    // end of Prolog.
+    for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+                                                   E = Prolog->instr_rend();
+         I != E && numAdded > 0; ++I, --numAdded)
+      updateInstruction(&*I, false, j, 0, VRMap);
+  }
+
+  // NewKernel is reset to nullptr above when the kernel block is erased.
+  if (NewKernel) {
+    LoopInfo->setPreheader(PrologBBs[MaxIter]);
+    LoopInfo->adjustTripCount(-(MaxIter + 1));
+  }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
+bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const MachineOperand *BaseOp;
+  int64_t Offset;
+  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+    return false;
+
+  if (!BaseOp->isReg())
+    return false;
+
+  Register BaseReg = BaseOp->getReg();
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  // Check if there is a Phi. If so, get the definition in the loop.
+  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+  if (BaseDef && BaseDef->isPHI()) {
+    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+    BaseDef = MRI.getVRegDef(BaseReg);
+  }
+  if (!BaseDef)
+    return false;
+
+  int D = 0;
+  // NOTE(review): as written this bails out only when getIncrementValue()
+  // fails and D is non-negative. A successful call that yields a negative D
+  // falls through and is converted to the unsigned Delta. Confirm whether
+  // "|| D < 0" was intended.
+  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+    return false;
+
+  Delta = D;
+  return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
+///
+/// \p Num is the stage distance supplied by the callers below
+/// (CurStageNum - InstStageNum). A value of 0 leaves \p NewMI untouched.
+void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
+                                               MachineInstr &OldMI,
+                                               unsigned Num) {
+  if (Num == 0)
+    return;
+  // If the instruction has memory operands, then adjust the offset
+  // when the instruction appears in different stages.
+  if (NewMI.memoperands_empty())
+    return;
+  SmallVector NewMMOs;
+  for (MachineMemOperand *MMO : NewMI.memoperands()) {
+    // Operands matching any of these conditions are carried over unchanged.
+    // TODO: Figure out whether isAtomic is really necessary (see D57601).
+    if (MMO->isVolatile() || MMO->isAtomic() ||
+        (MMO->isInvariant() && MMO->isDereferenceable()) ||
+        (!MMO->getValue())) {
+      NewMMOs.push_back(MMO);
+      continue;
+    }
+    unsigned Delta;
+    if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
+      // NOTE(review): Delta and Num are both unsigned, so the product is
+      // formed in unsigned arithmetic before being widened to int64_t.
+      int64_t AdjOffset = Delta * Num;
+      NewMMOs.push_back(
+          MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
+    } else {
+      // Num == UINT_MAX or no computable delta: keep offset 0 and mark the
+      // access size as unknown.
+      NewMMOs.push_back(
+          MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
+    }
+  }
+  NewMI.setMemRefs(MF, NewMMOs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
+                                                 unsigned CurStageNum,
+                                                 unsigned InstStageNum) {
+  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+  // Check for tied operands in inline asm instructions. This should be handled
+  // elsewhere, but I'm not sure of the best solution.
+  if (OldMI->isInlineAsm())
+    for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+      const auto &MO = OldMI->getOperand(i);
+      // Stop at the first register use operand.
+      if (MO.isReg() && MO.isUse())
+        break;
+      unsigned UseIdx;
+      if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+        NewMI->tieOperands(i, UseIdx);
+    }
+  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+  return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+///
+/// Returns nullptr when \p OldMI has an InstrChanges entry but the target
+/// cannot report its base/offset operand positions.
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+    MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+  auto It = InstrChanges.find(OldMI);
+  if (It != InstrChanges.end()) {
+    std::pair RegAndOffset = It->second;
+    unsigned BasePos, OffsetPos;
+    if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+      return nullptr;
+    int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+    MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+    // Only adjust the immediate when the in-loop definition is scheduled in
+    // a later stage than this instruction.
+    if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+      NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+    NewMI->getOperand(OffsetPos).setImm(NewOffset);
+  }
+  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+  return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+///
+/// Every virtual register definition gets a fresh register, recorded in
+/// VRMap[CurStageNum]. When \p LastDef is set, uses after the loop are also
+/// redirected to the new register. Uses are replaced only when \p VRMap has
+/// an entry for the computed stage.
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+                                               bool LastDef,
+                                               unsigned CurStageNum,
+                                               unsigned InstrStageNum,
+                                               ValueMapTy *VRMap) {
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+      continue;
+    Register reg = MO.getReg();
+    if (MO.isDef()) {
+      // Create a new virtual register for the definition.
+      const TargetRegisterClass *RC = MRI.getRegClass(reg);
+      Register NewReg = MRI.createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      VRMap[CurStageNum][reg] = NewReg;
+      if (LastDef)
+        replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+    } else if (MO.isUse()) {
+      MachineInstr *Def = MRI.getVRegDef(reg);
+      // Compute the stage that contains the last definition for instruction.
+      int DefStageNum = Schedule.getStage(Def);
+      unsigned StageNum = CurStageNum;
+      if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+        // Compute the difference in stages between the definition and the use.
+        unsigned StageDiff = (InstrStageNum - DefStageNum);
+        // Make an adjustment to get the last definition.
+        StageNum -= StageDiff;
+      }
+      if (VRMap[StageNum].count(reg))
+        MO.setReg(VRMap[StageNum][reg]);
+    }
+  }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+  // Phis already followed; stops the walk if the chain revisits one.
+  SmallPtrSet Visited;
+  // NOTE(review): the getVRegDef() results here are dereferenced without a
+  // null check - presumably callers guarantee a definition exists; confirm.
+  MachineInstr *Def = MRI.getVRegDef(Reg);
+  while (Def->isPHI()) {
+    if (!Visited.insert(Def).second)
+      break;
+    // Follow the value whose incoming block is the loop block BB.
+    for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+      if (Def->getOperand(i + 1).getMBB() == BB) {
+        Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+        break;
+      }
+  }
+  return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+/// Returns 0 when \p StageNum is not greater than \p PhiStage or when none
+/// of the cases below applies.
+unsigned ModuloScheduleExpander::getPrevMapVal(
+    unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
+    ValueMapTy *VRMap, MachineBasicBlock *BB) {
+  unsigned PrevVal = 0;
+  if (StageNum > PhiStage) {
+    MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+    if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+      // The name is defined in the previous stage.
+      PrevVal = VRMap[StageNum - 1][LoopVal];
+    else if (VRMap[StageNum].count(LoopVal))
+      // The previous name is defined in the current stage when the instruction
+      // order is swapped.
+      PrevVal = VRMap[StageNum][LoopVal];
+    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+      // The loop value hasn't yet been scheduled.
+      PrevVal = LoopVal;
+    else if (StageNum == PhiStage + 1)
+      // The loop value is another phi, which has not been scheduled.
+      PrevVal = getInitPhiReg(*LoopInst, BB);
+    else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+      // The loop value is another phi, which has been scheduled.
+      PrevVal =
+          getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+                        LoopStage, VRMap, BB);
+  }
+  return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
+                                              unsigned StageNum,
+                                              ValueMapTy *VRMap,
+                                              InstrMapTy &InstrMap) {
+  for (auto &PHI : BB->phis()) {
+    unsigned InitVal = 0;
+    unsigned LoopVal = 0;
+    getPhiRegs(PHI, BB, InitVal, LoopVal);
+    Register PhiDef = PHI.getOperand(0).getReg();
+
+    unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
+    unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
+    unsigned NumPhis = getStagesForPhi(PhiDef);
+    // Clamp so that StageNum - np below cannot go below zero.
+    if (NumPhis > StageNum)
+      NumPhis = StageNum;
+    // The bound is inclusive: np takes the values 0 .. NumPhis.
+    for (unsigned np = 0; np <= NumPhis; ++np) {
+      unsigned NewVal =
+          getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+      // Fall back to the phi's initial value when no mapped name was found.
+      if (!NewVal)
+        NewVal = InitVal;
+      // The trailing PrevReg argument is omitted here - presumably it has a
+      // default in the class declaration, which is not visible in this file.
+      rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
+                            NewVal);
+    }
+  }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction.
+/// Make sure the instruction occurs in the
+/// basic block, and we don't change the uses in the new instruction.
+void ModuloScheduleExpander::rewriteScheduledInstr(
+    MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
+    unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
+    unsigned PrevReg) {
+  // Any stage before the last one is treated as prolog code.
+  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
+  int StagePhi = Schedule.getStage(Phi) + PhiNum;
+  // Rewrite uses that have been scheduled already to use the new
+  // Phi register.
+  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
+                                         EI = MRI.use_end();
+       UI != EI;) {
+    MachineOperand &UseOp = *UI;
+    MachineInstr *UseMI = UseOp.getParent();
+    // Advance before UseOp is possibly rewritten at the bottom of the loop.
+    ++UI;
+    // Only uses inside the block being generated are candidates.
+    if (UseMI->getParent() != BB)
+      continue;
+    if (UseMI->isPHI()) {
+      // Skip the phi that defines NewReg itself when rewriting for a
+      // non-phi instruction.
+      if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+        continue;
+      // Skip phis whose loop-carried operand is not OldReg.
+      if (getLoopPhiReg(*UseMI, BB) != OldReg)
+        continue;
+    }
+    InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+    assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+    MachineInstr *OrigMI = OrigInstr->second;
+    int StageSched = Schedule.getStage(OrigMI);
+    int CycleSched = Schedule.getCycle(OrigMI);
+    // Stays 0 when none of the cases below selects a replacement.
+    unsigned ReplaceReg = 0;
+    // This is the stage for the scheduled instruction.
+    if (StagePhi == StageSched && Phi->isPHI()) {
+      int CyclePhi = Schedule.getCycle(Phi);
+      if (PrevReg && InProlog)
+        ReplaceReg = PrevReg;
+      else if (PrevReg && !isLoopCarried(*Phi) &&
+               (CyclePhi <= CycleSched || OrigMI->isPHI()))
+        ReplaceReg = PrevReg;
+      else
+        ReplaceReg = NewReg;
+    }
+    // The scheduled instruction occurs before the scheduled Phi, and the
+    // Phi is not loop carried.
+    if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
+      ReplaceReg = NewReg;
+    if (StagePhi > StageSched && Phi->isPHI())
+      ReplaceReg = NewReg;
+    if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+      ReplaceReg = NewReg;
+    if (ReplaceReg) {
+      // Constrain the replacement to OldReg's register class before use.
+      MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+      UseOp.setReg(ReplaceReg);
+    }
+  }
+}
+
+/// Return true if \p Phi is treated as loop carried: it is a PHI and its
+/// loop value either has no definition, is defined by another PHI, or is
+/// defined at a later cycle or a not-later stage than \p Phi itself.
+/// Returns false for anything that is not a PHI.
+bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
+  if (!Phi.isPHI())
+    return false;
+  unsigned DefCycle = Schedule.getCycle(&Phi);
+  int DefStage = Schedule.getStage(&Phi);
+
+  unsigned InitVal = 0;
+  unsigned LoopVal = 0;
+  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+  MachineInstr *Use = MRI.getVRegDef(LoopVal);
+  if (!Use || Use->isPHI())
+    return true;
+  unsigned LoopCycle = Schedule.getCycle(Use);
+  int LoopStage = Schedule.getStage(Use);
+  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+//===----------------------------------------------------------------------===//
+// PeelingModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+// This is a reimplementation of ModuloScheduleExpander that works by creating
+// a fully correct steady-state kernel and peeling off the prolog and epilogs.
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Remove any dead phis in MBB. Dead phis either have only one block as input
+// (in which case they are the identity) or have no uses.
+void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
+                       LiveIntervals *LIS) {
+  // Repeat until a full pass removes nothing: erasing one phi can leave
+  // another phi without uses.
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
+    for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
+      // Advance first; MI may be erased below.
+      MachineInstr &MI = *I++;
+      assert(MI.isPHI());
+      if (MRI.use_empty(MI.getOperand(0).getReg())) {
+        // No uses: drop the phi. LIS is optional.
+        if (LIS)
+          LIS->RemoveMachineInstrFromMaps(MI);
+        MI.eraseFromParent();
+        Changed = true;
+      } else if (MI.getNumExplicitOperands() == 3) {
+        // Three explicit operands: the definition plus a single
+        // (value, block) pair. Forward the incoming value to all users and
+        // drop the phi.
+        MRI.constrainRegClass(MI.getOperand(1).getReg(),
+                              MRI.getRegClass(MI.getOperand(0).getReg()));
+        MRI.replaceRegWith(MI.getOperand(0).getReg(),
+                           MI.getOperand(1).getReg());
+        if (LIS)
+          LIS->RemoveMachineInstrFromMaps(MI);
+        MI.eraseFromParent();
+        Changed = true;
+      }
+    }
+  }
+}
+
+/// Rewrites the kernel block in-place to adhere to the given schedule.
+/// KernelRewriter holds all of the state required to perform the rewriting.
+///
+/// NOTE(review): the template arguments of the DenseMap/Optional/SmallVector
+/// declarations used by this class are missing in this copy of the patch -
+/// presumably lost in extraction; restore them from upstream before applying.
+class KernelRewriter {
+  ModuloSchedule &S;
+  MachineBasicBlock *BB;
+  MachineBasicBlock *PreheaderBB, *ExitBB;
+  MachineRegisterInfo &MRI;
+  const TargetInstrInfo *TII;
+  LiveIntervals *LIS;
+
+  // Map from register class to canonical undef register for that class.
+  DenseMap Undefs;
+  // Map from (LoopReg, InitReg) to phi register for all created phis. Note
+  // that this map is only used when InitReg is non-undef.
+  DenseMap, Register> Phis;
+  // Map from LoopReg to phi register where the InitReg is undef.
+  DenseMap UndefPhis;
+
+  // Reg is used by MI. Return the new register MI should use to adhere to the
+  // schedule. Insert phis as necessary.
+  Register remapUse(Register Reg, MachineInstr &MI);
+  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
+  // If InitReg is not given it is chosen arbitrarily. It will either be undef
+  // or will be chosen so as to share another phi.
+  Register phi(Register LoopReg, Optional InitReg = {},
+               const TargetRegisterClass *RC = nullptr);
+  // Create an undef register of the given register class.
+  Register undef(const TargetRegisterClass *RC);
+
+public:
+  KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+                 LiveIntervals *LIS = nullptr);
+  void rewrite();
+};
+} // namespace
+
+/// Capture the loop's top block, exit block and the register/instruction info
+/// of the enclosing function. \p LIS may be null.
+KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+                               LiveIntervals *LIS)
+    : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
+      ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
+      TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
+  // The value from L.getLoopPreheader() is overwritten here: the preheader
+  // is taken to be the first predecessor of BB, or the second one if the
+  // first is BB itself.
+  // NOTE(review): assumes BB has at most two predecessors, one of them
+  // being BB - TODO confirm.
+  PreheaderBB = *BB->pred_begin();
+  if (PreheaderBB == BB)
+    PreheaderBB = *std::next(BB->pred_begin());
+}
+
+/// Reorder the loop block into schedule order, delete unscheduled non-phi
+/// instructions, remap every register use through remapUse(), and finally
+/// make sure values that escape the block are carried by a phi.
+void KernelRewriter::rewrite() {
+  // Rearrange the loop to be in schedule order. Note that the schedule may
+  // contain instructions that are not owned by the loop block (InstrChanges and
+  // friends), so we gracefully handle unowned instructions and delete any
+  // instructions that weren't in the schedule.
+  auto InsertPt = BB->getFirstTerminator();
+  MachineInstr *FirstMI = nullptr;
+  for (MachineInstr *MI : S.getInstructions()) {
+    if (MI->isPHI())
+      continue;
+    if (MI->getParent())
+      MI->removeFromParent();
+    BB->insert(InsertPt, MI);
+    if (!FirstMI)
+      FirstMI = MI;
+  }
+  assert(FirstMI && "Failed to find first MI in schedule");
+
+  // At this point all of the scheduled instructions are between FirstMI
+  // and the end of the block. Kill from the first non-phi to FirstMI.
+  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
+    if (LIS)
+      LIS->RemoveMachineInstrFromMaps(*I);
+    (I++)->eraseFromParent();
+  }
+
+  // Now remap every instruction in the loop.
+  for (MachineInstr &MI : *BB) {
+    if (MI.isPHI() || MI.isTerminator())
+      continue;
+    for (MachineOperand &MO : MI.uses()) {
+      // Only explicit, non-physical register uses are remapped.
+      if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
+        continue;
+      Register Reg = remapUse(MO.getReg(), MI);
+      MO.setReg(Reg);
+    }
+  }
+  EliminateDeadPhis(BB, MRI, LIS);
+
+  // Ensure a phi exists for all instructions that are either referenced by
+  // an illegal phi or by an instruction outside the loop. This allows us to
+  // treat remaps of these values the same as "normal" values that come from
+  // loop-carried phis.
+  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
+    // A phi found after the first non-phi is one of the "illegal" phis
+    // that remapUse() inserts in the middle of the block.
+    if (MI->isPHI()) {
+      Register R = MI->getOperand(0).getReg();
+      phi(R);
+      continue;
+    }
+
+    for (MachineOperand &Def : MI->defs()) {
+      // Note: this inner 'MI' (a user of Def) shadows the outer iterator.
+      for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
+        if (MI.getParent() != BB) {
+          phi(Def.getReg());
+          break;
+        }
+      }
+    }
+  }
+}
+
+/// Return the register that \p MI should read in place of \p Reg, creating
+/// phis as required. \p Reg is returned unchanged when it has no unique
+/// definition or when its non-phi producer lives outside the loop block.
+Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
+  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
+  if (!Producer)
+    return Reg;
+
+  int ConsumerStage = S.getStage(&MI);
+  if (!Producer->isPHI()) {
+    // Non-phi producers are simple to remap. Insert as many phis as the
+    // difference between the consumer and producer stages.
+    if (Producer->getParent() != BB)
+      // Producer was not inside the loop. Use the register as-is.
+      return Reg;
+    int ProducerStage = S.getStage(Producer);
+    assert(ConsumerStage != -1 &&
+           "In-loop consumer should always be scheduled!");
+    assert(ConsumerStage >= ProducerStage);
+    unsigned StageDiff = ConsumerStage - ProducerStage;
+
+    for (unsigned I = 0; I < StageDiff; ++I)
+      Reg = phi(Reg);
+    return Reg;
+  }
+
+  // First, dive through the phi chain to find the defaults for the generated
+  // phis.
+  SmallVector, 4> Defaults;
+  Register LoopReg = Reg;
+  auto LoopProducer = Producer;
+  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+    LoopReg = getLoopPhiReg(*LoopProducer, BB);
+    Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+    LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+    assert(LoopProducer);
+  }
+  int LoopProducerStage = S.getStage(LoopProducer);
+
+  Optional IllegalPhiDefault;
+
+  if (LoopProducerStage == -1) {
+    // Do nothing.
+  } else if (LoopProducerStage > ConsumerStage) {
+    // This schedule is only representable if ProducerStage == ConsumerStage+1.
+    // In addition, Consumer's cycle must be scheduled after Producer in the
+    // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+    // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+    int LoopProducerCycle = S.getCycle(LoopProducer);
+    int ConsumerCycle = S.getCycle(&MI);
+#endif
+    assert(LoopProducerCycle <= ConsumerCycle);
+    assert(LoopProducerStage == ConsumerStage + 1);
+    // Peel off the first phi from Defaults and insert a phi between producer
+    // and consumer. This phi will not be at the front of the block so we
+    // consider it illegal. It will only exist during the rewrite process; it
+    // needs to exist while we peel off prologs because these could take the
+    // default value. After that we can replace all uses with the loop producer
+    // value.
+    IllegalPhiDefault = Defaults.front();
+    Defaults.erase(Defaults.begin());
+  } else {
+    assert(ConsumerStage >= LoopProducerStage);
+    int StageDiff = ConsumerStage - LoopProducerStage;
+    if (StageDiff > 0) {
+      LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
+                        << " to " << (Defaults.size() + StageDiff) << "\n");
+      // If we need more phis than we have defaults for, pad out with undefs for
+      // the earliest phis, which are at the end of the defaults chain (the
+      // chain is in reverse order).
+      Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
+                                                       ? Optional()
+                                                       : Defaults.back());
+    }
+  }
+
+  // Now we know the number of stages to jump back, insert the phi chain.
+  auto DefaultI = Defaults.rbegin();
+  while (DefaultI != Defaults.rend())
+    LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
+
+  if (IllegalPhiDefault.hasValue()) {
+    // The consumer optionally consumes LoopProducer in the same iteration
+    // (because the producer is scheduled at an earlier cycle than the consumer)
+    // or the initial value. To facilitate this we create an illegal block here
+    // by embedding a phi in the middle of the block. We will fix this up
+    // immediately prior to pruning.
+    auto RC = MRI.getRegClass(Reg);
+    Register R = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+        .addReg(IllegalPhiDefault.getValue())
+        .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+        .addReg(LoopReg)
+        .addMBB(BB); // Block choice is arbitrary and has no effect.
+    return R;
+  }
+
+  return LoopReg;
+}
+
+/// Return a phi register carrying \p LoopReg around the loop, reusing an
+/// existing phi where possible. Lookup order: an exact (LoopReg, InitReg)
+/// match (or any phi on LoopReg when \p InitReg is absent), then a phi whose
+/// initial value is undef, and only then a newly created phi.
+Register KernelRewriter::phi(Register LoopReg, Optional InitReg,
+                             const TargetRegisterClass *RC) {
+  // If the init register is not undef, try and find an existing phi.
+  if (InitReg.hasValue()) {
+    auto I = Phis.find({LoopReg, InitReg.getValue()});
+    if (I != Phis.end())
+      return I->second;
+  } else {
+    // No preferred initial value: any recorded phi on LoopReg will do. This
+    // is a linear scan over Phis.
+    for (auto &KV : Phis) {
+      if (KV.first.first == LoopReg)
+        return KV.second;
+    }
+  }
+
+  // InitReg is either undef or no existing phi takes InitReg as input. Try and
+  // find a phi that takes undef as input.
+  auto I = UndefPhis.find(LoopReg);
+  if (I != UndefPhis.end()) {
+    Register R = I->second;
+    if (!InitReg.hasValue())
+      // Found a phi taking undef as input, and this input is undef so return
+      // without any more changes.
+      return R;
+    // Found a phi taking undef as input, so rewrite it to take InitReg.
+    MachineInstr *MI = MRI.getVRegDef(R);
+    MI->getOperand(1).setReg(InitReg.getValue());
+    // The phi now has a real initial value, so it moves from UndefPhis to
+    // Phis.
+    Phis.insert({{LoopReg, InitReg.getValue()}, R});
+    MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+    UndefPhis.erase(I);
+    return R;
+  }
+
+  // Failed to find any existing phi to reuse, so create a new one.
+  if (!RC)
+    RC = MRI.getRegClass(LoopReg);
+  Register R = MRI.createVirtualRegister(RC);
+  if (InitReg.hasValue())
+    MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+  // Operand order: initial value from PreheaderBB, then LoopReg from BB.
+  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
+      .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
+      .addMBB(PreheaderBB)
+      .addReg(LoopReg)
+      .addMBB(BB);
+  if (!InitReg.hasValue())
+    UndefPhis[LoopReg] = R;
+  else
+    Phis[{LoopReg, *InitReg}] = R;
+  return R;
+}
+
+/// Return the canonical undef register for \p RC, creating it on first use.
+Register KernelRewriter::undef(const TargetRegisterClass *RC) {
+  // operator[] default-constructs the entry; a zero register means "not
+  // created yet".
+  Register &R = Undefs[RC];
+  if (R == 0) {
+    // Create an IMPLICIT_DEF that defines this register if we need it.
+    // All uses of this should be removed by the time we have finished unrolling
+    // prologs and epilogs.
+    R = MRI.createVirtualRegister(RC);
+    // The definition is placed in the function's first block, before its
+    // first terminator.
+    auto *InsertBB = &PreheaderBB->getParent()->front();
+    BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
+            TII->get(TargetOpcode::IMPLICIT_DEF), R);
+  }
+  return R;
+}
+
+namespace {
+/// Describes an operand in the kernel of a pipelined loop. Characteristics of
+/// the operand are discovered, such as how many in-loop PHIs it has to jump
+/// through and defaults for these phis.
+class KernelOperandInfo {
+  MachineBasicBlock *BB;
+  MachineRegisterInfo &MRI;
+  // Initial-value registers of the in-loop phis crossed while tracing the
+  // operand; its size is the "distance" reported by print().
+  // NOTE(review): the template arguments of this SmallVector (and of the
+  // SmallPtrSetImpl parameter below) are missing in this copy of the patch -
+  // presumably lost in extraction; restore them from upstream.
+  SmallVector PhiDefaults;
+  // The operand the trace started from.
+  MachineOperand *Source;
+  // The operand the trace ended on. Not read anywhere in this class.
+  MachineOperand *Target;
+
+public:
+  /// Trace \p MO backwards through full copies and phis defined in its own
+  /// block, recording the initial value of every legal phi crossed. Phis
+  /// contained in \p IllegalPhis are followed but not counted.
+  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
+                    const SmallPtrSetImpl &IllegalPhis)
+      : MRI(MRI) {
+    Source = MO;
+    BB = MO->getParent()->getParent();
+    while (isRegInLoop(MO)) {
+      MachineInstr *MI = MRI.getVRegDef(MO->getReg());
+      if (MI->isFullCopy()) {
+        // Look through the copy to its source operand.
+        MO = &MI->getOperand(1);
+        continue;
+      }
+      if (!MI->isPHI())
+        break;
+      // If this is an illegal phi, don't count it in distance.
+      if (IllegalPhis.count(MI)) {
+        MO = &MI->getOperand(3);
+        continue;
+      }
+
+      Register Default = getInitPhiReg(*MI, BB);
+      // Continue with the value whose incoming block is BB: operand 1 if the
+      // first pair's block is BB, otherwise operand 3.
+      MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
+                                            : &MI->getOperand(3);
+      PhiDefaults.push_back(Default);
+    }
+    Target = MO;
+  }
+
+  /// Two infos compare equal when they crossed the same number of phis; the
+  /// recorded default registers themselves are not compared.
+  bool operator==(const KernelOperandInfo &Other) const {
+    return PhiDefaults.size() == Other.PhiDefaults.size();
+  }
+
+  /// Print the source operand, its phi distance and its parent instruction.
+  void print(raw_ostream &OS) const {
+    OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
+       << *Source->getParent();
+  }
+
+private:
+  /// True if \p MO is a virtual register whose definition is in BB.
+  /// NOTE(review): the getVRegDef() result is dereferenced without a null
+  /// check - presumably every such register has a definition; confirm.
+  bool isRegInLoop(MachineOperand *MO) {
+    return MO->isReg() && MO->getReg().isVirtual() &&
+           MRI.getVRegDef(MO->getReg())->getParent() == BB;
+  }
+};
+} // namespace
+
+/// Peel one copy of the kernel block in direction \p LPD, record it in
+/// PeeledFront or PeeledBack, and register the instruction correspondence
+/// between BB and the new block. Returns the new block.
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+  if (LPD == LPD_Front)
+    PeeledFront.push_back(NewBB);
+  else
+    PeeledBack.push_front(NewBB);
+  // Walk BB and NewBB in lock-step up to BB's first terminator.
+  // CanonicalMIs maps each instruction (original or clone) to the original;
+  // BlockMIs maps (block, original) to that block's copy.
+  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+       ++I, ++NI) {
+    CanonicalMIs[&*I] = &*I;
+    CanonicalMIs[&*NI] = &*I;
+    BlockMIs[{NewBB, &*I}] = &*NI;
+    BlockMIs[{BB, &*I}] = &*I;
+  }
+  return NewBB;
+}
+
+/// Peel the prolog and epilog blocks off the kernel, connect each prolog to
+/// its epilog, rewrite uses in all blocks and clean up dead phis.
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+  // Both vectors start with every stage set.
+  BitVector LS(Schedule.getNumStages(), true);
+  BitVector AS(Schedule.getNumStages(), true);
+  LiveStages[BB] = LS;
+  AvailableStages[BB] = AS;
+
+  // Peel out the prologs.
+  LS.reset();
+  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+    // LS accumulates: prolog I is recorded with stages 0..I set.
+    LS[I] = 1;
+    Prologs.push_back(peelKernel(LPD_Front));
+    LiveStages[Prologs.back()] = LS;
+    AvailableStages[Prologs.back()] = LS;
+  }
+
+  // Create a block that will end up as the new loop exiting block (dominated by
+  // all prologs and epilogs). It will only contain PHIs, in the same order as
+  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+  // that the exiting block is a (sub) clone of BB. This in turn gives us the
+  // property that any value deffed in BB but used outside of BB is used by a
+  // PHI in the exiting block.
+  MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+
+  // Push out the epilogs, again in reverse order.
+  // We can't assume anything about the minimum loop trip count at this point,
+  // so emit a fairly complex epilog:
+  //  K[0, 1, 2]     // Kernel runs stages 0, 1, 2
+  //  E0[2] <- P1    // Epilog runs stage 2 only, so the state after is [0].
+  //  E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
+  //
+  // This creates a single-successor single-predecessor sequence of blocks for
+  // each epilog, which are kept this way for simplicity at this stage and
+  // cleaned up by the optimizer later.
+  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
+    Epilogs.push_back(nullptr);
+    // Each inner iteration peels a block with exactly one live stage J; the
+    // last block peeled is the one kept in Epilogs for this I.
+    for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
+      LS.reset();
+      LS[J] = 1;
+      Epilogs.back() = peelKernel(LPD_Back);
+      LiveStages[Epilogs.back()] = LS;
+      AvailableStages[Epilogs.back()] = AS;
+    }
+  }
+
+  // Now we've defined all the prolog and epilog blocks as a fallthrough
+  // sequence, add the edges that will be followed if the loop trip count is
+  // lower than the number of stages (connecting prologs directly with epilogs).
+  auto PI = Prologs.begin();
+  auto EI = Epilogs.begin();
+  assert(Prologs.size() == Epilogs.size());
+  for (; PI != Prologs.end(); ++PI, ++EI) {
+    MachineBasicBlock *Pred = *(*EI)->pred_begin();
+    (*PI)->addSuccessor(*EI);
+    // Give every phi in the epilog an extra incoming (value, block) pair for
+    // the new prolog edge.
+    for (MachineInstr &MI : (*EI)->phis()) {
+      Register Reg = MI.getOperand(1).getReg();
+      MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
+      // If the value is defined in the existing predecessor, use the
+      // corresponding register from the prolog instead.
+      if (Use && Use->getParent() == Pred)
+        Reg = getEquivalentRegisterIn(Reg, *PI);
+      MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+      MI.addOperand(MachineOperand::CreateMBB(*PI));
+    }
+  }
+
+  // Create a list of all blocks in order.
+  SmallVector Blocks;
+  llvm::copy(PeeledFront, std::back_inserter(Blocks));
+  Blocks.push_back(BB);
+  llvm::copy(PeeledBack, std::back_inserter(Blocks));
+
+  // Iterate in reverse order over all instructions, remapping as we go.
+  for (MachineBasicBlock *B : reverse(Blocks)) {
+    // Walk backwards from the first terminator down to, and including, the
+    // first non-phi instruction.
+    for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+         I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
+      MachineInstr *MI = &*I++;
+      rewriteUsesOf(MI);
+    }
+  }
+  // Now all remapping has been done, we're free to optimize the generated code.
+  for (MachineBasicBlock *B : reverse(Blocks))
+    EliminateDeadPhis(B, MRI, LIS);
+  EliminateDeadPhis(ExitingBB, MRI, LIS);
+}
+
+/// Insert a new block between BB and its exit successor. The new block holds
+/// one phi per phi of BB, and uses of the corresponding values outside BB are
+/// redirected to those phis. Returns the new block.
+MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
+  MachineFunction &MF = *BB->getParent();
+  // The exit is the successor of BB that is not BB itself.
+  MachineBasicBlock *Exit = *BB->succ_begin();
+  if (Exit == BB)
+    Exit = *std::next(BB->succ_begin());
+
+  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+  MF.insert(std::next(BB->getIterator()), NewBB);
+
+  // Clone all phis in BB into NewBB and rewrite.
+  for (MachineInstr &MI : BB->phis()) {
+    auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
+    // NOTE(review): operand 3 is presumably the loop-carried value of the
+    // phi - confirm against the phi operand order used for BB.
+    Register OldR = MI.getOperand(3).getReg();
+    Register R = MRI.createVirtualRegister(RC);
+    // Collect the users first; substituteRegister changes OldR's use list.
+    SmallVector Uses;
+    for (MachineInstr &Use : MRI.use_instructions(OldR))
+      if (Use.getParent() != BB)
+        Uses.push_back(&Use);
+    for (MachineInstr *Use : Uses)
+      Use->substituteRegister(OldR, R, /*SubIdx=*/0,
+                              *MRI.getTargetRegisterInfo());
+    MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+                           .addReg(OldR)
+                           .addMBB(BB);
+    BlockMIs[{NewBB, &MI}] = NI;
+    CanonicalMIs[NI] = &MI;
+  }
+  // Splice NewBB into the CFG edge BB -> Exit.
+  BB->replaceSuccessor(Exit, NewBB);
+  Exit->replacePhiUsesWith(BB, NewBB);
+  NewBB->addSuccessor(Exit);
+
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector Cond;
+  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
+  (void)CanAnalyzeBr;
+  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+  // Re-emit BB's branch with whichever target was Exit now pointing at NewBB.
+  TII->removeBranch(*BB);
+  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
+                    Cond, DebugLoc());
+  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
+  return NewBB;
+}
+
+/// Return the register that corresponds to \p Reg in block \p BB: the same
+/// def operand index of the copy, in \p BB, of the instruction defining
+/// \p Reg.
+Register
+PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
+                                                       MachineBasicBlock *BB) {
+  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
+}
+
+void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
+  if (MI->isPHI()) {
+    // This is an illegal PHI. The loop-carried (desired) value is operand 3,
+    // and it is produced by this block.
+ Register PhiR = MI->getOperand(0).getReg(); + Register R = MI->getOperand(3).getReg(); + int RMIStage = getStage(MRI.getUniqueVRegDef(R)); + if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage)) + R = MI->getOperand(1).getReg(); + MRI.setRegClass(R, MRI.getRegClass(PhiR)); + MRI.replaceRegWith(PhiR, R); + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + return; + } + + int Stage = getStage(MI); + if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 || + LiveStages[MI->getParent()].test(Stage)) + // Instruction is live, no rewriting to do. + return; + + for (MachineOperand &DefMO : MI->defs()) { + SmallVector, 4> Subs; + for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) { + // Only PHIs can use values from this block by construction. + // Match with the equivalent PHI in B. + assert(UseMI.isPHI()); + Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(), + MI->getParent()); + Subs.emplace_back(&UseMI, Reg); + } + for (auto &Sub : Subs) + Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + } + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); +} + +void PeelingModuloScheduleExpander::fixupBranches() { + std::unique_ptr Info = + TII->analyzeLoopForPipelining(BB); + assert(Info); + + // Work outwards from the kernel. + bool KernelDisposed = false; + int TC = Schedule.getNumStages() - 1; + for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend(); + ++PI, ++EI, --TC) { + MachineBasicBlock *Prolog = *PI; + MachineBasicBlock *Fallthrough = *Prolog->succ_begin(); + MachineBasicBlock *Epilog = *EI; + SmallVector Cond; + TII->removeBranch(*Prolog); + Optional StaticallyGreater = + Info->createTripCountGreaterCondition(TC, *Prolog, Cond); + if (!StaticallyGreater.hasValue()) { + LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); + // Dynamically branch based on Cond. 
+ TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc()); + } else if (*StaticallyGreater == false) { + LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n"); + // Prolog never falls through; branch to epilog and orphan interior + // blocks. Leave it to unreachable-block-elim to clean up. + Prolog->removeSuccessor(Fallthrough); + for (MachineInstr &P : Fallthrough->phis()) { + P.RemoveOperand(2); + P.RemoveOperand(1); + } + TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc()); + KernelDisposed = true; + } else { + LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n"); + // Prolog always falls through; remove incoming values in epilog. + Prolog->removeSuccessor(Epilog); + for (MachineInstr &P : Epilog->phis()) { + P.RemoveOperand(4); + P.RemoveOperand(3); + } + } + } + + if (!KernelDisposed) { + Info->adjustTripCount(-(Schedule.getNumStages() - 1)); + Info->setPreheader(Prologs.back()); + } else { + Info->disposed(); + } +} + +void PeelingModuloScheduleExpander::rewriteKernel() { + KernelRewriter KR(*Schedule.getLoop(), Schedule); + KR.rewrite(); +} + +void PeelingModuloScheduleExpander::expand() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = Schedule.getLoop()->getLoopPreheader(); + LLVM_DEBUG(Schedule.dump()); + + rewriteKernel(); + peelPrologAndEpilogs(); + fixupBranches(); +} + +void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() { + BB = Schedule.getLoop()->getTopBlock(); + Preheader = Schedule.getLoop()->getLoopPreheader(); + + // Dump the schedule before we invalidate and remap all its instructions. + // Stash it in a string so we can print it if we found an error. + std::string ScheduleDump; + raw_string_ostream OS(ScheduleDump); + Schedule.print(OS); + OS.flush(); + + // First, run the normal ModuleScheduleExpander. We don't support any + // InstrChanges. 
+ assert(LIS && "Requires LiveIntervals!"); + ModuloScheduleExpander MSE(MF, Schedule, *LIS, + ModuloScheduleExpander::InstrChangesTy()); + MSE.expand(); + MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel(); + if (!ExpandedKernel) { + // The expander optimized away the kernel. We can't do any useful checking. + MSE.cleanup(); + return; + } + // Before running the KernelRewriter, re-add BB into the CFG. + Preheader->addSuccessor(BB); + + // Now run the new expansion algorithm. + KernelRewriter KR(*Schedule.getLoop(), Schedule); + KR.rewrite(); + peelPrologAndEpilogs(); + + // Collect all illegal phis that the new algorithm created. We'll give these + // to KernelOperandInfo. + SmallPtrSet IllegalPhis; + for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) { + if (NI->isPHI()) + IllegalPhis.insert(&*NI); + } + + // Co-iterate across both kernels. We expect them to be identical apart from + // phis and full COPYs (we look through both). + SmallVector, 8> KOIs; + auto OI = ExpandedKernel->begin(); + auto NI = BB->begin(); + for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) { + while (OI->isPHI() || OI->isFullCopy()) + ++OI; + while (NI->isPHI() || NI->isFullCopy()) + ++NI; + assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!"); + // Analyze every operand separately. 
+ for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin(); + OOpI != OI->operands_end(); ++OOpI, ++NOpI) + KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis), + KernelOperandInfo(&*NOpI, MRI, IllegalPhis)); + } + + bool Failed = false; + for (auto &OldAndNew : KOIs) { + if (OldAndNew.first == OldAndNew.second) + continue; + Failed = true; + errs() << "Modulo kernel validation error: [\n"; + errs() << " [golden] "; + OldAndNew.first.print(errs()); + errs() << " "; + OldAndNew.second.print(errs()); + errs() << "]\n"; + } + + if (Failed) { + errs() << "Golden reference kernel:\n"; + ExpandedKernel->print(errs()); + errs() << "New kernel:\n"; + BB->print(errs()); + errs() << ScheduleDump; + report_fatal_error( + "Modulo kernel validation (-pipeliner-experimental-cg) failed"); + } + + // Cleanup by removing BB from the CFG again as the original + // ModuloScheduleExpander intended. + Preheader->removeSuccessor(BB); + MSE.cleanup(); +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleTestPass implementation +//===----------------------------------------------------------------------===// +// This pass constructs a ModuloSchedule from its module and runs +// ModuloScheduleExpander. +// +// The module is expected to contain a single-block analyzable loop. +// The total order of instructions is taken from the loop as-is. +// Instructions are expected to be annotated with a PostInstrSymbol. +// This PostInstrSymbol must have the following format: +// "Stage=%d Cycle=%d". 
+//===----------------------------------------------------------------------===// + +namespace { +class ModuloScheduleTest : public MachineFunctionPass { +public: + static char ID; + + ModuloScheduleTest() : MachineFunctionPass(ID) { + initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void runOnLoop(MachineFunction &MF, MachineLoop &L); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // namespace + +char ModuloScheduleTest::ID = 0; + +INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", + "Modulo Schedule test pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test", + "Modulo Schedule test pass", false, false) + +bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) { + MachineLoopInfo &MLI = getAnalysis(); + for (auto *L : MLI) { + if (L->getTopBlock() != L->getBottomBlock()) + continue; + runOnLoop(MF, *L); + return false; + } + return false; +} + +static void parseSymbolString(StringRef S, int &Cycle, int &Stage) { + std::pair StageAndCycle = getToken(S, "_"); + std::pair StageTokenAndValue = + getToken(StageAndCycle.first, "-"); + std::pair CycleTokenAndValue = + getToken(StageAndCycle.second, "-"); + if (StageTokenAndValue.first != "Stage" || + CycleTokenAndValue.first != "_Cycle") { + llvm_unreachable( + "Bad post-instr symbol syntax: see comment in ModuloScheduleTest"); + return; + } + + StageTokenAndValue.second.drop_front().getAsInteger(10, Stage); + CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle); + + dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n"; +} + +void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) { + LiveIntervals &LIS = getAnalysis(); + MachineBasicBlock *BB = 
L.getTopBlock(); + dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n"; + + DenseMap Cycle, Stage; + std::vector Instrs; + for (MachineInstr &MI : *BB) { + if (MI.isTerminator()) + continue; + Instrs.push_back(&MI); + if (MCSymbol *Sym = MI.getPostInstrSymbol()) { + dbgs() << "Parsing post-instr symbol for " << MI; + parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]); + } + } + + ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle), + std::move(Stage)); + ModuloScheduleExpander MSE( + MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy()); + MSE.expand(); + MSE.cleanup(); +} + +//===----------------------------------------------------------------------===// +// ModuloScheduleTestAnnotater implementation +//===----------------------------------------------------------------------===// + +void ModuloScheduleTestAnnotater::annotate() { + for (MachineInstr *MI : S.getInstructions()) { + SmallVector SV; + raw_svector_ostream OS(SV); + OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI); + MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str()); + MI->setPostInstrSymbol(MF, Sym); + } +} diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index c70b6225213..1a493964e67 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -97,7 +97,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); // See if we already saw this register. if (!PHIsInCycle.insert(MI).second) @@ -109,16 +109,15 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, // Scan the PHI operands. 
for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { - unsigned SrcReg = MI->getOperand(i).getReg(); + Register SrcReg = MI->getOperand(i).getReg(); if (SrcReg == DstReg) continue; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); // Skip over register-to-register moves. - if (SrcMI && SrcMI->isCopy() && - !SrcMI->getOperand(0).getSubReg() && + if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() && !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) { + Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) { SrcReg = SrcMI->getOperand(1).getReg(); SrcMI = MRI->getVRegDef(SrcReg); } @@ -142,8 +141,8 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, /// other PHIs in a cycle. bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); - unsigned DstReg = MI->getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + Register DstReg = MI->getOperand(0).getReg(); + assert(Register::isVirtualRegister(DstReg) && "PHI destination is not a virtual register"); // See if we already saw this register. 
@@ -177,7 +176,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - unsigned OldReg = MI->getOperand(0).getReg(); + Register OldReg = MI->getOperand(0).getReg(); if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) continue; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 948a5835438..4dd4c4b1084 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -31,7 +31,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" @@ -168,7 +170,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { // Remove dead IMPLICIT_DEF instructions. for (MachineInstr *DefMI : ImpDefs) { - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) LIS->RemoveMachineInstrFromMaps(*DefMI); @@ -183,6 +185,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MF.DeleteMachineInstr(I.first); } + // TODO: we should use the incremental DomTree updater here. 
+ if (Changed) + if (auto *MDT = getAnalysisIfAvailable()) + MDT->getBase().recalculate(MF); + LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); @@ -240,7 +247,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineInstr *MPhi = MBB.remove(&*MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; - unsigned DestReg = MPhi->getOperand(0).getReg(); + Register DestReg = MPhi->getOperand(0).getReg(); assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); @@ -252,11 +259,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. + MachineInstr *PHICopy = nullptr; const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (allPhiOperandsUndefined(*MPhi, *MRI)) // If all sources of a PHI node are implicit_def or undef uses, just emit an // implicit_def instead of a copy. - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, @@ -273,15 +281,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), DestReg) - .addReg(IncomingReg); + // Give the target possiblity to handle special cases fallthrough otherwise + PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + IncomingReg, DestReg); } // Update live variable information if there is any. 
if (LV) { - MachineInstr &PHICopy = *std::prev(AfterPHIsIt); - if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -302,7 +308,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // killed. Note that because the value is defined in several places (once // each for each incoming block), the "def" block and instruction fields // for the VarInfo is not filled in. - LV->addVirtualRegisterKilled(IncomingReg, PHICopy); + LV->addVirtualRegisterKilled(IncomingReg, *PHICopy); } // Since we are going to be deleting the PHI node, if it is the last use of @@ -312,15 +318,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // If the result is dead, update LV. if (isDead) { - LV->addVirtualRegisterDead(DestReg, PHICopy); + LV->addVirtualRegisterDead(DestReg, *PHICopy); LV->removeVirtualRegisterDead(DestReg, *MPhi); } } // Update LiveIntervals for the new copy or implicit def. if (LIS) { - SlotIndex DestCopyIndex = - LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt)); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); if (IncomingReg) { @@ -368,11 +373,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // IncomingReg register in the corresponding predecessor basic block. 
SmallPtrSet MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { - unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || isImplicitlyDefined(SrcReg, *MRI); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + assert(Register::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); // Get the MachineBasicBlock equivalent of the BasicBlock that is the source @@ -406,9 +411,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = + TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(), + SrcReg, SrcSubReg, IncomingReg); } } @@ -457,7 +462,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = std::prev(InsertPos); + KillInst = NewSrcInstr; } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -567,7 +572,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI->getOperand(i).getReg(); + Register Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); // Is there a critical edge from PreMBB to MBB? 
if (PreMBB->succ_size() == 1) diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index a3fa1b0ad8e..529fde84e39 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -78,7 +78,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { MIB.add(MO); FirstActualI->eraseFromParent(); - MF.ensureAlignment(4); + MF.ensureAlignment(Align(16)); return true; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index b918396aa8c..54f1d38ed10 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -418,7 +418,7 @@ namespace { const MachineRegisterInfo &MRI, const TargetInstrInfo *TII = nullptr) : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!Register::isPhysicalRegister(Reg)) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); } @@ -460,8 +460,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) return false; - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) return false; if (MRI->hasOneNonDBGUse(SrcReg)) @@ -581,7 +581,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, MRI->constrainRegClass(DstReg, DstRC); } - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); @@ -609,8 +609,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - (SrcReg2 != 0 && 
TargetRegisterInfo::isPhysicalRegister(SrcReg2))) + Register::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. @@ -663,7 +663,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. unsigned Reg = RegSubReg.Reg; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); @@ -675,7 +675,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, do { CurSrcPair = SrcToLook.pop_back_val(); // As explained above, do not handle physical registers - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); @@ -723,7 +723,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, // we will have to check that they aren't redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + if (Register::isPhysicalRegister(CurSrcPair.Reg)) return false; // Keep following the chain if the value isn't any better yet. @@ -761,7 +761,7 @@ insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, // NewRC is only correct if no subregisters are involved. findNextSource() // should have rejected those cases already. 
assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); - unsigned NewVR = MRI.createVirtualRegister(NewRC); + Register NewVR = MRI.createVirtualRegister(NewRC); MachineBasicBlock *MBB = OrigPHI.getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), TII.get(TargetOpcode::PHI), NewVR); @@ -1170,7 +1170,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { "Coalescer can understand multiple defs?!"); const MachineOperand &MODef = MI.getOperand(0); // Do not rewrite physical definitions. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (Register::isPhysicalRegister(MODef.getReg())) return false; bool Changed = false; @@ -1221,7 +1221,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { MachineInstr & PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, RewriteMapTy &RewriteMap) { - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + assert(!Register::isPhysicalRegister(Def.Reg) && "We do not rewrite physical registers"); // Find the new source to use in the COPY rewrite. @@ -1229,7 +1229,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, // Insert the COPY. const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVReg = MRI->createVirtualRegister(DefRC); + Register NewVReg = MRI->createVirtualRegister(DefRC); MachineInstr *NewCopy = BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), @@ -1280,7 +1280,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( while (CpyRewriter.getNextRewritableSource(Src, Def)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. 
- if (TargetRegisterInfo::isPhysicalRegister(Def.Reg)) + if (Register::isPhysicalRegister(Def.Reg)) return false; // If we do not know how to rewrite this definition, there is no point @@ -1315,12 +1315,11 @@ bool PeepholeOptimizer::isLoadFoldable( if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. - if (!MI.getOperand(0).getSubReg() && - TargetRegisterInfo::isVirtualRegister(Reg) && + if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) && MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; @@ -1336,8 +1335,8 @@ bool PeepholeOptimizer::isMoveImmediate( return false; if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(Reg)) { ImmDefMIs.insert(std::make_pair(Reg, &MI)); ImmDefRegs.insert(Reg); return true; @@ -1359,8 +1358,8 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, // Ignore dead implicit defs. 
if (MO.isImplicit() && MO.isDead()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; @@ -1393,12 +1392,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, DenseMap &CopyMIs) { assert(MI.isCopy() && "expected a COPY machine instruction"); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + Register DstReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(DstReg)) return false; if (CopySrcRegs.insert(SrcReg).second) { @@ -1416,7 +1415,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, if (SrcSubReg != PrevSrcSubReg) return false; - unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->getOperand(0).getReg(); // Only replace if the copy register class is the same. 
// @@ -1433,8 +1432,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, } bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { - return TargetRegisterInfo::isPhysicalRegister(Reg) && - !MRI->isAllocatable(Reg); + return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg); } bool PeepholeOptimizer::foldRedundantNAPhysCopy( @@ -1444,9 +1442,9 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (DisableNAPhysCopyOpt) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) { // %vreg = COPY %physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. @@ -1454,7 +1452,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) return false; // %physreg = COPY %vreg @@ -1467,7 +1465,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( return false; } - unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + Register PrevDstReg = PrevCopy->second->getOperand(0).getReg(); if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. 
@@ -1489,7 +1487,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( static bool isVirtualRegisterOperand(MachineOperand &MO) { if (!MO.isReg()) return false; - return TargetRegisterInfo::isVirtualRegister(MO.getReg()); + return Register::isVirtualRegister(MO.getReg()); } bool PeepholeOptimizer::findTargetRecurrence( @@ -1662,7 +1660,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { for (const MachineOperand &MO : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef() && isNAPhysCopy(Reg)) { const auto &Def = NAPhysToVirtMIs.find(Reg); if (Def != NAPhysToVirtMIs.end()) { @@ -1778,7 +1776,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); if (MI->isCall()) - MI->getMF()->updateCallSiteInfo(MI, FoldMI); + MI->getMF()->moveCallSiteInfo(MI, FoldMI); MI->eraseFromParent(); DefMI->eraseFromParent(); MRI->markUsesInDebugValueAsUndef(FoldedReg); @@ -1810,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. - assert(Def->getNumOperands() == 2 && "Invalid number of operands"); + // There may be implicit uses preventing the copy to be moved across + // some target specific register definitions + assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 && + "Invalid number of operands"); + assert(!Def->hasImplicitDef() && "Only implicit uses are allowed"); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. 
@@ -1855,6 +1857,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { SrcIdx = OpIdx; } + // In some rare case, Def has no input, SrcIdx is out of bound, + // getOperand(SrcIdx) will fail below. + if (SrcIdx >= Def->getNumOperands()) + return ValueTrackerResult(); + // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY // will break the assumed guarantees for the upper bits. for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { @@ -2087,7 +2094,7 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. - if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { + if (!Register::isPhysicalRegister(Reg) && OneRegSrc) { MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); if (DI != MRI.def_end()) { Def = DI->getParent(); diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp index 2752e186875..0d2f6f99ca9 100644 --- a/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -76,7 +76,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn, } for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto *CI = dyn_cast(I->getUser()); + auto *CI = cast(I->getUser()); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); ++I; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b38987ad1c9..11bff45f9ad 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -73,9 +73,9 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "Processing " << *MI); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // For virtual 
registers, mark all uses as , and convert users to // implicit-def when possible. for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { @@ -100,8 +100,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { for (MachineOperand &MO : UserMI->operands()) { if (!MO.isReg()) continue; - unsigned UserReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + Register UserReg = MO.getReg(); + if (!Register::isPhysicalRegister(UserReg) || !TRI->regsOverlap(Reg, UserReg)) continue; // UserMI uses or redefines Reg. Set flags on all uses. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index d463bee6759..729f06dda62 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -898,7 +898,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign(); + unsigned Align = MFI.getLocalFrameMaxAlign().value(); // Adjust to alignment boundary. 
Offset = alignTo(Offset, Align, Skew); diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index da3ef4b771f..74e721dbd13 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -129,7 +129,7 @@ const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr &V = FSValues[FI]; if (!V) - V = llvm::make_unique(FI, TII); + V = std::make_unique(FI, TII); return V.get(); } @@ -138,7 +138,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr &E = GlobalCallEntries[GV]; if (!E) - E = llvm::make_unique(GV, TII); + E = std::make_unique(GV, TII); return E.get(); } @@ -147,6 +147,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr &E = ExternalCallEntries[ES]; if (!E) - E = llvm::make_unique(ES, TII); + E = std::make_unique(ES, TII); return E.get(); } diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp index f05c97ad621..2850033e641 100644 --- a/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/lib/CodeGen/ReachingDefAnalysis.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 1cbe75c27d1..156daaa03bb 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; enqueue(&LIS->getInterval(Reg)); @@ -154,7 +154,7 @@ void RegAllocBase::allocatePhysRegs() { continue; } 
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + assert(Register::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); enqueue(SplitVirtReg); ++NumNewQueued; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 2ffa5e389f8..44d0233604e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -90,7 +90,7 @@ namespace { explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -200,11 +200,11 @@ namespace { void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { - return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); @@ -264,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not live out of the current block. bool RegAllocFast::mayLiveOut(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) { + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -272,7 +272,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { // If this block loops back to itself, it would be necessary to check whether // the use comes after the def. 
if (MBB->isSuccessor(MBB)) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; } @@ -282,7 +282,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. return !MBB->succ_empty(); } @@ -293,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { /// Returns false if \p VirtReg is known to not be live into the current block. bool RegAllocFast::mayLiveIn(unsigned VirtReg) { - if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) + if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); // See if the first \p Limit def of the register are all in the current block. @@ -301,7 +301,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) { unsigned C = 0; for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) { if (DefInst.getParent() != MBB || ++C >= Limit) { - MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return !MBB->pred_empty(); } } @@ -394,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) { /// Mark virtreg as no longer available. void RegAllocFast::killVirtReg(unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) @@ -405,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { /// stack slot if needed. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && @@ -455,9 +455,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { if (MO.isUndef()) return; - unsigned PhysReg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Bad usePhysReg operand"); + Register PhysReg = MO.getReg(); + assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { @@ -626,9 +625,9 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); if (!VRegDef || !isCoalescable(*VRegDef)) @@ -646,7 +645,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { if (isCoalescable(MI)) { - unsigned Reg = MI.getOperand(1).getReg(); + Register Reg = MI.getOperand(1).getReg(); Reg = traceCopyChain(Reg); if (Reg != 0) return Reg; @@ -662,7 +661,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { const unsigned VirtReg = LR.VirtReg; - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + assert(Register::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); @@ -671,8 +670,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, 
unsigned Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (TargetRegisterInfo::isPhysicalRegister(Hint0) && - MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { + if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) && + RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint0); if (Cost < spillDirty) { @@ -692,9 +691,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { // Try other hint. unsigned Hint1 = traceCopies(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint1) && - MRI->isAllocatable(Hint1) && RC.contains(Hint1) && - !isRegUsedInInstr(Hint1)) { + if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) && + RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint1); if (Cost < spillDirty) { @@ -752,8 +750,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { assert(MO.isUndef() && "expected undef use"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg"); + Register VirtReg = MO.getReg(); + assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg"); LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; @@ -778,14 +776,13 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Allocates a register for VirtReg and mark it as dirty. 
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. - if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + if ((!Hint || !Register::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. @@ -812,8 +809,7 @@ RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint) { - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register"); + assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); @@ -866,7 +862,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, } // Handle subregister index. - MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setReg(PhysReg ? 
TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); MO.setIsRenamable(true); MO.setSubReg(0); @@ -893,8 +889,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, SmallSet ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { @@ -908,8 +904,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) @@ -922,8 +919,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (!MO.isTied()) continue; LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO @@ -947,8 +945,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (!MO.isEarlyClobber()) 
continue; // Note: defineVirtReg may invalidate MO. @@ -961,8 +960,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, UsedInInstr.clear(); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); markRegUsedInInstr(Reg); @@ -1002,10 +1002,8 @@ void RegAllocFast::dumpState() { e = LiveVirtRegs.end(); i != e; ++i) { if (!i->PhysReg) continue; - assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && - "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && - "Bad map value"); + assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key"); + assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } } @@ -1045,9 +1043,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || @@ -1096,8 +1094,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (unsigned I = 0; I != VirtOpEnd; ++I) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) + continue; if (MO.isUse()) { if (MO.isUndef()) { HasUndefUse = true; @@ -1124,8 +1123,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.isUse()) 
continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1139,8 +1138,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (hasEarlyClobbers) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + Register Reg = MO.getReg(); + if (!Reg || !Register::isPhysicalRegister(Reg)) + continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MO.isTied()) continue; markRegUsedInInstr(Reg); @@ -1166,10 +1166,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->isAllocatable(Reg)) + if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } @@ -1180,10 +1179,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We have already dealt with phys regs in the previous scan. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { @@ -1215,8 +1214,8 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // mostly constants and frame indices. 
if (!MO.isReg()) return; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) return; // See if this virtual register has already been allocated to a physical diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 771fc46415d..d27db678f02 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -685,7 +685,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); const unsigned Reg = LI->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && + assert(Register::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); unsigned Prio; @@ -899,7 +899,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + assert(Register::isVirtualRegister(Intf->reg) && "Only expecting virtual register interference from query"); // Do not allow eviction of a virtual register if we are in the middle @@ -984,7 +984,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, continue; // Cannot evict non virtual reg interference. - if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + if (!Register::isVirtualRegister(Intf->reg)) return false; // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) @@ -2881,7 +2881,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { continue; } // Get the current assignment. - Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + Register OtherPhysReg = Register::isPhysicalRegister(OtherReg) ? OtherReg : VRM->getPhys(OtherReg); // Push the collected information. 
@@ -2919,7 +2919,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallVector RecoloringCandidates; HintsInfo Info; unsigned Reg = VirtReg.reg; - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -2932,7 +2932,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { Reg = RecoloringCandidates.pop_back_val(); // We cannot recolor physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); @@ -2940,7 +2940,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { // Get the live interval mapped with this virtual register to be able // to check for the interference with the new color. LiveInterval &LI = LIS->getInterval(Reg); - unsigned CurrPhys = VRM->getPhys(Reg); + Register CurrPhys = VRM->getPhys(Reg); // Check that the new color matches the register class constraints and // that it is free for this live range. if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || @@ -3021,7 +3021,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { for (LiveInterval *LI : SetOfBrokenHints) { - assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + assert(Register::isVirtualRegister(LI->reg) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). // Ignore those. diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 7a5a6c148ed..3c4a46b12f9 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -558,7 +558,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, // Iterate over all live ranges. 
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; VRegsToAlloc.insert(Reg); @@ -824,11 +824,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { if (!VRegsToAlloc.empty()) { const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr ConstraintsRoot = - llvm::make_unique(); - ConstraintsRoot->addConstraint(llvm::make_unique()); - ConstraintsRoot->addConstraint(llvm::make_unique()); + std::make_unique(); + ConstraintsRoot->addConstraint(std::make_unique()); + ConstraintsRoot->addConstraint(std::make_unique()); if (PBQPCoalescing) - ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(std::make_unique()); ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); bool PBQPAllocComplete = false; @@ -848,7 +848,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::string GraphFileName = FullyQualifiedName + "." + RS.str() + ".pbqpgraph"; std::error_code EC; - raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text); LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); G.dump(OS); diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index b37dfada710..757ff0e4495 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { auto SetRegAsDefined = [&RegMask] (unsigned Reg) { RegMask[Reg / 32] &= ~(1u << Reg % 32); }; + + // Some targets can clobber registers "inside" a call, typically in + // linker-generated code. 
+ for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF)) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SetRegAsDefined(*AI); + // Scan all the physical registers. When a register is defined in the current // function set it and all the aliasing registers as defined in the regmask. // FIXME: Rewrite to use regunits. @@ -164,7 +171,8 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { SetRegAsDefined(PReg); } - if (TargetFrameLowering::isSafeForNoCSROpt(F)) { + if (TargetFrameLowering::isSafeForNoCSROpt(F) && + MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) { ++NumCSROpt; LLVM_DEBUG(dbgs() << MF.getName() << " function optimized for not having CSR.\n"); diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index fc4be82d215..0205e619374 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -130,7 +130,11 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { }; if (const Function *F = findCalledFunction(M, MI)) { - UpdateRegMask(*F); + if (F->isDefinitionExact()) { + UpdateRegMask(*F); + } else { + LLVM_DEBUG(dbgs() << "Function definition is not exact\n"); + } } else { LLVM_DEBUG(dbgs() << "Failed to find call target function\n"); } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2db6ab454ce..6ff5ddbc023 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -406,8 +406,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Partial = SrcSub || DstSub; // If one register is a physreg, it must be Dst. 
- if (TargetRegisterInfo::isPhysicalRegister(Src)) { - if (TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(Src)) { + if (Register::isPhysicalRegister(Dst)) return false; std::swap(Src, Dst); std::swap(SrcSub, DstSub); @@ -416,7 +416,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); - if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + if (Register::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg. if (DstSub) { Dst = TRI.getSubReg(Dst, DstSub); @@ -474,8 +474,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants - assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); - assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + assert(Register::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(Register::isPhysicalRegister(Dst) && DstSub) && "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; @@ -483,7 +483,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { } bool CoalescerPair::flip() { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return false; std::swap(SrcReg, DstReg); std::swap(SrcIdx, DstIdx); @@ -507,8 +507,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { } // Now check that Dst matches DstReg. - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + if (Register::isPhysicalRegister(DstReg)) { + if (!Register::isPhysicalRegister(Dst)) return false; assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. 
@@ -802,7 +802,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return { false, false }; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); + Register NewReg = NewDstMO.getReg(); if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return { false, false }; @@ -835,8 +835,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return { false, false }; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && + if (Register::isVirtualRegister(IntA.reg) && + Register::isVirtualRegister(IntB.reg) && !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) return { false, false }; if (NewMI != DefMI) { @@ -877,7 +877,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + if (Register::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else UseMO.setReg(NewReg); @@ -1188,7 +1188,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just /// defining a subregister. static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + assert(!Register::isPhysicalRegister(Reg) && "This code cannot handle physreg aliasing"); for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) @@ -1209,7 +1209,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); unsigned DstIdx = CP.isFlipped() ? 
CP.getSrcIdx() : CP.getDstIdx(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (Register::isPhysicalRegister(SrcReg)) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); @@ -1240,7 +1240,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, return false; // Only support subregister destinations when the def is read-undef. MachineOperand &DstOperand = CopyMI->getOperand(0); - unsigned CopyDstReg = DstOperand.getReg(); + Register CopyDstReg = DstOperand.getReg(); if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; @@ -1254,7 +1254,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (Register::isPhysicalRegister(DstReg)) { unsigned NewDstReg = DstReg; unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), @@ -1269,7 +1269,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { // Theoretically, some stack frame reference could exist. Just make sure // it hasn't actually happened. - assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + assert(Register::isVirtualRegister(DstReg) && "Only expect to deal with virtual or physical registers"); } } @@ -1317,7 +1317,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (MO.isReg()) { assert(MO.isImplicit() && "No explicit operands after implicit operands."); // Discard VReg implicit defs. 
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) ImplicitOps.push_back(MO); } } @@ -1336,12 +1336,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MachineOperand &MO = NewMI.getOperand(i); if (MO.isReg() && MO.isDef()) { assert(MO.isImplicit() && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + Register::isPhysicalRegister(MO.getReg())); NewMIImplDefs.push_back(MO.getReg()); } } - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (Register::isVirtualRegister(DstReg)) { unsigned NewIdx = NewMI.getOperand(0).getSubReg(); if (DefRC != nullptr) { @@ -1428,7 +1428,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + assert(Register::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); NewMI.getOperand(0).setIsDead(true); NewMI.addOperand(MachineOperand::CreateReg( @@ -1480,7 +1480,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) UseMO.substPhysReg(DstReg, *TRI); else UseMO.setReg(DstReg); @@ -1651,7 +1651,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 
nullptr : &LIS->getInterval(DstReg); if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { @@ -2411,8 +2411,8 @@ std::pair JoinVals::followCopyChain( assert(MI && "No defining instruction"); if (!MI->isFullCopy()) return std::make_pair(VNI, TrackReg); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return std::make_pair(VNI, TrackReg); const LiveInterval &LI = LIS->getInterval(SrcReg); @@ -3189,9 +3189,9 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No instruction to erase"); if (MI->isCopy()) { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && - Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() && + Reg != CP.getDstReg()) ShrinkRegs.push_back(Reg); } ErasedInstrs.insert(MI); @@ -3463,10 +3463,10 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { if (Copy->getOperand(1).isUndef()) return false; - unsigned SrcReg = Copy->getOperand(1).getReg(); - unsigned DstReg = Copy->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) - || TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register SrcReg = Copy->getOperand(1).getReg(); + Register DstReg = Copy->getOperand(0).getReg(); + if (Register::isPhysicalRegister(SrcReg) || + Register::isPhysicalRegister(DstReg)) return false; return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) @@ -3526,12 +3526,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; // Check if the destination of this copy has any other affinity. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + if (Register::isPhysicalRegister(DstReg) || // If SrcReg is a physical register, the copy won't be coalesced. // Ignoring it may have other side effect (like missing // rematerialization). So keep it. - TargetRegisterInfo::isPhysicalRegister(SrcReg) || - !isTerminalReg(DstReg, Copy, MRI)) + Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it interferes with any other @@ -3554,7 +3553,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { if (OtherReg == SrcReg) OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. - if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || + if (Register::isPhysicalRegister(OtherReg) || isTerminalReg(OtherReg, MI, MRI)) continue; // Check that OtherReg interfere with DstReg. diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 7d9b3aa9b2d..bf192d1c530 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -134,6 +134,22 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { } dbgs() << '\n'; } + +LLVM_DUMP_METHOD +void PressureChange::dump() const { + dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n"; +} + +void RegPressureDelta::dump() const { + dbgs() << "[Excess="; + Excess.dump(); + dbgs() << ", CriticalMax="; + CriticalMax.dump(); + dbgs() << ", CurrentMax="; + CurrentMax.dump(); + dbgs() << "]\n"; +} + #endif void RegPressureTracker::increaseRegPressure(unsigned RegUnit, @@ -219,7 +235,7 @@ void LiveRegSet::clear() { } static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return &LIS.getInterval(Reg); return LIS.getCachedRegUnit(Reg); } @@ -345,7 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { 
assert(isBottomClosed() && "need bottom-up tracking to intialize."); for (const RegisterMaskPair &Pair : P.LiveOutRegs) { unsigned RegUnit = Pair.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) + if (Register::isVirtualRegister(RegUnit) && !RPTracker.hasUntiedDef(RegUnit)) increaseSetPressure(LiveThruPressure, *MRI, RegUnit, LaneBitmask::getNone(), Pair.LaneMask); @@ -406,7 +422,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, SlotIndex Pos, LaneBitmask SafeDefault, bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const LiveInterval &LI = LIS.getInterval(RegUnit); LaneBitmask Result; if (TrackLaneMasks && LI.hasSubRanges()) { @@ -483,7 +499,7 @@ class RegisterOperandsCollector { void collectOperand(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) pushReg(Reg, RegOpers.Uses); @@ -503,7 +519,7 @@ class RegisterOperandsCollector { void pushReg(unsigned Reg, SmallVectorImpl &RegUnits) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll())); } else if (MRI.isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) @@ -514,7 +530,7 @@ class RegisterOperandsCollector { void collectOperandLanes(const MachineOperand &MO) const { if (!MO.isReg() || !MO.getReg()) return; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned SubRegIdx = MO.getSubReg(); if (MO.isUse()) { if (!MO.isUndef() && !MO.isInternalRead()) @@ -535,7 +551,7 @@ class RegisterOperandsCollector { void pushRegLanes(unsigned Reg, unsigned SubRegIdx, SmallVectorImpl &RegUnits) const { - if 
(TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LaneBitmask LaneMask = SubRegIdx != 0 ? TRI.getSubRegIndexLaneMask(SubRegIdx) : MRI.getMaxLaneMaskForVReg(Reg); @@ -590,7 +606,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. unsigned RegUnit = I->RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none()) AddFlagsMI->setRegisterDefReadUndef(RegUnit); @@ -616,7 +632,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, if (AddFlagsMI != nullptr) { for (const RegisterMaskPair &P : DeadDefs) { unsigned RegUnit = P.RegUnit; - if (!TargetRegisterInfo::isVirtualRegister(RegUnit)) + if (!Register::isVirtualRegister(RegUnit)) continue; LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, Pos.getDeadSlot()); @@ -825,7 +841,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers, if (TrackUntiedDefs) { for (const RegisterMaskPair &Def : RegOpers.Defs) { unsigned RegUnit = Def.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + if (Register::isVirtualRegister(RegUnit) && (LiveRegs.contains(RegUnit) & Def.LaneMask).none()) UntiedDefs.insert(RegUnit); } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index bb19110e6d7..ec0868acab3 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -49,7 +49,7 @@ using namespace llvm; STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); -void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { +void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -96,12 +96,12 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) { } } -void 
RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::addRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.set(*RUI); } -void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) { +void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) { for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) BV.reset(*RUI); } @@ -133,8 +133,8 @@ void RegScavenger::determineKillsAndDefs() { } if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -204,8 +204,8 @@ void RegScavenger::forward() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -278,14 +278,14 @@ void RegScavenger::backward() { --MBBI; } -bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { +bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const { if (isReserved(Reg)) return includeReserved; return !LiveUnits.available(Reg); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { - for (unsigned Reg : *RC) { +Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (Register Reg : *RC) { if (!isRegUsed(Reg)) { LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) << "\n"); @@ -297,13 +297,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); - for (unsigned Reg : *RC) + for (Register Reg : *RC) if (!isRegUsed(Reg)) Mask.set(Reg); 
return Mask; } -unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, +Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI) { @@ -329,7 +329,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (Register::isVirtualRegister(MO.getReg())) { if (MO.isDef()) isVirtDefInsn = true; else if (MO.isKill()) @@ -430,7 +430,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // be usefull for this other vreg as well later. bool FoundVReg = false; for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { FoundVReg = true; break; } @@ -457,7 +457,7 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) { } RegScavenger::ScavengedInfo & -RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, +RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching @@ -531,7 +531,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, return Scavenged[SI]; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, +Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill) { MachineInstr &MI = *I; @@ -542,7 +542,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Exclude all the registers being used by the instruction. 
for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) Candidates.reset(*AI); } @@ -556,7 +556,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + Register SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. if (!isRegUsed(SReg)) { @@ -576,7 +576,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } -unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, +Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill) { @@ -620,8 +620,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, /// \p ReserveAfter controls whether the scavenged register needs to be reserved /// after the current instruction, otherwise it will only be reserved before the /// current instruction. -static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, - unsigned VReg, bool ReserveAfter) { +static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + Register VReg, bool ReserveAfter) { const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); #ifndef NDEBUG // Verify that all definitions and uses are in the same basic block. @@ -664,7 +664,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, // spill/reload if necessary. 
int SPAdj = 0; const TargetRegisterClass &RC = *MRI.getRegClass(VReg); - unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), + Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), ReserveAfter, SPAdj); MRI.replaceRegWith(VReg, SReg); ++NumScavengedRegs; @@ -694,17 +694,17 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : NMI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We only care about virtual registers and ignore virtual registers // created by the target callbacks in the process (those will be handled // in a scavenging round). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; if (!MO.readsReg()) continue; - unsigned SReg = scavengeVReg(MRI, RS, Reg, true); + Register SReg = scavengeVReg(MRI, RS, Reg, true); N->addRegisterKilled(SReg, &TRI, false); RS.setRegUsed(SReg); } @@ -716,10 +716,10 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Only vregs, no newly created vregs (see above). - if (!TargetRegisterInfo::isVirtualRegister(Reg) || - TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + if (!Register::isVirtualRegister(Reg) || + Register::virtReg2Index(Reg) >= InitialNumVirtRegs) continue; // We have to look at all operands anyway so we can precalculate here // whether there is a reading operand. 
This allows use to skip the use @@ -730,14 +730,14 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, NextInstructionReadsVReg = true; } if (MO.isDef()) { - unsigned SReg = scavengeVReg(MRI, RS, Reg, false); + Register SReg = scavengeVReg(MRI, RS, Reg, false); I->addRegisterDead(SReg, &TRI, false); } } } #ifndef NDEBUG for (const MachineOperand &MO : MBB.front().operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; assert(!MO.isInternalRead() && "Cannot assign inside bundles"); assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 22cff48c305..e3f5abb6301 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -138,7 +138,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:"); for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses; ++I) { - unsigned NewVReg = MRI->createVirtualRegister(RegClass); + Register NewVReg = MRI->createVirtualRegister(RegClass); LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg); Intervals.push_back(&NewLI); LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg)); @@ -390,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) { // there can't be any further splitting. 
bool Changed = false; for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (!LIS->hasInterval(Reg)) continue; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index a6bc7330e2c..ddbbd0f8d6e 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -871,7 +871,7 @@ public: report_fatal_error("TargetLowering instance is required"); auto *DL = &F.getParent()->getDataLayout(); - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &ACT = getAnalysis().getAssumptionCache(F); // Compute DT and LI only for functions that have the attribute. diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index 7776dffb4e9..b4037499d7d 100644 --- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.load, label %else // - - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %mask_1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx); + } // Create "cond" block // @@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { return; } + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // - // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx + // %Mask1 = and i16 %scalar_mask, i32 1 << Idx + // %cond = icmp ne i16 %mask_1, 0 // br i1 %Mask1, label %cond.store, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -555,6 +616,32 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // The result vector Value *VResult = PassThru; + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + LoadInst *Load = + Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx)); + VResult = + Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx)); + ++MemIndex; + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // @@ -563,8 +650,14 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { // br i1 %mask_1, label %cond.load, label %else // - Value *Predicate = - Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -633,13 +726,44 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { unsigned VectorWidth = VecType->getNumElements(); + // Shorten the way if the mask is a vector of constants. + if (isConstantIntVector(Mask)) { + unsigned MemIndex = 0; + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getAggregateElement(Idx)->isNullValue()) + continue; + Value *OneElt = + Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx)); + Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex); + Builder.CreateAlignedStore(OneElt, NewPtr, 1); + ++MemIndex; + } + CI->eraseFromParent(); + return; + } + + // If the mask is not v1i1, use scalar bit test operations. This generates + // better results on X86 at least. 
+ Value *SclrMask; + if (VectorWidth != 1) { + Type *SclrMaskTy = Builder.getIntNTy(VectorWidth); + SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask"); + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // br i1 %mask_1, label %cond.store, label %else // - Value *Predicate = Builder.CreateExtractElement(Mask, Idx); + Value *Predicate; + if (VectorWidth != 1) { + Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), + Builder.getIntN(VectorWidth, 0)); + } else { + Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx)); + } // Create "cond" block // @@ -727,17 +851,24 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, switch (II->getIntrinsicID()) { default: break; - case Intrinsic::masked_load: + case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (TTI->isLegalMaskedLoad(CI->getType())) + unsigned Alignment = + cast(CI->getArgOperand(1))->getZExtValue(); + if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; - case Intrinsic::masked_store: - if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) + } + case Intrinsic::masked_store: { + unsigned Alignment = + cast(CI->getArgOperand(2))->getZExtValue(); + if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), + MaybeAlign(Alignment))) return false; scalarizeMaskedStore(CI, ModifiedDT); return true; + } case Intrinsic::masked_gather: if (TTI->isLegalMaskedGather(CI->getType())) return false; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index d5ad7e92299..96a1f86c3e0 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include 
"llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -205,10 +204,10 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (ExitMI) { for (const MachineOperand &MO : ExitMI->operands()) { if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO)); } } @@ -285,7 +284,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // We do not need to track any dependencies for constant registers. if (MRI.isConstantPhysReg(Reg)) return; @@ -361,7 +360,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // No point in tracking lanemasks if we don't have interesting subregisters. 
const TargetRegisterClass &RC = *MRI.getRegClass(Reg); if (!RC.HasDisjunctSubRegs) @@ -373,6 +372,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const return TRI->getSubRegIndexLaneMask(SubReg); } +bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) { + auto RegUse = CurrentVRegUses.find(MO.getReg()); + if (RegUse == CurrentVRegUses.end()) + return true; + return (RegUse->LaneMask & getLaneMaskForMO(MO)).none(); +} + /// Adds register output and data dependencies from this SUnit to instructions /// that occur later in the same scheduling region if they read from or write to /// the virtual register defined at OperIdx. @@ -382,7 +388,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); LaneBitmask DefLaneMask; LaneBitmask KillLaneMask; @@ -393,6 +399,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // earlier instruction. KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; + if (MO.getSubReg() != 0 && MO.isUndef()) { + // There may be other subregister defs on the same instruction of the same + // register in later operands. The lanes of other defs will now be live + // after this instruction, so these should not be treated as killed by the + // instruction even though they appear to be killed in this one operand. + for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &OtherMO = MI->getOperand(I); + if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) + KillLaneMask &= ~getLaneMaskForMO(OtherMO); + } + } + // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. 
MO.setIsUndef(false); @@ -402,8 +420,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { } if (MO.isDead()) { - assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && - "Dead defs should have no uses"); + assert(deadDefHasNoUse(MO) && "Dead defs should have no uses"); } else { // Add data dependence to all uses we found so far. const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -491,7 +508,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); const MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Remember the use. Data dependencies will be added when we find the def. LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) @@ -514,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). 
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); } @@ -701,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { map.reComputeSize(); } -void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, +void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs, LiveIntervals *LIS, @@ -821,10 +838,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, const MachineOperand &MO = MI.getOperand(j); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (Register::isVirtualRegister(Reg)) { HasVRegDef = true; addVRegDefDeps(SU, j); } @@ -838,10 +855,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // additional use dependencies. 
if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { addPhysRegDeps(SU, j); - } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) { + } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) { addVRegUseDeps(SU, j); } } @@ -1071,7 +1088,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.readsReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -1102,7 +1119,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { if (MO.isReg()) { if (!MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; LiveRegs.removeReg(Reg); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 49c922f560f..e8950b58d42 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -111,10 +110,20 @@ static cl::opt MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads")); +static cl::opt + EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), + cl::desc("DAG combiner enable merging multiple stores " + "into a wider store")); + static cl::opt TokenFactorInlineLimit( "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors")); +static cl::opt StoreMergeDependenceLimit( + "combiner-store-merge-dependence-limit", cl::Hidden, 
cl::init(10), + cl::desc("Limit the number of times for the same StoreNode and RootNode " + "to bail out in store merging dependence check")); + namespace { class DAGCombiner { @@ -152,6 +161,14 @@ namespace { /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; + /// Map from candidate StoreNode to the pair of RootNode and count. + /// The count is used to track how many times we have seen the StoreNode + /// with the same RootNode bail out in dependence check. If we have seen + /// the bail out for the same pair many times over a limit, we won't + /// consider the StoreNode with the same RootNode as store merging + /// candidate again. + DenseMap> StoreRootCountMap; + // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; @@ -236,6 +253,7 @@ namespace { void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + StoreRootCountMap.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -361,6 +379,7 @@ namespace { SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue visitMULFIX(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); @@ -421,7 +440,6 @@ namespace { SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); - SDValue visitFP_ROUND_INREG(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); @@ -470,7 +488,7 @@ namespace { SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); - SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + SDValue visitShiftByConstant(SDNode *N); SDValue foldSelectOfConstants(SDNode *N); SDValue foldVSelectOfConstants(SDNode *N); @@ -497,6 +515,7 @@ namespace { bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; bool isOneUseSetCC(SDValue N) 
const; + bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -510,7 +529,7 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); @@ -521,11 +540,11 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); } +bool TargetLowering::DAGCombinerInfo:: +recursivelyDeleteUnusedNodes(SDNode *N) { + return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N); +} + void TargetLowering::DAGCombinerInfo:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); @@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) { DAG.DeleteNode(N); } -/// Return 1 if we can compute the negated form of the specified expression for -/// the same cost as the expression itself, or 2 if we can compute the 
negated -/// form more cheaply than the expression itself. -static char isNegatibleForFree(SDValue Op, bool LegalOperations, - const TargetLowering &TLI, - const TargetOptions *Options, - bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return 2; - - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); - const SDNodeFlags Flags = Op->getFlags(); - if (!Op.hasOneUse() && - !(Op.getOpcode() == ISD::FP_EXTEND && - TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; - - // Don't recurse exponentially. - if (Depth > 6) - return 0; - - switch (Op.getOpcode()) { - default: return false; - case ISD::ConstantFP: { - if (!LegalOperations) - return 1; - - // Don't invert constant FP values after legalization unless the target says - // the negated constant is legal. - return TLI.isOperationLegal(ISD::ConstantFP, VT) || - TLI.isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, - ForCodeSize); - } - case ISD::BUILD_VECTOR: { - // Only permit BUILD_VECTOR of constants. - if (llvm::any_of(Op->op_values(), [&](SDValue N) { - return !N.isUndef() && !isa(N); - })) - return 0; - if (!LegalOperations) - return 1; - if (TLI.isOperationLegal(ISD::ConstantFP, VT) && - TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - TLI.isFPImmLegal(neg(cast(N)->getValueAPF()), VT, - ForCodeSize); - }); - } - case ISD::FADD: - if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // After operation legalization, it might not be legal to create new FSUBs. 
- if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - case ISD::FSUB: - // We can't turn -(A-B) into B-A when we honor signed zeros. - if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, - Options, ForCodeSize, Depth + 1)) - return V; - - return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, - ForCodeSize, Depth + 1); - } -} - -/// If isNegatibleForFree returns true, return the newly negated expression. -static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth = 0) { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); - - assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags Flags = Op->getFlags(); - - switch (Op.getOpcode()) { - default: llvm_unreachable("Unknown code"); - case ISD::ConstantFP: { - APFloat V = cast(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { - SmallVector Ops; - for (SDValue C : Op->op_values()) { - if (C.isUndef()) { - Ops.push_back(C); - continue; - } - APFloat V = cast(C)->getValueAPF(); - V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); - } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); - } - case ISD::FADD: - assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - 
DAG.getTargetLoweringInfo(), &Options, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - - // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), - GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), Flags); - - case ISD::FP_EXTEND: - case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); - case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - GetNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); - } -} - // APInts must be the same size for most operations, this helper // function zero extends the shorter of the pair so that they match. // We provide an Offset so that we can create bitwidths that won't overflow. 
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); - AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } @@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); - AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); for (SDNode *LN : UpdatedNodes) { - AddToWorklist(LN); AddUsersToWorklist(LN); + AddToWorklist(LN); } if (!NIsValid) continue; @@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); case ISD::SUBCARRY: return visitSUBCARRY(N); + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: return visitMULFIX(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); - case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); @@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { + // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) + if (SDValue Carry = getAsCarry(TLI, N0)) { + SDValue X = N1; + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X); + return 
DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), NegX, Zero, + Carry); + } + } + return SDValue(); } @@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { return SDValue(); } +// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and +// UMULFIXSAT here. +SDValue DAGCombiner::visitMULFIX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Scale = N->getOperand(2); + EVT VT = N0.getValueType(); + + // fold (mulfix x, undef, scale) -> 0 + if (N0.isUndef() || N1.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + + // Canonicalize constant to RHS (vector doesn't have to splat) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale); + + // fold (mulfix x, 0, scale) -> 0 + if (isNullConstant(N1)) + return DAG.getConstant(0, SDLoc(N), VT); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // x * 15 --> (x << 4) - x // x * -33 --> -((x << 5) + x) // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) - if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { + if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) { // TODO: We could handle more general decomposition of any constant by // having the target set a limit on number of ops and making a // callback to determine that sequence (similar to sqrt expansion). @@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (VT.isVector()) { // fold (mulhs x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. 
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhs x, 0) -> 0 @@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // fold (mulhs x, 1) -> (sra x, size(x)-1) if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, - DAG.getConstant(N0.getValueSizeInBits() - 1, DL, + DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (VT.isVector()) { // fold (mulhu x, 0) -> 0 - if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; - if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0/N1, because undef node may exist. + if (ISD::isBuildVectorAllZeros(N0.getNode()) || + ISD::isBuildVectorAllZeros(N1.getNode())) + return DAG.getConstant(0, DL, VT); } // fold (mulhu x, 0) -> 0 @@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // (umul_lohi N0, 0) -> (0, 0) + if (isNullConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, Zero, Zero); + } + + // (umul_lohi N0, 1) -> (N0, 0) + if (isOneConstant(N->getOperand(1))) { + SDValue Zero = DAG.getConstant(0, DL, VT); + return CombineTo(N, N->getOperand(0), Zero); + } + // If the type is twice as wide is legal, transform the mulhu to a wider // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitMULO(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); bool IsSigned = (ISD::SMULO == N->getOpcode()); + EVT CarryVT = N->getValueType(1); + SDLoc DL(N); + + // canonicalize constant to RHS. 
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0); + + // fold (mulo x, 0) -> 0 + no carry out + if (isNullOrNullSplat(N1)) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getConstant(0, DL, CarryVT)); + // (mulo x, 2) -> (addo x, x) - if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1))) + if (ConstantSDNode *C2 = isConstOrConstSplat(N1)) if (C2->getAPIntValue() == 2) - return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N), - N->getVTList(), N->getOperand(0), N->getOperand(0)); + return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, + N->getVTList(), N0, N0); return SDValue(); } @@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && Level <= AfterLegalizeTypes) { // Input types must be integer and the same. - if (XVT.isInteger() && XVT == Y.getValueType()) { + if (XVT.isInteger() && XVT == Y.getValueType() && + !(VT.isVector() && TLI.isTypeLegal(VT) && + !XVT.isVector() && !TLI.isTypeLegal(XVT))) { SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); return DAG.getNode(HandOpcode, DL, VT, Logic); } @@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, return true; } - // Do not change the width of a volatile load. - if (LoadN->isVolatile()) + // Do not change the width of a volatile or atomic loads. + if (!LoadN->isSimple()) return false; // Do not generate loads of non-round integer types since these can @@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, if (!MemVT.isRound()) return false; - // Don't change the width of a volatile load. - if (LDST->isVolatile()) + // Don't change the width of a volatile or atomic loads. + if (!LDST->isSimple()) return false; // Verify that we are actually reducing a load width here. 
if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // Ensure that this isn't going to produce an unsupported unaligned access. + // Ensure that this isn't going to produce an unsupported memory access. if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, LDST->getAddressSpace(), ShAmt / 8, @@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { return T1; } +/// Try to replace shift/logic that tests if a bit is clear with mask + setcc. +/// For a target with a bit test, this is expected to become test + set and save +/// at least 1 instruction. +static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { + assert(And->getOpcode() == ISD::AND && "Expected an 'and' op"); + + // This is probably not worthwhile without a supported type. + EVT VT = And->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + // Look through an optional extension and find a 'not'. + // TODO: Should we favor test+set even without the 'not' op? + SDValue Not = And->getOperand(0), And1 = And->getOperand(1); + if (Not.getOpcode() == ISD::ANY_EXTEND) + Not = Not.getOperand(0); + if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + return SDValue(); + + // Look though an optional truncation. The source operand may not be the same + // type as the original 'and', but that is ok because we are masking off + // everything but the low bit. + SDValue Srl = Not.getOperand(0); + if (Srl.getOpcode() == ISD::TRUNCATE) + Srl = Srl.getOperand(0); + + // Match a shift-right by constant. + if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || + !isa(Srl.getOperand(1))) + return SDValue(); + + // We might have looked through casts that make this transform invalid. + // TODO: If the source type is wider than the result type, do the mask and + // compare in the source type. 
+ const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); + unsigned VTBitWidth = VT.getSizeInBits(); + if (ShiftAmt.uge(VTBitWidth)) + return SDValue(); + + // Turn this into a bit-test pattern using mask op + setcc: + // and (not (srl X, C)), 1 --> (and X, 1<getOperand(0); SDValue N1 = N->getOperand(1); @@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. @@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned MemBitSize = MemVT.getScalarSizeInBits(); APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize); if (DAG.MaskedValueIsZero(N1, ExtBits) && - ((!LegalOperations && !LN0->isVolatile()) || + ((!LegalOperations && LN0->isSimple()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), @@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; + if (TLI.hasBitTest(N0, N1)) + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; + return SDValue(); } @@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { return true; } +// Match 2 elements of a packed halfword bswap. 
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef Parts) { + if (N.getOpcode() == ISD::OR) + return isBSwapHWordElement(N.getOperand(0), Parts) && + isBSwapHWordElement(N.getOperand(1), Parts); + + if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) { + ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1)); + if (!C || C->getAPIntValue() != 16) + return false; + Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode(); + return true; + } + + return false; +} + /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); // Look for either - // (or (or (and), (and)), (or (and), (and))) - // (or (or (or (and), (and)), (and)), (and)) - if (N0.getOpcode() != ISD::OR) - return SDValue(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); + // (or (bswaphpair), (bswaphpair)) + // (or (or (bswaphpair), (and)), (and)) + // (or (or (and), (bswaphpair)), (and)) SDNode *Parts[4] = {}; - if (N1.getOpcode() == ISD::OR && - N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + if (isBSwapHWordPair(N0, Parts)) { // (or (or (and), (and)), (or (and), (and))) - if (!isBSwapHWordElement(N00, Parts)) + if (!isBSwapHWordPair(N1, Parts)) return SDValue(); - - if (!isBSwapHWordElement(N01, Parts)) - return SDValue(); - SDValue N10 = N1.getOperand(0); - if (!isBSwapHWordElement(N10, Parts)) - return SDValue(); - SDValue N11 = N1.getOperand(1); - if (!isBSwapHWordElement(N11, Parts)) - return SDValue(); - } else { + } else if (N0.getOpcode() == ISD::OR) { // (or (or (or (and), (and)), (and)), (and)) if (!isBSwapHWordElement(N1, Parts)) return SDValue(); - if (!isBSwapHWordElement(N01, Parts)) + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && + !(isBSwapHWordElement(N00, 
Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - if (N00.getOpcode() != ISD::OR) - return SDValue(); - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) - return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) - return SDValue(); - } + } else + return SDValue(); // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); - bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - if (!LegalMask) { - std::swap(NewLHS, NewRHS); - ShuffleVectorSDNode::commuteMask(Mask); - LegalMask = TLI.isShuffleMaskLegal(Mask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, + Mask, DAG); + if (LegalShuffle) + return LegalShuffle; } } } @@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return V; // See if this is some rotate idiom. 
- if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) - return SDValue(Rot, 0); + if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N))) + return Rot; if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, /// Otherwise, returns an expansion of \p ExtractFrom based on the following /// patterns: /// +/// (or (add v v) (shrl v bitwidth-1)): +/// expands (add v v) -> (shl v 1) +/// /// (or (mul v c0) (shrl (mul v c1) c2)): /// expands (mul v c0) -> (shl (mul v c1) c3) /// @@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, "Existing shift must be valid as a rotate half"); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); + + // Value and Type of the shift. + SDValue OppShiftLHS = OppShift.getOperand(0); + EVT ShiftedVT = OppShiftLHS.getValueType(); + + // Amount of the existing shift. + ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); + + // (add v v) -> (shl v 1) + if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && + ExtractFrom.getOpcode() == ISD::ADD && + ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && + ExtractFrom.getOperand(0) == OppShiftLHS && + OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS, + DAG.getShiftAmountConstant(1, ShiftedVT, DL)); + // Preconditions: // (or (op0 v c0) (shiftl/r (op0 v c1) c2)) // @@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, // op0 must be the same opcode on both sides, have the same LHS argument, // and produce the same value type. 
- SDValue OppShiftLHS = OppShift.getOperand(0); - EVT ShiftedVT = OppShiftLHS.getValueType(); if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() || OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) || ShiftedVT != ExtractFrom.getValueType()) return SDValue(); - // Amount of the existing shift. - ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the shift's LHS op. ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1)); // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op. @@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the // former being preferred if supported. InnerPos and InnerNeg are Pos and // Neg with outer conversions stripped away. -SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, +SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL) { @@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, - HasPos ? Pos : Neg).getNode(); + HasPos ? Pos : Neg); } - return nullptr; + return SDValue(); } // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. 
EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return nullptr; + if (!TLI.isTypeLegal(VT)) + return SDValue(); // The target must have at least one rotate flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return nullptr; + if (!HasROTL && !HasROTR) + return SDValue(); // Check for truncated rotate. if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { assert(LHS.getValueType() == RHS.getValueType()); - if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { - return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), - SDValue(Rot, 0)).getNode(); + if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot); } } @@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If neither side matched a rotate half, bail if (!LHSShift && !RHSShift) - return nullptr; + return SDValue(); // InstCombine may have combined a constant shl, srl, mul, or udiv with one // side of the rotate, so try to handle that here. In all cases we need to @@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // If a side is still missing, nothing else we can do. if (!RHSShift || !LHSShift) - return nullptr; + return SDValue(); // At this point we've matched or extracted a shift op on each side. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return nullptr; // Not shifting the same value. + return SDValue(); // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return nullptr; // Shifts must disagree. + return SDValue(); // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. 
if (RHSShift.getOpcode() == ISD::SHL) { @@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } - return Rot.getNode(); + return Rot; } // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return nullptr; + return SDValue(); // If the shift amount is sign/zext/any-extended just peel it off. SDValue LExtOp0 = LHSShiftAmt; @@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; - SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; - return nullptr; + return SDValue(); } namespace { @@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, Depth + 1); case ISD::LOAD: { auto L = cast(Op.getNode()); - if (L->isVolatile() || L->isIndexed()) + if (!L->isSimple() || L->isIndexed()) return None; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); @@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Chain; SmallVector Stores; for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { + // TODO: Allow unordered atomics when wider type is legal (see D66309) if (Store->getMemoryVT() != MVT::i8 || - Store->isVolatile() || Store->isIndexed()) + !Store->isSimple() || Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); 
LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && + assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && + !L->isIndexed() && "Must be enforced by calculateByteProvider"); assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); @@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { - SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); - if (isa(RHS) || isa(LHS)) { + SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1); + if (isa(N01) || isa(N00)) { unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); - return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); + N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00 + N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01 + AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode()); + return DAG.getNode(NewOpcode, DL, VT, N00, N01); } } @@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } +/// If we have a shift-by-constant of a bitwise logic op that itself has a +/// shift-by-constant operand with identical opcode, we may be able to convert +/// that into 2 independent shifts followed by the logic op. This is a +/// throughput improvement. +static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) { + // Match a one-use bitwise logic op. + SDValue LogicOp = Shift->getOperand(0); + if (!LogicOp.hasOneUse()) + return SDValue(); + + unsigned LogicOpcode = LogicOp.getOpcode(); + if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR && + LogicOpcode != ISD::XOR) + return SDValue(); + + // Find a matching one-use shift by constant. + unsigned ShiftOpcode = Shift->getOpcode(); + SDValue C1 = Shift->getOperand(1); + ConstantSDNode *C1Node = isConstOrConstSplat(C1); + assert(C1Node && "Expected a shift with constant operand"); + const APInt &C1Val = C1Node->getAPIntValue(); + auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp, + const APInt *&ShiftAmtVal) { + if (V.getOpcode() != ShiftOpcode || !V.hasOneUse()) + return false; + + ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1)); + if (!ShiftCNode) + return false; + + // Capture the shifted operand and shift amount value. 
+ ShiftOp = V.getOperand(0); + ShiftAmtVal = &ShiftCNode->getAPIntValue(); + + // Shift amount types do not have to match their operand type, so check that + // the constants are the same width. + if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth()) + return false; + + // The fold is not valid if the sum of the shift values exceeds bitwidth. + if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits())) + return false; + + return true; + }; + + // Logic ops are commutative, so check each operand for a match. + SDValue X, Y; + const APInt *C0Val; + if (matchFirstShift(LogicOp.getOperand(0), X, C0Val)) + Y = LogicOp.getOperand(1); + else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val)) + Y = LogicOp.getOperand(0); + else + return SDValue(); + + // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1) + SDLoc DL(Shift); + EVT VT = Shift->getValueType(0); + EVT ShiftAmtVT = Shift->getOperand(1).getValueType(); + SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT); + SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC); + SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1); + return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2); +} + /// Handle transforms common to the three shifts, when the shift amount is a /// constant. /// We are looking for: (shift being one of shl/sra/srl) /// shift (binop X, C0), C1 /// And want to transform into: /// binop (shift X, C1), (shift C0, C1) -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { + assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand"); + // Do not turn a 'not' into a regular xor. if (isBitwiseNot(N->getOperand(0))) return SDValue(); // The inner binop must be one-use, since we want to replace it. 
- SDNode *LHS = N->getOperand(0).getNode(); - if (!LHS->hasOneUse()) return SDValue(); + SDValue LHS = N->getOperand(0); + if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) + return SDValue(); + + // TODO: This is limited to early combining because it may reveal regressions + // otherwise. But since we just checked a target hook to see if this is + // desirable, that should have filtered out cases where this interferes + // with some other pattern matching. + if (!LegalTypes) + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize // it. - switch (LHS->getOpcode()) { + switch (LHS.getOpcode()) { default: return SDValue(); case ISD::OR: @@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. - SDValue BinOpLHSVal = LHS->getOperand(0); + SDValue BinOpLHSVal = LHS.getOperand(0); bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL || BinOpLHSVal.getOpcode() == ISD::SRA || BinOpLHSVal.getOpcode() == ISD::SRL) && @@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - EVT VT = N->getValueType(0); - - if (!TLI.isDesirableToCommuteWithShift(N, Level)) - return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. 
- SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), - N->getValueType(0), - LHS->getOperand(1), N->getOperand(1)); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), + N->getOperand(1)); assert(isa(NewRHS) && "Folding was not successful!"); - // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), - SDLoc(LHS->getOperand(0)), - VT, LHS->getOperand(0), N->getOperand(1)); - - // Create the new binop. - return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } if (N1C && !N1C->isOpaque()) - if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; return SDValue(); @@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. + // sra (add (shl X, N1C), AddC), N1C --> + // sext (add (trunc X to (width - N1C)), AddC') + if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + N0.getOperand(0).getOpcode() == ISD::SHL && + N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { + if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { + SDValue Shl = N0.getOperand(0); + // Determine what the truncate's type would be and ask the target if that + // is a free operation. 
+ LLVMContext &Ctx = *DAG.getContext(); + unsigned ShiftAmt = N1C->getZExtValue(); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt); + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + + // TODO: The simple type check probably belongs in the default hook + // implementation and/or target-specific overrides (because + // non-simple types likely require masking when legalized), but that + // restriction may conflict with other transforms. + if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + SDLoc DL(N); + SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); + SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). + trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT); + SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC); + return DAG.getSExtOrTrunc(Add, DL, VT); + } + } + } + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRA = visitShiftByConstant(N, N1C)) + if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; return SDValue(); @@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C && !N1C->isOpaque()) - if (SDValue NewSRL = visitShiftByConstant(N, N1C)) + if (SDValue NewSRL = visitShiftByConstant(N)) return NewSRL; // Attempt to convert a srl of a load into a narrower zero-extending load. @@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// If a (v)select has a condition value that is a sign-bit test, try to smear +/// the condition operand sign-bit across the value width and use it as a mask. 
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { + SDValue Cond = N->getOperand(0); + SDValue C1 = N->getOperand(1); + SDValue C2 = N->getOperand(2); + assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && + "Expected select-of-constants"); + + EVT VT = N->getValueType(0); + if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || + VT != Cond.getOperand(0).getValueType()) + return SDValue(); + + // The inverted-condition + commuted-select variants of these patterns are + // canonicalized to these forms in IR. + SDValue X = Cond.getOperand(0); + SDValue CondC = Cond.getOperand(1); + ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && + isAllOnesOrAllOnesSplat(C2)) { + // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::OR, DL, VT, Sra, C1); + } + if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { + // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { return Cond; } - // For any constants that differ by 1, we can transform the select into an - // extend and add. Use a target hook because some targets may prefer to - // transform in the other direction. + // Use a target hook because some targets may prefer to transform in the + // other direction. 
if (TLI.convertSelectOfConstantsToMath(VT)) { - if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { + // For any constants that differ by 1, we can transform the select into an + // extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + if (C1Val - 1 == C2Val) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) { + if (C1Val + 1 == C2Val) { // select Cond, C1, C1+1 --> add (sext Cond), C1+1 if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (VT != MVT::i1) + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); @@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } -static -std::pair SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. - SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - // This function assumes all the vselect's arguments are CONCAT_VECTOR // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { @@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast(N); SDValue Mask = MSC->getMask(); - SDValue Data = MSC->getValue(); SDValue Chain = MSC->getChain(); SDLoc DL(N); @@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSCATTER data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != - TargetLowering::TypeSplitVector) - return SDValue(); - SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); - - EVT MemoryVT = MSC->getMemoryVT(); - unsigned Alignment = MSC->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - SDValue Scale = MSC->getScale(); - SDValue BasePtr = MSC->getBasePtr(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MSC->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MSC->getAAInfo(), MSC->getRanges()); - - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - DataLo.getValueType(), DL, OpsLo, MMO); - - // The order of the Scatter operation after split is well defined. The "Hi" - // part comes after the "Lo". So these two operations should be chained one - // after another. - SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + return SDValue(); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = cast(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getValue(); SDValue Chain = MST->getChain(); - EVT VT = Data.getValueType(); SDLoc DL(N); // Zap masked stores with a zero mask. if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MSTORE data type requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. 
- if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. 
min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); - - SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); - - SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. 
- DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); - - SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { GatherRes, Chain }; - return DAG.getMergeValues(RetOps, DL); + return SDValue(); } SDValue DAGCombiner::visitMLOAD(SDNode *N) { @@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MLOAD result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MLD->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); - - SDValue Chain = MLD->getChain(); - SDValue Ptr = MLD->getBasePtr(); - EVT MemoryVT = MLD->getMemoryVT(); - unsigned Alignment = MLD->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); - - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, - MLD->getAAInfo(), MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, - MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - // Build a factor node to remember that this load is independent of the - // other one. - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. 
- DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast(N0)->isVolatile()) && + !cast(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. 
if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. 
- SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast(N0)->getMemoryVT() && - ((!LegalOperations && !cast(N0)->isVolatile() && + ((!LegalOperations && cast(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast(N0)->getMemoryVT() && - ((!LegalOperations && !cast(N0)->isVolatile()) || + ((!LegalOperations && cast(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast(N0)->isVolatile()) || + ((!LegalOperations && cast(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { 
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. // TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, 
ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. + if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. 
- if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, 
LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). 
+ MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, /// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. 
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. 
if (cast(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. 
If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. 
Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. + // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. 
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. 
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast(Vec.getNode()); + ArrayRef Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. 
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explicitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorklist(Load.getNode()); - AddUsersToWorklist(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. AddToWorklist(EVE); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddUsersToWorklist(Load.getNode()); // Add users too + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. 
@@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. 
if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); - - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. 
- auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) + return SDValue(); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast(Extract->getOperand(0)); auto *ExtIdx = dyn_cast(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. 
@@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); - if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) @@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SV1 = DAG.getUNDEF(VT); // Avoid introducing shuffles with illegal mask. - if (!TLI.isShuffleMaskLegal(Mask, VT)) { - ShuffleVectorSDNode::commuteMask(Mask); - - if (!TLI.isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - std::swap(SV0, SV1); - } - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SmallVector NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - SDValue Val; // If we have an implict truncate do truncate here as long as it's legal. 
// if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { - Val = + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements() && - TLI.isShuffleMaskLegal(NewMask, VT)) { - Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask); - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return Val; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, - ZeroIdx); - return Val; + VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask, DAG); + if (LegalShuffle) { + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return LegalShuffle; + // If not we must truncate the vector. 
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. 
if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? 
N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 
0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional(LN->getSize()) : Optional(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet Visited; // Visited node set. // Get alias information for node. 
- const bool IsLoad = isa(N) && !cast(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa(N) && cast(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa(C.getNode()) && - !cast(C.getNode())->isVolatile(); + cast(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. 
AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 22c23ba877e..6d7260d7aee 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + } else if (Register::isVirtualRegister(MO.getReg())) { // This is another use of a vreg. Don't try to sink it. return 0; } @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. 
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { - MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { + const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); MF->addCodeViewHeapAllocSite(CLI.Call, MD); } @@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) @@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. 
@@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } - case Intrinsic::objectsize: { - ConstantInt *CI = cast(II->getArgOperand(1)); - unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(II->getType(), Res); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::is_constant: { - Constant *ResCI = ConstantInt::get(II->getType(), 0); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) { /// (fall-through) successor, and update the CFG. 
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // For more accurate line information if this is the only instruction - // in the block then emit it, otherwise we have the unconditional - // fall-through case, which needs no instructions. + // For more accurate line information if this is the only non-debug + // instruction in the block then emit it, otherwise we have the + // unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, @@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { + if (Register::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { @@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && + assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); @@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { } else return nullptr; - bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool IsDereferenceable = - I->getMetadata(LLVMContext::MD_dereferenceable) != 
nullptr; + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b1759246b7..cf6711adad4 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } @@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } diff --git 
a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9bc07d35dfc..c5095995ec2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast(Node->getOperand(I - 1))) - if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + if (Register::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; @@ -86,7 +86,7 @@ void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap &VRBaseMap) { unsigned VRBase = 0; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
VRBase = cast(Node->getOperand(i-NumResults))->getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + assert(Register::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } @@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op, // does not include operand register class info. const TargetRegisterClass *RC = TLI->getRegClassFor( Op.getSimpleValueType(), Op.getNode()->isDivergent()); - unsigned VReg = MRI->createVirtualRegister(RC); + Register VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); - unsigned NewVReg = MRI->createVirtualRegister(OpRC); + Register NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && - TargetRegisterInfo::isVirtualRegister(VReg)) { - unsigned NewVReg = MRI->createVirtualRegister(IIRC); + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -465,7 +464,7 @@ 
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; @@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast(Node->getOperand(0)); - if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (R && Register::isPhysicalRegister(R->getReg())) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
- if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); @@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. 
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (!R || !Register::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. - SmallVector UsedRegs; + SmallVector UsedRegs; // Additional results must be physical register defs. 
if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. @@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = R->getReg(); + if (Reg.isPhysical()) UsedRegs.push_back(Reg); } } @@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::CopyToReg: { unsigned DestReg = cast(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - SrcVal.isMachineOpcode() && + if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. 
- MIB.addReg(Reg, RegState::Define | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; @@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83..f9fdf525240 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -161,6 +162,7 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); @@ -236,6 +238,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + 
LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. EVT MemVT = ST->getMemoryVT(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT MemVT = ST->getMemoryVT(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *LD->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; @@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); - EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); - SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, - Chain, Ptr, ISrcVT, - LD->getMemOperand()); + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; @@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal || - TLI.isTypeLegal(Node->getValueType(i))) && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || - TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); @@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1097,38 +1105,15 @@ void 
SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Node->getOperand(1).getValueType()); break; case ISD::SADDSAT: case ISD::UADDSAT: @@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); NeedInvert = false; - bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, return true; } // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands @@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. 
If it does fit into a single register, return the result @@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); return CallInfo.first; } @@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::FP_ROUND_INREG: { - // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targeting a temporary location (a stack slot). - - // NOTE: there is a choice here between constantly creating new stack - // slots and always reusing the same one. We currently always create - // new ones, as reuse may inhibit scheduling. - EVT ExtraVT = cast(Node->getOperand(1))->getVT(); - Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, - Node->getValueType(0), dl); - Results.push_back(Tmp1); - break; - } case ISD::UINT_TO_FP: if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { Results.push_back(Tmp1); @@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - 
RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); @@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::ADDCARRY: @@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; + case ISD::SPLAT_VECTOR: + Results.push_back(ExpandSPLAT_VECTOR(Node)); + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { @@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. 
+ switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + // Replace the original node with the legalized result. if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); @@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e..72d052473f1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ 
b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Convert Float Results to Integer for Non-HW-supported Operations. +// Convert Float Results to Integer //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::Register: - case ISD::CopyFromReg: - case ISD::CopyToReg: - assert(isLegalInHWReg(N->getValueType(ResNo)) && - "Unsupported SoftenFloatRes opcode!"); - // Only when isLegalInHWReg, we can skip check of the operands. 
- R = SDValue(N, ResNo); - break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case 
ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - if (R.getNode() && R.getNode() != N) { + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + assert(R.getNode() != N); SetSoftenedFloat(SDValue(N, ResNo), R); - // Return true only if the node is changed, assuming that the operands - // are also converted when necessary. - return true; } - - // Otherwise, return false to tell caller to scan operands. - return false; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, we can load better from the constant pool. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CN = cast(N); // In ppcf128, the high 64 bits are always first in memory regardless // of Endianness. 
LLVM's APFloat representation is not Endian sensitive, @@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, keep the extracted value in register. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FABS can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { 
N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. 
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(ResNo); + EVT FloatVT = N->getValueType(0); if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { // Expand Y = FNEG(X) -> Y = X ^ sign mask APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); @@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, dl).first; + NVT, Ops, CallOptions, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - false, SDLoc(N)).first; + CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -536,87 +573,111 @@ 
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT 
OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { - bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); - if (LegalInHWReg) - return ExtendNode; return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, Signed, dl).first; + Op, CallOptions, dl).first; } //===----------------------------------------------------------------------===// -// Convert Float Operand to Integer for Non-HW-supported Operations. 
+// Convert Float Operand to Integer //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: - if (CanSkipSoftenFloatOperand(N, OpNo)) - return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; - case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; - case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; - case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; - case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: - Res = SoftenFloatOp_STORE(N, OpNo); - // Do not try to analyze or soften this node again if the value is - // or can be held in a register. In that case, Res.getNode() should - // be equal to N. - if (Res.getNode() == N && - isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - // Otherwise, we need to reanalyze and lower the new Res nodes. 
- break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + "Invalid operand promotion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } -bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { - if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - - // When the operand type can be kept in registers there is nothing to do for - // the following opcodes. - switch (N->getOperand(OpNo).getOpcode()) { - case ISD::BITCAST: - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: - case ISD::SELECT_CC: - return true; - } - - switch (N->getOpcode()) { - case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left - // unchanged by SoftenFloatResult(). - case ISD::Register: // Leaf node. 
- return true; - } - return false; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - GetSoftenedFloat(N->getOperand(0))); -} + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); -SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - if (N->getNumOperands() == 3) - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, - N->getOperand(3)), - 0); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { @@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -895,7 +902,8 @@ SDValue 
DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { - SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - - if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the 
result if the libcall returns a larger type. return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } -SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), - 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); @@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. 
if (!NewRHS.getNode()) { @@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { @@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { @@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { @@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue 
&Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = 
RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - false, dl).first; + CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = 
N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 15ac45c37c6..d5c1b539adb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = 
PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? 
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); + + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); } - - SDValue Op1Promoted = GetPromotedInteger(Op1); - SDValue Op2Promoted = GetPromotedInteger(Op2); - EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted 
= + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { SDValue Op1Promoted, Op2Promoted; bool Signed = N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { unsigned DiffSize = 
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; if (Saturating) { // Promoting the operand and result values changes the saturation width, // which is extends the values that we clamp to on saturation. This could be @@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; @@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { GetPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. + return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. 
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = 
RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; - EVT BoolVT = getSetCCResultType(VT); - SDValue Zero = DAG.getConstant(0, dl, VT); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + 
// Handle special case when scale is equal to zero. if (!Scale) { SDValue Result; if (!Saturating) { Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); } else { - Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO; + Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); SDValue Overflow = Result.getValue(1); - - APInt MinVal = APInt::getSignedMinValue(VTSize); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); - Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + if (Signed) { + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } else { + // For unsigned multiplication, we only need to check the max since we + // can't really overflow towards zero. + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product); + } } SplitInteger(Result, Lo, Hi); return; } + // For SMULFIX[SAT] we only expect to find Scale Result; - bool Signed = (N->getOpcode() == ISD::SMULFIX || - N->getOpcode() == ISD::SMULFIXSAT); unsigned LoHiOp = Signed ? 
ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, @@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - // Shift whole amount by scale. - SDValue ResultLL = Result[0]; - SDValue ResultLH = Result[1]; - SDValue ResultHL = Result[2]; - SDValue ResultHH = Result[3]; - - SDValue SatMax, SatMin; - SDValue NVTZero = DAG.getConstant(0, dl, NVT); - SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); - EVT BoolNVT = getSetCCResultType(NVT); - - // After getting the multplication result in 4 parts, we need to perform a + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. + // // Let's say we multiply 2 64 bit numbers. The resulting value can be held in // 128 bits that are cut into 4 32-bit parts: // @@ -2846,123 +2937,135 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // // |NVTSize-| // - // The resulting Lo and Hi will only need to be one of these 32-bit parts - // after shifting. - if (Scale < NVTSize) { - // If the scale is less than the size of the VT we expand to, the Hi and - // Lo of the result will be in the first 2 parts of the result after - // shifting right. This only requires shifting by the scale as far as the - // third part in the result (ResultHL). 
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); + // The resulting Lo and Hi would normally be in LL and LH after the shift. But + // to avoid unnecessary shifting of all 4 parts, we can adjust the shift + // amount and get Lo and Hi using two funnel shifts. Or for the special case + // when Scale is a multiple of NVTSize we can just pick the result without + // shifting. + uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. + if (Scale % NVTSize) { + SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], + ShiftAmount); + Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], + ShiftAmount); + } else { + Lo = Result[Part0]; + Hi = Result[Part0 + 1]; + } - // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the - // highest bit of HH determines saturation direction in the event of - // saturation. + // Unless saturation is requested we are done. The result is in <Hi,Lo>. + if (!Saturating) + return; + + // Can not overflow when there is no integer part. + if (Scale == VTSize) + return; + + // To handle saturation we must check for overflow in the multiplication. + // + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result) + // aren't all zeroes. + // + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result) + // aren't all ones or all zeroes.
+ // + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of signed + // saturation. + + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + + if (!Signed) { + if (Scale < NVTSize) { + // Overflow happened if ((HH | (HL >> Scale)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale, dl, ShiftTy)); + SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); + SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); + } else if (Scale == NVTSize) { + // Overflow happened if (HH != 0). + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); + } else if (Scale < VTSize) { + // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). + SDValue HHAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHH, + DAG.getConstant(Scale - NVTSize, dl, + ShiftTy)); + SatMax = DAG.getSetCC(dl, BoolNVT, HHAdjusted, NVTZero, ISD::SETNE); + } else + llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT" + "(and saturation can't happen with Scale==VTSize)."); + + Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo); + return; + } + + if (Scale < NVTSize) { // The number of overflow bits we can check are VTSize - Scale + 1 (we // include the sign bit). If these top bits are > 0, then we overflowed past // the max value. If these top bits are < -1, then we overflowed past the // min value. Otherwise, we did not overflow.
- if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - assert(OverflowBits <= VTSize && OverflowBits > NVTSize && - "Extent of overflow bits must start within HL"); - SDValue HLHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); - SDValue HLLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); - - // HH > 0 or HH == 0 && HL > HLLoMask - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLPos = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); - - // HH < -1 or HH == -1 && HL < HLHiMask - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLNeg = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); - } + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT)); + // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask). 
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT)); } else if (Scale == NVTSize) { - // If the scales are equal, Lo and Hi are ResultLH and Result HL, - // respectively. Avoid shifting to prevent undefined behavior. - Lo = ResultLH; - Hi = ResultHL; - - // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. - // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. - if (Saturating) { - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); - - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); - } + // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg)); + // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0). 
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); } else if (Scale < VTSize) { - // If the scale is instead less than the old VT size, but greater than or - // equal to the expanded VT size, the first part of the result (ResultLL) is - // no longer a part of Lo because it would be scaled out anyway. Instead we - // can start shifting right from the fourth part (ResultHH) to the second - // part (ResultLH), and Result LH will be the new Lo. - SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); - // This is similar to the case when we saturate if Scale < NVTSize, but we - // only need to chech HH. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - SDValue HHHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); - SDValue HHLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + // only need to check HH. 
+ unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); - SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); - SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); - } - } else if (Scale == VTSize) { - assert( - !Signed && - "Only unsigned types can have a scale equal to the operand bit width"); - - Lo = ResultHL; - Hi = ResultHH; - } else { - llvm_unreachable("Expected the scale to be less than or equal to the width " - "of the operands"); - } - - if (Saturating) { - APInt LHMax = APInt::getSignedMaxValue(NVTSize); - APInt LLMax = APInt::getAllOnesValue(NVTSize); - APInt LHMin = APInt::getSignedMinValue(NVTSize); - Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); - Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); - Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); - Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); - } + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. 
+ APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void 
DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { @@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && 
"Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 14fd5be23cc..b596c174a28 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); - EVT VT = Res.getValueType(); bool Failed = false; // Don't create a value in map. auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; @@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - // If the value can be kept in HW registers, softening machinery can - // leave it unchanged and don't put it to any map. - if (Mapped == 0 && - !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && - isLegalInHWReg(VT))) { + if (Mapped == 0) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - Changed = SoftenFloatResult(N, i); - if (Changed) - goto NodeDone; - // If not changed, the result type should be legally in register. 
- assert(isLegalInHWReg(ResultVT) && - "Unchanged SoftenFloatResult should be legal in register!"); - goto ScanOperands; + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -439,15 +430,9 @@ NodeDone: bool Failed = false; // Check that all result types are legal. - // A value type is illegal if its TypeAction is not TypeLegal, - // and TLI.RegClassForVT does not have a register class for this type. - // For example, the x86_64 target has f128 that is not TypeLegal, - // to have softened operators, but it also has FR128 register class to - // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i)) && - !TLI.isTypeLegal(Node.getValueType(i))) { + if (!isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; Node.dump(&DAG); Failed = true; @@ -456,8 +441,7 @@ NodeDone: // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType()) && - !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + !isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; Node.getOperand(i).dump(&DAG); Failed = true; @@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - // f128 of x86_64 could be kept in SSE registers, - // but sometimes softened to i128. 
- assert((Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; - // Allow repeated calls to save f128 type nodes - // or any node with type that transforms to itself. - // Many operations on these types are not softened. - assert(((OpIdEntry == 0) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && - "Node is already converted to integer!"); + assert((OpIdEntry == 0) && "Node is already converted to integer!"); OpIdEntry = getTableId(Result); } @@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// Convert the node into a libcall with the same prototype. SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + 
return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } /// Expand a node into a call to a libcall. Similar to ExpandLibCall except that diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d489b1b3a3..4afbae69128 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,15 +73,6 @@ private: return VT.isSimple() && TLI.isTypeLegal(VT); } - /// Return true if this type can be passed in registers. - /// For example, x86_64's f128, should to be legally in registers - /// and only some operations converted to library calls or integer - /// bitwise operations. - bool isLegalInHWReg(EVT VT) const { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return VT == NVT && isSimpleLegalType(VT); - } - EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -306,6 +297,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -363,6 +355,7 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); @@ -472,14 +465,11 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// Given an operand Op of Float type, returns the integer if the Op is not - /// supported in 
target HW and converted to the integer. - /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this - /// method returns an i32, the bits of which coincide with those of Op. - /// If the Op can be efficiently supported in target HW or the operand must - /// stay in a register, the Op is not converted to an integer. - /// In that case, the given op is returned. + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op SDValue GetSoftenedFloat(SDValue Op) { TableId Id = getTableId(Op); auto Iter = SoftenedFloats.find(Id); @@ -494,19 +484,19 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Convert Float Results to Integer for Non-HW-supported Operations. - bool SoftenFloatResult(SDNode *N, unsigned ResNo); + // Convert Float Results to Integer. 
+ void SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -518,7 +508,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -531,27 +521,17 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Return true if we can skip softening the given operand or 
SDNode because - // either it was soften before by SoftenFloatResult and references to the - // operand were replaced by ReplaceValueWith or it's value type is legal in HW - // registers and the operand can be left unchanged. - bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); - - // Convert Float Operand to Integer for Non-HW-supported Operations. + // Convert Float Operand to Integer. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); - SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FABS(SDNode *N); - SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); - SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); @@ -559,7 +539,6 @@ private: SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); SDValue SoftenFloatOp_LLRINT(SDNode *N); - SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -715,6 +694,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); @@ -830,6 +810,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); @@ -933,6 +914,8 @@ private: void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void 
SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVSETCC(const SDNode *N); + //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 943f63f46c4..5562f400b6e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: { - // Expand the floating point operand only if it was converted to integers. - // Otherwise, it is a legal type like f128 that can be saved in a register. - auto SoftenedOp = GetSoftenedFloat(InOp); - if (isLegalInHWReg(SoftenedOp.getValueType())) - break; - SplitInteger(SoftenedOp, Lo, Hi); + case TargetLowering::TypeSoftenFloat: + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -static std::pair SplitVSETCC(const SDNode *N, - SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); @@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { if (Cond.getValueType().isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); - // It seems to improve code to generate two narrow SETCCs as opposed to - // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. else if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); - else + // It seems to improve code to generate two narrow SETCCs as opposed to + // splitting a wide result vector. + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. 
+ EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869..15c3a0b6cfa 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: @@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); break; } - case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), - cast(Node->getOperand(1))->getVT()); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::SMULFIX: case ISD::UMULFIX: return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. 
+ return DAG.UnrollVectorOp(Op.getNode()); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7e4d5261797..3763e886cef 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; @@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case 
ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: R = ScalarizeVecRes_MULFIX(N); break; } @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); @@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split Mask operand SDValue MaskLo, MaskHi; - if 
(getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO); + MMO, MGT->getIndexType()); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO); + MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO); + OpsLo, MMO, MGT->getIndexType()); MMO = DAG.getMachineFunction(). 
getMachineMemOperand(MGT->getPointerInfo(), @@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO); + OpsHi, MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). 
@@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + DL, OpsLo, MMO, N->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // after another. 
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + DL, OpsHi, MMO, N->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); } @@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; @@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). 
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + // Given a vector of operations that have been broken up to widen, see // if we can collect them together into the next widest legal VT. This // implementation is trap-safe. @@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: @@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand()); + MG->getMemOperand(), MG->getIndexType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand()); + MSC->getMemOperand(), MSC->getIndexType()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, VT); + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, dl, VT, CC); } SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { @@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. 
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2cb850fa1a3..7ee44c808fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 34b4c850235..ff806bdb822 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector, 4> DelDeps; @@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = cast(PredSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast(SuccSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - TargetRegisterInfo::isVirtualRegister - (cast(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast(N->getOperand(1))->getReg())) continue; SDNode *PredFrameSetup = nullptr; @@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - TargetRegisterInfo::isVirtualRegister - (cast(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e51..d4c1fb36475 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned Reg = cast(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. @@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast(Use->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? @@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, } else { // Copy from physical register. assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd..183ce4b0652 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49f..e7bac73678a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ private: /// HazardRec - The hazard recognizer to use. 
ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. + AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9..52a71b91d93 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast(N); - Erased = TargetExternalSymbols.erase( - std::pair(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue 
SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - 
unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. - if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. 
if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. 
For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. 
- ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. 
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. 
@@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast(N2)->getVT() == VT) return N1; // Not actually rounding. 
- break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. 
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned 
Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, 
return SDValue(E, 0); } auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. 
SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. if (LoadSDNode *Ld = dyn_cast(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef CandidateBinOps) { + ArrayRef CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. + SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. 
+ unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e..3a53ab9717a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05..8c15563fcd2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, 
// add an assert node. - if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; @@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - else if (!Regs.empty() && - TargetRegisterInfo::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. @@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // offsets to its parts don't wrap either. SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - SDValue Val = RetOp.getValue(i); + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, @@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { if (const BinaryOperator *BOp = dyn_cast(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.getMetadata(LLVMContext::MD_unpredictable) && + !I.hasMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, @@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::MakeLibCallOptions CallOptions; + 
CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, CallOptions, getCurSDLoc()).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the minimum value + // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, - DAG.getConstant(B.First, dl, VT)); - - // Check range - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeSub = + DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) { UsePtrType = true; - else { + } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. 
@@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, break; } } + SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); @@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. + SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, - DAG.getBasicBlock(MBB)); + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); - DAG.setRoot(BrRange); + DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" @@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // We care about the legality of the operation after it has been type // legalized. 
- while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && - VT != TLI.getTypeToTransformTo(Ctx, VT)) + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + @@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast(I.getOperand(2)); SDLoc DL = getCurSDLoc(); - - SmallVector Mask; - ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); - unsigned MaskNumElts = Mask.size(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. + SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; @@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. 
unsigned VectorWidth = I.getType()->isVectorTy() ? - cast(I.getType())->getVectorNumElements() : 0; + I.getType()->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); @@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a scalar constant or a splat vector of constants, // handle it quickly. - const auto *CI = dyn_cast(Idx); - if (!CI && isa(Idx) && - cast(Idx)->getSplatValue()) - CI = cast(cast(Idx)->getSplatValue()); + const auto *C = dyn_cast(Idx); + if (C && isa(C->getType())) + C = C->getSplatValue(); - if (CI) { + if (const auto *CI = dyn_cast_or_null(C)) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); @@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : DAG.getConstant(Offs, dl, IdxTy); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast(I).isInBounds()) @@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); bool isDereferenceable = isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); @@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. 
- unsigned VReg = + Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. @@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && - I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && - I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); @@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MMOFlags |= TLI.getMMOFlags(I); @@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SDValue &Scale, SelectionDAGBuilder* SDB) { +static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, + ISD::MemIndexType &IndexType, SDValue &Scale, + SelectionDAGBuilder *SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, // Ensure all the other indices are 0. 
for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast(GEP->getOperand(i)); - if (!C || !C->isZero()) + auto *C = dyn_cast(GEP->getOperand(i)); + if (!C) + return false; + if (isa(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null(C); + if (!CI || !CI->isZero()) return false; } @@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); @@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). 
@@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory( @@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) @@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (I.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), DAG.getDataLayout())) @@ -4645,9 +4676,27 
@@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, - getValue(I.getPointerOperand()), MMO); + + SDValue Ptr = getValue(I.getPointerOperand()); + + if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for loads to prevent future divergence. + SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); + + setValue(&I, L); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) + DAG.setRoot(OutChain); + else + PendingLoads.push_back(OutChain); + return; + } + + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, + Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) @@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + SDValue Ptr = getValue(I.getPointerOperand()); + if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for stores to prevent future divergence. + SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + DAG.setRoot(S); + return; + } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - getValue(I.getPointerOperand()), Val, MMO); + Ptr, Val, MMO); DAG.setRoot(OutChain); @@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - SDValue Op = getValue(I.getArgOperand(i)); - Ops.push_back(Op); + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast(Arg), SDLoc(), VT)); + } } SmallVector ValueVTs; @@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, // Put the exponent in the right bit position for later addition to the // final result: // - // #define LOG2OFe 1.4426950f - // t0 = Op * LOG2OFe + // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? 
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); - // Scale the exponent by log(2) [0.69314718f]. + // Scale the exponent by log(2). SDValue Exp = GetExponent(DAG, Op1, TLI, dl); - SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getUnderlyingArgReg - Find underlying register used for a truncated or -// bitcasted argument. -static unsigned getUnderlyingArgReg(const SDValue &N) { +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. 
Returns a list of +static void +getUnderlyingArgRegs(SmallVectorImpl> &Regs, + const SDValue &N) { switch (N.getOpcode()) { - case ISD::CopyFromReg: - return cast(N.getOperand(1))->getReg(); + case ISD::CopyFromReg: { + SDValue Op = N.getOperand(1); + Regs.emplace_back(cast(Op)->getReg(), + Op.getValueType().getSizeInBits()); + return; + } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: - return getUnderlyingArgReg(N.getOperand(0)); + getUnderlyingArgRegs(Regs, N.getOperand(0)); + return; + case ISD::BUILD_PAIR: + case ISD::BUILD_VECTOR: + case ISD::CONCAT_VECTORS: + for (SDValue Op : N->op_values()) + getUnderlyingArgRegs(Regs, Op); + return; default: - return 0; + return; } } @@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits::max()) Op = MachineOperand::CreateFI(FI); + SmallVector, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { - unsigned Reg = getUnderlyingArgReg(N); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned PR = RegInfo.getLiveInPhysReg(Reg); + Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } @@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += 
RegAndSize.second; + } + }; + // Check if ValueMap has reg number. - DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap::const_iterator + VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - Op = MachineOperand::CreateReg(RegAndSize.first, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegAndSize.second; - } + splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } + Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. + splitMultiRegDbgValue(ArgRegsAndSizes); + return true; } } @@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); IsIndirect = (Op->isReg()) ? 
IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); return true; @@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + TLI.getFrameIndexTy(DAG.getDataLayout()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_register: { @@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = getValue(I.getArgOperand(1)); - if (isa(ShAmt)) { - visitTargetIntrinsic(I, Intrinsic); - return; - } - unsigned NewIntrinsic = 0; - EVT ShAmtVT = MVT::v2i32; - switch (Intrinsic) { - case Intrinsic::x86_mmx_pslli_w: - NewIntrinsic = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntrinsic = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntrinsic = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntrinsic = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntrinsic = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntrinsic = Intrinsic::x86_mmx_psrl_q; - break; - case 
Intrinsic::x86_mmx_psrai_w: - NewIntrinsic = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntrinsic = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - // We must do this early because v2i32 is not a legal type. - SDValue ShOps[2]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, sdl, MVT::i32), - getValue(I.getArgOperand(0)), ShAmt); - setValue(&I, Res); - return; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case 
Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(I)); @@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op3)); return; } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res); return; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - ConstantInt *CI = dyn_cast(I.getArgOperand(1)); - - assert(CI && "Non-constant type in __builtin_object_size?"); - - SDValue Arg = getValue(I.getCalledValue()); - EVT Ty = Arg.getValueType(); - - if (CI->isZero()) - Res = DAG.getConstant(-1ULL, sdl, Ty); - else - Res = DAG.getConstant(0, sdl, Ty); - - setValue(&I, Res); - return; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } } } @@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; @@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; @@ -7102,7 +7172,7 @@ void 
SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg = SwiftError.getOrCreateVRegDefAt( + Register VReg = SwiftError.getOrCreateVRegDefAt( CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); @@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer showing up. + return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register 
(directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. 
@@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); @@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the @@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function 
&F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); @@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); @@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses 
past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { - // FIXME: If Fallthrough is unreachable, skip the range check. - // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; @@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + // If we're in the right place, emit the bit test header right now. 
if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0072e33f23b..bfcf30b430b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(make_unique(this, funcinfo)), FuncInfo(funcinfo), + SL(std::make_unique(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d3..bc10f762123 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf9f2c166e..1f07a241a82 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); 
TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis().getTLI(); + LibInfo = &getAnalysis().getTLI(Fn); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; - ORE = make_unique(&Fn); + ORE = std::make_unique(&Fn); auto *DTWP = getAnalysisIfAvailable(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable(); @@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Def->getParent()->insert(std::next(InsertPos), MI); } else LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + << Register::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. 
- if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned DestReg = cast(N->getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) continue; // Ignore non-integer values. @@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || - (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && - TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && + Register::isPhysicalRegister(OPI2->getReg()))) return false; return true; @@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -3323,10 +3321,13 @@ 
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitCopyToReg: { + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 395e9a8a4fc..fad98b6f50d 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); -#ifndef NDEBUG // Right now we always allocate spill slots that are of the same // size as the value we're about to spill (the size of spillee can // vary since we spill vectors of pointers too). At some point we @@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); -#endif + // Note: Using the alignment of the spill slot (rather than the abi or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment.. 
auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - PtrInfo); + StoreMMO); MMO = getMachineMemOperand(MF, *cast(Loc)); @@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // Note: We know all of these reloads are independent, but don't bother to // exploit that chain wise. DAGCombine will happily do so as needed, so // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); - SDValue SpillLoad = - DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()), - getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation)); + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d46..9ab1324533f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,6 @@ 
//===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +36,7 @@ using namespace llvm; /// NOTE: The TargetMachine owns TLOF. TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - unsigned Reg = ArgLoc.getLocReg(); + Register Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// result of type RetVT. 
std::pair TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, - bool isPostTypeLegalization) const { + ArrayRef Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) - .setNoReturn(doesNotReturn) - .setDiscardResult(!isReturnValueUsed) - .setIsPostTypeLegalization(isPostTypeLegalization) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - 
.setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, - dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { OldLHS.getValueType(), + OldRHS.getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); @@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, - dl).first; + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), @@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, AssumeSingleUse); } +// TODO: Can we merge SelectionDAG::GetDemandedBits into this? +// TODO: Under what circumstances can we create nodes? Constant folding? 
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Ignore UNDEFs. + if (Op.isUndef()) + return SDValue(); + + // Not demanding any bits/elts from Op. + if (DemandedBits == 0 || DemandedElts == 0) + return DAG.getUNDEF(Op.getValueType()); + + unsigned NumElts = DemandedElts.getBitWidth(); + KnownBits LHSKnown, RHSKnown; + switch (Op.getOpcode()) { + case ISD::BITCAST: { + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); + + if (NumSrcEltBits == NumDstEltBits) + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. 
+ if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. 
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef ShuffleMask = cast(Op)->getMask(); + + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. 
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + AllUndef = false; + IdentityLHS &= (M == (int)i); + IdentityRHS &= ((M - NumElts) == i); + } + + if (AllUndef) + return DAG.getUNDEF(Op.getValueType()); + if (IdentityLHS) + return Op.getOperand(0); + if (IdentityRHS) + return Op.getOperand(1); + break; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth)) + return V; + break; + } + return SDValue(); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits( } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - } else if (Depth == 6) { // Limit search depth. + } else if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. 
+ SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + return true; + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits( } if (!!DemandedLHS || !!DemandedRHS) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { - if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } if (!!DemandedRHS) { - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } + + // Attempt to avoid multi-use ops if we don't need anything from them. + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? 
DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. 
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) @@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits( // out) are never demanded. // TODO - support non-uniform vector amounts. 
if (Op0.getOpcode() == ISD::SRL) { - if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { @@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits( if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known = Known.trunc(BitWidth); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. if (Src.getNode()->hasOneUse()) { @@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. 
- if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); @@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits( // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, @@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { - SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? 
DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + // If we have a constant operand, we may be able to turn it into -1 if we // do not demand the high bits. This can make the constant smaller to // encode, allow more general folding, or match specialized instruction @@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits( if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); - // We can't guarantee that the new math op doesn't wrap, so explicitly - // clear those flags to prevent folding with a potential existing node - // that has those flags set. - SDNodeFlags Flags; + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); @@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // Limit search depth. - if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return false; SDLoc DL(Op); @@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; APInt BaseElts = DemandedElts; BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Update legal shuffle masks based on demanded elements if it won't reduce // to Identity which can cause premature removal of the shuffle mask. 
- if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && - isShuffleMaskLegal(NewMask, VT)) - return TLO.CombineTo(Op, - TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), - Op.getOperand(1), NewMask)); + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } // Propagate undef/zero elements from LHS/RHS. for (unsigned i = 0; i != NumElts; ++i) { @@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return 
DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } @@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of comparison should be an one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! 
+ if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, dl, CTVT)); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } @@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 
16-bit extending loads on 32-bit target to @@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. @@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? if (auto *N1C = dyn_cast(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. 
- if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } @@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Fold remainder of division by a constant. - if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. 
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) - if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) - return Folded; + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } } // Fold away ALL boolean setcc's. @@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. 
- if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector ResultOps; @@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// If all values in Values that *don't* match the predicate are same 'splat' +/// value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef Values, + std::function Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue's and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with provided value then. 
+ Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE /// where the divisor is constant and the comparison target is zero, /// return a DAG expression that will generate the same comparison result @@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - D must be constant, with D = D0 * 2^K where D0 is odd // - P is the multiplicative inverse of D0 modulo 2^W - // - Q = floor((2^W - 1) / D0) + // - Q = floor(((2^W) - 1) / D) // where W is the width of the common type of N and D. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); + SelectionDAG &DAG = DCI.DAG; + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Add non-uniform constant support. - ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + // TODO: Could support comparing with non-zero too. 
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!Divisor || !CompTarget || Divisor->isNullValue() || - !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); - const APInt &D = Divisor->getAPIntValue(); + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector PAmts, KAmts, QAmts; - // Decompose D into D0 * 2^K - unsigned K = D.countTrailingZeros(); - bool DivisorIsEven = (K != 0); - APInt D0 = D.lshr(K); + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; - // The fold is invalid when D0 == 1. - // This is reachable because visitSetCC happens before visitREM. - if (D0.isOneValue()) + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. 
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K amount to a bogus values so we can try to splat them. + P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) return SDValue(); - // P = inv(D0, 2^W) - // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + // If this is a urem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + // If this is a urem by a powers-of-two, avoid the fold since it can be + // best implemented as a bit test. 
+ if (AllDivisorsArePowerOfTwo) + return SDValue(); - SelectionDAG &DAG = DCI.DAG; + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0'`s. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } - SDValue PVal = DAG.getConstant(P, DL, VT); - SDValue QVal = DAG.getConstant(Q, DL, VT); // (mul N, P) - SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); - Created.push_back(Op1.getNode()); + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); - // Rotate right only if D was even. - if (DivisorIsEven) { + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { // We need ROTR to do this. if (!isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDValue ShAmt = - DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); - Created.push_back(Op1.getNode()); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, ((Cond == ISD::SETEQ) ? 
ISD::SETULE : ISD::SETUGT)); } +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. 
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + if (!D.isMinSignedValue()) { + // D is even if it has trailing zeros; unless it's INT_MIN, in which case + // we don't care about this lane in this fold, we'll special-handle it. + HadEvenDivisor |= (K != 0); + } + + // D is a power-of-two if D0 is one. This includes INT_MIN. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. 
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // A = floor((2^(W - 1) - 1) / D0) & -2^K + APInt A = APInt::getSignedMaxValue(W).udiv(D0); + A.clearLowBits(K); + + if (!D.isMinSignedValue()) { + // If divisor INT_MIN, then we don't care about this lane in this fold, + // we'll special-handle it. + NeedToApplyOffset |= A != 0; + } + + // Q = floor((2 * A) / (2^K)) + APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); + + assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + "We are expecting that A is always less than all-ones for SVT"); + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. Likewise, we + // don't care about INT_MIN lanes, those can be set to undef if appropriate. + if (D.isOneValue()) { + // Set P, A and K to a bogus values so we can try to splat them. + P = 0; + A = -1; + K = -1; + + // x ?% 1 == 0 <--> true <--> x u<= -1 + Q = -1; + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + AAmts.push_back(DAG.getConstant(A, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) + return SDValue(); + + // If this is a srem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold + // since it can be best implemented as a bit test. 
+ if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, AVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0'`s. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn AAmts into a splat, since we don't care about the + // values that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(AAmts, isAllOnesConstant, + DAG.getConstant(0, DL, SVT)); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0'`s. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + AVal = DAG.getBuildVector(VT, DL, AAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + AVal = AAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + if (NeedToApplyOffset) { + // We need ADD to do this. + if (!isOperationLegalOrCustom(ISD::ADD, VT)) + return SDValue(); + + // (add (mul N, P), A) + Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); + Created.push_back(Op0.getNode()); + } + + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { + // We need ROTR to do this. 
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // SREM: (rotr (add (mul N, P), A), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) + SDValue Fold = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + + // If we didn't have lanes with INT_MIN divisor, then we're done. + if (!HadIntMinDivisor) + return Fold; + + // That fold is only valid for positive divisors. Which effectively means, + // it is invalid for INT_MIN divisors. So if we have such a lane, + // we must fix-up results for said lanes. + assert(VT.isVector() && "Can/should only get here for vectors."); + + if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || + !isOperationLegalOrCustom(ISD::AND, VT) || + !isOperationLegalOrCustom(Cond, VT) || + !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + Created.push_back(Fold.getNode()); + + SDValue IntMin = DAG.getConstant( + APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue IntMax = DAG.getConstant( + APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + + // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. + SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); + Created.push_back(DivisorIsIntMin.getNode()); + + // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 + SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax); + Created.push_back(Masked.getNode()); + SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond); + Created.push_back(MaskedIsZero.getNode()); + + // To produce final result we need to blend 2 vectors: 'SetCC' and + // 'MaskedIsZero'. 
If the divisor for channel was *NOT* INT_MIN, we pick + // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is + // constant-folded, select can get lowered to a shuffle with constant mask. + SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa(Op.getOperand(0))) { @@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal. + return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. 
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. 
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. 
+ if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return 
DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + //===----------------------------------------------------------------------===// // 
Legalization Utilities //===----------------------------------------------------------------------===// @@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. + return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? 
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -5918,7 +6960,8 @@ SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::UMULFIX || - Node->getOpcode() == ISD::SMULFIXSAT) && + Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT) && "Expected a fixed point multiplication opcode"); SDLoc dl(Node); @@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); unsigned Scale = Node->getConstantOperandVal(2); - bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); unsigned VTSize = VT.getScalarSizeInBits(); if (!Scale) { // [us]mul.fix(a, b, 0) 
-> mul(a, b) - if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) { + if (!Saturating) { + if (isOperationLegalOrCustom(ISD::MUL, VT)) + return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) { SDValue Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); @@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, Product); + } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { + SDValue Result = + DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + SDValue Product = Result.getValue(0); + SDValue Overflow = Result.getValue(1); + + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + return DAG.getSelect(dl, VT, Overflow, SatMax, Product); } } - bool Signed = - Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT; assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) && "Expected scale to be less than the number of bits if signed or at " "most the number of bits if unsigned."); @@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (Scale == VTSize) // Result is just the top half since we'd be shifting by the width of the - // operand. + // operand. Overflow impossible so this works for both UMULFIX and + // UMULFIXSAT. 
return Hi; // The result will need to be shifted right by the scale since both operands @@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { if (!Saturating) return Result; - unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign - SDValue HiMask = - DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT); - SDValue LoMask = DAG.getConstant( - APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - APInt MinVal = APInt::getSignedMinValue(VTSize); + if (!Signed) { + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the + // widened multiplication) aren't all zeroes. - Result = DAG.getSelectCC(dl, Hi, LoMask, - DAG.getConstant(MaxVal, dl, VT), Result, - ISD::SETGT); - return DAG.getSelectCC(dl, Hi, HiMask, - DAG.getConstant(MinVal, dl, VT), Result, - ISD::SETLT); + // Saturate to max if ((Hi >> Scale) != 0), + // which is the same as if (Hi > ((1 << Scale) - 1)) + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, + DAG.getConstant(MaxVal, dl, VT), Result, + ISD::SETUGT); + + return Result; + } + + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the + // widened multiplication) aren't all ones or all zeroes. + + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT); + + if (Scale == 0) { + SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo, + DAG.getConstant(VTSize - 1, dl, ShiftTy)); + SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE); + // Saturated to SatMin if wide product is negative, and SatMax if wide + // product is positive ... 
+ SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax, + ISD::SETLT); + // ... but only if we overflowed. + return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result); + } + + // We handled Scale==0 above so all the bits to examine is in Hi. + + // Saturate to max if ((Hi >> (Scale - 1)) > 0), + // which is the same as if (Hi > (1 << (Scale - 1)) - 1) + SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT); + // Saturate to min if (Hi >> (Scale - 1)) < -1), + // which is the same as if (HI < (-1 << (Scale - 1)) + SDValue HighMask = + DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1), + dl, VT); + Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT); + return Result; } void TargetLowering::expandUADDSUBO( @@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO( SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Result >= 0 - // - // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) - // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); - SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); - SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, - IsAdd ? ISD::SETEQ : ISD::SETNE); + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. 
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT); + SDValue ConditionRHS = + DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT); - SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE); - SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); - - SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType); + Overflow = DAG.getBoolExtOrTrunc( + DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl, + ResultType, ResultType); } bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, @@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, // being a legal type for the architecture and thus has to be split to // two arguments. SDValue Ret; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + CallOptions.setIsPostTypeLegalization(true); if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) { // Halves of WideVT are packed into registers in different order // depending on platform endianness. This is usually handled by // the C calling convention, but we can't defer to it in // the legalizer. 
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } else { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; - Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl, - /* doesNotReturn */ false, /* isReturnValueUsed */ true, - /* isPostTypeLegalization */ true).first; + Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first; } assert(Ret.getOpcode() == ISD::MERGE_VALUES && "Ret value is a collection of constituent nodes holding result."); diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 2db0ea57059..412a00095b9 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -278,11 +278,10 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, // Ignore instructions like DBG_VALUE which don't read/def the register. if (!MO.isDef() && !MO.readsReg()) continue; - unsigned PhysReg = MO.getReg(); + Register PhysReg = MO.getReg(); if (!PhysReg) continue; - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && - "Unallocated register?!"); + assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!"); // The stack pointer is not normally described as a callee-saved register // in calling convention definitions, so we need to watch for it // separately. 
An SP mentioned by a call instruction, we can ignore, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 23e5ce0acae..db520d4e640 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -477,7 +477,10 @@ bool SjLjEHPrepare::runOnFunction(Function &F) { UnregisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy)); - FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + FrameAddrFn = Intrinsic::getDeclaration( + &M, Intrinsic::frameaddress, + {Type::getInt8PtrTy(M.getContext(), + M.getDataLayout().getAllocaAddrSpace())}); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetupDispatchFn = diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 5c944fe3f6b..0c1f1220c42 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SplitKit.h" -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" @@ -22,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -437,7 +437,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { assert(DefMI != nullptr); LaneBitmask LM; for (const MachineOperand &DefOp : DefMI->defs()) { - unsigned R = DefOp.getReg(); + Register R = DefOp.getReg(); if (R != LI.reg) continue; if (unsigned SR = DefOp.getSubReg()) @@ -1373,7 +1373,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { assert(LI.hasSubRanges()); LiveRangeCalc SubLRC; - 
unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); + Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg(); LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub) : MRI.getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &S : LI.subranges()) { diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 86ad3811e3a..78f0bbd24db 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -14,7 +14,6 @@ #ifndef LLVM_LIB_CODEGEN_SPLITKIT_H #define LLVM_LIB_CODEGEN_SPLITKIT_H -#include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -25,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index ae9401b8970..383c91259ff 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -113,7 +113,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Direct, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -122,7 +122,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, case StackMaps::IndirectMemRefOp: { int64_t Size = (++MOI)->getImm(); assert(Size > 0 && "Need a valid size for indirect memory locations."); - unsigned Reg = (++MOI)->getReg(); + Register Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); Locs.emplace_back(StackMaps::Location::Indirect, Size, getDwarfRegNum(Reg, TRI), Imm); @@ -148,14 +148,14 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (MOI->isImplicit()) 
return ++MOI; - assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + assert(Register::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); unsigned Offset = 0; unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); - unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false); unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 809960c7fdf..5683d1db473 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -157,6 +156,68 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, return NeedsProtector; } +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (const User *U : AI->users()) { + const auto *I = cast(U); + switch (I->getOpcode()) { + case Instruction::Store: + if (AI == cast(I)->getValueOperand()) + return true; + break; + case Instruction::AtomicCmpXchg: + // cmpxchg conceptually includes both a load and store from the same + // location. So, like store, the value being stored is what matters. + if (AI == cast(I)->getNewValOperand()) + return true; + break; + case Instruction::PtrToInt: + if (AI == cast(I)->getOperand(0)) + return true; + break; + case Instruction::Call: { + // Ignore intrinsics that do not become real instructions. 
+ // TODO: Narrow this to intrinsics that have store-like effects. + const auto *CI = cast(I); + if (!isa(CI) && !CI->isLifetimeStartOrEnd()) + return true; + break; + } + case Instruction::Invoke: + return true; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: + case Instruction::AddrSpaceCast: + if (HasAddressTaken(I)) + return true; + break; + case Instruction::PHI: { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + const auto *PN = cast(I); + if (VisitedPHIs.insert(PN).second) + if (HasAddressTaken(PN)) + return true; + break; + } + case Instruction::Load: + case Instruction::AtomicRMW: + case Instruction::Ret: + // These instructions take an address operand, but have load-like or + // other innocuous behavior that should not trigger a stack protector. + // atomicrmw conceptually has both load and store semantics, but the + // value being stored must be integer; so if a pointer is being stored, + // we'll catch it in the PtrToInt case above. + break; + default: + // Conservatively return true for any instruction that takes an address + // operand, but is not handled above. + return true; + } + } + return false; +} + /// Search for the first call to the llvm.stackprotector intrinsic and return it /// if present. 
static const CallInst *findStackProtectorIntrinsic(Function &F) { @@ -264,9 +325,7 @@ bool StackProtector::RequiresStackProtector() { continue; } - if (Strong && PointerMayBeCaptured(AI, - /* ReturnCaptures */ false, - /* StoreCaptures */ true)) { + if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf)); ORE.emit([&]() { diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 99b533e10b8..9c8143c55dc 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -221,7 +221,7 @@ void StackSlotColoring::InitializeSlots() { for (auto *I : Intervals) { LiveInterval &li = I->second; LLVM_DEBUG(li.dump()); - int FI = TargetRegisterInfo::stackSlot2Index(li.reg); + int FI = Register::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; @@ -268,7 +268,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; - int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + int FI = Register::stackSlot2Index(li->reg); uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { @@ -330,7 +330,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; @@ -343,7 +343,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int SS = Register::stackSlot2Index(li->reg); li->weight = SlotWeights[SS]; } // 
Sort them by new weight. diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp index 96821cadb1b..c72a04276a4 100644 --- a/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -13,9 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SwiftErrorValueTracking.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Value.h" diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index a0590a8a6cc..03c68a37e45 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -235,8 +235,8 @@ bool TailDuplicator::tailDuplicateAndUpdate( MachineInstr *Copy = Copies[i]; if (!Copy->isCopy()) continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); + Register Dst = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); if (MRI->hasOneNonDBGUse(Src) && MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. 
@@ -312,7 +312,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, if (!MI.isPHI()) break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { - unsigned SrcReg = MI.getOperand(i).getReg(); + Register SrcReg = MI.getOperand(i).getReg(); UsedByPhi->insert(SrcReg); } } @@ -340,17 +340,17 @@ void TailDuplicator::processPHI( DenseMap &LocalVRMap, SmallVectorImpl> &Copies, const DenseSet &RegsUsedByPhi, bool Remove) { - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); - unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + Register SrcReg = MI->getOperand(SrcOpIdx).getReg(); unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); const TargetRegisterClass *RC = MRI->getRegClass(DefReg); LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); // Insert a copy from source to the end of the block. The def register is the // available value liveout of the block. 
- unsigned NewDef = MRI->createVirtualRegister(RC); + Register NewDef = MRI->createVirtualRegister(RC); Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) addSSAUpdateEntry(DefReg, NewDef, PredBB); @@ -384,12 +384,12 @@ void TailDuplicator::duplicateInstruction( MachineOperand &MO = NewMI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); MO.setReg(NewReg); LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) @@ -433,7 +433,7 @@ void TailDuplicator::duplicateInstruction( auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); if (NewRC == nullptr) NewRC = OrigRC; - unsigned NewReg = MRI->createVirtualRegister(NewRC); + Register NewReg = MRI->createVirtualRegister(NewRC); BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewReg) .addReg(VI->second.Reg, 0, VI->second.SubReg); @@ -477,7 +477,7 @@ void TailDuplicator::updateSuccessorsPHIs( assert(Idx != 0); MachineOperand &MO0 = MI.getOperand(Idx); - unsigned Reg = MO0.getReg(); + Register Reg = MO0.getReg(); if (isDead) { // Folded into the previous BB. // There could be duplicate phi source entries. 
FIXME: Should sdisel diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9c4483cb240..9eeacc2584c 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" @@ -71,7 +72,9 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, // When interprocedural register allocation is enabled caller saved registers // are preferred over callee saved registers. - if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction())) + if (MF.getTarget().Options.EnableIPRA && + isSafeForNoCSROpt(MF.getFunction()) && + isProfitableForNoCSROpt(MF.getFunction())) return; // Get the callee saved register list... @@ -118,6 +121,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. 
+ for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; +} + int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { llvm_unreachable("getInitialCFAOffset() not implemented!"); } @@ -125,4 +140,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); -} \ No newline at end of file +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 868617ffe14..6cae3b86950 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -142,7 +143,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, while (Tail != MBB->end()) { auto MI = Tail++; if (MI->isCall()) - MBB->getParent()->updateCallSiteInfo(&*MI); + MBB->getParent()->eraseCallSiteInfo(&*MI); MBB->erase(MI); } @@ -183,10 +184,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); // Avoid calling isRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1) + bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1) ? MI.getOperand(Idx1).isRenamable() : false; - bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2) + bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2) ? 
MI.getOperand(Idx2).isRenamable() : false; // If destination is tied to either of the commuted source register, then @@ -228,9 +229,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); // Avoid calling setIsRenamable for virtual registers since we assert that // renamable property is only queried/set for physical registers. - if (TargetRegisterInfo::isPhysicalRegister(Reg1)) + if (Register::isPhysicalRegister(Reg1)) CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); - if (TargetRegisterInfo::isPhysicalRegister(Reg2)) + if (Register::isPhysicalRegister(Reg2)) CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable); return CommutedMI; } @@ -281,7 +282,7 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, return true; } -bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI, +bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { assert(!MI.isBundle() && @@ -393,7 +394,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, if (BitOffset < 0 || BitOffset % 8) return false; - Size = BitSize /= 8; + Size = BitSize / 8; Offset = (unsigned)BitOffset / 8; assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); @@ -442,16 +443,15 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, if (FoldOp.getSubReg() || LiveOp.getSubReg()) return nullptr; - unsigned FoldReg = FoldOp.getReg(); - unsigned LiveReg = LiveOp.getReg(); + Register FoldReg = FoldOp.getReg(); + Register LiveReg = LiveOp.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && - "Cannot fold physregs"); + assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs"); const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); - if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + 
if (Register::isPhysicalRegister(LiveOp.getReg())) return RC->contains(LiveOp.getReg()) ? RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) @@ -674,9 +674,9 @@ bool TargetInstrInfo::hasReassociableOperands( // reassociate. MachineInstr *MI1 = nullptr; MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg())) MI2 = MRI.getUniqueVRegDef(Op2.getReg()); // And they need to be in the trace (otherwise, they won't have a depth). @@ -805,27 +805,27 @@ void TargetInstrInfo::reassociateOps( MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); MachineOperand &OpC = Root.getOperand(0); - unsigned RegA = OpA.getReg(); - unsigned RegB = OpB.getReg(); - unsigned RegX = OpX.getReg(); - unsigned RegY = OpY.getReg(); - unsigned RegC = OpC.getReg(); + Register RegA = OpA.getReg(); + Register RegB = OpB.getReg(); + Register RegX = OpX.getReg(); + Register RegY = OpY.getReg(); + Register RegC = OpC.getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) + if (Register::isVirtualRegister(RegA)) MRI.constrainRegClass(RegA, RC); - if (TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegB)) MRI.constrainRegClass(RegB, RC); - if (TargetRegisterInfo::isVirtualRegister(RegX)) + if (Register::isVirtualRegister(RegX)) MRI.constrainRegClass(RegX, RC); - if (TargetRegisterInfo::isVirtualRegister(RegY)) + if (Register::isVirtualRegister(RegY)) MRI.constrainRegClass(RegY, RC); - if (TargetRegisterInfo::isVirtualRegister(RegC)) + if (Register::isVirtualRegister(RegC)) MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of // recycling RegB because the MachineCombiner's computation of the critical // path requires a new 
register definition rather than an existing one. - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); unsigned Opcode = Root.getOpcode(); @@ -880,21 +880,21 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) return false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); // A sub-register definition can only be rematerialized if the instruction // doesn't read the other parts of the register. Otherwise it is really a // read-modify-write operation on the full virtual register which cannot be // moved safely. - if (TargetRegisterInfo::isVirtualRegister(DefReg) && - MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) + if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() && + MI.readsVirtualRegister(DefReg)) return false; // A load from a fixed stack slot can be rematerialized. This may be @@ -924,12 +924,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; // Check for a well-behaved physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, @@ -1120,6 +1120,24 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, return (DefCycle != -1 && DefCycle <= 1); } +Optional +TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getMF(); + const MachineOperand *Op = nullptr; + DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});; + const MachineOperand *SrcRegOp, *DestRegOp; + + if (isCopyInstr(MI, SrcRegOp, DestRegOp)) { + Op = SrcRegOp; + return ParamLoadedValue(*Op, Expr); + } else if (MI.isMoveImmediate()) { + Op = &MI.getOperand(1); + return ParamLoadedValue(*Op, Expr); + } + + return None; +} + /// Both DefMI and UseMI must be valid. By default, call directly to the /// itinerary. This may be overriden by the target. int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, @@ -1227,3 +1245,5 @@ bool TargetInstrInfo::getInsertSubregInputs( InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); return true; } + +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 9b28c1a6c45..9b23012f47e 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::BZERO, "__bzero"); break; case Triple::aarch64: + case Triple::aarch64_32: setLibcallName(RTLIB::BZERO, "bzero"); break; default: @@ -197,6 +198,11 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } + if (TT.isPS4CPU()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + } + if (TT.isOSOpenBSD()) { setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); } @@ -578,13 +584,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { BooleanFloatContents = 
UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 18; - MinStackArgumentAlignment = 1; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -653,6 +653,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULFIX, VT, Expand); setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); + setOperationAction(ISD::UMULFIXSAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -689,6 +690,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SPLAT_VECTOR, VT, Expand); } // Constrained floating-point operations default to expand. 
@@ -708,16 +710,22 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::STRICT_FLOG, VT, Expand); setOperationAction(ISD::STRICT_FLOG10, VT, Expand); setOperationAction(ISD::STRICT_FLOG2, VT, Expand); + setOperationAction(ISD::STRICT_LRINT, VT, Expand); + setOperationAction(ISD::STRICT_LLRINT, VT, Expand); setOperationAction(ISD::STRICT_FRINT, VT, Expand); setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); setOperationAction(ISD::STRICT_FCEIL, VT, Expand); setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); + setOperationAction(ISD::STRICT_LROUND, VT, Expand); + setOperationAction(ISD::STRICT_LLROUND, VT, Expand); setOperationAction(ISD::STRICT_FROUND, VT, Expand); setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); // For most targets @llvm.get.dynamic.area.offset just returns 0. 
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); @@ -824,7 +832,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); assert((LA == TypeLegal || LA == TypeSoftenFloat || - ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + (NVT.isVector() || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && "Promote may not follow Expand or Promote"); if (LA == TypeSplitVector) @@ -1257,17 +1266,23 @@ void TargetLoweringBase::computeRegisterProperties( MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); bool IsLegalWiderType = false; + bool IsScalable = VT.isScalableVector(); LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); switch (PreferredAction) { - case TypePromoteInteger: + case TypePromoteInteger: { + MVT::SimpleValueType EndVT = IsScalable ? + MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : + MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; // Try to promote the elements of integer vectors. If no legal // promotion was found, fall through to the widen-vector method. - for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) { + for (unsigned nVT = i + 1; + (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() && - SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) { + SVT.getVectorNumElements() == NElts && + SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1279,23 +1294,37 @@ void TargetLoweringBase::computeRegisterProperties( if (IsLegalWiderType) break; LLVM_FALLTHROUGH; + } case TypeWidenVector: - // Try to widen the vector. 
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType) nVT; - if (SVT.getVectorElementType() == EltVT - && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; + if (isPowerOf2_32(NElts)) { + // Try to widen the vector. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts + && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) + break; + } else { + // Only widen to the next power of 2 to keep consistency with EVT. + MVT NVT = VT.getPow2VectorType(); + if (isTypeLegal(NVT)) { + TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; + RegisterTypeForVT[i] = NVT; + NumRegistersForVT[i] = 1; break; } } - if (IsLegalWiderType) - break; LLVM_FALLTHROUGH; case TypeSplitVector: @@ -1488,12 +1517,9 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } -bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, - const DataLayout &DL, EVT VT, - unsigned AddrSpace, - unsigned Alignment, - MachineMemOperand::Flags Flags, - bool *Fast) const { +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). 
@@ -1511,6 +1537,21 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); } +bool TargetLoweringBase::allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlignment(), MMO.getFlags(), + Fast); +} + +bool TargetLoweringBase::allowsMemoryAccess( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, + Flags, Fast); +} + bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4c8f75b237a..4978f4b9500 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" @@ -154,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::aarch64_32: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. 
@@ -375,7 +377,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0)); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -524,8 +526,8 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, if (!VM) report_fatal_error("MD_associated operand is not ValueAsMetadata"); - GlobalObject *OtherGO = dyn_cast(VM->getValue()); - return OtherGO ? dyn_cast(TM.getSymbol(OtherGO)) : nullptr; + auto *OtherGV = dyn_cast(VM->getValue()); + return OtherGV ? dyn_cast(TM.getSymbol(OtherGV)) : nullptr; } static unsigned getEntrySizeForKind(SectionKind Kind) { @@ -566,6 +568,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( SectionName = Attrs.getAttribute("bss-section").getValueAsString(); } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) { + SectionName = Attrs.getAttribute("relro-section").getValueAsString(); } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { SectionName = Attrs.getAttribute("data-section").getValueAsString(); } @@ -1107,8 +1111,8 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( } const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we 
replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the @@ -1165,12 +1169,10 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); - if (!StubSym.getPointer()) { - bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal(); - // With the assumption that IsIndirectLocal == GV->hasLocalLinkage(). + + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl::StubValueTy(const_cast(Sym), - !IsIndirectLocal); - } + !GV->hasLocalLinkage()); const MCExpr *BSymExpr = MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); @@ -1519,7 +1521,8 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx, // internally, so we use ".CRT$XCA00001" for them. SmallString<24> Name; raw_svector_ostream OS(Name); - OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority); + OS << ".CRT$X" << (IsCtor ? "C" : "T") << + (Priority < 200 ? 
'A' : 'T') << format("%05u", Priority); MCSectionCOFF *Sec = Ctx.getCOFFSection( Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); @@ -1595,7 +1598,8 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( static std::string APIntToHexString(const APInt &AI) { unsigned Width = (AI.getBitWidth() / 8) * 2; - std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); + std::string HexString = AI.toString(16, /*Signed=*/false); + transform(HexString.begin(), HexString.end(), HexString.begin(), tolower); unsigned Size = HexString.size(); assert(Width >= Size && "hex string is too large!"); HexString.insert(HexString.begin(), Width - Size, '0'); @@ -1819,3 +1823,82 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( llvm_unreachable("@llvm.global_dtors should have been lowered already"); return nullptr; } + +//===----------------------------------------------------------------------===// +// XCOFF +//===----------------------------------------------------------------------===// +MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + report_fatal_error("XCOFF explicit sections not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + assert(!TM.getFunctionSections() && !TM.getDataSections() && + "XCOFF unique sections not yet implemented."); + + // Common symbols go into a csect with matching name which will get mapped + // into the .bss section. + if (Kind.isBSSLocal() || Kind.isCommon()) { + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + return getContext().getXCOFFSection( + Name, Kind.isBSSLocal() ? 
XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM, + SC, Kind, /* BeginSymbolName */ nullptr); + } + + if (Kind.isText()) + return TextSection; + + if (Kind.isData()) + return DataSection; + + report_fatal_error("XCOFF other section types not yet implemented."); +} + +bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + report_fatal_error("TLOF XCOFF not yet implemented."); +} + +void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TgtM) { + TargetLoweringObjectFile::Initialize(Ctx, TgtM); + TTypeEncoding = 0; + PersonalityEncoding = 0; + LSDAEncoding = 0; +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF ctor section not yet implemented."); +} + +MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + report_fatal_error("XCOFF dtor section not yet implemented."); +} + +const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference( + const GlobalValue *LHS, const GlobalValue *RHS, + const TargetMachine &TM) const { + report_fatal_error("XCOFF not yet implemented."); +} + +XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( + const GlobalObject *GO) { + switch (GO->getLinkage()) { + case GlobalValue::InternalLinkage: + return XCOFF::C_HIDEXT; + case GlobalValue::ExternalLinkage: + case GlobalValue::CommonLinkage: + return XCOFF::C_EXT; + case GlobalValue::ExternalWeakLinkage: + return XCOFF::C_WEAKEXT; + default: + report_fatal_error( + "Unhandled linkage when mapping linkage to StorageClass."); + } +} diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 36df02692f8..f1f4f65adf7 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -49,9 +49,10 @@ using namespace llvm; -cl::opt EnableIPRA("enable-ipra", 
cl::init(false), cl::Hidden, - cl::desc("Enable interprocedural register allocation " - "to reduce load/store at procedure calls.")); +static cl::opt + EnableIPRA("enable-ipra", cl::init(false), cl::Hidden, + cl::desc("Enable interprocedural register allocation " + "to reduce load/store at procedure calls.")); static cl::opt DisablePostRASched("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc Scheduler")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -152,8 +153,10 @@ static cl::opt EnableGlobalISelAbort( // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). // Targets can return true in targetSchedulesPostRAScheduling() and // insert a PostRA scheduling pass wherever it wants. -cl::opt MISchedPostRA("misched-postra", cl::Hidden, - cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); +static cl::opt MISchedPostRA( + "misched-postra", cl::Hidden, + cl::desc( + "Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early. static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, @@ -175,10 +178,10 @@ static cl::opt UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. 
-const char *StartAfterOptName = "start-after"; -const char *StartBeforeOptName = "start-before"; -const char *StopAfterOptName = "stop-after"; -const char *StopBeforeOptName = "stop-before"; +static const char *StartAfterOptName = "start-after"; +static const char *StartBeforeOptName = "start-before"; +static const char *StopAfterOptName = "stop-after"; +static const char *StopBeforeOptName = "stop-before"; static cl::opt StartAfterOpt(StringRef(StartAfterOptName), @@ -654,6 +657,7 @@ void TargetPassConfig::addIRPasses() { // TODO: add a pass insertion point here addPass(createGCLoweringPass()); addPass(createShadowStackGCLoweringPass()); + addPass(createLowerConstantIntrinsicsPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -1231,5 +1235,5 @@ bool TargetPassConfig::isGISelCSEEnabled() const { } std::unique_ptr TargetPassConfig::getCSEConfig() const { - return make_unique(); + return std::make_unique(); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index f1b2ecf3243..e5592c31098 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -86,22 +86,21 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, namespace llvm { -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI, unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + else if (Register::isStackSlot(Reg)) + OS << "SS#" << Register::stackSlot2Index(Reg); + else if (Register::isVirtualRegister(Reg)) { StringRef Name = MRI ? 
MRI->getVRegName(Reg) : ""; if (Name != "") { OS << '%' << Name; } else { - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + OS << '%' << Register::virtReg2Index(Reg); } - } - else if (!TRI) + } else if (!TRI) OS << '$' << "physreg" << Reg; else if (Reg < TRI->getNumRegs()) { OS << '$'; @@ -143,8 +142,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); + if (Register::isVirtualRegister(Unit)) { + OS << '%' << Register::virtReg2Index(Unit); } else { OS << printRegUnit(Unit, TRI); } @@ -189,7 +188,8 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// the right type that contains this physreg. const TargetRegisterClass * TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); + assert(Register::isPhysicalRegister(reg) && + "reg must be a physical register"); // Pick the most sub register class of the right type that contains // this physreg. 
@@ -238,24 +238,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) { - const MVT VT(SVT); + const TargetRegisterInfo *TRI) { for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) { - const TargetRegisterClass *RC = - TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) - return RC; - } + if (unsigned Common = *A++ & *B++) + return TRI->getRegClass(I + countTrailingZeros(Common)); return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT) const { + const TargetRegisterClass *B) const { // First take care of the trivial cases. if (A == B) return A; @@ -264,7 +256,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); } const TargetRegisterClass * @@ -409,7 +401,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // Target-independent hints are either a physical or a virtual register. unsigned Phys = Reg; - if (VRM && isVirtualRegister(Phys)) + if (VRM && Register::isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); // Don't add the same reg twice (Hints_MRI may contain multiple virtual @@ -417,7 +409,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!HintedRegs.insert(Phys).second) continue; // Check that Phys is a valid hint in VirtReg's register class. 
- if (!isPhysicalRegister(Phys)) + if (!Register::isPhysicalRegister(Phys)) continue; if (MRI.isReserved(Phys)) continue; @@ -433,6 +425,20 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, return false; } +bool TargetRegisterInfo::isCalleeSavedPhysReg( + unsigned PhysReg, const MachineFunction &MF) const { + if (PhysReg == 0) + return false; + const uint32_t *callerPreservedRegs = + getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + if (callerPreservedRegs) { + assert(Register::isPhysicalRegister(PhysReg) && + "Expected physical register"); + return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1; + } + return false; +} + bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction().hasFnAttribute("no-realign-stack"); } @@ -466,7 +472,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const { const TargetRegisterClass *RC{}; - if (isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. @@ -501,7 +507,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!isVirtualRegister(CopySrcReg)) + if (!Register::isVirtualRegister(CopySrcReg)) return CopySrcReg; SrcReg = CopySrcReg; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 195279719ad..ce59452fd1b 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -300,7 +300,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // TODO: The following hack exists because predication passes do not // correctly append imp-use operands, and readsReg() strangely returns false // for predicated defs. 
- unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + Register Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getMF(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 43d87664696..ea971809d4e 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -230,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse() && MOReg != SavedReg) @@ -299,7 +299,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineOperand &MO = OtherMI.getOperand(i); if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (DefReg == MOReg) @@ -418,8 +418,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, } else return false; - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsSrcPhys = Register::isPhysicalRegister(SrcReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return true; } @@ -427,8 +427,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, /// given instruction, is killed by the given instruction. static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { - if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && - !LIS->isNotInMIMap(*MI)) { + if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. 
In this case it // sets a kill before recursively calling tryInstructionTransform() again. @@ -475,12 +474,12 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineInstr *DefMI = &MI; while (true) { // All uses of physical registers are likely to be kills. - if (TargetRegisterInfo::isPhysicalRegister(Reg) && + if (Register::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) return true; if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return true; MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just @@ -536,7 +535,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, } IsDstPhys = false; if (isTwoAddrUse(UseMI, Reg, DstReg)) { - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + IsDstPhys = Register::isPhysicalRegister(DstReg); return &UseMI; } return nullptr; @@ -546,13 +545,13 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, /// to. static unsigned getMappedReg(unsigned Reg, DenseMap &RegMap) { - while (TargetRegisterInfo::isVirtualRegister(Reg)) { + while (Register::isVirtualRegister(Reg)) { DenseMap::iterator SI = RegMap.find(Reg); if (SI == RegMap.end()) return 0; Reg = SI->second; } - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg; return 0; } @@ -683,7 +682,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, unsigned RegBIdx, unsigned RegCIdx, unsigned Dist) { - unsigned RegC = MI->getOperand(RegCIdx).getReg(); + Register RegC = MI->getOperand(RegCIdx).getReg(); LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); @@ -700,7 +699,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, // Update source register map. 
unsigned FromRegC = getMappedReg(RegC, SrcRegMap); if (FromRegC) { - unsigned RegA = MI->getOperand(DstIdx).getReg(); + Register RegA = MI->getOperand(DstIdx).getReg(); SrcRegMap[RegA] = FromRegC; } @@ -911,7 +910,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) @@ -955,7 +954,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) { @@ -1093,7 +1092,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (MO.isUse()) { if (!MOReg) continue; @@ -1105,7 +1104,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, Uses.insert(MOReg); if (isKill && MOReg != Reg) Kills.insert(MOReg); - } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + } else if (Register::isPhysicalRegister(MOReg)) { Defs.insert(MOReg); if (!MO.isDead()) LiveDefs.insert(MOReg); @@ -1130,7 +1129,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; - unsigned MOReg = MO.getReg(); + Register MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isUse()) { @@ -1154,8 +1153,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, unsigned MOReg = OtherDefs[i]; if (Uses.count(MOReg)) return false; - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - LiveDefs.count(MOReg)) + if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg)) return false; // Physical register def is seen. 
Defs.erase(MOReg); @@ -1208,8 +1206,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, return false; bool MadeChange = false; - unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); - unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + Register DstOpReg = MI->getOperand(DstOpIdx).getReg(); + Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); unsigned OtherOpIdx = MI->getDesc().getNumDefs(); for (; OtherOpIdx < OpsNum; OtherOpIdx++) { @@ -1221,7 +1219,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; - unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); bool AggressiveCommute = false; // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp @@ -1276,14 +1274,14 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return false; MachineInstr &MI = *mi; - unsigned regA = MI.getOperand(DstIdx).getReg(); - unsigned regB = MI.getOperand(SrcIdx).getReg(); + Register regA = MI.getOperand(DstIdx).getReg(); + Register regB = MI.getOperand(SrcIdx).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(regB) && + assert(Register::isVirtualRegister(regB) && "cannot make instruction into two-address form"); bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); - if (TargetRegisterInfo::isVirtualRegister(regA)) + if (Register::isVirtualRegister(regA)) scanUses(regA); bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); @@ -1363,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, const TargetRegisterClass *RC = TRI->getAllocatableClass( TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); - unsigned Reg = MRI->createVirtualRegister(RC); + Register Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; if (!TII->unfoldMemoryOperand(*MF, MI, Reg, 
/*UnfoldLoad=*/true, @@ -1399,8 +1397,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) @@ -1474,8 +1471,8 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { AnyOps = true; MachineOperand &SrcMO = MI->getOperand(SrcIdx); MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned SrcReg = SrcMO.getReg(); - unsigned DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); // Tied constraint already satisfied? if (SrcReg == DstReg) continue; @@ -1485,7 +1482,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. - if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (Register::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); @@ -1522,7 +1519,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned DstIdx = TiedPairs[tpi].second; const MachineOperand &DstMO = MI->getOperand(DstIdx); - unsigned RegA = DstMO.getReg(); + Register RegA = DstMO.getReg(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. 
@@ -1538,7 +1535,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } LastCopiedReg = RegA; - assert(TargetRegisterInfo::isVirtualRegister(RegB) && + assert(Register::isVirtualRegister(RegB) && "cannot make instruction into two-address form"); #ifndef NDEBUG @@ -1559,14 +1556,13 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, MIB.addReg(RegB, 0, SubRegB); const TargetRegisterClass *RC = MRI->getRegClass(RegB); if (SubRegB) { - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), SubRegB) && "tied subregister must be a truncation"); // The superreg class will not be used to constrain the subreg class. RC = nullptr; - } - else { + } else { assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) && "tied subregister must be a truncation"); } @@ -1581,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); - if (TargetRegisterInfo::isVirtualRegister(RegA)) { + if (Register::isVirtualRegister(RegA)) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = @@ -1601,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } // Make sure regA is a legal regclass for the SrcIdx operand. 
- if (TargetRegisterInfo::isVirtualRegister(RegA) && - TargetRegisterInfo::isVirtualRegister(RegB)) + if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB)) MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); // The getMatchingSuper asserts guarantee that the register class projected @@ -1744,8 +1739,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); - unsigned DstReg = mi->getOperand(DstIdx).getReg(); + Register SrcReg = mi->getOperand(SrcIdx).getReg(); + Register DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { // The tied operands have been eliminated or shifted further down @@ -1803,9 +1798,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; - unsigned DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(DstReg) || + Register DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) || !(MI.getNumOperands() & 1)) { LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); @@ -1821,7 +1815,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { bool DefEmitted = false; for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI.getOperand(i); - unsigned SrcReg = UseMO.getReg(); + Register SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); // Nothing needs to be inserted for undef operands. 
if (UseMO.isUndef()) @@ -1855,7 +1849,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { DefEmitted = true; // Update LiveVariables' kill info. - if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (LV && isKill && !Register::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, *CopyMI); LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 177bab32bcc..3289eff7133 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -103,7 +103,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { df_iterator_default_set Reachable; bool ModifiedPHI = false; - MMI = getAnalysisIfAvailable(); + auto *MMIWP = getAnalysisIfAvailable(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; MachineDominatorTree *MDT = getAnalysisIfAvailable(); MachineLoopInfo *MLI = getAnalysisIfAvailable(); @@ -146,8 +147,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } // Actually remove the blocks now. - for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + // Remove any call site information for calls in the block. + for (auto &I : DeadBlocks[i]->instrs()) + if (I.isCall(MachineInstr::IgnoreBundle)) + DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I); + DeadBlocks[i]->eraseFromParent(); + } // Cleanup PHI nodes. 
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { @@ -167,8 +174,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (phi->getNumOperands() == 3) { const MachineOperand &Input = phi->getOperand(1); const MachineOperand &Output = phi->getOperand(0); - unsigned InputReg = Input.getReg(); - unsigned OutputReg = Output.getReg(); + Register InputReg = Input.getReg(); + Register OutputReg = Output.getReg(); assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp index a911cdcbec9..73b862d51c0 100644 --- a/lib/CodeGen/ValueTypes.cpp +++ b/lib/CodeGen/ValueTypes.cpp @@ -115,8 +115,8 @@ std::string EVT::getEVTString() const { switch (V.SimpleTy) { default: if (isVector()) - return "v" + utostr(getVectorNumElements()) + - getVectorElementType().getEVTString(); + return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements()) + + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); @@ -144,6 +144,7 @@ std::string EVT::getEVTString() const { case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; case MVT::v128i1: return "v128i1"; + case MVT::v256i1: return "v256i1"; case MVT::v512i1: return "v512i1"; case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; @@ -157,6 +158,7 @@ std::string EVT::getEVTString() const { case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; + case MVT::v3i16: return "v3i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; @@ -187,8 +189,11 @@ std::string EVT::getEVTString() const { case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; + case MVT::v3f16: return "v3f16"; case MVT::v4f16: return "v4f16"; case MVT::v8f16: return "v8f16"; + case 
MVT::v16f16: return "v16f16"; + case MVT::v32f16: return "v32f16"; case MVT::v3f32: return "v3f32"; case MVT::v4f32: return "v4f32"; case MVT::v5f32: return "v5f32"; @@ -205,6 +210,48 @@ std::string EVT::getEVTString() const { case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; + case MVT::nxv1i1: return "nxv1i1"; + case MVT::nxv2i1: return "nxv2i1"; + case MVT::nxv4i1: return "nxv4i1"; + case MVT::nxv8i1: return "nxv8i1"; + case MVT::nxv16i1: return "nxv16i1"; + case MVT::nxv32i1: return "nxv32i1"; + case MVT::nxv1i8: return "nxv1i8"; + case MVT::nxv2i8: return "nxv2i8"; + case MVT::nxv4i8: return "nxv4i8"; + case MVT::nxv8i8: return "nxv8i8"; + case MVT::nxv16i8: return "nxv16i8"; + case MVT::nxv32i8: return "nxv32i8"; + case MVT::nxv1i16: return "nxv1i16"; + case MVT::nxv2i16: return "nxv2i16"; + case MVT::nxv4i16: return "nxv4i16"; + case MVT::nxv8i16: return "nxv8i16"; + case MVT::nxv16i16:return "nxv16i16"; + case MVT::nxv32i16:return "nxv32i16"; + case MVT::nxv1i32: return "nxv1i32"; + case MVT::nxv2i32: return "nxv2i32"; + case MVT::nxv4i32: return "nxv4i32"; + case MVT::nxv8i32: return "nxv8i32"; + case MVT::nxv16i32:return "nxv16i32"; + case MVT::nxv32i32:return "nxv32i32"; + case MVT::nxv1i64: return "nxv1i64"; + case MVT::nxv2i64: return "nxv2i64"; + case MVT::nxv4i64: return "nxv4i64"; + case MVT::nxv8i64: return "nxv8i64"; + case MVT::nxv16i64:return "nxv16i64"; + case MVT::nxv32i64:return "nxv32i64"; + case MVT::nxv2f16: return "nxv2f16"; + case MVT::nxv4f16: return "nxv4f16"; + case MVT::nxv8f16: return "nxv8f16"; + case MVT::nxv1f32: return "nxv1f32"; + case MVT::nxv2f32: return "nxv2f32"; + case MVT::nxv4f32: return "nxv4f32"; + case MVT::nxv8f32: return "nxv8f32"; + case MVT::nxv16f32:return "nxv16f32"; + case MVT::nxv1f64: return "nxv1f64"; + case MVT::nxv2f64: return "nxv2f64"; + case MVT::nxv4f64: return "nxv4f64"; + case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case 
MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; @@ -241,6 +288,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); + case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256); case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); @@ -254,6 +302,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); + case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); @@ -282,8 +331,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16); + case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32); case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return 
VectorType::get(Type::getFloatTy(Context), 2); case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3); @@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::nxv1i1: + return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i1: + return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i1: + return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i1: + return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i1: + return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i1: + return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i8: + return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i8: + return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i8: + return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i8: + return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i8: + return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i8: + return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i16: + return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i16: + return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i16: + return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i16: + return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i16: + return 
VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i16: + return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv1i32: + return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i32: + return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i32: + return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i32: + return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i32: + return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true); + case MVT::nxv32i32: + return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true); + case MVT::nxv1i64: + return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true); + case MVT::nxv2i64: + return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true); + case MVT::nxv4i64: + return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true); + case MVT::nxv8i64: + return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true); + case MVT::nxv16i64: + return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true); + case MVT::nxv32i64: + return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true); + case MVT::nxv2f16: + return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f16: + return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f16: + return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true); + case MVT::nxv1f32: + return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true); + case MVT::nxv2f32: + return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f32: + return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f32: + return VectorType::get(Type::getFloatTy(Context), 8, 
/*Scalable=*/ true); + case MVT::nxv16f32: + return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true); + case MVT::nxv1f64: + return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true); + case MVT::nxv2f64: + return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true); + case MVT::nxv4f64: + return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true); + case MVT::nxv8f64: + return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true); case MVT::Metadata: return Type::getMetadataTy(Context); - } + } } /// Return the value type corresponding to the specified type. This returns all @@ -329,7 +465,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::VectorTyID: { VectorType *VTy = cast(Ty); return getVectorVT( - getVT(VTy->getElementType(), false), VTy->getNumElements()); + getVT(VTy->getElementType(), /*HandleUnknown=*/ false), + VTy->getElementCount()); } } } @@ -345,8 +482,9 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ return getIntegerVT(Ty->getContext(), cast(Ty)->getBitWidth()); case Type::VectorTyID: { VectorType *VTy = cast(Ty); - return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), - VTy->getNumElements()); + return getVectorVT(Ty->getContext(), + getEVT(VTy->getElementType(), /*HandleUnknown=*/ false), + VTy->getElementCount()); } } } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 4a06704a887..5312e2eea96 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -80,15 +80,14 @@ void VirtRegMap::grow() { Virt2SplitMap.resize(NumRegs); } -void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && +void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) { + assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg)); 
+ assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG && "attempt to assign physical register to already mapped " "virtual register"); assert(!getRegInfo().isReserved(physReg) && "Attempt to map virtReg to a reserved physReg"); - Virt2PhysMap[virtReg] = physReg; + Virt2PhysMap[virtReg.id()] = physReg; } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { @@ -99,46 +98,46 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { return SS; } -bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) { - unsigned Hint = MRI->getSimpleHint(VirtReg); - if (!Hint) +bool VirtRegMap::hasPreferredPhys(Register VirtReg) { + Register Hint = MRI->getSimpleHint(VirtReg); + if (!Hint.isValid()) return false; - if (TargetRegisterInfo::isVirtualRegister(Hint)) + if (Hint.isVirtual()) Hint = getPhys(Hint); return getPhys(VirtReg) == Hint; } -bool VirtRegMap::hasKnownPreference(unsigned VirtReg) { +bool VirtRegMap::hasKnownPreference(Register VirtReg) { std::pair Hint = MRI->getRegAllocationHint(VirtReg); - if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) + if (Register::isPhysicalRegister(Hint.second)) return true; - if (TargetRegisterInfo::isVirtualRegister(Hint.second)) + if (Register::isVirtualRegister(Hint.second)) return hasPhys(Hint.second); return false; } -int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && +int VirtRegMap::assignVirt2StackSlot(Register virtReg) { + assert(virtReg.isVirtual()); + assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); + return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC); } -void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { - 
assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && +void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) { + assert(virtReg.isVirtual()); + assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); assert((SS >= 0 || (SS >= MF->getFrameInfo().getObjectIndexBegin())) && "illegal fixed frame index"); - Virt2StackSlotMap[virtReg] = SS; + Virt2StackSlotMap[virtReg.id()] = SS; } void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << "********** REGISTER MAP **********\n"; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { OS << '[' << printReg(Reg, TRI) << " -> " << printReg(Virt2PhysMap[Reg], TRI) << "] " @@ -147,7 +146,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { } for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; @@ -185,10 +184,10 @@ class VirtRegRewriter : public MachineFunctionPass { void rewrite(); void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; - void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; - bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const; + bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const; public: static char ID; @@ -265,7 +264,7 @@ 
bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { } void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, - unsigned PhysReg) const { + Register PhysReg) const { assert(!LI.empty()); assert(LI.hasSubRanges()); @@ -312,7 +311,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, // assignments. void VirtRegRewriter::addMBBLiveIns() { for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { - unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + Register VirtReg = Register::index2VirtReg(Idx); if (MRI->reg_nodbg_empty(VirtReg)) continue; LiveInterval &LI = LIS->getInterval(VirtReg); @@ -320,7 +319,7 @@ void VirtRegRewriter::addMBBLiveIns() { continue; // This is a virtual register that is live across basic blocks. Its // assigned PhysReg must be marked as live-in to those blocks. - unsigned PhysReg = VRM->getPhys(VirtReg); + Register PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); if (LI.hasSubRanges()) { @@ -353,7 +352,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { if (MO.isUndef()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); const LiveInterval &LI = LIS->getInterval(Reg); const MachineInstr &MI = *MO.getParent(); SlotIndex BaseIndex = LIS->getInstructionIndex(MI); @@ -469,7 +468,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { /// \pre \p MI defines a subregister of a virtual register that /// has been assigned to \p SuperPhysReg. 
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, - unsigned SuperPhysReg) const { + Register SuperPhysReg) const { SlotIndex MIIndex = LIS->getInstructionIndex(MI); SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); @@ -493,9 +492,9 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); - SmallVector SuperDeads; - SmallVector SuperDefs; - SmallVector SuperKills; + SmallVector SuperDeads; + SmallVector SuperDefs; + SmallVector SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { @@ -513,10 +512,10 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; - unsigned VirtReg = MO.getReg(); - unsigned PhysReg = VRM->getPhys(VirtReg); + Register VirtReg = MO.getReg(); + Register PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Instruction uses unmapped VirtReg"); assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); @@ -562,7 +561,7 @@ void VirtRegRewriter::rewrite() { // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, SubReg); - assert(PhysReg && "Invalid SubReg for physical register"); + assert(PhysReg.isValid() && "Invalid SubReg for physical register"); MO.setSubReg(0); } // Rewrite. 
Note we could have used MachineOperand::substPhysReg(), but diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 19c59e9542b..119c3fd1ec7 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet( MIB.add(MO); Terminators.push_back(&T); if (T.isCall()) - MF.updateCallSiteInfo(&T); + MF.eraseCallSiteInfo(&T); } } } diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index ec4773d571c..dd6f75f97a4 100644 --- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -209,14 +209,6 @@ struct VisitHelper { } } - VisitHelper(TypeVisitorCallbackPipeline &Callbacks, VisitorDataSource Source) - : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) { - if (Source == VDS_BytesPresent) { - Pipeline = Callbacks; - Pipeline.addCallbackToPipelineFront(Deserializer); - } - } - TypeDeserializer Deserializer; TypeVisitorCallbackPipeline Pipeline; CVTypeVisitor Visitor; @@ -230,13 +222,6 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, return V.Visitor.visitTypeRecord(Record, Index); } -Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, - TypeVisitorCallbackPipeline &Callbacks, - VisitorDataSource Source) { - VisitHelper V(Callbacks, Source); - return V.Visitor.visitTypeRecord(Record, Index); -} - Error llvm::codeview::visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, VisitorDataSource Source) { diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 2f49474115a..36a384baa13 100644 --- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -20,7 +20,6 @@ Error CodeViewRecordIO::beginRecord(Optional MaxLength) { Limit.MaxLength = MaxLength; Limit.BeginOffset = getCurrentOffset(); 
Limits.push_back(Limit); - resetStreamedLen(); return Error::success(); } @@ -50,6 +49,7 @@ Error CodeViewRecordIO::endRecord() { Streamer->EmitBytes(BytesSR); --PaddingBytes; } + resetStreamedLen(); } return Error::success(); } @@ -126,7 +126,11 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector &Bytes, Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) { if (isStreaming()) { - emitComment(Comment); + std::string TypeNameStr = Streamer->getTypeName(TypeInd); + if (!TypeNameStr.empty()) + emitComment(Comment + ": " + TypeNameStr); + else + emitComment(Comment); Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex())); incrStreamedLen(sizeof(TypeInd.getIndex())); } else if (isWriting()) { diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp index 54e68ae4ea9..82f6713a88f 100644 --- a/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/lib/DebugInfo/CodeView/EnumTables.cpp @@ -300,6 +300,128 @@ static const EnumEntry CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_READ), CV_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE)}; +static const EnumEntry ClassOptionNames[] = { + CV_ENUM_CLASS_ENT(ClassOptions, Packed), + CV_ENUM_CLASS_ENT(ClassOptions, HasConstructorOrDestructor), + CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedOperator), + CV_ENUM_CLASS_ENT(ClassOptions, Nested), + CV_ENUM_CLASS_ENT(ClassOptions, ContainsNestedClass), + CV_ENUM_CLASS_ENT(ClassOptions, HasOverloadedAssignmentOperator), + CV_ENUM_CLASS_ENT(ClassOptions, HasConversionOperator), + CV_ENUM_CLASS_ENT(ClassOptions, ForwardReference), + CV_ENUM_CLASS_ENT(ClassOptions, Scoped), + CV_ENUM_CLASS_ENT(ClassOptions, HasUniqueName), + CV_ENUM_CLASS_ENT(ClassOptions, Sealed), + CV_ENUM_CLASS_ENT(ClassOptions, Intrinsic), +}; + +static const EnumEntry MemberAccessNames[] = { + CV_ENUM_CLASS_ENT(MemberAccess, None), + CV_ENUM_CLASS_ENT(MemberAccess, Private), + CV_ENUM_CLASS_ENT(MemberAccess, Protected), + CV_ENUM_CLASS_ENT(MemberAccess, Public), +}; + +static 
const EnumEntry MethodOptionNames[] = { + CV_ENUM_CLASS_ENT(MethodOptions, Pseudo), + CV_ENUM_CLASS_ENT(MethodOptions, NoInherit), + CV_ENUM_CLASS_ENT(MethodOptions, NoConstruct), + CV_ENUM_CLASS_ENT(MethodOptions, CompilerGenerated), + CV_ENUM_CLASS_ENT(MethodOptions, Sealed), +}; + +static const EnumEntry MemberKindNames[] = { + CV_ENUM_CLASS_ENT(MethodKind, Vanilla), + CV_ENUM_CLASS_ENT(MethodKind, Virtual), + CV_ENUM_CLASS_ENT(MethodKind, Static), + CV_ENUM_CLASS_ENT(MethodKind, Friend), + CV_ENUM_CLASS_ENT(MethodKind, IntroducingVirtual), + CV_ENUM_CLASS_ENT(MethodKind, PureVirtual), + CV_ENUM_CLASS_ENT(MethodKind, PureIntroducingVirtual), +}; + +static const EnumEntry PtrKindNames[] = { + CV_ENUM_CLASS_ENT(PointerKind, Near16), + CV_ENUM_CLASS_ENT(PointerKind, Far16), + CV_ENUM_CLASS_ENT(PointerKind, Huge16), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegment), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnValue), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentValue), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnAddress), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSegmentAddress), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnType), + CV_ENUM_CLASS_ENT(PointerKind, BasedOnSelf), + CV_ENUM_CLASS_ENT(PointerKind, Near32), + CV_ENUM_CLASS_ENT(PointerKind, Far32), + CV_ENUM_CLASS_ENT(PointerKind, Near64), +}; + +static const EnumEntry PtrModeNames[] = { + CV_ENUM_CLASS_ENT(PointerMode, Pointer), + CV_ENUM_CLASS_ENT(PointerMode, LValueReference), + CV_ENUM_CLASS_ENT(PointerMode, PointerToDataMember), + CV_ENUM_CLASS_ENT(PointerMode, PointerToMemberFunction), + CV_ENUM_CLASS_ENT(PointerMode, RValueReference), +}; + +static const EnumEntry PtrMemberRepNames[] = { + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, Unknown), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, MultipleInheritanceData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, VirtualInheritanceData), + 
CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralData), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, SingleInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, + MultipleInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, + VirtualInheritanceFunction), + CV_ENUM_CLASS_ENT(PointerToMemberRepresentation, GeneralFunction), +}; + +static const EnumEntry TypeModifierNames[] = { + CV_ENUM_CLASS_ENT(ModifierOptions, Const), + CV_ENUM_CLASS_ENT(ModifierOptions, Volatile), + CV_ENUM_CLASS_ENT(ModifierOptions, Unaligned), +}; + +static const EnumEntry CallingConventions[] = { + CV_ENUM_CLASS_ENT(CallingConvention, NearC), + CV_ENUM_CLASS_ENT(CallingConvention, FarC), + CV_ENUM_CLASS_ENT(CallingConvention, NearPascal), + CV_ENUM_CLASS_ENT(CallingConvention, FarPascal), + CV_ENUM_CLASS_ENT(CallingConvention, NearFast), + CV_ENUM_CLASS_ENT(CallingConvention, FarFast), + CV_ENUM_CLASS_ENT(CallingConvention, NearStdCall), + CV_ENUM_CLASS_ENT(CallingConvention, FarStdCall), + CV_ENUM_CLASS_ENT(CallingConvention, NearSysCall), + CV_ENUM_CLASS_ENT(CallingConvention, FarSysCall), + CV_ENUM_CLASS_ENT(CallingConvention, ThisCall), + CV_ENUM_CLASS_ENT(CallingConvention, MipsCall), + CV_ENUM_CLASS_ENT(CallingConvention, Generic), + CV_ENUM_CLASS_ENT(CallingConvention, AlphaCall), + CV_ENUM_CLASS_ENT(CallingConvention, PpcCall), + CV_ENUM_CLASS_ENT(CallingConvention, SHCall), + CV_ENUM_CLASS_ENT(CallingConvention, ArmCall), + CV_ENUM_CLASS_ENT(CallingConvention, AM33Call), + CV_ENUM_CLASS_ENT(CallingConvention, TriCall), + CV_ENUM_CLASS_ENT(CallingConvention, SH5Call), + CV_ENUM_CLASS_ENT(CallingConvention, M32RCall), + CV_ENUM_CLASS_ENT(CallingConvention, ClrCall), + CV_ENUM_CLASS_ENT(CallingConvention, Inline), + CV_ENUM_CLASS_ENT(CallingConvention, NearVector), +}; + +static const EnumEntry FunctionOptionEnum[] = { + CV_ENUM_CLASS_ENT(FunctionOptions, CxxReturnUdt), + CV_ENUM_CLASS_ENT(FunctionOptions, Constructor), + 
CV_ENUM_CLASS_ENT(FunctionOptions, ConstructorWithVirtualBases), +}; + +static const EnumEntry LabelTypeEnum[] = { + CV_ENUM_CLASS_ENT(LabelType, Near), + CV_ENUM_CLASS_ENT(LabelType, Far), +}; + namespace llvm { namespace codeview { @@ -379,5 +501,49 @@ getImageSectionCharacteristicNames() { return makeArrayRef(ImageSectionCharacteristicNames); } +ArrayRef> getClassOptionNames() { + return makeArrayRef(ClassOptionNames); +} + +ArrayRef> getMemberAccessNames() { + return makeArrayRef(MemberAccessNames); +} + +ArrayRef> getMethodOptionNames() { + return makeArrayRef(MethodOptionNames); +} + +ArrayRef> getMemberKindNames() { + return makeArrayRef(MemberKindNames); +} + +ArrayRef> getPtrKindNames() { + return makeArrayRef(PtrKindNames); +} + +ArrayRef> getPtrModeNames() { + return makeArrayRef(PtrModeNames); +} + +ArrayRef> getPtrMemberRepNames() { + return makeArrayRef(PtrMemberRepNames); +} + +ArrayRef> getTypeModifierNames() { + return makeArrayRef(TypeModifierNames); +} + +ArrayRef> getCallingConventions() { + return makeArrayRef(CallingConventions); +} + +ArrayRef> getFunctionOptionEnum() { + return makeArrayRef(FunctionOptionEnum); +} + +ArrayRef> getLabelTypeEnum() { + return makeArrayRef(LabelTypeEnum); +} + } // end namespace codeview } // end namespace llvm diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 27cb7e35234..45b63983beb 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -315,7 +315,7 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - W.printNumber("Offset", DefRangeFramePointerRel.Offset); + W.printNumber("Offset", DefRangeFramePointerRel.Hdr.Offset); printLocalVariableAddrRange(DefRangeFramePointerRel.Range, DefRangeFramePointerRel.getRelocationOffset()); printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps); diff --git 
a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index 70889839ef4..3b627930e27 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -229,7 +229,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { Error SymbolRecordMapping::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - error(IO.mapInteger(DefRangeFramePointerRel.Offset)); + error(IO.mapObject(DefRangeFramePointerRel.Hdr.Offset)); error(mapLocalVariableAddrRange(IO, DefRangeFramePointerRel.Range)); error(IO.mapVectorTail(DefRangeFramePointerRel.Gaps, MapGap())); diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 47928c2eef6..1aded589e56 100644 --- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -7,24 +7,125 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" using namespace llvm; using namespace llvm::codeview; +namespace { + #define error(X) \ if (auto EC = X) \ return EC; -namespace { +static const EnumEntry LeafTypeNames[] = { +#define CV_TYPE(enum, val) {#enum, enum}, +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +}; + +static StringRef getLeafTypeName(TypeLeafKind LT) { + switch (LT) { +#define TYPE_RECORD(ename, value, name) \ + case ename: \ + return #name; +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: + break; + } + return "UnknownLeaf"; +} + +template +static bool compEnumNames(const EnumEntry &lhs, const EnumEntry &rhs) { + return lhs.Name < rhs.Name; +} + +template +static std::string getFlagNames(CodeViewRecordIO &IO, T Value, + ArrayRef> Flags) { + if (!IO.isStreaming()) + return std::string(""); + typedef EnumEntry 
FlagEntry; + typedef SmallVector FlagVector; + FlagVector SetFlags; + for (const auto &Flag : Flags) { + if (Flag.Value == 0) + continue; + if ((Value & Flag.Value) == Flag.Value) { + SetFlags.push_back(Flag); + } + } + + llvm::sort(SetFlags, &compEnumNames); + + std::string FlagLabel; + bool FirstOcc = true; + for (const auto &Flag : SetFlags) { + if (FirstOcc) + FirstOcc = false; + else + FlagLabel += (" | "); + + FlagLabel += (Flag.Name.str() + " (0x" + utohexstr(Flag.Value) + ")"); + } + + if (!FlagLabel.empty()) { + std::string LabelWithBraces(" ( "); + LabelWithBraces += FlagLabel + " )"; + return LabelWithBraces; + } else + return FlagLabel; +} + +template +static StringRef getEnumName(CodeViewRecordIO &IO, T Value, + ArrayRef> EnumValues) { + if (!IO.isStreaming()) + return ""; + StringRef Name; + for (const auto &EnumItem : EnumValues) { + if (EnumItem.Value == Value) { + Name = EnumItem.Name; + break; + } + } + + return Name; +} + +static std::string getMemberAttributes(CodeViewRecordIO &IO, + MemberAccess Access, MethodKind Kind, + MethodOptions Options) { + if (!IO.isStreaming()) + return ""; + std::string AccessSpecifier = + getEnumName(IO, uint8_t(Access), makeArrayRef(getMemberAccessNames())); + std::string MemberAttrs(AccessSpecifier); + if (Kind != MethodKind::Vanilla) { + std::string MethodKind = + getEnumName(IO, unsigned(Kind), makeArrayRef(getMemberKindNames())); + MemberAttrs += ", " + MethodKind; + } + if (Options != MethodOptions::None) { + std::string MethodOptions = getFlagNames( + IO, unsigned(Options), makeArrayRef(getMethodOptionNames())); + MemberAttrs += ", " + MethodOptions; + } + return MemberAttrs; +} + struct MapOneMethodRecord { explicit MapOneMethodRecord(bool IsFromOverloadList) : IsFromOverloadList(IsFromOverloadList) {} Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const { - error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Method.getAccess(), 
Method.getMethodKind(), Method.getOptions()); + error(IO.mapInteger(Method.Attrs.Attrs, "Attrs: " + Attrs)); if (IsFromOverloadList) { uint16_t Padding = 0; - error(IO.mapInteger(Padding, "Padding")); + error(IO.mapInteger(Padding)); } error(IO.mapInteger(Method.Type, "Type")); if (Method.isIntroducingVirtual()) { @@ -41,7 +142,7 @@ struct MapOneMethodRecord { private: bool IsFromOverloadList; }; -} +} // namespace static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name, StringRef &UniqueName, bool HasUniqueName) { @@ -96,10 +197,22 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) { MaxLen = MaxRecordLength - sizeof(RecordPrefix); error(IO.beginRecord(MaxLen)); TypeKind = CVR.kind(); + + if (IO.isStreaming()) { + auto RecordKind = CVR.kind(); + uint16_t RecordLen = CVR.length() - 2; + std::string RecordKindName = + getEnumName(IO, unsigned(RecordKind), makeArrayRef(LeafTypeNames)); + error(IO.mapInteger(RecordLen, "Record length")); + error(IO.mapEnum(RecordKind, "Record kind: " + RecordKindName)); + } return Error::success(); } Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) { + if (IO.isStreaming()) + IO.emitRawComment(" " + getLeafTypeName(CVR.kind()) + " (0x" + + utohexstr(Index.getIndex()) + ")"); return visitTypeBegin(CVR); } @@ -121,11 +234,21 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) { // followed by the subrecord, followed by a continuation, and that entire // sequence spaws `MaxRecordLength` bytes. So the record's length is // calculated as follows. 
+ constexpr uint32_t ContinuationLength = 8; error(IO.beginRecord(MaxRecordLength - sizeof(RecordPrefix) - ContinuationLength)); MemberKind = Record.Kind; + if (IO.isStreaming()) { + std::string MemberKindName = getLeafTypeName(Record.Kind); + MemberKindName += + " ( " + + (getEnumName(IO, unsigned(Record.Kind), makeArrayRef(LeafTypeNames))) + .str() + + " )"; + error(IO.mapEnum(Record.Kind, "Member kind: " + MemberKindName)); + } return Error::success(); } @@ -144,16 +267,24 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) { + std::string ModifierNames = + getFlagNames(IO, static_cast(Record.Modifiers), + makeArrayRef(getTypeModifierNames())); error(IO.mapInteger(Record.ModifiedType, "ModifiedType")); - error(IO.mapEnum(Record.Modifiers, "Modifiers")); + error(IO.mapEnum(Record.Modifiers, "Modifiers" + ModifierNames)); return Error::success(); } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ProcedureRecord &Record) { + std::string CallingConvName = getEnumName( + IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions())); + std::string FuncOptionNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getFunctionOptionEnum())); error(IO.mapInteger(Record.ReturnType, "ReturnType")); - error(IO.mapEnum(Record.CallConv, "CallingConvention")); - error(IO.mapEnum(Record.Options, "FunctionOptions")); + error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName)); + error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames)); error(IO.mapInteger(Record.ParameterCount, "NumParameters")); error(IO.mapInteger(Record.ArgumentList, "ArgListType")); @@ -162,11 +293,16 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, Error TypeRecordMapping::visitKnownRecord(CVType &CVR, MemberFunctionRecord &Record) { + std::string CallingConvName = getEnumName( + IO, uint8_t(Record.CallConv), 
makeArrayRef(getCallingConventions())); + std::string FuncOptionNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getFunctionOptionEnum())); error(IO.mapInteger(Record.ReturnType, "ReturnType")); error(IO.mapInteger(Record.ClassType, "ClassType")); error(IO.mapInteger(Record.ThisType, "ThisType")); - error(IO.mapEnum(Record.CallConv, "CallingConvention")); - error(IO.mapEnum(Record.Options, "FunctionOptions")); + error(IO.mapEnum(Record.CallConv, "CallingConvention: " + CallingConvName)); + error(IO.mapEnum(Record.Options, "FunctionOptions" + FuncOptionNames)); error(IO.mapInteger(Record.ParameterCount, "NumParameters")); error(IO.mapInteger(Record.ArgumentList, "ArgListType")); error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment")); @@ -197,8 +333,40 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { + + SmallString<128> Attr("Attrs: "); + + if (IO.isStreaming()) { + std::string PtrType = getEnumName(IO, unsigned(Record.getPointerKind()), + makeArrayRef(getPtrKindNames())); + Attr += "[ Type: " + PtrType; + + std::string PtrMode = getEnumName(IO, unsigned(Record.getMode()), + makeArrayRef(getPtrModeNames())); + Attr += ", Mode: " + PtrMode; + + auto PtrSizeOf = Record.getSize(); + Attr += ", SizeOf: " + itostr(PtrSizeOf); + + if (Record.isFlat()) + Attr += ", isFlat"; + if (Record.isConst()) + Attr += ", isConst"; + if (Record.isVolatile()) + Attr += ", isVolatile"; + if (Record.isUnaligned()) + Attr += ", isUnaligned"; + if (Record.isRestrict()) + Attr += ", isRestricted"; + if (Record.isLValueReferenceThisPtr()) + Attr += ", isThisPtr&"; + if (Record.isRValueReferenceThisPtr()) + Attr += ", isThisPtr&&"; + Attr += " ]"; + } + error(IO.mapInteger(Record.ReferentType, "PointeeType")); - error(IO.mapInteger(Record.Attrs, "Attributes")); + error(IO.mapInteger(Record.Attrs, Attr)); if (Record.isPointerToMember()) { if (IO.isReading()) @@ -206,7 
+374,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { MemberPointerInfo &M = *Record.MemberInfo; error(IO.mapInteger(M.ContainingType, "ClassType")); - error(IO.mapEnum(M.Representation, "Representation")); + std::string PtrMemberGetRepresentation = getEnumName( + IO, uint16_t(M.Representation), makeArrayRef(getPtrMemberRepNames())); + error(IO.mapEnum(M.Representation, + "Representation: " + PtrMemberGetRepresentation)); } return Error::success(); @@ -226,8 +397,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) { (CVR.kind() == TypeLeafKind::LF_CLASS) || (CVR.kind() == TypeLeafKind::LF_INTERFACE)); + std::string PropertiesNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "MemberCount")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.FieldList, "FieldList")); error(IO.mapInteger(Record.DerivationList, "DerivedFrom")); error(IO.mapInteger(Record.VTableShape, "VShape")); @@ -239,8 +413,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) { + std::string PropertiesNames = + getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "MemberCount")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.FieldList, "FieldList")); error(IO.mapEncodedInteger(Record.Size, "SizeOf")); error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName, @@ -250,8 +427,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) { } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) { + std::string PropertiesNames = + 
getFlagNames(IO, static_cast(Record.Options), + makeArrayRef(getClassOptionNames())); error(IO.mapInteger(Record.MemberCount, "NumEnumerators")); - error(IO.mapEnum(Record.Options, "Properties")); + error(IO.mapEnum(Record.Options, "Properties" + PropertiesNames)); error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType")); error(IO.mapInteger(Record.FieldList, "FieldListType")); error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName, @@ -383,7 +563,11 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FieldListRecord &Record) { - error(IO.mapByteVectorTail(Record.Data)); + if (IO.isStreaming()) { + if (auto EC = codeview::visitMemberRecordStream(Record.Data, *this)) + return EC; + } else + error(IO.mapByteVectorTail(Record.Data)); return Error::success(); } @@ -397,13 +581,17 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) { - error(IO.mapEnum(Record.Mode, "Mode")); + std::string ModeName = + getEnumName(IO, uint16_t(Record.Mode), makeArrayRef(getLabelTypeEnum())); + error(IO.mapEnum(Record.Mode, "Mode: " + ModeName)); return Error::success(); } Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, BaseClassRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "BaseType")); error(IO.mapEncodedInteger(Record.Offset, "BaseOffset")); @@ -412,7 +600,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, EnumeratorRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs)); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + 
error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); // FIXME: Handle full APInt such as __int128. error(IO.mapEncodedInteger(Record.Value, "EnumValue")); @@ -423,7 +613,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, DataMemberRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "Type")); error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset")); error(IO.mapStringZ(Record.Name, "Name")); @@ -460,7 +652,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, StaticDataMemberRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.Type, "Type")); error(IO.mapStringZ(Record.Name, "Name")); @@ -470,7 +664,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, VirtualBaseClassRecord &Record) { - error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier")); + std::string Attrs = getMemberAttributes( + IO, Record.getAccess(), MethodKind::Vanilla, MethodOptions::None); + error(IO.mapInteger(Record.Attrs.Attrs, "Attrs: " + Attrs)); error(IO.mapInteger(Record.BaseType, "BaseType")); error(IO.mapInteger(Record.VBPtrType, "VBPtrType")); error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset")); diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index f4dd7993760..abbea3a868c 100644 --- 
a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -38,9 +38,9 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { bool DWARFAbbreviationDeclaration::extract(DataExtractor Data, - uint32_t* OffsetPtr) { + uint64_t* OffsetPtr) { clear(); - const uint32_t Offset = *OffsetPtr; + const uint64_t Offset = *OffsetPtr; Code = Data.getULEB128(OffsetPtr); if (Code == 0) { return false; @@ -148,7 +148,7 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const { } Optional DWARFAbbreviationDeclaration::getAttributeValue( - const uint32_t DIEOffset, const dwarf::Attribute Attr, + const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const { Optional MatchAttrIndex = findAttributeIndex(Attr); if (!MatchAttrIndex) @@ -158,7 +158,7 @@ Optional DWARFAbbreviationDeclaration::getAttributeValue( // Add the byte size of ULEB that for the abbrev Code so we can start // skipping the attribute data. - uint32_t Offset = DIEOffset + CodeByteSize; + uint64_t Offset = DIEOffset + CodeByteSize; uint32_t AttrIndex = 0; for (const auto &Spec : AttributeSpecs) { if (*MatchAttrIndex == AttrIndex) { diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 0721efb40f6..875f5e9989a 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -42,7 +42,7 @@ static Atom formatAtom(unsigned Atom) { return {Atom}; } DWARFAcceleratorTable::~DWARFAcceleratorTable() = default; Error AppleAcceleratorTable::extract() { - uint32_t Offset = 0; + uint64_t Offset = 0; // Check that we can at least read the header. 
if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength) + 4)) @@ -111,15 +111,15 @@ bool AppleAcceleratorTable::validateForms() { return true; } -std::pair -AppleAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { - uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; +std::pair +AppleAcceleratorTable::readAtoms(uint64_t *HashDataOffset) { + uint64_t DieOffset = dwarf::DW_INVALID_OFFSET; dwarf::Tag DieTag = dwarf::DW_TAG_null; dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; for (auto Atom : getAtomsDesc()) { DWARFFormValue FormValue(Atom.second); - FormValue.extractValue(AccelSection, &HashDataOffset, FormParams); + FormValue.extractValue(AccelSection, HashDataOffset, FormParams); switch (Atom.first) { case dwarf::DW_ATOM_die_offset: DieOffset = *FormValue.getAsUnsignedConstant(); @@ -163,19 +163,19 @@ Optional AppleAcceleratorTable::HeaderData::extractOffset( bool AppleAcceleratorTable::dumpName(ScopedPrinter &W, SmallVectorImpl &AtomForms, - uint32_t *DataOffset) const { + uint64_t *DataOffset) const { dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; - uint32_t NameOffset = *DataOffset; + uint64_t NameOffset = *DataOffset; if (!AccelSection.isValidOffsetForDataOfSize(*DataOffset, 4)) { W.printString("Incorrectly terminated list."); return false; } - unsigned StringOffset = AccelSection.getRelocatedValue(4, DataOffset); + uint64_t StringOffset = AccelSection.getRelocatedValue(4, DataOffset); if (!StringOffset) return false; // End of list DictScope NameScope(W, ("Name@0x" + Twine::utohexstr(NameOffset)).str()); - W.startLine() << format("String: 0x%08x", StringOffset); + W.startLine() << format("String: 0x%08" PRIx64, StringOffset); W.getOStream() << " \"" << StringSection.getCStr(&StringOffset) << "\"\n"; unsigned NumData = AccelSection.getU32(DataOffset); @@ -223,9 +223,9 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { } // Now go through the actual tables and dump 
them. - uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; - unsigned HashesBase = Offset + Hdr.BucketCount * 4; - unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4; + uint64_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; + uint64_t HashesBase = Offset + Hdr.BucketCount * 4; + uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4; for (unsigned Bucket = 0; Bucket < Hdr.BucketCount; ++Bucket) { unsigned Index = AccelSection.getU32(&Offset); @@ -237,14 +237,14 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { } for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) { - unsigned HashOffset = HashesBase + HashIdx*4; - unsigned OffsetsOffset = OffsetsBase + HashIdx*4; + uint64_t HashOffset = HashesBase + HashIdx*4; + uint64_t OffsetsOffset = OffsetsBase + HashIdx*4; uint32_t Hash = AccelSection.getU32(&HashOffset); if (Hash % Hdr.BucketCount != Bucket) break; - unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); + uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset); ListScope HashScope(W, ("Hash 0x" + Twine::utohexstr(Hash)).str()); if (!AccelSection.isValidOffset(DataOffset)) { W.printString("Invalid section offset"); @@ -265,7 +265,7 @@ AppleAcceleratorTable::Entry::Entry( } void AppleAcceleratorTable::Entry::extract( - const AppleAcceleratorTable &AccelTable, uint32_t *Offset) { + const AppleAcceleratorTable &AccelTable, uint64_t *Offset) { dwarf::FormParams FormParams = {AccelTable.Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; @@ -302,7 +302,7 @@ Optional AppleAcceleratorTable::Entry::getTag() const { } AppleAcceleratorTable::ValueIterator::ValueIterator( - const AppleAcceleratorTable &AccelTable, unsigned Offset) + const AppleAcceleratorTable &AccelTable, uint64_t Offset) : AccelTable(&AccelTable), Current(AccelTable.HdrData), DataOffset(Offset) { if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) return; @@ -333,25 +333,25 @@ AppleAcceleratorTable::equal_range(StringRef Key) const { 
// Find the bucket. unsigned HashValue = djbHash(Key); unsigned Bucket = HashValue % Hdr.BucketCount; - unsigned BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; - unsigned HashesBase = BucketBase + Hdr.BucketCount * 4; - unsigned OffsetsBase = HashesBase + Hdr.HashCount * 4; + uint64_t BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; + uint64_t HashesBase = BucketBase + Hdr.BucketCount * 4; + uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4; - unsigned BucketOffset = BucketBase + Bucket * 4; + uint64_t BucketOffset = BucketBase + Bucket * 4; unsigned Index = AccelSection.getU32(&BucketOffset); // Search through all hashes in the bucket. for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) { - unsigned HashOffset = HashesBase + HashIdx * 4; - unsigned OffsetsOffset = OffsetsBase + HashIdx * 4; + uint64_t HashOffset = HashesBase + HashIdx * 4; + uint64_t OffsetsOffset = OffsetsBase + HashIdx * 4; uint32_t Hash = AccelSection.getU32(&HashOffset); if (Hash % Hdr.BucketCount != Bucket) // We are already in the next bucket. break; - unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); - unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); + uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset); + uint64_t StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); if (!StringOffset) break; @@ -377,7 +377,7 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const { } Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS, - uint32_t *Offset) { + uint64_t *Offset) { // Check that we can read the fixed-size part. 
if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1)) return createStringError(errc::illegal_byte_sequence, @@ -437,7 +437,7 @@ DWARFDebugNames::Abbrev DWARFDebugNames::AbbrevMapInfo::getTombstoneKey() { } Expected -DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAttributeEncoding(uint64_t *Offset) { if (*Offset >= EntriesBase) { return createStringError(errc::illegal_byte_sequence, "Incorrectly terminated abbreviation table."); @@ -449,7 +449,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncoding(uint32_t *Offset) { } Expected> -DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAttributeEncodings(uint64_t *Offset) { std::vector Result; for (;;) { auto AttrEncOr = extractAttributeEncoding(Offset); @@ -463,7 +463,7 @@ DWARFDebugNames::NameIndex::extractAttributeEncodings(uint32_t *Offset) { } Expected -DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) { +DWARFDebugNames::NameIndex::extractAbbrev(uint64_t *Offset) { if (*Offset >= EntriesBase) { return createStringError(errc::illegal_byte_sequence, "Incorrectly terminated abbreviation table."); @@ -482,7 +482,7 @@ DWARFDebugNames::NameIndex::extractAbbrev(uint32_t *Offset) { Error DWARFDebugNames::NameIndex::extract() { const DWARFDataExtractor &AS = Section.AccelSection; - uint32_t Offset = Base; + uint64_t Offset = Base; if (Error E = Hdr.extract(AS, &Offset)) return E; @@ -577,27 +577,27 @@ std::error_code DWARFDebugNames::SentinelError::convertToErrorCode() const { return inconvertibleErrorCode(); } -uint32_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const { +uint64_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const { assert(CU < Hdr.CompUnitCount); - uint32_t Offset = CUsBase + 4 * CU; + uint64_t Offset = CUsBase + 4 * CU; return Section.AccelSection.getRelocatedValue(4, &Offset); } -uint32_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const { 
+uint64_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const { assert(TU < Hdr.LocalTypeUnitCount); - uint32_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU); + uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU); return Section.AccelSection.getRelocatedValue(4, &Offset); } uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const { assert(TU < Hdr.ForeignTypeUnitCount); - uint32_t Offset = + uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU; return Section.AccelSection.getU64(&Offset); } Expected -DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const { +DWARFDebugNames::NameIndex::getEntry(uint64_t *Offset) const { const DWARFDataExtractor &AS = Section.AccelSection; if (!AS.isValidOffset(*Offset)) return createStringError(errc::illegal_byte_sequence, @@ -625,12 +625,12 @@ DWARFDebugNames::NameIndex::getEntry(uint32_t *Offset) const { DWARFDebugNames::NameTableEntry DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const { assert(0 < Index && Index <= Hdr.NameCount); - uint32_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1); - uint32_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1); + uint64_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1); + uint64_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1); const DWARFDataExtractor &AS = Section.AccelSection; - uint32_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset); - uint32_t EntryOffset = AS.getU32(&EntryOffsetOffset); + uint64_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset); + uint64_t EntryOffset = AS.getU32(&EntryOffsetOffset); EntryOffset += EntriesBase; return {Section.StringSection, Index, StringOffset, EntryOffset}; } @@ -638,13 +638,13 @@ DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const { uint32_t DWARFDebugNames::NameIndex::getBucketArrayEntry(uint32_t Bucket) const { assert(Bucket < Hdr.BucketCount); - uint32_t BucketOffset = BucketsBase 
+ 4 * Bucket; + uint64_t BucketOffset = BucketsBase + 4 * Bucket; return Section.AccelSection.getU32(&BucketOffset); } uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const { assert(0 < Index && Index <= Hdr.NameCount); - uint32_t HashOffset = HashesBase + 4 * (Index - 1); + uint64_t HashOffset = HashesBase + 4 * (Index - 1); return Section.AccelSection.getU32(&HashOffset); } @@ -653,8 +653,8 @@ uint32_t DWARFDebugNames::NameIndex::getHashArrayEntry(uint32_t Index) const { // it's not possible to recover this entry list (but the other lists may still // parse OK). bool DWARFDebugNames::NameIndex::dumpEntry(ScopedPrinter &W, - uint32_t *Offset) const { - uint32_t EntryId = *Offset; + uint64_t *Offset) const { + uint64_t EntryId = *Offset; auto EntryOr = getEntry(Offset); if (!EntryOr) { handleAllErrors(EntryOr.takeError(), [](const SentinelError &) {}, @@ -674,10 +674,10 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W, if (Hash) W.printHex("Hash", *Hash); - W.startLine() << format("String: 0x%08x", NTE.getStringOffset()); + W.startLine() << format("String: 0x%08" PRIx64, NTE.getStringOffset()); W.getOStream() << " \"" << NTE.getString() << "\"\n"; - uint32_t EntryOffset = NTE.getEntryOffset(); + uint64_t EntryOffset = NTE.getEntryOffset(); while (dumpEntry(W, &EntryOffset)) /*empty*/; } @@ -685,7 +685,7 @@ void DWARFDebugNames::NameIndex::dumpName(ScopedPrinter &W, void DWARFDebugNames::NameIndex::dumpCUs(ScopedPrinter &W) const { ListScope CUScope(W, "Compilation Unit offsets"); for (uint32_t CU = 0; CU < Hdr.CompUnitCount; ++CU) - W.startLine() << format("CU[%u]: 0x%08x\n", CU, getCUOffset(CU)); + W.startLine() << format("CU[%u]: 0x%08" PRIx64 "\n", CU, getCUOffset(CU)); } void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const { @@ -694,7 +694,8 @@ void DWARFDebugNames::NameIndex::dumpLocalTUs(ScopedPrinter &W) const { ListScope TUScope(W, "Local Type Unit offsets"); for (uint32_t TU = 0; TU < 
Hdr.LocalTypeUnitCount; ++TU) - W.startLine() << format("LocalTU[%u]: 0x%08x\n", TU, getLocalTUOffset(TU)); + W.startLine() << format("LocalTU[%u]: 0x%08" PRIx64 "\n", TU, + getLocalTUOffset(TU)); } void DWARFDebugNames::NameIndex::dumpForeignTUs(ScopedPrinter &W) const { @@ -756,7 +757,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const { } Error DWARFDebugNames::extract() { - uint32_t Offset = 0; + uint64_t Offset = 0; while (AccelSection.isValidOffset(Offset)) { NameIndex Next(*this, Offset); if (Error E = Next.extract()) @@ -778,7 +779,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::dump(raw_ostream &OS) const { NI.dump(W); } -Optional +Optional DWARFDebugNames::ValueIterator::findEntryOffsetInCurrentIndex() { const Header &Hdr = CurrentIndex->Hdr; if (Hdr.BucketCount == 0) { @@ -822,7 +823,7 @@ bool DWARFDebugNames::ValueIterator::getEntryAtCurrentOffset() { } bool DWARFDebugNames::ValueIterator::findInCurrentIndex() { - Optional Offset = findEntryOffsetInCurrentIndex(); + Optional Offset = findEntryOffsetInCurrentIndex(); if (!Offset) return false; DataOffset = *Offset; @@ -877,7 +878,7 @@ DWARFDebugNames::equal_range(StringRef Key) const { } const DWARFDebugNames::NameIndex * -DWARFDebugNames::getCUNameIndex(uint32_t CUOffset) { +DWARFDebugNames::getCUNameIndex(uint64_t CUOffset) { if (CUToNameIndex.size() == 0 && NameIndices.size() > 0) { for (const auto &NI : *this) { for (uint32_t CU = 0; CU < NI.getCUCount(); ++CU) diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 74cce42466d..f59e4926828 100644 --- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,16 +15,18 @@ using namespace llvm; void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { - OS << format("0x%08x", getOffset()) << ": Compile Unit:" - << " length = " << format("0x%08x", getLength()) + OS << format("0x%08" PRIx64, getOffset()) << ": Compile Unit:" + << 
" length = " << format("0x%08" PRIx64, getLength()) << " version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset()) + OS << " abbr_offset = " + << format("0x%04" PRIx64, getAbbreviations()->getOffset()) << " addr_size = " << format("0x%02x", getAddressByteSize()); if (getVersion() >= 5 && getUnitType() != dwarf::DW_UT_compile) OS << " DWO_id = " << format("0x%016" PRIx64, *getDWOId()); - OS << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n"; + OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset()) + << ")\n"; if (DWARFDie CUDie = getUnitDIE(false)) CUDie.dump(OS, 0, DumpOpts); diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 5ede9bf5961..c06d85d5060 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -138,7 +138,7 @@ static void dumpDWARFv5StringOffsetsSection( DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0); DataExtractor StrData(StringSection, LittleEndian, 0); uint64_t SectionSize = StringOffsetsSection.Data.size(); - uint32_t Offset = 0; + uint64_t Offset = 0; for (auto &Contribution : Contributions) { // Report an ill-formed contribution. if (!Contribution) { @@ -166,10 +166,10 @@ static void dumpDWARFv5StringOffsetsSection( } // Report a gap in the table. if (Offset < ContributionHeader) { - OS << format("0x%8.8x: Gap, length = ", Offset); + OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset); OS << (ContributionHeader - Offset) << "\n"; } - OS << format("0x%8.8x: ", (uint32_t)ContributionHeader); + OS << format("0x%8.8" PRIx64 ": ", ContributionHeader); // In DWARF v5 the contribution size in the descriptor does not equal // the originally encoded length (it does not contain the length of the // version field and the padding, a total of 4 bytes). 
Add them back in @@ -181,26 +181,19 @@ static void dumpDWARFv5StringOffsetsSection( Offset = Contribution->Base; unsigned EntrySize = Contribution->getDwarfOffsetByteSize(); while (Offset - Contribution->Base < Contribution->Size) { - OS << format("0x%8.8x: ", Offset); - // FIXME: We can only extract strings if the offset fits in 32 bits. + OS << format("0x%8.8" PRIx64 ": ", Offset); uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); - // Extract the string if we can and display it. Otherwise just report - // the offset. - if (StringOffset <= std::numeric_limits::max()) { - uint32_t StringOffset32 = (uint32_t)StringOffset; - OS << format("%8.8x ", StringOffset32); - const char *S = StrData.getCStr(&StringOffset32); - if (S) - OS << format("\"%s\"", S); - } else - OS << format("%16.16" PRIx64 " ", StringOffset); + OS << format("%8.8" PRIx64 " ", StringOffset); + const char *S = StrData.getCStr(&StringOffset); + if (S) + OS << format("\"%s\"", S); OS << "\n"; } } // Report a gap at the end of the table. if (Offset < SectionSize) { - OS << format("0x%8.8x: Gap, length = ", Offset); + OS << format("0x%8.8" PRIx64 ": Gap, length = ", Offset); OS << (SectionSize - Offset) << "\n"; } } @@ -225,7 +218,7 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, StringSection, Units, LittleEndian); else { DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); - uint32_t offset = 0; + uint64_t offset = 0; uint64_t size = StringOffsetsSection.Data.size(); // Ensure that size is a multiple of the size of an entry. 
if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { @@ -235,9 +228,9 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, } DataExtractor StrData(StringSection, LittleEndian, 0); while (offset < size) { - OS << format("0x%8.8x: ", offset); - uint32_t StringOffset = strOffsetExt.getU32(&offset); - OS << format("%8.8x ", StringOffset); + OS << format("0x%8.8" PRIx64 ": ", offset); + uint64_t StringOffset = strOffsetExt.getU32(&offset); + OS << format("%8.8" PRIx64 " ", StringOffset); const char *S = StrData.getCStr(&StringOffset); if (S) OS << format("\"%s\"", S); @@ -250,10 +243,10 @@ static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, DIDumpOptions DumpOpts, uint16_t Version, uint8_t AddrSize) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (AddrData.isValidOffset(Offset)) { DWARFDebugAddrTable AddrTable; - uint32_t TableOffset = Offset; + uint64_t TableOffset = Offset; if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize, DWARFContext::dumpWarning)) { WithColor::error() << toString(std::move(Err)) << '\n'; @@ -261,8 +254,7 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, // could be read. If it couldn't, stop reading the section. 
if (!AddrTable.hasValidLength()) break; - uint64_t Length = AddrTable.getLength(); - Offset = TableOffset + Length; + Offset = TableOffset + AddrTable.getLength(); } else { AddrTable.dump(OS, DumpOpts); } @@ -275,10 +267,10 @@ static void dumpRnglistsSection( llvm::function_ref(uint32_t)> LookupPooledAddress, DIDumpOptions DumpOpts) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (rnglistData.isValidOffset(Offset)) { llvm::DWARFDebugRnglistTable Rnglists; - uint32_t TableOffset = Offset; + uint64_t TableOffset = Offset; if (Error Err = Rnglists.extract(rnglistData, &Offset)) { WithColor::error() << toString(std::move(Err)) << '\n'; uint64_t Length = Rnglists.length(); @@ -297,21 +289,25 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, DWARFDataExtractor Data, const MCRegisterInfo *MRI, Optional DumpOffset) { - uint32_t Offset = 0; - DWARFDebugLoclists Loclists; + uint64_t Offset = 0; - DWARFListTableHeader Header(".debug_loclists", "locations"); - if (Error E = Header.extract(Data, &Offset)) { - WithColor::error() << toString(std::move(E)) << '\n'; - return; + while (Data.isValidOffset(Offset)) { + DWARFListTableHeader Header(".debug_loclists", "locations"); + if (Error E = Header.extract(Data, &Offset)) { + WithColor::error() << toString(std::move(E)) << '\n'; + return; + } + + Header.dump(OS, DumpOpts); + DataExtractor LocData(Data.getData(), + Data.isLittleEndian(), Header.getAddrSize()); + + DWARFDebugLoclists Loclists; + uint64_t EndOffset = Header.length() + Header.getHeaderOffset(); + Loclists.parse(LocData, Offset, EndOffset, Header.getVersion()); + Loclists.dump(OS, 0, MRI, DumpOpts, DumpOffset); + Offset = EndOffset; } - - Header.dump(OS, DumpOpts); - DataExtractor LocData(Data.getData().drop_front(Offset), - Data.isLittleEndian(), Header.getAddrSize()); - - Loclists.parse(LocData, Header.getVersion()); - Loclists.dump(OS, 0, MRI, DumpOffset); } void DWARFContext::dump( @@ -386,7 +382,7 @@ void DWARFContext::dump( if 
(const auto *Off = shouldDump(Explicit, ".debug_loc", DIDT_ID_DebugLoc, DObj->getLocSection().Data)) { - getDebugLoc()->dump(OS, getRegisterInfo(), *Off); + getDebugLoc()->dump(OS, getRegisterInfo(), DumpOpts, *Off); } if (const auto *Off = shouldDump(Explicit, ".debug_loclists", DIDT_ID_DebugLoclists, @@ -398,15 +394,15 @@ void DWARFContext::dump( if (const auto *Off = shouldDump(ExplicitDWO, ".debug_loc.dwo", DIDT_ID_DebugLoc, DObj->getLocDWOSection().Data)) { - getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), *Off); + getDebugLocDWO()->dump(OS, 0, getRegisterInfo(), DumpOpts, *Off); } if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame, - DObj->getDebugFrameSection())) + DObj->getFrameSection().Data)) getDebugFrame()->dump(OS, getRegisterInfo(), *Off); if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame, - DObj->getEHFrameSection())) + DObj->getEHFrameSection().Data)) getEHFrame()->dump(OS, getRegisterInfo(), *Off); if (DumpType & DIDT_DebugMacro) { @@ -417,9 +413,9 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_aranges", DIDT_ID_DebugAranges, - DObj->getARangeSection())) { - uint32_t offset = 0; - DataExtractor arangesData(DObj->getARangeSection(), isLittleEndian(), 0); + DObj->getArangesSection())) { + uint64_t offset = 0; + DataExtractor arangesData(DObj->getArangesSection(), isLittleEndian(), 0); DWARFDebugArangeSet set; while (set.extract(arangesData, &offset)) set.dump(OS); @@ -433,7 +429,8 @@ void DWARFContext::dump( Parser.skip(dumpWarning); continue; } - OS << "debug_line[" << format("0x%8.8x", Parser.getOffset()) << "]\n"; + OS << "debug_line[" << format("0x%8.8" PRIx64, Parser.getOffset()) + << "]\n"; if (DumpOpts.Verbose) { Parser.parseNext(dumpWarning, dumpWarning, &OS); } else { @@ -474,32 +471,32 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_str", DIDT_ID_DebugStr, - DObj->getStringSection())) { - DataExtractor strData(DObj->getStringSection(), isLittleEndian(), 0); 
- uint32_t offset = 0; - uint32_t strOffset = 0; + DObj->getStrSection())) { + DataExtractor strData(DObj->getStrSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strOffset = 0; while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strOffset, s); + OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strOffset, s); strOffset = offset; } } if (shouldDump(ExplicitDWO, ".debug_str.dwo", DIDT_ID_DebugStr, - DObj->getStringDWOSection())) { - DataExtractor strDWOData(DObj->getStringDWOSection(), isLittleEndian(), 0); - uint32_t offset = 0; - uint32_t strDWOOffset = 0; + DObj->getStrDWOSection())) { + DataExtractor strDWOData(DObj->getStrDWOSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strDWOOffset = 0; while (const char *s = strDWOData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + OS << format("0x%8.8" PRIx64 ": \"%s\"\n", strDWOOffset, s); strDWOOffset = offset; } } if (shouldDump(Explicit, ".debug_line_str", DIDT_ID_DebugLineStr, - DObj->getLineStringSection())) { - DataExtractor strData(DObj->getLineStringSection(), isLittleEndian(), 0); - uint32_t offset = 0; - uint32_t strOffset = 0; + DObj->getLineStrSection())) { + DataExtractor strData(DObj->getLineStrSection(), isLittleEndian(), 0); + uint64_t offset = 0; + uint64_t strOffset = 0; while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"", strOffset); + OS << format("0x%8.8" PRIx64 ": \"", strOffset); OS.write_escaped(s); OS << "\"\n"; strOffset = offset; @@ -514,11 +511,11 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_ranges", DIDT_ID_DebugRanges, - DObj->getRangeSection().Data)) { + DObj->getRangesSection().Data)) { uint8_t savedAddressByteSize = getCUAddrSize(); - DWARFDataExtractor rangesData(*DObj, DObj->getRangeSection(), + DWARFDataExtractor rangesData(*DObj, DObj->getRangesSection(), isLittleEndian(), savedAddressByteSize); - uint32_t offset = 0; + uint64_t offset = 0; 
DWARFDebugRangeList rangeList; while (rangesData.isValidOffset(offset)) { if (Error E = rangeList.extract(rangesData, &offset)) { @@ -552,38 +549,38 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames, - DObj->getPubNamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubNamesSection(), isLittleEndian(), false) + DObj->getPubnamesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getPubnamesSection(), isLittleEndian(), false) .dump(OS); if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes, - DObj->getPubTypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubTypesSection(), isLittleEndian(), false) + DObj->getPubtypesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getPubtypesSection(), isLittleEndian(), false) .dump(OS); if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames, - DObj->getGnuPubNamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubNamesSection(), isLittleEndian(), + DObj->getGnuPubnamesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getGnuPubnamesSection(), isLittleEndian(), true /* GnuStyle */) .dump(OS); if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes, - DObj->getGnuPubTypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubTypesSection(), isLittleEndian(), + DObj->getGnuPubtypesSection().Data)) + DWARFDebugPubTable(*DObj, DObj->getGnuPubtypesSection(), isLittleEndian(), true /* GnuStyle */) .dump(OS); if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets, - DObj->getStringOffsetSection().Data)) + DObj->getStrOffsetsSection().Data)) dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj, - DObj->getStringOffsetSection(), - DObj->getStringSection(), normal_units(), + DObj->getStrOffsetsSection(), + DObj->getStrSection(), normal_units(), isLittleEndian(), getMaxVersion()); if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets, - DObj->getStringOffsetDWOSection().Data)) + 
DObj->getStrOffsetsDWOSection().Data)) dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj, - DObj->getStringOffsetDWOSection(), - DObj->getStringDWOSection(), dwo_units(), + DObj->getStrOffsetsDWOSection(), + DObj->getStrDWOSection(), dwo_units(), isLittleEndian(), getMaxDWOVersion()); if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex, @@ -607,7 +604,7 @@ void DWARFContext::dump( DObj->getAppleObjCSection().Data)) getAppleObjC().dump(OS); if (shouldDump(Explicit, ".debug_names", DIDT_ID_DebugNames, - DObj->getDebugNamesSection().Data)) + DObj->getNamesSection().Data)) getDebugNames().dump(OS); } @@ -641,7 +638,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { return nullptr; } -DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { +DWARFDie DWARFContext::getDIEForOffset(uint64_t Offset) { parseNormalUnits(); if (auto *CU = NormalUnits.getUnitForOffset(Offset)) return CU->getDIEForOffset(Offset); @@ -667,7 +664,7 @@ const DWARFUnitIndex &DWARFContext::getCUIndex() { DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0); - CUIndex = llvm::make_unique(DW_SECT_INFO); + CUIndex = std::make_unique(DW_SECT_INFO); CUIndex->parse(CUIndexData); return *CUIndex; } @@ -678,7 +675,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() { DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0); - TUIndex = llvm::make_unique(DW_SECT_TYPES); + TUIndex = std::make_unique(DW_SECT_TYPES); TUIndex->parse(TUIndexData); return *TUIndex; } @@ -688,7 +685,7 @@ DWARFGdbIndex &DWARFContext::getGdbIndex() { return *GdbIndex; DataExtractor GdbIndexData(DObj->getGdbIndexSection(), true /*LE*/, 0); - GdbIndex = llvm::make_unique(); + GdbIndex = std::make_unique(); GdbIndex->parse(GdbIndexData); return *GdbIndex; } @@ -740,7 +737,7 @@ const DWARFDebugLoclists *DWARFContext::getDebugLocDWO() { // Use version 4. 
DWO does not support the DWARF v5 .debug_loclists yet and // that means we are parsing the new style .debug_loc (pre-standatized version // of the .debug_loclists). - LocDWO->parse(LocData, 4 /* Version */); + LocDWO->parse(LocData, 0, LocData.getData().size(), 4 /* Version */); return LocDWO.get(); } @@ -766,7 +763,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() { // provides this information). This problem is fixed in DWARFv4 // See this dwarf-discuss discussion for more details: // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html - DWARFDataExtractor debugFrameData(DObj->getDebugFrameSection(), + DWARFDataExtractor debugFrameData(*DObj, DObj->getFrameSection(), isLittleEndian(), DObj->getAddressSize()); DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */)); DebugFrame->parse(debugFrameData); @@ -777,8 +774,8 @@ const DWARFDebugFrame *DWARFContext::getEHFrame() { if (EHFrame) return EHFrame.get(); - DWARFDataExtractor debugFrameData(DObj->getEHFrameSection(), isLittleEndian(), - DObj->getAddressSize()); + DWARFDataExtractor debugFrameData(*DObj, DObj->getEHFrameSection(), + isLittleEndian(), DObj->getAddressSize()); DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */)); DebugFrame->parse(debugFrameData); return DebugFrame.get(); @@ -809,29 +806,29 @@ static T &getAccelTable(std::unique_ptr &Cache, const DWARFObject &Obj, } const DWARFDebugNames &DWARFContext::getDebugNames() { - return getAccelTable(Names, *DObj, DObj->getDebugNamesSection(), - DObj->getStringSection(), isLittleEndian()); + return getAccelTable(Names, *DObj, DObj->getNamesSection(), + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleNames() { return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleTypes() { return 
getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleNamespaces() { return getAccelTable(AppleNamespaces, *DObj, DObj->getAppleNamespacesSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const AppleAcceleratorTable &DWARFContext::getAppleObjC() { return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(), - DObj->getStringSection(), isLittleEndian()); + DObj->getStrSection(), isLittleEndian()); } const DWARFDebugLine::LineTable * @@ -858,7 +855,7 @@ Expected DWARFContext::getLineTableForUnit( if (!Offset) return nullptr; // No line table for this compile unit. - uint32_t stmtOffset = *Offset + U->getLineTableOffset(); + uint64_t stmtOffset = *Offset + U->getLineTableOffset(); // See if the line table is cached. if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) return lt; @@ -898,7 +895,7 @@ void DWARFContext::parseDWOUnits(bool Lazy) { }); } -DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) { parseNormalUnits(); return dyn_cast_or_null( NormalUnits.getUnitForOffset(Offset)); @@ -906,7 +903,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. - uint32_t CUOffset = getDebugAranges()->findAddress(Address); + uint64_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. 
return getCompileUnitForOffset(CUOffset); } @@ -1118,8 +1115,8 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( if (!CU) return Lines; - std::string FunctionName = ""; uint32_t StartLine = 0; + std::string FunctionName(DILineInfo::BadString); getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, FunctionName, StartLine); @@ -1379,46 +1376,50 @@ class DWARFObjInMemory final : public DWARFObject { InfoSectionMap TypesDWOSections; DWARFSectionMap LocSection; - DWARFSectionMap LocListsSection; + DWARFSectionMap LoclistsSection; DWARFSectionMap LineSection; - DWARFSectionMap RangeSection; + DWARFSectionMap RangesSection; DWARFSectionMap RnglistsSection; - DWARFSectionMap StringOffsetSection; + DWARFSectionMap StrOffsetsSection; DWARFSectionMap LineDWOSection; + DWARFSectionMap FrameSection; + DWARFSectionMap EHFrameSection; DWARFSectionMap LocDWOSection; - DWARFSectionMap StringOffsetDWOSection; - DWARFSectionMap RangeDWOSection; + DWARFSectionMap StrOffsetsDWOSection; + DWARFSectionMap RangesDWOSection; DWARFSectionMap RnglistsDWOSection; DWARFSectionMap AddrSection; DWARFSectionMap AppleNamesSection; DWARFSectionMap AppleTypesSection; DWARFSectionMap AppleNamespacesSection; DWARFSectionMap AppleObjCSection; - DWARFSectionMap DebugNamesSection; - DWARFSectionMap PubNamesSection; - DWARFSectionMap PubTypesSection; - DWARFSectionMap GnuPubNamesSection; - DWARFSectionMap GnuPubTypesSection; + DWARFSectionMap NamesSection; + DWARFSectionMap PubnamesSection; + DWARFSectionMap PubtypesSection; + DWARFSectionMap GnuPubnamesSection; + DWARFSectionMap GnuPubtypesSection; DWARFSectionMap *mapNameToDWARFSection(StringRef Name) { return StringSwitch(Name) .Case("debug_loc", &LocSection) - .Case("debug_loclists", &LocListsSection) + .Case("debug_loclists", &LoclistsSection) .Case("debug_line", &LineSection) - .Case("debug_str_offsets", &StringOffsetSection) - .Case("debug_ranges", &RangeSection) + .Case("debug_frame", &FrameSection) + .Case("eh_frame", 
&EHFrameSection) + .Case("debug_str_offsets", &StrOffsetsSection) + .Case("debug_ranges", &RangesSection) .Case("debug_rnglists", &RnglistsSection) .Case("debug_loc.dwo", &LocDWOSection) .Case("debug_line.dwo", &LineDWOSection) - .Case("debug_names", &DebugNamesSection) + .Case("debug_names", &NamesSection) .Case("debug_rnglists.dwo", &RnglistsDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_str_offsets.dwo", &StrOffsetsDWOSection) .Case("debug_addr", &AddrSection) .Case("apple_names", &AppleNamesSection) - .Case("debug_pubnames", &PubNamesSection) - .Case("debug_pubtypes", &PubTypesSection) - .Case("debug_gnu_pubnames", &GnuPubNamesSection) - .Case("debug_gnu_pubtypes", &GnuPubTypesSection) + .Case("debug_pubnames", &PubnamesSection) + .Case("debug_pubtypes", &PubtypesSection) + .Case("debug_gnu_pubnames", &GnuPubnamesSection) + .Case("debug_gnu_pubtypes", &GnuPubtypesSection) .Case("apple_types", &AppleTypesSection) .Case("apple_namespaces", &AppleNamespacesSection) .Case("apple_namespac", &AppleNamespacesSection) @@ -1427,17 +1428,15 @@ class DWARFObjInMemory final : public DWARFObject { } StringRef AbbrevSection; - StringRef ARangeSection; - StringRef DebugFrameSection; - StringRef EHFrameSection; - StringRef StringSection; + StringRef ArangesSection; + StringRef StrSection; StringRef MacinfoSection; StringRef AbbrevDWOSection; - StringRef StringDWOSection; + StringRef StrDWOSection; StringRef CUIndexSection; StringRef GdbIndexSection; StringRef TUIndexSection; - StringRef LineStringSection; + StringRef LineStrSection; // A deque holding section data whose iterators are not invalidated when // new decompressed sections are inserted at the end. 
@@ -1448,17 +1447,15 @@ class DWARFObjInMemory final : public DWARFObject { return &Sec->Data; return StringSwitch(Name) .Case("debug_abbrev", &AbbrevSection) - .Case("debug_aranges", &ARangeSection) - .Case("debug_frame", &DebugFrameSection) - .Case("eh_frame", &EHFrameSection) - .Case("debug_str", &StringSection) + .Case("debug_aranges", &ArangesSection) + .Case("debug_str", &StrSection) .Case("debug_macinfo", &MacinfoSection) .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_str.dwo", &StringDWOSection) + .Case("debug_str.dwo", &StrDWOSection) .Case("debug_cu_index", &CUIndexSection) .Case("debug_tu_index", &TUIndexSection) .Case("gdb_index", &GdbIndexSection) - .Case("debug_line_str", &LineStringSection) + .Case("debug_line_str", &LineStrSection) // Any more debug info sections go here. .Default(nullptr); } @@ -1513,7 +1510,11 @@ public: StringMap SectionAmountMap; for (const SectionRef &Section : Obj.sections()) { StringRef Name; - Section.getName(Name); + if (auto NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + ++SectionAmountMap[Name]; SectionNames.push_back({ Name, true }); @@ -1526,10 +1527,19 @@ public: continue; StringRef Data; - section_iterator RelocatedSection = Section.getRelocatedSection(); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) { + ErrorPolicy EP = HandleError(createError( + "failed to get relocated section: ", SecOrErr.takeError())); + if (EP == ErrorPolicy::Halt) + return; + continue; + } + // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. + section_iterator RelocatedSection = *SecOrErr; if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) { Expected E = Section.getContents(); if (E) @@ -1560,7 +1570,7 @@ public: *SectionData = Data; if (Name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. 
- RangeDWOSection.Data = Data; + RangesDWOSection.Data = Data; } } else if (Name == "debug_info") { // Find debug_info and debug_types data by section rather than name as @@ -1578,12 +1588,15 @@ public: continue; StringRef RelSecName; - StringRef RelSecData; - RelocatedSection->getName(RelSecName); + if (auto NameOrErr = RelocatedSection->getName()) + RelSecName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); // If the section we're relocating was relocated already by the JIT, // then we used the relocated version above, so we do not need to process // relocations for it now. + StringRef RelSecData; if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData)) continue; @@ -1710,12 +1723,12 @@ public: const DWARFSection &getLocDWOSection() const override { return LocDWOSection; } - StringRef getStringDWOSection() const override { return StringDWOSection; } - const DWARFSection &getStringOffsetDWOSection() const override { - return StringOffsetDWOSection; + StringRef getStrDWOSection() const override { return StrDWOSection; } + const DWARFSection &getStrOffsetsDWOSection() const override { + return StrOffsetsDWOSection; } - const DWARFSection &getRangeDWOSection() const override { - return RangeDWOSection; + const DWARFSection &getRangesDWOSection() const override { + return RangesDWOSection; } const DWARFSection &getRnglistsDWOSection() const override { return RnglistsDWOSection; @@ -1726,10 +1739,10 @@ public: StringRef getTUIndexSection() const override { return TUIndexSection; } // DWARF v5 - const DWARFSection &getStringOffsetSection() const override { - return StringOffsetSection; + const DWARFSection &getStrOffsetsSection() const override { + return StrOffsetsSection; } - StringRef getLineStringSection() const override { return LineStringSection; } + StringRef getLineStrSection() const override { return LineStrSection; } // Sections for DWARF5 split dwarf proposal. 
void forEachInfoDWOSections( @@ -1745,24 +1758,28 @@ public: StringRef getAbbrevSection() const override { return AbbrevSection; } const DWARFSection &getLocSection() const override { return LocSection; } - const DWARFSection &getLoclistsSection() const override { return LocListsSection; } - StringRef getARangeSection() const override { return ARangeSection; } - StringRef getDebugFrameSection() const override { return DebugFrameSection; } - StringRef getEHFrameSection() const override { return EHFrameSection; } + const DWARFSection &getLoclistsSection() const override { return LoclistsSection; } + StringRef getArangesSection() const override { return ArangesSection; } + const DWARFSection &getFrameSection() const override { + return FrameSection; + } + const DWARFSection &getEHFrameSection() const override { + return EHFrameSection; + } const DWARFSection &getLineSection() const override { return LineSection; } - StringRef getStringSection() const override { return StringSection; } - const DWARFSection &getRangeSection() const override { return RangeSection; } + StringRef getStrSection() const override { return StrSection; } + const DWARFSection &getRangesSection() const override { return RangesSection; } const DWARFSection &getRnglistsSection() const override { return RnglistsSection; } StringRef getMacinfoSection() const override { return MacinfoSection; } - const DWARFSection &getPubNamesSection() const override { return PubNamesSection; } - const DWARFSection &getPubTypesSection() const override { return PubTypesSection; } - const DWARFSection &getGnuPubNamesSection() const override { - return GnuPubNamesSection; + const DWARFSection &getPubnamesSection() const override { return PubnamesSection; } + const DWARFSection &getPubtypesSection() const override { return PubtypesSection; } + const DWARFSection &getGnuPubnamesSection() const override { + return GnuPubnamesSection; } - const DWARFSection &getGnuPubTypesSection() const override { - return 
GnuPubTypesSection; + const DWARFSection &getGnuPubtypesSection() const override { + return GnuPubtypesSection; } const DWARFSection &getAppleNamesSection() const override { return AppleNamesSection; @@ -1776,8 +1793,8 @@ public: const DWARFSection &getAppleObjCSection() const override { return AppleObjCSection; } - const DWARFSection &getDebugNamesSection() const override { - return DebugNamesSection; + const DWARFSection &getNamesSection() const override { + return NamesSection; } StringRef getFileName() const override { return FileName; } @@ -1799,16 +1816,16 @@ std::unique_ptr DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L, function_ref HandleError, std::string DWPName) { - auto DObj = llvm::make_unique(Obj, L, HandleError); - return llvm::make_unique(std::move(DObj), std::move(DWPName)); + auto DObj = std::make_unique(Obj, L, HandleError); + return std::make_unique(std::move(DObj), std::move(DWPName)); } std::unique_ptr DWARFContext::create(const StringMap> &Sections, uint8_t AddrSize, bool isLittleEndian) { auto DObj = - llvm::make_unique(Sections, AddrSize, isLittleEndian); - return llvm::make_unique(std::move(DObj), ""); + std::make_unique(Sections, AddrSize, isLittleEndian); + return std::make_unique(std::move(DObj), ""); } Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) { diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index b9adf8cb1d9..53e676bc703 100644 --- a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -12,14 +12,15 @@ using namespace llvm; -uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, - uint64_t *SecNdx) const { +uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off, + uint64_t *SecNdx, + Error *Err) const { if (SecNdx) *SecNdx = object::SectionedAddress::UndefSection; if (!Section) - return getUnsigned(Off, Size); + return getUnsigned(Off, Size, 
Err); Optional E = Obj->find(*Section, *Off); - uint64_t A = getUnsigned(Off, Size); + uint64_t A = getUnsigned(Off, Size, Err); if (!E) return A; if (SecNdx) @@ -31,13 +32,13 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off, } Optional -DWARFDataExtractor::getEncodedPointer(uint32_t *Offset, uint8_t Encoding, +DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding, uint64_t PCRelOffset) const { if (Encoding == dwarf::DW_EH_PE_omit) return None; uint64_t Result = 0; - uint32_t OldOffset = *Offset; + uint64_t OldOffset = *Offset; // First get value switch (Encoding & 0x0F) { case dwarf::DW_EH_PE_absptr: diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp index 31b324e5eb2..4afac2f9950 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp @@ -26,9 +26,9 @@ void DWARFAbbreviationDeclarationSet::clear() { } bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { clear(); - const uint32_t BeginOffset = *OffsetPtr; + const uint64_t BeginOffset = *OffsetPtr; Offset = BeginOffset; DWARFAbbreviationDeclaration AbbrDecl; uint32_t PrevAbbrCode = 0; @@ -82,12 +82,12 @@ void DWARFDebugAbbrev::extract(DataExtractor Data) { void DWARFDebugAbbrev::parse() const { if (!Data) return; - uint32_t Offset = 0; + uint64_t Offset = 0; auto I = AbbrDeclSets.begin(); while (Data->isValidOffset(Offset)) { while (I != AbbrDeclSets.end() && I->first < Offset) ++I; - uint32_t CUAbbrOffset = Offset; + uint64_t CUAbbrOffset = Offset; DWARFAbbreviationDeclarationSet AbbrDecls; if (!AbbrDecls.extract(*Data, &Offset)) break; @@ -124,7 +124,7 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const { } if (Data && CUAbbrOffset < Data->getData().size()) { - uint32_t Offset = CUAbbrOffset; + uint64_t Offset = CUAbbrOffset; DWARFAbbreviationDeclarationSet AbbrDecls; if 
(!AbbrDecls.extract(*Data, &Offset)) return nullptr; diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp index 58626539bba..f71543799e2 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -19,7 +19,7 @@ void DWARFDebugAddrTable::clear() { } Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, std::function WarnCallback) { @@ -30,7 +30,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, return createStringError(errc::invalid_argument, "section is not large enough to contain a " ".debug_addr table length at offset 0x%" - PRIx32, *OffsetPtr); + PRIx64, *OffsetPtr); uint16_t UnitVersion; if (Version == 0) { WarnCallback(createStringError(errc::invalid_argument, @@ -44,28 +44,28 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, Format = dwarf::DwarfFormat::DWARF32; if (UnitVersion >= 5) { HeaderData.Length = Data.getU32(OffsetPtr); - if (HeaderData.Length == 0xffffffffu) { + if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { invalidateLength(); return createStringError(errc::not_supported, - "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx32, + "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx64, HeaderOffset); } if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) { uint32_t TmpLength = getLength(); invalidateLength(); return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has too small length (0x%" PRIx32 ") to contain a complete header", HeaderOffset, TmpLength); } - uint32_t End = HeaderOffset + getLength(); + uint64_t End = HeaderOffset + getLength(); if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) { uint32_t TmpLength = getLength(); invalidateLength(); return createStringError(errc::invalid_argument, "section is not large enough 
to contain a .debug_addr table " - "of length 0x%" PRIx32 " at offset 0x%" PRIx32, + "of length 0x%" PRIx32 " at offset 0x%" PRIx64, TmpLength, HeaderOffset); } @@ -88,7 +88,7 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // and consists only of a series of addresses. if (HeaderData.Version > 5) { return createStringError(errc::not_supported, "version %" PRIu16 - " of .debug_addr section at offset 0x%" PRIx32 " is not supported", + " of .debug_addr section at offset 0x%" PRIx64 " is not supported", HeaderData.Version, HeaderOffset); } // FIXME: For now we just treat version mismatch as an error, @@ -97,19 +97,19 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // attribute in the info table. if (HeaderData.Version != UnitVersion) return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has version %" PRIu16 " which is different from the version suggested" " by the DWARF unit header: %" PRIu16, HeaderOffset, HeaderData.Version, UnitVersion); if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has unsupported address size %" PRIu8, HeaderOffset, HeaderData.AddrSize); if (HeaderData.AddrSize != AddrSize && AddrSize != 0) return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has address size %" PRIu8 " which is different from CU address size %" PRIu8, HeaderOffset, HeaderData.AddrSize, AddrSize); @@ -117,13 +117,13 @@ Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, // TODO: add support for non-zero segment selector size. 
if (HeaderData.SegSize != 0) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " has unsupported segment selector size %" PRIu8, HeaderOffset, HeaderData.SegSize); if (DataSize % HeaderData.AddrSize != 0) { invalidateLength(); return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx32 + ".debug_addr table at offset 0x%" PRIx64 " contains data of size %" PRIu32 " which is not a multiple of addr size %" PRIu8, HeaderOffset, DataSize, HeaderData.AddrSize); @@ -162,7 +162,7 @@ Expected DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const { return Addrs[Index]; return createStringError(errc::invalid_argument, "Index %" PRIu32 " is out of range of the " - ".debug_addr table at offset 0x%" PRIx32, + ".debug_addr table at offset 0x%" PRIx64, Index, HeaderOffset); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp index 6551b61accb..200b2d52a02 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp @@ -24,13 +24,13 @@ void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS, } void DWARFDebugArangeSet::clear() { - Offset = -1U; + Offset = -1ULL; std::memset(&HeaderData, 0, sizeof(Header)); ArangeDescriptors.clear(); } bool -DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { +DWARFDebugArangeSet::extract(DataExtractor data, uint64_t *offset_ptr) { if (data.isValidOffset(*offset_ptr)) { ArangeDescriptors.clear(); Offset = *offset_ptr; diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index 6460c9feeab..ca6043109cd 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -23,11 +23,11 @@ using namespace llvm; void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { if (!DebugArangesData.isValidOffset(0)) return; - uint32_t 
Offset = 0; + uint64_t Offset = 0; DWARFDebugArangeSet Set; while (Set.extract(DebugArangesData, &Offset)) { - uint32_t CUOffset = Set.getCompileUnitDIEOffset(); + uint64_t CUOffset = Set.getCompileUnitDIEOffset(); for (const auto &Desc : Set.descriptors()) { uint64_t LowPC = Desc.Address; uint64_t HighPC = Desc.getEndAddress(); @@ -43,7 +43,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { return; // Extract aranges from .debug_aranges section. - DataExtractor ArangesData(CTX->getDWARFObj().getARangeSection(), + DataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(), CTX->isLittleEndian(), 0); extract(ArangesData); @@ -51,7 +51,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { // it may describe only a small subset of compilation units, so we need to // manually build aranges for the rest of them. for (const auto &CU : CTX->compile_units()) { - uint32_t CUOffset = CU->getOffset(); + uint64_t CUOffset = CU->getOffset(); if (ParsedCUOffsets.insert(CUOffset).second) { Expected CURanges = CU->collectAddressRanges(); if (!CURanges) @@ -71,7 +71,7 @@ void DWARFDebugAranges::clear() { ParsedCUOffsets.clear(); } -void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, +void DWARFDebugAranges::appendRange(uint64_t CUOffset, uint64_t LowPC, uint64_t HighPC) { if (LowPC >= HighPC) return; @@ -80,7 +80,7 @@ void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, } void DWARFDebugAranges::construct() { - std::multiset ValidCUs; // Maintain the set of CUs describing + std::multiset ValidCUs; // Maintain the set of CUs describing // a current address range. 
llvm::sort(Endpoints); uint64_t PrevAddress = -1ULL; diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index b3f23366f2a..81b00f65741 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -34,10 +34,10 @@ using namespace dwarf; const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; -Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset, - uint32_t EndOffset) { +Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, + uint64_t EndOffset) { while (*Offset < EndOffset) { - uint8_t Opcode = Data.getU8(Offset); + uint8_t Opcode = Data.getRelocatedValue(1, Offset); // Some instructions have a primary opcode encoded in the top bits. uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; @@ -74,19 +74,19 @@ Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset, break; case DW_CFA_set_loc: // Operands: Address - addInstruction(Opcode, Data.getAddress(Offset)); + addInstruction(Opcode, Data.getRelocatedAddress(Offset)); break; case DW_CFA_advance_loc1: // Operands: 1-byte delta - addInstruction(Opcode, Data.getU8(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(1, Offset)); break; case DW_CFA_advance_loc2: // Operands: 2-byte delta - addInstruction(Opcode, Data.getU16(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(2, Offset)); break; case DW_CFA_advance_loc4: // Operands: 4-byte delta - addInstruction(Opcode, Data.getU32(Offset)); + addInstruction(Opcode, Data.getRelocatedValue(4, Offset)); break; case DW_CFA_restore_extended: case DW_CFA_undefined: @@ -331,7 +331,7 @@ DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch, DWARFDebugFrame::~DWARFDebugFrame() = default; static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, - uint32_t Offset, int Length) { + uint64_t Offset, int Length) { errs() << "DUMP: "; for (int i = 0; i < Length; ++i) { uint8_t c = Data.getU8(&Offset); 
@@ -344,7 +344,7 @@ static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, // noreturn attribute usage in lambdas. Once the support for those // compilers are phased out, we can remove this and return back to // a ReportError lambda: [StartOffset](const char *ErrorMsg). -static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, +static void LLVM_ATTRIBUTE_NORETURN ReportError(uint64_t StartOffset, const char *ErrorMsg) { std::string Str; raw_string_ostream OS(Str); @@ -354,32 +354,30 @@ static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, } void DWARFDebugFrame::parse(DWARFDataExtractor Data) { - uint32_t Offset = 0; - DenseMap CIEs; + uint64_t Offset = 0; + DenseMap CIEs; while (Data.isValidOffset(Offset)) { - uint32_t StartOffset = Offset; + uint64_t StartOffset = Offset; bool IsDWARF64 = false; - uint64_t Length = Data.getU32(&Offset); + uint64_t Length = Data.getRelocatedValue(4, &Offset); uint64_t Id; - if (Length == UINT32_MAX) { + if (Length == dwarf::DW_LENGTH_DWARF64) { // DWARF-64 is distinguished by the first 32 bits of the initial length // field being 0xffffffff. Then, the next 64 bits are the actual entry // length. IsDWARF64 = true; - Length = Data.getU64(&Offset); + Length = Data.getRelocatedValue(8, &Offset); } // At this point, Offset points to the next field after Length. // Length is the structure size excluding itself. Compute an offset one // past the end of the structure (needed to know how many instructions to // read). - // TODO: For honest DWARF64 support, DataExtractor will have to treat - // offset_ptr as uint64_t* - uint32_t StartStructureOffset = Offset; - uint32_t EndStructureOffset = Offset + static_cast(Length); + uint64_t StartStructureOffset = Offset; + uint64_t EndStructureOffset = Offset + Length; // The Id field's size depends on the DWARF format Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 
8 : 4); @@ -407,22 +405,23 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { Optional PersonalityEncoding; if (IsEH) { Optional AugmentationLength; - uint32_t StartAugmentationOffset; - uint32_t EndAugmentationOffset; + uint64_t StartAugmentationOffset; + uint64_t EndAugmentationOffset; // Walk the augmentation string to get all the augmentation data. for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: - ReportError(StartOffset, - "Unknown augmentation character in entry at %lx"); + ReportError( + StartOffset, + "Unknown augmentation character in entry at %" PRIx64); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) ReportError(StartOffset, - "Duplicate personality in entry at %lx"); + "Duplicate personality in entry at %" PRIx64); PersonalityEncoding = Data.getU8(&Offset); Personality = Data.getEncodedPointer( &Offset, *PersonalityEncoding, @@ -438,13 +437,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { case 'z': if (i) ReportError(StartOffset, - "'z' must be the first character at %lx"); + "'z' must be the first character at %" PRIx64); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. 
AugmentationLength = Data.getULEB128(&Offset); StartAugmentationOffset = Offset; - EndAugmentationOffset = Offset + - static_cast(*AugmentationLength); + EndAugmentationOffset = Offset + *AugmentationLength; break; case 'B': // B-Key is used for signing functions associated with this @@ -455,14 +453,15 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) - ReportError(StartOffset, "Parsing augmentation data at %lx failed"); + ReportError(StartOffset, + "Parsing augmentation data at %" PRIx64 " failed"); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); } } - auto Cie = llvm::make_unique( + auto Cie = std::make_unique( StartOffset, Length, Version, AugmentationString, AddressSize, SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor, ReturnAddressRegister, AugmentationData, FDEPointerEncoding, @@ -480,8 +479,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { if (IsEH) { // The address size is encoded in the CIE we reference. if (!Cie) - ReportError(StartOffset, - "Parsing FDE data at %lx failed due to missing CIE"); + ReportError(StartOffset, "Parsing FDE data at %" PRIx64 + " failed due to missing CIE"); if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), @@ -498,8 +497,7 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { // Parse the augmentation length and data for this FDE. uint64_t AugmentationLength = Data.getULEB128(&Offset); - uint32_t EndAugmentationOffset = - Offset + static_cast(AugmentationLength); + uint64_t EndAugmentationOffset = Offset + AugmentationLength; // Decode the LSDA if the CIE augmentation string said we should. 
if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) { @@ -509,11 +507,12 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { } if (Offset != EndAugmentationOffset) - ReportError(StartOffset, "Parsing augmentation data at %lx failed"); + ReportError(StartOffset, + "Parsing augmentation data at %" PRIx64 " failed"); } } else { - InitialLocation = Data.getAddress(&Offset); - AddressRange = Data.getAddress(&Offset); + InitialLocation = Data.getRelocatedAddress(&Offset); + AddressRange = Data.getRelocatedAddress(&Offset); } Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer, @@ -527,7 +526,8 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { } if (Offset != EndStructureOffset) - ReportError(StartOffset, "Parsing entry instructions at %lx failed"); + ReportError(StartOffset, + "Parsing entry instructions at %" PRIx64 " failed"); } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index d8a755e90df..87eab34d58e 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -19,15 +19,15 @@ using namespace llvm; using namespace dwarf; bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor(); - const uint32_t UEndOffset = U.getNextUnitOffset(); + const uint64_t UEndOffset = U.getNextUnitOffset(); return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0); } -bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr, +bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, const DWARFDataExtractor &DebugInfoData, - uint32_t UEndOffset, uint32_t D) { + uint64_t UEndOffset, uint32_t D) { Offset = *OffsetPtr; Depth = D; if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset)) diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 
a1cb1e8582e..dbee28ff5ab 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -16,7 +16,6 @@ #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" -#include "llvm/Support/Path.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -156,7 +155,7 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS, // Parse v2-v4 directory and file tables. static void parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + uint64_t *OffsetPtr, uint64_t EndPrologueOffset, DWARFDebugLine::ContentTypeTracker &ContentTypes, std::vector &IncludeDirectories, std::vector &FileNames) { @@ -187,18 +186,24 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, } // Parse v5 directory/file entry content descriptions. -// Returns the descriptors, or an empty vector if we did not find a path or -// ran off the end of the prologue. -static ContentDescriptors -parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t - *OffsetPtr, uint64_t EndPrologueOffset, DWARFDebugLine::ContentTypeTracker - *ContentTypes) { +// Returns the descriptors, or an error if we did not find a path or ran off +// the end of the prologue. 
+static llvm::Expected +parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, + uint64_t EndPrologueOffset, + DWARFDebugLine::ContentTypeTracker *ContentTypes) { ContentDescriptors Descriptors; int FormatCount = DebugLineData.getU8(OffsetPtr); bool HasPath = false; for (int I = 0; I != FormatCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return ContentDescriptors(); + return createStringError( + errc::invalid_argument, + "failed to parse entry content descriptions at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); ContentDescriptor Descriptor; Descriptor.Type = dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); @@ -209,60 +214,82 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t ContentTypes->trackContentType(Descriptor.Type); Descriptors.push_back(Descriptor); } - return HasPath ? Descriptors : ContentDescriptors(); + + if (!HasPath) + return createStringError(errc::invalid_argument, + "failed to parse entry content descriptions" + " because no path was found"); + return Descriptors; } -static bool +static Error parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, uint64_t EndPrologueOffset, + uint64_t *OffsetPtr, uint64_t EndPrologueOffset, const dwarf::FormParams &FormParams, const DWARFContext &Ctx, const DWARFUnit *U, DWARFDebugLine::ContentTypeTracker &ContentTypes, std::vector &IncludeDirectories, std::vector &FileNames) { // Get the directory entry description. - ContentDescriptors DirDescriptors = + llvm::Expected DirDescriptors = parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, nullptr); - if (DirDescriptors.empty()) - return false; + if (!DirDescriptors) + return DirDescriptors.takeError(); // Get the directory entries, according to the format described above. 
int DirEntryCount = DebugLineData.getU8(OffsetPtr); for (int I = 0; I != DirEntryCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return false; - for (auto Descriptor : DirDescriptors) { + return createStringError( + errc::invalid_argument, + "failed to parse directory entry at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); + for (auto Descriptor : *DirDescriptors) { DWARFFormValue Value(Descriptor.Form); switch (Descriptor.Type) { case DW_LNCT_path: if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse directory entry because " + "extracting the form value failed."); IncludeDirectories.push_back(Value); break; default: if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse directory entry because " + "skipping the form value failed."); } } } // Get the file entry description. - ContentDescriptors FileDescriptors = - parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, - &ContentTypes); - if (FileDescriptors.empty()) - return false; + llvm::Expected FileDescriptors = parseV5EntryFormat( + DebugLineData, OffsetPtr, EndPrologueOffset, &ContentTypes); + if (!FileDescriptors) + return FileDescriptors.takeError(); // Get the file entries, according to the format described above. 
int FileEntryCount = DebugLineData.getU8(OffsetPtr); for (int I = 0; I != FileEntryCount; ++I) { if (*OffsetPtr >= EndPrologueOffset) - return false; + return createStringError( + errc::invalid_argument, + "failed to parse file entry at offset " + "0x%8.8" PRIx64 + " because offset extends beyond the prologue end at offset " + "0x%8.8" PRIx64, + *OffsetPtr, EndPrologueOffset); DWARFDebugLine::FileNameEntry FileEntry; - for (auto Descriptor : FileDescriptors) { + for (auto Descriptor : *FileDescriptors) { DWARFFormValue Value(Descriptor.Form); if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) - return false; + return createStringError(errc::invalid_argument, + "failed to parse file entry because " + "extracting the form value failed."); switch (Descriptor.Type) { case DW_LNCT_path: FileEntry.Name = Value; @@ -280,7 +307,10 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, FileEntry.Length = Value.getAsUnsignedConstant().getValue(); break; case DW_LNCT_MD5: - assert(Value.getAsBlock().getValue().size() == 16); + if (!Value.getAsBlock() || Value.getAsBlock().getValue().size() != 16) + return createStringError( + errc::invalid_argument, + "failed to parse file entry because the MD5 hash is invalid"); std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16, FileEntry.Checksum.Bytes.begin()); break; @@ -290,21 +320,21 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, } FileNames.push_back(FileEntry); } - return true; + return Error::success(); } Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U) { const uint64_t PrologueOffset = *OffsetPtr; clear(); TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr); - if (TotalLength == UINT32_MAX) { + if (TotalLength == dwarf::DW_LENGTH_DWARF64) { FormParams.Format = dwarf::DWARF64; TotalLength = DebugLineData.getU64(OffsetPtr); - } else if 
(TotalLength >= 0xfffffff0) { + } else if (TotalLength >= dwarf::DW_LENGTH_lo_reserved) { return createStringError(errc::invalid_argument, "parsing line table prologue at offset 0x%8.8" PRIx64 " unsupported reserved unit length found of value 0x%8.8" PRIx64, @@ -343,14 +373,17 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, } if (getVersion() >= 5) { - if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - FormParams, Ctx, U, ContentTypes, - IncludeDirectories, FileNames)) { - return createStringError(errc::invalid_argument, - "parsing line table prologue at 0x%8.8" PRIx64 - " found an invalid directory or file table description at" - " 0x%8.8" PRIx64, - PrologueOffset, (uint64_t)*OffsetPtr); + if (Error e = parseV5DirFileTables( + DebugLineData, OffsetPtr, EndPrologueOffset, FormParams, Ctx, U, + ContentTypes, IncludeDirectories, FileNames)) { + return joinErrors( + createStringError( + errc::invalid_argument, + "parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64, + PrologueOffset, *OffsetPtr), + std::move(e)); } } else parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, @@ -361,7 +394,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, "parsing line table prologue at 0x%8.8" PRIx64 " should have ended at 0x%8.8" PRIx64 " but it ended at 0x%8.8" PRIx64, - PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr); + PrologueOffset, EndPrologueOffset, *OffsetPtr); return Error::success(); } @@ -468,7 +501,7 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix() { } const DWARFDebugLine::LineTable * -DWARFDebugLine::getLineTable(uint32_t Offset) const { +DWARFDebugLine::getLineTable(uint64_t Offset) const { LineTableConstIter Pos = LineTableMap.find(Offset); if (Pos != LineTableMap.end()) return &Pos->second; @@ -476,10 +509,10 @@ DWARFDebugLine::getLineTable(uint32_t Offset) const { } Expected 
DWARFDebugLine::getOrParseLineTable( - DWARFDataExtractor &DebugLineData, uint32_t Offset, const DWARFContext &Ctx, + DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback) { if (!DebugLineData.isValidOffset(Offset)) - return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx32 + return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64 " is not a valid debug line section offset", Offset); @@ -496,10 +529,10 @@ Expected DWARFDebugLine::getOrParseLineTable( } Error DWARFDebugLine::LineTable::parse( - DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, + DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U, std::function RecoverableErrorCallback, raw_ostream *OS) { - const uint32_t DebugLineOffset = *OffsetPtr; + const uint64_t DebugLineOffset = *OffsetPtr; clear(); @@ -515,7 +548,7 @@ Error DWARFDebugLine::LineTable::parse( if (PrologueErr) return PrologueErr; - const uint32_t EndOffset = + const uint64_t EndOffset = DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength(); // See if we should tell the data extractor the address size. @@ -529,7 +562,7 @@ Error DWARFDebugLine::LineTable::parse( while (*OffsetPtr < EndOffset) { if (OS) - *OS << format("0x%08.08" PRIx32 ": ", *OffsetPtr); + *OS << format("0x%08.08" PRIx64 ": ", *OffsetPtr); uint8_t Opcode = DebugLineData.getU8(OffsetPtr); @@ -540,7 +573,7 @@ Error DWARFDebugLine::LineTable::parse( // Extended Opcodes always start with a zero opcode followed by // a uleb128 length so you can skip ones you don't know about uint64_t Len = DebugLineData.getULEB128(OffsetPtr); - uint32_t ExtOffset = *OffsetPtr; + uint64_t ExtOffset = *OffsetPtr; // Tolerate zero-length; assume length is correct and soldier on. 
if (Len == 0) { @@ -585,7 +618,7 @@ Error DWARFDebugLine::LineTable::parse( DebugLineData.setAddressSize(Len - 1); else if (DebugLineData.getAddressSize() != Len - 1) { return createStringError(errc::invalid_argument, - "mismatching address size at offset 0x%8.8" PRIx32 + "mismatching address size at offset 0x%8.8" PRIx64 " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64, ExtOffset, DebugLineData.getAddressSize(), Len - 1); @@ -652,8 +685,8 @@ Error DWARFDebugLine::LineTable::parse( // Otherwise we have an unparseable line-number program. if (*OffsetPtr - ExtOffset != Len) return createStringError(errc::illegal_byte_sequence, - "unexpected line op length at offset 0x%8.8" PRIx32 - " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx32, + "unexpected line op length at offset 0x%8.8" PRIx64 + " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64, ExtOffset, Len, *OffsetPtr - ExtOffset); } else if (Opcode < Prologue.OpcodeBase) { if (OS) @@ -1007,10 +1040,9 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { sys::path::is_absolute(Path, sys::path::Style::windows); } -bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, - StringRef CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::Prologue::getFileNameByIndex( + uint64_t FileIndex, StringRef CompDir, FileLineInfoKind Kind, + std::string &Result, sys::path::Style Style) const { if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = getFileNameEntry(FileIndex); @@ -1036,11 +1068,11 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, // We know that FileName is not absolute, the only way to have an // absolute path at this point would be if IncludeDir is absolute. if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) - sys::path::append(FilePath, CompDir); + sys::path::append(FilePath, Style, CompDir); } // sys::path::append skips empty strings. 
- sys::path::append(FilePath, IncludeDir, FileName); + sys::path::append(FilePath, Style, IncludeDir, FileName); Result = FilePath.str(); return true; } @@ -1092,7 +1124,8 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data, } bool DWARFDebugLine::Prologue::totalLengthIsValid() const { - return TotalLength == 0xffffffff || TotalLength < 0xfffffff0; + return TotalLength == dwarf::DW_LENGTH_DWARF64 || + TotalLength < dwarf::DW_LENGTH_lo_reserved; } DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( @@ -1101,7 +1134,7 @@ DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); - uint32_t OldOffset = Offset; + uint64_t OldOffset = Offset; LineTable LT; if (Error Err = LT.parse(DebugLineData, &Offset, Context, U, RecoverableErrorCallback, OS)) @@ -1115,14 +1148,14 @@ void DWARFDebugLine::SectionParser::skip( assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); - uint32_t OldOffset = Offset; + uint64_t OldOffset = Offset; LineTable LT; if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, Context, U)) ErrorCallback(std::move(Err)); moveToNextTable(OldOffset, LT.Prologue); } -DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) { +DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { DWARFUnit *U = nullptr; auto It = LineToUnit.find(Offset); if (It != LineToUnit.end()) @@ -1131,7 +1164,7 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint32_t Offset) { return U; } -void DWARFDebugLine::SectionParser::moveToNextTable(uint32_t OldOffset, +void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, const Prologue &P) { // If the length field is not valid, we don't know where the next table is, so // cannot continue to parse. 
Mark the parser as done, and leave the Offset diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 6d8f4bee77c..4f7b01130a4 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -28,19 +28,18 @@ using namespace llvm; // expression that LLVM doesn't produce. Guessing the wrong version means we // won't be able to pretty print expressions in DWARF2 binaries produced by // non-LLVM tools. -static void dumpExpression(raw_ostream &OS, ArrayRef Data, +static void dumpExpression(raw_ostream &OS, ArrayRef Data, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U) { - DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()), - IsLittleEndian, AddressSize); + DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize); DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U); } -void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian, +void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, uint64_t BaseAddress, + bool IsLittleEndian, unsigned AddressSize, - const MCRegisterInfo *MRI, - DWARFUnit *U, - uint64_t BaseAddress, + const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const { for (const Entry &E : Entries) { OS << '\n'; @@ -64,12 +63,12 @@ DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const { return nullptr; } -void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, +void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, DIDumpOptions DumpOpts, Optional Offset) const { auto DumpLocationList = [&](const LocationList &L) { - OS << format("0x%8.8x: ", L.Offset); - L.dump(OS, IsLittleEndian, AddressSize, MRI, nullptr, 0, 12); - OS << "\n\n"; + OS << format("0x%8.8" PRIx64 ": ", L.Offset); + L.dump(OS, 0, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, 12); + OS << "\n"; }; if (Offset) { @@ -80,50 +79,47 @@ void 
DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI, for (const LocationList &L : Locations) { DumpLocationList(L); + if (&L != &Locations.back()) + OS << '\n'; } } -Optional -DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, unsigned *Offset) { +Expected +DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data, + uint64_t *Offset) { LocationList LL; LL.Offset = *Offset; + AddressSize = Data.getAddressSize(); + DataExtractor::Cursor C(*Offset); // 2.6.2 Location Lists // A location list entry consists of: while (true) { Entry E; - if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; - } // 1. A beginning address offset. ... - E.Begin = Data.getRelocatedAddress(Offset); + E.Begin = Data.getRelocatedAddress(C); // 2. An ending address offset. ... - E.End = Data.getRelocatedAddress(Offset); + E.End = Data.getRelocatedAddress(C); + + if (Error Err = C.takeError()) + return std::move(Err); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the // ending address offset. - if (E.Begin == 0 && E.End == 0) + if (E.Begin == 0 && E.End == 0) { + *Offset = C.tell(); return LL; - - if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; } - unsigned Bytes = Data.getU16(Offset); - if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) { - WithColor::error() << "location list overflows the debug_loc section.\n"; - return None; + if (E.Begin != (AddressSize == 4 ? -1U : -1ULL)) { + unsigned Bytes = Data.getU16(C); + // A single location description describing the location of the object... + Data.getU8(C, E.Loc, Bytes); } - // A single location description describing the location of the object... 
- StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; - E.Loc.reserve(str.size()); - llvm::copy(str, std::back_inserter(E.Loc)); + LL.Entries.push_back(std::move(E)); } } @@ -132,81 +128,89 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset + data.getAddressSize() - 1)) { + uint64_t Offset = 0; + while (Offset < data.getData().size()) { if (auto LL = parseOneLocationList(data, &Offset)) Locations.push_back(std::move(*LL)); - else + else { + logAllUnhandledErrors(LL.takeError(), WithColor::error()); break; + } } - if (data.isValidOffset(Offset)) - WithColor::error() << "failed to consume entire .debug_loc section\n"; } -Optional -DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset, - unsigned Version) { +Expected +DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data, + uint64_t *Offset, unsigned Version) { LocationList LL; LL.Offset = *Offset; + DataExtractor::Cursor C(*Offset); // dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list. - while (auto Kind = - static_cast(Data.getU8(Offset))) { - + while (auto Kind = Data.getU8(C)) { Entry E; E.Kind = Kind; + E.Offset = C.tell() - 1; switch (Kind) { + case dwarf::DW_LLE_base_addressx: + E.Value0 = Data.getULEB128(C); + break; case dwarf::DW_LLE_startx_length: - E.Value0 = Data.getULEB128(Offset); + E.Value0 = Data.getULEB128(C); // Pre-DWARF 5 has different interpretation of the length field. We have // to support both pre- and standartized styles for the compatibility. 
if (Version < 5) - E.Value1 = Data.getU32(Offset); + E.Value1 = Data.getU32(C); else - E.Value1 = Data.getULEB128(Offset); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_start_length: - E.Value0 = Data.getAddress(Offset); - E.Value1 = Data.getULEB128(Offset); + E.Value0 = Data.getAddress(C); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_offset_pair: - E.Value0 = Data.getULEB128(Offset); - E.Value1 = Data.getULEB128(Offset); + E.Value0 = Data.getULEB128(C); + E.Value1 = Data.getULEB128(C); break; case dwarf::DW_LLE_base_address: - E.Value0 = Data.getAddress(Offset); + E.Value0 = Data.getAddress(C); break; default: - WithColor::error() << "dumping support for LLE of kind " << (int)Kind - << " not implemented\n"; - return None; + cantFail(C.takeError()); + return createStringError(errc::illegal_byte_sequence, + "LLE of kind %x not supported", (int)Kind); } - if (Kind != dwarf::DW_LLE_base_address) { - unsigned Bytes = - Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset); + if (Kind != dwarf::DW_LLE_base_address && + Kind != dwarf::DW_LLE_base_addressx) { + unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C); // A single location description describing the location of the object... 
- StringRef str = Data.getData().substr(*Offset, Bytes); - *Offset += Bytes; - E.Loc.resize(str.size()); - llvm::copy(str, E.Loc.begin()); + Data.getU8(C, E.Loc, Bytes); } LL.Entries.push_back(std::move(E)); } + if (Error Err = C.takeError()) + return std::move(Err); + Entry E; + E.Kind = dwarf::DW_LLE_end_of_list; + E.Offset = C.tell() - 1; + LL.Entries.push_back(E); + *Offset = C.tell(); return LL; } -void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) { +void DWARFDebugLoclists::parse(DataExtractor data, uint64_t Offset, uint64_t EndOffset, uint16_t Version) { IsLittleEndian = data.isLittleEndian(); AddressSize = data.getAddressSize(); - uint32_t Offset = 0; - while (data.isValidOffset(Offset)) { + while (Offset < EndOffset) { if (auto LL = parseOneLocationList(data, &Offset, Version)) Locations.push_back(std::move(*LL)); - else + else { + logAllUnhandledErrors(LL.takeError(), WithColor::error()); return; + } } } @@ -219,51 +223,106 @@ DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const { return nullptr; } +void DWARFDebugLoclists::Entry::dump(raw_ostream &OS, uint64_t &BaseAddr, + bool IsLittleEndian, unsigned AddressSize, + const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent, + size_t MaxEncodingStringLength) const { + if (DumpOpts.Verbose) { + OS << "\n"; + OS.indent(Indent); + auto EncodingString = dwarf::LocListEncodingString(Kind); + // Unsupported encodings should have been reported during parsing. 
+ assert(!EncodingString.empty() && "Unknown loclist entry encoding"); + OS << format("%s%*c", EncodingString.data(), + MaxEncodingStringLength - EncodingString.size() + 1, '('); + switch (Kind) { + case dwarf::DW_LLE_startx_length: + case dwarf::DW_LLE_start_length: + case dwarf::DW_LLE_offset_pair: + OS << format("0x%*.*" PRIx64 ", 0x%*.*" PRIx64, AddressSize * 2, + AddressSize * 2, Value0, AddressSize * 2, AddressSize * 2, + Value1); + break; + case dwarf::DW_LLE_base_addressx: + case dwarf::DW_LLE_base_address: + OS << format("0x%*.*" PRIx64, AddressSize * 2, AddressSize * 2, + Value0); + break; + case dwarf::DW_LLE_end_of_list: + break; + } + OS << ')'; + } + auto PrintPrefix = [&] { + OS << "\n"; + OS.indent(Indent); + if (DumpOpts.Verbose) + OS << format("%*s", MaxEncodingStringLength, (const char *)"=> "); + }; + switch (Kind) { + case dwarf::DW_LLE_startx_length: + PrintPrefix(); + OS << "Addr idx " << Value0 << " (w/ length " << Value1 << "): "; + break; + case dwarf::DW_LLE_start_length: + PrintPrefix(); + DWARFAddressRange(Value0, Value0 + Value1) + .dump(OS, AddressSize, DumpOpts); + OS << ": "; + break; + case dwarf::DW_LLE_offset_pair: + PrintPrefix(); + DWARFAddressRange(BaseAddr + Value0, BaseAddr + Value1) + .dump(OS, AddressSize, DumpOpts); + OS << ": "; + break; + case dwarf::DW_LLE_base_addressx: + if (!DumpOpts.Verbose) + return; + break; + case dwarf::DW_LLE_end_of_list: + if (!DumpOpts.Verbose) + return; + break; + case dwarf::DW_LLE_base_address: + BaseAddr = Value0; + if (!DumpOpts.Verbose) + return; + break; + default: + llvm_unreachable("unreachable locations list kind"); + } + + dumpExpression(OS, Loc, IsLittleEndian, AddressSize, MRI, U); +} void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U, + DIDumpOptions DumpOpts, unsigned Indent) const { - for (const Entry &E : Entries) { - switch (E.Kind) { - case 
dwarf::DW_LLE_startx_length: - OS << '\n'; - OS.indent(Indent); - OS << "Addr idx " << E.Value0 << " (w/ length " << E.Value1 << "): "; - break; - case dwarf::DW_LLE_start_length: - OS << '\n'; - OS.indent(Indent); - OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2, - AddressSize * 2, E.Value0, AddressSize * 2, AddressSize * 2, - E.Value0 + E.Value1); - break; - case dwarf::DW_LLE_offset_pair: - OS << '\n'; - OS.indent(Indent); - OS << format("[0x%*.*" PRIx64 ", 0x%*.*" PRIx64 "): ", AddressSize * 2, - AddressSize * 2, BaseAddr + E.Value0, AddressSize * 2, - AddressSize * 2, BaseAddr + E.Value1); - break; - case dwarf::DW_LLE_base_address: - BaseAddr = E.Value0; - break; - default: - llvm_unreachable("unreachable locations list kind"); - } + size_t MaxEncodingStringLength = 0; + if (DumpOpts.Verbose) + for (const auto &Entry : Entries) + MaxEncodingStringLength = + std::max(MaxEncodingStringLength, + dwarf::LocListEncodingString(Entry.Kind).size()); - dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U); - } + for (const Entry &E : Entries) + E.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, U, DumpOpts, Indent, + MaxEncodingStringLength); } void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr, - const MCRegisterInfo *MRI, + const MCRegisterInfo *MRI, DIDumpOptions DumpOpts, Optional Offset) const { auto DumpLocationList = [&](const LocationList &L) { - OS << format("0x%8.8x: ", L.Offset); - L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, /*Indent=*/12); - OS << "\n\n"; + OS << format("0x%8.8" PRIx64 ": ", L.Offset); + L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, DumpOpts, + /*Indent=*/12); + OS << "\n"; }; if (Offset) { @@ -274,5 +333,7 @@ void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr, for (const LocationList &L : Locations) { DumpLocationList(L); + if (&L != &Locations.back()) + OS << '\n'; } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp 
b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index 3317a778cc7..9a0e770aed3 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -53,7 +53,7 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const { } void DWARFDebugMacro::parse(DataExtractor data) { - uint32_t Offset = 0; + uint64_t Offset = 0; while (data.isValidOffset(Offset)) { // A macro list entry consists of: Entry E; diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index 963ec64f5e9..ab71b239cb6 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -23,7 +23,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj, bool LittleEndian, bool GnuStyle) : GnuStyle(GnuStyle) { DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0); - uint32_t Offset = 0; + uint64_t Offset = 0; while (PubNames.isValidOffset(Offset)) { Sets.push_back({}); Set &SetData = Sets.back(); @@ -49,13 +49,13 @@ void DWARFDebugPubTable::dump(raw_ostream &OS) const { for (const Set &S : Sets) { OS << "length = " << format("0x%08x", S.Length); OS << " version = " << format("0x%04x", S.Version); - OS << " unit_offset = " << format("0x%08x", S.Offset); + OS << " unit_offset = " << format("0x%08" PRIx64, S.Offset); OS << " unit_size = " << format("0x%08x", S.Size) << '\n'; OS << (GnuStyle ? 
"Offset Linkage Kind Name\n" : "Offset Name\n"); for (const Entry &E : S.Entries) { - OS << format("0x%8.8x ", E.SecOffset); + OS << format("0x%8.8" PRIx64 " ", E.SecOffset); if (GnuStyle) { StringRef EntryLinkage = GDBIndexEntryLinkageString(E.Descriptor.Linkage); diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index d8df81a0aa0..1a1857d8cd7 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -17,17 +17,17 @@ using namespace llvm; void DWARFDebugRangeList::clear() { - Offset = -1U; + Offset = -1ULL; AddressSize = 0; Entries.clear(); } Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, - uint32_t *offset_ptr) { + uint64_t *offset_ptr) { clear(); if (!data.isValidOffset(*offset_ptr)) return createStringError(errc::invalid_argument, - "invalid range list offset 0x%" PRIx32, *offset_ptr); + "invalid range list offset 0x%" PRIx64, *offset_ptr); AddressSize = data.getAddressSize(); if (AddressSize != 4 && AddressSize != 8) @@ -38,7 +38,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, RangeListEntry Entry; Entry.SectionIndex = -1ULL; - uint32_t prev_offset = *offset_ptr; + uint64_t prev_offset = *offset_ptr; Entry.StartAddress = data.getRelocatedAddress(offset_ptr); Entry.EndAddress = data.getRelocatedAddress(offset_ptr, &Entry.SectionIndex); @@ -47,7 +47,7 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, if (*offset_ptr != prev_offset + 2 * AddressSize) { clear(); return createStringError(errc::invalid_argument, - "invalid range list entry at offset 0x%" PRIx32, + "invalid range list entry at offset 0x%" PRIx64, prev_offset); } if (Entry.isEndOfListEntry()) @@ -59,12 +59,12 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data, void DWARFDebugRangeList::dump(raw_ostream &OS) const { for (const RangeListEntry &RLE : Entries) { - const char *format_str = (AddressSize == 4 - ? 
"%08x %08" PRIx64 " %08" PRIx64 "\n" - : "%08x %016" PRIx64 " %016" PRIx64 "\n"); + const char *format_str = + (AddressSize == 4 ? "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n" + : "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n"); OS << format(format_str, Offset, RLE.StartAddress, RLE.EndAddress); } - OS << format("%08x \n", Offset); + OS << format("%08" PRIx64 " \n", Offset); } DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges( diff --git a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp index 5ac3326f668..f6785b89e86 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp @@ -16,8 +16,8 @@ using namespace llvm; -Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, - uint32_t *OffsetPtr) { +Error RangeListEntry::extract(DWARFDataExtractor Data, uint64_t End, + uint64_t *OffsetPtr) { Offset = *OffsetPtr; SectionIndex = -1ULL; // The caller should guarantee that we have at least 1 byte available, so @@ -32,41 +32,41 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, break; // TODO: Support other encodings. 
case dwarf::DW_RLE_base_addressx: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError( errc::invalid_argument, "read past end of table when reading " - "DW_RLE_base_addressx encoding at offset 0x%" PRIx32, + "DW_RLE_base_addressx encoding at offset 0x%" PRIx64, PreviousOffset); break; } case dwarf::DW_RLE_startx_endx: return createStringError(errc::not_supported, "unsupported rnglists encoding DW_RLE_startx_endx at " - "offset 0x%" PRIx32, + "offset 0x%" PRIx64, *OffsetPtr - 1); case dwarf::DW_RLE_startx_length: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError( errc::invalid_argument, "read past end of table when reading " - "DW_RLE_startx_length encoding at offset 0x%" PRIx32, + "DW_RLE_startx_length encoding at offset 0x%" PRIx64, PreviousOffset); break; } case dwarf::DW_RLE_offset_pair: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getULEB128(OffsetPtr); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError(errc::invalid_argument, "read past end of table when reading " - "DW_RLE_offset_pair encoding at offset 0x%" PRIx32, + "DW_RLE_offset_pair encoding at offset 0x%" PRIx64, PreviousOffset); break; } @@ -74,7 +74,7 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End, if ((End - *OffsetPtr) < Data.getAddressSize()) return createStringError(errc::invalid_argument, "insufficient space remaining in table for " - "DW_RLE_base_address encoding at offset 0x%" PRIx32, + "DW_RLE_base_address encoding at offset 0x%" PRIx64, *OffsetPtr - 1); Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); break; @@ -84,27 +84,27 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, 
uint32_t End, return createStringError(errc::invalid_argument, "insufficient space remaining in table for " "DW_RLE_start_end encoding " - "at offset 0x%" PRIx32, + "at offset 0x%" PRIx64, *OffsetPtr - 1); Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); Value1 = Data.getRelocatedAddress(OffsetPtr); break; } case dwarf::DW_RLE_start_length: { - uint32_t PreviousOffset = *OffsetPtr - 1; + uint64_t PreviousOffset = *OffsetPtr - 1; Value0 = Data.getRelocatedAddress(OffsetPtr, &SectionIndex); Value1 = Data.getULEB128(OffsetPtr); if (End < *OffsetPtr) return createStringError(errc::invalid_argument, "read past end of table when reading " - "DW_RLE_start_length encoding at offset 0x%" PRIx32, + "DW_RLE_start_length encoding at offset 0x%" PRIx64, PreviousOffset); break; } default: return createStringError(errc::not_supported, "unknown rnglists encoding 0x%" PRIx32 - " at offset 0x%" PRIx32, + " at offset 0x%" PRIx64, uint32_t(Encoding), *OffsetPtr - 1); } @@ -187,7 +187,7 @@ void RangeListEntry::dump( if (DumpOpts.Verbose) { // Print the section offset in verbose mode. - OS << format("0x%8.8" PRIx32 ":", Offset); + OS << format("0x%8.8" PRIx64 ":", Offset); auto EncodingString = dwarf::RangeListEncodingString(EntryKind); // Unsupported encodings should have been reported during parsing. 
assert(!EncodingString.empty() && "Unknown range entry encoding"); diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index d638dc4239f..cec194e8b6b 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -21,6 +21,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/WithColor.h" @@ -91,21 +92,29 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, } FormValue.dump(OS, DumpOpts); + const auto &DumpLL = [&](auto ExpectedLL) { + if (ExpectedLL) { + uint64_t BaseAddr = 0; + if (Optional BA = U->getBaseAddress()) + BaseAddr = BA->Address; + auto LLDumpOpts = DumpOpts; + LLDumpOpts.Verbose = false; + ExpectedLL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), + MRI, U, LLDumpOpts, Indent); + } else { + OS << '\n'; + OS.indent(Indent); + OS << formatv("error extracting location list: {0}", + fmt_consume(ExpectedLL.takeError())); + } + }; if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) { - uint32_t Offset = *FormValue.getAsSectionOffset(); + uint64_t Offset = *FormValue.getAsSectionOffset(); if (!U->isDWOUnit() && !U->getLocSection()->Data.empty()) { DWARFDebugLoc DebugLoc; DWARFDataExtractor Data(Obj, *U->getLocSection(), Ctx.isLittleEndian(), Obj.getAddressSize()); - auto LL = DebugLoc.parseOneLocationList(Data, &Offset); - if (LL) { - uint64_t BaseAddr = 0; - if (Optional BA = U->getBaseAddress()) - BaseAddr = BA->Address; - LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, U, - BaseAddr, Indent); - } else - OS << "error extracting location list."; + DumpLL(DebugLoc.parseOneLocationList(Data, &Offset)); return; } @@ -121,18 +130,8 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, // Modern locations list (.debug_loclists) are 
used starting from v5. // Ideally we should take the version from the .debug_loclists section // header, but using CU's version for simplicity. - auto LL = DWARFDebugLoclists::parseOneLocationList( - Data, &Offset, UseLocLists ? U->getVersion() : 4); - - uint64_t BaseAddr = 0; - if (Optional BA = U->getBaseAddress()) - BaseAddr = BA->Address; - - if (LL) - LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, - U, Indent); - else - OS << "error extracting location list."; + DumpLL(DWARFDebugLoclists::parseOneLocationList( + Data, &Offset, UseLocLists ? U->getVersion() : 4)); } } } @@ -264,7 +263,7 @@ static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, - uint32_t *OffsetPtr, dwarf::Attribute Attr, + uint64_t *OffsetPtr, dwarf::Attribute Attr, dwarf::Form Form, unsigned Indent, DIDumpOptions DumpOpts) { if (!Die.isValid()) @@ -568,8 +567,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, if (!isValid()) return; DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor(); - const uint32_t Offset = getOffset(); - uint32_t offset = Offset; + const uint64_t Offset = getOffset(); + uint64_t offset = Offset; if (DumpOpts.ShowParents) { DIDumpOptions ParentDumpOpts = DumpOpts; ParentDumpOpts.ShowParents = false; @@ -581,7 +580,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, uint32_t abbrCode = debug_info_data.getULEB128(&offset); if (DumpOpts.ShowAddresses) WithColor(OS, HighlightColor::Address).get() - << format("\n0x%8.8x: ", Offset); + << format("\n0x%8.8" PRIx64 ": ", Offset); if (abbrCode) { auto AbbrevDecl = getAbbreviationDeclarationPtr(); @@ -685,7 +684,7 @@ void DWARFDie::attribute_iterator::updateForIndex( AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); // Add the previous byte size of any previous attribute value. 
AttrValue.Offset += AttrValue.ByteSize; - uint32_t ParseOffset = AttrValue.Offset; + uint64_t ParseOffset = AttrValue.Offset; auto U = Die.getDwarfUnit(); assert(U && "Die must have valid DWARF unit"); AttrValue.Value = DWARFFormValue::createFromUnit( @@ -733,6 +732,7 @@ bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) { case DW_AT_call_data_value: // Extensions. case DW_AT_GNU_call_site_value: + case DW_AT_GNU_call_site_target: return true; default: return false; diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp index 470d4b5364b..5009b1b7b41 100644 --- a/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -119,7 +119,7 @@ static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { } bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, - uint8_t AddressSize, uint32_t Offset) { + uint8_t AddressSize, uint64_t Offset) { Opcode = Data.getU8(&Offset); Desc = getOpDesc(Opcode); @@ -218,9 +218,8 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode, else DwarfRegNum = Opcode - DW_OP_reg0; - int LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH); - if (LLVMRegNum >= 0) { - if (const char *RegName = MRI->getName(LLVMRegNum)) { + if (Optional LLVMRegNum = MRI->getLLVMRegNum(DwarfRegNum, isEH)) { + if (const char *RegName = MRI->getName(*LLVMRegNum)) { if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) || Opcode == DW_OP_bregx) OS << format(" %s%+" PRId64, RegName, Operands[OpNum]); @@ -263,7 +262,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, if (Size == Operation::BaseTypeRef && U) { auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); if (Die && Die.getTag() == dwarf::DW_TAG_base_type) { - OS << format(" (0x%08x)", U->getOffset() + Operands[Operand]); + OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]); if (auto Name = Die.find(dwarf::DW_AT_name)) OS << " \"" << 
Name->getAsCString() << "\""; } else { @@ -271,7 +270,7 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, Operands[Operand]); } } else if (Size == Operation::SizeBlock) { - uint32_t Offset = Operands[Operand]; + uint64_t Offset = Operands[Operand]; for (unsigned i = 0; i < Operands[Operand - 1]; ++i) OS << format(" 0x%02x", Expr->Data.getU8(&Offset)); } else { @@ -290,7 +289,7 @@ void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo, uint32_t EntryValExprSize = 0; for (auto &Op : *this) { if (!Op.print(OS, this, RegInfo, U, IsEH)) { - uint32_t FailOffset = Op.getEndOffset(); + uint64_t FailOffset = Op.getEndOffset(); while (FailOffset < Data.getData().size()) OS << format(" %02x", Data.getU8(&FailOffset)); return; diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 290d35511cd..26090638b34 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -98,7 +98,7 @@ DWARFFormValue DWARFFormValue::createFromBlockValue(dwarf::Form F, } DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { DWARFFormValue FormValue(F); FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, U->getFormParams(), U); @@ -106,7 +106,7 @@ DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U, } bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, - uint32_t *OffsetPtr, + uint64_t *OffsetPtr, const dwarf::FormParams Params) { bool Indirect = false; do { @@ -234,7 +234,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { } bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, - uint32_t *OffsetPtr, dwarf::FormParams FP, + uint64_t *OffsetPtr, dwarf::FormParams FP, const DWARFContext *Ctx, const DWARFUnit *CU) { if (!Ctx && CU) @@ -590,7 +590,7 @@ Optional DWARFFormValue::getAsCString() const { // FIXME: Add support 
for DW_FORM_GNU_strp_alt if (Form == DW_FORM_GNU_strp_alt || C == nullptr) return None; - uint32_t Offset = Value.uval; + uint64_t Offset = Value.uval; if (Form == DW_FORM_line_strp) { // .debug_line_str is tracked in the Context. if (const char *Str = C->getLineStringExtractor().getCStr(&Offset)) @@ -624,6 +624,7 @@ Optional DWARFFormValue::getAsAddress() const { return SA->Address; return None; } + Optional DWARFFormValue::getAsSectionedAddress() const { if (!isFormClass(FC_Address)) diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index f5f97557808..252b58e5a59 100644 --- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -112,7 +112,7 @@ void DWARFGdbIndex::dump(raw_ostream &OS) { } bool DWARFGdbIndex::parseImpl(DataExtractor Data) { - uint32_t Offset = 0; + uint64_t Offset = 0; // Only version 7 is supported at this moment. Version = Data.getU32(&Offset); diff --git a/lib/DebugInfo/DWARF/DWARFListTable.cpp b/lib/DebugInfo/DWARF/DWARFListTable.cpp index e38e706227d..269ea9f79a6 100644 --- a/lib/DebugInfo/DWARF/DWARFListTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFListTable.cpp @@ -16,33 +16,42 @@ using namespace llvm; Error DWARFListTableHeader::extract(DWARFDataExtractor Data, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { HeaderOffset = *OffsetPtr; // Read and verify the length field. if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t))) return createStringError(errc::invalid_argument, "section is not large enough to contain a " - "%s table length at offset 0x%" PRIx32, + "%s table length at offset 0x%" PRIx64, SectionName.data(), *OffsetPtr); - // TODO: Add support for DWARF64. 
- HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr); - if (HeaderData.Length == 0xffffffffu) - return createStringError(errc::not_supported, - "DWARF64 is not supported in %s at offset 0x%" PRIx32, - SectionName.data(), HeaderOffset); Format = dwarf::DwarfFormat::DWARF32; - if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) + uint8_t OffsetByteSize = 4; + HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr); + if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { + Format = dwarf::DwarfFormat::DWARF64; + OffsetByteSize = 8; + HeaderData.Length = Data.getU64(OffsetPtr); + } else if (HeaderData.Length >= dwarf::DW_LENGTH_lo_reserved) { return createStringError(errc::invalid_argument, - "%s table at offset 0x%" PRIx32 - " has too small length (0x%" PRIx32 + "%s table at offset 0x%" PRIx64 + " has unsupported reserved unit length of value 0x%8.8" PRIx64, + SectionName.data(), HeaderOffset, HeaderData.Length); + } + uint64_t FullLength = + HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); + assert(FullLength == length()); + if (FullLength < getHeaderSize(Format)) + return createStringError(errc::invalid_argument, + "%s table at offset 0x%" PRIx64 + " has too small length (0x%" PRIx64 ") to contain a complete header", - SectionName.data(), HeaderOffset, length()); - uint32_t End = HeaderOffset + length(); - if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) + SectionName.data(), HeaderOffset, FullLength); + uint64_t End = HeaderOffset + FullLength; + if (!Data.isValidOffsetForDataOfSize(HeaderOffset, FullLength)) return createStringError(errc::invalid_argument, "section is not large enough to contain a %s table " - "of length 0x%" PRIx32 " at offset 0x%" PRIx32, - SectionName.data(), length(), HeaderOffset); + "of length 0x%" PRIx64 " at offset 0x%" PRIx64, + SectionName.data(), FullLength, HeaderOffset); HeaderData.Version = Data.getU16(OffsetPtr); HeaderData.AddrSize = Data.getU8(OffsetPtr); @@ -53,35 +62,35 @@ Error 
DWARFListTableHeader::extract(DWARFDataExtractor Data, if (HeaderData.Version != 5) return createStringError(errc::invalid_argument, "unrecognised %s table version %" PRIu16 - " in table at offset 0x%" PRIx32, + " in table at offset 0x%" PRIx64, SectionName.data(), HeaderData.Version, HeaderOffset); if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) return createStringError(errc::not_supported, - "%s table at offset 0x%" PRIx32 + "%s table at offset 0x%" PRIx64 " has unsupported address size %" PRIu8, SectionName.data(), HeaderOffset, HeaderData.AddrSize); if (HeaderData.SegSize != 0) return createStringError(errc::not_supported, - "%s table at offset 0x%" PRIx32 + "%s table at offset 0x%" PRIx64 " has unsupported segment selector size %" PRIu8, SectionName.data(), HeaderOffset, HeaderData.SegSize); - if (End < HeaderOffset + sizeof(HeaderData) + - HeaderData.OffsetEntryCount * sizeof(uint32_t)) + if (End < HeaderOffset + getHeaderSize(Format) + + HeaderData.OffsetEntryCount * OffsetByteSize) return createStringError(errc::invalid_argument, - "%s table at offset 0x%" PRIx32 " has more offset entries (%" PRIu32 + "%s table at offset 0x%" PRIx64 " has more offset entries (%" PRIu32 ") than there is space for", SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount); Data.setAddressSize(HeaderData.AddrSize); for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I) - Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr)); + Offsets.push_back(Data.getRelocatedValue(OffsetByteSize, OffsetPtr)); return Error::success(); } void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (DumpOpts.Verbose) - OS << format("0x%8.8" PRIx32 ": ", HeaderOffset); + OS << format("0x%8.8" PRIx64 ": ", HeaderOffset); OS << format( - "%s list header: length = 0x%8.8" PRIx32 ", version = 0x%4.4" PRIx16 ", " + "%s list header: length = 0x%8.8" PRIx64 ", version = 0x%4.4" PRIx16 ", " "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 ", 
offset_entry_count = " "0x%8.8" PRIx32 "\n", @@ -91,18 +100,17 @@ void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (HeaderData.OffsetEntryCount > 0) { OS << "offsets: ["; for (const auto &Off : Offsets) { - OS << format("\n0x%8.8" PRIx32, Off); + OS << format("\n0x%8.8" PRIx64, Off); if (DumpOpts.Verbose) - OS << format(" => 0x%8.8" PRIx32, - Off + HeaderOffset + sizeof(HeaderData)); + OS << format(" => 0x%8.8" PRIx64, + Off + HeaderOffset + getHeaderSize(Format)); } OS << "\n]\n"; } } -uint32_t DWARFListTableHeader::length() const { +uint64_t DWARFListTableHeader::length() const { if (HeaderData.Length == 0) return 0; - // TODO: DWARF64 support. - return HeaderData.Length + sizeof(uint32_t); + return HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); } diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index 844920ba5b1..bb81090ba25 100644 --- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -24,21 +24,23 @@ void DWARFTypeUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { if (DumpOpts.SummarizeTypes) { OS << "name = '" << Name << "'" << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " length = " << format("0x%08x", getLength()) << '\n'; + << " length = " << format("0x%08" PRIx64, getLength()) << '\n'; return; } - OS << format("0x%08x", getOffset()) << ": Type Unit:" - << " length = " << format("0x%08x", getLength()) + OS << format("0x%08" PRIx64, getOffset()) << ": Type Unit:" + << " length = " << format("0x%08" PRIx64, getLength()) << " version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset()) + OS << " abbr_offset = " + << format("0x%04" PRIx64, getAbbreviations()->getOffset()) << " addr_size = " << format("0x%02x", getAddressByteSize()) << " name = '" << Name << 
"'" << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " type_offset = " << format("0x%04x", getTypeOffset()) - << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n"; + << " type_offset = " << format("0x%04" PRIx64, getTypeOffset()) + << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset()) + << ")\n"; if (DWARFDie TU = getUnitDIE(false)) TU.dump(OS, 0, DumpOpts); diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index b74acf60c74..a56402a707a 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -37,9 +37,9 @@ void DWARFUnitVector::addUnitsForSection(DWARFContext &C, const DWARFSection &Section, DWARFSectionKind SectionKind) { const DWARFObject &D = C.getDWARFObj(); - addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangeSection(), - &D.getLocSection(), D.getStringSection(), - D.getStringOffsetSection(), &D.getAddrSection(), + addUnitsImpl(C, D, Section, C.getDebugAbbrev(), &D.getRangesSection(), + &D.getLocSection(), D.getStrSection(), + D.getStrOffsetsSection(), &D.getAddrSection(), D.getLineSection(), D.isLittleEndian(), false, false, SectionKind); } @@ -49,9 +49,9 @@ void DWARFUnitVector::addUnitsForDWOSection(DWARFContext &C, DWARFSectionKind SectionKind, bool Lazy) { const DWARFObject &D = C.getDWARFObj(); - addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangeDWOSection(), - &D.getLocDWOSection(), D.getStringDWOSection(), - D.getStringOffsetDWOSection(), &D.getAddrSection(), + addUnitsImpl(C, D, DWOSection, C.getDebugAbbrevDWO(), &D.getRangesDWOSection(), + &D.getLocDWOSection(), D.getStrDWOSection(), + D.getStrOffsetsDWOSection(), &D.getAddrSection(), D.getLineDWOSection(), C.isLittleEndian(), true, Lazy, SectionKind); } @@ -66,7 +66,7 @@ void DWARFUnitVector::addUnitsImpl( // Lazy initialization of Parser, now that we have all section info. 
if (!Parser) { Parser = [=, &Context, &Obj, &Section, &SOS, - &LS](uint32_t Offset, DWARFSectionKind SectionKind, + &LS](uint64_t Offset, DWARFSectionKind SectionKind, const DWARFSection *CurSection, const DWARFUnitIndex::Entry *IndexEntry) -> std::unique_ptr { @@ -83,11 +83,11 @@ void DWARFUnitVector::addUnitsImpl( return nullptr; std::unique_ptr U; if (Header.isTypeUnit()) - U = llvm::make_unique(Context, InfoSection, Header, DA, + U = std::make_unique(Context, InfoSection, Header, DA, RS, LocSection, SS, SOS, AOS, LS, LE, IsDWO, *this); else - U = llvm::make_unique(Context, InfoSection, Header, + U = std::make_unique(Context, InfoSection, Header, DA, RS, LocSection, SS, SOS, AOS, LS, LE, IsDWO, *this); return U; @@ -101,7 +101,7 @@ void DWARFUnitVector::addUnitsImpl( // within a section, although not necessarily within the object file, // even if we do lazy parsing. auto I = this->begin(); - uint32_t Offset = 0; + uint64_t Offset = 0; while (Data.isValidOffset(Offset)) { if (I != this->end() && (&(*I)->getInfoSection() != &Section || (*I)->getOffset() == Offset)) { @@ -126,11 +126,11 @@ DWARFUnit *DWARFUnitVector::addUnit(std::unique_ptr Unit) { return this->insert(I, std::move(Unit))->get(); } -DWARFUnit *DWARFUnitVector::getUnitForOffset(uint32_t Offset) const { +DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const { auto end = begin() + getNumInfoUnits(); auto *CU = std::upper_bound(begin(), end, Offset, - [](uint32_t LHS, const std::unique_ptr &RHS) { + [](uint64_t LHS, const std::unique_ptr &RHS) { return LHS < RHS->getNextUnitOffset(); }); if (CU != end && (*CU)->getOffset() <= Offset) @@ -149,7 +149,7 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) { auto *CU = std::upper_bound(begin(), end, CUOff->Offset, - [](uint32_t LHS, const std::unique_ptr &RHS) { + [](uint64_t LHS, const std::unique_ptr &RHS) { return LHS < RHS->getNextUnitOffset(); }); if (CU != end && (*CU)->getOffset() <= Offset) @@ -209,7 +209,7 @@ 
DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const { if (I != R.end() && std::next(I) == R.end()) return (*I)->getAddrOffsetSectionItem(Index); } - uint32_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); + uint64_t Offset = AddrOffsetSectionBase + Index * getAddressByteSize(); if (AddrOffsetSection->Data.size() < Offset + getAddressByteSize()) return None; DWARFDataExtractor DA(Context.getDWARFObj(), *AddrOffsetSection, @@ -223,7 +223,7 @@ Optional DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const { if (!StringOffsetsTableContribution) return None; unsigned ItemSize = getDwarfStringOffsetsByteSize(); - uint32_t Offset = getStringOffsetsBase() + Index * ItemSize; + uint64_t Offset = getStringOffsetsBase() + Index * ItemSize; if (StringOffsetSection.Data.size() < Offset + ItemSize) return None; DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, @@ -233,7 +233,7 @@ Optional DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const { bool DWARFUnitHeader::extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, - uint32_t *offset_ptr, + uint64_t *offset_ptr, DWARFSectionKind SectionKind, const DWARFUnitIndex *Index, const DWARFUnitIndex::Entry *Entry) { @@ -243,11 +243,9 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, IndexEntry = Index->getFromOffset(*offset_ptr); Length = debug_info.getRelocatedValue(4, offset_ptr); FormParams.Format = DWARF32; - unsigned SizeOfLength = 4; - if (Length == 0xffffffff) { + if (Length == dwarf::DW_LENGTH_DWARF64) { Length = debug_info.getU64(offset_ptr); FormParams.Format = DWARF64; - SizeOfLength = 8; } FormParams.Version = debug_info.getU16(offset_ptr); if (FormParams.Version >= 5) { @@ -277,7 +275,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, } if (isTypeUnit()) { TypeHash = debug_info.getU64(offset_ptr); - TypeOffset = debug_info.getU32(offset_ptr); + TypeOffset = + debug_info.getUnsigned(offset_ptr, FormParams.getDwarfOffsetByteSize()); } else if 
(UnitType == DW_UT_split_compile || UnitType == DW_UT_skeleton) DWOId = debug_info.getU64(offset_ptr); @@ -290,7 +289,8 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, bool TypeOffsetOK = !isTypeUnit() ? true - : TypeOffset >= Size && TypeOffset < getLength() + SizeOfLength; + : TypeOffset >= Size && + TypeOffset < getLength() + getUnitLengthFieldByteSize(); bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); bool VersionOK = DWARFContext::isSupportedVersion(getVersion()); bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8; @@ -306,16 +306,18 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, // Parse the rangelist table header, including the optional array of offsets // following it (DWARF v5 and later). static Expected -parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { - // TODO: Support DWARF64 +parseRngListTableHeader(DWARFDataExtractor &DA, uint64_t Offset, + DwarfFormat Format) { // We are expected to be called with Offset 0 or pointing just past the table - // header, which is 12 bytes long for DWARF32. + // header. Correct Offset in the latter case so that it points to the start + // of the header. if (Offset > 0) { - if (Offset < 12U) + uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format); + if (Offset < HeaderSize) return createStringError(errc::invalid_argument, "Did not detect a valid" - " range list table with base = 0x%" PRIu32, + " range list table with base = 0x%" PRIx64 "\n", Offset); - Offset -= 12U; + Offset -= HeaderSize; } llvm::DWARFDebugRnglistTable Table; if (Error E = Table.extractHeaderAndOffsets(DA, &Offset)) @@ -323,13 +325,13 @@ parseRngListTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { return Table; } -Error DWARFUnit::extractRangeList(uint32_t RangeListOffset, +Error DWARFUnit::extractRangeList(uint64_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. 
assert(!DieArray.empty()); DWARFDataExtractor RangesData(Context.getDWARFObj(), *RangeSection, isLittleEndian, getAddressByteSize()); - uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; + uint64_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; return RangeList.extract(RangesData, &ActualRangeListOffset); } @@ -354,8 +356,8 @@ void DWARFUnit::extractDIEsToVector( // Set the offset to that of the first DIE and calculate the start of the // next compilation unit header. - uint32_t DIEOffset = getOffset() + getHeaderSize(); - uint32_t NextCUOffset = getNextUnitOffset(); + uint64_t DIEOffset = getOffset() + getHeaderSize(); + uint64_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntry DIE; DWARFDataExtractor DebugInfoData = getDebugInfoExtractor(); uint32_t Depth = 0; @@ -396,90 +398,98 @@ void DWARFUnit::extractDIEsToVector( // unit header). if (DIEOffset > NextCUOffset) WithColor::warning() << format("DWARF compile unit extends beyond its " - "bounds cu 0x%8.8x at 0x%8.8x\n", + "bounds cu 0x%8.8" PRIx64 " " + "at 0x%8.8" PRIx64 "\n", getOffset(), DIEOffset); } -size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { +void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { + if (Error e = tryExtractDIEsIfNeeded(CUDieOnly)) + WithColor::error() << toString(std::move(e)); +} + +Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1) - return 0; // Already parsed. + return Error::success(); // Already parsed. bool HasCUDie = !DieArray.empty(); extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray); if (DieArray.empty()) - return 0; + return Error::success(); // If CU DIE was just parsed, copy several attribute values from it. 
- if (!HasCUDie) { - DWARFDie UnitDie = getUnitDIE(); - if (Optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) - Header.setDWOId(*DWOId); - if (!IsDWO) { - assert(AddrOffsetSectionBase == 0); - assert(RangeSectionBase == 0); - AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0); - if (!AddrOffsetSectionBase) - AddrOffsetSectionBase = - toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); - RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + if (HasCUDie) + return Error::success(); + + DWARFDie UnitDie(this, &DieArray[0]); + if (Optional DWOId = toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id))) + Header.setDWOId(*DWOId); + if (!IsDWO) { + assert(AddrOffsetSectionBase == 0); + assert(RangeSectionBase == 0); + AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base), 0); + if (!AddrOffsetSectionBase) + AddrOffsetSectionBase = + toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); + RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + } + + // In general, in DWARF v5 and beyond we derive the start of the unit's + // contribution to the string offsets table from the unit DIE's + // DW_AT_str_offsets_base attribute. Split DWARF units do not use this + // attribute, so we assume that there is a contribution to the string + // offsets table starting at offset 0 of the debug_str_offsets.dwo section. + // In both cases we need to determine the format of the contribution, + // which may differ from the unit's format. + DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, + isLittleEndian, 0); + if (IsDWO || getVersion() >= 5) { + auto StringOffsetOrError = + IsDWO ? 
determineStringOffsetsTableContributionDWO(DA) + : determineStringOffsetsTableContribution(DA); + if (!StringOffsetOrError) + return createStringError(errc::invalid_argument, + "invalid reference to or invalid content in " + ".debug_str_offsets[.dwo]: " + + toString(StringOffsetOrError.takeError())); + + StringOffsetsTableContribution = *StringOffsetOrError; + } + + // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to + // describe address ranges. + if (getVersion() >= 5) { + if (IsDWO) + setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); + else + setRangesSection(&Context.getDWARFObj().getRnglistsSection(), + toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0)); + if (RangeSection->Data.size()) { + // Parse the range list table header. Individual range lists are + // extracted lazily. + DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, + isLittleEndian, 0); + auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase, + Header.getFormat()); + if (!TableOrError) + return createStringError(errc::invalid_argument, + "parsing a range list table: " + + toString(TableOrError.takeError())); + + RngListTable = TableOrError.get(); + + // In a split dwarf unit, there is no DW_AT_rnglists_base attribute. + // Adjust RangeSectionBase to point past the table header. + if (IsDWO && RngListTable) + RangeSectionBase = RngListTable->getHeaderSize(); } + } - // In general, in DWARF v5 and beyond we derive the start of the unit's - // contribution to the string offsets table from the unit DIE's - // DW_AT_str_offsets_base attribute. Split DWARF units do not use this - // attribute, so we assume that there is a contribution to the string - // offsets table starting at offset 0 of the debug_str_offsets.dwo section. - // In both cases we need to determine the format of the contribution, - // which may differ from the unit's format. 
- DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, - isLittleEndian, 0); - if (IsDWO || getVersion() >= 5) { - auto StringOffsetOrError = - IsDWO ? determineStringOffsetsTableContributionDWO(DA) - : determineStringOffsetsTableContribution(DA); - if (!StringOffsetOrError) { - WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: " - << toString(StringOffsetOrError.takeError()) << '\n'; - } else { - StringOffsetsTableContribution = *StringOffsetOrError; - } - } - - // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to - // describe address ranges. - if (getVersion() >= 5) { - if (IsDWO) - setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); - else - setRangesSection(&Context.getDWARFObj().getRnglistsSection(), - toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0)); - if (RangeSection->Data.size()) { - // Parse the range list table header. Individual range lists are - // extracted lazily. - DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, - isLittleEndian, 0); - if (auto TableOrError = - parseRngListTableHeader(RangesDA, RangeSectionBase)) - RngListTable = TableOrError.get(); - else - WithColor::error() << "parsing a range list table: " - << toString(TableOrError.takeError()) - << '\n'; - - // In a split dwarf unit, there is no DW_AT_rnglists_base attribute. - // Adjust RangeSectionBase to point past the table header. - if (IsDWO && RngListTable) - RangeSectionBase = RngListTable->getHeaderSize(); - } - } - - // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for - // skeleton CU DIE, so that DWARF users not aware of it are not broken. - } - - return DieArray.size(); + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for + // skeleton CU DIE, so that DWARF users not aware of it are not broken. 
+ return Error::success(); } bool DWARFUnit::parseDWO() { @@ -517,7 +527,8 @@ bool DWARFUnit::parseDWO() { DWO->setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); DWARFDataExtractor RangesDA(Context.getDWARFObj(), *RangeSection, isLittleEndian, 0); - if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase)) + if (auto TableOrError = parseRngListTableHeader(RangesDA, RangeSectionBase, + Header.getFormat())) DWO->RngListTable = TableOrError.get(); else WithColor::error() << "parsing a range list table: " @@ -541,7 +552,7 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) { } Expected -DWARFUnit::findRnglistFromOffset(uint32_t Offset) { +DWARFUnit::findRnglistFromOffset(uint64_t Offset) { if (getVersion() <= 4) { DWARFDebugRangeList RangeList; if (Error E = extractRangeList(Offset, RangeList)) @@ -569,9 +580,9 @@ DWARFUnit::findRnglistFromIndex(uint32_t Index) { if (RngListTable) return createStringError(errc::invalid_argument, "invalid range list table index %d", Index); - else - return createStringError(errc::invalid_argument, - "missing or invalid range list table"); + + return createStringError(errc::invalid_argument, + "missing or invalid range list table"); } Expected DWARFUnit::collectAddressRanges() { @@ -780,11 +791,11 @@ StrOffsetsContributionDescriptor::validateContributionSize( // Look for a DWARF64-formatted contribution to the string offsets table // starting at a given offset and record it in a descriptor. 
static Expected -parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { +parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 16)) return createStringError(errc::invalid_argument, "section offset exceeds section size"); - if (DA.getU32(&Offset) != 0xffffffff) + if (DA.getU32(&Offset) != dwarf::DW_LENGTH_DWARF64) return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit"); uint64_t Size = DA.getU64(&Offset); @@ -798,12 +809,12 @@ parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { // Look for a DWARF32-formatted contribution to the string offsets table // starting at a given offset and record it in a descriptor. static Expected -parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { +parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint64_t Offset) { if (!DA.isValidOffsetForDataOfSize(Offset, 8)) return createStringError(errc::invalid_argument, "section offset exceeds section size"); uint32_t ContributionSize = DA.getU32(&Offset); - if (ContributionSize >= 0xfffffff0) + if (ContributionSize >= dwarf::DW_LENGTH_lo_reserved) return createStringError(errc::invalid_argument, "invalid length"); uint8_t Version = DA.getU16(&Offset); @@ -823,7 +834,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA, case dwarf::DwarfFormat::DWARF64: { if (Offset < 16) return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix"); - auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16); + auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, Offset - 16); if (!DescOrError) return DescOrError.takeError(); Desc = *DescOrError; @@ -832,7 +843,7 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA, case dwarf::DwarfFormat::DWARF32: { if (Offset < 8) return createStringError(errc::invalid_argument, 
"insufficient space for 32 bit header prefix"); - auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8); + auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, Offset - 8); if (!DescOrError) return DescOrError.takeError(); Desc = *DescOrError; diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 047c63461cc..f29c1e6cc5c 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -18,7 +18,7 @@ using namespace llvm; bool DWARFUnitIndex::Header::parse(DataExtractor IndexData, - uint32_t *OffsetPtr) { + uint64_t *OffsetPtr) { if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16)) return false; Version = IndexData.getU32(OffsetPtr); @@ -45,7 +45,7 @@ bool DWARFUnitIndex::parse(DataExtractor IndexData) { } bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { - uint32_t Offset = 0; + uint64_t Offset = 0; if (!Header.parse(IndexData, &Offset)) return false; @@ -54,10 +54,10 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { (2 * Header.NumUnits + 1) * 4 * Header.NumColumns)) return false; - Rows = llvm::make_unique(Header.NumBuckets); + Rows = std::make_unique(Header.NumBuckets); auto Contribs = - llvm::make_unique(Header.NumUnits); - ColumnKinds = llvm::make_unique(Header.NumColumns); + std::make_unique(Header.NumUnits); + ColumnKinds = std::make_unique(Header.NumColumns); // Read Hash Table of Signatures for (unsigned i = 0; i != Header.NumBuckets; ++i) @@ -70,7 +70,7 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { continue; Rows[i].Index = this; Rows[i].Contributions = - llvm::make_unique(Header.NumColumns); + std::make_unique(Header.NumColumns); Contribs[Index - 1] = Rows[i].Contributions.get(); } diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index c2b3189514a..bf499b6ee09 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ 
-34,11 +34,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { if (Pos != End) { if (Pos->intersects(R)) - return Pos; + return std::move(Pos); if (Pos != Begin) { auto Iter = Pos - 1; if (Iter->intersects(R)) - return Iter; + return std::move(Iter); } } @@ -98,7 +98,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const { } bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, - uint32_t *Offset, unsigned UnitIndex, + uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64) { uint64_t AbbrOffset, Length; uint8_t AddrSize = 0; @@ -111,9 +111,9 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, bool ValidType = true; bool ValidAbbrevOffset = true; - uint32_t OffsetStart = *Offset; + uint64_t OffsetStart = *Offset; Length = DebugInfoData.getU32(Offset); - if (Length == UINT32_MAX) { + if (Length == dwarf::DW_LENGTH_DWARF64) { Length = DebugInfoData.getU64(Offset); isUnitDWARF64 = true; } @@ -139,7 +139,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || !ValidType) { Success = false; - error() << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, + error() << format("Units[%d] - start offset: 0x%08" PRIx64 " \n", UnitIndex, OffsetStart); if (!ValidLength) note() << "The length for this unit is too " @@ -203,7 +203,7 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) { } unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) { - if (Die.getTag() != DW_TAG_call_site) + if (Die.getTag() != DW_TAG_call_site && Die.getTag() != DW_TAG_GNU_call_site) return 0; DWARFDie Curr = Die.getParent(); @@ -223,7 +223,9 @@ unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) { Optional CallAttr = Curr.find({DW_AT_call_all_calls, DW_AT_call_all_source_calls, - DW_AT_call_all_tail_calls}); + DW_AT_call_all_tail_calls, 
DW_AT_GNU_all_call_sites, + DW_AT_GNU_all_source_call_sites, + DW_AT_GNU_all_tail_call_sites}); if (!CallAttr) { error() << "Subprogram with call site entry has no DW_AT_call attribute:"; Curr.dump(OS); @@ -273,7 +275,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, const DWARFObject &DObj = DCtx.getDWARFObj(); DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0); unsigned NumDebugInfoErrors = 0; - uint32_t OffsetStart = 0, Offset = 0, UnitIdx = 0; + uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0; uint8_t UnitType = 0; bool isUnitDWARF64 = false; bool isHeaderChainValid = true; @@ -294,10 +296,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, switch (UnitType) { case dwarf::DW_UT_type: case dwarf::DW_UT_split_type: { - Unit = TypeUnitVector.addUnit(llvm::make_unique( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(), - &DObj.getLocSection(), DObj.getStringSection(), - DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(), + Unit = TypeUnitVector.addUnit(std::make_unique( + DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), + &DObj.getLocSection(), DObj.getStrSection(), + DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, TypeUnitVector)); break; @@ -308,10 +310,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, case dwarf::DW_UT_partial: // UnitType = 0 means that we are verifying a compile unit in DWARF v4. 
case 0: { - Unit = CompileUnitVector.addUnit(llvm::make_unique( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangeSection(), - &DObj.getLocSection(), DObj.getStringSection(), - DObj.getStringOffsetSection(), &DObj.getAppleObjCSection(), + Unit = CompileUnitVector.addUnit(std::make_unique( + DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), + &DObj.getLocSection(), DObj.getStrSection(), + DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, CompileUnitVector)); break; @@ -449,7 +451,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DObj.getRangeSection().Data.size()) + if (*SectionOffset >= DObj.getRangesSection().Data.size()) ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:"); break; } @@ -466,9 +468,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, ReportError("DIE has invalid DW_AT_stmt_list encoding:"); break; case DW_AT_location: { - auto VerifyLocationExpr = [&](StringRef D) { + auto VerifyLocationExpr = [&](ArrayRef D) { DWARFUnit *U = Die.getDwarfUnit(); - DataExtractor Data(D, DCtx.isLittleEndian(), 0); + DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0); DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) { @@ -479,13 +481,13 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, }; if (Optional> Expr = AttrValue.Value.getAsBlock()) { // Verify inlined location. - VerifyLocationExpr(llvm::toStringRef(*Expr)); + VerifyLocationExpr(*Expr); } else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) { // Verify location list. 
if (auto DebugLoc = DCtx.getDebugLoc()) if (auto LocList = DebugLoc->getLocationListAtOffset(*LocOffset)) for (const auto &Entry : LocList->Entries) - VerifyLocationExpr({Entry.Loc.data(), Entry.Loc.size()}); + VerifyLocationExpr(Entry.Loc); } break; } @@ -500,6 +502,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, break; if (DieTag == DW_TAG_variable && RefTag == DW_TAG_member) break; + // This might be reference to a function declaration. + if (DieTag == DW_TAG_GNU_call_site && RefTag == DW_TAG_subprogram) + break; ReportError("DIE with tag " + TagString(DieTag) + " has " + AttributeString(Attr) + " that points to DIE with " @@ -545,7 +550,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, error() << FormEncodingString(Form) << " CU offset " << format("0x%08" PRIx64, CUOffset) << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; + << format("0x%08" PRIx64, CUSize) << "):\n"; Die.dump(OS, 0, DumpOpts); dump(Die) << '\n'; } else { @@ -578,7 +583,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, case DW_FORM_strp: { auto SecOffset = AttrValue.Value.getAsSectionOffset(); assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= DObj.getStringSection().size()) { + if (SecOffset && *SecOffset >= DObj.getStrSection().size()) { ++NumErrors; error() << "DW_FORM_strp offset beyond .debug_str bounds:\n"; dump(Die) << '\n'; @@ -605,7 +610,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, // Use a 64-bit type to calculate the offset to guard against overflow. 
uint64_t Offset = (uint64_t)DieCU->getStringOffsetsBase() + Index * ItemSize; - if (DObj.getStringOffsetSection().Data.size() < Offset + ItemSize) { + if (DObj.getStrOffsetsSection().Data.size() < Offset + ItemSize) { ++NumErrors; error() << FormEncodingString(Form) << " uses index " << format("%" PRIu64, Index) << ", which is too large:\n"; @@ -614,7 +619,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, } // Check that the string offset is valid. uint64_t StringOffset = *DieCU->getStringOffsetSectionItem(Index); - if (StringOffset >= DObj.getStringSection().size()) { + if (StringOffset >= DObj.getStrSection().size()) { ++NumErrors; error() << FormEncodingString(Form) << " uses index " << format("%" PRIu64, Index) @@ -635,7 +640,7 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() { // getting the DIE by offset and emitting an error OS << "Verifying .debug_info references...\n"; unsigned NumErrors = 0; - for (const std::pair> &Pair : + for (const std::pair> &Pair : ReferenceToDIEOffsets) { if (DCtx.getDIEForOffset(Pair.first)) continue; @@ -659,12 +664,12 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list)); if (!StmtSectionOffset) continue; - const uint32_t LineTableOffset = *StmtSectionOffset; + const uint64_t LineTableOffset = *StmtSectionOffset; auto LineTable = DCtx.getLineTableForUnit(CU.get()); if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) { if (!LineTable) { ++NumDebugLineErrors; - error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset) + error() << ".debug_line[" << format("0x%08" PRIx64, LineTableOffset) << "] was not able to be parsed for CU:\n"; dump(Die) << '\n'; continue; @@ -680,8 +685,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { if (Iter != StmtListToDie.end()) { ++NumDebugLineErrors; error() << "two compile unit DIEs, " - << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " - << format("0x%08" 
PRIx32, Die.getOffset()) + << format("0x%08" PRIx64, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx64, Die.getOffset()) << ", have the same DW_AT_stmt_list section offset:\n"; dump(Iter->second); dump(Die) << '\n'; @@ -826,10 +831,10 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, uint32_t NumBuckets = AccelTable.getNumBuckets(); uint32_t NumHashes = AccelTable.getNumHashes(); - uint32_t BucketsOffset = + uint64_t BucketsOffset = AccelTable.getSizeHdr() + AccelTable.getHeaderDataLength(); - uint32_t HashesBase = BucketsOffset + NumBuckets * 4; - uint32_t OffsetsBase = HashesBase + NumHashes * 4; + uint64_t HashesBase = BucketsOffset + NumBuckets * 4; + uint64_t OffsetsBase = HashesBase + NumHashes * 4; for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset); if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { @@ -849,28 +854,29 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, } for (uint32_t HashIdx = 0; HashIdx < NumHashes; ++HashIdx) { - uint32_t HashOffset = HashesBase + 4 * HashIdx; - uint32_t DataOffset = OffsetsBase + 4 * HashIdx; + uint64_t HashOffset = HashesBase + 4 * HashIdx; + uint64_t DataOffset = OffsetsBase + 4 * HashIdx; uint32_t Hash = AccelSectionData.getU32(&HashOffset); - uint32_t HashDataOffset = AccelSectionData.getU32(&DataOffset); + uint64_t HashDataOffset = AccelSectionData.getU32(&DataOffset); if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset, sizeof(uint64_t))) { - error() << format("Hash[%d] has invalid HashData offset: 0x%08x.\n", + error() << format("Hash[%d] has invalid HashData offset: " + "0x%08" PRIx64 ".\n", HashIdx, HashDataOffset); ++NumErrors; } - uint32_t StrpOffset; - uint32_t StringOffset; + uint64_t StrpOffset; + uint64_t StringOffset; uint32_t StringCount = 0; - unsigned Offset; + uint64_t Offset; unsigned Tag; while ((StrpOffset = 
AccelSectionData.getU32(&HashDataOffset)) != 0) { const uint32_t NumHashDataObjects = AccelSectionData.getU32(&HashDataOffset); for (uint32_t HashDataIdx = 0; HashDataIdx < NumHashDataObjects; ++HashDataIdx) { - std::tie(Offset, Tag) = AccelTable.readAtoms(HashDataOffset); + std::tie(Offset, Tag) = AccelTable.readAtoms(&HashDataOffset); auto Die = DCtx.getDIEForOffset(Offset); if (!Die) { const uint32_t BucketIdx = @@ -882,8 +888,8 @@ unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, error() << format( "%s Bucket[%d] Hash[%d] = 0x%08x " - "Str[%u] = 0x%08x " - "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n", + "Str[%u] = 0x%08" PRIx64 " DIE[%d] = 0x%08" PRIx64 " " + "is not a valid DIE offset for \"%s\".\n", SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset, HashDataIdx, Offset, Name); @@ -908,8 +914,8 @@ unsigned DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) { // A map from CU offset to the (first) Name Index offset which claims to index // this CU. 
- DenseMap CUMap; - const uint32_t NotIndexed = std::numeric_limits::max(); + DenseMap CUMap; + const uint64_t NotIndexed = std::numeric_limits::max(); CUMap.reserve(DCtx.getNumCompileUnits()); for (const auto &CU : DCtx.compile_units()) @@ -924,7 +930,7 @@ DWARFVerifier::verifyDebugNamesCULists(const DWARFDebugNames &AccelTable) { continue; } for (uint32_t CU = 0, End = NI.getCUCount(); CU < End; ++CU) { - uint32_t Offset = NI.getCUOffset(CU); + uint64_t Offset = NI.getCUOffset(CU); auto Iter = CUMap.find(Offset); if (Iter == CUMap.end()) { @@ -1205,8 +1211,8 @@ unsigned DWARFVerifier::verifyNameIndexEntries( unsigned NumErrors = 0; unsigned NumEntries = 0; - uint32_t EntryID = NTE.getEntryOffset(); - uint32_t NextEntryID = EntryID; + uint64_t EntryID = NTE.getEntryOffset(); + uint64_t NextEntryID = EntryID; Expected EntryOr = NI.getEntry(&NextEntryID); for (; EntryOr; ++NumEntries, EntryID = NextEntryID, EntryOr = NI.getEntry(&NextEntryID)) { @@ -1218,7 +1224,7 @@ unsigned DWARFVerifier::verifyNameIndexEntries( ++NumErrors; continue; } - uint32_t CUOffset = NI.getCUOffset(CUIndex); + uint64_t CUOffset = NI.getCUOffset(CUIndex); uint64_t DIEOffset = CUOffset + *EntryOr->getDIEUnitOffset(); DWARFDie DIE = DCtx.getDIEForOffset(DIEOffset); if (!DIE) { @@ -1276,9 +1282,9 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if (!Location) return false; - auto ContainsInterestingOperators = [&](StringRef D) { + auto ContainsInterestingOperators = [&](ArrayRef D) { DWARFUnit *U = Die.getDwarfUnit(); - DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize()); + DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize()); DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); return any_of(Expression, [](DWARFExpression::Operation &Op) { return !Op.isError() && (Op.getCode() == DW_OP_addr || @@ -1289,7 +1295,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if 
(Optional> Expr = Location->getAsBlock()) { // Inlined location. - if (ContainsInterestingOperators(toStringRef(*Expr))) + if (ContainsInterestingOperators(*Expr)) return true; } else if (Optional Offset = Location->getAsSectionOffset()) { // Location list. @@ -1297,7 +1303,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { if (const DWARFDebugLoc::LocationList *LocList = DebugLoc->getLocationListAtOffset(*Offset)) { if (any_of(LocList->Entries, [&](const DWARFDebugLoc::Entry &E) { - return ContainsInterestingOperators({E.Loc.data(), E.Loc.size()}); + return ContainsInterestingOperators(E.Loc); })) return true; } @@ -1455,7 +1461,7 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection, bool DWARFVerifier::handleAccelTables() { const DWARFObject &D = DCtx.getDWARFObj(); - DataExtractor StrData(D.getStringSection(), DCtx.isLittleEndian(), 0); + DataExtractor StrData(D.getStrSection(), DCtx.isLittleEndian(), 0); unsigned NumErrors = 0; if (!D.getAppleNamesSection().Data.empty()) NumErrors += verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, @@ -1470,8 +1476,8 @@ bool DWARFVerifier::handleAccelTables() { NumErrors += verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc"); - if (!D.getDebugNamesSection().Data.empty()) - NumErrors += verifyDebugNames(D.getDebugNamesSection(), StrData); + if (!D.getNamesSection().Data.empty()) + NumErrors += verifyDebugNames(D.getNamesSection(), StrData); return NumErrors == 0; } diff --git a/lib/DebugInfo/GSYM/FileWriter.cpp b/lib/DebugInfo/GSYM/FileWriter.cpp new file mode 100644 index 00000000000..4b30dcb60a7 --- /dev/null +++ b/lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,78 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +void FileWriter::writeSLEB(int64_t S) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(S, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast(Bytes), Length); +} + +void FileWriter::writeULEB(uint64_t U) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(U, Bytes); + assert(Length < sizeof(Bytes)); + OS.write(reinterpret_cast(Bytes), Length); +} + +void FileWriter::writeU8(uint8_t U) { + OS.write(reinterpret_cast(&U), sizeof(U)); +} + +void FileWriter::writeU16(uint16_t U) { + const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU32(uint32_t U) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::writeU64(uint64_t U) { + const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.write(reinterpret_cast(&Swapped), sizeof(Swapped)); +} + +void FileWriter::fixup32(uint32_t U, uint64_t Offset) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + OS.pwrite(reinterpret_cast(&Swapped), sizeof(Swapped), + Offset); +} + +void FileWriter::writeData(llvm::ArrayRef Data) { + OS.write(reinterpret_cast(Data.data()), Data.size()); +} + +void FileWriter::writeNullTerminated(llvm::StringRef Str) { + OS << Str << '\0'; +} + +uint64_t FileWriter::tell() { + return OS.tell(); +} + +void FileWriter::alignTo(size_t Align) { + off_t Offset = OS.tell(); + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return; + off_t PadCount = AlignedOffset - Offset; + OS.write_zeros(PadCount); +} diff 
--git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp index 55c36a55b4b..ad022fec9e3 100644 --- a/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -1,22 +1,147 @@ -//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===// +//===- FunctionInfo.cpp ---------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" using namespace llvm; using namespace gsym; +/// FunctionInfo information type that is used to encode the optional data +/// that is associated with a FunctionInfo object. 
+enum InfoType : uint32_t { + EndOfList = 0u, + LineTableInfo = 1u, + InlineInfo = 2u +}; + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): " - << "Name=" << HEX32(FI.Name) << '\n'; - for (const auto &Line : FI.Lines) - OS << Line << '\n'; - OS << FI.Inline; + << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline; return OS; } + +llvm::Expected FunctionInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + FunctionInfo FI; + FI.Range.Start = BaseAddr; + uint64_t Offset = 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Size", Offset); + FI.Range.End = FI.Range.Start + Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo Name", Offset); + FI.Name = Data.getU32(&Offset); + if (FI.Name == 0) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x", + Offset - 4, FI.Name); + bool Done = false; + while (!Done) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Offset); + const uint32_t IT = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Offset); + const uint32_t InfoLength = Data.getU32(&Offset); + if (!Data.isValidOffsetForDataOfSize(Offset, InfoLength)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u", + Offset, IT); + DataExtractor InfoData(Data.getData().substr(Offset, InfoLength), + Data.isLittleEndian(), + Data.getAddressSize()); + switch (IT) { + case InfoType::EndOfList: + Done = true; + 
break; + + case InfoType::LineTableInfo: + if (Expected LT = LineTable::decode(InfoData, BaseAddr)) + FI.OptLineTable = std::move(LT.get()); + else + return LT.takeError(); + break; + + case InfoType::InlineInfo: + if (Expected II = InlineInfo::decode(InfoData, BaseAddr)) + FI.Inline = std::move(II.get()); + else + return II.takeError(); + break; + + default: + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": unsupported InfoType %u", + Offset-8, IT); + } + Offset += InfoLength; + } + return std::move(FI); +} + +llvm::Expected FunctionInfo::encode(FileWriter &O) const { + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid FunctionInfo object"); + // Align FunctionInfo data to a 4 byte alignment. + O.alignTo(4); + const uint64_t FuncInfoOffset = O.tell(); + // Write the size in bytes of this function as a uint32_t. This can be zero + // if we just have a symbol from a symbol table and that symbol has no size. + O.writeU32(size()); + // Write the name of this function as a uint32_t string table offset. + O.writeU32(Name); + + if (OptLineTable.hasValue()) { + O.writeU32(InfoType::LineTableInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the LineTable out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = OptLineTable->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "LineTable length is greater than UINT32_MAX"); + // Fixup the size of the LineTable data with the correct size. + O.fixup32(static_cast(Length), StartOffset - 4); + } + + // Write out the inline function info if we have any and if it is valid. 
+ if (Inline.hasValue()) { + O.writeU32(InfoType::InlineInfo); + // Write a uint32_t length as zero for now, we will fix this up after + // writing the InlineInfo out with the number of bytes that were written. + O.writeU32(0); + const auto StartOffset = O.tell(); + llvm::Error err = Inline->encode(O, Range.Start); + if (err) + return std::move(err); + const off_t Length = O.tell() - StartOffset; + if (Length > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "InlineInfo length is greater than UINT32_MAX"); + // Fixup the size of the InlineInfo data with the correct size. + O.fixup32(static_cast(Length), StartOffset - 4); + } + + // Terminate the data chunks with an end of list with zero size + O.writeU32(InfoType::EndOfList); + O.writeU32(0); + return FuncInfoOffset; +} diff --git a/lib/DebugInfo/GSYM/GsymCreator.cpp b/lib/DebugInfo/GSYM/GsymCreator.cpp new file mode 100644 index 00000000000..f371426f201 --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -0,0 +1,275 @@ +//===- GsymCreator.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include + +using namespace llvm; +using namespace gsym; + + +GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { + insertFile(StringRef()); +} + +uint32_t GsymCreator::insertFile(StringRef Path, + llvm::sys::path::Style Style) { + llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); + llvm::StringRef filename = llvm::sys::path::filename(Path, Style); + FileEntry FE(insertString(directory), insertString(filename)); + + std::lock_guard Guard(Mutex); + const auto NextIndex = Files.size(); + // Find FE in hash map and insert if not present. 
+ auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); + if (R.second) + Files.emplace_back(FE); + return R.first->second; +} + +llvm::Error GsymCreator::save(StringRef Path, + llvm::support::endianness ByteOrder) const { + std::error_code EC; + raw_fd_ostream OutStrm(Path, EC); + if (EC) + return llvm::errorCodeToError(EC); + FileWriter O(OutStrm, ByteOrder); + return encode(O); +} + +llvm::Error GsymCreator::encode(FileWriter &O) const { + std::lock_guard Guard(Mutex); + if (Funcs.empty()) + return createStringError(std::errc::invalid_argument, + "no functions to encode"); + if (!Finalized) + return createStringError(std::errc::invalid_argument, + "GsymCreator wasn't finalized prior to encoding"); + + if (Funcs.size() > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many FunctionInfos"); + const uint64_t MinAddr = Funcs.front().startAddress(); + const uint64_t MaxAddr = Funcs.back().startAddress(); + const uint64_t AddrDelta = MaxAddr - MinAddr; + Header Hdr; + Hdr.Magic = GSYM_MAGIC; + Hdr.Version = GSYM_VERSION; + Hdr.AddrOffSize = 0; + Hdr.UUIDSize = static_cast(UUID.size()); + Hdr.BaseAddress = MinAddr; + Hdr.NumAddresses = static_cast(Funcs.size()); + Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabOffset = 0; // We will fix this up later. + memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); + if (UUID.size() > sizeof(Hdr.UUID)) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", (uint32_t)UUID.size()); + // Set the address offset size correctly in the GSYM header. + if (AddrDelta <= UINT8_MAX) + Hdr.AddrOffSize = 1; + else if (AddrDelta <= UINT16_MAX) + Hdr.AddrOffSize = 2; + else if (AddrDelta <= UINT32_MAX) + Hdr.AddrOffSize = 4; + else + Hdr.AddrOffSize = 8; + // Copy the UUID value if we have one. + if (UUID.size() > 0) + memcpy(Hdr.UUID, UUID.data(), UUID.size()); + // Write out the header. 
+ llvm::Error Err = Hdr.encode(O); + if (Err) + return Err; + + // Write out the address offsets. + O.alignTo(Hdr.AddrOffSize); + for (const auto &FuncInfo : Funcs) { + uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + switch(Hdr.AddrOffSize) { + case 1: O.writeU8(static_cast(AddrOffset)); break; + case 2: O.writeU16(static_cast(AddrOffset)); break; + case 4: O.writeU32(static_cast(AddrOffset)); break; + case 8: O.writeU64(AddrOffset); break; + } + } + + // Write out all zeros for the AddrInfoOffsets. + O.alignTo(4); + const off_t AddrInfoOffsetsOffset = O.tell(); + for (size_t i = 0, n = Funcs.size(); i < n; ++i) + O.writeU32(0); + + // Write out the file table + O.alignTo(4); + assert(!Files.empty()); + assert(Files[0].Dir == 0); + assert(Files[0].Base == 0); + size_t NumFiles = Files.size(); + if (NumFiles > UINT32_MAX) + return createStringError(std::errc::invalid_argument, + "too many files"); + O.writeU32(static_cast(NumFiles)); + for (auto File: Files) { + O.writeU32(File.Dir); + O.writeU32(File.Base); + } + + // Write out the string table. + const off_t StrtabOffset = O.tell(); + StrTab.write(O.get_stream()); + const off_t StrtabSize = O.tell() - StrtabOffset; + std::vector AddrInfoOffsets; + + // Write out the address infos for each function info.
+ for (const auto &FuncInfo : Funcs) { + if (Expected OffsetOrErr = FuncInfo.encode(O)) + AddrInfoOffsets.push_back(OffsetOrErr.get()); + else + return OffsetOrErr.takeError(); + } + // Fixup the string table offset and size in the header + O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); + O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); + + // Fixup all address info offsets + uint64_t Offset = 0; + for (auto AddrInfoOffset: AddrInfoOffsets) { + O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); + Offset += 4; + } + return ErrorSuccess(); +} + +llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { + std::lock_guard Guard(Mutex); + if (Finalized) + return createStringError(std::errc::invalid_argument, + "already finalized"); + Finalized = true; + + // Sort function infos so we can emit sorted functions. + llvm::sort(Funcs.begin(), Funcs.end()); + + // Don't let the string table indexes change by finalizing in order. + StrTab.finalizeInOrder(); + + // Remove duplicates function infos that have both entries from debug info + // (DWARF or Breakpad) and entries from the SymbolTable. + // + // Also handle overlapping function. Usually there shouldn't be any, but they + // can and do happen in some rare cases. + // + // (a) (b) (c) + // ^ ^ ^ ^ + // |X |Y |X ^ |X + // | | | |Y | ^ + // | | | v v |Y + // v v v v + // + // In (a) and (b), Y is ignored and X will be reported for the full range. + // In (c), both functions will be included in the result and lookups for an + // address in the intersection will return Y because of binary search. 
+ // + // Note that in case of (b), we cannot include Y in the result because then + // we wouldn't find any function for range (end of Y, end of X) + // with binary search + auto NumBefore = Funcs.size(); + auto Curr = Funcs.begin(); + auto Prev = Funcs.end(); + while (Curr != Funcs.end()) { + // Can't check for overlaps or same address ranges if we don't have a + // previous entry + if (Prev != Funcs.end()) { + if (Prev->Range.intersects(Curr->Range)) { + // Overlapping address ranges. + if (Prev->Range == Curr->Range) { + // Same address range. Check if one is from debug info and the other + // is from a symbol table. If so, then keep the one with debug info. + // Our sorting guarantees that entries with matching address ranges + // that have debug info are last in the sort. + if (*Prev == *Curr) { + // FunctionInfo entries match exactly (range, lines, inlines) + OS << "warning: duplicate function info entries, removing " + "duplicate:\n" + << *Curr << '\n'; + Curr = Funcs.erase(Prev); + } else { + if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { + // Same address range, one with no debug info (symbol) and the + // next with debug info. Keep the latter. + Curr = Funcs.erase(Prev); + } else { + OS << "warning: same address range contains different debug " + << "info. 
Removing:\n" + << *Prev << "\nIn favor of this one:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + } else { + // print warnings about overlaps + OS << "warning: function ranges overlap:\n" + << *Prev << "\n" + << *Curr << "\n"; + } + } else if (Prev->Range.size() == 0 && + Curr->Range.contains(Prev->Range.Start)) { + OS << "warning: removing symbol:\n" + << *Prev << "\nKeeping:\n" + << *Curr << "\n"; + Curr = Funcs.erase(Prev); + } + } + if (Curr == Funcs.end()) + break; + Prev = Curr++; + } + + OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " + << Funcs.size() << " total\n"; + return Error::success(); +} + +uint32_t GsymCreator::insertString(StringRef S) { + std::lock_guard Guard(Mutex); + if (S.empty()) + return 0; + return StrTab.add(S); +} + +void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { + std::lock_guard Guard(Mutex); + Funcs.emplace_back(std::move(FI)); +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) { + std::lock_guard Guard(Mutex); + for (auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} + +void GsymCreator::forEachFunctionInfo( + std::function const &Callback) const { + std::lock_guard Guard(Mutex); + for (const auto &FI : Funcs) { + if (!Callback(FI)) + break; + } +} diff --git a/lib/DebugInfo/GSYM/GsymReader.cpp b/lib/DebugInfo/GSYM/GsymReader.cpp new file mode 100644 index 00000000000..1b448cf80b7 --- /dev/null +++ b/lib/DebugInfo/GSYM/GsymReader.cpp @@ -0,0 +1,265 @@ +//===- GsymReader.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/GsymReader.h" + +#include +#include +#include +#include + +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace gsym; + +GsymReader::GsymReader(std::unique_ptr Buffer) : + MemBuffer(std::move(Buffer)), + Endian(support::endian::system_endianness()) {} + + GsymReader::GsymReader(GsymReader &&RHS) = default; + +GsymReader::~GsymReader() = default; + +llvm::Expected GsymReader::openFile(StringRef Filename) { + // Open the input file and return an appropriate error if needed. + ErrorOr> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + auto Err = BuffOrErr.getError(); + if (Err) + return llvm::errorCodeToError(Err); + return create(BuffOrErr.get()); +} + +llvm::Expected GsymReader::copyBuffer(StringRef Bytes) { + auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); + return create(MemBuffer); +} + +llvm::Expected +GsymReader::create(std::unique_ptr &MemBuffer) { + if (!MemBuffer.get()) + return createStringError(std::errc::invalid_argument, + "invalid memory buffer"); + GsymReader GR(std::move(MemBuffer)); + llvm::Error Err = GR.parse(); + if (Err) + return std::move(Err); + return std::move(GR); +} + +llvm::Error +GsymReader::parse() { + BinaryStreamReader FileData(MemBuffer->getBuffer(), + support::endian::system_endianness()); + // Check for the magic bytes. This file format is designed to be mmap'ed + // into a process and accessed as read only. This is done for performance + // and efficiency for symbolicating and parsing GSYM data. 
+ if (FileData.readObject(Hdr)) + return createStringError(std::errc::invalid_argument, + "not enough data for a GSYM header"); + + const auto HostByteOrder = support::endian::system_endianness(); + switch (Hdr->Magic) { + case GSYM_MAGIC: + Endian = HostByteOrder; + break; + case GSYM_CIGAM: + // This is a GSYM file, but not native endianness. + Endian = sys::IsBigEndianHost ? support::little : support::big; + Swap.reset(new SwappedData); + break; + default: + return createStringError(std::errc::invalid_argument, + "not a GSYM file"); + } + + bool DataIsLittleEndian = HostByteOrder != support::little; + // Read a correctly byte swapped header if we need to. + if (Swap) { + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + if (auto ExpectedHdr = Header::decode(Data)) + Swap->Hdr = ExpectedHdr.get(); + else + return ExpectedHdr.takeError(); + Hdr = &Swap->Hdr; + } + + // Detect errors in the header and report any that are found. If we make it + // past this without errors, we know we have a good magic value, a supported + // version number, verified address offset size and a valid UUID size. + if (Error Err = Hdr->checkForError()) + return Err; + + if (!Swap) { + // This is the native endianness case that is most common and optimized for + // efficient lookups. Here we just grab pointers to the native data and + // use ArrayRef objects to allow efficient read only access. + + // Read the address offsets. + if (FileData.padToAlignment(Hdr->AddrOffSize) || + FileData.readArray(AddrOffsets, + Hdr->NumAddresses * Hdr->AddrOffSize)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + + // Read the address info offsets. + if (FileData.padToAlignment(4) || + FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address info offsets table"); + + // Read the file table. 
+ uint32_t NumFiles = 0; + if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + + // Get the string table. + FileData.setOffset(Hdr->StrtabOffset); + if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); +} else { + // This is the non native endianness case that is not common and not + // optimized for lookups. Here we decode the important tables into local + // storage and then set the ArrayRef objects to point to these swapped + // copies of the read only data so lookups can be as efficient as possible. + DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); + + // Read the address offsets. + uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); + Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); + switch (Hdr->AddrOffSize) { + case 1: + if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 2: + if (!Data.getU16(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 4: + if (!Data.getU32(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + break; + case 8: + if (!Data.getU64(&Offset, + reinterpret_cast(Swap->AddrOffsets.data()), + Hdr->NumAddresses)) + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + } + AddrOffsets = ArrayRef(Swap->AddrOffsets); + + // Read the address info offsets. 
+ Offset = alignTo(Offset, 4); + Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); + if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) + AddrInfoOffsets = ArrayRef(Swap->AddrInfoOffsets); + else + return createStringError(std::errc::invalid_argument, + "failed to read address table"); + // Read the file table. + const uint32_t NumFiles = Data.getU32(&Offset); + if (NumFiles > 0) { + Swap->Files.resize(NumFiles); + if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) + Files = ArrayRef(Swap->Files); + else + return createStringError(std::errc::invalid_argument, + "failed to read file table"); + } + // Get the string table. + StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, + Hdr->StrtabSize); + if (StrTab.Data.empty()) + return createStringError(std::errc::invalid_argument, + "failed to read string table"); + } + return Error::success(); + +} + +const Header &GsymReader::getHeader() const { + // The only way to get a GsymReader is from GsymReader::openFile(...) or + // GsymReader::copyBuffer() and the header must be valid and initialized to + // a valid pointer value, so the assert below should not trigger. 
+ assert(Hdr); + return *Hdr; +} + +Optional GsymReader::getAddress(size_t Index) const { + switch (Hdr->AddrOffSize) { + case 1: return addressForIndex(Index); + case 2: return addressForIndex(Index); + case 4: return addressForIndex(Index); + case 8: return addressForIndex(Index); + } + return llvm::None; +} + +Optional GsymReader::getAddressInfoOffset(size_t Index) const { + const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); + if (Index < NumAddrInfoOffsets) + return AddrInfoOffsets[Index]; + return llvm::None; +} + +Expected +GsymReader::getAddressIndex(const uint64_t Addr) const { + if (Addr < Hdr->BaseAddress) + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + switch (Hdr->AddrOffSize) { + case 1: return getAddressOffsetIndex(AddrOffset); + case 2: return getAddressOffsetIndex(AddrOffset); + case 4: return getAddressOffsetIndex(AddrOffset); + case 8: return getAddressOffsetIndex(AddrOffset); + default: break; + } + return createStringError(std::errc::invalid_argument, + "unsupported address offset size %u", + Hdr->AddrOffSize); +} + +llvm::Expected GsymReader::getFunctionInfo(uint64_t Addr) const { + Expected AddressIndex = getAddressIndex(Addr); + if (!AddressIndex) + return AddressIndex.takeError(); + // Address info offsets size should have been checked in parse(). 
+ assert(*AddressIndex < AddrInfoOffsets.size()); + auto AddrInfoOffset = AddrInfoOffsets[*AddressIndex]; + DataExtractor Data(MemBuffer->getBuffer().substr(AddrInfoOffset), Endian, 4); + if (Optional OptAddr = getAddress(*AddressIndex)) { + auto ExpectedFI = FunctionInfo::decode(Data, *OptAddr); + if (!ExpectedFI) // Propagate the decode error instead of dropping it. + return ExpectedFI.takeError(); + if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0) + return ExpectedFI; + return createStringError(std::errc::invalid_argument, + "address 0x%" PRIx64 " not in GSYM", Addr); + } + return createStringError(std::errc::invalid_argument, + "failed to extract address[%" PRIu64 "]", + *AddressIndex); +} diff --git a/lib/DebugInfo/GSYM/Header.cpp b/lib/DebugInfo/GSYM/Header.cpp new file mode 100644 index 00000000000..0b3fb9c4989 --- /dev/null +++ b/lib/DebugInfo/GSYM/Header.cpp @@ -0,0 +1,109 @@ +//===- Header.cpp -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/Header.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +#define HEX8(v) llvm::format_hex(v, 4) +#define HEX16(v) llvm::format_hex(v, 6) +#define HEX32(v) llvm::format_hex(v, 10) +#define HEX64(v) llvm::format_hex(v, 18) + +using namespace llvm; +using namespace gsym; + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const Header &H) { + OS << "Header:\n"; + OS << " Magic = " << HEX32(H.Magic) << "\n"; + OS << " Version = " << HEX16(H.Version) << '\n'; + OS << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n'; + OS << " UUIDSize = " << HEX8(H.UUIDSize) << '\n'; + OS << " BaseAddress = " << HEX64(H.BaseAddress) << '\n'; + OS << " NumAddresses = " << HEX32(H.NumAddresses) << '\n'; + OS << " StrtabOffset = " << HEX32(H.StrtabOffset) << '\n'; + OS << " StrtabSize = " << HEX32(H.StrtabSize) << '\n'; + OS << " UUID = "; + for (uint8_t I = 0; I < H.UUIDSize; ++I) + OS << format_hex_no_prefix(H.UUID[I], 2); + OS << '\n'; + return OS; +} + +/// Check the header and detect any errors. +llvm::Error Header::checkForError() const { + if (Magic != GSYM_MAGIC) + return createStringError(std::errc::invalid_argument, + "invalid GSYM magic 0x%8.8x", Magic); + if (Version != GSYM_VERSION) + return createStringError(std::errc::invalid_argument, + "unsupported GSYM version %u", Version); + switch (AddrOffSize) { + case 1: break; + case 2: break; + case 4: break; + case 8: break; + default: + return createStringError(std::errc::invalid_argument, + "invalid address offset size %u", + AddrOffSize); + } + if (UUIDSize > GSYM_MAX_UUID_SIZE) + return createStringError(std::errc::invalid_argument, + "invalid UUID size %u", UUIDSize); + return Error::success(); +} + +llvm::Expected
Header::decode(DataExtractor &Data) { + uint64_t Offset = 0; + // The header is stored as a single blob of data that has a fixed byte size. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Header))) + return createStringError(std::errc::invalid_argument, + "not enough data for a gsym::Header"); + Header H; + H.Magic = Data.getU32(&Offset); + H.Version = Data.getU16(&Offset); + H.AddrOffSize = Data.getU8(&Offset); + H.UUIDSize = Data.getU8(&Offset); + H.BaseAddress = Data.getU64(&Offset); + H.NumAddresses = Data.getU32(&Offset); + H.StrtabOffset = Data.getU32(&Offset); + H.StrtabSize = Data.getU32(&Offset); + Data.getU8(&Offset, H.UUID, GSYM_MAX_UUID_SIZE); + if (llvm::Error Err = H.checkForError()) + return std::move(Err); + return H; +} + +llvm::Error Header::encode(FileWriter &O) const { + // Users must verify the Header is valid prior to calling this funtion. + if (llvm::Error Err = checkForError()) + return Err; + O.writeU32(Magic); + O.writeU16(Version); + O.writeU8(AddrOffSize); + O.writeU8(UUIDSize); + O.writeU64(BaseAddress); + O.writeU32(NumAddresses); + O.writeU32(StrtabOffset); + O.writeU32(StrtabSize); + O.writeData(llvm::ArrayRef(UUID)); + return Error::success(); +} + +bool llvm::gsym::operator==(const Header &LHS, const Header &RHS) { + return LHS.Magic == RHS.Magic && LHS.Version == RHS.Version && + LHS.AddrOffSize == RHS.AddrOffSize && LHS.UUIDSize == RHS.UUIDSize && + LHS.BaseAddress == RHS.BaseAddress && + LHS.NumAddresses == RHS.NumAddresses && + LHS.StrtabOffset == RHS.StrtabOffset && + LHS.StrtabSize == RHS.StrtabSize && + memcmp(LHS.UUID, RHS.UUID, LHS.UUIDSize) == 0; +} diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp index 781c1755241..32ed2c70957 100644 --- a/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/FileEntry.h" +#include 
"llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -57,3 +59,101 @@ llvm::Optional InlineInfo::getInlineStack(uint64_t Addr return Result; return llvm::None; } + +/// Decode an InlineInfo in Data at the specified offset. +/// +/// A local helper function to decode InlineInfo objects. This function is +/// called recursively when parsing child InlineInfo objects. +/// +/// \param Data The data extractor to decode from. +/// \param Offset The offset within \a Data to decode from. +/// \param BaseAddr The base address to use when decoding address ranges. +/// \returns An InlineInfo or an error describing the issue that was +/// encountered during decoding. +static llvm::Expected decode(DataExtractor &Data, uint64_t &Offset, + uint64_t BaseAddr) { + InlineInfo Inline; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset); + Inline.Ranges.decode(Data, BaseAddr, Offset); + if (Inline.Ranges.empty()) + return Inline; + if (!Data.isValidOffsetForDataOfSize(Offset, 1)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children", + Offset); + bool HasChildren = Data.getU8(&Offset) != 0; + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset); + Inline.Name = Data.getU32(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset); + Inline.CallFile = (uint32_t)Data.getULEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset); + Inline.CallLine = (uint32_t)Data.getULEB128(&Offset); + if (HasChildren) { + // 
Child address ranges are encoded relative to the first address in the + // parent InlineInfo object. + const auto ChildBaseAddr = Inline.Ranges[0].Start; + while (true) { + llvm::Expected Child = decode(Data, Offset, ChildBaseAddr); + if (!Child) + return Child.takeError(); + // InlineInfo with empty Ranges termintes a child sibling chain. + if (Child.get().Ranges.empty()) + break; + Inline.Children.emplace_back(std::move(*Child)); + } + } + return Inline; +} + +llvm::Expected InlineInfo::decode(DataExtractor &Data, + uint64_t BaseAddr) { + uint64_t Offset = 0; + return ::decode(Data, Offset, BaseAddr); +} + +llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const { + // Users must verify the InlineInfo is valid prior to calling this funtion. + // We don't want to emit any InlineInfo objects if they are not valid since + // it will waste space in the GSYM file. + if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid InlineInfo object"); + Ranges.encode(O, BaseAddr); + bool HasChildren = !Children.empty(); + O.writeU8(HasChildren); + O.writeU32(Name); + O.writeULEB(CallFile); + O.writeULEB(CallLine); + if (HasChildren) { + // Child address ranges are encoded as relative to the first + // address in the Ranges for this object. This keeps the offsets + // small and allows for efficient encoding using ULEB offsets. + const uint64_t ChildBaseAddr = Ranges[0].Start; + for (const auto &Child : Children) { + // Make sure all child address ranges are contained in the parent address + // ranges. + for (const auto &ChildRange: Child.Ranges) { + if (!Ranges.contains(ChildRange)) + return createStringError(std::errc::invalid_argument, + "child range not contained in parent"); + } + llvm::Error Err = Child.encode(O, ChildBaseAddr); + if (Err) + return Err; + } + + // Terminate child sibling chain by emitting a zero. 
This zero will cause + // the decodeAll() function above to return false and stop the decoding + // of child InlineInfo objects that are siblings. + O.writeULEB(0); + } + return Error::success(); +} diff --git a/lib/DebugInfo/GSYM/LineTable.cpp b/lib/DebugInfo/GSYM/LineTable.cpp new file mode 100644 index 00000000000..824c0041be9 --- /dev/null +++ b/lib/DebugInfo/GSYM/LineTable.cpp @@ -0,0 +1,287 @@ +//===- LineTable.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/LineTable.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" + +using namespace llvm; +using namespace gsym; + +enum LineTableOpCode { + EndSequence = 0x00, ///< End of the line table. + SetFile = 0x01, ///< Set LineTableRow.file_idx, don't push a row. + AdvancePC = 0x02, ///< Increment LineTableRow.address, and push a row. + AdvanceLine = 0x03, ///< Set LineTableRow.file_line, don't push a row. + FirstSpecial = 0x04, ///< All special opcodes push a row. 
+}; + +struct DeltaInfo { + int64_t Delta; + uint32_t Count; + DeltaInfo(int64_t D, uint32_t C) : Delta(D), Count(C) {} +}; + +inline bool operator<(const DeltaInfo &LHS, int64_t Delta) { + return LHS.Delta < Delta; +} + +static bool encodeSpecial(int64_t MinLineDelta, int64_t MaxLineDelta, + int64_t LineDelta, uint64_t AddrDelta, + uint8_t &SpecialOp) { + if (LineDelta < MinLineDelta) + return false; + if (LineDelta > MaxLineDelta) + return false; + int64_t LineRange = MaxLineDelta - MinLineDelta + 1; + int64_t AdjustedOp = ((LineDelta - MinLineDelta) + AddrDelta * LineRange); + int64_t Op = AdjustedOp + FirstSpecial; + if (Op < 0) + return false; + if (Op > 255) + return false; + SpecialOp = (uint8_t)Op; + return true; +} + +typedef std::function LineEntryCallback; + +static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, + LineEntryCallback const &Callback) { + uint64_t Offset = 0; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MinDelta", Offset); + int64_t MinDelta = Data.getSLEB128(&Offset); + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable MaxDelta", Offset); + int64_t MaxDelta = Data.getSLEB128(&Offset); + int64_t LineRange = MaxDelta - MinDelta + 1; + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": missing LineTable FirstLine", Offset); + const uint32_t FirstLine = (uint32_t)Data.getULEB128(&Offset); + LineEntry Row(BaseAddr, 1, FirstLine); + bool Done = false; + while (!Done) { + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before EndSequence", Offset); + uint8_t Op = Data.getU8(&Offset); + switch (Op) { + case EndSequence: + Done = true; + break; + case SetFile: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before 
SetFile value", + Offset); + Row.File = (uint32_t)Data.getULEB128(&Offset); + break; + case AdvancePC: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvancePC value", + Offset); + Row.Addr += Data.getULEB128(&Offset); + // If the function callback returns false, we stop parsing. + if (Callback(Row) == false) + return Error::success(); + break; + case AdvanceLine: + if (!Data.isValidOffset(Offset)) + return createStringError(std::errc::io_error, + "0x%8.8" PRIx64 ": EOF found before AdvanceLine value", + Offset); + Row.Line += Data.getSLEB128(&Offset); + break; + default: { + // A byte that contains both address and line increment. + uint8_t AdjustedOp = Op - FirstSpecial; + int64_t LineDelta = MinDelta + (AdjustedOp % LineRange); + uint64_t AddrDelta = (AdjustedOp / LineRange); + Row.Line += LineDelta; + Row.Addr += AddrDelta; + // If the function callback returns false, we stop parsing. + if (Callback(Row) == false) + return Error::success(); + break; + } + } + } + return Error::success(); +} + +llvm::Error LineTable::encode(FileWriter &Out, uint64_t BaseAddr) const { + // Users must verify the LineTable is valid prior to calling this funtion. + // We don't want to emit any LineTable objects if they are not valid since + // it will waste space in the GSYM file. 
+ if (!isValid()) + return createStringError(std::errc::invalid_argument, + "attempted to encode invalid LineTable object"); + + int64_t MinLineDelta = INT64_MAX; + int64_t MaxLineDelta = INT64_MIN; + std::vector DeltaInfos; + if (Lines.size() == 1) { + MinLineDelta = 0; + MaxLineDelta = 0; + } else { + int64_t PrevLine = 1; + bool First = true; + for (const auto &line_entry : Lines) { + if (First) + First = false; + else { + int64_t LineDelta = (int64_t)line_entry.Line - PrevLine; + auto End = DeltaInfos.end(); + auto Pos = std::lower_bound(DeltaInfos.begin(), End, LineDelta); + if (Pos != End && Pos->Delta == LineDelta) + ++Pos->Count; + else + DeltaInfos.insert(Pos, DeltaInfo(LineDelta, 1)); + if (LineDelta < MinLineDelta) + MinLineDelta = LineDelta; + if (LineDelta > MaxLineDelta) + MaxLineDelta = LineDelta; + } + PrevLine = (int64_t)line_entry.Line; + } + assert(MinLineDelta <= MaxLineDelta); + } + // Set the min and max line delta intelligently based on the counts of + // the line deltas. if our range is too large. + const int64_t MaxLineRange = 14; + if (MaxLineDelta - MinLineDelta > MaxLineRange) { + uint32_t BestIndex = 0; + uint32_t BestEndIndex = 0; + uint32_t BestCount = 0; + const size_t NumDeltaInfos = DeltaInfos.size(); + for (uint32_t I = 0; I < NumDeltaInfos; ++I) { + const int64_t FirstDelta = DeltaInfos[I].Delta; + uint32_t CurrCount = 0; + uint32_t J; + for (J = I; J < NumDeltaInfos; ++J) { + auto LineRange = DeltaInfos[J].Delta - FirstDelta; + if (LineRange > MaxLineRange) + break; + CurrCount += DeltaInfos[J].Count; + } + if (CurrCount > BestCount) { + BestIndex = I; + BestEndIndex = J - 1; + BestCount = CurrCount; + } + } + MinLineDelta = DeltaInfos[BestIndex].Delta; + MaxLineDelta = DeltaInfos[BestEndIndex].Delta; + } + if (MinLineDelta == MaxLineDelta && MinLineDelta > 0 && + MinLineDelta < MaxLineRange) + MinLineDelta = 0; + assert(MinLineDelta <= MaxLineDelta); + + // Initialize the line entry state as a starting point. 
All line entries + // will be deltas from this. + LineEntry Prev(BaseAddr, 1, Lines.front().Line); + + // Write out the min and max line delta as signed LEB128. + Out.writeSLEB(MinLineDelta); + Out.writeSLEB(MaxLineDelta); + // Write out the starting line number as a unsigned LEB128. + Out.writeULEB(Prev.Line); + + for (const auto &Curr : Lines) { + if (Curr.Addr < BaseAddr) + return createStringError(std::errc::invalid_argument, + "LineEntry has address 0x%" PRIx64 " which is " + "less than the function start address 0x%" + PRIx64, Curr.Addr, BaseAddr); + if (Curr.Addr < Prev.Addr) + return createStringError(std::errc::invalid_argument, + "LineEntry in LineTable not in ascending order"); + const uint64_t AddrDelta = Curr.Addr - Prev.Addr; + int64_t LineDelta = 0; + if (Curr.Line > Prev.Line) + LineDelta = Curr.Line - Prev.Line; + else if (Prev.Line > Curr.Line) + LineDelta = -((int32_t)(Prev.Line - Curr.Line)); + + // Set the file if it doesn't match the current one. + if (Curr.File != Prev.File) { + Out.writeU8(SetFile); + Out.writeULEB(Curr.File); + } + + uint8_t SpecialOp; + if (encodeSpecial(MinLineDelta, MaxLineDelta, LineDelta, AddrDelta, + SpecialOp)) { + // Advance the PC and line and push a row. + Out.writeU8(SpecialOp); + } else { + // We can't encode the address delta and line delta into + // a single special opcode, we must do them separately. + + // Advance the line. + if (LineDelta != 0) { + Out.writeU8(AdvanceLine); + Out.writeSLEB(LineDelta); + } + + // Advance the PC and push a row. + Out.writeU8(AdvancePC); + Out.writeULEB(AddrDelta); + } + Prev = Curr; + } + Out.writeU8(EndSequence); + return Error::success(); +} + +// Parse all line table entries into the "LineTable" vector. We can +// cache the results of this if needed, or we can call LineTable::lookup() +// below. 
+llvm::Expected LineTable::decode(DataExtractor &Data, + uint64_t BaseAddr) { + LineTable LT; + llvm::Error Err = parse(Data, BaseAddr, [&](const LineEntry &Row) -> bool { + LT.Lines.push_back(Row); + return true; // Keep parsing by returning true. + }); + if (Err) + return std::move(Err); + return LT; +} +// Parse the line table on the fly and find the row we are looking for. +// We will need to determine if we need to cache the line table by calling +// LineTable::parseAllEntries(...) or just call this function each time. +// There is a CPU vs memory tradeoff we will need to determine. +LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) { + LineEntry Result; + llvm::Error Err = parse(Data, BaseAddr, + [Addr, &Result](const LineEntry &Row) -> bool { + if (Addr < Row.Addr) + return false; // Stop parsing, result contains the line table row! + Result = Row; + if (Addr == Row.Addr) { + // Stop parsing, this is the row we are looking for since the address + // matches. + return false; + } + return true; // Keep parsing till we find the right row. 
+ }); + consumeError(std::move(Err)); // An llvm::Error must be handled before it is destroyed. + return Result; +} + +raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) { + for (const auto &LineEntry : LT) + OS << LineEntry << '\n'; + return OS; +} diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp index ca61984dacb..19ab700fdd5 100644 --- a/lib/DebugInfo/GSYM/Range.cpp +++ b/lib/DebugInfo/GSYM/Range.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -40,6 +42,17 @@ bool AddressRanges::contains(uint64_t Addr) const { return It != Ranges.begin() && Addr < It[-1].End; } +bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) + return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Range.Start; }); + if (It == Ranges.begin()) + return false; + return Range.End <= It[-1].End; +} + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; } @@ -53,3 +66,37 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) { } return OS; } + +void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const { + assert(Start >= BaseAddr); + O.writeULEB(Start - BaseAddr); + O.writeULEB(size()); +} + +void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr, + uint64_t &Offset) { + const uint64_t AddrOffset = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + AddrOffset; + Start = StartAddr; + End = StartAddr + Size; +} + +void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const { + O.writeULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) + Range.encode(O, BaseAddr); +} + +void AddressRanges::decode(DataExtractor
&Data, uint64_t BaseAddr, + uint64_t &Offset) { + clear(); + uint64_t NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return; + Ranges.resize(NumRanges); + for (auto &Range : Ranges) + Range.decode(Data, BaseAddr, Offset); +} diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp index df925771f0d..5dc9c86b34f 100644 --- a/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -52,7 +52,7 @@ MappedBlockStream::MappedBlockStream(uint32_t BlockSize, std::unique_ptr MappedBlockStream::createStream( uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData, BumpPtrAllocator &Allocator) { - return llvm::make_unique>( + return std::make_unique>( BlockSize, Layout, MsfData, Allocator); } @@ -63,7 +63,7 @@ std::unique_ptr MappedBlockStream::createIndexedStream( MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return llvm::make_unique>( + return std::make_unique>( Layout.SB->BlockSize, SL, MsfData, Allocator); } @@ -318,7 +318,7 @@ WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator) { - return llvm::make_unique>( + return std::make_unique>( BlockSize, Layout, MsfData, Allocator); } diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index a8ae076e1d6..c2552f55703 100644 --- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -405,7 +405,7 @@ DIARawSymbol::findChildren(PDB_SymType Type) const { return nullptr; } - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -423,7 +423,7 @@ DIARawSymbol::findChildren(PDB_SymType Type, StringRef Name, Symbol->findChildrenEx(EnumVal, Name16Str, CompareFlags, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, 
DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -443,7 +443,7 @@ DIARawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name, Section, Offset, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -462,7 +462,7 @@ DIARawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -480,7 +480,7 @@ DIARawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -489,7 +489,7 @@ DIARawSymbol::findInlineFramesByAddr(uint32_t Section, uint32_t Offset) const { if (S_OK != Symbol->findInlineFramesByAddr(Section, Offset, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -498,7 +498,7 @@ DIARawSymbol::findInlineFramesByRVA(uint32_t RVA) const { if (S_OK != Symbol->findInlineFramesByRVA(RVA, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr @@ -507,7 +507,7 @@ DIARawSymbol::findInlineFramesByVA(uint64_t VA) const { if (S_OK != Symbol->findInlineFramesByVA(VA, &DiaEnumerator)) return nullptr; - return llvm::make_unique(Session, DiaEnumerator); + return std::make_unique(Session, DiaEnumerator); } std::unique_ptr DIARawSymbol::findInlineeLines() const { @@ -515,7 +515,7 @@ std::unique_ptr DIARawSymbol::findInlineeLines() const { if (S_OK != Symbol->findInlineeLines(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -526,7 +526,7 
@@ DIARawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset, Symbol->findInlineeLinesByAddr(Section, Offset, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -535,7 +535,7 @@ DIARawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const { if (S_OK != Symbol->findInlineeLinesByRVA(RVA, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr @@ -544,7 +544,7 @@ DIARawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const { if (S_OK != Symbol->findInlineeLinesByVA(VA, Length, &DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } void DIARawSymbol::getDataBytes(llvm::SmallVector &bytes) const { @@ -776,7 +776,7 @@ std::unique_ptr DIARawSymbol::getSrcLineOnTypeDefn() const { if (FAILED(Symbol->getSrcLineOnTypeDefn(&LineNumber)) || !LineNumber) return nullptr; - return llvm::make_unique(LineNumber); + return std::make_unique(LineNumber); } uint32_t DIARawSymbol::getStride() const { @@ -871,7 +871,7 @@ DIARawSymbol::getVirtualBaseTableType() const { if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType) return nullptr; - auto RawVT = llvm::make_unique(Session, TableType); + auto RawVT = std::make_unique(Session, TableType); auto Pointer = PDBSymbol::createAs(Session, std::move(RawVT)); return unique_dyn_cast(Pointer->getPointeeType()); diff --git a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp index e2d928f2c4b..4f0e078e671 100644 --- a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp @@ -23,7 +23,7 @@ std::unique_ptr DIASectionContrib::getCompiland() const { if (FAILED(Section->get_compiland(&Symbol))) return nullptr; - auto RawSymbol = llvm::make_unique(Session, Symbol); + auto 
RawSymbol = std::make_unique(Session, Symbol); return PDBSymbol::createAs(Session, std::move(RawSymbol)); } diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 4e0b8587c61..64ffa776bbd 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -73,15 +73,7 @@ static Error LoadDIA(CComPtr &DiaDataSource) { #if !defined(_MSC_VER) return llvm::make_error(pdb_error_code::dia_failed_loading); #else - const wchar_t *msdia_dll = nullptr; -#if _MSC_VER >= 1900 && _MSC_VER < 2000 - msdia_dll = L"msdia140.dll"; // VS2015 -#elif _MSC_VER >= 1800 - msdia_dll = L"msdia120.dll"; // VS2013 -#else -#error "Unknown Visual Studio version." -#endif - + const wchar_t *msdia_dll = L"msdia140.dll"; HRESULT HR; if (FAILED(HR = NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource, reinterpret_cast(&DiaDataSource)))) @@ -158,7 +150,7 @@ std::unique_ptr DIASession::getGlobalScope() { if (S_OK != Session->get_globalScope(&GlobalScope)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, GlobalScope); + auto RawSymbol = std::make_unique(*this, GlobalScope); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); std::unique_ptr ExeSymbol( static_cast(PdbSymbol.release())); @@ -193,7 +185,7 @@ DIASession::getSymbolById(SymIndexId SymbolId) const { if (S_OK != Session->symbolById(SymbolId, &LocatedSymbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, LocatedSymbol); + auto RawSymbol = std::make_unique(*this, LocatedSymbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -210,7 +202,7 @@ DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol)) return nullptr; } - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -222,7 +214,7 @@ std::unique_ptr 
DIASession::findSymbolByRVA(uint32_t RVA, if (S_OK != Session->findSymbolByRVA(RVA, EnumVal, &Symbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -235,7 +227,7 @@ DIASession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, if (S_OK != Session->findSymbolByAddr(Sect, Offset, EnumVal, &Symbol)) return nullptr; - auto RawSymbol = llvm::make_unique(*this, Symbol); + auto RawSymbol = std::make_unique(*this, Symbol); return PDBSymbol::create(*this, std::move(RawSymbol)); } @@ -251,7 +243,7 @@ DIASession::findLineNumbers(const PDBSymbolCompiland &Compiland, RawFile.getDiaFile(), &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -265,7 +257,7 @@ DIASession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers)) return nullptr; } - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -274,7 +266,7 @@ DIASession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const { if (S_OK != Session->findLinesByRVA(RVA, Length, &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -284,7 +276,7 @@ DIASession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset, if (S_OK != Session->findLinesByAddr(Section, Offset, Length, &LineNumbers)) return nullptr; - return llvm::make_unique(LineNumbers); + return std::make_unique(LineNumbers); } std::unique_ptr @@ -306,7 +298,7 @@ DIASession::findSourceFiles(const PDBSymbolCompiland *Compiland, if (S_OK != Session->findFile(DiaCompiland, Utf16Pattern.m_str, Flags, &SourceFiles)) return nullptr; - return llvm::make_unique(*this, SourceFiles); + return std::make_unique(*this, SourceFiles); } std::unique_ptr @@ 
-342,7 +334,7 @@ std::unique_ptr DIASession::getAllSourceFiles() const { if (S_OK != Session->findFile(nullptr, nullptr, nsNone, &Files)) return nullptr; - return llvm::make_unique(*this, Files); + return std::make_unique(*this, Files); } std::unique_ptr DIASession::getSourceFilesForCompiland( @@ -355,7 +347,7 @@ std::unique_ptr DIASession::getSourceFilesForCompiland( Session->findFile(RawSymbol.getDiaSymbol(), nullptr, nsNone, &Files)) return nullptr; - return llvm::make_unique(*this, Files); + return std::make_unique(*this, Files); } std::unique_ptr @@ -364,7 +356,7 @@ DIASession::getSourceFileById(uint32_t FileId) const { if (S_OK != Session->findFileById(FileId, &LocatedFile)) return nullptr; - return llvm::make_unique(*this, LocatedFile); + return std::make_unique(*this, LocatedFile); } std::unique_ptr DIASession::getDebugStreams() const { @@ -372,7 +364,7 @@ std::unique_ptr DIASession::getDebugStreams() const { if (S_OK != Session->getEnumDebugStreams(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } std::unique_ptr DIASession::getEnumTables() const { @@ -380,7 +372,7 @@ std::unique_ptr DIASession::getEnumTables() const { if (S_OK != Session->getEnumTables(&DiaEnumerator)) return nullptr; - return llvm::make_unique(DiaEnumerator); + return std::make_unique(DiaEnumerator); } template static CComPtr getTableEnumerator(IDiaSession &Session) { @@ -407,7 +399,7 @@ DIASession::getInjectedSources() const { if (!Files) return nullptr; - return llvm::make_unique(Files); + return std::make_unique(Files); } std::unique_ptr @@ -417,7 +409,7 @@ DIASession::getSectionContribs() const { if (!Sections) return nullptr; - return llvm::make_unique(*this, Sections); + return std::make_unique(*this, Sections); } std::unique_ptr @@ -427,5 +419,5 @@ DIASession::getFrameData() const { if (!FD) return nullptr; - return llvm::make_unique(FD); + return std::make_unique(FD); } diff --git 
a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp index 70dc094c42e..0e4cba3174b 100644 --- a/lib/DebugInfo/PDB/GenericError.cpp +++ b/lib/DebugInfo/PDB/GenericError.cpp @@ -34,8 +34,8 @@ public: return "The PDB file path is an invalid UTF8 sequence."; case pdb_error_code::signature_out_of_date: return "The signature does not match; the file(s) might be out of date."; - case pdb_error_code::external_cmdline_ref: - return "The path to this file must be provided on the command-line."; + case pdb_error_code::no_matching_pch: + return "No matching precompiled header could be located."; } llvm_unreachable("Unrecognized generic_error_code"); } diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 20b6c614254..419734771cc 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -180,12 +180,12 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, void DbiModuleDescriptorBuilder::addDebugSubsection( std::shared_ptr Subsection) { assert(Subsection); - C13Builders.push_back(llvm::make_unique( + C13Builders.push_back(std::make_unique( std::move(Subsection), CodeViewContainer::Pdb)); } void DbiModuleDescriptorBuilder::addDebugSubsection( const DebugSubsectionRecord &SubsectionContents) { - C13Builders.push_back(llvm::make_unique( + C13Builders.push_back(std::make_unique( SubsectionContents, CodeViewContainer::Pdb)); } diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index b7ade0072ee..0e00c2f7ff9 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -114,7 +114,7 @@ Expected DbiStreamBuilder::addModuleInfo(StringRef ModuleName) { uint32_t Index = ModiList.size(); ModiList.push_back( - llvm::make_unique(ModuleName, Index, Msf)); + std::make_unique(ModuleName, 
Index, Msf)); return *ModiList.back(); } diff --git a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 8ed5b8b44c5..432f1e9b24d 100644 --- a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -183,8 +183,8 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { } GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf) - : Msf(Msf), PSH(llvm::make_unique()), - GSH(llvm::make_unique()) {} + : Msf(Msf), PSH(std::make_unique()), + GSH(std::make_unique()) {} GSIStreamBuilder::~GSIStreamBuilder() {} diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp index b5c139ecbec..7fb6b4bd5d3 100644 --- a/lib/DebugInfo/PDB/Native/Hash.cpp +++ b/lib/DebugInfo/PDB/Native/Hash.cpp @@ -8,8 +8,8 @@ #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/JamCRC.h" #include using namespace llvm; @@ -79,7 +79,6 @@ uint32_t pdb::hashStringV2(StringRef Str) { // Corresponds to `SigForPbCb` in langapi/shared/crc32.h. 
uint32_t pdb::hashBufferV8(ArrayRef Buf) { JamCRC JC(/*Init=*/0U); - JC.update(makeArrayRef(reinterpret_cast(Buf.data()), - Buf.size())); + JC.update(Buf); return JC.getCRC(); } diff --git a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index f17ff5bb01f..2f6a5bc3d57 100644 --- a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -46,30 +46,31 @@ public: uint64_t getCodeByteSize() const override { return Entry.FileSize; } std::string getFileName() const override { - auto Name = Strings.getStringForID(Entry.FileNI); - assert(Name && "InjectedSourceStream should have rejected this"); - return *Name; + StringRef Ret = cantFail(Strings.getStringForID(Entry.FileNI), + "InjectedSourceStream should have rejected this"); + return Ret; } std::string getObjectFileName() const override { - auto ObjName = Strings.getStringForID(Entry.ObjNI); - assert(ObjName && "InjectedSourceStream should have rejected this"); - return *ObjName; + StringRef Ret = cantFail(Strings.getStringForID(Entry.ObjNI), + "InjectedSourceStream should have rejected this"); + return Ret; } std::string getVirtualFileName() const override { - auto VName = Strings.getStringForID(Entry.VFileNI); - assert(VName && "InjectedSourceStream should have rejected this"); - return *VName; + StringRef Ret = cantFail(Strings.getStringForID(Entry.VFileNI), + "InjectedSourceStream should have rejected this"); + return Ret; } uint32_t getCompression() const override { return Entry.Compression; } std::string getCode() const override { // Get name of stream storing the data. 
- auto VName = Strings.getStringForID(Entry.VFileNI); - assert(VName && "InjectedSourceStream should have rejected this"); - std::string StreamName = ("/src/files/" + *VName).str(); + StringRef VName = + cantFail(Strings.getStringForID(Entry.VFileNI), + "InjectedSourceStream should have rejected this"); + std::string StreamName = ("/src/files/" + VName).str(); // Find stream with that name and read its data. // FIXME: Consider validating (or even loading) all this in @@ -104,14 +105,14 @@ std::unique_ptr NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const { if (N >= getChildCount()) return nullptr; - return make_unique(std::next(Stream.begin(), N)->second, + return std::make_unique(std::next(Stream.begin(), N)->second, File, Strings); } std::unique_ptr NativeEnumInjectedSources::getNext() { if (Cur == Stream.end()) return nullptr; - return make_unique((Cur++)->second, File, Strings); + return std::make_unique((Cur++)->second, File, Strings); } void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); } diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 8e43cf24495..2ad552470b6 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -30,68 +30,68 @@ void NativeRawSymbol::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeRawSymbol::findChildren(PDB_SymType Type) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByAddr(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, uint32_t Section, uint32_t Offset) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByVA(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, 
uint64_t VA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name, PDB_NameSearchFlags Flags, uint32_t RVA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByAddr(uint32_t Section, uint32_t Offset) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineFramesByVA(uint64_t VA) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLines() const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByAddr(uint32_t Section, uint32_t Offset, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } std::unique_ptr NativeRawSymbol::findInlineeLinesByVA(uint64_t VA, uint32_t Length) const { - return llvm::make_unique>(); + return std::make_unique>(); } void NativeRawSymbol::getDataBytes(SmallVector &bytes) const { diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 8a49cb1c596..b45a5881dcb 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -59,18 +59,18 @@ NativeSession::~NativeSession() = default; Error NativeSession::createFromPdb(std::unique_ptr Buffer, std::unique_ptr &Session) { StringRef Path = Buffer->getBufferIdentifier(); - auto Stream = llvm::make_unique( + auto Stream = std::make_unique( std::move(Buffer), llvm::support::little); - auto Allocator = llvm::make_unique(); - auto File = 
llvm::make_unique(Path, std::move(Stream), *Allocator); + auto Allocator = std::make_unique(); + auto File = std::make_unique(Path, std::move(Stream), *Allocator); if (auto EC = File->parseFileHeaders()) return EC; if (auto EC = File->parseStreamData()) return EC; Session = - llvm::make_unique(std::move(File), std::move(Allocator)); + std::make_unique(std::move(File), std::move(Allocator)); return Error::success(); } @@ -202,7 +202,7 @@ NativeSession::getInjectedSources() const { consumeError(Strings.takeError()); return nullptr; } - return make_unique(*Pdb, *ISS, *Strings); + return std::make_unique(*Pdb, *ISS, *Strings); } std::unique_ptr diff --git a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp index 9f5e86281a2..26ccb7daece 100644 --- a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp +++ b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp @@ -163,14 +163,14 @@ void NativeTypeEnum::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeTypeEnum::findChildren(PDB_SymType Type) const { if (Type != PDB_SymType::Data) - return llvm::make_unique>(); + return std::make_unique>(); const NativeTypeEnum *ClassParent = nullptr; if (!Modifiers) ClassParent = this; else ClassParent = UnmodifiedType; - return llvm::make_unique(Session, *ClassParent); + return std::make_unique(Session, *ClassParent); } PDB_SymType NativeTypeEnum::getSymTag() const { return PDB_SymType::Enum; } diff --git a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp index 405303469c1..f98a4c3043e 100644 --- a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp @@ -65,7 +65,7 @@ private: std::unique_ptr wrap(std::unique_ptr S) const { if (!S) return nullptr; - auto NTFA = llvm::make_unique(Session, std::move(S)); + auto NTFA = std::make_unique(Session, std::move(S)); return PDBSymbol::create(Session, std::move(NTFA)); } NativeSession &Session; @@ -133,9 +133,9 
@@ void NativeTypeFunctionSig::dump(raw_ostream &OS, int Indent, std::unique_ptr NativeTypeFunctionSig::findChildren(PDB_SymType Type) const { if (Type != PDB_SymType::FunctionArg) - return llvm::make_unique>(); + return std::make_unique>(); - auto NET = llvm::make_unique(Session, + auto NET = std::make_unique(Session, /* copy */ ArgList.ArgIndices); return std::unique_ptr( new NativeEnumFunctionArgs(Session, std::move(NET))); diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 983031dfcb7..9ac226b8913 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -264,7 +264,7 @@ Expected PDBFile::getPDBGlobalsStream() { safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex()); if (!GlobalS) return GlobalS.takeError(); - auto TempGlobals = llvm::make_unique(std::move(*GlobalS)); + auto TempGlobals = std::make_unique(std::move(*GlobalS)); if (auto EC = TempGlobals->reload()) return std::move(EC); Globals = std::move(TempGlobals); @@ -277,7 +277,7 @@ Expected PDBFile::getPDBInfoStream() { auto InfoS = safelyCreateIndexedStream(StreamPDB); if (!InfoS) return InfoS.takeError(); - auto TempInfo = llvm::make_unique(std::move(*InfoS)); + auto TempInfo = std::make_unique(std::move(*InfoS)); if (auto EC = TempInfo->reload()) return std::move(EC); Info = std::move(TempInfo); @@ -290,7 +290,7 @@ Expected PDBFile::getPDBDbiStream() { auto DbiS = safelyCreateIndexedStream(StreamDBI); if (!DbiS) return DbiS.takeError(); - auto TempDbi = llvm::make_unique(std::move(*DbiS)); + auto TempDbi = std::make_unique(std::move(*DbiS)); if (auto EC = TempDbi->reload(this)) return std::move(EC); Dbi = std::move(TempDbi); @@ -303,7 +303,7 @@ Expected PDBFile::getPDBTpiStream() { auto TpiS = safelyCreateIndexedStream(StreamTPI); if (!TpiS) return TpiS.takeError(); - auto TempTpi = llvm::make_unique(*this, std::move(*TpiS)); + auto TempTpi = std::make_unique(*this, std::move(*TpiS)); if (auto EC = 
TempTpi->reload()) return std::move(EC); Tpi = std::move(TempTpi); @@ -319,7 +319,7 @@ Expected PDBFile::getPDBIpiStream() { auto IpiS = safelyCreateIndexedStream(StreamIPI); if (!IpiS) return IpiS.takeError(); - auto TempIpi = llvm::make_unique(*this, std::move(*IpiS)); + auto TempIpi = std::make_unique(*this, std::move(*IpiS)); if (auto EC = TempIpi->reload()) return std::move(EC); Ipi = std::move(TempIpi); @@ -337,7 +337,7 @@ Expected PDBFile::getPDBPublicsStream() { safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex()); if (!PublicS) return PublicS.takeError(); - auto TempPublics = llvm::make_unique(std::move(*PublicS)); + auto TempPublics = std::make_unique(std::move(*PublicS)); if (auto EC = TempPublics->reload()) return std::move(EC); Publics = std::move(TempPublics); @@ -356,7 +356,7 @@ Expected PDBFile::getPDBSymbolStream() { if (!SymbolS) return SymbolS.takeError(); - auto TempSymbols = llvm::make_unique(std::move(*SymbolS)); + auto TempSymbols = std::make_unique(std::move(*SymbolS)); if (auto EC = TempSymbols->reload()) return std::move(EC); Symbols = std::move(TempSymbols); @@ -370,7 +370,7 @@ Expected PDBFile::getStringTable() { if (!NS) return NS.takeError(); - auto N = llvm::make_unique(); + auto N = std::make_unique(); BinaryStreamReader Reader(**NS); if (auto EC = N->reload(Reader)) return std::move(EC); @@ -391,7 +391,7 @@ Expected PDBFile::getInjectedSourceStream() { if (!Strings) return Strings.takeError(); - auto IJ = llvm::make_unique(std::move(*IJS)); + auto IJ = std::make_unique(std::move(*IJS)); if (auto EC = IJ->reload(*Strings)) return std::move(EC); InjectedSources = std::move(IJ); diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 8f5a048ea4b..aa328872439 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -22,7 +22,7 @@ #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/Support/BinaryStream.h" 
#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/JamCRC.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" @@ -42,7 +42,7 @@ Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); if (!ExpectedMsf) return ExpectedMsf.takeError(); - Msf = llvm::make_unique(std::move(*ExpectedMsf)); + Msf = std::make_unique(std::move(*ExpectedMsf)); return Error::success(); } @@ -50,25 +50,25 @@ MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; } InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() { if (!Info) - Info = llvm::make_unique(*Msf, NamedStreams); + Info = std::make_unique(*Msf, NamedStreams); return *Info; } DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() { if (!Dbi) - Dbi = llvm::make_unique(*Msf); + Dbi = std::make_unique(*Msf); return *Dbi; } TpiStreamBuilder &PDBFileBuilder::getTpiBuilder() { if (!Tpi) - Tpi = llvm::make_unique(*Msf, StreamTPI); + Tpi = std::make_unique(*Msf, StreamTPI); return *Tpi; } TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { if (!Ipi) - Ipi = llvm::make_unique(*Msf, StreamIPI); + Ipi = std::make_unique(*Msf, StreamIPI); return *Ipi; } @@ -78,7 +78,7 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { GSIStreamBuilder &PDBFileBuilder::getGsiBuilder() { if (!Gsi) - Gsi = llvm::make_unique(*Msf); + Gsi = std::make_unique(*Msf); return *Gsi; } @@ -174,8 +174,7 @@ Error PDBFileBuilder::finalizeMsfLayout() { if (!InjectedSources.empty()) { for (const auto &IS : InjectedSources) { JamCRC CRC(0); - CRC.update(makeArrayRef(IS.Content->getBufferStart(), - IS.Content->getBufferSize())); + CRC.update(arrayRefFromStringRef(IS.Content->getBuffer())); SrcHeaderBlockEntry Entry; ::memset(&Entry, 0, sizeof(SrcHeaderBlockEntry)); diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp index b21b82bf76f..b71b2b15814 100644 --- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp 
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -10,7 +10,7 @@ #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/Support/JamCRC.h" +#include "llvm/Support/CRC.h" using namespace llvm; using namespace llvm::codeview; @@ -124,8 +124,6 @@ Expected llvm::pdb::hashTypeRecord(const CVType &Rec) { // Run CRC32 over the bytes. This corresponds to `hashBufv8`. JamCRC JC(/*Init=*/0U); - ArrayRef Bytes(reinterpret_cast(Rec.data().data()), - Rec.data().size()); - JC.update(Bytes); + JC.update(Rec.data()); return JC.getCRC(); } diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp index 8ee7f897b8b..ac19db03fab 100644 --- a/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -112,7 +112,7 @@ Error TpiStream::reload() { HashStream = std::move(*HS); } - Types = llvm::make_unique( + Types = std::make_unique( TypeRecords, getNumTypeRecords(), getTypeIndexOffsets()); return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 6b308453c2d..4f10f8524a9 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -135,7 +135,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() { reinterpret_cast(HashBuffer.data()), calculateHashBufferSize()); HashValueStream = - llvm::make_unique(Bytes, llvm::support::little); + std::make_unique(Bytes, llvm::support::little); } return Error::success(); } diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 7c3ba981fd6..cb0329bc0ed 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -79,7 +79,7 @@ private: std::unique_ptr> PDBSymbolFunc::getArguments() const { - return llvm::make_unique(Session, *this); + return std::make_unique(Session, *this); } void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { 
Dumper.dump(*this); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 292320a6fe6..1373615522e 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -63,7 +63,7 @@ private: std::unique_ptr PDBSymbolTypeFunctionSig::getArguments() const { - return llvm::make_unique(Session, *this); + return std::make_unique(Session, *this); } void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const { diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp index acb1599480b..a8e1d0a619c 100644 --- a/lib/DebugInfo/PDB/UDTLayout.cpp +++ b/lib/DebugInfo/PDB/UDTLayout.cpp @@ -71,7 +71,7 @@ DataMemberLayoutItem::DataMemberLayoutItem( DataMember(std::move(Member)) { auto Type = DataMember->getType(); if (auto UDT = unique_dyn_cast(Type)) { - UdtLayout = llvm::make_unique(std::move(UDT)); + UdtLayout = std::make_unique(std::move(UDT)); UsedBytes = UdtLayout->usedBytes(); } } @@ -84,7 +84,7 @@ VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent, } const PDBSymbolData &DataMemberLayoutItem::getDataMember() { - return *dyn_cast(Symbol); + return *cast(Symbol); } bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; } @@ -205,7 +205,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { for (auto &Base : Bases) { uint32_t Offset = Base->getOffset(); // Non-virtual bases never get elided. 
- auto BL = llvm::make_unique(*this, Offset, false, + auto BL = std::make_unique(*this, Offset, false, std::move(Base)); AllBases.push_back(BL.get()); @@ -216,7 +216,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { assert(VTables.size() <= 1); if (!VTables.empty()) { auto VTLayout = - llvm::make_unique(*this, std::move(VTables[0])); + std::make_unique(*this, std::move(VTables[0])); VTable = VTLayout.get(); @@ -224,7 +224,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { } for (auto &Data : Members) { - auto DM = llvm::make_unique(*this, std::move(Data)); + auto DM = std::make_unique(*this, std::move(Data)); addChildToLayout(std::move(DM)); } @@ -236,7 +236,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { int VBPO = VB->getVirtualBasePointerOffset(); if (!hasVBPtrAtOffset(VBPO)) { if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) { - auto VBPL = llvm::make_unique(*this, std::move(VBP), + auto VBPL = std::make_unique(*this, std::move(VBP), VBPO, VBP->getLength()); VBPtr = VBPL.get(); addChildToLayout(std::move(VBPL)); @@ -250,7 +250,7 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { uint32_t Offset = UsedBytes.find_last() + 1; bool Elide = (Parent != nullptr); auto BL = - llvm::make_unique(*this, Offset, Elide, std::move(VB)); + std::make_unique(*this, Offset, Elide, std::move(VB)); AllBases.push_back(BL.get()); // Only lay this virtual base out directly inside of *this* class if this diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp index b2bfef25148..b1a80cbc458 100644 --- a/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -30,11 +30,6 @@ namespace llvm { namespace symbolize { -// By default, DILineInfo contains "" for function/filename it -// cannot fetch. We replace it to "??" to make our output closer to addr2line. 
-static const char kDILineInfoBadString[] = ""; -static const char kBadString[] = "??"; - // Prints source code around in the FileName the Line. void DIPrinter::printContext(const std::string &FileName, int64_t Line) { if (PrintSourceContext <= 0) @@ -68,16 +63,16 @@ void DIPrinter::printContext(const std::string &FileName, int64_t Line) { void DIPrinter::print(const DILineInfo &Info, bool Inlined) { if (PrintFunctionNames) { std::string FunctionName = Info.FunctionName; - if (FunctionName == kDILineInfoBadString) - FunctionName = kBadString; + if (FunctionName == DILineInfo::BadString) + FunctionName = DILineInfo::Addr2LineBadString; StringRef Delimiter = PrintPretty ? " at " : "\n"; StringRef Prefix = (PrintPretty && Inlined) ? " (inlined by) " : ""; OS << Prefix << FunctionName << Delimiter; } std::string Filename = Info.FileName; - if (Filename == kDILineInfoBadString) - Filename = kBadString; + if (Filename == DILineInfo::BadString) + Filename = DILineInfo::Addr2LineBadString; else if (Basenames) Filename = llvm::sys::path::filename(Filename); if (!Verbose) { @@ -115,8 +110,8 @@ DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) { DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) { std::string Name = Global.Name; - if (Name == kDILineInfoBadString) - Name = kBadString; + if (Name == DILineInfo::BadString) + Name = DILineInfo::Addr2LineBadString; OS << Name << "\n"; OS << Global.Start << " " << Global.Size << "\n"; return *this; diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index 2765bf44d50..b4d49d9ff95 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -43,20 +43,22 @@ getDILineInfoSpecifier(FunctionNameKind FNKind) { ErrorOr> SymbolizableObjectFile::create(const object::ObjectFile *Obj, - std::unique_ptr DICtx) { + std::unique_ptr DICtx, + bool UntagAddresses) { assert(DICtx); std::unique_ptr res( - 
new SymbolizableObjectFile(Obj, std::move(DICtx))); + new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses)); std::unique_ptr OpdExtractor; uint64_t OpdAddress = 0; // Find the .opd (function descriptor) section if any, for big-endian // PowerPC64 ELF. if (Obj->getArch() == Triple::ppc64) { for (section_iterator Section : Obj->sections()) { - StringRef Name; - if (auto EC = Section->getName(Name)) - return EC; - if (Name == ".opd") { + Expected NameOrErr = Section->getName(); + if (!NameOrErr) + return errorToErrorCode(NameOrErr.takeError()); + + if (*NameOrErr == ".opd") { Expected E = Section->getContents(); if (!E) return errorToErrorCode(E.takeError()); @@ -103,8 +105,10 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj, } SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj, - std::unique_ptr DICtx) - : Module(Obj), DebugInfoContext(std::move(DICtx)) {} + std::unique_ptr DICtx, + bool UntagAddresses) + : Module(Obj), DebugInfoContext(std::move(DICtx)), + UntagAddresses(UntagAddresses) {} namespace { @@ -172,6 +176,12 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, if (!SymbolAddressOrErr) return errorToErrorCode(SymbolAddressOrErr.takeError()); uint64_t SymbolAddress = *SymbolAddressOrErr; + if (UntagAddresses) { + // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55 + // into bits 56-63 instead of masking them out. + SymbolAddress &= (1ull << 56) - 1; + SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8; + } if (OpdExtractor) { // For big-endian PowerPC64 ELF, symbols in the .opd section refer to // function descriptors. The first word of the descriptor is a pointer to @@ -179,10 +189,8 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, // For the purposes of symbolization, pretend the symbol's address is that // of the function's code, not the descriptor. 
uint64_t OpdOffset = SymbolAddress - OpdAddress; - uint32_t OpdOffset32 = OpdOffset; - if (OpdOffset == OpdOffset32 && - OpdExtractor->isValidOffsetForAddress(OpdOffset32)) - SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); + if (OpdExtractor->isValidOffsetForAddress(OpdOffset)) + SymbolAddress = OpdExtractor->getAddress(&OpdOffset); } Expected SymbolNameOrErr = Symbol.getName(); if (!SymbolNameOrErr) diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h index 9cab94178c1..b5b9793a44d 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -31,7 +31,8 @@ namespace symbolize { class SymbolizableObjectFile : public SymbolizableModule { public: static ErrorOr> - create(const object::ObjectFile *Obj, std::unique_ptr DICtx); + create(const object::ObjectFile *Obj, std::unique_ptr DICtx, + bool UntagAddresses); DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset, FunctionNameKind FNKind, @@ -70,6 +71,7 @@ private: const object::ObjectFile *Module; std::unique_ptr DebugInfoContext; + bool UntagAddresses; struct SymbolDesc { uint64_t Addr; @@ -85,7 +87,8 @@ private: std::vector> Objects; SymbolizableObjectFile(const object::ObjectFile *Obj, - std::unique_ptr DICtx); + std::unique_ptr DICtx, + bool UntagAddresses); }; } // end namespace symbolize diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 6a619f8f2f3..be79d9e637c 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -35,19 +35,6 @@ #include #include -#if defined(_MSC_VER) -#include - -// This must be included after windows.h. -#include -#pragma comment(lib, "dbghelp.lib") - -// Windows.h conflicts with our COFF header definitions. 
-#ifdef IMAGE_FILE_MACHINE_I386 -#undef IMAGE_FILE_MACHINE_I386 -#endif -#endif - namespace llvm { namespace symbolize { @@ -205,7 +192,7 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) { MemoryBuffer::getFileOrSTDIN(Path); if (!MB) return false; - return CRCHash == llvm::crc32(0, MB.get()->getBuffer()); + return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); } bool findDebugBinary(const std::string &OrigPath, @@ -259,7 +246,11 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, return false; for (const SectionRef &Section : Obj->sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Name = Name.substr(Name.find_first_not_of("._")); if (Name == "gnu_debuglink") { Expected ContentsOrErr = Section.getContents(); @@ -268,7 +259,7 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, return false; } DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); - uint32_t Offset = 0; + uint64_t Offset = 0; if (const char *DebugNameStr = DE.getCStr(&Offset)) { // 4-byte align the offset. 
Offset = (Offset + 3) & ~0x3; @@ -397,7 +388,7 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path, return I->second.get(); Expected> ObjOrErr = - UB->getObjectForArch(ArchName); + UB->getMachOObjectForArch(ArchName); if (!ObjOrErr) { ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), std::unique_ptr()); @@ -418,8 +409,8 @@ Expected LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, std::unique_ptr Context, StringRef ModuleName) { - auto InfoOrErr = - SymbolizableObjectFile::create(Obj, std::move(Context)); + auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), + Opts.UntagAddresses); std::unique_ptr SymMod; if (InfoOrErr) SymMod = std::move(*InfoOrErr); @@ -530,21 +521,20 @@ LLVMSymbolizer::DemangleName(const std::string &Name, return Result; } -#if defined(_MSC_VER) if (!Name.empty() && Name.front() == '?') { // Only do MSVC C++ demangling on symbols starting with '?'. - char DemangledName[1024] = {0}; - DWORD result = ::UnDecorateSymbolName( - Name.c_str(), DemangledName, 1023, - UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected - UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc - UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications - UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers - UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords - UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types - return (result == 0) ? 
Name : std::string(DemangledName); + int status = 0; + char *DemangledName = microsoftDemangle( + Name.c_str(), nullptr, nullptr, &status, + MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | + MSDF_NoMemberType | MSDF_NoReturnType)); + if (status != 0) + return Name; + std::string Result = DemangledName; + free(DemangledName); + return Result; } -#endif + if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) return std::string(demanglePE32ExternCFunc(Name)); return Name; diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp index 5c99c70e3cc..760d28b3ab9 100644 --- a/lib/Demangle/ItaniumDemangle.cpp +++ b/lib/Demangle/ItaniumDemangle.cpp @@ -174,6 +174,16 @@ struct DumpVisitor { return printStr("SpecialSubKind::iostream"); } } + void print(TemplateParamKind TPK) { + switch (TPK) { + case TemplateParamKind::Type: + return printStr("TemplateParamKind::Type"); + case TemplateParamKind::NonType: + return printStr("TemplateParamKind::NonType"); + case TemplateParamKind::Template: + return printStr("TemplateParamKind::Template"); + } + } void newLine() { printStr("\n"); diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp index bf7d77638f3..c681d6e25b8 100644 --- a/lib/Demangle/MicrosoftDemangle.cpp +++ b/lib/Demangle/MicrosoftDemangle.cpp @@ -783,8 +783,26 @@ SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) { return S; } +SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) { + assert(MangledName.startsWith('.')); + MangledName.consumeFront('.'); + + TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result); + if (Error || !MangledName.empty()) { + Error = true; + return nullptr; + } + return synthesizeVariable(Arena, T, "`RTTI Type Descriptor Name'"); +} + // Parser entry point. SymbolNode *Demangler::parse(StringView &MangledName) { + // Typeinfo names are strings stored in RTTI data. They're not symbol names. 
+ // It's still useful to demangle them. They're the only demangled entity + // that doesn't start with a "?" but a ".". + if (MangledName.startsWith('.')) + return demangleTypeinfoName(MangledName); + if (MangledName.startsWith("??@")) return demangleMD5Name(MangledName); @@ -2161,7 +2179,7 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName, NodeArrayNode * Demangler::demangleTemplateParameterList(StringView &MangledName) { - NodeList *Head; + NodeList *Head = nullptr; NodeList **Current = &Head; size_t Count = 0; @@ -2328,12 +2346,22 @@ char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N, if (Flags & MSDF_DumpBackrefs) D.dumpBackReferences(); + OutputFlags OF = OF_Default; + if (Flags & MSDF_NoCallingConvention) + OF = OutputFlags(OF | OF_NoCallingConvention); + if (Flags & MSDF_NoAccessSpecifier) + OF = OutputFlags(OF | OF_NoAccessSpecifier); + if (Flags & MSDF_NoReturnType) + OF = OutputFlags(OF | OF_NoReturnType); + if (Flags & MSDF_NoMemberType) + OF = OutputFlags(OF | OF_NoMemberType); + if (D.Error) InternalStatus = demangle_invalid_mangled_name; else if (!initializeOutputStream(Buf, N, S, 1024)) InternalStatus = demangle_memory_alloc_failure; else { - AST->output(S, OF_Default); + AST->output(S, OF); S += '\0'; if (N != nullptr) *N = S.getCurrentPosition(); diff --git a/lib/Demangle/MicrosoftDemangleNodes.cpp b/lib/Demangle/MicrosoftDemangleNodes.cpp index 63ca475ec1f..9cee975231a 100644 --- a/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -120,8 +120,6 @@ std::string Node::toString(OutputFlags Flags) const { return {OS.getBuffer()}; } -void TypeNode::outputQuals(bool SpaceBefore, bool SpaceAfter) const {} - void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const { switch (PrimKind) { OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void"); @@ -380,24 +378,28 @@ void LiteralOperatorIdentifierNode::output(OutputStream &OS, void 
FunctionSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const { - if (FunctionClass & FC_Public) - OS << "public: "; - if (FunctionClass & FC_Protected) - OS << "protected: "; - if (FunctionClass & FC_Private) - OS << "private: "; - - if (!(FunctionClass & FC_Global)) { - if (FunctionClass & FC_Static) - OS << "static "; + if (!(Flags & OF_NoAccessSpecifier)) { + if (FunctionClass & FC_Public) + OS << "public: "; + if (FunctionClass & FC_Protected) + OS << "protected: "; + if (FunctionClass & FC_Private) + OS << "private: "; } - if (FunctionClass & FC_Virtual) - OS << "virtual "; - if (FunctionClass & FC_ExternC) - OS << "extern \"C\" "; + if (!(Flags & OF_NoMemberType)) { + if (!(FunctionClass & FC_Global)) { + if (FunctionClass & FC_Static) + OS << "static "; + } + if (FunctionClass & FC_Virtual) + OS << "virtual "; - if (ReturnType) { + if (FunctionClass & FC_ExternC) + OS << "extern \"C\" "; + } + + if (!(Flags & OF_NoReturnType) && ReturnType) { ReturnType->outputPre(OS, Flags); OS << " "; } @@ -440,7 +442,7 @@ void FunctionSignatureNode::outputPost(OutputStream &OS, else if (RefQualifier == FunctionRefQualifier::RValueReference) OS << " &&"; - if (ReturnType) + if (!(Flags & OF_NoReturnType) && ReturnType) ReturnType->outputPost(OS, Flags); } @@ -582,19 +584,26 @@ void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const { } void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const { + const char *AccessSpec = nullptr; + bool IsStatic = true; switch (SC) { case StorageClass::PrivateStatic: - OS << "private: static "; + AccessSpec = "private"; break; case StorageClass::PublicStatic: - OS << "public: static "; + AccessSpec = "public"; break; case StorageClass::ProtectedStatic: - OS << "protected: static "; + AccessSpec = "protected"; break; default: + IsStatic = false; break; } + if (!(Flags & OF_NoAccessSpecifier) && AccessSpec) + OS << AccessSpec << ": "; + if (!(Flags & OF_NoMemberType) && IsStatic) + OS << 
"static "; if (Type) { Type->outputPre(OS, Flags); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 1c6c0406d04..ee7a7cb60bc 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -32,12 +32,12 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include +#include using namespace llvm; #define DEBUG_TYPE "jit" @@ -191,7 +191,7 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) { std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { assert(GV->hasName() && "Global must have name."); - MutexGuard locked(lock); + std::lock_guard locked(lock); SmallString<128> FullName; const DataLayout &DL = @@ -204,12 +204,12 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { } void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); addGlobalMapping(getMangledName(GV), (uint64_t) Addr); } void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); assert(!Name.empty() && "Empty GlobalMapping symbol name!"); @@ -228,14 +228,14 @@ void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { } void ExecutionEngine::clearAllGlobalMappings() { - MutexGuard locked(lock); + std::lock_guard locked(lock); EEState.getGlobalAddressMap().clear(); EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (GlobalObject &GO : M->global_objects()) EEState.RemoveMapping(getMangledName(&GO)); @@ -243,12 +243,12 @@ void 
ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr); } uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = EEState.getGlobalAddressMap(); @@ -275,7 +275,7 @@ uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { } uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Address = 0; ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap().find(S); @@ -286,19 +286,19 @@ uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void* Address = (void *) getAddressToGlobalIfAvailable(S)) return Address; return nullptr; } void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return getPointerToGlobalIfAvailable(getMangledName(GV)); } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { @@ -340,14 +340,14 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, Values.clear(); // Free the old contents. 
Values.reserve(InputArgv.size()); unsigned PtrSize = EE->getDataLayout().getPointerSize(); - Array = make_unique((InputArgv.size()+1)*PtrSize); + Array = std::make_unique((InputArgv.size()+1)*PtrSize); LLVM_DEBUG(dbgs() << "JIT: ARGV = " << (void *)Array.get() << "\n"); Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; - auto Dest = make_unique(Size); + auto Dest = std::make_unique(Size); LLVM_DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void *)Dest.get() << "\n"); @@ -575,7 +575,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { if (Function *F = const_cast(dyn_cast(GV))) return getPointerToFunction(F); - MutexGuard locked(lock); + std::lock_guard locked(lock); if (void* P = getPointerToGlobalIfAvailable(GV)) return P; @@ -626,7 +626,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::VectorTyID: // if the whole vector is 'undef' just reserve memory for the value. 
- auto* VTy = dyn_cast(C->getType()); + auto* VTy = cast(C->getType()); Type *ElemTy = VTy->getElementType(); unsigned int elemNum = VTy->getNumElements(); Result.AggregateVal.resize(elemNum); @@ -925,7 +925,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { elemNum = CDV->getNumElements(); ElemTy = CDV->getElementType(); } else if (CV || CAZ) { - VectorType* VTy = dyn_cast(C->getType()); + auto* VTy = cast(C->getType()); elemNum = VTy->getNumElements(); ElemTy = VTy->getElementType(); } else { diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp index 08d20156a59..7ed025fbb48 100644 --- a/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -14,7 +14,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" +#include using namespace llvm; using namespace llvm::object; @@ -135,7 +135,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) { GDBJITRegistrationListener::~GDBJITRegistrationListener() { // Free all registered object files. 
- llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end(); I != E; ++I) { @@ -159,7 +159,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( const char *Buffer = DebugObj.getBinary()->getMemoryBufferRef().getBufferStart(); size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize(); - llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() && "Second attempt to perform debug registration."); jit_code_entry* JITCodeEntry = new jit_code_entry(); @@ -178,7 +178,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded( } void GDBJITRegistrationListener::notifyFreeingObject(ObjectKey K) { - llvm::MutexGuard locked(*JITDebugLock); + std::lock_guard locked(*JITDebugLock); RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(K); if (I != ObjectBufferMap.end()) { diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index c3a2ccc582c..71b7f893d71 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/UniqueLock.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -41,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -258,7 +258,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, ArrayRef ArgVals) { TheInterpreter = this; - unique_lock Guard(*FunctionsLock); + std::unique_lock Guard(*FunctionsLock); // Do a lookup to see if the function is in our cache... this should just be a // deferred annotation! 
diff --git a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h index 1271ad962b3..b47a798c760 100644 --- a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h +++ b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h @@ -20,24 +20,23 @@ namespace jitlink { template class BasicGOTAndStubsBuilder { public: - BasicGOTAndStubsBuilder(AtomGraph &G) : G(G) {} + BasicGOTAndStubsBuilder(LinkGraph &G) : G(G) {} void run() { - // We're going to be adding new atoms, but we don't want to iterate over - // the newly added ones, so just copy the existing atoms out. - std::vector DAs(G.defined_atoms().begin(), - G.defined_atoms().end()); + // We're going to be adding new blocks, but we don't want to iterate over + // the newly added ones, so just copy the existing blocks out. + std::vector Blocks(G.blocks().begin(), G.blocks().end()); - for (auto *DA : DAs) - for (auto &E : DA->edges()) + for (auto *B : Blocks) + for (auto &E : B->edges()) if (impl().isGOTEdge(E)) - impl().fixGOTEdge(E, getGOTEntryAtom(E.getTarget())); + impl().fixGOTEdge(E, getGOTEntrySymbol(E.getTarget())); else if (impl().isExternalBranchEdge(E)) - impl().fixExternalBranchEdge(E, getStubAtom(E.getTarget())); + impl().fixExternalBranchEdge(E, getStubSymbol(E.getTarget())); } protected: - Atom &getGOTEntryAtom(Atom &Target) { + Symbol &getGOTEntrySymbol(Symbol &Target) { assert(Target.hasName() && "GOT edge cannot point to anonymous target"); auto GOTEntryI = GOTEntries.find(Target.getName()); @@ -49,31 +48,31 @@ protected: GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first; } - assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry atom"); + assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry symbol"); return *GOTEntryI->second; } - Atom &getStubAtom(Atom &Target) { + Symbol &getStubSymbol(Symbol &Target) { assert(Target.hasName() && "External branch edge can not point to an anonymous target"); auto StubI = 
Stubs.find(Target.getName()); if (StubI == Stubs.end()) { - auto &StubAtom = impl().createStub(Target); - StubI = Stubs.insert(std::make_pair(Target.getName(), &StubAtom)).first; + auto &StubSymbol = impl().createStub(Target); + StubI = Stubs.insert(std::make_pair(Target.getName(), &StubSymbol)).first; } - assert(StubI != Stubs.end() && "Count not get stub atom"); + assert(StubI != Stubs.end() && "Count not get stub symbol"); return *StubI->second; } - AtomGraph &G; + LinkGraph &G; private: BuilderImpl &impl() { return static_cast(*this); } - DenseMap GOTEntries; - DenseMap Stubs; + DenseMap GOTEntries; + DenseMap Stubs; }; } // end namespace jitlink diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 25f0e9040ff..f80b0e7f890 100644 --- a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -17,18 +17,14 @@ namespace llvm { namespace jitlink { -EHFrameParser::EHFrameParser(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, - JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind) - : G(G), EHFrameSection(EHFrameSection), EHFrameContent(EHFrameContent), - EHFrameAddress(EHFrameAddress), - EHFrameReader(EHFrameContent, G.getEndianness()), - FDEToCIERelocKind(FDEToCIERelocKind), - FDEToTargetRelocKind(FDEToTargetRelocKind) {} +EHFrameBinaryParser::EHFrameBinaryParser(JITTargetAddress EHFrameAddress, + StringRef EHFrameContent, + unsigned PointerSize, + support::endianness Endianness) + : EHFrameAddress(EHFrameAddress), EHFrameContent(EHFrameContent), + PointerSize(PointerSize), EHFrameReader(EHFrameContent, Endianness) {} -Error EHFrameParser::atomize() { +Error EHFrameBinaryParser::addToGraph() { while (!EHFrameReader.empty()) { size_t RecordOffset = EHFrameReader.getOffset(); @@ -38,44 +34,39 @@ Error EHFrameParser::atomize() { << " (offset " << RecordOffset << ")\n"; }); - size_t CIELength = 0; 
- uint32_t CIELengthField; - if (auto Err = EHFrameReader.readInteger(CIELengthField)) + size_t RecordLength = 0; + uint32_t RecordLengthField; + if (auto Err = EHFrameReader.readInteger(RecordLengthField)) return Err; - // Process CIE length/extended-length fields to build the atom. + // Process CIE/FDE length/extended-length fields to build the blocks. // // The value of these fields describe the length of the *rest* of the CIE // (not including data up to the end of the field itself) so we have to - // bump CIELength to include the data up to the end of the field: 4 bytes + // bump RecordLength to include the data up to the end of the field: 4 bytes // for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength. - if (CIELengthField == 0) // Length 0 means end of __eh_frame section. + if (RecordLengthField == 0) // Length 0 means end of __eh_frame section. break; // If the regular length field's value is 0xffffffff, use extended length. - if (CIELengthField == 0xffffffff) { - uint64_t CIEExtendedLengthField; - if (auto Err = EHFrameReader.readInteger(CIEExtendedLengthField)) + if (RecordLengthField == 0xffffffff) { + uint64_t ExtendedLengthField; + if (auto Err = EHFrameReader.readInteger(ExtendedLengthField)) return Err; - if (CIEExtendedLengthField > EHFrameReader.bytesRemaining()) + if (ExtendedLengthField > EHFrameReader.bytesRemaining()) return make_error("CIE record extends past the end of " "the __eh_frame section"); - if (CIEExtendedLengthField + 12 > std::numeric_limits::max()) + if (ExtendedLengthField + 12 > std::numeric_limits::max()) return make_error("CIE record too large to process"); - CIELength = CIEExtendedLengthField + 12; + RecordLength = ExtendedLengthField + 12; } else { - if (CIELengthField > EHFrameReader.bytesRemaining()) + if (RecordLengthField > EHFrameReader.bytesRemaining()) return make_error("CIE record extends past the end of " "the __eh_frame section"); - CIELength = CIELengthField + 4; + RecordLength = RecordLengthField + 4; } 
- LLVM_DEBUG(dbgs() << " length: " << CIELength << "\n"); - - // Add an atom for this record. - CurRecordAtom = &G.addAnonymousAtom( - EHFrameSection, EHFrameAddress + RecordOffset, G.getPointerSize()); - CurRecordAtom->setContent(EHFrameContent.substr(RecordOffset, CIELength)); + LLVM_DEBUG(dbgs() << " length: " << RecordLength << "\n"); // Read the CIE Pointer. size_t CIEPointerAddress = EHFrameAddress + EHFrameReader.getOffset(); @@ -85,21 +76,24 @@ Error EHFrameParser::atomize() { // Based on the CIE pointer value, parse this as a CIE or FDE record. if (CIEPointer == 0) { - if (auto Err = processCIE()) + if (auto Err = processCIE(RecordOffset, RecordLength)) return Err; } else { - if (auto Err = processFDE(CIEPointerAddress, CIEPointer)) + if (auto Err = processFDE(RecordOffset, RecordLength, CIEPointerAddress, + CIEPointer)) return Err; } - EHFrameReader.setOffset(RecordOffset + CIELength); + EHFrameReader.setOffset(RecordOffset + RecordLength); } return Error::success(); } -Expected -EHFrameParser::parseAugmentationString() { +void EHFrameBinaryParser::anchor() {} + +Expected +EHFrameBinaryParser::parseAugmentationString() { AugmentationInfo AugInfo; uint8_t NextChar; uint8_t *NextField = &AugInfo.Fields[0]; @@ -139,14 +133,14 @@ EHFrameParser::parseAugmentationString() { return std::move(AugInfo); } -Expected EHFrameParser::readAbsolutePointer() { +Expected EHFrameBinaryParser::readAbsolutePointer() { static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t), "Result must be able to hold a uint64_t"); JITTargetAddress Addr; - if (G.getPointerSize() == 8) { + if (PointerSize == 8) { if (auto Err = EHFrameReader.readInteger(Addr)) return std::move(Err); - } else if (G.getPointerSize() == 4) { + } else if (PointerSize == 4) { uint32_t Addr32; if (auto Err = EHFrameReader.readInteger(Addr32)) return std::move(Err); @@ -156,14 +150,19 @@ Expected EHFrameParser::readAbsolutePointer() { return Addr; } -Error EHFrameParser::processCIE() { +Error 
EHFrameBinaryParser::processCIE(size_t RecordOffset, + size_t RecordLength) { // Use the dwarf namespace for convenient access to pointer encoding // constants. using namespace dwarf; LLVM_DEBUG(dbgs() << " Record is CIE\n"); - CIEInformation CIEInfo(*CurRecordAtom); + auto &CIESymbol = + createCIERecord(EHFrameAddress + RecordOffset, + EHFrameContent.substr(RecordOffset, RecordLength)); + + CIEInformation CIEInfo(CIESymbol); uint8_t Version = 0; if (auto Err = EHFrameReader.readInteger(Version)) @@ -179,7 +178,7 @@ Error EHFrameParser::processCIE() { // Skip the EH Data field if present. if (AugInfo->EHDataFieldPresent) - if (auto Err = EHFrameReader.skip(G.getPointerSize())) + if (auto Err = EHFrameReader.skip(PointerSize)) return Err; // Read and sanity check the code alignment factor. @@ -226,7 +225,7 @@ Error EHFrameParser::processCIE() { return make_error( "Unsupported LSDA pointer encoding " + formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); break; } case 'P': { @@ -239,7 +238,7 @@ Error EHFrameParser::processCIE() { "Unspported personality pointer " "encoding " + formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); uint32_t PersonalityPointerAddress; if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress)) return Err; @@ -254,7 +253,7 @@ Error EHFrameParser::processCIE() { "Unsupported FDE address pointer " "encoding " + formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " + - formatv("{0:x16}", CurRecordAtom->getAddress())); + formatv("{0:x16}", CIESymbol.getAddress())); break; } default: @@ -267,15 +266,16 @@ Error EHFrameParser::processCIE() { return make_error("Read past the end of the augmentation " "data while parsing fields"); - assert(!CIEInfos.count(CurRecordAtom->getAddress()) && + 
assert(!CIEInfos.count(CIESymbol.getAddress()) && "Multiple CIEs recorded at the same address?"); - CIEInfos[CurRecordAtom->getAddress()] = std::move(CIEInfo); + CIEInfos[CIESymbol.getAddress()] = std::move(CIEInfo); return Error::success(); } -Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, - uint32_t CIEPointer) { +Error EHFrameBinaryParser::processFDE(size_t RecordOffset, size_t RecordLength, + JITTargetAddress CIEPointerAddress, + uint32_t CIEPointer) { LLVM_DEBUG(dbgs() << " Record is FDE\n"); LLVM_DEBUG({ @@ -286,16 +286,11 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer); if (CIEInfoItr == CIEInfos.end()) return make_error( - "FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()) + + "FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset) + " points to non-existant CIE at " + formatv("{0:x16}", CIEPointerAddress - CIEPointer)); auto &CIEInfo = CIEInfoItr->second; - // The CIEPointer looks good. Add a relocation. - CurRecordAtom->addEdge(FDEToCIERelocKind, - CIEPointerAddress - CurRecordAtom->getAddress(), - *CIEInfo.CIEAtom, 0); - // Read and sanity check the PC-start pointer and size. 
JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset(); @@ -305,83 +300,68 @@ Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress, JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta; LLVM_DEBUG({ - dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n"; + dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n"; }); - auto *TargetAtom = G.getAtomByAddress(PCBegin); + auto *TargetSymbol = getSymbolAtAddress(PCBegin); - if (!TargetAtom) + if (!TargetSymbol) return make_error("FDE PC-begin " + formatv("{0:x16}", PCBegin) + - " does not point at atom"); + " does not point at symbol"); - if (TargetAtom->getAddress() != PCBegin) + if (TargetSymbol->getAddress() != PCBegin) return make_error( "FDE PC-begin " + formatv("{0:x16}", PCBegin) + - " does not point to start of atom at " + - formatv("{0:x16}", TargetAtom->getAddress())); + " does not point to start of symbol at " + + formatv("{0:x16}", TargetSymbol->getAddress())); - LLVM_DEBUG(dbgs() << " FDE target: " << *TargetAtom << "\n"); - - // The PC-start pointer and size look good. Add relocations. - CurRecordAtom->addEdge(FDEToTargetRelocKind, - PCBeginAddress - CurRecordAtom->getAddress(), - *TargetAtom, 0); - - // Add a keep-alive relocation from the function to the FDE to ensure it is - // not dead stripped. - TargetAtom->addEdge(Edge::KeepAlive, 0, *CurRecordAtom, 0); + LLVM_DEBUG(dbgs() << " FDE target: " << *TargetSymbol << "\n"); // Skip over the PC range size field. 
- if (auto Err = EHFrameReader.skip(G.getPointerSize())) + if (auto Err = EHFrameReader.skip(PointerSize)) return Err; + Symbol *LSDASymbol = nullptr; + JITTargetAddress LSDAAddress = 0; if (CIEInfo.FDEsHaveLSDAField) { uint64_t AugmentationDataSize; if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize)) return Err; - if (AugmentationDataSize != G.getPointerSize()) + if (AugmentationDataSize != PointerSize) return make_error( "Unexpected FDE augmentation data size (expected " + - Twine(G.getPointerSize()) + ", got " + Twine(AugmentationDataSize) + - ") for FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress())); - JITTargetAddress LSDAAddress = EHFrameAddress + EHFrameReader.getOffset(); + Twine(PointerSize) + ", got " + Twine(AugmentationDataSize) + + ") for FDE at " + formatv("{0:x16}", EHFrameAddress + RecordOffset)); + LSDAAddress = EHFrameAddress + EHFrameReader.getOffset(); auto LSDADelta = readAbsolutePointer(); if (!LSDADelta) return LSDADelta.takeError(); JITTargetAddress LSDA = LSDAAddress + *LSDADelta; - auto *LSDAAtom = G.getAtomByAddress(LSDA); + LSDASymbol = getSymbolAtAddress(LSDA); - if (!LSDAAtom) + if (!LSDASymbol) return make_error("FDE LSDA " + formatv("{0:x16}", LSDA) + - " does not point at atom"); + " does not point at symbol"); - if (LSDAAtom->getAddress() != LSDA) + if (LSDASymbol->getAddress() != LSDA) return make_error( "FDE LSDA " + formatv("{0:x16}", LSDA) + - " does not point to start of atom at " + - formatv("{0:x16}", LSDAAtom->getAddress())); + " does not point to start of symbol at " + + formatv("{0:x16}", LSDASymbol->getAddress())); - LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDAAtom << "\n"); - - // LSDA looks good. Add relocations. 
- CurRecordAtom->addEdge(FDEToTargetRelocKind, - LSDAAddress - CurRecordAtom->getAddress(), *LSDAAtom, - 0); + LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDASymbol << "\n"); } - return Error::success(); -} + JITTargetAddress RecordAddress = EHFrameAddress + RecordOffset; + auto FDESymbol = createFDERecord( + RecordAddress, EHFrameContent.substr(RecordOffset, RecordLength), + *CIEInfo.CIESymbol, CIEPointerAddress - RecordAddress, *TargetSymbol, + PCBeginAddress - RecordAddress, LSDASymbol, LSDAAddress - RecordAddress); -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind) { - return EHFrameParser(G, EHFrameSection, EHFrameContent, EHFrameAddress, - FDEToCIERelocKind, FDEToTargetRelocKind) - .atomize(); + return FDESymbol.takeError(); } // Determine whether we can register EH tables. @@ -451,11 +431,13 @@ static Error deregisterFrameWrapper(const void *P) { template Error walkAppleEHFrameSection(const char *const SectionStart, + size_t SectionSize, HandleFDEFn HandleFDE) { const char *CurCFIRecord = SectionStart; + const char *End = SectionStart + SectionSize; uint64_t Size = *reinterpret_cast(CurCFIRecord); - while (Size != 0) { + while (CurCFIRecord != End && Size != 0) { const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4); if (Size == 0xffffffff) Size = *reinterpret_cast(CurCFIRecord + 4) + 12; @@ -484,10 +466,12 @@ Error walkAppleEHFrameSection(const char *const SectionStart, #endif // __APPLE__ -Error registerEHFrameSection(const void *EHFrameSectionAddr) { +Error registerEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize) { #ifdef __APPLE__ // On Darwin __register_frame has to be called for each FDE entry. 
return walkAppleEHFrameSection(static_cast(EHFrameSectionAddr), + EHFrameSectionSize, registerFrameWrapper); #else // On Linux __register_frame takes a single argument: @@ -499,9 +483,11 @@ Error registerEHFrameSection(const void *EHFrameSectionAddr) { #endif } -Error deregisterEHFrameSection(const void *EHFrameSectionAddr) { +Error deregisterEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize) { #ifdef __APPLE__ return walkAppleEHFrameSection(static_cast(EHFrameSectionAddr), + EHFrameSectionSize, deregisterFrameWrapper); #else return deregisterFrameWrapper(EHFrameSectionAddr); @@ -517,23 +503,31 @@ InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() { InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {} -AtomGraphPassFunction +LinkGraphPassFunction createEHFrameRecorderPass(const Triple &TT, - StoreFrameAddressFunction StoreFrameAddress) { + StoreFrameRangeFunction StoreRangeAddress) { const char *EHFrameSectionName = nullptr; if (TT.getObjectFormat() == Triple::MachO) EHFrameSectionName = "__eh_frame"; else EHFrameSectionName = ".eh_frame"; - auto RecordEHFrame = [EHFrameSectionName, - StoreFrameAddress](AtomGraph &G) -> Error { - // Search for a non-empty eh-frame and record the address of the first atom - // in it. + auto RecordEHFrame = + [EHFrameSectionName, + StoreFrameRange = std::move(StoreRangeAddress)](LinkGraph &G) -> Error { + // Search for a non-empty eh-frame and record the address of the first + // symbol in it. 
JITTargetAddress Addr = 0; - if (auto *S = G.findSectionByName(EHFrameSectionName)) - Addr = S->getRange().getStart(); - StoreFrameAddress(Addr); + size_t Size = 0; + if (auto *S = G.findSectionByName(EHFrameSectionName)) { + auto R = SectionRange(*S); + Addr = R.getStart(); + Size = R.getSize(); + } + if (Addr == 0 && Size != 0) + return make_error("__eh_frame section can not have zero " + "address with non-zero size"); + StoreFrameRange(Addr, Size); return Error::success(); }; diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h index d679edef7ea..6f9f68ad838 100644 --- a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h +++ b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h @@ -21,18 +21,31 @@ namespace llvm { namespace jitlink { -/// A generic parser for eh-frame sections. +/// A generic binary parser for eh-frame sections. /// -/// Adds atoms representing CIE and FDE entries, using the given FDE-to-CIE and -/// FDEToTarget relocation kinds. -class EHFrameParser { +/// Adds blocks and symbols representing CIE and FDE entries to a JITLink graph. +/// +/// This parser assumes that the user has already verified that the EH-frame's +/// address range does not overlap any other section/symbol, so that generated +/// CIE/FDE records do not overlap other sections/symbols. 
+class EHFrameBinaryParser { public: - EHFrameParser(AtomGraph &G, Section &EHFrameSection, StringRef EHFrameContent, - JITTargetAddress EHFrameAddress, Edge::Kind FDEToCIERelocKind, - Edge::Kind FDEToTargetRelocKind); - Error atomize(); + EHFrameBinaryParser(JITTargetAddress EHFrameAddress, StringRef EHFrameContent, + unsigned PointerSize, support::endianness Endianness); + virtual ~EHFrameBinaryParser() {} + + Error addToGraph(); private: + virtual void anchor(); + virtual Symbol *getSymbolAtAddress(JITTargetAddress Addr) = 0; + virtual Symbol &createCIERecord(JITTargetAddress RecordAddr, + StringRef RecordContent) = 0; + virtual Expected + createFDERecord(JITTargetAddress RecordAddr, StringRef RecordContent, + Symbol &CIE, size_t CIEOffset, Symbol &Func, + size_t FuncOffset, Symbol *LSDA, size_t LSDAOffset) = 0; + struct AugmentationInfo { bool AugmentationDataPresent = false; bool EHDataFieldPresent = false; @@ -41,31 +54,24 @@ private: Expected parseAugmentationString(); Expected readAbsolutePointer(); - Error processCIE(); - Error processFDE(JITTargetAddress CIEPointerAddress, uint32_t CIEPointer); + Error processCIE(size_t RecordOffset, size_t RecordLength); + Error processFDE(size_t RecordOffset, size_t RecordLength, + JITTargetAddress CIEPointerOffset, uint32_t CIEPointer); struct CIEInformation { CIEInformation() = default; - CIEInformation(DefinedAtom &CIEAtom) : CIEAtom(&CIEAtom) {} - DefinedAtom *CIEAtom = nullptr; + CIEInformation(Symbol &CIESymbol) : CIESymbol(&CIESymbol) {} + Symbol *CIESymbol = nullptr; bool FDEsHaveLSDAField = false; }; - AtomGraph &G; - Section &EHFrameSection; - StringRef EHFrameContent; JITTargetAddress EHFrameAddress; + StringRef EHFrameContent; + unsigned PointerSize; BinaryStreamReader EHFrameReader; - DefinedAtom *CurRecordAtom = nullptr; DenseMap CIEInfos; - Edge::Kind FDEToCIERelocKind; - Edge::Kind FDEToTargetRelocKind; }; -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, 
JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind); - } // end namespace jitlink } // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/JITLink.cpp b/lib/ExecutionEngine/JITLink/JITLink.cpp index 9d0a7459dc0..1e19038951a 100644 --- a/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -56,95 +56,151 @@ std::error_code JITLinkError::convertToErrorCode() const { return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory); } -const StringRef getGenericEdgeKindName(Edge::Kind K) { +const char *getGenericEdgeKindName(Edge::Kind K) { switch (K) { case Edge::Invalid: return "INVALID RELOCATION"; case Edge::KeepAlive: return "Keep-Alive"; - case Edge::LayoutNext: - return "Layout-Next"; default: llvm_unreachable("Unrecognized relocation kind"); } } -raw_ostream &operator<<(raw_ostream &OS, const Atom &A) { +const char *getLinkageName(Linkage L) { + switch (L) { + case Linkage::Strong: + return "strong"; + case Linkage::Weak: + return "weak"; + } + llvm_unreachable("Unrecognized llvm.jitlink.Linkage enum"); +} + +const char *getScopeName(Scope S) { + switch (S) { + case Scope::Default: + return "default"; + case Scope::Hidden: + return "hidden"; + case Scope::Local: + return "local"; + } + llvm_unreachable("Unrecognized llvm.jitlink.Scope enum"); +} + +raw_ostream &operator<<(raw_ostream &OS, const Block &B) { + return OS << formatv("{0:x16}", B.getAddress()) << " -- " + << formatv("{0:x16}", B.getAddress() + B.getSize()) << ": " + << (B.isZeroFill() ? 
"zero-fill" : "content") + << ", align = " << B.getAlignment() + << ", align-ofs = " << B.getAlignmentOffset() + << ", section = " << B.getSection().getName(); +} + +raw_ostream &operator<<(raw_ostream &OS, const Symbol &Sym) { OS << "<"; - if (A.getName().empty()) - OS << "anon@" << format("0x%016" PRIx64, A.getAddress()); + if (Sym.getName().empty()) + OS << "*anon*"; else - OS << A.getName(); - OS << " ["; - if (A.isDefined()) { - auto &DA = static_cast(A); - OS << " section=" << DA.getSection().getName(); - if (DA.isLive()) - OS << " live"; - if (DA.shouldDiscard()) - OS << " should-discard"; - } else - OS << " external"; - OS << " ]>"; + OS << Sym.getName(); + OS << ": flags = "; + switch (Sym.getLinkage()) { + case Linkage::Strong: + OS << 'S'; + break; + case Linkage::Weak: + OS << 'W'; + break; + } + switch (Sym.getScope()) { + case Scope::Default: + OS << 'D'; + break; + case Scope::Hidden: + OS << 'H'; + break; + case Scope::Local: + OS << 'L'; + break; + } + OS << (Sym.isLive() ? 
'+' : '-') + << ", size = " << formatv("{0:x8}", Sym.getSize()) + << ", addr = " << formatv("{0:x16}", Sym.getAddress()) << " (" + << formatv("{0:x16}", Sym.getAddressable().getAddress()) << " + " + << formatv("{0:x8}", Sym.getOffset()); + if (Sym.isDefined()) + OS << " " << Sym.getBlock().getSection().getName(); + OS << ")>"; return OS; } -void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E, +void printEdge(raw_ostream &OS, const Block &B, const Edge &E, StringRef EdgeKindName) { - OS << "edge@" << formatv("{0:x16}", FixupAtom.getAddress() + E.getOffset()) - << ": " << FixupAtom << " + " << E.getOffset() << " -- " << EdgeKindName - << " -> " << E.getTarget() << " + " << E.getAddend(); + OS << "edge@" << formatv("{0:x16}", B.getAddress() + E.getOffset()) << ": " + << formatv("{0:x16}", B.getAddress()) << " + " << E.getOffset() << " -- " + << EdgeKindName << " -> " << E.getTarget() << " + " << E.getAddend(); } Section::~Section() { - for (auto *DA : DefinedAtoms) - DA->~DefinedAtom(); + for (auto *Sym : Symbols) + Sym->~Symbol(); } -void AtomGraph::dump(raw_ostream &OS, +LinkGraph::~LinkGraph() { + // Destroy blocks. + for (auto *B : Blocks) + B->~Block(); +} + +void LinkGraph::dump(raw_ostream &OS, std::function EdgeKindToName) { if (!EdgeKindToName) EdgeKindToName = [](Edge::Kind K) { return StringRef(); }; - OS << "Defined atoms:\n"; - for (auto *DA : defined_atoms()) { - OS << " " << format("0x%016" PRIx64, DA->getAddress()) << ": " << *DA + OS << "Symbols:\n"; + for (auto *Sym : defined_symbols()) { + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; - for (auto &E : DA->edges()) { - OS << " "; - StringRef EdgeName = (E.getKind() < Edge::FirstRelocation - ? getGenericEdgeKindName(E.getKind()) - : EdgeKindToName(E.getKind())); + if (Sym->isDefined()) { + for (auto &E : Sym->getBlock().edges()) { + OS << " "; + StringRef EdgeName = (E.getKind() < Edge::FirstRelocation + ? 
getGenericEdgeKindName(E.getKind()) + : EdgeKindToName(E.getKind())); - if (!EdgeName.empty()) - printEdge(OS, *DA, E, EdgeName); - else { - auto EdgeNumberString = std::to_string(E.getKind()); - printEdge(OS, *DA, E, EdgeNumberString); + if (!EdgeName.empty()) + printEdge(OS, Sym->getBlock(), E, EdgeName); + else { + auto EdgeNumberString = std::to_string(E.getKind()); + printEdge(OS, Sym->getBlock(), E, EdgeNumberString); + } + OS << "\n"; } - OS << "\n"; } } - OS << "Absolute atoms:\n"; - for (auto *A : absolute_atoms()) - OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A + OS << "Absolute symbols:\n"; + for (auto *Sym : absolute_symbols()) + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; - OS << "External atoms:\n"; - for (auto *A : external_atoms()) - OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A + OS << "External symbols:\n"; + for (auto *Sym : external_symbols()) + OS << " " << format("0x%016" PRIx64, Sym->getAddress()) << ": " << *Sym << "\n"; } +void JITLinkAsyncLookupContinuation::anchor() {} + JITLinkContext::~JITLinkContext() {} bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const { return true; } -AtomGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const { - return AtomGraphPassFunction(); +LinkGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const { + return LinkGraphPassFunction(); } Error JITLinkContext::modifyPassConfig(const Triple &TT, @@ -152,9 +208,9 @@ Error JITLinkContext::modifyPassConfig(const Triple &TT, return Error::success(); } -Error markAllAtomsLive(AtomGraph &G) { - for (auto *DA : G.defined_atoms()) - DA->setLive(true); +Error markAllSymbolsLive(LinkGraph &G) { + for (auto *Sym : G.defined_symbols()) + Sym->setLive(true); return Error::success(); } diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 96e074da122..d4270b5aa79 100644 --- 
a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "JITLinkGeneric.h" -#include "EHFrameSupportImpl.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/MemoryBuffer.h" @@ -25,7 +24,7 @@ JITLinkerBase::~JITLinkerBase() {} void JITLinkerBase::linkPhase1(std::unique_ptr Self) { - // Build the atom graph. + // Build the link graph. if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer())) G = std::move(*GraphOrErr); else @@ -33,33 +32,33 @@ void JITLinkerBase::linkPhase1(std::unique_ptr Self) { assert(G && "Graph should have been created by buildGraph above"); // Prune and optimize the graph. - if (auto Err = runPasses(Passes.PrePrunePasses, *G)) + if (auto Err = runPasses(Passes.PrePrunePasses)) return Ctx->notifyFailed(std::move(Err)); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" pre-pruning:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" pre-pruning:\n"; dumpGraph(dbgs()); }); prune(*G); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" post-pruning:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" post-pruning:\n"; dumpGraph(dbgs()); }); // Run post-pruning passes. - if (auto Err = runPasses(Passes.PostPrunePasses, *G)) + if (auto Err = runPasses(Passes.PostPrunePasses)) return Ctx->notifyFailed(std::move(Err)); - // Sort atoms into segments. - layOutAtoms(); + // Sort blocks into segments. + auto Layout = layOutBlocks(); // Allocate memory for segments. if (auto Err = allocateSegments(Layout)) return Ctx->notifyFailed(std::move(Err)); - // Notify client that the defined atoms have been assigned addresses. + // Notify client that the defined symbols have been assigned addresses. 
Ctx->notifyResolved(*G); auto ExternalSymbols = getExternalSymbolNames(); @@ -74,42 +73,42 @@ void JITLinkerBase::linkPhase1(std::unique_ptr Self) { // [Self=std::move(Self)](Expected Result) { // Self->linkPhase2(std::move(Self), std::move(Result)); // }); - // - // FIXME: Use move capture once we have c++14. auto *TmpCtx = Ctx.get(); - auto *UnownedSelf = Self.release(); - auto Phase2Continuation = - [UnownedSelf](Expected LookupResult) { - std::unique_ptr Self(UnownedSelf); - UnownedSelf->linkPhase2(std::move(Self), std::move(LookupResult)); - }; - TmpCtx->lookup(std::move(ExternalSymbols), std::move(Phase2Continuation)); + TmpCtx->lookup(std::move(ExternalSymbols), + createLookupContinuation( + [S = std::move(Self), L = std::move(Layout)]( + Expected LookupResult) mutable { + auto &TmpSelf = *S; + TmpSelf.linkPhase2(std::move(S), std::move(LookupResult), + std::move(L)); + })); } void JITLinkerBase::linkPhase2(std::unique_ptr Self, - Expected LR) { + Expected LR, + SegmentLayoutMap Layout) { // If the lookup failed, bail out. if (!LR) return deallocateAndBailOut(LR.takeError()); - // Assign addresses to external atoms. + // Assign addresses to external addressables. applyLookupResult(*LR); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" before copy-and-fixup:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n"; dumpGraph(dbgs()); }); - // Copy atom content to working memory and fix up. - if (auto Err = copyAndFixUpAllAtoms(Layout, *Alloc)) + // Copy block content to working memory and fix up. 
+ if (auto Err = copyAndFixUpBlocks(Layout, *Alloc)) return deallocateAndBailOut(std::move(Err)); LLVM_DEBUG({ - dbgs() << "Atom graph \"" << G->getName() << "\" after copy-and-fixup:\n"; + dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n"; dumpGraph(dbgs()); }); - if (auto Err = runPasses(Passes.PostFixupPasses, *G)) + if (auto Err = runPasses(Passes.PostFixupPasses)) return deallocateAndBailOut(std::move(Err)); // FIXME: Use move capture once we have c++14. @@ -128,82 +127,38 @@ void JITLinkerBase::linkPhase3(std::unique_ptr Self, Error Err) { Ctx->notifyFinalized(std::move(Alloc)); } -Error JITLinkerBase::runPasses(AtomGraphPassList &Passes, AtomGraph &G) { +Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) { for (auto &P : Passes) - if (auto Err = P(G)) + if (auto Err = P(*G)) return Err; return Error::success(); } -void JITLinkerBase::layOutAtoms() { - // Group sections by protections, and whether or not they're zero-fill. - for (auto &S : G->sections()) { +JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() { - // Skip empty sections. - if (S.atoms_empty()) - continue; + SegmentLayoutMap Layout; - auto &SL = Layout[S.getProtectionFlags()]; - if (S.isZeroFill()) - SL.ZeroFillSections.push_back(SegmentLayout::SectionLayout(S)); + /// Partition blocks based on permissions and content vs. zero-fill. + for (auto *B : G->blocks()) { + auto &SegLists = Layout[B->getSection().getProtectionFlags()]; + if (!B->isZeroFill()) + SegLists.ContentBlocks.push_back(B); else - SL.ContentSections.push_back(SegmentLayout::SectionLayout(S)); + SegLists.ZeroFillBlocks.push_back(B); } - // Sort sections within the layout by ordinal. 
- { - auto CompareByOrdinal = [](const SegmentLayout::SectionLayout &LHS, - const SegmentLayout::SectionLayout &RHS) { - return LHS.S->getSectionOrdinal() < RHS.S->getSectionOrdinal(); - }; - for (auto &KV : Layout) { - auto &SL = KV.second; - std::sort(SL.ContentSections.begin(), SL.ContentSections.end(), - CompareByOrdinal); - std::sort(SL.ZeroFillSections.begin(), SL.ZeroFillSections.end(), - CompareByOrdinal); - } - } - - // Add atoms to the sections. + /// Sort blocks within each list. for (auto &KV : Layout) { - auto &SL = KV.second; - for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) { - for (auto &SI : *SIList) { - // First build the set of layout-heads (i.e. "heads" of layout-next - // chains) by copying the section atoms, then eliminating any that - // appear as layout-next targets. - DenseSet LayoutHeads; - for (auto *DA : SI.S->atoms()) - LayoutHeads.insert(DA); - for (auto *DA : SI.S->atoms()) - if (DA->hasLayoutNext()) - LayoutHeads.erase(&DA->getLayoutNext()); + auto CompareBlocks = [](const Block *LHS, const Block *RHS) { + if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal()) + return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal(); + return LHS->getOrdinal() < RHS->getOrdinal(); + }; - // Next, sort the layout heads by address order. - std::vector OrderedLayoutHeads; - OrderedLayoutHeads.reserve(LayoutHeads.size()); - for (auto *DA : LayoutHeads) - OrderedLayoutHeads.push_back(DA); - - // Now sort the list of layout heads by address. - std::sort(OrderedLayoutHeads.begin(), OrderedLayoutHeads.end(), - [](const DefinedAtom *LHS, const DefinedAtom *RHS) { - return LHS->getAddress() < RHS->getAddress(); - }); - - // Now populate the SI.Atoms field by appending each of the chains. 
- for (auto *DA : OrderedLayoutHeads) { - SI.Atoms.push_back(DA); - while (DA->hasLayoutNext()) { - auto &Next = DA->getLayoutNext(); - SI.Atoms.push_back(&Next); - DA = &Next; - } - } - } - } + auto &SegLists = KV.second; + llvm::sort(SegLists.ContentBlocks, CompareBlocks); + llvm::sort(SegLists.ZeroFillBlocks, CompareBlocks); } LLVM_DEBUG({ @@ -213,18 +168,16 @@ void JITLinkerBase::layOutAtoms() { << static_cast(KV.first) << ":\n"; auto &SL = KV.second; for (auto &SIEntry : - {std::make_pair(&SL.ContentSections, "content sections"), - std::make_pair(&SL.ZeroFillSections, "zero-fill sections")}) { - auto &SIList = *SIEntry.first; + {std::make_pair(&SL.ContentBlocks, "content block"), + std::make_pair(&SL.ZeroFillBlocks, "zero-fill block")}) { dbgs() << " " << SIEntry.second << ":\n"; - for (auto &SI : SIList) { - dbgs() << " " << SI.S->getName() << ":\n"; - for (auto *DA : SI.Atoms) - dbgs() << " " << *DA << "\n"; - } + for (auto *B : *SIEntry.first) + dbgs() << " " << *B << "\n"; } } }); + + return Layout; } Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { @@ -234,74 +187,36 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { JITLinkMemoryManager::SegmentsRequestMap Segments; for (auto &KV : Layout) { auto &Prot = KV.first; - auto &SegLayout = KV.second; + auto &SegLists = KV.second; + + uint64_t SegAlign = 1; // Calculate segment content size. size_t SegContentSize = 0; - for (auto &SI : SegLayout.ContentSections) { - assert(!SI.S->atoms_empty() && "Sections in layout must not be empty"); - assert(!SI.Atoms.empty() && "Section layouts must not be empty"); - - // Bump to section alignment before processing atoms. 
- SegContentSize = alignTo(SegContentSize, SI.S->getAlignment()); - - for (auto *DA : SI.Atoms) { - SegContentSize = alignTo(SegContentSize, DA->getAlignment()); - SegContentSize += DA->getSize(); - } + for (auto *B : SegLists.ContentBlocks) { + SegAlign = std::max(SegAlign, B->getAlignment()); + SegContentSize = alignToBlock(SegContentSize, *B); + SegContentSize += B->getSize(); } - // Get segment content alignment. - unsigned SegContentAlign = 1; - if (!SegLayout.ContentSections.empty()) { - auto &FirstContentSection = SegLayout.ContentSections.front(); - SegContentAlign = - std::max(FirstContentSection.S->getAlignment(), - FirstContentSection.Atoms.front()->getAlignment()); + uint64_t SegZeroFillStart = SegContentSize; + uint64_t SegZeroFillEnd = SegZeroFillStart; + + for (auto *B : SegLists.ZeroFillBlocks) { + SegAlign = std::max(SegAlign, B->getAlignment()); + SegZeroFillEnd = alignToBlock(SegZeroFillEnd, *B); + SegZeroFillEnd += B->getSize(); } - // Calculate segment zero-fill size. - uint64_t SegZeroFillSize = 0; - for (auto &SI : SegLayout.ZeroFillSections) { - assert(!SI.S->atoms_empty() && "Sections in layout must not be empty"); - assert(!SI.Atoms.empty() && "Section layouts must not be empty"); - - // Bump to section alignment before processing atoms. - SegZeroFillSize = alignTo(SegZeroFillSize, SI.S->getAlignment()); - - for (auto *DA : SI.Atoms) { - SegZeroFillSize = alignTo(SegZeroFillSize, DA->getAlignment()); - SegZeroFillSize += DA->getSize(); - } - } - - // Calculate segment zero-fill alignment. 
- uint32_t SegZeroFillAlign = 1; - - if (!SegLayout.ZeroFillSections.empty()) { - auto &FirstZeroFillSection = SegLayout.ZeroFillSections.front(); - SegZeroFillAlign = - std::max(FirstZeroFillSection.S->getAlignment(), - FirstZeroFillSection.Atoms.front()->getAlignment()); - } - - if (SegContentSize == 0) - SegContentAlign = SegZeroFillAlign; - - if (SegContentAlign % SegZeroFillAlign != 0) - return make_error("First content atom alignment does not " - "accommodate first zero-fill atom " - "alignment"); - - Segments[Prot] = {SegContentSize, SegContentAlign, SegZeroFillSize, - SegZeroFillAlign}; + Segments[Prot] = {SegAlign, SegContentSize, + SegZeroFillEnd - SegZeroFillStart}; LLVM_DEBUG({ dbgs() << (&KV == &*Layout.begin() ? "" : "; ") - << static_cast(Prot) << ": " - << SegContentSize << " content bytes (alignment " - << SegContentAlign << ") + " << SegZeroFillSize - << " zero-fill bytes (alignment " << SegZeroFillAlign << ")"; + << static_cast(Prot) + << ": alignment = " << SegAlign + << ", content size = " << SegContentSize + << ", zero-fill size = " << (SegZeroFillEnd - SegZeroFillStart); }); } LLVM_DEBUG(dbgs() << " }\n"); @@ -320,22 +235,19 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { } }); - // Update atom target addresses. + // Update block target addresses. 
for (auto &KV : Layout) { auto &Prot = KV.first; auto &SL = KV.second; - JITTargetAddress AtomTargetAddr = + JITTargetAddress NextBlockAddr = Alloc->getTargetMemory(static_cast(Prot)); - for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) - for (auto &SI : *SIList) { - AtomTargetAddr = alignTo(AtomTargetAddr, SI.S->getAlignment()); - for (auto *DA : SI.Atoms) { - AtomTargetAddr = alignTo(AtomTargetAddr, DA->getAlignment()); - DA->setAddress(AtomTargetAddr); - AtomTargetAddr += DA->getSize(); - } + for (auto *SIList : {&SL.ContentBlocks, &SL.ZeroFillBlocks}) + for (auto *B : *SIList) { + NextBlockAddr = alignToBlock(NextBlockAddr, *B); + B->setAddress(NextBlockAddr); + NextBlockAddr += B->getSize(); } } @@ -343,34 +255,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { } DenseSet JITLinkerBase::getExternalSymbolNames() const { - // Identify unresolved external atoms. + // Identify unresolved external symbols. DenseSet UnresolvedExternals; - for (auto *DA : G->external_atoms()) { - assert(DA->getAddress() == 0 && + for (auto *Sym : G->external_symbols()) { + assert(Sym->getAddress() == 0 && "External has already been assigned an address"); - assert(DA->getName() != StringRef() && DA->getName() != "" && + assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); - UnresolvedExternals.insert(DA->getName()); + UnresolvedExternals.insert(Sym->getName()); } return UnresolvedExternals; } void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { - for (auto &KV : Result) { - Atom &A = G->getAtomByName(KV.first); - assert(A.getAddress() == 0 && "Atom already resolved"); - A.setAddress(KV.second.getAddress()); + for (auto *Sym : G->external_symbols()) { + assert(Sym->getAddress() == 0 && "Symbol already resolved"); + assert(!Sym->isDefined() && "Symbol being resolved is already defined"); + assert(Result.count(Sym->getName()) && "Missing resolution for symbol"); + 
Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress()); } LLVM_DEBUG({ dbgs() << "Externals after applying lookup result:\n"; - for (auto *A : G->external_atoms()) - dbgs() << " " << A->getName() << ": " - << formatv("{0:x16}", A->getAddress()) << "\n"; + for (auto *Sym : G->external_symbols()) + dbgs() << " " << Sym->getName() << ": " + << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); - assert(llvm::all_of(G->external_atoms(), - [](Atom *A) { return A->getAddress() != 0; }) && - "All atoms should have been resolved by this point"); + assert(llvm::all_of(G->external_symbols(), + [](Symbol *Sym) { return Sym->getAddress() != 0; }) && + "All symbols should have been resolved by this point"); } void JITLinkerBase::deallocateAndBailOut(Error Err) { @@ -384,96 +297,60 @@ void JITLinkerBase::dumpGraph(raw_ostream &OS) { G->dump(dbgs(), [this](Edge::Kind K) { return getEdgeKindName(K); }); } -void prune(AtomGraph &G) { - std::vector Worklist; - DenseMap> EdgesToUpdate; +void prune(LinkGraph &G) { + std::vector Worklist; + DenseSet VisitedBlocks; - // Build the initial worklist from all atoms initially live. - for (auto *DA : G.defined_atoms()) { - if (!DA->isLive() || DA->shouldDiscard()) - continue; + // Build the initial worklist from all symbols initially live. + for (auto *Sym : G.defined_symbols()) + if (Sym->isLive()) + Worklist.push_back(Sym); - for (auto &E : DA->edges()) { - if (!E.getTarget().isDefined()) - continue; - - auto &EDT = static_cast(E.getTarget()); - - if (EDT.shouldDiscard()) - EdgesToUpdate[&EDT].push_back(&E); - else if (E.isKeepAlive() && !EDT.isLive()) - Worklist.push_back(&EDT); - } - } - - // Propagate live flags to all atoms reachable from the initial live set. + // Propagate live flags to all symbols reachable from the initial live set. 
while (!Worklist.empty()) { - DefinedAtom &NextLive = *Worklist.back(); + auto *Sym = Worklist.back(); Worklist.pop_back(); - assert(!NextLive.shouldDiscard() && - "should-discard nodes should never make it into the worklist"); + auto &B = Sym->getBlock(); - // If this atom has already been marked as live, or is marked to be - // discarded, then skip it. - if (NextLive.isLive()) + // Skip addressables that we've visited before. + if (VisitedBlocks.count(&B)) continue; - // Otherwise set it as live and add any non-live atoms that it points to - // to the worklist. - NextLive.setLive(true); + VisitedBlocks.insert(&B); - for (auto &E : NextLive.edges()) { - if (!E.getTarget().isDefined()) - continue; - - auto &EDT = static_cast(E.getTarget()); - - if (EDT.shouldDiscard()) - EdgesToUpdate[&EDT].push_back(&E); - else if (E.isKeepAlive() && !EDT.isLive()) - Worklist.push_back(&EDT); + for (auto &E : Sym->getBlock().edges()) { + if (E.getTarget().isDefined() && !E.getTarget().isLive()) { + E.getTarget().setLive(true); + Worklist.push_back(&E.getTarget()); + } } } - // Collect atoms to remove, then remove them from the graph. - std::vector AtomsToRemove; - for (auto *DA : G.defined_atoms()) - if (DA->shouldDiscard() || !DA->isLive()) - AtomsToRemove.push_back(DA); - - LLVM_DEBUG(dbgs() << "Pruning atoms:\n"); - for (auto *DA : AtomsToRemove) { - LLVM_DEBUG(dbgs() << " " << *DA << "... "); - - // Check whether we need to replace this atom with an external atom. - // - // We replace if all of the following hold: - // (1) The atom is marked should-discard, - // (2) it has live edges (i.e. edges from live atoms) pointing to it. - // - // Otherwise we simply delete the atom. 
- - G.removeDefinedAtom(*DA); - - auto EdgesToUpdateItr = EdgesToUpdate.find(DA); - if (EdgesToUpdateItr != EdgesToUpdate.end()) { - auto &ExternalReplacement = G.addExternalAtom(DA->getName()); - for (auto *EdgeToUpdate : EdgesToUpdateItr->second) - EdgeToUpdate->setTarget(ExternalReplacement); - LLVM_DEBUG(dbgs() << "replaced with " << ExternalReplacement << "\n"); - } else - LLVM_DEBUG(dbgs() << "deleted\n"); + // Collect all the symbols to remove, then remove them. + { + LLVM_DEBUG(dbgs() << "Dead-stripping symbols:\n"); + std::vector SymbolsToRemove; + for (auto *Sym : G.defined_symbols()) + if (!Sym->isLive()) + SymbolsToRemove.push_back(Sym); + for (auto *Sym : SymbolsToRemove) { + LLVM_DEBUG(dbgs() << " " << *Sym << "...\n"); + G.removeDefinedSymbol(*Sym); + } } - // Finally, discard any absolute symbols that were marked should-discard. + // Delete any unused blocks. { - std::vector AbsoluteAtomsToRemove; - for (auto *A : G.absolute_atoms()) - if (A->shouldDiscard() || A->isLive()) - AbsoluteAtomsToRemove.push_back(A); - for (auto *A : AbsoluteAtomsToRemove) - G.removeAbsoluteAtom(*A); + LLVM_DEBUG(dbgs() << "Dead-stripping blocks:\n"); + std::vector BlocksToRemove; + for (auto *B : G.blocks()) + if (!VisitedBlocks.count(B)) + BlocksToRemove.push_back(B); + for (auto *B : BlocksToRemove) { + LLVM_DEBUG(dbgs() << " " << *B << "...\n"); + G.removeBlock(*B); + } } } diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h index e6fd6e38f7a..07dee6cee20 100644 --- a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h +++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h @@ -41,39 +41,32 @@ public: protected: struct SegmentLayout { - using SectionAtomsList = std::vector; - struct SectionLayout { - SectionLayout(Section &S) : S(&S) {} + using BlocksList = std::vector; - Section *S; - SectionAtomsList Atoms; - }; - - using SectionLayoutList = std::vector; - - SectionLayoutList ContentSections; - SectionLayoutList 
ZeroFillSections; + BlocksList ContentBlocks; + BlocksList ZeroFillBlocks; }; using SegmentLayoutMap = DenseMap; // Phase 1: - // 1.1: Build atom graph + // 1.1: Build link graph // 1.2: Run pre-prune passes // 1.2: Prune graph // 1.3: Run post-prune passes - // 1.4: Sort atoms into segments + // 1.4: Sort blocks into segments // 1.5: Allocate segment memory // 1.6: Identify externals and make an async call to resolve function void linkPhase1(std::unique_ptr Self); // Phase 2: // 2.1: Apply resolution results - // 2.2: Fix up atom contents + // 2.2: Fix up block contents // 2.3: Call OnResolved callback // 2.3: Make an async call to transfer and finalize memory. void linkPhase2(std::unique_ptr Self, - Expected LookupResult); + Expected LookupResult, + SegmentLayoutMap Layout); // Phase 3: // 3.1: Call OnFinalized callback, handing off allocation. @@ -81,24 +74,37 @@ protected: // Build a graph from the given object buffer. // To be implemented by the client. - virtual Expected> + virtual Expected> buildGraph(MemoryBufferRef ObjBuffer) = 0; - // For debug dumping of the atom graph. + // For debug dumping of the link graph. virtual StringRef getEdgeKindName(Edge::Kind K) const = 0; + // Alight a JITTargetAddress to conform with block alignment requirements. + static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { + uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); + return Addr + Delta; + } + + // Alight a pointer to conform with block alignment requirements. + static char *alignToBlock(char *P, Block &B) { + uint64_t PAddr = static_cast(reinterpret_cast(P)); + uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment(); + return P + Delta; + } + private: // Run all passes in the given pass list, bailing out immediately if any pass // returns an error. - Error runPasses(AtomGraphPassList &Passes, AtomGraph &G); + Error runPasses(LinkGraphPassList &Passes); - // Copy atom contents and apply relocations. 
+ // Copy block contents and apply relocations. // Implemented in JITLinker. virtual Error - copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout, - JITLinkMemoryManager::Allocation &Alloc) const = 0; + copyAndFixUpBlocks(const SegmentLayoutMap &Layout, + JITLinkMemoryManager::Allocation &Alloc) const = 0; - void layOutAtoms(); + SegmentLayoutMap layOutBlocks(); Error allocateSegments(const SegmentLayoutMap &Layout); DenseSet getExternalSymbolNames() const; void applyLookupResult(AsyncLookupResult LR); @@ -108,8 +114,7 @@ private: std::unique_ptr Ctx; PassConfiguration Passes; - std::unique_ptr G; - SegmentLayoutMap Layout; + std::unique_ptr G; std::unique_ptr Alloc; }; @@ -121,7 +126,7 @@ public: /// Link should be called with the constructor arguments for LinkerImpl, which /// will be forwarded to the constructor. template static void link(ArgTs &&... Args) { - auto L = llvm::make_unique(std::forward(Args)...); + auto L = std::make_unique(std::forward(Args)...); // Ownership of the linker is passed into the linker's doLink function to // allow it to be passed on to async continuations. 
@@ -140,17 +145,17 @@ private: } Error - copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout, - JITLinkMemoryManager::Allocation &Alloc) const override { - LLVM_DEBUG(dbgs() << "Copying and fixing up atoms:\n"); + copyAndFixUpBlocks(const SegmentLayoutMap &Layout, + JITLinkMemoryManager::Allocation &Alloc) const override { + LLVM_DEBUG(dbgs() << "Copying and fixing up blocks:\n"); for (auto &KV : Layout) { auto &Prot = KV.first; auto &SegLayout = KV.second; auto SegMem = Alloc.getWorkingMemory( static_cast(Prot)); - char *LastAtomEnd = SegMem.data(); - char *AtomDataPtr = LastAtomEnd; + char *LastBlockEnd = SegMem.data(); + char *BlockDataPtr = LastBlockEnd; LLVM_DEBUG({ dbgs() << " Processing segment " @@ -160,93 +165,79 @@ private: << " ]\n Processing content sections:\n"; }); - for (auto &SI : SegLayout.ContentSections) { - LLVM_DEBUG(dbgs() << " " << SI.S->getName() << ":\n"); + for (auto *B : SegLayout.ContentBlocks) { + LLVM_DEBUG(dbgs() << " " << *B << ":\n"); - AtomDataPtr += alignmentAdjustment(AtomDataPtr, SI.S->getAlignment()); + // Pad to alignment/alignment-offset. + BlockDataPtr = alignToBlock(BlockDataPtr, *B); LLVM_DEBUG({ - dbgs() << " Bumped atom pointer to " << (const void *)AtomDataPtr - << " to meet section alignment " - << " of " << SI.S->getAlignment() << "\n"; + dbgs() << " Bumped block pointer to " + << (const void *)BlockDataPtr << " to meet block alignment " + << B->getAlignment() << " and alignment offset " + << B->getAlignmentOffset() << "\n"; }); - for (auto *DA : SI.Atoms) { + // Zero pad up to alignment. + LLVM_DEBUG({ + if (LastBlockEnd != BlockDataPtr) + dbgs() << " Zero padding from " << (const void *)LastBlockEnd + << " to " << (const void *)BlockDataPtr << "\n"; + }); - // Align. 
- AtomDataPtr += alignmentAdjustment(AtomDataPtr, DA->getAlignment()); - LLVM_DEBUG({ - dbgs() << " Bumped atom pointer to " - << (const void *)AtomDataPtr << " to meet alignment of " - << DA->getAlignment() << "\n"; - }); + while (LastBlockEnd != BlockDataPtr) + *LastBlockEnd++ = 0; - // Zero pad up to alignment. - LLVM_DEBUG({ - if (LastAtomEnd != AtomDataPtr) - dbgs() << " Zero padding from " << (const void *)LastAtomEnd - << " to " << (const void *)AtomDataPtr << "\n"; - }); - while (LastAtomEnd != AtomDataPtr) - *LastAtomEnd++ = 0; + // Copy initial block content. + LLVM_DEBUG({ + dbgs() << " Copying block " << *B << " content, " + << B->getContent().size() << " bytes, from " + << (const void *)B->getContent().data() << " to " + << (const void *)BlockDataPtr << "\n"; + }); + memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size()); - // Copy initial atom content. - LLVM_DEBUG({ - dbgs() << " Copying atom " << *DA << " content, " - << DA->getContent().size() << " bytes, from " - << (const void *)DA->getContent().data() << " to " - << (const void *)AtomDataPtr << "\n"; - }); - memcpy(AtomDataPtr, DA->getContent().data(), DA->getContent().size()); + // Copy Block data and apply fixups. + LLVM_DEBUG(dbgs() << " Applying fixups.\n"); + for (auto &E : B->edges()) { - // Copy atom data and apply fixups. - LLVM_DEBUG(dbgs() << " Applying fixups.\n"); - for (auto &E : DA->edges()) { + // Skip non-relocation edges. + if (!E.isRelocation()) + continue; - // Skip non-relocation edges. - if (!E.isRelocation()) - continue; - - // Dispatch to LinkerImpl for fixup. - if (auto Err = impl().applyFixup(*DA, E, AtomDataPtr)) - return Err; - } - - // Point the atom's content to the fixed up buffer. - DA->setContent(StringRef(AtomDataPtr, DA->getContent().size())); - - // Update atom end pointer. - LastAtomEnd = AtomDataPtr + DA->getContent().size(); - AtomDataPtr = LastAtomEnd; + // Dispatch to LinkerImpl for fixup. 
+ if (auto Err = impl().applyFixup(*B, E, BlockDataPtr)) + return Err; } + + // Point the block's content to the fixed up buffer. + B->setContent(StringRef(BlockDataPtr, B->getContent().size())); + + // Update block end pointer. + LastBlockEnd = BlockDataPtr + B->getContent().size(); + BlockDataPtr = LastBlockEnd; } // Zero pad the rest of the segment. LLVM_DEBUG({ dbgs() << " Zero padding end of segment from " - << (const void *)LastAtomEnd << " to " + << (const void *)LastBlockEnd << " to " << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n"; }); - while (LastAtomEnd != SegMem.data() + SegMem.size()) - *LastAtomEnd++ = 0; + while (LastBlockEnd != SegMem.data() + SegMem.size()) + *LastBlockEnd++ = 0; } return Error::success(); } }; -/// Dead strips and replaces discarded definitions with external atoms. +/// Removes dead symbols/blocks/addressables. /// -/// Finds the set of nodes reachable from any node initially marked live -/// (nodes marked should-discard are treated as not live, even if they are -/// reachable). All nodes not marked as live at the end of this process, -/// are deleted. Nodes that are live, but marked should-discard are replaced -/// with external atoms and all edges to them are re-written. -void prune(AtomGraph &G); - -Error addEHFrame(AtomGraph &G, Section &EHFrameSection, - StringRef EHFrameContent, JITTargetAddress EHFrameAddress, - Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind); +/// Finds the set of symbols and addressables reachable from any symbol +/// initially marked live. All symbols/addressables not marked live at the end +/// of this process are removed. 
+void prune(LinkGraph &G); } // end namespace jitlink } // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index 267307cfde0..9e0d207e8bd 100644 --- a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -38,9 +38,21 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) { OnFinalize(applyProtections()); } Error deallocate() override { - for (auto &KV : SegBlocks) - if (auto EC = sys::Memory::releaseMappedMemory(KV.second)) - return errorCodeToError(EC); + if (SegBlocks.empty()) + return Error::success(); + void *SlabStart = SegBlocks.begin()->second.base(); + char *SlabEnd = (char *)SlabStart; + for (auto &KV : SegBlocks) { + SlabStart = std::min(SlabStart, KV.second.base()); + SlabEnd = std::max(SlabEnd, (char *)(KV.second.base()) + + KV.second.allocatedSize()); + } + size_t SlabSize = SlabEnd - (char *)SlabStart; + assert((SlabSize % sys::Process::getPageSizeEstimate()) == 0 && + "Slab size is not a multiple of page size"); + sys::MemoryBlock Slab(SlabStart, SlabSize); + if (auto EC = sys::Memory::releaseMappedMemory(Slab)) + return errorCodeToError(EC); return Error::success(); } @@ -61,37 +73,52 @@ InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) { AllocationMap SegBlocks; }; + if (!isPowerOf2_64((uint64_t)sys::Process::getPageSizeEstimate())) + return make_error("Page size is not a power of 2", + inconvertibleErrorCode()); + AllocationMap Blocks; const sys::Memory::ProtectionFlags ReadWrite = static_cast(sys::Memory::MF_READ | sys::Memory::MF_WRITE); + // Compute the total number of pages to allocate. 
+ size_t TotalSize = 0; for (auto &KV : Request) { - auto &Seg = KV.second; + const auto &Seg = KV.second; - if (Seg.getContentAlignment() > sys::Process::getPageSizeEstimate()) + if (Seg.getAlignment() > sys::Process::getPageSizeEstimate()) return make_error("Cannot request higher than page " "alignment", inconvertibleErrorCode()); - if (sys::Process::getPageSizeEstimate() % Seg.getContentAlignment() != 0) - return make_error("Page size is not a multiple of " - "alignment", - inconvertibleErrorCode()); + TotalSize = alignTo(TotalSize, sys::Process::getPageSizeEstimate()); + TotalSize += Seg.getContentSize(); + TotalSize += Seg.getZeroFillSize(); + } - uint64_t ZeroFillStart = - alignTo(Seg.getContentSize(), Seg.getZeroFillAlignment()); - uint64_t SegmentSize = ZeroFillStart + Seg.getZeroFillSize(); + // Allocate one slab to cover all the segments. + std::error_code EC; + auto SlabRemaining = + sys::Memory::allocateMappedMemory(TotalSize, nullptr, ReadWrite, EC); - std::error_code EC; - auto SegMem = - sys::Memory::allocateMappedMemory(SegmentSize, nullptr, ReadWrite, EC); + if (EC) + return errorCodeToError(EC); - if (EC) - return errorCodeToError(EC); + // Allocate segment memory from the slab. + for (auto &KV : Request) { + + const auto &Seg = KV.second; + + uint64_t SegmentSize = alignTo(Seg.getContentSize() + Seg.getZeroFillSize(), + sys::Process::getPageSizeEstimate()); + + sys::MemoryBlock SegMem(SlabRemaining.base(), SegmentSize); + SlabRemaining = sys::MemoryBlock((char *)SlabRemaining.base() + SegmentSize, + SegmentSize); // Zero out the zero-fill memory. - memset(static_cast(SegMem.base()) + ZeroFillStart, 0, + memset(static_cast(SegMem.base()) + Seg.getContentSize(), 0, Seg.getZeroFillSize()); // Record the block for this segment. 
diff --git a/lib/ExecutionEngine/JITLink/MachO.cpp b/lib/ExecutionEngine/JITLink/MachO.cpp index 15995b8ce98..58bc0f56e15 100644 --- a/lib/ExecutionEngine/JITLink/MachO.cpp +++ b/lib/ExecutionEngine/JITLink/MachO.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/JITLink/MachO.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h" #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" @@ -64,6 +65,8 @@ void jitLink_MachO(std::unique_ptr Ctx) { }); switch (Header.cputype) { + case MachO::CPU_TYPE_ARM64: + return jitLink_MachO_arm64(std::move(Ctx)); case MachO::CPU_TYPE_X86_64: return jitLink_MachO_x86_64(std::move(Ctx)); } diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp deleted file mode 100644 index 1501c7ad0bc..00000000000 --- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp +++ /dev/null @@ -1,411 +0,0 @@ -//=--------- MachOAtomGraphBuilder.cpp - MachO AtomGraph builder ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic MachO AtomGraph buliding code. 
-// -//===----------------------------------------------------------------------===// - -#include "MachOAtomGraphBuilder.h" - -#define DEBUG_TYPE "jitlink" - -namespace llvm { -namespace jitlink { - -MachOAtomGraphBuilder::~MachOAtomGraphBuilder() {} - -Expected> MachOAtomGraphBuilder::buildGraph() { - if (auto Err = parseSections()) - return std::move(Err); - - if (auto Err = addAtoms()) - return std::move(Err); - - if (auto Err = addRelocations()) - return std::move(Err); - - return std::move(G); -} - -MachOAtomGraphBuilder::MachOAtomGraphBuilder(const object::MachOObjectFile &Obj) - : Obj(Obj), - G(llvm::make_unique(Obj.getFileName(), getPointerSize(Obj), - getEndianness(Obj))) {} - -void MachOAtomGraphBuilder::addCustomAtomizer(StringRef SectionName, - CustomAtomizeFunction Atomizer) { - assert(!CustomAtomizeFunctions.count(SectionName) && - "Custom atomizer for this section already exists"); - CustomAtomizeFunctions[SectionName] = std::move(Atomizer); -} - -bool MachOAtomGraphBuilder::areLayoutLocked(const Atom &A, const Atom &B) { - // If these atoms are the same then they're trivially "locked". - if (&A == &B) - return true; - - // If A and B are different, check whether either is undefined. (in which - // case they are not locked). - if (!A.isDefined() || !B.isDefined()) - return false; - - // A and B are different, but they're both defined atoms. We need to check - // whether they're part of the same alt_entry chain. - auto &DA = static_cast(A); - auto &DB = static_cast(B); - - auto AStartItr = AltEntryStarts.find(&DA); - if (AStartItr == AltEntryStarts.end()) // If A is not in a chain bail out. - return false; - - auto BStartItr = AltEntryStarts.find(&DB); - if (BStartItr == AltEntryStarts.end()) // If B is not in a chain bail out. - return false; - - // A and B are layout locked if they're in the same chain. 
- return AStartItr->second == BStartItr->second; -} - -unsigned -MachOAtomGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { - return Obj.is64Bit() ? 8 : 4; -} - -support::endianness -MachOAtomGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { - return Obj.isLittleEndian() ? support::little : support::big; -} - -MachOAtomGraphBuilder::MachOSection &MachOAtomGraphBuilder::getCommonSection() { - if (!CommonSymbolsSection) { - auto Prot = static_cast( - sys::Memory::MF_READ | sys::Memory::MF_WRITE); - auto &GenericSection = G->createSection("", 1, Prot, true); - CommonSymbolsSection = MachOSection(GenericSection); - } - return *CommonSymbolsSection; -} - -Error MachOAtomGraphBuilder::parseSections() { - for (auto &SecRef : Obj.sections()) { - assert((SecRef.getAlignment() <= std::numeric_limits::max()) && - "Section alignment does not fit in 32 bits"); - - StringRef Name; - if (auto EC = SecRef.getName(Name)) - return errorCodeToError(EC); - - unsigned SectionIndex = SecRef.getIndex() + 1; - - uint32_t Align = SecRef.getAlignment(); - if (!isPowerOf2_32(Align)) - return make_error("Section " + Name + - " has non-power-of-2 " - "alignment"); - - // FIXME: Get real section permissions - // How, exactly, on MachO? 
- sys::Memory::ProtectionFlags Prot; - if (SecRef.isText()) - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_EXEC); - else - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_WRITE); - - auto &GenericSection = G->createSection(Name, Align, Prot, SecRef.isBSS()); - - LLVM_DEBUG({ - dbgs() << "Adding section " << Name << ": " - << format("0x%016" PRIx64, SecRef.getAddress()) - << ", align: " << SecRef.getAlignment() << "\n"; - }); - - assert(!Sections.count(SectionIndex) && "Section index already in use"); - - auto &MachOSec = - Sections - .try_emplace(SectionIndex, GenericSection, SecRef.getAddress(), - SecRef.getAlignment()) - .first->second; - - if (!SecRef.isVirtual()) { - // If this section has content then record it. - Expected Content = SecRef.getContents(); - if (!Content) - return Content.takeError(); - if (Content->size() != SecRef.getSize()) - return make_error("Section content size does not match " - "declared size for " + - Name); - MachOSec.setContent(*Content); - } else { - // If this is a zero-fill section then just record the size. - MachOSec.setZeroFill(SecRef.getSize()); - } - - uint32_t SectionFlags = - Obj.is64Bit() ? Obj.getSection64(SecRef.getRawDataRefImpl()).flags - : Obj.getSection(SecRef.getRawDataRefImpl()).flags; - - MachOSec.setNoDeadStrip(SectionFlags & MachO::S_ATTR_NO_DEAD_STRIP); - } - - return Error::success(); -} - -// Adds atoms with identified start addresses (but not lengths) for all named -// atoms. -// Also, for every section that contains named atoms, but does not have an -// atom at offset zero of that section, constructs an anonymous atom covering -// that range. -Error MachOAtomGraphBuilder::addNonCustomAtoms() { - using AddrToAtomMap = std::map; - DenseMap SecToAtoms; - - DenseMap FirstOrdinal; - std::vector AltEntryAtoms; - - DenseSet ProcessedSymbols; // Used to check for duplicate defs. 
- - for (auto SymI = Obj.symbol_begin(), SymE = Obj.symbol_end(); SymI != SymE; - ++SymI) { - object::SymbolRef Sym(SymI->getRawDataRefImpl(), &Obj); - - auto Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Bail out on duplicate definitions: There should never be more than one - // definition for a symbol in a given object file. - if (ProcessedSymbols.count(*Name)) - return make_error("Duplicate definition within object: " + - *Name); - else - ProcessedSymbols.insert(*Name); - - auto Addr = Sym.getAddress(); - if (!Addr) - return Addr.takeError(); - - auto SymType = Sym.getType(); - if (!SymType) - return SymType.takeError(); - - auto Flags = Sym.getFlags(); - - if (Flags & object::SymbolRef::SF_Undefined) { - LLVM_DEBUG(dbgs() << "Adding undef atom \"" << *Name << "\"\n"); - G->addExternalAtom(*Name); - continue; - } else if (Flags & object::SymbolRef::SF_Absolute) { - LLVM_DEBUG(dbgs() << "Adding absolute \"" << *Name << "\" addr: " - << format("0x%016" PRIx64, *Addr) << "\n"); - auto &A = G->addAbsoluteAtom(*Name, *Addr); - A.setGlobal(Flags & object::SymbolRef::SF_Global); - A.setExported(Flags & object::SymbolRef::SF_Exported); - A.setWeak(Flags & object::SymbolRef::SF_Weak); - continue; - } else if (Flags & object::SymbolRef::SF_Common) { - LLVM_DEBUG({ - dbgs() << "Adding common \"" << *Name - << "\" addr: " << format("0x%016" PRIx64, *Addr) << "\n"; - }); - auto &A = - G->addCommonAtom(getCommonSection().getGenericSection(), *Name, *Addr, - std::max(Sym.getAlignment(), 1U), - Obj.getCommonSymbolSize(Sym.getRawDataRefImpl())); - A.setGlobal(Flags & object::SymbolRef::SF_Global); - A.setExported(Flags & object::SymbolRef::SF_Exported); - continue; - } - - LLVM_DEBUG(dbgs() << "Adding defined atom \"" << *Name << "\"\n"); - - // This atom is neither undefined nor absolute, so it must be defined in - // this object. Get its section index. 
- auto SecItr = Sym.getSection(); - if (!SecItr) - return SecItr.takeError(); - - uint64_t SectionIndex = (*SecItr)->getIndex() + 1; - - LLVM_DEBUG(dbgs() << " to section index " << SectionIndex << "\n"); - - auto SecByIndexItr = Sections.find(SectionIndex); - if (SecByIndexItr == Sections.end()) - return make_error("Unrecognized section index in macho"); - - auto &Sec = SecByIndexItr->second; - - auto &DA = G->addDefinedAtom(Sec.getGenericSection(), *Name, *Addr, - std::max(Sym.getAlignment(), 1U)); - - DA.setGlobal(Flags & object::SymbolRef::SF_Global); - DA.setExported(Flags & object::SymbolRef::SF_Exported); - DA.setWeak(Flags & object::SymbolRef::SF_Weak); - - DA.setCallable(*SymType & object::SymbolRef::ST_Function); - - // Check NDesc flags. - { - uint16_t NDesc = 0; - if (Obj.is64Bit()) - NDesc = Obj.getSymbol64TableEntry(SymI->getRawDataRefImpl()).n_desc; - else - NDesc = Obj.getSymbolTableEntry(SymI->getRawDataRefImpl()).n_desc; - - // Record atom for alt-entry post-processing (where the layout-next - // constraints will be added). - if (NDesc & MachO::N_ALT_ENTRY) - AltEntryAtoms.push_back(&DA); - - // If this atom has a no-dead-strip attr attached then mark it live. - if (NDesc & MachO::N_NO_DEAD_STRIP) - DA.setLive(true); - } - - LLVM_DEBUG({ - dbgs() << " Added " << *Name - << " addr: " << format("0x%016" PRIx64, *Addr) - << ", align: " << DA.getAlignment() - << ", section: " << Sec.getGenericSection().getName() << "\n"; - }); - - auto &SecAtoms = SecToAtoms[&Sec]; - SecAtoms[DA.getAddress() - Sec.getAddress()] = &DA; - } - - // Add anonymous atoms. - for (auto &KV : Sections) { - auto &S = KV.second; - - // Skip empty sections. - if (S.empty()) - continue; - - // Skip sections with custom handling. - if (CustomAtomizeFunctions.count(S.getName())) - continue; - - auto SAI = SecToAtoms.find(&S); - - // If S is not in the SecToAtoms map then it contained no named atom. Add - // one anonymous atom to cover the whole section. 
- if (SAI == SecToAtoms.end()) { - SecToAtoms[&S][0] = &G->addAnonymousAtom( - S.getGenericSection(), S.getAddress(), S.getAlignment()); - continue; - } - - // Otherwise, check whether this section had an atom covering offset zero. - // If not, add one. - auto &SecAtoms = SAI->second; - if (!SecAtoms.count(0)) - SecAtoms[0] = &G->addAnonymousAtom(S.getGenericSection(), S.getAddress(), - S.getAlignment()); - } - - LLVM_DEBUG(dbgs() << "MachOGraphBuilder setting atom content\n"); - - // Set atom contents and any section-based flags. - for (auto &KV : SecToAtoms) { - auto &S = *KV.first; - auto &SecAtoms = KV.second; - - // Iterate the atoms in reverse order and set up their contents. - JITTargetAddress LastAtomAddr = S.getSize(); - for (auto I = SecAtoms.rbegin(), E = SecAtoms.rend(); I != E; ++I) { - auto Offset = I->first; - auto &A = *I->second; - LLVM_DEBUG({ - dbgs() << " " << A << " to [ " << S.getAddress() + Offset << " .. " - << S.getAddress() + LastAtomAddr << " ]\n"; - }); - - if (S.isZeroFill()) - A.setZeroFill(LastAtomAddr - Offset); - else - A.setContent(S.getContent().substr(Offset, LastAtomAddr - Offset)); - - // If the section has no-dead-strip set then mark the atom as live. - if (S.isNoDeadStrip()) - A.setLive(true); - - LastAtomAddr = Offset; - } - } - - LLVM_DEBUG(dbgs() << "Adding alt-entry starts\n"); - - // Sort alt-entry atoms by address in ascending order. - llvm::sort(AltEntryAtoms.begin(), AltEntryAtoms.end(), - [](const DefinedAtom *LHS, const DefinedAtom *RHS) { - return LHS->getAddress() < RHS->getAddress(); - }); - - // Process alt-entry atoms in address order to build the table of alt-entry - // atoms to alt-entry chain starts. - for (auto *DA : AltEntryAtoms) { - assert(!AltEntryStarts.count(DA) && "Duplicate entry in AltEntryStarts"); - - // DA is an alt-entry atom. Look for the predecessor atom that it is locked - // to, bailing out if we do not find one. 
- auto AltEntryPred = G->findAtomByAddress(DA->getAddress() - 1); - if (!AltEntryPred) - return AltEntryPred.takeError(); - - // Add a LayoutNext edge from the predecessor to this atom. - AltEntryPred->setLayoutNext(*DA); - - // Check to see whether the predecessor itself is an alt-entry atom. - auto AltEntryStartItr = AltEntryStarts.find(&*AltEntryPred); - if (AltEntryStartItr != AltEntryStarts.end()) { - // If the predecessor was an alt-entry atom then re-use its value. - LLVM_DEBUG({ - dbgs() << " " << *DA << " -> " << *AltEntryStartItr->second - << " (based on existing entry for " << *AltEntryPred << ")\n"; - }); - AltEntryStarts[DA] = AltEntryStartItr->second; - } else { - // If the predecessor does not have an entry then add an entry for this - // atom (i.e. the alt_entry atom) and a self-reference entry for the - /// predecessory atom that is the start of this chain. - LLVM_DEBUG({ - dbgs() << " " << *AltEntryPred << " -> " << *AltEntryPred << "\n" - << " " << *DA << " -> " << *AltEntryPred << "\n"; - }); - AltEntryStarts[&*AltEntryPred] = &*AltEntryPred; - AltEntryStarts[DA] = &*AltEntryPred; - } - } - - return Error::success(); -} - -Error MachOAtomGraphBuilder::addAtoms() { - // Add all named atoms. - if (auto Err = addNonCustomAtoms()) - return Err; - - // Process special sections. 
- for (auto &KV : Sections) { - auto &S = KV.second; - auto HI = CustomAtomizeFunctions.find(S.getGenericSection().getName()); - if (HI != CustomAtomizeFunctions.end()) { - auto &Atomize = HI->second; - if (auto Err = Atomize(S)) - return Err; - } - } - - return Error::success(); -} - -} // end namespace jitlink -} // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h deleted file mode 100644 index 72d441b24d0..00000000000 --- a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h +++ /dev/null @@ -1,138 +0,0 @@ -//===----- MachOAtomGraphBuilder.h - MachO AtomGraph builder ----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic MachO AtomGraph building code. -// -//===----------------------------------------------------------------------===// - -#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H -#define LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H - -#include "llvm/ExecutionEngine/JITLink/JITLink.h" - -#include "JITLinkGeneric.h" - -#include "llvm/Object/MachO.h" - -namespace llvm { -namespace jitlink { - -class MachOAtomGraphBuilder { -public: - virtual ~MachOAtomGraphBuilder(); - Expected> buildGraph(); - -protected: - using OffsetToAtomMap = std::map; - - class MachOSection { - public: - MachOSection() = default; - - /// Create a MachO section with the given address and alignment. - MachOSection(Section &GenericSection, JITTargetAddress Address, - unsigned Alignment) - : Address(Address), GenericSection(&GenericSection), - Alignment(Alignment) {} - - /// Create a section without address, content or size (used for common - /// symbol sections). 
- MachOSection(Section &GenericSection) : GenericSection(&GenericSection) {} - - Section &getGenericSection() const { - assert(GenericSection && "Section is null"); - return *GenericSection; - } - - StringRef getName() const { - assert(GenericSection && "No generic section attached"); - return GenericSection->getName(); - } - - MachOSection &setContent(StringRef Content) { - assert(!ContentPtr && !Size && "Content/zeroFill already set"); - ContentPtr = Content.data(); - Size = Content.size(); - return *this; - } - - MachOSection &setZeroFill(uint64_t Size) { - assert(!ContentPtr && !this->Size && "Content/zeroFill already set"); - this->Size = Size; - return *this; - } - - bool isZeroFill() const { return !ContentPtr; } - - bool empty() const { return getSize() == 0; } - - size_t getSize() const { return Size; } - - StringRef getContent() const { - assert(ContentPtr && "getContent() called on zero-fill section"); - return {ContentPtr, static_cast(Size)}; - } - - JITTargetAddress getAddress() const { return Address; } - - unsigned getAlignment() const { return Alignment; } - - MachOSection &setNoDeadStrip(bool NoDeadStrip) { - this->NoDeadStrip = NoDeadStrip; - return *this; - } - - bool isNoDeadStrip() const { return NoDeadStrip; } - - private: - JITTargetAddress Address = 0; - Section *GenericSection = nullptr; - const char *ContentPtr = nullptr; - uint64_t Size = 0; - unsigned Alignment = 0; - bool NoDeadStrip = false; - }; - - using CustomAtomizeFunction = std::function; - - MachOAtomGraphBuilder(const object::MachOObjectFile &Obj); - - AtomGraph &getGraph() const { return *G; } - - const object::MachOObjectFile &getObject() const { return Obj; } - - void addCustomAtomizer(StringRef SectionName, CustomAtomizeFunction Atomizer); - - virtual Error addRelocations() = 0; - - /// Returns true if Atom A and Atom B are at a fixed offset from one another - /// (i.e. if they're part of the same alt-entry chain). 
- bool areLayoutLocked(const Atom &A, const Atom &B); - -private: - static unsigned getPointerSize(const object::MachOObjectFile &Obj); - static support::endianness getEndianness(const object::MachOObjectFile &Obj); - - MachOSection &getCommonSection(); - - Error parseSections(); - Error addNonCustomAtoms(); - Error addAtoms(); - - const object::MachOObjectFile &Obj; - std::unique_ptr G; - DenseMap AltEntryStarts; - DenseMap Sections; - StringMap CustomAtomizeFunctions; - Optional CommonSymbolsSection; -}; - -} // end namespace jitlink -} // end namespace llvm - -#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp new file mode 100644 index 00000000000..7366f53ebf3 --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -0,0 +1,535 @@ +//=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic MachO LinkGraph buliding code. +// +//===----------------------------------------------------------------------===// + +#include "MachOLinkGraphBuilder.h" + +#define DEBUG_TYPE "jitlink" + +static const char *CommonSectionName = "__common"; + +namespace llvm { +namespace jitlink { + +MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {} + +Expected> MachOLinkGraphBuilder::buildGraph() { + + // Sanity check: we only operate on relocatable objects. 
+ if (!Obj.isRelocatableObject()) + return make_error("Object is not a relocatable MachO"); + + if (auto Err = createNormalizedSections()) + return std::move(Err); + + if (auto Err = createNormalizedSymbols()) + return std::move(Err); + + if (auto Err = graphifyRegularSymbols()) + return std::move(Err); + + if (auto Err = graphifySectionsWithCustomParsers()) + return std::move(Err); + + if (auto Err = addRelocations()) + return std::move(Err); + + return std::move(G); +} + +MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj) + : Obj(Obj), + G(std::make_unique(Obj.getFileName(), getPointerSize(Obj), + getEndianness(Obj))) {} + +void MachOLinkGraphBuilder::addCustomSectionParser( + StringRef SectionName, SectionParserFunction Parser) { + assert(!CustomSectionParserFunctions.count(SectionName) && + "Custom parser for this section already exists"); + CustomSectionParserFunctions[SectionName] = std::move(Parser); +} + +Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) { + if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF)) + return Linkage::Weak; + return Linkage::Strong; +} + +Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) { + if (Name.startswith("l")) + return Scope::Local; + if (Type & MachO::N_PEXT) + return Scope::Hidden; + if (Type & MachO::N_EXT) + return Scope::Default; + return Scope::Local; +} + +bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) { + return NSym.Desc & MachO::N_ALT_ENTRY; +} + +unsigned +MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) { + return Obj.is64Bit() ? 8 : 4; +} + +support::endianness +MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) { + return Obj.isLittleEndian() ? 
support::little : support::big; +} + +Section &MachOLinkGraphBuilder::getCommonSection() { + if (!CommonSection) { + auto Prot = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_WRITE); + CommonSection = &G->createSection(CommonSectionName, Prot); + } + return *CommonSection; +} + +Error MachOLinkGraphBuilder::createNormalizedSections() { + // Build normalized sections. Verifies that section data is in-range (for + // sections with content) and that address ranges are non-overlapping. + + LLVM_DEBUG(dbgs() << "Creating normalized sections...\n"); + + for (auto &SecRef : Obj.sections()) { + NormalizedSection NSec; + uint32_t DataOffset = 0; + + auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl()); + + auto Name = SecRef.getName(); + if (!Name) + return Name.takeError(); + + if (Obj.is64Bit()) { + const MachO::section_64 &Sec64 = + Obj.getSection64(SecRef.getRawDataRefImpl()); + + NSec.Address = Sec64.addr; + NSec.Size = Sec64.size; + NSec.Alignment = 1ULL << Sec64.align; + NSec.Flags = Sec64.flags; + DataOffset = Sec64.offset; + } else { + const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl()); + NSec.Address = Sec32.addr; + NSec.Size = Sec32.size; + NSec.Alignment = 1ULL << Sec32.align; + NSec.Flags = Sec32.flags; + DataOffset = Sec32.offset; + } + + LLVM_DEBUG({ + dbgs() << " " << *Name << ": " << formatv("{0:x16}", NSec.Address) + << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size) + << ", align: " << NSec.Alignment << ", index: " << SecIndex + << "\n"; + }); + + // Get the section data if any. + { + unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE; + if (SectionType != MachO::S_ZEROFILL && + SectionType != MachO::S_GB_ZEROFILL) { + + if (DataOffset + NSec.Size > Obj.getData().size()) + return make_error( + "Section data extends past end of file"); + + NSec.Data = Obj.getData().data() + DataOffset; + } + } + + // Get prot flags. + // FIXME: Make sure this test is correct (it's probably missing cases + // as-is). 
+ sys::Memory::ProtectionFlags Prot; + if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + else + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + + NSec.GraphSection = &G->createSection(*Name, Prot); + IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec))); + } + + std::vector Sections; + Sections.reserve(IndexToSection.size()); + for (auto &KV : IndexToSection) + Sections.push_back(&KV.second); + + // If we didn't end up creating any sections then bail out. The code below + // assumes that we have at least one section. + if (Sections.empty()) + return Error::success(); + + llvm::sort(Sections, + [](const NormalizedSection *LHS, const NormalizedSection *RHS) { + assert(LHS && RHS && "Null section?"); + return LHS->Address < RHS->Address; + }); + + for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) { + auto &Cur = *Sections[I]; + auto &Next = *Sections[I + 1]; + if (Next.Address < Cur.Address + Cur.Size) + return make_error( + "Address range for section " + Cur.GraphSection->getName() + + formatv(" [ {0:x16} -- {1:x16} ] ", Cur.Address, + Cur.Address + Cur.Size) + + "overlaps " + + formatv(" [ {0:x16} -- {1:x16} ] ", Next.Address, + Next.Address + Next.Size)); + } + + return Error::success(); +} + +Error MachOLinkGraphBuilder::createNormalizedSymbols() { + LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n"); + + for (auto &SymRef : Obj.symbols()) { + + unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl()); + uint64_t Value; + uint32_t NStrX; + uint8_t Type; + uint8_t Sect; + uint16_t Desc; + + if (Obj.is64Bit()) { + const MachO::nlist_64 &NL64 = + Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl()); + Value = NL64.n_value; + NStrX = NL64.n_strx; + Type = NL64.n_type; + Sect = NL64.n_sect; + Desc = NL64.n_desc; + } else { + const MachO::nlist &NL32 = + Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl()); + Value = NL32.n_value; + NStrX = 
NL32.n_strx; + Type = NL32.n_type; + Sect = NL32.n_sect; + Desc = NL32.n_desc; + } + + // Skip stabs. + // FIXME: Are there other symbols we should be skipping? + if (Type & MachO::N_STAB) + continue; + + Optional Name; + if (NStrX) { + if (auto NameOrErr = SymRef.getName()) + Name = *NameOrErr; + else + return NameOrErr.takeError(); + } + + LLVM_DEBUG({ + dbgs() << " "; + if (!Name) + dbgs() << ""; + else + dbgs() << *Name; + dbgs() << ": value = " << formatv("{0:x16}", Value) + << ", type = " << formatv("{0:x2}", Type) + << ", desc = " << formatv("{0:x4}", Desc) << ", sect = "; + if (Sect) + dbgs() << static_cast(Sect - 1); + else + dbgs() << "none"; + dbgs() << "\n"; + }); + + // If this symbol has a section, sanity check that the addresses line up. + NormalizedSection *NSec = nullptr; + if (Sect != 0) { + if (auto NSecOrErr = findSectionByIndex(Sect - 1)) + NSec = &*NSecOrErr; + else + return NSecOrErr.takeError(); + + if (Value < NSec->Address || Value > NSec->Address + NSec->Size) + return make_error("Symbol address does not fall within " + "section"); + } + + IndexToSymbol[SymbolIndex] = + &createNormalizedSymbol(*Name, Value, Type, Sect, Desc, + getLinkage(Type), getScope(*Name, Type)); + } + + return Error::success(); +} + +void MachOLinkGraphBuilder::addSectionStartSymAndBlock( + Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size, + uint32_t Alignment, bool IsLive) { + Block &B = + Data ? 
G->createContentBlock(GraphSec, StringRef(Data, Size), Address, + Alignment, 0) + : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0); + auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive); + assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) && + "Anonymous block start symbol clashes with existing symbol address"); + AddrToCanonicalSymbol[Sym.getAddress()] = &Sym; +} + +Error MachOLinkGraphBuilder::graphifyRegularSymbols() { + + LLVM_DEBUG(dbgs() << "Creating graph symbols...\n"); + + /// We only have 256 section indexes: Use a vector rather than a map. + std::vector> SecIndexToSymbols; + SecIndexToSymbols.resize(256); + + // Create commons, externs, and absolutes, and partition all other symbols by + // section. + for (auto &KV : IndexToSymbol) { + auto &NSym = *KV.second; + + switch (NSym.Type & MachO::N_TYPE) { + case MachO::N_UNDF: + if (NSym.Value) { + if (!NSym.Name) + return make_error("Anonymous common symbol at index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addCommonSymbol( + *NSym.Name, NSym.S, getCommonSection(), NSym.Value, 0, + 1ull << MachO::GET_COMM_ALIGN(NSym.Desc), + NSym.Desc & MachO::N_NO_DEAD_STRIP); + } else { + if (!NSym.Name) + return make_error("Anonymous external symbol at " + "index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0); + } + break; + case MachO::N_ABS: + if (!NSym.Name) + return make_error("Anonymous absolute symbol at index " + + Twine(KV.first)); + NSym.GraphSymbol = &G->addAbsoluteSymbol( + *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default, + NSym.Desc & MachO::N_NO_DEAD_STRIP); + break; + case MachO::N_SECT: + SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym); + break; + case MachO::N_PBUD: + return make_error( + "Unupported N_PBUD symbol " + + (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + case MachO::N_INDR: + return make_error( + "Unupported N_INDR symbol " + + (NSym.Name ? 
("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + default: + return make_error( + "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) + + " for symbol " + + (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("")) + + " at index " + Twine(KV.first)); + } + } + + // Loop over sections performing regular graphification for those that + // don't have custom parsers. + for (auto &KV : IndexToSection) { + auto SecIndex = KV.first; + auto &NSec = KV.second; + + // Skip sections with custom parsers. + if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) { + LLVM_DEBUG({ + dbgs() << " Skipping section " << NSec.GraphSection->getName() + << " as it has a custom parser.\n"; + }); + continue; + } else + LLVM_DEBUG({ + dbgs() << " Processing section " << NSec.GraphSection->getName() + << "...\n"; + }); + + bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP; + bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; + + auto &SecNSymStack = SecIndexToSymbols[SecIndex]; + + // If this section is non-empty but there are no symbols covering it then + // create one block and anonymous symbol to cover the entire section. + if (SecNSymStack.empty()) { + if (NSec.Size > 0) { + LLVM_DEBUG({ + dbgs() << " Section non-empty, but contains no symbols. " + "Creating anonymous block to cover " + << formatv("{0:x16}", NSec.Address) << " -- " + << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n"; + }); + addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, + NSec.Size, NSec.Alignment, + SectionIsNoDeadStrip); + } else + LLVM_DEBUG({ + dbgs() << " Section empty and contains no symbols. Skipping.\n"; + }); + continue; + } + + // Sort the symbol stack in by address, alt-entry status, scope, and name. + // We sort in reverse order so that symbols will be visited in the right + // order when we pop off the stack below. 
+ llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS, + const NormalizedSymbol *RHS) { + if (LHS->Value != RHS->Value) + return LHS->Value > RHS->Value; + if (isAltEntry(*LHS) != isAltEntry(*RHS)) + return isAltEntry(*RHS); + if (LHS->S != RHS->S) + return static_cast(LHS->S) < static_cast(RHS->S); + return LHS->Name < RHS->Name; + }); + + // The first symbol in a section can not be an alt-entry symbol. + if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back())) + return make_error( + "First symbol in " + NSec.GraphSection->getName() + " is alt-entry"); + + // If the section is non-empty but there is no symbol covering the start + // address then add an anonymous one. + if (SecNSymStack.back()->Value != NSec.Address) { + auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address; + LLVM_DEBUG({ + dbgs() << " Section start not covered by symbol. " + << "Creating anonymous block to cover [ " + << formatv("{0:x16}", NSec.Address) << " -- " + << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n"; + }); + addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data, + AnonBlockSize, NSec.Alignment, + SectionIsNoDeadStrip); + } + + // Visit section symbols in order by popping off the reverse-sorted stack, + // building blocks for each alt-entry chain and creating symbols as we go. + while (!SecNSymStack.empty()) { + SmallVector BlockSyms; + + BlockSyms.push_back(SecNSymStack.back()); + SecNSymStack.pop_back(); + while (!SecNSymStack.empty() && + (isAltEntry(*SecNSymStack.back()) || + SecNSymStack.back()->Value == BlockSyms.back()->Value)) { + BlockSyms.push_back(SecNSymStack.back()); + SecNSymStack.pop_back(); + } + + // BlockNSyms now contains the block symbols in reverse canonical order. + JITTargetAddress BlockStart = BlockSyms.front()->Value; + JITTargetAddress BlockEnd = SecNSymStack.empty() + ? 
NSec.Address + NSec.Size + : SecNSymStack.back()->Value; + JITTargetAddress BlockOffset = BlockStart - NSec.Address; + JITTargetAddress BlockSize = BlockEnd - BlockStart; + + LLVM_DEBUG({ + dbgs() << " Creating block for " << formatv("{0:x16}", BlockStart) + << " -- " << formatv("{0:x16}", BlockEnd) << ": " + << NSec.GraphSection->getName() << " + " + << formatv("{0:x16}", BlockOffset) << " with " + << BlockSyms.size() << " symbol(s)...\n"; + }); + + Block &B = + NSec.Data + ? G->createContentBlock( + *NSec.GraphSection, + StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart, + NSec.Alignment, BlockStart % NSec.Alignment) + : G->createZeroFillBlock(*NSec.GraphSection, BlockSize, + BlockStart, NSec.Alignment, + BlockStart % NSec.Alignment); + + Optional LastCanonicalAddr; + JITTargetAddress SymEnd = BlockEnd; + while (!BlockSyms.empty()) { + auto &NSym = *BlockSyms.back(); + BlockSyms.pop_back(); + + bool SymLive = + (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip; + + LLVM_DEBUG({ + dbgs() << " " << formatv("{0:x16}", NSym.Value) << " -- " + << formatv("{0:x16}", SymEnd) << ": "; + if (!NSym.Name) + dbgs() << ""; + else + dbgs() << NSym.Name; + if (SymLive) + dbgs() << " [no-dead-strip]"; + if (LastCanonicalAddr == NSym.Value) + dbgs() << " [non-canonical]"; + dbgs() << "\n"; + }); + + auto &Sym = + NSym.Name + ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name, + SymEnd - NSym.Value, NSym.L, NSym.S, + SectionIsText, SymLive) + : G->addAnonymousSymbol(B, NSym.Value - BlockStart, + SymEnd - NSym.Value, SectionIsText, + SymLive); + NSym.GraphSymbol = &Sym; + if (LastCanonicalAddr != Sym.getAddress()) { + if (LastCanonicalAddr) + SymEnd = *LastCanonicalAddr; + LastCanonicalAddr = Sym.getAddress(); + setCanonicalSymbol(Sym); + } + } + } + } + + return Error::success(); +} + +Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() { + // Graphify special sections. 
+ for (auto &KV : IndexToSection) { + auto &NSec = KV.second; + + auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName()); + if (HI != CustomSectionParserFunctions.end()) { + auto &Parse = HI->second; + if (auto Err = Parse(NSec)) + return Err; + } + } + + return Error::success(); +} + +} // end namespace jitlink +} // end namespace llvm diff --git a/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h new file mode 100644 index 00000000000..e1123cd1104 --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -0,0 +1,269 @@ +//===----- MachOLinkGraphBuilder.h - MachO LinkGraph builder ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic MachO LinkGraph building code. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H +#define LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +#include "EHFrameSupportImpl.h" +#include "JITLinkGeneric.h" +#include "llvm/Object/MachO.h" + +#include + +namespace llvm { +namespace jitlink { + +class MachOLinkGraphBuilder { +public: + virtual ~MachOLinkGraphBuilder(); + Expected> buildGraph(); + +protected: + class MachOEHFrameBinaryParser : public EHFrameBinaryParser { + public: + MachOEHFrameBinaryParser(MachOLinkGraphBuilder &Builder, + JITTargetAddress EHFrameAddress, + StringRef EHFrameContent, Section &EHFrameSection, + uint64_t CIEAlignment, uint64_t FDEAlignment, + Edge::Kind FDEToCIERelocKind, + Edge::Kind FDEToTargetRelocKind) + : EHFrameBinaryParser(EHFrameAddress, EHFrameContent, + Builder.getGraph().getPointerSize(), + Builder.getGraph().getEndianness()), + Builder(Builder), EHFrameSection(EHFrameSection), + CIEAlignment(CIEAlignment), FDEAlignment(FDEAlignment), + FDEToCIERelocKind(FDEToCIERelocKind), + FDEToTargetRelocKind(FDEToTargetRelocKind) {} + + Symbol *getSymbolAtAddress(JITTargetAddress Address) override { + if (auto *Sym = Builder.getSymbolByAddress(Address)) + if (Sym->getAddress() == Address) + return Sym; + return nullptr; + } + + Symbol &createCIERecord(JITTargetAddress RecordAddr, + StringRef RecordContent) override { + auto &G = Builder.getGraph(); + auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr, + CIEAlignment, 0); + auto &CIESymbol = + G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false); + Builder.setCanonicalSymbol(CIESymbol); + return CIESymbol; + } + + Expected createFDERecord(JITTargetAddress RecordAddr, + StringRef RecordContent, Symbol &CIE, + size_t CIEOffset, Symbol &Func, + size_t FuncOffset, Symbol *LSDA, + size_t LSDAOffset) override { + auto &G = Builder.getGraph(); + 
auto &B = G.createContentBlock(EHFrameSection, RecordContent, RecordAddr, + FDEAlignment, 0); + + // Add edges to CIE, Func, and (conditionally) LSDA. + B.addEdge(FDEToCIERelocKind, CIEOffset, CIE, 0); + B.addEdge(FDEToTargetRelocKind, FuncOffset, Func, 0); + + if (LSDA) + B.addEdge(FDEToTargetRelocKind, LSDAOffset, *LSDA, 0); + + auto &FDESymbol = + G.addAnonymousSymbol(B, 0, RecordContent.size(), false, false); + + // Add a keep-alive relocation from the function to the FDE to ensure it + // is not dead stripped. + Func.getBlock().addEdge(Edge::KeepAlive, 0, FDESymbol, 0); + + return FDESymbol; + } + + private: + MachOLinkGraphBuilder &Builder; + Section &EHFrameSection; + uint64_t CIEAlignment; + uint64_t FDEAlignment; + Edge::Kind FDEToCIERelocKind; + Edge::Kind FDEToTargetRelocKind; + }; + + struct NormalizedSymbol { + friend class MachOLinkGraphBuilder; + + private: + NormalizedSymbol(Optional Name, uint64_t Value, uint8_t Type, + uint8_t Sect, uint16_t Desc, Linkage L, Scope S) + : Name(Name), Value(Value), Type(Type), Sect(Sect), Desc(Desc), L(L), + S(S) { + assert((!Name || !Name->empty()) && "Name must be none or non-empty"); + } + + public: + NormalizedSymbol(const NormalizedSymbol &) = delete; + NormalizedSymbol &operator=(const NormalizedSymbol &) = delete; + NormalizedSymbol(NormalizedSymbol &&) = delete; + NormalizedSymbol &operator=(NormalizedSymbol &&) = delete; + + Optional Name; + uint64_t Value = 0; + uint8_t Type = 0; + uint8_t Sect = 0; + uint16_t Desc = 0; + Linkage L = Linkage::Strong; + Scope S = Scope::Default; + Symbol *GraphSymbol = nullptr; + }; + + class NormalizedSection { + friend class MachOLinkGraphBuilder; + + private: + NormalizedSection() = default; + + public: + Section *GraphSection = nullptr; + uint64_t Address = 0; + uint64_t Size = 0; + uint64_t Alignment = 0; + uint32_t Flags = 0; + const char *Data = nullptr; + }; + + using SectionParserFunction = std::function; + + MachOLinkGraphBuilder(const object::MachOObjectFile 
&Obj); + + LinkGraph &getGraph() const { return *G; } + + const object::MachOObjectFile &getObject() const { return Obj; } + + void addCustomSectionParser(StringRef SectionName, + SectionParserFunction Parse); + + virtual Error addRelocations() = 0; + + /// Create a symbol. + template + NormalizedSymbol &createNormalizedSymbol(ArgTs &&... Args) { + NormalizedSymbol *Sym = reinterpret_cast( + Allocator.Allocate()); + new (Sym) NormalizedSymbol(std::forward(Args)...); + return *Sym; + } + + /// Index is zero-based (MachO section indexes are usually one-based) and + /// assumed to be in-range. Client is responsible for checking. + NormalizedSection &getSectionByIndex(unsigned Index) { + auto I = IndexToSection.find(Index); + assert(I != IndexToSection.end() && "No section recorded at index"); + return I->second; + } + + /// Try to get the section at the given index. Will return an error if the + /// given index is out of range, or if no section has been added for the given + /// index. + Expected findSectionByIndex(unsigned Index) { + auto I = IndexToSection.find(Index); + if (I == IndexToSection.end()) + return make_error("No section recorded for index " + + formatv("{0:u}", Index)); + return I->second; + } + + /// Try to get the symbol at the given index. Will return an error if the + /// given index is out of range, or if no symbol has been added for the given + /// index. + Expected findSymbolByIndex(uint64_t Index) { + if (Index >= IndexToSymbol.size()) + return make_error("Symbol index out of range"); + auto *Sym = IndexToSymbol[Index]; + if (!Sym) + return make_error("No symbol at index " + + formatv("{0:u}", Index)); + return *Sym; + } + + /// Returns the symbol with the highest address not greater than the search + /// address, or null if no such symbol exists. 
+ Symbol *getSymbolByAddress(JITTargetAddress Address) { + auto I = AddrToCanonicalSymbol.upper_bound(Address); + if (I == AddrToCanonicalSymbol.begin()) + return nullptr; + return std::prev(I)->second; + } + + /// Returns the symbol with the highest address not greater than the search + /// address, or an error if no such symbol exists. + Expected findSymbolByAddress(JITTargetAddress Address) { + auto *Sym = getSymbolByAddress(Address); + if (Sym) + if (Address < Sym->getAddress() + Sym->getSize()) + return *Sym; + return make_error("No symbol covering address " + + formatv("{0:x16}", Address)); + } + + static Linkage getLinkage(uint16_t Desc); + static Scope getScope(StringRef Name, uint8_t Type); + static bool isAltEntry(const NormalizedSymbol &NSym); + +private: + static unsigned getPointerSize(const object::MachOObjectFile &Obj); + static support::endianness getEndianness(const object::MachOObjectFile &Obj); + + void setCanonicalSymbol(Symbol &Sym) { + auto *&CanonicalSymEntry = AddrToCanonicalSymbol[Sym.getAddress()]; + // There should be no symbol at this address, or, if there is, + // it should be a zero-sized symbol from an empty section (which + // we can safely override). + assert((!CanonicalSymEntry || CanonicalSymEntry->getSize() == 0) && + "Duplicate canonical symbol at address"); + CanonicalSymEntry = &Sym; + } + + Section &getCommonSection(); + void addSectionStartSymAndBlock(Section &GraphSec, uint64_t Address, + const char *Data, uint64_t Size, + uint32_t Alignment, bool IsLive); + + Error createNormalizedSections(); + Error createNormalizedSymbols(); + + /// Create graph blocks and symbols for externals, absolutes, commons and + /// all defined symbols in sections without custom parsers. + Error graphifyRegularSymbols(); + + /// Create graph blocks and symbols for all sections. 
+ Error graphifySectionsWithCustomParsers(); + + // Put the BumpPtrAllocator first so that we don't free any of the underlying + // memory until the Symbol/Addressable destructors have been run. + BumpPtrAllocator Allocator; + + const object::MachOObjectFile &Obj; + std::unique_ptr G; + + DenseMap IndexToSection; + Section *CommonSection = nullptr; + + DenseMap IndexToSymbol; + std::map AddrToCanonicalSymbol; + StringMap CustomSectionParserFunctions; +}; + +} // end namespace jitlink +} // end namespace llvm + +#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOLINKGRAPHBUILDER_H diff --git a/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp new file mode 100644 index 00000000000..945343bff89 --- /dev/null +++ b/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -0,0 +1,736 @@ +//===---- MachO_arm64.cpp - JIT linker implementation for MachO/arm64 -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// MachO/arm64 jit-link implementation. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
+
+#include "BasicGOTAndStubsBuilder.h"
+#include "MachOLinkGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::MachO_arm64_Edges;
+
+namespace {
+
+/// Link-graph builder for MachO/arm64 relocatable objects. Supplies the
+/// arm64-specific relocation parsing on top of MachOLinkGraphBuilder.
+class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
+public:
+  /// Registers a custom parser for the "__eh_frame" section so that its
+  /// records are added to the graph via MachOEHFrameBinaryParser.
+  MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
+      : MachOLinkGraphBuilder(Obj),
+        NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
+    addCustomSectionParser(
+        "__eh_frame", [this](NormalizedSection &EHFrameSection) {
+          // A section without content cannot be parsed as eh-frame data.
+          if (!EHFrameSection.Data)
+            return make_error<JITLinkError>(
+                "__eh_frame section is marked zero-fill");
+          return MachOEHFrameBinaryParser(
+                     *this, EHFrameSection.Address,
+                     StringRef(EHFrameSection.Data, EHFrameSection.Size),
+                     *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64)
+              .addToGraph();
+        });
+  }
+
+private:
+  /// Maps a raw MachO relocation record to the edge kind used to represent
+  /// it, or returns an error describing the record if the combination of
+  /// type / pc-rel / extern / length is not supported.
+  static Expected<MachOARM64RelocationKind>
+  getRelocationKind(const MachO::relocation_info &RI) {
+    switch (RI.r_type) {
+    case MachO::ARM64_RELOC_UNSIGNED:
+      if (!RI.r_pcrel) {
+        if (RI.r_length == 3)
+          return RI.r_extern ? Pointer64 : Pointer64Anon;
+        // Pointer32 targets are looked up by symbol index in addRelocations,
+        // so only extern relocations (where r_symbolnum is a symbol index)
+        // can be represented. This matches the x86-64 builder.
+        else if (RI.r_extern && RI.r_length == 2)
+          return Pointer32;
+      }
+      break;
+    case MachO::ARM64_RELOC_SUBTRACTOR:
+      // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3.
+      // Initially represent SUBTRACTOR relocations with Delta32/Delta64.
+      // They may be turned into NegDelta32/NegDelta64 by parsePairRelocation.
+      if (!RI.r_pcrel && RI.r_extern) {
+        if (RI.r_length == 2)
+          return Delta32;
+        else if (RI.r_length == 3)
+          return Delta64;
+      }
+      break;
+    case MachO::ARM64_RELOC_BRANCH26:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return Branch26;
+      break;
+    case MachO::ARM64_RELOC_PAGE21:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return Page21;
+      break;
+    case MachO::ARM64_RELOC_PAGEOFF12:
+      if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return PageOffset12;
+      break;
+    case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return GOTPage21;
+      break;
+    case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+      if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return GOTPageOffset12;
+      break;
+    case MachO::ARM64_RELOC_POINTER_TO_GOT:
+      if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+        return PointerToGOT;
+      break;
+    case MachO::ARM64_RELOC_ADDEND:
+      if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2)
+        return PairedAddend;
+      break;
+    }
+
+    return make_error<JITLinkError>(
+        "Unsupported arm64 relocation: address=" +
+        formatv("{0:x8}", RI.r_address) +
+        ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) +
+        ", kind=" + formatv("{0:x1}", RI.r_type) +
+        ", pc_rel=" + (RI.r_pcrel ? "true" : "false") +
+        ", extern=" + (RI.r_extern ? "true" : "false") +
+        ", length=" + formatv("{0:d}", RI.r_length));
+  }
+
+  /// Fetches the relocation record that RelItr refers to and copies its raw
+  /// bytes into a MachO::relocation_info.
+  MachO::relocation_info
+  getRelocationInfo(const object::relocation_iterator RelItr) {
+    MachO::any_relocation_info ARI =
+        getObject().getRelocation(RelItr->getRawDataRefImpl());
+    MachO::relocation_info RI;
+    memcpy(&RI, &ARI, sizeof(MachO::relocation_info));
+    return RI;
+  }
+
+  /// (edge kind, target symbol, addend) produced for a SUBTRACTOR/UNSIGNED
+  /// relocation pair.
+  using PairRelocInfo =
+      std::tuple<MachOARM64RelocationKind, Symbol *, uint64_t>;
+
+  // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success,
+  // returns the edge kind and addend to be used.
+  Expected<PairRelocInfo>
+  parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind,
+                      const MachO::relocation_info &SubRI,
+                      JITTargetAddress FixupAddress, const char *FixupContent,
+                      object::relocation_iterator &UnsignedRelItr,
+                      object::relocation_iterator &RelEnd) {
+    using namespace support;
+
+    assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) ||
+            (SubtractorKind == Delta64 && SubRI.r_length == 3)) &&
+           "Subtractor kind should match length");
+    assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern");
+    assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel");
+
+    // The caller has already advanced the iterator to the expected UNSIGNED
+    // record; running off the end means the SUBTRACTOR was unpaired.
+    if (UnsignedRelItr == RelEnd)
+      return make_error<JITLinkError>("arm64 SUBTRACTOR without paired "
+                                      "UNSIGNED relocation");
+
+    auto UnsignedRI = getRelocationInfo(UnsignedRelItr);
+
+    if (SubRI.r_address != UnsignedRI.r_address)
+      return make_error<JITLinkError>("arm64 SUBTRACTOR and paired UNSIGNED "
+                                      "point to different addresses");
+
+    if (SubRI.r_length != UnsignedRI.r_length)
+      return make_error<JITLinkError>("length of arm64 SUBTRACTOR and paired "
+                                      "UNSIGNED reloc must match");
+
+    Symbol *FromSymbol;
+    if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum))
+      FromSymbol = FromSymbolOrErr->GraphSymbol;
+    else
+      return FromSymbolOrErr.takeError();
+
+    // Read the current fixup value.
+    uint64_t FixupValue = 0;
+    if (SubRI.r_length == 3)
+      FixupValue = *(const little64_t *)FixupContent;
+    else
+      FixupValue = *(const little32_t *)FixupContent;
+
+    // Find 'ToSymbol' using symbol number or address, depending on whether the
+    // paired UNSIGNED relocation is extern.
+    Symbol *ToSymbol = nullptr;
+    if (UnsignedRI.r_extern) {
+      // Find target symbol by symbol index.
+      if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum))
+        ToSymbol = ToSymbolOrErr->GraphSymbol;
+      else
+        return ToSymbolOrErr.takeError();
+    } else {
+      // Non-extern: the fixup content holds the target address. Rebase the
+      // value so that it is relative to the symbol that covers it.
+      if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue))
+        ToSymbol = &*ToSymbolOrErr;
+      else
+        return ToSymbolOrErr.takeError();
+      FixupValue -= ToSymbol->getAddress();
+    }
+
+    MachOARM64RelocationKind DeltaKind;
+    Symbol *TargetSymbol;
+    uint64_t Addend;
+    if (&BlockToFix == &FromSymbol->getAddressable()) {
+      TargetSymbol = ToSymbol;
+      DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
+      Addend = FixupValue + (FixupAddress - FromSymbol->getAddress());
+      // FIXME: handle extern 'from'.
+    } else if (&BlockToFix == &ToSymbol->getAddressable()) {
+      TargetSymbol = &*FromSymbol;
+      DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
+      Addend = FixupValue - (FixupAddress - ToSymbol->getAddress());
+    } else {
+      // BlockToFix was neither FromSymbol nor ToSymbol.
+      return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+                                      "either 'A' or 'B' (or a symbol in one "
+                                      "of their alt-entry groups)");
+    }
+
+    return PairRelocInfo(DeltaKind, TargetSymbol, Addend);
+  }
+
+  /// Walks every relocation of every section in the object and adds the
+  /// corresponding edge to the block that contains the fixup location.
+  /// Returns an error for unsupported, malformed or unpaired relocations.
+  Error addRelocations() override {
+    using namespace support;
+    auto &Obj = getObject();
+
+    for (auto &S : Obj.sections()) {
+
+      JITTargetAddress SectionAddress = S.getAddress();
+
+      for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end();
+           RelItr != RelEnd; ++RelItr) {
+
+        MachO::relocation_info RI = getRelocationInfo(RelItr);
+
+        // Sanity check the relocation kind.
+        auto Kind = getRelocationKind(RI);
+        if (!Kind)
+          return Kind.takeError();
+
+        // Find the address of the value to fix up.
+        JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
+
+        LLVM_DEBUG({
+          dbgs() << "Processing " << getMachOARM64RelocationKindName(*Kind)
+                 << " relocation at " << format("0x%016" PRIx64, FixupAddress)
+                 << "\n";
+        });
+
+        // Find the block that the fixup points to.
+        Block *BlockToFix = nullptr;
+        {
+          auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+          if (!SymbolToFixOrErr)
+            return SymbolToFixOrErr.takeError();
+          BlockToFix = &SymbolToFixOrErr->getBlock();
+        }
+
+        // The fixup is (1 << r_length) bytes wide and must lie entirely
+        // within the block's content.
+        if (FixupAddress + static_cast<JITTargetAddress>(1ULL << RI.r_length) >
+            BlockToFix->getAddress() + BlockToFix->getContent().size())
+          return make_error<JITLinkError>(
+              "Relocation content extends past end of fixup block");
+
+        // Get a pointer to the fixup content.
+        const char *FixupContent = BlockToFix->getContent().data() +
+                                   (FixupAddress - BlockToFix->getAddress());
+
+        // The target symbol and addend will be populated by the switch below.
+        Symbol *TargetSymbol = nullptr;
+        uint64_t Addend = 0;
+
+        if (*Kind == PairedAddend) {
+          // If this is an Addend relocation then process it and move to the
+          // paired reloc.
+
+          // The addend is carried in the 24-bit r_symbolnum field as a signed
+          // value, so sign-extend it from bit 23.
+          Addend = static_cast<uint64_t>(
+              (static_cast<int64_t>(RI.r_symbolnum) ^ 0x800000) - 0x800000);
+
+          // Advance first, then check: the loop condition already guarantees
+          // that the current iterator is valid, so it is the *next* record
+          // that may be missing.
+          ++RelItr;
+          if (RelItr == RelEnd)
+            return make_error<JITLinkError>("Unpaired Addend reloc at " +
+                                            formatv("{0:x16}", FixupAddress));
+          RI = getRelocationInfo(RelItr);
+
+          Kind = getRelocationKind(RI);
+          if (!Kind)
+            return Kind.takeError();
+
+          if (*Kind != Branch26 && *Kind != Page21 && *Kind != PageOffset12)
+            return make_error<JITLinkError>(
+                "Invalid relocation pair: Addend + " +
+                getMachOARM64RelocationKindName(*Kind));
+          else
+            LLVM_DEBUG({
+              dbgs() << "  pair is " << getMachOARM64RelocationKindName(*Kind)
+                     << "`\n";
+            });
+
+          // Find the address of the value to fix up.
+          JITTargetAddress PairedFixupAddress =
+              SectionAddress + (uint32_t)RI.r_address;
+          if (PairedFixupAddress != FixupAddress)
+            return make_error<JITLinkError>("Paired relocation points at "
+                                            "different target");
+        }
+
+        switch (*Kind) {
+        case Branch26: {
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          uint32_t Instr = *(const ulittle32_t *)FixupContent;
+          if ((Instr & 0x7fffffff) != 0x14000000)
+            return make_error<JITLinkError>("BRANCH26 target is not a B or BL "
+                                            "instruction with a zero addend");
+          break;
+        }
+        case Pointer32:
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          Addend = *(const ulittle32_t *)FixupContent;
+          break;
+        case Pointer64:
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          Addend = *(const ulittle64_t *)FixupContent;
+          break;
+        case Pointer64Anon: {
+          JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
+          if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+            TargetSymbol = &*TargetSymbolOrErr;
+          else
+            return TargetSymbolOrErr.takeError();
+          Addend = TargetAddress - TargetSymbol->getAddress();
+          break;
+        }
+        case Page21:
+        case GOTPage21: {
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          uint32_t Instr = *(const ulittle32_t *)FixupContent;
+          if ((Instr & 0xffffffe0) != 0x90000000)
+            return make_error<JITLinkError>("PAGE21/GOTPAGE21 target is not an "
+                                            "ADRP instruction with a zero "
+                                            "addend");
+          break;
+        }
+        case PageOffset12: {
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          break;
+        }
+        case GOTPageOffset12: {
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          uint32_t Instr = *(const ulittle32_t *)FixupContent;
+          if ((Instr & 0xfffffc00) != 0xf9400000)
+            return make_error<JITLinkError>("GOTPAGEOFF12 target is not an LDR "
+                                            "immediate instruction with a zero "
+                                            "addend");
+          break;
+        }
+        case PointerToGOT:
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          break;
+        case Delta32:
+        case Delta64: {
+          // We use Delta32/Delta64 to represent SUBTRACTOR relocations.
+          // parsePairRelocation handles the paired reloc, and returns the
+          // edge kind to be used (either Delta32/Delta64, or
+          // NegDelta32/NegDelta64, depending on the direction of the
+          // subtraction) along with the addend.
+          auto PairInfo =
+              parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress,
+                                  FixupContent, ++RelItr, RelEnd);
+          if (!PairInfo)
+            return PairInfo.takeError();
+          std::tie(*Kind, TargetSymbol, Addend) = *PairInfo;
+          assert(TargetSymbol && "No target symbol from parsePairRelocation?");
+          break;
+        }
+        default:
+          llvm_unreachable("Special relocation kind should not appear in "
+                           "mach-o file");
+        }
+
+        LLVM_DEBUG({
+          Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
+                  Addend);
+          printEdge(dbgs(), *BlockToFix, GE,
+                    getMachOARM64RelocationKindName(*Kind));
+          dbgs() << "\n";
+        });
+        BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
+                            *TargetSymbol, Addend);
+      }
+    }
+    return Error::success();
+  }
+
+  unsigned NumSymbols = 0;
+};
+
+/// Builds GOT entries and branch stubs for MachO/arm64 link graphs. The
+/// hooks below are called by BasicGOTAndStubsBuilder.
+class MachO_arm64_GOTAndStubsBuilder
+    : public BasicGOTAndStubsBuilder<MachO_arm64_GOTAndStubsBuilder> {
+public:
+  MachO_arm64_GOTAndStubsBuilder(LinkGraph &G)
+      : BasicGOTAndStubsBuilder<MachO_arm64_GOTAndStubsBuilder>(G) {}
+
+  /// Returns true for the edge kinds that must be redirected to a GOT entry.
+  bool isGOTEdge(Edge &E) const {
+    return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+           E.getKind() == PointerToGOT;
+  }
+
+  /// Creates an 8-byte, 8-byte-aligned GOT entry holding a Pointer64 edge to
+  /// Target and returns an anonymous symbol covering it.
+  Symbol &createGOTEntry(Symbol &Target) {
+    auto &GOTEntryBlock = G.createContentBlock(
+        getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
+    GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
+    return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
+  }
+
+  /// Retargets a GOT edge at the given GOT entry. PointerToGOT edges also
+  /// become Delta32 edges.
+  void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
+    if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) {
+      // Update the target, but leave the edge addend as-is.
+      E.setTarget(GOTEntry);
+    } else if (E.getKind() == PointerToGOT) {
+      E.setTarget(GOTEntry);
+      E.setKind(Delta32);
+    } else
+      llvm_unreachable("Not a GOT edge?");
+  }
+
+  /// Returns true for Branch26 edges whose target is not defined in the
+  /// graph; these are routed through a stub.
+  bool isExternalBranchEdge(Edge &E) {
+    return E.getKind() == Branch26 && !E.getTarget().isDefined();
+  }
+
+  /// Creates a stub that loads the target address from its GOT entry and
+  /// branches to it.
+  Symbol &createStub(Symbol &Target) {
+    // The stub holds two AArch64 instructions, which must be 4-byte aligned
+    // (applyFixup asserts this for the LDRLiteral19 edge added below).
+    auto &StubContentBlock =
+        G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0);
+    // Re-use GOT entries for stub targets.
+    auto &GOTEntrySymbol = getGOTEntrySymbol(Target);
+    StubContentBlock.addEdge(LDRLiteral19, 0, GOTEntrySymbol, 0);
+    return G.addAnonymousSymbol(StubContentBlock, 0, 8, true, false);
+  }
+
+  /// Redirects an external branch edge to the given stub.
+  void fixExternalBranchEdge(Edge &E, Symbol &Stub) {
+    assert(E.getKind() == Branch26 && "Not a Branch26 edge?");
+    assert(E.getAddend() == 0 && "Branch26 edge has non-zero addend?");
+    E.setTarget(Stub);
+  }
+
+private:
+  /// Returns the "$__GOT" section, creating it (read-only) on first use.
+  Section &getGOTSection() {
+    if (!GOTSection)
+      GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
+    return *GOTSection;
+  }
+
+  /// Returns the "$__STUBS" section, creating it (read + exec) on first use.
+  Section &getStubsSection() {
+    if (!StubsSection) {
+      auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
+          sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+      StubsSection = &G.createSection("$__STUBS", StubsProt);
+    }
+    return *StubsSection;
+  }
+
+  /// Initial (all-zero) content for a GOT entry.
+  StringRef getGOTEntryBlockContent() {
+    return StringRef(reinterpret_cast<const char *>(NullGOTEntryContent),
+                     sizeof(NullGOTEntryContent));
+  }
+
+  /// Instruction bytes for a stub, before fixups are applied.
+  StringRef getStubBlockContent() {
+    return StringRef(reinterpret_cast<const char *>(StubContent),
+                     sizeof(StubContent));
+  }
+
+  static const uint8_t NullGOTEntryContent[8];
+  static const uint8_t StubContent[8];
+  Section *GOTSection = nullptr;
+  Section *StubsSection = nullptr;
+};
+
+const uint8_t MachO_arm64_GOTAndStubsBuilder::NullGOTEntryContent[8] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+const uint8_t MachO_arm64_GOTAndStubsBuilder::StubContent[8] = {
+    0x10, 0x00, 0x00, 0x58, // LDR x16, <literal>
+    0x00, 0x02, 0x1f, 0xd6  // BR  x16
+};
+
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+/// JITLinker specialization for MachO/arm64: builds the link graph from an
+/// object buffer and applies arm64 fixups.
+class MachOJITLinker_arm64 : public JITLinker<MachOJITLinker_arm64> {
+  friend class JITLinker<MachOJITLinker_arm64>;
+
+public:
+  MachOJITLinker_arm64(std::unique_ptr<JITLinkContext> Ctx,
+                       PassConfiguration PassConfig)
+      : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+
+private:
+  StringRef getEdgeKindName(Edge::Kind R) const override {
+    return getMachOARM64RelocationKindName(R);
+  }
+
+  /// Parses ObjBuffer as a MachO object and builds its link graph.
+  Expected<std::unique_ptr<LinkGraph>>
+  buildGraph(MemoryBufferRef ObjBuffer) override {
+    auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
+    if (!MachOObj)
+      return MachOObj.takeError();
+    return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph();
+  }
+
+  /// Builds a JITLinkError describing edge E (in block B) as out of range.
+  static Error targetOutOfRangeError(const Block &B, const Edge &E) {
+    std::string ErrMsg;
+    {
+      raw_string_ostream ErrStream(ErrMsg);
+      ErrStream << "Relocation target out of range: ";
+      printEdge(ErrStream, B, E, getMachOARM64RelocationKindName(E.getKind()));
+      ErrStream << "\n";
+    }
+    return make_error<JITLinkError>(std::move(ErrMsg));
+  }
+
+  /// Returns the right-shift to apply to a page offset before encoding it
+  /// into Instr's 12-bit immediate, based on the instruction's access size.
+  /// Returns 0 for instructions that are not recognised loads.
+  static unsigned getPageOffset12Shift(uint32_t Instr) {
+    constexpr uint32_t LDRLiteralMask = 0x3ffffc00;
+
+    // Check for a GPR LDR immediate with a zero embedded literal.
+    // If found, the top two bits contain the shift.
+    if ((Instr & LDRLiteralMask) == 0x39400000)
+      return Instr >> 30;
+
+    // Check for a Neon LDR immediate of size 64-bit or less with a zero
+    // embedded literal. If found, the top two bits contain the shift.
+    if ((Instr & LDRLiteralMask) == 0x3d400000)
+      return Instr >> 30;
+
+    // Check for a Neon LDR immediate of size 128-bit with a zero embedded
+    // literal.
+    constexpr uint32_t SizeBitsMask = 0xc0000000;
+    if ((Instr & (LDRLiteralMask | SizeBitsMask)) == 0x3dc00000)
+      return 4;
+
+    return 0;
+  }
+
+  /// Applies edge E to block B's working memory. Returns an error if the
+  /// target is out of range or misaligned for the edge kind.
+  Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const {
+    using namespace support;
+
+    char *FixupPtr = BlockWorkingMem + E.getOffset();
+    JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
+
+    switch (E.getKind()) {
+    case Branch26: {
+      assert((FixupAddress & 0x3) == 0 && "Branch-inst is not 32-bit aligned");
+
+      int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+
+      if (static_cast<uint64_t>(Value) & 0x3)
+        return make_error<JITLinkError>("Branch26 target is not 32-bit "
+                                        "aligned");
+
+      if (Value < -(1 << 27) || Value > ((1 << 27) - 1))
+        return targetOutOfRangeError(B, E);
+
+      uint32_t RawInstr = *(little32_t *)FixupPtr;
+      assert((RawInstr & 0x7fffffff) == 0x14000000 &&
+             "RawInstr isn't a B or BL immediate instruction");
+      uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
+      uint32_t FixedInstr = RawInstr | Imm;
+      *(little32_t *)FixupPtr = FixedInstr;
+      break;
+    }
+    case Pointer32: {
+      uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+      if (Value > std::numeric_limits<uint32_t>::max())
+        return targetOutOfRangeError(B, E);
+      *(ulittle32_t *)FixupPtr = Value;
+      break;
+    }
+    case Pointer64: {
+      uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+      *(ulittle64_t *)FixupPtr = Value;
+      break;
+    }
+    case Page21:
+    case GOTPage21: {
+      assert(E.getAddend() == 0 && "PAGE21/GOTPAGE21 with non-zero addend");
+      uint64_t TargetPage =
+          E.getTarget().getAddress() & ~static_cast<uint64_t>(4096 - 1);
+      // ADRP is relative to the page containing the instruction itself, so
+      // use the fixup address here: the block may start on an earlier page.
+      uint64_t PCPage = FixupAddress & ~static_cast<uint64_t>(4096 - 1);
+
+      int64_t PageDelta = TargetPage - PCPage;
+      if (PageDelta < -(1 << 30) || PageDelta > ((1 << 30) - 1))
+        return targetOutOfRangeError(B, E);
+
+      uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+      assert((RawInstr & 0xffffffe0) == 0x90000000 &&
+             "RawInstr isn't an ADRP instruction");
+      uint32_t ImmLo = (static_cast<uint64_t>(PageDelta) >> 12) & 0x3;
+      uint32_t ImmHi = (static_cast<uint64_t>(PageDelta) >> 14) & 0x7ffff;
+      uint32_t FixedInstr = RawInstr | (ImmLo << 29) | (ImmHi << 5);
+      *(ulittle32_t *)FixupPtr = FixedInstr;
+      break;
+    }
+    case PageOffset12: {
+      assert(E.getAddend() == 0 && "PAGEOFF12 with non-zero addend");
+      uint64_t TargetOffset = E.getTarget().getAddress() & 0xfff;
+
+      uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+      unsigned ImmShift = getPageOffset12Shift(RawInstr);
+
+      // The offset is encoded scaled by the access size, so any low bits
+      // that would be shifted out must be zero.
+      if (TargetOffset & ((1 << ImmShift) - 1))
+        return make_error<JITLinkError>("PAGEOFF12 target is not aligned");
+
+      uint32_t EncodedImm = (TargetOffset >> ImmShift) << 10;
+      uint32_t FixedInstr = RawInstr | EncodedImm;
+      *(ulittle32_t *)FixupPtr = FixedInstr;
+      break;
+    }
+    case GOTPageOffset12: {
+      assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend");
+
+      uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+      assert((RawInstr & 0xfffffc00) == 0xf9400000 &&
+             "RawInstr isn't a 64-bit LDR immediate");
+
+      uint32_t TargetOffset = E.getTarget().getAddress() & 0xfff;
+      assert((TargetOffset & 0x7) == 0 && "GOT entry is not 8-byte aligned");
+      uint32_t EncodedImm = (TargetOffset >> 3) << 10;
+      uint32_t FixedInstr = RawInstr | EncodedImm;
+      *(ulittle32_t *)FixupPtr = FixedInstr;
+      break;
+    }
+    case LDRLiteral19: {
+      assert((FixupAddress & 0x3) == 0 && "LDR is not 32-bit aligned");
+      assert(E.getAddend() == 0 && "LDRLiteral19 with non-zero addend");
+      uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+      assert(RawInstr == 0x58000010 && "RawInstr isn't a 64-bit LDR literal");
+      int64_t Delta = E.getTarget().getAddress() - FixupAddress;
+      if (Delta & 0x3)
+        return make_error<JITLinkError>("LDR literal target is not 32-bit "
+                                        "aligned");
+      if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
+        return targetOutOfRangeError(B, E);
+
+      uint32_t EncodedImm = (static_cast<uint32_t>(Delta) >> 2) << 5;
+      uint32_t FixedInstr = RawInstr | EncodedImm;
+      *(ulittle32_t *)FixupPtr = FixedInstr;
+      break;
+    }
+    case Delta32:
+    case Delta64:
+    case NegDelta32:
+    case NegDelta64: {
+      int64_t Value;
+      if (E.getKind() == Delta32 || E.getKind() == Delta64)
+        Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+      else
+        Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+
+      if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
+        if (Value < std::numeric_limits<int32_t>::min() ||
+            Value > std::numeric_limits<int32_t>::max())
+          return targetOutOfRangeError(B, E);
+        *(little32_t *)FixupPtr = Value;
+      } else
+        *(little64_t *)FixupPtr = Value;
+      break;
+    }
+    default:
+      llvm_unreachable("Unrecognized edge kind");
+    }
+
+    return Error::success();
+  }
+
+  uint64_t NullValue = 0;
+};
+
+/// Entry point: sets up the default pass pipeline for MachO/arm64 (mark-live
+/// and GOT/stubs construction), lets the context adjust it, then links.
+void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx) {
+  PassConfiguration Config;
+  Triple TT("arm64-apple-ios");
+
+  if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+    // Add a mark-live pass.
+    if (auto MarkLive = Ctx->getMarkLivePass(TT))
+      Config.PrePrunePasses.push_back(std::move(MarkLive));
+    else
+      Config.PrePrunePasses.push_back(markAllSymbolsLive);
+
+    // Add an in-place GOT/Stubs pass.
+    Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error {
+      MachO_arm64_GOTAndStubsBuilder(G).run();
+      return Error::success();
+    });
+  }
+
+  if (auto Err = Ctx->modifyPassConfig(TT, Config))
+    return Ctx->notifyFailed(std::move(Err));
+
+  // Construct a JITLinker and run the link function.
+  MachOJITLinker_arm64::link(std::move(Ctx), std::move(Config));
+}
+
+/// Returns a printable name for a MachO/arm64 edge kind, deferring to the
+/// generic edge-kind names for anything that is not arm64-specific.
+StringRef getMachOARM64RelocationKindName(Edge::Kind R) {
+  switch (R) {
+  case Branch26:
+    return "Branch26";
+  case Pointer32:
+    return "Pointer32";
+  case Pointer64:
+    return "Pointer64";
+  case Pointer64Anon:
+    return "Pointer64Anon";
+  case Page21:
+    return "Page21";
+  case PageOffset12:
+    return "PageOffset12";
+  case GOTPage21:
+    return "GOTPage21";
+  case GOTPageOffset12:
+    return "GOTPageOffset12";
+  case PointerToGOT:
+    return "PointerToGOT";
+  case PairedAddend:
+    return "PairedAddend";
+  case LDRLiteral19:
+    return "LDRLiteral19";
+  case Delta32:
+    return "Delta32";
+  case Delta64:
+    return "Delta64";
+  case NegDelta32:
+    return "NegDelta32";
+  case NegDelta64:
+    return "NegDelta64";
+  default:
+    return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+  }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 4010678c6d3..d83787ffd59 100644
--- a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -13,7 +13,7 @@
 #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
 
 #include "BasicGOTAndStubsBuilder.h"
-#include "MachOAtomGraphBuilder.h"
+#include "MachOLinkGraphBuilder.h"
 
 #define DEBUG_TYPE "jitlink"
 
@@ -23,16 +23,21 @@ using namespace llvm::jitlink::MachO_x86_64_Edges;
 
 namespace {
 
-class MachOAtomGraphBuilder_x86_64 : public MachOAtomGraphBuilder {
+class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
 public:
-  MachOAtomGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
-      : MachOAtomGraphBuilder(Obj),
-        NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
-    addCustomAtomizer("__eh_frame", [this](MachOSection &EHFrameSection) {
-      return addEHFrame(getGraph(), EHFrameSection.getGenericSection(),
-                        EHFrameSection.getContent(),
-                        EHFrameSection.getAddress(), NegDelta32, Delta64);
-    });
+  MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
+      :
MachOLinkGraphBuilder(Obj) { + addCustomSectionParser( + "__eh_frame", [this](NormalizedSection &EHFrameSection) { + if (!EHFrameSection.Data) + return make_error( + "__eh_frame section is marked zero-fill"); + return MachOEHFrameBinaryParser( + *this, EHFrameSection.Address, + StringRef(EHFrameSection.Data, EHFrameSection.Size), + *EHFrameSection.GraphSection, 8, 4, NegDelta32, Delta64) + .addToGraph(); + }); } private: @@ -40,8 +45,12 @@ private: getRelocationKind(const MachO::relocation_info &RI) { switch (RI.r_type) { case MachO::X86_64_RELOC_UNSIGNED: - if (!RI.r_pcrel && RI.r_length == 3) - return RI.r_extern ? Pointer64 : Pointer64Anon; + if (!RI.r_pcrel) { + if (RI.r_length == 3) + return RI.r_extern ? Pointer64 : Pointer64Anon; + else if (RI.r_extern && RI.r_length == 2) + return Pointer32; + } break; case MachO::X86_64_RELOC_SIGNED: if (RI.r_pcrel && RI.r_length == 2) @@ -94,21 +103,10 @@ private: ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) + ", kind=" + formatv("{0:x1}", RI.r_type) + ", pc_rel=" + (RI.r_pcrel ? "true" : "false") + - ", extern= " + (RI.r_extern ? "true" : "false") + + ", extern=" + (RI.r_extern ? "true" : "false") + ", length=" + formatv("{0:d}", RI.r_length)); } - Expected findAtomBySymbolIndex(const MachO::relocation_info &RI) { - auto &Obj = getObject(); - if (RI.r_symbolnum >= NumSymbols) - return make_error("Symbol index out of range"); - auto SymI = Obj.getSymbolByIndex(RI.r_symbolnum); - auto Name = SymI->getName(); - if (!Name) - return Name.takeError(); - return getGraph().getAtomByName(*Name); - } - MachO::relocation_info getRelocationInfo(const object::relocation_iterator RelItr) { MachO::any_relocation_info ARI = @@ -118,12 +116,12 @@ private: return RI; } - using PairRelocInfo = std::tuple; + using PairRelocInfo = std::tuple; // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, // returns the edge kind and addend to be used. 
Expected - parsePairRelocation(DefinedAtom &AtomToFix, Edge::Kind SubtractorKind, + parsePairRelocation(Block &BlockToFix, Edge::Kind SubtractorKind, const MachO::relocation_info &SubRI, JITTargetAddress FixupAddress, const char *FixupContent, object::relocation_iterator &UnsignedRelItr, @@ -150,9 +148,11 @@ private: return make_error("length of x86_64 SUBTRACTOR and paired " "UNSIGNED reloc must match"); - auto FromAtom = findAtomBySymbolIndex(SubRI); - if (!FromAtom) - return FromAtom.takeError(); + Symbol *FromSymbol; + if (auto FromSymbolOrErr = findSymbolByIndex(SubRI.r_symbolnum)) + FromSymbol = FromSymbolOrErr->GraphSymbol; + else + return FromSymbolOrErr.takeError(); // Read the current fixup value. uint64_t FixupValue = 0; @@ -161,54 +161,60 @@ private: else FixupValue = *(const little32_t *)FixupContent; - // Find 'ToAtom' using symbol number or address, depending on whether the + // Find 'ToSymbol' using symbol number or address, depending on whether the // paired UNSIGNED relocation is extern. - Atom *ToAtom = nullptr; + Symbol *ToSymbol = nullptr; if (UnsignedRI.r_extern) { - // Find target atom by symbol index. - if (auto ToAtomOrErr = findAtomBySymbolIndex(UnsignedRI)) - ToAtom = &*ToAtomOrErr; + // Find target symbol by symbol index. 
+ if (auto ToSymbolOrErr = findSymbolByIndex(UnsignedRI.r_symbolnum)) + ToSymbol = ToSymbolOrErr->GraphSymbol; else - return ToAtomOrErr.takeError(); + return ToSymbolOrErr.takeError(); } else { - if (auto ToAtomOrErr = getGraph().findAtomByAddress(FixupValue)) - ToAtom = &*ToAtomOrErr; + if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) + ToSymbol = &*ToSymbolOrErr; else - return ToAtomOrErr.takeError(); - FixupValue -= ToAtom->getAddress(); + return ToSymbolOrErr.takeError(); + FixupValue -= ToSymbol->getAddress(); } MachOX86RelocationKind DeltaKind; - Atom *TargetAtom; + Symbol *TargetSymbol; uint64_t Addend; - if (areLayoutLocked(AtomToFix, *FromAtom)) { - TargetAtom = ToAtom; + if (&BlockToFix == &FromSymbol->getAddressable()) { + TargetSymbol = ToSymbol; DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32; - Addend = FixupValue + (FixupAddress - FromAtom->getAddress()); + Addend = FixupValue + (FixupAddress - FromSymbol->getAddress()); // FIXME: handle extern 'from'. - } else if (areLayoutLocked(AtomToFix, *ToAtom)) { - TargetAtom = &*FromAtom; + } else if (&BlockToFix == &ToSymbol->getAddressable()) { + TargetSymbol = FromSymbol; DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32; - Addend = FixupValue - (FixupAddress - ToAtom->getAddress()); + Addend = FixupValue - (FixupAddress - ToSymbol->getAddress()); } else { - // AtomToFix was neither FromAtom nor ToAtom. + // BlockToFix was neither FromSymbol nor ToSymbol. 
return make_error("SUBTRACTOR relocation must fix up " - "either 'A' or 'B' (or an atom in one " - "of their alt-entry groups)"); + "either 'A' or 'B' (or a symbol in one " + "of their alt-entry chains)"); } - return PairRelocInfo(DeltaKind, TargetAtom, Addend); + return PairRelocInfo(DeltaKind, TargetSymbol, Addend); } Error addRelocations() override { using namespace support; - auto &G = getGraph(); auto &Obj = getObject(); for (auto &S : Obj.sections()) { JITTargetAddress SectionAddress = S.getAddress(); + if (S.isVirtual()) { + if (S.relocation_begin() != S.relocation_end()) + return make_error("Virtual section contains " + "relocations"); + continue; + } + for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end(); RelItr != RelEnd; ++RelItr) { @@ -227,26 +233,26 @@ private: << format("0x%016" PRIx64, FixupAddress) << "\n"; }); - // Find the atom that the fixup points to. - DefinedAtom *AtomToFix = nullptr; + // Find the block that the fixup points to. + Block *BlockToFix = nullptr; { - auto AtomToFixOrErr = G.findAtomByAddress(FixupAddress); - if (!AtomToFixOrErr) - return AtomToFixOrErr.takeError(); - AtomToFix = &*AtomToFixOrErr; + auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress); + if (!SymbolToFixOrErr) + return SymbolToFixOrErr.takeError(); + BlockToFix = &SymbolToFixOrErr->getBlock(); } if (FixupAddress + static_cast(1ULL << RI.r_length) > - AtomToFix->getAddress() + AtomToFix->getContent().size()) + BlockToFix->getAddress() + BlockToFix->getContent().size()) return make_error( - "Relocation content extends past end of fixup atom"); + "Relocation extends past end of fixup block"); // Get a pointer to the fixup content. - const char *FixupContent = AtomToFix->getContent().data() + - (FixupAddress - AtomToFix->getAddress()); + const char *FixupContent = BlockToFix->getContent().data() + + (FixupAddress - BlockToFix->getAddress()); - // The target atom and addend will be populated by the switch below. 
- Atom *TargetAtom = nullptr; + // The target symbol and addend will be populated by the switch below. + Symbol *TargetSymbol = nullptr; uint64_t Addend = 0; switch (*Kind) { @@ -254,46 +260,53 @@ private: case PCRel32: case PCRel32GOTLoad: case PCRel32GOT: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + return TargetSymbolOrErr.takeError(); + Addend = *(const ulittle32_t *)FixupContent; + break; + case Pointer32: + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; + else + return TargetSymbolOrErr.takeError(); Addend = *(const ulittle32_t *)FixupContent; break; case Pointer64: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + return TargetSymbolOrErr.takeError(); Addend = *(const ulittle64_t *)FixupContent; break; case Pointer64Anon: { JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case PCRel32Minus1: case PCRel32Minus2: case PCRel32Minus4: - if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) + TargetSymbol = TargetSymbolOrErr->GraphSymbol; else - return TargetAtomOrErr.takeError(); + 
return TargetSymbolOrErr.takeError(); Addend = *(const ulittle32_t *)FixupContent + (1 << (*Kind - PCRel32Minus1)); break; case PCRel32Anon: { JITTargetAddress TargetAddress = FixupAddress + 4 + *(const ulittle32_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case PCRel32Minus1Anon: @@ -303,11 +316,11 @@ private: static_cast(1ULL << (*Kind - PCRel32Minus1Anon)); JITTargetAddress TargetAddress = FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent; - if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress)) - TargetAtom = &*TargetAtomOrErr; + if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) + TargetSymbol = &*TargetSymbolOrErr; else - return TargetAtomOrErr.takeError(); - Addend = TargetAddress - TargetAtom->getAddress(); + return TargetSymbolOrErr.takeError(); + Addend = TargetAddress - TargetSymbol->getAddress(); break; } case Delta32: @@ -318,12 +331,12 @@ private: // NegDelta32/NegDelta64, depending on the direction of the // subtraction) along with the addend. 
auto PairInfo = - parsePairRelocation(*AtomToFix, *Kind, RI, FixupAddress, + parsePairRelocation(*BlockToFix, *Kind, RI, FixupAddress, FixupContent, ++RelItr, RelEnd); if (!PairInfo) return PairInfo.takeError(); - std::tie(*Kind, TargetAtom, Addend) = *PairInfo; - assert(TargetAtom && "No target atom from parsePairRelocation?"); + std::tie(*Kind, TargetSymbol, Addend) = *PairInfo; + assert(TargetSymbol && "No target symbol from parsePairRelocation?"); break; } default: @@ -332,41 +345,38 @@ private: } LLVM_DEBUG({ - Edge GE(*Kind, FixupAddress - AtomToFix->getAddress(), *TargetAtom, + Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol, Addend); - printEdge(dbgs(), *AtomToFix, GE, + printEdge(dbgs(), *BlockToFix, GE, getMachOX86RelocationKindName(*Kind)); dbgs() << "\n"; }); - AtomToFix->addEdge(*Kind, FixupAddress - AtomToFix->getAddress(), - *TargetAtom, Addend); + BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(), + *TargetSymbol, Addend); } } return Error::success(); } - - unsigned NumSymbols = 0; }; class MachO_x86_64_GOTAndStubsBuilder : public BasicGOTAndStubsBuilder { public: - MachO_x86_64_GOTAndStubsBuilder(AtomGraph &G) + MachO_x86_64_GOTAndStubsBuilder(LinkGraph &G) : BasicGOTAndStubsBuilder(G) {} bool isGOTEdge(Edge &E) const { return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad; } - DefinedAtom &createGOTEntry(Atom &Target) { - auto &GOTEntryAtom = G.addAnonymousAtom(getGOTSection(), 0x0, 8); - GOTEntryAtom.setContent( - StringRef(reinterpret_cast(NullGOTEntryContent), 8)); - GOTEntryAtom.addEdge(Pointer64, 0, Target, 0); - return GOTEntryAtom; + Symbol &createGOTEntry(Symbol &Target) { + auto &GOTEntryBlock = G.createContentBlock( + getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0); + GOTEntryBlock.addEdge(Pointer64, 0, Target, 0); + return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false); } - void fixGOTEdge(Edge &E, Atom &GOTEntry) { + void fixGOTEdge(Edge &E, Symbol &GOTEntry) { 
assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) && "Not a GOT edge?"); E.setKind(PCRel32); @@ -378,19 +388,16 @@ public: return E.getKind() == Branch32 && !E.getTarget().isDefined(); } - DefinedAtom &createStub(Atom &Target) { - auto &StubAtom = G.addAnonymousAtom(getStubsSection(), 0x0, 2); - StubAtom.setContent( - StringRef(reinterpret_cast(StubContent), 6)); - + Symbol &createStub(Symbol &Target) { + auto &StubContentBlock = + G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0); // Re-use GOT entries for stub targets. - auto &GOTEntryAtom = getGOTEntryAtom(Target); - StubAtom.addEdge(PCRel32, 2, GOTEntryAtom, 0); - - return StubAtom; + auto &GOTEntrySymbol = getGOTEntrySymbol(Target); + StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, 0); + return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false); } - void fixExternalBranchEdge(Edge &E, Atom &Stub) { + void fixExternalBranchEdge(Edge &E, Symbol &Stub) { assert(E.getKind() == Branch32 && "Not a Branch32 edge?"); assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?"); E.setTarget(Stub); @@ -399,7 +406,7 @@ public: private: Section &getGOTSection() { if (!GOTSection) - GOTSection = &G.createSection("$__GOT", 8, sys::Memory::MF_READ, false); + GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ); return *GOTSection; } @@ -407,11 +414,21 @@ private: if (!StubsSection) { auto StubsProt = static_cast( sys::Memory::MF_READ | sys::Memory::MF_EXEC); - StubsSection = &G.createSection("$__STUBS", 8, StubsProt, false); + StubsSection = &G.createSection("$__STUBS", StubsProt); } return *StubsSection; } + StringRef getGOTEntryBlockContent() { + return StringRef(reinterpret_cast(NullGOTEntryContent), + sizeof(NullGOTEntryContent)); + } + + StringRef getStubBlockContent() { + return StringRef(reinterpret_cast(StubContent), + sizeof(StubContent)); + } + static const uint8_t NullGOTEntryContent[8]; static const uint8_t StubContent[6]; Section *GOTSection = 
nullptr; @@ -440,30 +457,31 @@ private: return getMachOX86RelocationKindName(R); } - Expected> + Expected> buildGraph(MemoryBufferRef ObjBuffer) override { auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer); if (!MachOObj) return MachOObj.takeError(); - return MachOAtomGraphBuilder_x86_64(**MachOObj).buildGraph(); + return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph(); } - static Error targetOutOfRangeError(const Atom &A, const Edge &E) { + static Error targetOutOfRangeError(const Block &B, const Edge &E) { std::string ErrMsg; { raw_string_ostream ErrStream(ErrMsg); ErrStream << "Relocation target out of range: "; - printEdge(ErrStream, A, E, getMachOX86RelocationKindName(E.getKind())); + printEdge(ErrStream, B, E, getMachOX86RelocationKindName(E.getKind())); ErrStream << "\n"; } return make_error(std::move(ErrMsg)); } - Error applyFixup(DefinedAtom &A, const Edge &E, char *AtomWorkingMem) const { + Error applyFixup(Block &B, const Edge &E, char *BlockWorkingMem) const { + using namespace support; - char *FixupPtr = AtomWorkingMem + E.getOffset(); - JITTargetAddress FixupAddress = A.getAddress() + E.getOffset(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + JITTargetAddress FixupAddress = B.getAddress() + E.getOffset(); switch (E.getKind()) { case Branch32: @@ -473,7 +491,7 @@ private: E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend(); if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -491,7 +509,7 @@ private: E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend(); if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -503,7 +521,7 @@ private: E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend(); if 
(Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; break; } @@ -520,12 +538,19 @@ private: if (E.getKind() == Delta32 || E.getKind() == NegDelta32) { if (Value < std::numeric_limits::min() || Value > std::numeric_limits::max()) - return targetOutOfRangeError(A, E); + return targetOutOfRangeError(B, E); *(little32_t *)FixupPtr = Value; } else *(little64_t *)FixupPtr = Value; break; } + case Pointer32: { + uint64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (Value > std::numeric_limits::max()) + return targetOutOfRangeError(B, E); + *(ulittle32_t *)FixupPtr = Value; + break; + } default: llvm_unreachable("Unrecognized edge kind"); } @@ -545,10 +570,10 @@ void jitLink_MachO_x86_64(std::unique_ptr Ctx) { if (auto MarkLive = Ctx->getMarkLivePass(TT)) Config.PrePrunePasses.push_back(std::move(MarkLive)); else - Config.PrePrunePasses.push_back(markAllAtomsLive); + Config.PrePrunePasses.push_back(markAllSymbolsLive); // Add an in-place GOT/Stubs pass. 
- Config.PostPrunePasses.push_back([](AtomGraph &G) -> Error { + Config.PostPrunePasses.push_back([](LinkGraph &G) -> Error { MachO_x86_64_GOTAndStubsBuilder(G).run(); return Error::success(); }); @@ -565,6 +590,8 @@ StringRef getMachOX86RelocationKindName(Edge::Kind R) { switch (R) { case Branch32: return "Branch32"; + case Pointer32: + return "Pointer32"; case Pointer64: return "Pointer64"; case Pointer64Anon: diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 08815b7a80a..94741f5f01d 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -23,7 +23,7 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MutexGuard.h" +#include using namespace llvm; @@ -88,7 +88,7 @@ MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr TM, } MCJIT::~MCJIT() { - MutexGuard locked(lock); + std::lock_guard locked(lock); Dyld.deregisterEHFrames(); @@ -100,7 +100,7 @@ MCJIT::~MCJIT() { } void MCJIT::addModule(std::unique_ptr M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); if (M->getDataLayout().isDefault()) M->setDataLayout(getDataLayout()); @@ -109,7 +109,7 @@ void MCJIT::addModule(std::unique_ptr M) { } bool MCJIT::removeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); return OwnedModules.removeModule(M); } @@ -136,14 +136,14 @@ void MCJIT::addArchive(object::OwningBinary A) { } void MCJIT::setObjectCache(ObjectCache* NewCache) { - MutexGuard locked(lock); + std::lock_guard locked(lock); ObjCache = NewCache; } std::unique_ptr MCJIT::emitObject(Module *M) { assert(M && "Can not emit a null module"); - MutexGuard locked(lock); + std::lock_guard locked(lock); // Materialize all globals in the module if they have not been // materialized already. 
@@ -185,7 +185,7 @@ std::unique_ptr MCJIT::emitObject(Module *M) { void MCJIT::generateCodeForModule(Module *M) { // Get a thread lock to make sure we aren't trying to load multiple times - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && @@ -234,7 +234,7 @@ void MCJIT::generateCodeForModule(Module *M) { } void MCJIT::finalizeLoadedModules() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Resolve any outstanding relocations. Dyld.resolveRelocations(); @@ -250,7 +250,7 @@ void MCJIT::finalizeLoadedModules() { // FIXME: Rename this. void MCJIT::finalizeObject() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Generate code for module is going to move objects out of the 'added' list, // so we need to copy that out before using it: @@ -265,7 +265,7 @@ void MCJIT::finalizeObject() { } void MCJIT::finalizeModule(Module *M) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // This must be a module which has already been added to this MCJIT instance. assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); @@ -292,7 +292,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, if (DemangledName[0] == getDataLayout().getGlobalPrefix()) DemangledName = DemangledName.substr(1); - MutexGuard locked(lock); + std::lock_guard locked(lock); // If it hasn't already been generated, see if it's in one of our modules. for (ModulePtrSet::iterator I = OwnedModules.begin_added(), @@ -332,7 +332,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, JITSymbol MCJIT::findSymbol(const std::string &Name, bool CheckFunctionsOnly) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // First, check to see if we already have this symbol. 
if (auto Sym = findExistingSymbol(Name)) @@ -388,7 +388,7 @@ JITSymbol MCJIT::findSymbol(const std::string &Name, } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, false); if (Result != 0) finalizeLoadedModules(); @@ -396,7 +396,7 @@ uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { } uint64_t MCJIT::getFunctionAddress(const std::string &Name) { - MutexGuard locked(lock); + std::lock_guard locked(lock); uint64_t Result = getSymbolAddress(Name, true); if (Result != 0) finalizeLoadedModules(); @@ -405,7 +405,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { // Deprecated. Use getFunctionAddress instead. void *MCJIT::getPointerToFunction(Function *F) { - MutexGuard locked(lock); + std::lock_guard locked(lock); Mangler Mang; SmallString<128> Name; @@ -632,14 +632,14 @@ void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { void MCJIT::RegisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; - MutexGuard locked(lock); + std::lock_guard locked(lock); auto I = find(reverse(EventListeners), L); if (I != EventListeners.rend()) { std::swap(*I, EventListeners.back()); @@ -651,7 +651,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { uint64_t Key = static_cast(reinterpret_cast(Obj.getData().data())); - MutexGuard locked(lock); + std::lock_guard locked(lock); MemMgr->notifyObjectLoaded(this, Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->notifyObjectLoaded(Key, Obj, L); @@ -661,7 +661,7 @@ void MCJIT::notifyObjectLoaded(const object::ObjectFile &Obj, void MCJIT::notifyFreeingObject(const object::ObjectFile &Obj) { uint64_t Key = 
static_cast(reinterpret_cast(Obj.getData().data())); - MutexGuard locked(lock); + std::lock_guard locked(lock); for (JITEventListener *L : EventListeners) L->notifyFreeingObject(Key); } diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 2ad9d24555f..bb5d96051da 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -177,7 +177,7 @@ void OProfileJITEventListener::notifyFreeingObject(ObjectKey Key) { namespace llvm { JITEventListener *JITEventListener::createOProfileJITEventListener() { - return new OProfileJITEventListener(llvm::make_unique()); + return new OProfileJITEventListener(std::make_unique()); } } // namespace llvm diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index 1a266773692..b78d2531382 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -17,11 +17,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include #include #include +#include #include #include #include @@ -54,7 +54,7 @@ bool OProfileWrapper::initialize() { using namespace llvm; using namespace llvm::sys; - MutexGuard Guard(OProfileInitializationMutex); + std::lock_guard Guard(OProfileInitializationMutex); if (Initialized) return OpenAgentFunc != 0; diff --git a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 99bf53bc3af..75ddbc30445 100644 --- a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -54,11 +54,12 @@ static ThreadSafeModule extractSubModule(ThreadSafeModule &TSM, llvm_unreachable("Unsupported global type"); }; - 
auto NewTSMod = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs); - auto &M = *NewTSMod.getModule(); - M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str()); + auto NewTSM = cloneToNewContext(TSM, ShouldExtract, DeleteExtractedDefs); + NewTSM.withModuleDo([&](Module &M) { + M.setModuleIdentifier((M.getModuleIdentifier() + Suffix).str()); + }); - return NewTSMod; + return NewTSM; } namespace llvm { @@ -117,39 +118,44 @@ void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) { this->Partition = std::move(Partition); } +void CompileOnDemandLayer::setImplMap(ImplSymbolMap *Imp) { + this->AliaseeImpls = Imp; +} void CompileOnDemandLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Null module"); + assert(TSM && "Null module"); auto &ES = getExecutionSession(); - auto &M = *TSM.getModule(); - // First, do some cleanup on the module: - cleanUpModule(M); - - // Now sort the callables and non-callables, build re-exports and lodge the + // Sort the callables and non-callables, build re-exports and lodge the // actual module with the implementation dylib. 
auto &PDR = getPerDylibResources(R.getTargetJITDylib()); - MangleAndInterner Mangle(ES, M.getDataLayout()); SymbolAliasMap NonCallables; SymbolAliasMap Callables; - for (auto &GV : M.global_values()) { - if (GV.isDeclaration() || GV.hasLocalLinkage() || GV.hasAppendingLinkage()) - continue; + TSM.withModuleDo([&](Module &M) { + // First, do some cleanup on the module: + cleanUpModule(M); - auto Name = Mangle(GV.getName()); - auto Flags = JITSymbolFlags::fromGlobalValue(GV); - if (Flags.isCallable()) - Callables[Name] = SymbolAliasMapEntry(Name, Flags); - else - NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); - } + MangleAndInterner Mangle(ES, M.getDataLayout()); + for (auto &GV : M.global_values()) { + if (GV.isDeclaration() || GV.hasLocalLinkage() || + GV.hasAppendingLinkage()) + continue; + + auto Name = Mangle(GV.getName()); + auto Flags = JITSymbolFlags::fromGlobalValue(GV); + if (Flags.isCallable()) + Callables[Name] = SymbolAliasMapEntry(Name, Flags); + else + NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); + } + }); // Create a partitioning materialization unit and lodge it with the // implementation dylib. if (auto Err = PDR.getImplDylib().define( - llvm::make_unique( + std::make_unique( ES, std::move(TSM), R.getVModuleKey(), *this))) { ES.reportError(std::move(Err)); R.failMaterialization(); @@ -158,7 +164,7 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true)); R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), - std::move(Callables))); + std::move(Callables), AliaseeImpls)); } CompileOnDemandLayer::PerDylibResources & @@ -239,14 +245,16 @@ void CompileOnDemandLayer::emitPartition( // memory manager instance to the linking layer. 
auto &ES = getExecutionSession(); - GlobalValueSet RequestedGVs; for (auto &Name : R.getRequestedSymbols()) { assert(Defs.count(Name) && "No definition for symbol"); RequestedGVs.insert(Defs[Name]); } - auto GVsToExtract = Partition(RequestedGVs); + /// Perform partitioning with the context lock held, since the partition + /// function is allowed to access the globals to compute the partition. + auto GVsToExtract = + TSM.withModuleDo([&](Module &M) { return Partition(RequestedGVs); }); // Take a 'None' partition to mean the whole module (as opposed to an empty // partition, which means "materialize nothing"). Emit the whole module @@ -259,43 +267,52 @@ void CompileOnDemandLayer::emitPartition( // If the partition is empty, return the whole module to the symbol table. if (GVsToExtract->empty()) { - R.replace(llvm::make_unique( + R.replace(std::make_unique( std::move(TSM), R.getSymbols(), std::move(Defs), *this)); return; } // Ok -- we actually need to partition the symbols. Promote the symbol - // linkages/names. - // FIXME: We apply this once per partitioning. It's safe, but overkill. - { - auto PromotedGlobals = PromoteSymbols(*TSM.getModule()); - if (!PromotedGlobals.empty()) { - MangleAndInterner Mangle(ES, TSM.getModule()->getDataLayout()); - SymbolFlagsMap SymbolFlags; - for (auto &GV : PromotedGlobals) - SymbolFlags[Mangle(GV->getName())] = - JITSymbolFlags::fromGlobalValue(*GV); - if (auto Err = R.defineMaterializing(SymbolFlags)) { - ES.reportError(std::move(Err)); - R.failMaterialization(); - return; - } - } + // linkages/names, expand the partition to include any required symbols + // (i.e. symbols that can't be separated from our partition), and + // then extract the partition. + // + // FIXME: We apply this promotion once per partitioning. It's safe, but + // overkill. 
+ + auto ExtractedTSM = + TSM.withModuleDo([&](Module &M) -> Expected { + auto PromotedGlobals = PromoteSymbols(M); + if (!PromotedGlobals.empty()) { + MangleAndInterner Mangle(ES, M.getDataLayout()); + SymbolFlagsMap SymbolFlags; + for (auto &GV : PromotedGlobals) + SymbolFlags[Mangle(GV->getName())] = + JITSymbolFlags::fromGlobalValue(*GV); + if (auto Err = R.defineMaterializing(SymbolFlags)) + return std::move(Err); + } + + expandPartition(*GVsToExtract); + + // Extract the requested partiton (plus any necessary aliases) and + // put the rest back into the impl dylib. + auto ShouldExtract = [&](const GlobalValue &GV) -> bool { + return GVsToExtract->count(&GV); + }; + + return extractSubModule(TSM, ".submodule", ShouldExtract); + }); + + if (!ExtractedTSM) { + ES.reportError(ExtractedTSM.takeError()); + R.failMaterialization(); + return; } - expandPartition(*GVsToExtract); - - // Extract the requested partiton (plus any necessary aliases) and - // put the rest back into the impl dylib. 
- auto ShouldExtract = [&](const GlobalValue &GV) -> bool { - return GVsToExtract->count(&GV); - }; - - auto ExtractedTSM = extractSubModule(TSM, ".submodule", ShouldExtract); - R.replace(llvm::make_unique( + R.replace(std::make_unique( ES, std::move(TSM), R.getVModuleKey(), *this)); - - BaseLayer.emit(std::move(R), std::move(ExtractedTSM)); + BaseLayer.emit(std::move(R), std::move(*ExtractedTSM)); } } // end namespace orc diff --git a/lib/ExecutionEngine/Orc/CompileUtils.cpp b/lib/ExecutionEngine/Orc/CompileUtils.cpp index d46b6fcf9a5..f8251627a4e 100644 --- a/lib/ExecutionEngine/Orc/CompileUtils.cpp +++ b/lib/ExecutionEngine/Orc/CompileUtils.cpp @@ -42,7 +42,7 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) { PM.run(M); } - auto ObjBuffer = llvm::make_unique( + auto ObjBuffer = std::make_unique( std::move(ObjBufferSV), ""); diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp index dac37e030e0..5c7d888c2d6 100644 --- a/lib/ExecutionEngine/Orc/Core.cpp +++ b/lib/ExecutionEngine/Orc/Core.cpp @@ -151,6 +151,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) { } raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) { + if (Flags.hasError()) + OS << "[*ERROR*]"; if (Flags.isCallable()) OS << "[Callable]"; else @@ -224,7 +226,7 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases) { for (auto &KV : Aliases) OS << " " << *KV.first << ": " << KV.second.Aliasee << " " << KV.second.AliasFlags; - OS << " }\n"; + OS << " }"; return OS; } @@ -238,15 +240,18 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) { return OS << "Materializing"; case SymbolState::Resolved: return OS << "Resolved"; + case SymbolState::Emitted: + return OS << "Emitted"; case SymbolState::Ready: return OS << "Ready"; } llvm_unreachable("Invalid state"); } -FailedToMaterialize::FailedToMaterialize(SymbolNameSet Symbols) +FailedToMaterialize::FailedToMaterialize( + 
std::shared_ptr Symbols) : Symbols(std::move(Symbols)) { - assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); + assert(!this->Symbols->empty() && "Can not fail to resolve an empty set"); } std::error_code FailedToMaterialize::convertToErrorCode() const { @@ -254,7 +259,7 @@ std::error_code FailedToMaterialize::convertToErrorCode() const { } void FailedToMaterialize::log(raw_ostream &OS) const { - OS << "Failed to materialize symbols: " << Symbols; + OS << "Failed to materialize symbols: " << *Symbols; } SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) @@ -367,35 +372,35 @@ SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const { return JD.getRequestedSymbols(SymbolFlags); } -void MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) { +Error MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) { LLVM_DEBUG({ dbgs() << "In " << JD.getName() << " resolving " << Symbols << "\n"; }); #ifndef NDEBUG for (auto &KV : Symbols) { + auto WeakFlags = JITSymbolFlags::Weak | JITSymbolFlags::Common; auto I = SymbolFlags.find(KV.first); assert(I != SymbolFlags.end() && "Resolving symbol outside this responsibility set"); - if (I->second.isWeak()) - assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) && - "Resolving symbol with incorrect flags"); - else - assert(I->second == KV.second.getFlags() && - "Resolving symbol with incorrect flags"); + assert((KV.second.getFlags() & ~WeakFlags) == (I->second & ~WeakFlags) && + "Resolving symbol with incorrect flags"); } #endif - JD.resolve(Symbols); + return JD.resolve(Symbols); } -void MaterializationResponsibility::notifyEmitted() { +Error MaterializationResponsibility::notifyEmitted() { LLVM_DEBUG({ dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n"; }); - JD.emit(SymbolFlags); + if (auto Err = JD.emit(SymbolFlags)) + return Err; + SymbolFlags.clear(); + return Error::success(); } Error 
MaterializationResponsibility::defineMaterializing( @@ -417,12 +422,13 @@ void MaterializationResponsibility::failMaterialization() { << SymbolFlags << "\n"; }); - SymbolNameSet FailedSymbols; - for (auto &KV : SymbolFlags) - FailedSymbols.insert(KV.first); + JITDylib::FailedSymbolsWorklist Worklist; - JD.notifyFailed(FailedSymbols); + for (auto &KV : SymbolFlags) + Worklist.push_back(std::make_pair(&JD, KV.first)); SymbolFlags.clear(); + + JD.notifyFailed(std::move(Worklist)); } void MaterializationResponsibility::replace( @@ -485,8 +491,9 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const { void AbsoluteSymbolsMaterializationUnit::materialize( MaterializationResponsibility R) { - R.notifyResolved(Symbols); - R.notifyEmitted(); + // No dependencies, so these calls can't fail. + cantFail(R.notifyResolved(Symbols)); + cantFail(R.notifyEmitted()); } void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD, @@ -625,6 +632,7 @@ void ReExportsMaterializationUnit::materialize( }; auto OnComplete = [QueryInfo](Expected Result) { + auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); if (Result) { SymbolMap ResolutionMap; for (auto &KV : QueryInfo->Aliases) { @@ -633,10 +641,17 @@ void ReExportsMaterializationUnit::materialize( ResolutionMap[KV.first] = JITEvaluatedSymbol( (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags); } - QueryInfo->R.notifyResolved(ResolutionMap); - QueryInfo->R.notifyEmitted(); + if (auto Err = QueryInfo->R.notifyResolved(ResolutionMap)) { + ES.reportError(std::move(Err)); + QueryInfo->R.failMaterialization(); + return; + } + if (auto Err = QueryInfo->R.notifyEmitted()) { + ES.reportError(std::move(Err)); + QueryInfo->R.failMaterialization(); + return; + } } else { - auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession(); ES.reportError(Result.takeError()); QueryInfo->R.failMaterialization(); } @@ -694,7 +709,7 @@ ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD, 
Allow(std::move(Allow)) {} Expected -ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) { +ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) { orc::SymbolNameSet Added; orc::SymbolAliasMap AliasMap; @@ -716,6 +731,19 @@ ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) { return Added; } +JITDylib::DefinitionGenerator::~DefinitionGenerator() {} + +void JITDylib::removeGenerator(DefinitionGenerator &G) { + ES.runSessionLocked([&]() { + auto I = std::find_if(DefGenerators.begin(), DefGenerators.end(), + [&](const std::unique_ptr &H) { + return H.get() == &G; + }); + assert(I != DefGenerators.end() && "Generator not found"); + DefGenerators.erase(I); + }); +} + Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) { return ES.runSessionLocked([&]() -> Error { std::vector AddedSyms; @@ -823,26 +851,52 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name, assert(Symbols[Name].isInMaterializationPhase() && "Can not add dependencies for a symbol that is not materializing"); - auto &MI = MaterializingInfos[Name]; - assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol"); + // If Name is already in an error state then just bail out. + if (Symbols[Name].getFlags().hasError()) + return; + auto &MI = MaterializingInfos[Name]; + assert(Symbols[Name].getState() != SymbolState::Emitted && + "Can not add dependencies to an emitted symbol"); + + bool DependsOnSymbolInErrorState = false; + + // Register dependencies, record whether any depenendency is in the error + // state. for (auto &KV : Dependencies) { assert(KV.first && "Null JITDylib in dependency?"); auto &OtherJITDylib = *KV.first; auto &DepsOnOtherJITDylib = MI.UnemittedDependencies[&OtherJITDylib]; for (auto &OtherSymbol : KV.second) { + + // Check the sym entry for the dependency. 
+ auto OtherSymI = OtherJITDylib.Symbols.find(OtherSymbol); + #ifndef NDEBUG - // Assert that this symbol exists and has not been emitted already. - auto SymI = OtherJITDylib.Symbols.find(OtherSymbol); - assert(SymI != OtherJITDylib.Symbols.end() && - (SymI->second.getState() != SymbolState::Ready && - "Dependency on emitted symbol")); + // Assert that this symbol exists and has not reached the ready state + // already. + assert(OtherSymI != OtherJITDylib.Symbols.end() && + (OtherSymI->second.getState() != SymbolState::Ready && + "Dependency on emitted/ready symbol")); #endif + auto &OtherSymEntry = OtherSymI->second; + + // If the dependency is in an error state then note this and continue, + // we will move this symbol to the error state below. + if (OtherSymEntry.getFlags().hasError()) { + DependsOnSymbolInErrorState = true; + continue; + } + + // If the dependency was not in the error state then add it to + // our list of dependencies. + assert(OtherJITDylib.MaterializingInfos.count(OtherSymbol) && + "No MaterializingInfo for dependency"); auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol]; - if (OtherMI.IsEmitted) + if (OtherSymEntry.getState() == SymbolState::Emitted) transferEmittedNodeDependencies(MI, Name, OtherMI); else if (&OtherJITDylib != this || OtherSymbol != Name) { OtherMI.Dependants[this].insert(Name); @@ -853,63 +907,142 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name, if (DepsOnOtherJITDylib.empty()) MI.UnemittedDependencies.erase(&OtherJITDylib); } + + // If this symbol dependended on any symbols in the error state then move + // this symbol to the error state too. 
+ if (DependsOnSymbolInErrorState) + Symbols[Name].setFlags(Symbols[Name].getFlags() | JITSymbolFlags::HasError); } -void JITDylib::resolve(const SymbolMap &Resolved) { - auto CompletedQueries = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet CompletedQueries; +Error JITDylib::resolve(const SymbolMap &Resolved) { + SymbolNameSet SymbolsInErrorState; + AsynchronousSymbolQuerySet CompletedQueries; + + ES.runSessionLocked([&, this]() { + struct WorklistEntry { + SymbolTable::iterator SymI; + JITEvaluatedSymbol ResolvedSym; + }; + + std::vector Worklist; + Worklist.reserve(Resolved.size()); + + // Build worklist and check for any symbols in the error state. for (const auto &KV : Resolved) { - auto &Name = KV.first; - auto Sym = KV.second; - auto I = Symbols.find(Name); + assert(!KV.second.getFlags().hasError() && + "Resolution result can not have error flag set"); - assert(I != Symbols.end() && "Symbol not found"); - assert(!I->second.hasMaterializerAttached() && + auto SymI = Symbols.find(KV.first); + + assert(SymI != Symbols.end() && "Symbol not found"); + assert(!SymI->second.hasMaterializerAttached() && "Resolving symbol with materializer attached?"); - assert(I->second.getState() == SymbolState::Materializing && + assert(SymI->second.getState() == SymbolState::Materializing && "Symbol should be materializing"); - assert(I->second.getAddress() == 0 && "Symbol has already been resolved"); + assert(SymI->second.getAddress() == 0 && + "Symbol has already been resolved"); - assert((Sym.getFlags() & ~JITSymbolFlags::Weak) == - (I->second.getFlags() & ~JITSymbolFlags::Weak) && - "Resolved flags should match the declared flags"); + if (SymI->second.getFlags().hasError()) + SymbolsInErrorState.insert(KV.first); + else { + auto Flags = KV.second.getFlags(); + Flags &= ~(JITSymbolFlags::Weak | JITSymbolFlags::Common); + assert(Flags == (SymI->second.getFlags() & + ~(JITSymbolFlags::Weak | JITSymbolFlags::Common)) && + "Resolved flags should match the declared 
flags"); - // Once resolved, symbols can never be weak. - JITSymbolFlags ResolvedFlags = Sym.getFlags(); - ResolvedFlags &= ~JITSymbolFlags::Weak; - I->second.setAddress(Sym.getAddress()); - I->second.setFlags(ResolvedFlags); - I->second.setState(SymbolState::Resolved); + Worklist.push_back( + {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)}); + } + } + + // If any symbols were in the error state then bail out. + if (!SymbolsInErrorState.empty()) + return; + + while (!Worklist.empty()) { + auto SymI = Worklist.back().SymI; + auto ResolvedSym = Worklist.back().ResolvedSym; + Worklist.pop_back(); + + auto &Name = SymI->first; + + // Resolved symbols can not be weak: discard the weak flag. + JITSymbolFlags ResolvedFlags = ResolvedSym.getFlags(); + SymI->second.setAddress(ResolvedSym.getAddress()); + SymI->second.setFlags(ResolvedFlags); + SymI->second.setState(SymbolState::Resolved); auto &MI = MaterializingInfos[Name]; for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) { - Q->notifySymbolMetRequiredState(Name, Sym); + Q->notifySymbolMetRequiredState(Name, ResolvedSym); + Q->removeQueryDependence(*this, Name); if (Q->isComplete()) CompletedQueries.insert(std::move(Q)); } } - - return CompletedQueries; }); + assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) && + "Can't fail symbols and completed queries at the same time"); + + // If we failed any symbols then return an error. + if (!SymbolsInErrorState.empty()) { + auto FailedSymbolsDepMap = std::make_shared(); + (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); + return make_error(std::move(FailedSymbolsDepMap)); + } + + // Otherwise notify all the completed queries. 
for (auto &Q : CompletedQueries) { assert(Q->isComplete() && "Q not completed"); Q->handleComplete(); } + + return Error::success(); } -void JITDylib::emit(const SymbolFlagsMap &Emitted) { - auto CompletedQueries = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet CompletedQueries; +Error JITDylib::emit(const SymbolFlagsMap &Emitted) { + AsynchronousSymbolQuerySet CompletedQueries; + SymbolNameSet SymbolsInErrorState; + ES.runSessionLocked([&, this]() { + std::vector Worklist; + + // Scan to build worklist, record any symbols in the erorr state. for (const auto &KV : Emitted) { - const auto &Name = KV.first; + auto &Name = KV.first; + + auto SymI = Symbols.find(Name); + assert(SymI != Symbols.end() && "No symbol table entry for Name"); + + if (SymI->second.getFlags().hasError()) + SymbolsInErrorState.insert(Name); + else + Worklist.push_back(SymI); + } + + // If any symbols were in the error state then bail out. + if (!SymbolsInErrorState.empty()) + return; + + // Otherwise update dependencies and move to the emitted state. + while (!Worklist.empty()) { + auto SymI = Worklist.back(); + Worklist.pop_back(); + + auto &Name = SymI->first; + auto &SymEntry = SymI->second; + + // Move symbol to the emitted state. + assert(SymEntry.getState() == SymbolState::Resolved && + "Emitting from state other than Resolved"); + SymEntry.setState(SymbolState::Emitted); auto MII = MaterializingInfos.find(Name); assert(MII != MaterializingInfos.end() && "Missing MaterializingInfo entry"); - auto &MI = MII->second; // For each dependant, transfer this node's emitted dependencies to @@ -926,8 +1059,12 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { auto &DependantMI = DependantMII->second; // Remove the dependant's dependency on this node. 
+ assert(DependantMI.UnemittedDependencies.count(this) && + "Dependant does not have an unemitted dependencies record for " + "this JITDylib"); assert(DependantMI.UnemittedDependencies[this].count(Name) && "Dependant does not count this symbol as a dependency?"); + DependantMI.UnemittedDependencies[this].erase(Name); if (DependantMI.UnemittedDependencies[this].empty()) DependantMI.UnemittedDependencies.erase(this); @@ -936,20 +1073,22 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { DependantJD.transferEmittedNodeDependencies(DependantMI, DependantName, MI); + auto DependantSymI = DependantJD.Symbols.find(DependantName); + assert(DependantSymI != DependantJD.Symbols.end() && + "Dependant has no entry in the Symbols table"); + auto &DependantSymEntry = DependantSymI->second; + // If the dependant is emitted and this node was the last of its // unemitted dependencies then the dependant node is now ready, so // notify any pending queries on the dependant node. - if (DependantMI.IsEmitted && + if (DependantSymEntry.getState() == SymbolState::Emitted && DependantMI.UnemittedDependencies.empty()) { assert(DependantMI.Dependants.empty() && "Dependants should be empty by now"); // Since this dependant is now ready, we erase its MaterializingInfo // and update its materializing state. 
- auto DependantSymI = DependantJD.Symbols.find(DependantName); - assert(DependantSymI != DependantJD.Symbols.end() && - "Dependant has no entry in the Symbols table"); - DependantSymI->second.setState(SymbolState::Ready); + DependantSymEntry.setState(SymbolState::Ready); for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) { Q->notifySymbolMetRequiredState( @@ -963,12 +1102,9 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { } } } - MI.Dependants.clear(); - MI.IsEmitted = true; + MI.Dependants.clear(); if (MI.UnemittedDependencies.empty()) { - auto SymI = Symbols.find(Name); - assert(SymI != Symbols.end() && "Symbol has no entry in Symbols table"); SymI->second.setState(SymbolState::Ready); for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) { Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); @@ -979,80 +1115,138 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) { MaterializingInfos.erase(MII); } } - - return CompletedQueries; }); + assert((SymbolsInErrorState.empty() || CompletedQueries.empty()) && + "Can't fail symbols and completed queries at the same time"); + + // If we failed any symbols then return an error. + if (!SymbolsInErrorState.empty()) { + auto FailedSymbolsDepMap = std::make_shared(); + (*FailedSymbolsDepMap)[this] = std::move(SymbolsInErrorState); + return make_error(std::move(FailedSymbolsDepMap)); + } + + // Otherwise notify all the completed queries. for (auto &Q : CompletedQueries) { assert(Q->isComplete() && "Q is not complete"); Q->handleComplete(); } + + return Error::success(); } -void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) { +void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) { + AsynchronousSymbolQuerySet FailedQueries; + auto FailedSymbolsMap = std::make_shared(); - // FIXME: This should fail any transitively dependant symbols too. + // Failing no symbols is a no-op. 
+ if (Worklist.empty()) + return; - auto FailedQueriesToNotify = ES.runSessionLocked([&, this]() { - AsynchronousSymbolQuerySet FailedQueries; - std::vector MIIsToRemove; + auto &ES = Worklist.front().first->getExecutionSession(); - for (auto &Name : FailedSymbols) { - auto I = Symbols.find(Name); - assert(I != Symbols.end() && "Symbol not present in this JITDylib"); - Symbols.erase(I); + ES.runSessionLocked([&]() { + while (!Worklist.empty()) { + assert(Worklist.back().first && "Failed JITDylib can not be null"); + auto &JD = *Worklist.back().first; + auto Name = std::move(Worklist.back().second); + Worklist.pop_back(); - auto MII = MaterializingInfos.find(Name); + (*FailedSymbolsMap)[&JD].insert(Name); - // If we have not created a MaterializingInfo for this symbol yet then - // there is nobody to notify. - if (MII == MaterializingInfos.end()) + assert(JD.Symbols.count(Name) && "No symbol table entry for Name"); + auto &Sym = JD.Symbols[Name]; + + // Move the symbol into the error state. + // Note that this may be redundant: The symbol might already have been + // moved to this state in response to the failure of a dependence. + Sym.setFlags(Sym.getFlags() | JITSymbolFlags::HasError); + + // FIXME: Come up with a sane mapping of state to + // presence-of-MaterializingInfo so that we can assert presence / absence + // here, rather than testing it. + auto MII = JD.MaterializingInfos.find(Name); + + if (MII == JD.MaterializingInfos.end()) continue; - // Remove this symbol from the dependants list of any dependencies. 
- for (auto &KV : MII->second.UnemittedDependencies) { - auto *DependencyJD = KV.first; - auto &Dependencies = KV.second; - for (auto &DependencyName : Dependencies) { - auto DependencyMII = - DependencyJD->MaterializingInfos.find(DependencyName); - assert(DependencyMII != DependencyJD->MaterializingInfos.end() && - "Unemitted dependency must have a MaterializingInfo entry"); - assert(DependencyMII->second.Dependants.count(this) && - "Dependency's dependants list does not contain this JITDylib"); - assert(DependencyMII->second.Dependants[this].count(Name) && - "Dependency's dependants list does not contain dependant"); - DependencyMII->second.Dependants[this].erase(Name); + auto &MI = MII->second; + + // Move all dependants to the error state and disconnect from them. + for (auto &KV : MI.Dependants) { + auto &DependantJD = *KV.first; + for (auto &DependantName : KV.second) { + assert(DependantJD.Symbols.count(DependantName) && + "No symbol table entry for DependantName"); + auto &DependantSym = DependantJD.Symbols[DependantName]; + DependantSym.setFlags(DependantSym.getFlags() | + JITSymbolFlags::HasError); + + assert(DependantJD.MaterializingInfos.count(DependantName) && + "No MaterializingInfo for dependant"); + auto &DependantMI = DependantJD.MaterializingInfos[DependantName]; + + auto UnemittedDepI = DependantMI.UnemittedDependencies.find(&JD); + assert(UnemittedDepI != DependantMI.UnemittedDependencies.end() && + "No UnemittedDependencies entry for this JITDylib"); + assert(UnemittedDepI->second.count(Name) && + "No UnemittedDependencies entry for this symbol"); + UnemittedDepI->second.erase(Name); + if (UnemittedDepI->second.empty()) + DependantMI.UnemittedDependencies.erase(UnemittedDepI); + + // If this symbol is already in the emitted state then we need to + // take responsibility for failing its queries, so add it to the + // worklist. 
+ if (DependantSym.getState() == SymbolState::Emitted) { + assert(DependantMI.Dependants.empty() && + "Emitted symbol should not have dependants"); + Worklist.push_back(std::make_pair(&DependantJD, DependantName)); + } } } + MI.Dependants.clear(); - // Copy all the queries to the FailedQueries list, then abandon them. - // This has to be a copy, and the copy has to come before the abandon - // operation: Each Q.detach() call will reach back into this - // PendingQueries list to remove Q. - for (auto &Q : MII->second.pendingQueries()) + // Disconnect from all unemitted depenencies. + for (auto &KV : MI.UnemittedDependencies) { + auto &UnemittedDepJD = *KV.first; + for (auto &UnemittedDepName : KV.second) { + auto UnemittedDepMII = + UnemittedDepJD.MaterializingInfos.find(UnemittedDepName); + assert(UnemittedDepMII != UnemittedDepJD.MaterializingInfos.end() && + "Missing MII for unemitted dependency"); + assert(UnemittedDepMII->second.Dependants.count(&JD) && + "JD not listed as a dependant of unemitted dependency"); + assert(UnemittedDepMII->second.Dependants[&JD].count(Name) && + "Name is not listed as a dependant of unemitted dependency"); + UnemittedDepMII->second.Dependants[&JD].erase(Name); + if (UnemittedDepMII->second.Dependants[&JD].empty()) + UnemittedDepMII->second.Dependants.erase(&JD); + } + } + MI.UnemittedDependencies.clear(); + + // Collect queries to be failed for this MII. + for (auto &Q : MII->second.pendingQueries()) { + // Add the query to the list to be failed and detach it. 
FailedQueries.insert(Q); + Q->detach(); + } - MIIsToRemove.push_back(std::move(MII)); + assert(MI.Dependants.empty() && + "Can not delete MaterializingInfo with dependants still attached"); + assert(MI.UnemittedDependencies.empty() && + "Can not delete MaterializingInfo with unemitted dependencies " + "still attached"); + assert(!MI.hasQueriesPending() && + "Can not delete MaterializingInfo with queries pending"); + JD.MaterializingInfos.erase(MII); } - - // Detach failed queries. - for (auto &Q : FailedQueries) - Q->detach(); - - // Remove the MaterializingInfos. - for (auto &MII : MIIsToRemove) { - assert(!MII->second.hasQueriesPending() && - "Queries remain after symbol was failed"); - - MaterializingInfos.erase(MII); - } - - return FailedQueries; }); - for (auto &Q : FailedQueriesToNotify) - Q->handleFailed(make_error(FailedSymbols)); + for (auto &Q : FailedQueries) + Q->handleFailed(make_error(FailedSymbolsMap)); } void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder, @@ -1159,10 +1353,18 @@ Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { if (!Unresolved) return Unresolved.takeError(); - if (DefGenerator && !Unresolved->empty()) { - auto NewDefs = DefGenerator(*this, *Unresolved); + /// Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we've resolved everything. + if (Unresolved->empty()) + break; + + // Run this generator. 
+ auto NewDefs = DG->tryToGenerate(*this, *Unresolved); if (!NewDefs) return NewDefs.takeError(); + if (!NewDefs->empty()) { auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs); if (!Unresolved2) @@ -1171,7 +1373,10 @@ Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { assert(Unresolved2->empty() && "All fallback defs should have been found by lookupFlagsImpl"); } - }; + + for (auto &Name : *NewDefs) + Unresolved->erase(Name); + } return Result; }); } @@ -1197,15 +1402,34 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, MaterializationUnitList &MUs) { assert(Q && "Query can not be null"); - lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs); - if (DefGenerator && !Unresolved.empty()) { - auto NewDefs = DefGenerator(*this, Unresolved); + if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs)) + return Err; + + // Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we have resolved everything. + if (Unresolved.empty()) + break; + + // Run the generator. + auto NewDefs = DG->tryToGenerate(*this, Unresolved); + + // If the generator returns an error then bail out. if (!NewDefs) return NewDefs.takeError(); + + // If the generator was able to generate new definitions for any of the + // unresolved symbols then lodge the query against them. if (!NewDefs->empty()) { for (auto &D : *NewDefs) Unresolved.erase(D); - lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs); + + // Lodge query. This can not fail as any new definitions were added + // by the generator under the session locked. Since they can't have + // started materializing yet the can not have failed. 
+ cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs)); + assert(NewDefs->empty() && "All fallback defs should have been found by lookupImpl"); } @@ -1214,7 +1438,7 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, return Error::success(); } -void JITDylib::lodgeQueryImpl( +Error JITDylib::lodgeQueryImpl( std::shared_ptr &Q, SymbolNameSet &Unresolved, bool MatchNonExported, std::vector> &MUs) { @@ -1235,6 +1459,14 @@ void JITDylib::lodgeQueryImpl( // Unresolved set. ToRemove.push_back(Name); + // If we matched against this symbol but it is in the error state then + // bail out and treat it as a failure to materialize. + if (SymI->second.getFlags().hasError()) { + auto FailedSymbolsMap = std::make_shared(); + (*FailedSymbolsMap)[this] = {Name}; + return make_error(std::move(FailedSymbolsMap)); + } + // If this symbol already meets the required state for then notify the // query and continue. if (SymI->second.getState() >= Q->getRequiredState()) { @@ -1277,6 +1509,8 @@ void JITDylib::lodgeQueryImpl( // Remove any symbols that we found. for (auto &Name : ToRemove) Unresolved.erase(Name); + + return Error::success(); } Expected @@ -1292,9 +1526,16 @@ JITDylib::legacyLookup(std::shared_ptr Q, SymbolNameSet Unresolved = std::move(Names); auto Err = ES.runSessionLocked([&, this]() -> Error { QueryComplete = lookupImpl(Q, MUs, Unresolved); - if (DefGenerator && !Unresolved.empty()) { + + // Run any definition generators. + for (auto &DG : DefGenerators) { + + // Bail out early if we have resolved everything. 
+ if (Unresolved.empty()) + break; + assert(!QueryComplete && "query complete but unresolved symbols remain?"); - auto NewDefs = DefGenerator(*this, Unresolved); + auto NewDefs = DG->tryToGenerate(*this, Unresolved); if (!NewDefs) return NewDefs.takeError(); if (!NewDefs->empty()) { @@ -1432,8 +1673,6 @@ void JITDylib::dump(raw_ostream &OS) { OS << " MaterializingInfos entries:\n"; for (auto &KV : MaterializingInfos) { OS << " \"" << *KV.first << "\":\n" - << " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false") - << "\n" << " " << KV.second.pendingQueries().size() << " pending queries: { "; for (const auto &Q : KV.second.pendingQueries()) @@ -1486,13 +1725,6 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) { return Result; } -JITDylib::AsynchronousSymbolQueryList -JITDylib::MaterializingInfo::takeAllQueries() { - AsynchronousSymbolQueryList Result; - std::swap(Result, PendingQueries); - return Result; -} - JITDylib::JITDylib(ExecutionSession &ES, std::string Name) : ES(ES), JITDylibName(std::move(Name)) { SearchOrder.push_back({this, true}); diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index f7fc5f8f179..4a886ac0597 100644 --- a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/Layer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -67,7 +68,7 @@ CtorDtorIterator::Element CtorDtorIterator::operator*() const { } } - ConstantInt *Priority = dyn_cast(CS->getOperand(0)); + auto *Priority = cast(CS->getOperand(0)); Value *Data = CS->getNumOperands() == 3 ? 
CS->getOperand(2) : nullptr; if (Data && !isa(Data)) Data = nullptr; @@ -87,7 +88,7 @@ iterator_range getDestructors(const Module &M) { } void CtorDtorRunner::add(iterator_range CtorDtors) { - if (empty(CtorDtors)) + if (CtorDtors.empty()) return; MangleAndInterner Mangle( @@ -178,20 +179,20 @@ DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator( : Dylib(std::move(Dylib)), Allow(std::move(Allow)), GlobalPrefix(GlobalPrefix) {} -Expected +Expected> DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix, SymbolPredicate Allow) { std::string ErrMsg; auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg); if (!Lib.isValid()) return make_error(std::move(ErrMsg), inconvertibleErrorCode()); - return DynamicLibrarySearchGenerator(std::move(Lib), GlobalPrefix, - std::move(Allow)); + return std::make_unique( + std::move(Lib), GlobalPrefix, std::move(Allow)); } Expected -DynamicLibrarySearchGenerator::operator()(JITDylib &JD, - const SymbolNameSet &Names) { +DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) { orc::SymbolNameSet Added; orc::SymbolMap NewSymbols; @@ -226,5 +227,82 @@ DynamicLibrarySearchGenerator::operator()(JITDylib &JD, return Added; } +Expected> +StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName) { + auto ArchiveBuffer = errorOrToExpected(MemoryBuffer::getFile(FileName)); + + if (!ArchiveBuffer) + return ArchiveBuffer.takeError(); + + return Create(L, std::move(*ArchiveBuffer)); +} + +Expected> +StaticLibraryDefinitionGenerator::Create( + ObjectLayer &L, std::unique_ptr ArchiveBuffer) { + Error Err = Error::success(); + + std::unique_ptr ADG( + new StaticLibraryDefinitionGenerator(L, std::move(ArchiveBuffer), Err)); + + if (Err) + return std::move(Err); + + return std::move(ADG); +} + +Expected +StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, + const SymbolNameSet &Names) { + + DenseSet> ChildBufferInfos; + SymbolNameSet NewDefs; + + 
for (const auto &Name : Names) { + auto Child = Archive.findSym(*Name); + if (!Child) + return Child.takeError(); + if (*Child == None) + continue; + auto ChildBuffer = (*Child)->getMemoryBufferRef(); + if (!ChildBuffer) + return ChildBuffer.takeError(); + ChildBufferInfos.insert( + {ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()}); + NewDefs.insert(Name); + } + + for (auto ChildBufferInfo : ChildBufferInfos) { + MemoryBufferRef ChildBufferRef(ChildBufferInfo.first, + ChildBufferInfo.second); + + if (auto Err = + L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey())) + return std::move(Err); + + --UnrealizedObjects; + } + + return NewDefs; +} + +StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator( + ObjectLayer &L, std::unique_ptr ArchiveBuffer, Error &Err) + : L(L), ArchiveBuffer(std::move(ArchiveBuffer)), + Archive(*this->ArchiveBuffer, Err) { + + if (Err) + return; + + Error Err2 = Error::success(); + for (auto _ : Archive.children(Err2)) { + (void)_; + ++UnrealizedObjects; + } + + // No need to check this: We will leave it to the caller. + Err = std::move(Err2); +} + } // End namespace orc. } // End namespace llvm. 
diff --git a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index 81dfc02f55b..d311f34179c 100644 --- a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -22,9 +22,9 @@ void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) { void IRCompileLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Module must not be null"); + assert(TSM && "Module must not be null"); - if (auto Obj = Compile(*TSM.getModule())) { + if (auto Obj = TSM.withModuleDo(Compile)) { { std::lock_guard Lock(IRLayerMutex); if (NotifyCompiled) diff --git a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp index e3519284613..845ecc71eb8 100644 --- a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp +++ b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp @@ -19,7 +19,7 @@ IRTransformLayer::IRTransformLayer(ExecutionSession &ES, void IRTransformLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { - assert(TSM.getModule() && "Module must not be null"); + assert(TSM && "Module must not be null"); if (auto TransformedTSM = Transform(std::move(TSM), R)) BaseLayer.emit(std::move(R), std::move(*TransformedTSM)); diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index cc3656fe5dc..0295db7633d 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -37,8 +37,9 @@ private: void materialize(MaterializationResponsibility R) override { SymbolMap Result; Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported); - R.notifyResolved(Result); - R.notifyEmitted(); + // No dependencies, so these calls cannot fail. 
+ cantFail(R.notifyResolved(Result)); + cantFail(R.notifyEmitted()); } void discard(const JITDylib &JD, const SymbolStringPtr &Name) override { @@ -66,7 +67,7 @@ JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) { std::lock_guard Lock(CCMgrMutex); AddrToSymbol[*TrampolineAddr] = CallbackName; cantFail(CallbacksJD.define( - llvm::make_unique( + std::make_unique( std::move(CallbackName), std::move(Compile), ES.allocateVModule()))); return *TrampolineAddr; @@ -119,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return make_error( std::string("No callback manager available for ") + T.str(), inconvertibleErrorCode()); - case Triple::aarch64: { + case Triple::aarch64: + case Triple::aarch64_32: { typedef orc::LocalJITCompileCallbackManager CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); } @@ -162,50 +164,51 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { switch (T.getArch()) { default: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::aarch64: + case Triple::aarch64_32: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::x86: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mips: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mipsel: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::mips64: case Triple::mips64el: return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) { return [](){ - return llvm::make_unique< + return std::make_unique< orc::LocalIndirectStubsManager>(); }; } else { return [](){ - return llvm::make_unique< + return 
std::make_unique< orc::LocalIndirectStubsManager>(); }; } diff --git a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index df23547a9de..1d3e6db913e 100644 --- a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" namespace llvm { @@ -22,7 +23,21 @@ JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT) Expected JITTargetMachineBuilder::detectHost() { // FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on, // rather than a valid triple for the current process. - return JITTargetMachineBuilder(Triple(sys::getProcessTriple())); + JITTargetMachineBuilder TMBuilder((Triple(sys::getProcessTriple()))); + + // Retrieve host CPU name and sub-target features and add them to builder. + // Relocation model, code model and codegen opt level are kept to default + // values. 
+ llvm::SubtargetFeatures SubtargetFeatures; + llvm::StringMap FeatureMap; + llvm::sys::getHostCPUFeatures(FeatureMap); + for (auto &Feature : FeatureMap) + SubtargetFeatures.AddFeature(Feature.first(), Feature.second); + + TMBuilder.setCPU(llvm::sys::getHostCPUName()); + TMBuilder.addFeatures(SubtargetFeatures.getFeatures()); + + return TMBuilder; } Expected> diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp index b120691faf0..a80f78afe80 100644 --- a/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -41,7 +41,8 @@ Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) { Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) { assert(TSM && "Can not add null module"); - if (auto Err = applyDataLayout(*TSM.getModule())) + if (auto Err = + TSM.withModuleDo([&](Module &M) { return applyDataLayout(M); })) return Err; return CompileLayer->add(JD, std::move(TSM), ES->allocateVModule()); @@ -63,12 +64,21 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { // If the config state provided an ObjectLinkingLayer factory then use it. if (S.CreateObjectLinkingLayer) - return S.CreateObjectLinkingLayer(ES); + return S.CreateObjectLinkingLayer(ES, S.JTMB->getTargetTriple()); // Otherwise default to creating an RTDyldObjectLinkingLayer that constructs // a new SectionMemoryManager for each object. - auto GetMemMgr = []() { return llvm::make_unique(); }; - return llvm::make_unique(ES, std::move(GetMemMgr)); + auto GetMemMgr = []() { return std::make_unique(); }; + auto ObjLinkingLayer = + std::make_unique(ES, std::move(GetMemMgr)); + + if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) + ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true); + + // FIXME: Explicit conversion to std::unique_ptr added to silence + // errors from some GCC / libstdc++ bots. Remove this conversion (i.e. + // just return ObjLinkingLayer) once those bots are upgraded. 
+ return std::unique_ptr(std::move(ObjLinkingLayer)); } Expected @@ -92,7 +102,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S, } LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) - : ES(S.ES ? std::move(S.ES) : llvm::make_unique()), + : ES(S.ES ? std::move(S.ES) : std::make_unique()), Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main), DtorRunner(Main) { @@ -113,13 +123,13 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) Err = CompileFunction.takeError(); return; } - CompileLayer = llvm::make_unique( + CompileLayer = std::make_unique( *ES, *ObjLinkingLayer, std::move(*CompileFunction)); } if (S.NumCompileThreads > 0) { CompileLayer->setCloneToNewContextOnEmit(true); - CompileThreads = llvm::make_unique(S.NumCompileThreads); + CompileThreads = std::make_unique(S.NumCompileThreads); ES->setDispatchMaterialization( [this](JITDylib &JD, std::unique_ptr MU) { // FIXME: Switch to move capture once we have c++14. @@ -166,10 +176,14 @@ Error LLLazyJITBuilderState::prepareForConstruction() { Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) { assert(TSM && "Can not add null module"); - if (auto Err = applyDataLayout(*TSM.getModule())) - return Err; + if (auto Err = TSM.withModuleDo([&](Module &M) -> Error { + if (auto Err = applyDataLayout(M)) + return Err; - recordCtorDtors(*TSM.getModule()); + recordCtorDtors(M); + return Error::success(); + })) + return Err; return CODLayer->add(JD, std::move(TSM), ES->allocateVModule()); } @@ -212,10 +226,10 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) { } // Create the transform layer. - TransformLayer = llvm::make_unique(*ES, *CompileLayer); + TransformLayer = std::make_unique(*ES, *CompileLayer); // Create the COD layer. 
- CODLayer = llvm::make_unique( + CODLayer = std::make_unique( *ES, *TransformLayer, *LCTMgr, std::move(ISMBuilder)); if (S.NumCompileThreads > 0) diff --git a/lib/ExecutionEngine/Orc/Layer.cpp b/lib/ExecutionEngine/Orc/Layer.cpp index 3ed2dabf454..580e2682ec8 100644 --- a/lib/ExecutionEngine/Orc/Layer.cpp +++ b/lib/ExecutionEngine/Orc/Layer.cpp @@ -19,7 +19,7 @@ IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {} IRLayer::~IRLayer() {} Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) { - return JD.define(llvm::make_unique( + return JD.define(std::make_unique( *this, std::move(K), std::move(TSM))); } @@ -29,15 +29,17 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES, assert(this->TSM && "Module must not be null"); - MangleAndInterner Mangle(ES, this->TSM.getModule()->getDataLayout()); - for (auto &G : this->TSM.getModule()->global_values()) { - if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() && - !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) { - auto MangledName = Mangle(G.getName()); - SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); - SymbolToDefinition[MangledName] = &G; + MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout()); + this->TSM.withModuleDo([&](Module &M) { + for (auto &G : M.global_values()) { + if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() && + !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) { + auto MangledName = Mangle(G.getName()); + SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); + SymbolToDefinition[MangledName] = &G; + } } - } + }); } IRMaterializationUnit::IRMaterializationUnit( @@ -47,8 +49,9 @@ IRMaterializationUnit::IRMaterializationUnit( TSM(std::move(TSM)), SymbolToDefinition(std::move(SymbolToDefinition)) {} StringRef IRMaterializationUnit::getName() const { - if (TSM.getModule()) - return TSM.getModule()->getModuleIdentifier(); + if (TSM) + return TSM.withModuleDo( + [](const Module 
&M) -> StringRef { return M.getModuleIdentifier(); }); return ""; } @@ -90,7 +93,6 @@ void BasicIRLayerMaterializationUnit::materialize( auto &N = R.getTargetJITDylib().getName(); #endif // NDEBUG - auto Lock = TSM.getContextLock(); LLVM_DEBUG(ES.runSessionLocked( [&]() { dbgs() << "Emitting, for " << N << ", " << *this << "\n"; });); L.emit(std::move(R), std::move(TSM)); diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp index fc820584565..93aabd817d6 100644 --- a/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -50,7 +50,6 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) { SourceJD = I->second.first; SymbolName = I->second.second; } - auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName); @@ -91,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, inconvertibleErrorCode()); case Triple::aarch64: + case Triple::aarch64_32: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); @@ -121,7 +121,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit( LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager, - JITDylib &SourceJD, SymbolAliasMap CallableAliases, VModuleKey K) + JITDylib &SourceJD, SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc, + VModuleKey K) : MaterializationUnit(extractFlags(CallableAliases), std::move(K)), LCTManager(LCTManager), ISManager(ISManager), SourceJD(SourceJD), CallableAliases(std::move(CallableAliases)), @@ -129,7 +130,8 @@ LazyReexportsMaterializationUnit::LazyReexportsMaterializationUnit( [&ISManager](JITDylib &JD, const SymbolStringPtr &SymbolName, JITTargetAddress ResolvedAddr) { return ISManager.updatePointer(*SymbolName, ResolvedAddr); - })) {} + })), + AliaseeTable(SrcJDLoc) {} StringRef LazyReexportsMaterializationUnit::getName() 
const { return ""; @@ -149,7 +151,7 @@ void LazyReexportsMaterializationUnit::materialize( if (!CallableAliases.empty()) R.replace(lazyReexports(LCTManager, ISManager, SourceJD, - std::move(CallableAliases))); + std::move(CallableAliases), AliaseeTable)); IndirectStubsManager::StubInitsMap StubInits; for (auto &Alias : RequestedAliases) { @@ -168,6 +170,9 @@ void LazyReexportsMaterializationUnit::materialize( std::make_pair(*CallThroughTrampoline, Alias.second.AliasFlags); } + if (AliaseeTable != nullptr && !RequestedAliases.empty()) + AliaseeTable->trackImpls(RequestedAliases, &SourceJD); + if (auto Err = ISManager.createStubs(StubInits)) { SourceJD.getExecutionSession().reportError(std::move(Err)); R.failMaterialization(); @@ -178,8 +183,9 @@ void LazyReexportsMaterializationUnit::materialize( for (auto &Alias : RequestedAliases) Stubs[Alias.first] = ISManager.findStub(*Alias.first, false); - R.notifyResolved(Stubs); - R.notifyEmitted(); + // No registered dependencies, so these calls cannot fail. 
+ cantFail(R.notifyResolved(Stubs)); + cantFail(R.notifyEmitted()); } void LazyReexportsMaterializationUnit::discard(const JITDylib &JD, diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp index ce6368b57a8..9f9a6730b2c 100644 --- a/lib/ExecutionEngine/Orc/Legacy.cpp +++ b/lib/ExecutionEngine/Orc/Legacy.cpp @@ -23,7 +23,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, for (auto &S : Symbols) InternedSymbols.insert(ES.intern(S)); - auto OnResolvedWithUnwrap = [OnResolved](Expected InternedResult) { + auto OnResolvedWithUnwrap = [OnResolved = std::move(OnResolved)]( + Expected InternedResult) mutable { if (!InternedResult) { OnResolved(InternedResult.takeError()); return; @@ -36,7 +37,7 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, }; auto Q = std::make_shared( - InternedSymbols, SymbolState::Resolved, OnResolvedWithUnwrap); + InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap)); auto Unresolved = R.lookup(Q, InternedSymbols); if (Unresolved.empty()) { diff --git a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index def0b300eca..874decb2ade 100644 --- a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -29,6 +29,13 @@ public: std::unique_ptr ObjBuffer) : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {} + ~ObjectLinkingLayerJITLinkContext() { + // If there is an object buffer return function then use it to + // return ownership of the buffer. 
+ if (Layer.ReturnObjectBuffer) + Layer.ReturnObjectBuffer(std::move(ObjBuffer)); + } + JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; } MemoryBufferRef getObjectBuffer() const override { @@ -41,7 +48,7 @@ public: } void lookup(const DenseSet &Symbols, - JITLinkAsyncLookupContinuation LookupContinuation) override { + std::unique_ptr LC) override { JITDylibSearchList SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( @@ -54,18 +61,16 @@ public: InternedSymbols.insert(ES.intern(S)); // OnResolve -- De-intern the symbols and pass the result to the linker. - // FIXME: Capture LookupContinuation by move once we have c++14. - auto SharedLookupContinuation = - std::make_shared( - std::move(LookupContinuation)); - auto OnResolve = [SharedLookupContinuation](Expected Result) { + auto OnResolve = [this, LookupContinuation = std::move(LC)]( + Expected Result) mutable { + auto Main = Layer.getExecutionSession().intern("_main"); if (!Result) - (*SharedLookupContinuation)(Result.takeError()); + LookupContinuation->run(Result.takeError()); else { AsyncLookupResult LR; for (auto &KV : *Result) LR[*KV.first] = KV.second; - (*SharedLookupContinuation)(std::move(LR)); + LookupContinuation->run(std::move(LR)); } }; @@ -75,29 +80,25 @@ public: }); } - void notifyResolved(AtomGraph &G) override { + void notifyResolved(LinkGraph &G) override { auto &ES = Layer.getExecutionSession(); SymbolFlagsMap ExtraSymbolsToClaim; bool AutoClaim = Layer.AutoClaimObjectSymbols; SymbolMap InternedResult; - for (auto *DA : G.defined_atoms()) - if (DA->hasName() && DA->isGlobal()) { - auto InternedName = ES.intern(DA->getName()); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && Sym->getScope() != Scope::Local) { + auto InternedName = ES.intern(Sym->getName()); JITSymbolFlags Flags; - if (DA->isExported()) - Flags |= JITSymbolFlags::Exported; - if (DA->isWeak()) - Flags |= JITSymbolFlags::Weak; - if (DA->isCallable()) + if (Sym->isCallable()) Flags |= 
JITSymbolFlags::Callable; - if (DA->isCommon()) - Flags |= JITSymbolFlags::Common; + if (Sym->getScope() == Scope::Default) + Flags |= JITSymbolFlags::Exported; InternedResult[InternedName] = - JITEvaluatedSymbol(DA->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -105,17 +106,17 @@ public: } } - for (auto *A : G.absolute_atoms()) - if (A->hasName()) { - auto InternedName = ES.intern(A->getName()); + for (auto *Sym : G.absolute_symbols()) + if (Sym->hasName()) { + auto InternedName = ES.intern(Sym->getName()); JITSymbolFlags Flags; Flags |= JITSymbolFlags::Absolute; - if (A->isWeak()) - Flags |= JITSymbolFlags::Weak; - if (A->isCallable()) + if (Sym->isCallable()) Flags |= JITSymbolFlags::Callable; + if (Sym->getLinkage() == Linkage::Weak) + Flags |= JITSymbolFlags::Weak; InternedResult[InternedName] = - JITEvaluatedSymbol(A->getAddress(), Flags); + JITEvaluatedSymbol(Sym->getAddress(), Flags); if (AutoClaim && !MR.getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -126,35 +127,38 @@ public: if (!ExtraSymbolsToClaim.empty()) if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim)) return notifyFailed(std::move(Err)); - - MR.notifyResolved(InternedResult); - + if (auto Err = MR.notifyResolved(InternedResult)) { + Layer.getExecutionSession().reportError(std::move(Err)); + MR.failMaterialization(); + return; + } Layer.notifyLoaded(MR); } void notifyFinalized( std::unique_ptr A) override { - if (auto Err = Layer.notifyEmitted(MR, std::move(A))) { Layer.getExecutionSession().reportError(std::move(Err)); MR.failMaterialization(); - return; } - MR.notifyEmitted(); + if (auto Err = MR.notifyEmitted()) { + Layer.getExecutionSession().reportError(std::move(Err)); + MR.failMaterialization(); + } } - AtomGraphPassFunction getMarkLivePass(const 
Triple &TT) const override { - return [this](AtomGraph &G) { return markResponsibilitySymbolsLive(G); }; + LinkGraphPassFunction getMarkLivePass(const Triple &TT) const override { + return [this](LinkGraph &G) { return markResponsibilitySymbolsLive(G); }; } Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override { // Add passes to mark duplicate defs as should-discard, and to walk the - // atom graph to build the symbol dependence graph. + // link graph to build the symbol dependence graph. Config.PrePrunePasses.push_back( - [this](AtomGraph &G) { return markSymbolsToDiscard(G); }); + [this](LinkGraph &G) { return externalizeWeakAndCommonSymbols(G); }); Config.PostPrunePasses.push_back( - [this](AtomGraph &G) { return computeNamedSymbolDependencies(G); }); + [this](LinkGraph &G) { return computeNamedSymbolDependencies(G); }); Layer.modifyPassConfig(MR, TT, Config); @@ -162,65 +166,59 @@ public: } private: - using AnonAtomNamedDependenciesMap = - DenseMap; + using AnonToNamedDependenciesMap = DenseMap; - Error markSymbolsToDiscard(AtomGraph &G) { + Error externalizeWeakAndCommonSymbols(LinkGraph &G) { auto &ES = Layer.getExecutionSession(); - for (auto *DA : G.defined_atoms()) - if (DA->isWeak() && DA->hasName()) { - auto S = ES.intern(DA->getName()); - auto I = MR.getSymbols().find(S); - if (I == MR.getSymbols().end()) - DA->setShouldDiscard(true); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + G.makeExternal(*Sym); } - for (auto *A : G.absolute_atoms()) - if (A->isWeak() && A->hasName()) { - auto S = ES.intern(A->getName()); - auto I = MR.getSymbols().find(S); - if (I == MR.getSymbols().end()) - A->setShouldDiscard(true); + for (auto *Sym : G.absolute_symbols()) + if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) { + if (!MR.getSymbols().count(ES.intern(Sym->getName()))) + G.makeExternal(*Sym); } return Error::success(); } 
- Error markResponsibilitySymbolsLive(AtomGraph &G) const { + Error markResponsibilitySymbolsLive(LinkGraph &G) const { auto &ES = Layer.getExecutionSession(); - for (auto *DA : G.defined_atoms()) - if (DA->hasName() && - MR.getSymbols().count(ES.intern(DA->getName()))) - DA->setLive(true); + for (auto *Sym : G.defined_symbols()) + if (Sym->hasName() && MR.getSymbols().count(ES.intern(Sym->getName()))) + Sym->setLive(true); return Error::success(); } - Error computeNamedSymbolDependencies(AtomGraph &G) { + Error computeNamedSymbolDependencies(LinkGraph &G) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); auto AnonDeps = computeAnonDeps(G); - for (auto *DA : G.defined_atoms()) { + for (auto *Sym : G.defined_symbols()) { // Skip anonymous and non-global atoms: we do not need dependencies for // these. - if (!DA->hasName() || !DA->isGlobal()) + if (Sym->getScope() == Scope::Local) continue; - auto DAName = ES.intern(DA->getName()); - SymbolNameSet &DADeps = NamedSymbolDeps[DAName]; + auto SymName = ES.intern(Sym->getName()); + SymbolNameSet &SymDeps = NamedSymbolDeps[SymName]; - for (auto &E : DA->edges()) { - auto &TA = E.getTarget(); + for (auto &E : Sym->getBlock().edges()) { + auto &TargetSym = E.getTarget(); - if (TA.hasName()) - DADeps.insert(ES.intern(TA.getName())); + if (TargetSym.getScope() != Scope::Local) + SymDeps.insert(ES.intern(TargetSym.getName())); else { - assert(TA.isDefined() && "Anonymous atoms must be defined"); - auto &DTA = static_cast(TA); - auto I = AnonDeps.find(&DTA); + assert(TargetSym.isDefined() && + "Anonymous/local symbols must be defined"); + auto I = AnonDeps.find(&TargetSym); if (I != AnonDeps.end()) for (auto &S : I->second) - DADeps.insert(S); + SymDeps.insert(S); } } } @@ -228,58 +226,59 @@ private: return Error::success(); } - AnonAtomNamedDependenciesMap computeAnonDeps(AtomGraph &G) { + AnonToNamedDependenciesMap computeAnonDeps(LinkGraph &G) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); - 
AnonAtomNamedDependenciesMap DepMap; + AnonToNamedDependenciesMap DepMap; - // For all anonymous atoms: + // For all anonymous symbols: // (1) Add their named dependencies. // (2) Add them to the worklist for further iteration if they have any - // depend on any other anonymous atoms. + // depend on any other anonymous symbols. struct WorklistEntry { - WorklistEntry(DefinedAtom *DA, DenseSet DAAnonDeps) - : DA(DA), DAAnonDeps(std::move(DAAnonDeps)) {} + WorklistEntry(Symbol *Sym, DenseSet SymAnonDeps) + : Sym(Sym), SymAnonDeps(std::move(SymAnonDeps)) {} - DefinedAtom *DA = nullptr; - DenseSet DAAnonDeps; + Symbol *Sym = nullptr; + DenseSet SymAnonDeps; }; std::vector Worklist; - for (auto *DA : G.defined_atoms()) - if (!DA->hasName()) { - auto &DANamedDeps = DepMap[DA]; - DenseSet DAAnonDeps; + for (auto *Sym : G.defined_symbols()) + if (!Sym->hasName()) { + auto &SymNamedDeps = DepMap[Sym]; + DenseSet SymAnonDeps; - for (auto &E : DA->edges()) { - auto &TA = E.getTarget(); - if (TA.hasName()) - DANamedDeps.insert(ES.intern(TA.getName())); + for (auto &E : Sym->getBlock().edges()) { + auto &TargetSym = E.getTarget(); + if (TargetSym.hasName()) + SymNamedDeps.insert(ES.intern(TargetSym.getName())); else { - assert(TA.isDefined() && "Anonymous atoms must be defined"); - DAAnonDeps.insert(static_cast(&TA)); + assert(TargetSym.isDefined() && + "Anonymous symbols must be defined"); + SymAnonDeps.insert(&TargetSym); } } - if (!DAAnonDeps.empty()) - Worklist.push_back(WorklistEntry(DA, std::move(DAAnonDeps))); + if (!SymAnonDeps.empty()) + Worklist.push_back(WorklistEntry(Sym, std::move(SymAnonDeps))); } - // Loop over all anonymous atoms with anonymous dependencies, propagating + // Loop over all anonymous symbols with anonymous dependencies, propagating // their respective *named* dependencies. Iterate until we hit a stable // state. 
bool Changed; do { Changed = false; for (auto &WLEntry : Worklist) { - auto *DA = WLEntry.DA; - auto &DANamedDeps = DepMap[DA]; - auto &DAAnonDeps = WLEntry.DAAnonDeps; + auto *Sym = WLEntry.Sym; + auto &SymNamedDeps = DepMap[Sym]; + auto &SymAnonDeps = WLEntry.SymAnonDeps; - for (auto *TA : DAAnonDeps) { - auto I = DepMap.find(TA); + for (auto *TargetSym : SymAnonDeps) { + auto I = DepMap.find(TargetSym); if (I != DepMap.end()) for (const auto &S : I->second) - Changed |= DANamedDeps.insert(S).second; + Changed |= SymNamedDeps.insert(S).second; } } } while (Changed); @@ -330,7 +329,7 @@ ObjectLinkingLayer::~ObjectLinkingLayer() { void ObjectLinkingLayer::emit(MaterializationResponsibility R, std::unique_ptr O) { assert(O && "Object must not be null"); - jitLink(llvm::make_unique( + jitLink(std::make_unique( *this, std::move(R), std::move(O))); } @@ -410,7 +409,7 @@ Error ObjectLinkingLayer::removeAllModules() { } EHFrameRegistrationPlugin::EHFrameRegistrationPlugin( - jitlink::EHFrameRegistrar &Registrar) + EHFrameRegistrar &Registrar) : Registrar(Registrar) {} void EHFrameRegistrationPlugin::modifyPassConfig( @@ -419,61 +418,66 @@ void EHFrameRegistrationPlugin::modifyPassConfig( assert(!InProcessLinks.count(&MR) && "Link for MR already being tracked?"); PassConfig.PostFixupPasses.push_back( - createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr) { + createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr, + size_t Size) { if (Addr) - InProcessLinks[&MR] = Addr; + InProcessLinks[&MR] = { Addr, Size }; })); } Error EHFrameRegistrationPlugin::notifyEmitted( MaterializationResponsibility &MR) { - auto EHFrameAddrItr = InProcessLinks.find(&MR); - if (EHFrameAddrItr == InProcessLinks.end()) + auto EHFrameRangeItr = InProcessLinks.find(&MR); + if (EHFrameRangeItr == InProcessLinks.end()) return Error::success(); - auto EHFrameAddr = EHFrameAddrItr->second; - assert(EHFrameAddr && "eh-frame addr to register can not be null"); + auto EHFrameRange = 
EHFrameRangeItr->second; + assert(EHFrameRange.Addr && + "eh-frame addr to register can not be null"); - InProcessLinks.erase(EHFrameAddrItr); + InProcessLinks.erase(EHFrameRangeItr); if (auto Key = MR.getVModuleKey()) - TrackedEHFrameAddrs[Key] = EHFrameAddr; + TrackedEHFrameRanges[Key] = EHFrameRange; else - UntrackedEHFrameAddrs.push_back(EHFrameAddr); + UntrackedEHFrameRanges.push_back(EHFrameRange); - return Registrar.registerEHFrames(EHFrameAddr); + return Registrar.registerEHFrames(EHFrameRange.Addr, EHFrameRange.Size); } Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) { - auto EHFrameAddrItr = TrackedEHFrameAddrs.find(K); - if (EHFrameAddrItr == TrackedEHFrameAddrs.end()) + auto EHFrameRangeItr = TrackedEHFrameRanges.find(K); + if (EHFrameRangeItr == TrackedEHFrameRanges.end()) return Error::success(); - auto EHFrameAddr = EHFrameAddrItr->second; - assert(EHFrameAddr && "Tracked eh-frame addr must not be null"); + auto EHFrameRange = EHFrameRangeItr->second; + assert(EHFrameRange.Addr && "Tracked eh-frame range must not be null"); - TrackedEHFrameAddrs.erase(EHFrameAddrItr); + TrackedEHFrameRanges.erase(EHFrameRangeItr); - return Registrar.deregisterEHFrames(EHFrameAddr); + return Registrar.deregisterEHFrames(EHFrameRange.Addr, EHFrameRange.Size); } Error EHFrameRegistrationPlugin::notifyRemovingAllModules() { - std::vector EHFrameAddrs = std::move(UntrackedEHFrameAddrs); - EHFrameAddrs.reserve(EHFrameAddrs.size() + TrackedEHFrameAddrs.size()); + std::vector EHFrameRanges = + std::move(UntrackedEHFrameRanges); + EHFrameRanges.reserve(EHFrameRanges.size() + TrackedEHFrameRanges.size()); - for (auto &KV : TrackedEHFrameAddrs) - EHFrameAddrs.push_back(KV.second); + for (auto &KV : TrackedEHFrameRanges) + EHFrameRanges.push_back(KV.second); - TrackedEHFrameAddrs.clear(); + TrackedEHFrameRanges.clear(); Error Err = Error::success(); - while (!EHFrameAddrs.empty()) { - auto EHFrameAddr = EHFrameAddrs.back(); - assert(EHFrameAddr && "Untracked 
eh-frame addr must not be null"); - EHFrameAddrs.pop_back(); - Err = joinErrors(std::move(Err), Registrar.deregisterEHFrames(EHFrameAddr)); + while (!EHFrameRanges.empty()) { + auto EHFrameRange = EHFrameRanges.back(); + assert(EHFrameRange.Addr && "Untracked eh-frame range must not be null"); + EHFrameRanges.pop_back(); + Err = joinErrors(std::move(Err), + Registrar.deregisterEHFrames(EHFrameRange.Addr, + EHFrameRange.Size)); } return Err; diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 98129e1690d..e0af3df9d01 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -97,7 +97,7 @@ public: template std::unique_ptr> createGenericLayer(LayerT &Layer) { - return llvm::make_unique>(Layer); + return std::make_unique>(Layer); } } // end namespace detail @@ -316,7 +316,8 @@ public: if (auto Err = CtorRunner.runViaLayer(*this)) return std::move(Err); - IRStaticDestructorRunners.emplace_back(std::move(DtorNames), K); + IRStaticDestructorRunners.emplace_back(AcknowledgeORCv1Deprecation, + std::move(DtorNames), K); return K; } @@ -326,7 +327,7 @@ public: LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return addIRModule(CompileLayer, std::move(M), - llvm::make_unique(), + std::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } @@ -340,7 +341,7 @@ public: inconvertibleErrorCode()); return addIRModule(*CODLayer, std::move(M), - llvm::make_unique(), + std::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } @@ -468,7 +469,7 @@ private: if (!CCMgr) return nullptr; - return llvm::make_unique( + return std::make_unique( AcknowledgeORCv1Deprecation, ES, CompileLayer, [&Resolvers](orc::VModuleKey K) { auto ResolverI = Resolvers.find(K); diff --git a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index b22ecd5f80a..939cd539d1f 100644 --- 
a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -27,9 +27,9 @@ public: // Build an OnResolve callback to unwrap the interned strings and pass them // to the OnResolved callback. - // FIXME: Switch to move capture of OnResolved once we have c++14. auto OnResolvedWithUnwrap = - [OnResolved](Expected InternedResult) { + [OnResolved = std::move(OnResolved)]( + Expected InternedResult) mutable { if (!InternedResult) { OnResolved(InternedResult.takeError()); return; @@ -50,7 +50,7 @@ public: MR.getTargetJITDylib().withSearchOrderDo( [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved, - OnResolvedWithUnwrap, RegisterDependencies); + std::move(OnResolvedWithUnwrap), RegisterDependencies); } Expected getResponsibilitySet(const LookupSet &Symbols) { @@ -133,8 +133,6 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, JITDylibSearchOrderResolver Resolver(*SharedR); - // FIXME: Switch to move-capture for the 'O' buffer once we have c++14. 
- MemoryBuffer *UnownedObjBuffer = O.release(); jitLinkForORC( **Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections, [this, K, SharedR, &Obj, InternalSymbols]( @@ -143,9 +141,8 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R, return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo), ResolvedSymbols, *InternalSymbols); }, - [this, K, SharedR, UnownedObjBuffer](Error Err) { - std::unique_ptr ObjBuffer(UnownedObjBuffer); - onObjEmit(K, std::move(ObjBuffer), *SharedR, std::move(Err)); + [this, K, SharedR, O = std::move(O)](Error Err) mutable { + onObjEmit(K, std::move(O), *SharedR, std::move(Err)); }); } @@ -184,7 +181,10 @@ Error RTDyldObjectLinkingLayer::onObjLoad( if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim)) return Err; - R.notifyResolved(Symbols); + if (auto Err = R.notifyResolved(Symbols)) { + R.failMaterialization(); + return Err; + } if (NotifyLoaded) NotifyLoaded(K, Obj, *LoadedObjInfo); @@ -201,7 +201,11 @@ void RTDyldObjectLinkingLayer::onObjEmit( return; } - R.notifyEmitted(); + if (auto Err = R.notifyEmitted()) { + getExecutionSession().reportError(std::move(Err)); + R.failMaterialization(); + return; + } if (NotifyEmitted) NotifyEmitted(K, std::move(ObjBuffer)); diff --git a/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp new file mode 100644 index 00000000000..f22acf50419 --- /dev/null +++ b/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp @@ -0,0 +1,307 @@ +//===-- SpeculateAnalyses.cpp --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +namespace { +using namespace llvm; +SmallVector findBBwithCalls(const Function &F, + bool IndirectCall = false) { + SmallVector BBs; + + auto findCallInst = [&IndirectCall](const Instruction &I) { + if (auto Call = dyn_cast(&I)) + return Call->isIndirectCall() ? IndirectCall : true; + else + return false; + }; + for (auto &BB : F) + if (findCallInst(*BB.getTerminator()) || + llvm::any_of(BB.instructionsWithoutDebug(), findCallInst)) + BBs.emplace_back(&BB); + + return BBs; +} +} // namespace + +// Implementations of Queries shouldn't need to lock the resources +// such as LLVMContext, each argument (function) has a non-shared LLVMContext +// Plus, if Queries contain states necessary locking scheme should be provided. 
+namespace llvm { +namespace orc { + +// Collect direct calls only +void SpeculateQuery::findCalles(const BasicBlock *BB, + DenseSet &CallesNames) { + assert(BB != nullptr && "Traversing Null BB to find calls?"); + + auto getCalledFunction = [&CallesNames](const CallBase *Call) { + auto CalledValue = Call->getCalledOperand()->stripPointerCasts(); + if (auto DirectCall = dyn_cast(CalledValue)) + CallesNames.insert(DirectCall->getName()); + }; + for (auto &I : BB->instructionsWithoutDebug()) + if (auto CI = dyn_cast(&I)) + getCalledFunction(CI); + + if (auto II = dyn_cast(BB->getTerminator())) + getCalledFunction(II); +} + +bool SpeculateQuery::isStraightLine(const Function &F) { + return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) { + return BB.getSingleSuccessor() != nullptr; + }); +} + +// BlockFreqQuery Implementations + +size_t BlockFreqQuery::numBBToGet(size_t numBB) { + // small CFG + if (numBB < 4) + return numBB; + // mid-size CFG + else if (numBB < 20) + return (numBB / 2); + else + return (numBB / 2) + (numBB / 4); +} + +BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) { + DenseMap> CallerAndCalles; + DenseSet Calles; + SmallVector, 8> BBFreqs; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto IBBs = findBBwithCalls(F); + + if (IBBs.empty()) + return None; + + auto &BFI = FAM.getResult(F); + + for (const auto I : IBBs) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + assert(IBBs.size() == BBFreqs.size() && "BB Count Mismatch"); + + llvm::sort(BBFreqs.begin(), BBFreqs.end(), + [](decltype(BBFreqs)::const_reference BBF, + decltype(BBFreqs)::const_reference BBS) { + return BBF.second > BBS.second ? 
true : false; + }); + + // ignoring number of direct calls in a BB + auto Topk = numBBToGet(BBFreqs.size()); + + for (size_t i = 0; i < Topk; i++) + findCalles(BBFreqs[i].first, Calles); + + assert(!Calles.empty() && "Running Analysis on Function with no calls?"); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + + return CallerAndCalles; +} + +// SequenceBBQuery Implementation +std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) { + if (TotalBlocks == 1) + return TotalBlocks; + return TotalBlocks / 2; +} + +// FIXME : find good implementation. +SequenceBBQuery::BlockListTy +SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) { + BlockListTy RearrangedBBSet; + + for (auto &Block : F.getBasicBlockList()) + if (llvm::is_contained(BBList, &Block)) + RearrangedBBSet.push_back(&Block); + + assert(RearrangedBBSet.size() == BBList.size() && + "BasicBlock missing while rearranging?"); + return RearrangedBBSet; +} + +void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Upward) + return; + Itr->second.Upward = false; + } else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Upward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB); + // Move this check to top, when we have code setup to launch speculative + // compiles for function in entry BB, this triggers the speculative compiles + // before running the program. + if (PIt == EIt) // No Preds. 
+ return; + + DenseSet PredSkipNodes; + + // Since we are checking for predecessor's backedges, this Block + // occurs in second position. + for (auto &I : BackEdgesInfo) + if (I.second == AtBB) + PredSkipNodes.insert(I.first); + + // Skip predecessors which source of back-edges. + for (; PIt != EIt; ++PIt) + // checking EdgeHotness is cheaper + if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt)) + traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Downward) + return; + Itr->second.Downward = false; + } else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Downward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB); + if (PIt == EIt) // No succs. + return; + + // If there are hot edges, then compute SuccSkipNodes. + DenseSet SuccSkipNodes; + + // Since we are checking for successor's backedges, this Block + // occurs in first position. + for (auto &I : BackEdgesInfo) + if (I.first == AtBB) + SuccSkipNodes.insert(I.second); + + for (; PIt != EIt; ++PIt) + if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt)) + traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +// Get Block frequencies for blocks and take most frquently executed block, +// walk towards the entry block from those blocks and discover the basic blocks +// with call. 
+SequenceBBQuery::BlockListTy +SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) { + + BlockFreqInfoTy BBFreqs; + VisitedBlocksInfoTy VisitedBlocks; + BackEdgesInfoTy BackEdgesInfo; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto &BFI = FAM.getResult(F); + + llvm::FindFunctionBackedges(F, BackEdgesInfo); + + for (const auto I : CallerBlocks) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference Bbf, + decltype(BBFreqs)::const_reference Bbs) { + return Bbf.second > Bbs.second; + }); + + ArrayRef> HotBlocksRef(BBFreqs); + HotBlocksRef = + HotBlocksRef.drop_back(BBFreqs.size() - getHottestBlocks(BBFreqs.size())); + + BranchProbabilityInfo *BPI = + FAM.getCachedResult(F); + + // visit NHotBlocks, + // traverse upwards to entry + // traverse downwards to end. + + for (auto I : HotBlocksRef) { + traverseToEntryBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + traverseToExitBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + } + + BlockListTy MinCallerBlocks; + for (auto &I : VisitedBlocks) + if (I.second.CallerBlock) + MinCallerBlocks.push_back(std::move(I.first)); + + return rearrangeBB(F, MinCallerBlocks); +} + +SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) { + // reduce the number of lists! 
+ DenseMap> CallerAndCalles; + DenseSet Calles; + BlockListTy SequencedBlocks; + BlockListTy CallerBlocks; + + CallerBlocks = findBBwithCalls(F); + if (CallerBlocks.empty()) + return None; + + if (isStraightLine(F)) + SequencedBlocks = rearrangeBB(F, CallerBlocks); + else + SequencedBlocks = queryCFG(F, CallerBlocks); + + for (auto BB : SequencedBlocks) + findCalles(BB, Calles); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + return CallerAndCalles; +} + +} // namespace orc +} // namespace llvm diff --git a/lib/ExecutionEngine/Orc/Speculation.cpp b/lib/ExecutionEngine/Orc/Speculation.cpp new file mode 100644 index 00000000000..f29201c147a --- /dev/null +++ b/lib/ExecutionEngine/Orc/Speculation.cpp @@ -0,0 +1,146 @@ +//===---------- speculation.cpp - Utilities for Speculation ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/Speculation.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" + +#include + +namespace llvm { + +namespace orc { + +// ImplSymbolMap methods +void ImplSymbolMap::trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD) { + assert(SrcJD && "Tracking on Null Source .impl dylib"); + std::lock_guard Lockit(ConcurrentAccess); + for (auto &I : ImplMaps) { + auto It = Maps.insert({I.first, {I.second.Aliasee, SrcJD}}); + // check rationale when independent dylibs have same symbol name? 
+ assert(It.second && "ImplSymbols are already tracked for this Symbol?"); + (void)(It); + } +} + +// Trigger Speculative Compiles. +void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) { + assert(Ptr && " Null Address Received in orc_speculate_for "); + Ptr->speculateFor(StubId); +} + +Error Speculator::addSpeculationRuntime(JITDylib &JD, + MangleAndInterner &Mangle) { + JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this), + JITSymbolFlags::Exported); + JITEvaluatedSymbol SpeculateForEntryPtr( + pointerToJITTargetAddress(&speculateForEntryPoint), + JITSymbolFlags::Exported); + return JD.define(absoluteSymbols({ + {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol + {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol + })); +} + +// If two modules, share the same LLVMContext, different threads must +// not access them concurrently without locking the associated LLVMContext +// this implementation follows this contract. +void IRSpeculationLayer::emit(MaterializationResponsibility R, + ThreadSafeModule TSM) { + + assert(TSM && "Speculation Layer received Null Module ?"); + assert(TSM.getContext().getContext() != nullptr && + "Module with null LLVMContext?"); + + // Instrumentation of runtime calls, lock the Module + TSM.withModuleDo([this, &R](Module &M) { + auto &MContext = M.getContext(); + auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator"); + auto RuntimeCallTy = FunctionType::get( + Type::getVoidTy(MContext), + {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false); + auto RuntimeCall = + Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage, + "__orc_speculate_for", &M); + auto SpeclAddr = new GlobalVariable( + M, SpeculatorVTy, false, GlobalValue::LinkageTypes::ExternalLinkage, + nullptr, "__orc_speculator"); + + IRBuilder<> Mutator(MContext); + + // QueryAnalysis allowed to transform the IR source, one such example is + // Simplify CFG helps the static branch 
prediction heuristics! + for (auto &Fn : M.getFunctionList()) { + if (!Fn.isDeclaration()) { + + auto IRNames = QueryAnalysis(Fn); + // Instrument and register if Query has result + if (IRNames.hasValue()) { + + // Emit globals for each function. + auto LoadValueTy = Type::getInt8Ty(MContext); + auto SpeculatorGuard = new GlobalVariable( + M, LoadValueTy, false, GlobalValue::LinkageTypes::InternalLinkage, + ConstantInt::get(LoadValueTy, 0), + "__orc_speculate.guard.for." + Fn.getName()); + SpeculatorGuard->setAlignment(Align::None()); + SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + + BasicBlock &ProgramEntry = Fn.getEntryBlock(); + // Create BasicBlocks before the program's entry basicblock + BasicBlock *SpeculateBlock = BasicBlock::Create( + MContext, "__orc_speculate.block", &Fn, &ProgramEntry); + BasicBlock *SpeculateDecisionBlock = BasicBlock::Create( + MContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock); + + assert(SpeculateDecisionBlock == &Fn.getEntryBlock() && + "SpeculateDecisionBlock not updated?"); + Mutator.SetInsertPoint(SpeculateDecisionBlock); + + auto LoadGuard = + Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value"); + // if just loaded value equal to 0,return true. 
+ auto CanSpeculate = + Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0), + "compare.to.speculate"); + Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry); + + Mutator.SetInsertPoint(SpeculateBlock); + auto ImplAddrToUint = + Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(MContext)); + Mutator.CreateCall(RuntimeCallTy, RuntimeCall, + {SpeclAddr, ImplAddrToUint}); + Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1), + SpeculatorGuard); + Mutator.CreateBr(&ProgramEntry); + + assert(Mutator.GetInsertBlock()->getParent() == &Fn && + "IR builder association mismatch?"); + S.registerSymbols(internToJITSymbols(IRNames.getValue()), + &R.getTargetJITDylib()); + } + } + } + }); + + assert(!TSM.withModuleDo([](const Module &M) { return verifyModule(M); }) && + "Speculation Instrumentation breaks IR?"); + + NextLayer.emit(std::move(R), std::move(TSM)); +} + +} // namespace orc +} // namespace llvm diff --git a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp index 4cb7376758a..1f4e6f13211 100644 --- a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp +++ b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp @@ -23,41 +23,41 @@ ThreadSafeModule cloneToNewContext(ThreadSafeModule &TSM, if (!ShouldCloneDef) ShouldCloneDef = [](const GlobalValue &) { return true; }; - auto Lock = TSM.getContextLock(); + return TSM.withModuleDo([&](Module &M) { + SmallVector ClonedModuleBuffer; - SmallVector ClonedModuleBuffer; + { + std::set ClonedDefsInSrc; + ValueToValueMapTy VMap; + auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) { + if (ShouldCloneDef(*GV)) { + ClonedDefsInSrc.insert(const_cast(GV)); + return true; + } + return false; + }); - { - std::set ClonedDefsInSrc; - ValueToValueMapTy VMap; - auto Tmp = CloneModule(*TSM.getModule(), VMap, [&](const GlobalValue *GV) { - if (ShouldCloneDef(*GV)) { - ClonedDefsInSrc.insert(const_cast(GV)); - return true; - } - return false; - }); + if (UpdateClonedDefSource) + for 
(auto *GV : ClonedDefsInSrc) + UpdateClonedDefSource(*GV); - if (UpdateClonedDefSource) - for (auto *GV : ClonedDefsInSrc) - UpdateClonedDefSource(*GV); + BitcodeWriter BCWriter(ClonedModuleBuffer); - BitcodeWriter BCWriter(ClonedModuleBuffer); + BCWriter.writeModule(*Tmp); + BCWriter.writeSymtab(); + BCWriter.writeStrtab(); + } - BCWriter.writeModule(*Tmp); - BCWriter.writeSymtab(); - BCWriter.writeStrtab(); - } + MemoryBufferRef ClonedModuleBufferRef( + StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()), + "cloned module buffer"); + ThreadSafeContext NewTSCtx(std::make_unique()); - MemoryBufferRef ClonedModuleBufferRef( - StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()), - "cloned module buffer"); - ThreadSafeContext NewTSCtx(llvm::make_unique()); - - auto ClonedModule = - cantFail(parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext())); - ClonedModule->setModuleIdentifier(TSM.getModule()->getName()); - return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx)); + auto ClonedModule = cantFail( + parseBitcodeFile(ClonedModuleBufferRef, *NewTSCtx.getContext())); + ClonedModule->setModuleIdentifier(M.getName()); + return ThreadSafeModule(std::move(ClonedModule), std::move(NewTSCtx)); + }); } } // end namespace orc diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index 5606421a3cb..184388dc4d7 100644 --- a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -26,11 +26,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" +#include #include // mmap() #include // getpid() @@ -203,7 +203,7 @@ 
PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { return; } - Dumpstream = make_unique(DumpFd, true); + Dumpstream = std::make_unique(DumpFd, true); LLVMPerfJitHeader Header = {0}; if (!FillMachine(Header)) @@ -420,7 +420,7 @@ void PerfJITEventListener::NotifyCode(Expected &Symbol, rec.Tid = get_threadid(); // avoid interspersing output - MutexGuard Guard(Mutex); + std::lock_guard Guard(Mutex); rec.CodeIndex = CodeGeneration++; // under lock! @@ -462,7 +462,7 @@ void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, // * char name[n] : source file name in ASCII, including null termination // avoid interspersing output - MutexGuard Guard(Mutex); + std::lock_guard Guard(Mutex); Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index e26e6ce45db..2df71a5e5e7 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -17,10 +17,11 @@ #include "RuntimeDyldMachO.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MutexGuard.h" +#include #include @@ -120,7 +121,7 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Print out the sections prior to relocation. 
LLVM_DEBUG(for (int i = 0, e = Sections.size(); i != e; ++i) @@ -156,7 +157,7 @@ void RuntimeDyldImpl::resolveLocalRelocations() { void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) { - MutexGuard locked(lock); + std::lock_guard locked(lock); for (unsigned i = 0, e = Sections.size(); i != e; ++i) { if (Sections[i].getAddress() == LocalAddress) { reassignSectionAddress(i, TargetAddress); @@ -177,7 +178,7 @@ static Error getOffset(const SymbolRef &Sym, SectionRef Sec, Expected RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { - MutexGuard locked(lock); + std::lock_guard locked(lock); // Save information about our target Arch = (Triple::ArchType)Obj.getArch(); @@ -347,8 +348,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { StubMap Stubs; - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return RelSecOrErr.takeError(); + + section_iterator RelocatedSection = *RelSecOrErr; if (RelocatedSection == SE) continue; @@ -535,9 +540,10 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, bool IsCode = Section.isText(); bool IsReadOnly = isReadOnlyData(Section); - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section); @@ -646,7 +652,12 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj, unsigned StubBufSize = 0; for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { - section_iterator RelSecI = SI->getRelocatedSection(); + + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + 
report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (!(RelSecI == Section)) continue; @@ -727,16 +738,17 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Assign the address of each symbol for (auto &Sym : SymbolsToAllocate) { - uint32_t Align = Sym.getAlignment(); + uint32_t Alignment = Sym.getAlignment(); uint64_t Size = Sym.getCommonSize(); StringRef Name; if (auto NameOrErr = Sym.getName()) Name = *NameOrErr; else return NameOrErr.takeError(); - if (Align) { + if (Alignment) { // This symbol has an alignment requirement. - uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align); + uint64_t AlignOffset = + offsetToAlignment((uint64_t)Addr, Align(Alignment)); Addr += AlignOffset; Offset += AlignOffset; } @@ -777,9 +789,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, // anyway, so we should guarantee that the alignment is always at least 1. Alignment = std::max(1u, Alignment); - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = *NameOrErr; StubBufSize = computeSectionStubBufSize(Obj, Section); @@ -917,7 +930,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, unsigned AbiVariant) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::aarch64_32) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. 
@@ -1175,17 +1189,15 @@ Error RuntimeDyldImpl::resolveExternalSymbols() { } void RuntimeDyldImpl::finalizeAsync( - std::unique_ptr This, std::function OnEmitted, + std::unique_ptr This, + unique_function OnEmitted, std::unique_ptr UnderlyingBuffer) { - // FIXME: Move-capture OnRelocsApplied and UnderlyingBuffer once we have - // c++14. - auto SharedUnderlyingBuffer = - std::shared_ptr(std::move(UnderlyingBuffer)); auto SharedThis = std::shared_ptr(std::move(This)); auto PostResolveContinuation = - [SharedThis, OnEmitted, SharedUnderlyingBuffer]( - Expected Result) { + [SharedThis, OnEmitted = std::move(OnEmitted), + UnderlyingBuffer = std::move(UnderlyingBuffer)]( + Expected Result) mutable { if (!Result) { OnEmitted(Result.takeError()); return; @@ -1219,7 +1231,7 @@ void RuntimeDyldImpl::finalizeAsync( } if (!Symbols.empty()) { - SharedThis->Resolver.lookup(Symbols, PostResolveContinuation); + SharedThis->Resolver.lookup(Symbols, std::move(PostResolveContinuation)); } else PostResolveContinuation(std::map()); } @@ -1395,11 +1407,11 @@ void jitLinkForORC(object::ObjectFile &Obj, std::unique_ptr UnderlyingBuffer, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, - std::function LoadedObj, std::map)> OnLoaded, - std::function OnEmitted) { + unique_function OnEmitted) { RuntimeDyld RTDyld(MemMgr, Resolver); RTDyld.setProcessAllSections(ProcessAllSections); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index d4e3b0ba767..27a7690db34 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -50,18 +50,18 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); case Triple::x86: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::thumb: - return make_unique(MemMgr, 
Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::x86_64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) { - return llvm::make_unique(*this, *ObjSectionToIDOrErr); + return std::make_unique(*this, *ObjSectionToIDOrErr); } else { HasError = true; raw_string_ostream ErrStream(ErrorStr); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index ec31ea4e573..b9c5a12e08d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -851,7 +851,7 @@ RuntimeDyldChecker::RuntimeDyldChecker( GetGOTInfoFunction GetGOTInfo, support::endianness Endianness, MCDisassembler *Disassembler, MCInstPrinter *InstPrinter, raw_ostream &ErrStream) - : Impl(::llvm::make_unique( + : Impl(::std::make_unique( std::move(IsSymbolValid), std::move(GetSymbolInfo), std::move(GetSectionInfo), std::move(GetStubInfo), std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 60041a45e2b..440ab4174a5 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -160,9 +160,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject, // Iterate over all sections in the object. 
auto SI = SourceObject.section_begin(); for (const auto &Sec : Obj->sections()) { - StringRef SectionName; - Sec.getName(SectionName); - if (SectionName != "") { + Expected NameOrErr = Sec.getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + continue; + } + + if (*NameOrErr != "") { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); @@ -238,19 +242,19 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch, JITSymbolResolver &Resolver) { switch (Arch) { default: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::mips: case Triple::mipsel: case Triple::mips64: case Triple::mips64el: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldELF::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) - return llvm::make_unique(*this, *ObjSectionToIDOrErr); + return std::make_unique(*this, *ObjSectionToIDOrErr); else { HasError = true; raw_string_ostream ErrStream(ErrorStr); @@ -567,10 +571,11 @@ Error RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj, // The TOC consists of sections .got, .toc, .tocbss, .plt in that // order. The TOC starts where the first of these sections starts. 
- for (auto &Section: Obj.sections()) { - StringRef SectionName; - if (auto EC = Section.getName(SectionName)) - return errorCodeToError(EC); + for (auto &Section : Obj.sections()) { + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef SectionName = *NameOrErr; if (SectionName == ".got" || SectionName == ".toc" @@ -601,13 +606,19 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, // .opd entries for (section_iterator si = Obj.section_begin(), se = Obj.section_end(); si != se; ++si) { - section_iterator RelSecI = si->getRelocatedSection(); + + Expected RelSecOrErr = si->getRelocatedSection(); + if (!RelSecOrErr) + report_fatal_error(toString(RelSecOrErr.takeError())); + + section_iterator RelSecI = *RelSecOrErr; if (RelSecI == Obj.section_end()) continue; - StringRef RelSectionName; - if (auto EC = RelSecI->getName(RelSectionName)) - return errorCodeToError(EC); + Expected NameOrErr = RelSecI->getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef RelSectionName = *NameOrErr; if (RelSectionName != ".opd") continue; @@ -1865,7 +1876,12 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); SI != SE; ++SI) { if (SI->relocation_begin() != SI->relocation_end()) { - section_iterator RelocatedSection = SI->getRelocatedSection(); + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return make_error( + toString(RelSecOrErr.takeError())); + + section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); assert (i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; @@ -1879,8 +1895,14 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap::iterator i, e; for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { const SectionRef &Section = i->first; + StringRef Name; - 
Section.getName(Name); + Expected NameOrErr = Section.getName(); + if (NameOrErr) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".eh_frame") { UnregisteredEHFrameSections.push_back(i->second); break; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 68b3468fbc9..cec7b92b8c4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -549,7 +549,7 @@ public: void resolveLocalRelocations(); static void finalizeAsync(std::unique_ptr This, - std::function OnEmitted, + unique_function OnEmitted, std::unique_ptr UnderlyingBuffer); void reassignSectionAddress(unsigned SectionID, uint64_t Addr); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 202c3ca1c50..9ca76602ea1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -233,7 +233,10 @@ RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj, for (const auto &Section : Obj.sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); // Force emission of the __text, __eh_frame, and __gcc_except_tab sections // if they're present. 
Otherwise call down to the impl to handle other @@ -351,20 +354,22 @@ RuntimeDyldMachO::create(Triple::ArchType Arch, llvm_unreachable("Unsupported target for RuntimeDyldMachO."); break; case Triple::arm: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::aarch64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); + case Triple::aarch64_32: + return std::make_unique(MemMgr, Resolver); case Triple::x86: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); case Triple::x86_64: - return make_unique(MemMgr, Resolver); + return std::make_unique(MemMgr, Resolver); } } std::unique_ptr RuntimeDyldMachO::loadObject(const object::ObjectFile &O) { if (auto ObjSectionToIDOrErr = loadObjectImpl(O)) - return llvm::make_unique(*this, + return std::make_unique(*this, *ObjSectionToIDOrErr); else { HasError = true; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index d2d74534cf9..dc4af08583d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -284,14 +284,14 @@ public: // Look for and record the EH frame section IDs. for (const auto &SectionPair : SectionMap) { const object::SectionRef &Section = SectionPair.first; - StringRef Name; - if (auto EC = Section.getName(Name)) - return errorCodeToError(EC); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); // Note unwind info is stored in .pdata but often points to .xdata // with an IMAGE_REL_AMD64_ADDR32NB relocation. Using a memory manager // that keeps sections ordered in relation to __ImageBase is necessary. 
- if (Name == ".pdata") + if ((*NameOrErr) == ".pdata") UnregisteredEHFrameSections.push_back(SectionPair.second); } return Error::success(); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 3bec8b979f7..a76958a9e2c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -289,7 +289,10 @@ public: Error finalizeSection(const ObjectFile &Obj, unsigned SectionID, const SectionRef &Section) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name == "__nl_symbol_ptr") return populateIndirectSymbolPointersSection(cast(Obj), diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index f0de27ba14b..523deb29b72 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -128,7 +128,10 @@ public: Error finalizeSection(const ObjectFile &Obj, unsigned SectionID, const SectionRef &Section) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name == "__jump_table") return populateJumpTable(cast(Obj), Section, SectionID); diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp index 63d31c03539..f2368ea7f26 100644 --- a/lib/FuzzMutate/FuzzerCLI.cpp +++ b/lib/FuzzMutate/FuzzerCLI.cpp @@ -171,7 +171,7 @@ std::unique_ptr llvm::parseModule( if (Size <= 1) // We get bogus data given an empty corpus - just create a new module. 
- return llvm::make_unique("M", Context); + return std::make_unique("M", Context); auto Buffer = MemoryBuffer::getMemBuffer( StringRef(reinterpret_cast(Data), Size), "Fuzzer input", diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index eb5760daecb..b0c26e0ecaf 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -352,6 +352,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PreserveAll: Out << "preserve_allcc"; break; case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; + case CallingConv::Tail: Out << "tailcc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; @@ -835,7 +836,7 @@ SlotTracker *ModuleSlotTracker::getMachine() { ShouldCreateStorage = false; MachineStorage = - llvm::make_unique(M, ShouldInitializeAllMetadata); + std::make_unique(M, ShouldInitializeAllMetadata); Machine = MachineStorage.get(); return Machine; } @@ -2312,7 +2313,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, if (const MDNode *N = dyn_cast(MD)) { std::unique_ptr MachineStorage; if (!Machine) { - MachineStorage = make_unique(Context); + MachineStorage = std::make_unique(Context); Machine = MachineStorage.get(); } int Slot = Machine->getMetadataSlot(N); @@ -2950,7 +2951,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) { FunctionSummary::FFlags FFlags = FS->fflags(); if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse | - FFlags.ReturnDoesNotAlias) { + FFlags.ReturnDoesNotAlias | FFlags.NoInline) { Out << ", funcFlags: ("; Out << "readNone: " << FFlags.ReadNone; Out << ", readOnly: " << FFlags.ReadOnly; @@ -3553,6 +3554,10 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { if (Arg->hasName()) { Out << ' '; PrintLLVMName(Out, Arg); + } else { + int 
Slot = Machine.getLocalSlot(Arg); + assert(Slot != -1 && "expect argument in function here"); + Out << " %" << Slot; } } diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index f989fa3b910..15e488bbb13 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -159,7 +159,7 @@ public: }; class TypeAttributeImpl : public EnumAttributeImpl { - virtual void anchor(); + void anchor() override; Type *Ty; @@ -208,8 +208,8 @@ public: Attribute getAttribute(Attribute::AttrKind Kind) const; Attribute getAttribute(StringRef Kind) const; - unsigned getAlignment() const; - unsigned getStackAlignment() const; + MaybeAlign getAlignment() const; + MaybeAlign getStackAlignment() const; uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; std::pair> getAllocSizeArgs() const; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index bb90bcd7dd7..cc370e628e9 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -142,17 +142,14 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, return Attribute(PA); } -Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) { - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x40000000 && "Alignment too large."); - return get(Context, Alignment, Align); +Attribute Attribute::getWithAlignment(LLVMContext &Context, Align A) { + assert(A <= 0x40000000 && "Alignment too large."); + return get(Context, Alignment, A.value()); } -Attribute Attribute::getWithStackAlignment(LLVMContext &Context, - uint64_t Align) { - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x100 && "Alignment too large."); - return get(Context, StackAlignment, Align); +Attribute Attribute::getWithStackAlignment(LLVMContext &Context, Align A) { + assert(A <= 0x100 && "Alignment too large."); + return get(Context, StackAlignment, A.value()); } Attribute 
Attribute::getWithDereferenceableBytes(LLVMContext &Context, @@ -244,16 +241,16 @@ bool Attribute::hasAttribute(StringRef Kind) const { return pImpl && pImpl->hasAttribute(Kind); } -unsigned Attribute::getAlignment() const { +MaybeAlign Attribute::getAlignment() const { assert(hasAttribute(Attribute::Alignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getValueAsInt(); + return MaybeAlign(pImpl->getValueAsInt()); } -unsigned Attribute::getStackAlignment() const { +MaybeAlign Attribute::getStackAlignment() const { assert(hasAttribute(Attribute::StackAlignment) && "Trying to get alignment from non-alignment attribute!"); - return pImpl->getValueAsInt(); + return MaybeAlign(pImpl->getValueAsInt()); } uint64_t Attribute::getDereferenceableBytes() const { @@ -670,12 +667,12 @@ Attribute AttributeSet::getAttribute(StringRef Kind) const { return SetNode ? SetNode->getAttribute(Kind) : Attribute(); } -unsigned AttributeSet::getAlignment() const { - return SetNode ? SetNode->getAlignment() : 0; +MaybeAlign AttributeSet::getAlignment() const { + return SetNode ? SetNode->getAlignment() : None; } -unsigned AttributeSet::getStackAlignment() const { - return SetNode ? SetNode->getStackAlignment() : 0; +MaybeAlign AttributeSet::getStackAlignment() const { + return SetNode ? 
SetNode->getStackAlignment() : None; } uint64_t AttributeSet::getDereferenceableBytes() const { @@ -782,10 +779,12 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) { Attr = Attribute::getWithByValType(C, B.getByValType()); break; case Attribute::Alignment: - Attr = Attribute::getWithAlignment(C, B.getAlignment()); + assert(B.getAlignment() && "Alignment must be set"); + Attr = Attribute::getWithAlignment(C, *B.getAlignment()); break; case Attribute::StackAlignment: - Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment()); + assert(B.getStackAlignment() && "StackAlignment must be set"); + Attr = Attribute::getWithStackAlignment(C, *B.getStackAlignment()); break; case Attribute::Dereferenceable: Attr = Attribute::getWithDereferenceableBytes( @@ -836,18 +835,18 @@ Attribute AttributeSetNode::getAttribute(StringRef Kind) const { return {}; } -unsigned AttributeSetNode::getAlignment() const { +MaybeAlign AttributeSetNode::getAlignment() const { for (const auto I : *this) if (I.hasAttribute(Attribute::Alignment)) return I.getAlignment(); - return 0; + return None; } -unsigned AttributeSetNode::getStackAlignment() const { +MaybeAlign AttributeSetNode::getStackAlignment() const { for (const auto I : *this) if (I.hasAttribute(Attribute::StackAlignment)) return I.getStackAlignment(); - return 0; + return None; } Type *AttributeSetNode::getByValType() const { @@ -1164,8 +1163,8 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. 
- unsigned OldAlign = getAttributes(Index).getAlignment(); - unsigned NewAlign = B.getAlignment(); + const MaybeAlign OldAlign = getAttributes(Index).getAlignment(); + const MaybeAlign NewAlign = B.getAlignment(); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif @@ -1349,11 +1348,11 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const { return getAttributes(Index).getAttribute(Kind); } -unsigned AttributeList::getRetAlignment() const { +MaybeAlign AttributeList::getRetAlignment() const { return getAttributes(ReturnIndex).getAlignment(); } -unsigned AttributeList::getParamAlignment(unsigned ArgNo) const { +MaybeAlign AttributeList::getParamAlignment(unsigned ArgNo) const { return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } @@ -1361,8 +1360,7 @@ Type *AttributeList::getParamByValType(unsigned Index) const { return getAttributes(Index+FirstArgIndex).getByValType(); } - -unsigned AttributeList::getStackAlignment(unsigned Index) const { +MaybeAlign AttributeList::getStackAlignment(unsigned Index) const { return getAttributes(Index).getStackAlignment(); } @@ -1438,7 +1436,9 @@ AttrBuilder::AttrBuilder(AttributeSet AS) { void AttrBuilder::clear() { Attrs.reset(); TargetDepAttrs.clear(); - Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0; + Alignment.reset(); + StackAlignment.reset(); + DerefBytes = DerefOrNullBytes = 0; AllocSizeArgs = 0; ByValType = nullptr; } @@ -1486,9 +1486,9 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { Attrs[Val] = false; if (Val == Attribute::Alignment) - Alignment = 0; + Alignment.reset(); else if (Val == Attribute::StackAlignment) - StackAlignment = 0; + StackAlignment.reset(); else if (Val == Attribute::ByVal) ByValType = nullptr; else if (Val == Attribute::Dereferenceable) @@ -1517,23 +1517,23 @@ std::pair> AttrBuilder::getAllocSizeArgs() const { return unpackAllocSizeArgs(AllocSizeArgs); } -AttrBuilder 
&AttrBuilder::addAlignmentAttr(unsigned Align) { - if (Align == 0) return *this; +AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) { + if (!Align) + return *this; - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x40000000 && "Alignment too large."); + assert(*Align <= 0x40000000 && "Alignment too large."); Attrs[Attribute::Alignment] = true; Alignment = Align; return *this; } -AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { +AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) { // Default alignment, allow the target to define how to align it. - if (Align == 0) return *this; + if (!Align) + return *this; - assert(isPowerOf2_32(Align) && "Alignment must be a power of two."); - assert(Align <= 0x100 && "Alignment too large."); + assert(*Align <= 0x100 && "Alignment too large."); Attrs[Attribute::StackAlignment] = true; StackAlignment = Align; @@ -1610,10 +1610,10 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! if (B.Alignment) - Alignment = 0; + Alignment.reset(); if (B.StackAlignment) - StackAlignment = 0; + StackAlignment.reset(); if (B.DerefBytes) DerefBytes = 0; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index a2d82035282..79f580d0e14 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -490,12 +490,6 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); - // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.". - if (F->getName() == "clang.arc.use") { - NewFn = nullptr; - return true; - } - // Quickly eliminate it, if it's not a candidate. 
StringRef Name = F->getName(); if (Name.size() <= 8 || !Name.startswith("llvm.")) @@ -528,7 +522,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { F->arg_begin()->getType()); return true; } - Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); + static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vldRegex.match(Name)) { auto fArgs = F->getFunctionType()->params(); SmallVector Tys(fArgs.begin(), fArgs.end()); @@ -539,7 +533,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { "llvm." + Name + ".p0i8", F->getParent()); return true; } - Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); + static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); if (vstRegex.match(Name)) { static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2, @@ -604,7 +598,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } case 'e': { SmallVector Groups; - Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); + static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); if (R.match(Name, &Groups)) { Intrinsic::ID ID = Intrinsic::not_intrinsic; if (Groups[1] == "fadd") @@ -789,6 +783,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; + case 'p': + if (Name == "prefetch") { + // Handle address space overloading. + Type *Tys[] = {F->arg_begin()->getType()}; + if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) { + rename(F); + NewFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); + return true; + } + } + break; + case 's': if (Name == "stackprotectorcheck") { NewFn = nullptr; @@ -1648,14 +1655,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Get the Function's name. StringRef Name = F->getName(); - // clang.arc.use is an old name for llvm.arc.clang.arc.use. 
It is dropped - // from upgrader because the optimizer now only recognizes intrinsics for - // ARC runtime calls. - if (Name == "clang.arc.use") { - CI->eraseFromParent(); - return; - } - assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); Name = Name.substr(5); @@ -3831,7 +3830,9 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } -bool llvm::UpgradeRetainReleaseMarker(Module &M) { +/// This checks for objc retain release marker which should be upgraded. It +/// returns true if module is modified. +static bool UpgradeRetainReleaseMarker(Module &M) { bool Changed = false; const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); @@ -3855,6 +3856,106 @@ bool llvm::UpgradeRetainReleaseMarker(Module &M) { return Changed; } +void llvm::UpgradeARCRuntime(Module &M) { + // This lambda converts normal function calls to ARC runtime functions to + // intrinsic calls. + auto UpgradeToIntrinsic = [&](const char *OldFunc, + llvm::Intrinsic::ID IntrinsicFunc) { + Function *Fn = M.getFunction(OldFunc); + + if (!Fn) + return; + + Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc); + + for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) { + CallInst *CI = dyn_cast(*I++); + if (!CI || CI->getCalledFunction() != Fn) + continue; + + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); + FunctionType *NewFuncTy = NewFn->getFunctionType(); + SmallVector Args; + + for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) { + Value *Arg = CI->getArgOperand(I); + // Bitcast argument to the parameter type of the new function if it's + // not a variadic argument. + if (I < NewFuncTy->getNumParams()) + Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I)); + Args.push_back(Arg); + } + + // Create a call instruction that calls the new function. 
+ CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args); + NewCall->setTailCallKind(cast(CI)->getTailCallKind()); + NewCall->setName(CI->getName()); + + // Bitcast the return value back to the type of the old call. + Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType()); + + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewRetVal); + CI->eraseFromParent(); + } + + if (Fn->use_empty()) + Fn->eraseFromParent(); + }; + + // Unconditionally convert a call to "clang.arc.use" to a call to + // "llvm.objc.clang.arc.use". + UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use); + + // Upgrade the retain release marker. If there is no need to upgrade + // the marker, that means either the module is already new enough to contain + // new intrinsics or it is not ARC. There is no need to upgrade runtime call. + if (!UpgradeRetainReleaseMarker(M)) + return; + + std::pair RuntimeFuncs[] = { + {"objc_autorelease", llvm::Intrinsic::objc_autorelease}, + {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop}, + {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush}, + {"objc_autoreleaseReturnValue", + llvm::Intrinsic::objc_autoreleaseReturnValue}, + {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak}, + {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak}, + {"objc_initWeak", llvm::Intrinsic::objc_initWeak}, + {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak}, + {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained}, + {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak}, + {"objc_release", llvm::Intrinsic::objc_release}, + {"objc_retain", llvm::Intrinsic::objc_retain}, + {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease}, + {"objc_retainAutoreleaseReturnValue", + llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, + {"objc_retainAutoreleasedReturnValue", + llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, + {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock}, + 
{"objc_storeStrong", llvm::Intrinsic::objc_storeStrong}, + {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak}, + {"objc_unsafeClaimAutoreleasedReturnValue", + llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, + {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject}, + {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject}, + {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer}, + {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease}, + {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter}, + {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit}, + {"objc_arc_annotation_topdown_bbstart", + llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, + {"objc_arc_annotation_topdown_bbend", + llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, + {"objc_arc_annotation_bottomup_bbstart", + llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, + {"objc_arc_annotation_bottomup_bbend", + llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; + + for (auto &I : RuntimeFuncs) + UpgradeToIntrinsic(I.first, I.second); +} + bool llvm::UpgradeModuleFlags(Module &M) { NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); if (!ModFlags) @@ -4012,3 +4113,23 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { return MDTuple::get(T->getContext(), Ops); } + +std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { + std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; + + // If X86, and the datalayout matches the expected format, add pointer size + // address spaces to the datalayout. 
+ Triple::ArchType Arch = Triple(TT).getArch(); + if ((Arch != llvm::Triple::x86 && Arch != llvm::Triple::x86_64) || + DL.contains(AddrSpaces)) + return DL; + + SmallVector Groups; + Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); + if (!R.match(DL, &Groups)) + return DL; + + SmallString<1024> Buf; + std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str(); + return Res; +} diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 34410712645..bdee6990f93 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -107,6 +107,13 @@ BasicBlock::instructionsWithoutDebug() { return make_filter_range(*this, Fn); } +filter_iterator>::difference_type +BasicBlock::sizeWithoutDebug() const { + return std::distance(instructionsWithoutDebug().begin(), + instructionsWithoutDebug().end()); +} + void BasicBlock::removeFromParent() { getParent()->getBasicBlockList().remove(getIterator()); } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 835fbb3443b..71fa795ec29 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -746,7 +746,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, ConstantInt::get(Ty, i)); Constant *V2Element = ConstantExpr::getExtractElement(V2, ConstantInt::get(Ty, i)); - Constant *Cond = dyn_cast(CondV->getOperand(i)); + auto *Cond = cast(CondV->getOperand(i)); if (V1Element == V2Element) { V = V1Element; } else if (isa(Cond)) { @@ -787,12 +787,9 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx) { - if (isa(Val)) // ee(undef, x) -> undef - return UndefValue::get(Val->getType()->getVectorElementType()); - if (Val->isNullValue()) // ee(zero, x) -> zero - return Constant::getNullValue(Val->getType()->getVectorElementType()); - // ee({w,x,y,z}, undef) -> undef - if (isa(Idx)) + // extractelt undef, C -> undef + // extractelt C, undef -> undef + if (isa(Val) || isa(Idx)) return 
UndefValue::get(Val->getType()->getVectorElementType()); if (ConstantInt *CIdx = dyn_cast(Idx)) { @@ -1125,7 +1122,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, isa(CE1->getOperand(0))) { GlobalValue *GV = cast(CE1->getOperand(0)); - unsigned GVAlign; + MaybeAlign GVAlign; if (Module *TheModule = GV->getParent()) { GVAlign = GV->getPointerAlignment(TheModule->getDataLayout()); @@ -1139,19 +1136,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, // increased code size (see https://reviews.llvm.org/D55115) // FIXME: This code should be deleted once existing targets have // appropriate defaults - if (GVAlign == 0U && isa(GV)) - GVAlign = 4U; + if (!GVAlign && isa(GV)) + GVAlign = Align(4); } else if (isa(GV)) { // Without a datalayout we have to assume the worst case: that the // function pointer isn't aligned at all. - GVAlign = 0U; + GVAlign = llvm::None; } else { - GVAlign = GV->getAlignment(); + GVAlign = MaybeAlign(GV->getAlignment()); } - if (GVAlign > 1) { + if (GVAlign && *GVAlign > 1) { unsigned DstWidth = CI2->getType()->getBitWidth(); - unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign)); + unsigned SrcWidth = std::min(DstWidth, Log2(*GVAlign)); APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth)); // If checking bits we know are clear, return zero. diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 920fdc01a14..642bf0f3934 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -269,6 +269,27 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return makeExactMulNSWRegion(Other.getSignedMin()) .intersectWith(makeExactMulNSWRegion(Other.getSignedMax())); + + case Instruction::Shl: { + // For given range of shift amounts, if we ignore all illegal shift amounts + // (that always produce poison), what shift amount range is left? 
+ ConstantRange ShAmt = Other.intersectWith( + ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, (BitWidth - 1) + 1))); + if (ShAmt.isEmptySet()) { + // If the entire range of shift amounts is already poison-producing, + // then we can freely add more poison-producing flags ontop of that. + return getFull(BitWidth); + } + // There are some legal shift amounts, we can compute conservatively-correct + // range of no-wrap inputs. Note that by now we have clamped the ShAmtUMax + // to be at most bitwidth-1, which results in most conservative range. + APInt ShAmtUMax = ShAmt.getUnsignedMax(); + if (Unsigned) + return getNonEmpty(APInt::getNullValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1); + return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax), + APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1); + } } } @@ -815,14 +836,55 @@ ConstantRange::add(const ConstantRange &Other) const { return X; } -ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const { - // Calculate the subset of this range such that "X + Other" is - // guaranteed not to wrap (overflow) for all X in this subset. - auto NSWRange = ConstantRange::makeExactNoWrapRegion( - BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap); - auto NSWConstrainedRange = intersectWith(NSWRange); +ConstantRange ConstantRange::addWithNoWrap(const ConstantRange &Other, + unsigned NoWrapKind, + PreferredRangeType RangeType) const { + // Calculate the range for "X + Y" which is guaranteed not to wrap(overflow). 
+ // (X is from this, and Y is from Other) + if (isEmptySet() || Other.isEmptySet()) + return getEmpty(); + if (isFullSet() && Other.isFullSet()) + return getFull(); - return NSWConstrainedRange.add(ConstantRange(Other)); + using OBO = OverflowingBinaryOperator; + ConstantRange Result = add(Other); + + auto addWithNoUnsignedWrap = [this](const ConstantRange &Other) { + APInt LMin = getUnsignedMin(), LMax = getUnsignedMax(); + APInt RMin = Other.getUnsignedMin(), RMax = Other.getUnsignedMax(); + bool Overflow; + APInt NewMin = LMin.uadd_ov(RMin, Overflow); + if (Overflow) + return getEmpty(); + APInt NewMax = LMax.uadd_sat(RMax); + return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1); + }; + + auto addWithNoSignedWrap = [this](const ConstantRange &Other) { + APInt LMin = getSignedMin(), LMax = getSignedMax(); + APInt RMin = Other.getSignedMin(), RMax = Other.getSignedMax(); + if (LMin.isNonNegative()) { + bool Overflow; + APInt Temp = LMin.sadd_ov(RMin, Overflow); + if (Overflow) + return getEmpty(); + } + if (LMax.isNegative()) { + bool Overflow; + APInt Temp = LMax.sadd_ov(RMax, Overflow); + if (Overflow) + return getEmpty(); + } + APInt NewMin = LMin.sadd_sat(RMin); + APInt NewMax = LMax.sadd_sat(RMax); + return getNonEmpty(std::move(NewMin), std::move(NewMax) + 1); + }; + + if (NoWrapKind & OBO::NoSignedWrap) + Result = Result.intersectWith(addWithNoSignedWrap(Other), RangeType); + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = Result.intersectWith(addWithNoUnsignedWrap(Other), RangeType); + return Result; } ConstantRange diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index ff551da29ae..f792f01efc1 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -250,6 +251,20 @@ bool 
Constant::isNaN() const { return true; } +bool Constant::isElementWiseEqual(Value *Y) const { + // Are they fully identical? + if (this == Y) + return true; + // They may still be identical element-wise (if they have `undef`s). + auto *Cy = dyn_cast(Y); + if (!Cy) + return false; + return PatternMatch::match(ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_EQ, + const_cast(this), + Cy), + PatternMatch::m_One()); +} + bool Constant::containsUndefElement() const { if (!getType()->isVectorTy()) return false; @@ -502,22 +517,32 @@ bool Constant::needsRelocation() const { if (const BlockAddress *BA = dyn_cast(this)) return BA->getFunction()->needsRelocation(); - // While raw uses of blockaddress need to be relocated, differences between - // two of them don't when they are for labels in the same function. This is a - // common idiom when creating a table for the indirect goto extension, so we - // handle it efficiently here. - if (const ConstantExpr *CE = dyn_cast(this)) + if (const ConstantExpr *CE = dyn_cast(this)) { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast(CE->getOperand(1)); if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && - RHS->getOpcode() == Instruction::PtrToInt && - isa(LHS->getOperand(0)) && - isa(RHS->getOperand(0)) && - cast(LHS->getOperand(0))->getFunction() == - cast(RHS->getOperand(0))->getFunction()) - return false; + RHS->getOpcode() == Instruction::PtrToInt) { + Constant *LHSOp0 = LHS->getOperand(0); + Constant *RHSOp0 = RHS->getOperand(0); + + // While raw uses of blockaddress need to be relocated, differences + // between two of them don't when they are for labels in the same + // function. This is a common idiom when creating a table for the + // indirect goto extension, so we handle it efficiently here. 
+ if (isa(LHSOp0) && isa(RHSOp0) && + cast(LHSOp0)->getFunction() == + cast(RHSOp0)->getFunction()) + return false; + + // Relative pointers do not need to be dynamically relocated. + if (auto *LHSGV = dyn_cast(LHSOp0->stripPointerCasts())) + if (auto *RHSGV = dyn_cast(RHSOp0->stripPointerCasts())) + if (LHSGV->isDSOLocal() && RHSGV->isDSOLocal()) + return false; + } } + } bool Result = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) @@ -563,13 +588,10 @@ void Constant::removeDeadConstantUsers() const { } // If the constant was dead, then the iterator is invalidated. - if (LastNonDeadUser == E) { + if (LastNonDeadUser == E) I = user_begin(); - if (I == E) break; - } else { - I = LastNonDeadUser; - ++I; - } + else + I = std::next(LastNonDeadUser); } } diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index 7614dab9f15..1ec9087551f 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -480,14 +480,16 @@ struct ConstantExprKeyType { : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands), - Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()) {} + Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()), + ExplicitTy(nullptr) {} ConstantExprKeyType(const ConstantExpr *CE, SmallVectorImpl &Storage) : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), - Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()) { + Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()), + ExplicitTy(nullptr) { assert(Storage.empty() && "Expected empty storage"); for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I) Storage.push_back(CE->getOperand(I)); @@ -676,9 +678,9 @@ public: /// Hash once, and reuse it for the lookup and the insertion if needed. 
LookupKeyHashed Lookup(MapInfo::getHashValue(Key), Key); - auto I = Map.find_as(Lookup); - if (I != Map.end()) - return *I; + auto ItMap = Map.find_as(Lookup); + if (ItMap != Map.end()) + return *ItMap; // Update to the new value. Optimize for the case when we have a single // operand that we're changing, but handle bulk updates efficiently. diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 310935b5213..a5f46b16e60 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -140,7 +140,16 @@ unsigned LLVMGetLastEnumAttributeKind(void) { LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, uint64_t Val) { - return wrap(Attribute::get(*unwrap(C), (Attribute::AttrKind)KindID, Val)); + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; + + if (AttrKind == Attribute::AttrKind::ByVal) { + // After r362128, byval attributes need to have a type attribute. Provide a + // NULL one until a proper API is added for this. + return wrap(Attribute::getWithByValType(Ctx, NULL)); + } else { + return wrap(Attribute::get(Ctx, AttrKind, Val)); + } } unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) { @@ -386,7 +395,7 @@ void LLVMDumpModule(LLVMModuleRef M) { LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, char **ErrorMessage) { std::error_code EC; - raw_fd_ostream dest(Filename, EC, sys::fs::F_Text); + raw_fd_ostream dest(Filename, EC, sys::fs::OF_Text); if (EC) { *ErrorMessage = strdup(EC.message().c_str()); return true; @@ -1999,13 +2008,13 @@ unsigned LLVMGetAlignment(LLVMValueRef V) { void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { Value *P = unwrap(V); if (GlobalObject *GV = dyn_cast(P)) - GV->setAlignment(Bytes); + GV->setAlignment(MaybeAlign(Bytes)); else if (AllocaInst *AI = dyn_cast(P)) - AI->setAlignment(Bytes); + AI->setAlignment(MaybeAlign(Bytes)); else if (LoadInst *LI = dyn_cast(P)) - LI->setAlignment(Bytes); + LI->setAlignment(MaybeAlign(Bytes)); else if (StoreInst *SI = dyn_cast(P)) - 
SI->setAlignment(Bytes); + SI->setAlignment(MaybeAlign(Bytes)); else llvm_unreachable( "only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment"); @@ -2480,7 +2489,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap(Arg); - A->addAttr(Attribute::getWithAlignment(A->getContext(), align)); + A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(align))); } /*--.. Operations on ifuncs ................................................--*/ @@ -2779,7 +2788,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { auto *Call = unwrap(Instr); - Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align); + Attribute AlignAttr = + Attribute::getWithAlignment(Call->getContext(), Align(align)); Call->addAttribute(index, AlignAttr); } @@ -3518,6 +3528,47 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid AtomicOrdering value!"); } +static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) { + switch (BinOp) { + case LLVMAtomicRMWBinOpXchg: return AtomicRMWInst::Xchg; + case LLVMAtomicRMWBinOpAdd: return AtomicRMWInst::Add; + case LLVMAtomicRMWBinOpSub: return AtomicRMWInst::Sub; + case LLVMAtomicRMWBinOpAnd: return AtomicRMWInst::And; + case LLVMAtomicRMWBinOpNand: return AtomicRMWInst::Nand; + case LLVMAtomicRMWBinOpOr: return AtomicRMWInst::Or; + case LLVMAtomicRMWBinOpXor: return AtomicRMWInst::Xor; + case LLVMAtomicRMWBinOpMax: return AtomicRMWInst::Max; + case LLVMAtomicRMWBinOpMin: return AtomicRMWInst::Min; + case LLVMAtomicRMWBinOpUMax: return AtomicRMWInst::UMax; + case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin; + case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd; + case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; + } + + llvm_unreachable("Invalid 
LLVMAtomicRMWBinOp value!"); +} + +static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) { + switch (BinOp) { + case AtomicRMWInst::Xchg: return LLVMAtomicRMWBinOpXchg; + case AtomicRMWInst::Add: return LLVMAtomicRMWBinOpAdd; + case AtomicRMWInst::Sub: return LLVMAtomicRMWBinOpSub; + case AtomicRMWInst::And: return LLVMAtomicRMWBinOpAnd; + case AtomicRMWInst::Nand: return LLVMAtomicRMWBinOpNand; + case AtomicRMWInst::Or: return LLVMAtomicRMWBinOpOr; + case AtomicRMWInst::Xor: return LLVMAtomicRMWBinOpXor; + case AtomicRMWInst::Max: return LLVMAtomicRMWBinOpMax; + case AtomicRMWInst::Min: return LLVMAtomicRMWBinOpMin; + case AtomicRMWInst::UMax: return LLVMAtomicRMWBinOpUMax; + case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin; + case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd; + case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; + default: break; + } + + llvm_unreachable("Invalid AtomicRMWBinOp value!"); +} + // TODO: Should this and other atomic instructions support building with // "syncscope"? 
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, @@ -3593,14 +3644,30 @@ LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->isVolatile(); - return cast(P)->isVolatile(); + if (StoreInst *SI = dyn_cast(P)) + return SI->isVolatile(); + if (AtomicRMWInst *AI = dyn_cast(P)) + return AI->isVolatile(); + return cast(P)->isVolatile(); } void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->setVolatile(isVolatile); - return cast(P)->setVolatile(isVolatile); + if (StoreInst *SI = dyn_cast(P)) + return SI->setVolatile(isVolatile); + if (AtomicRMWInst *AI = dyn_cast(P)) + return AI->setVolatile(isVolatile); + return cast(P)->setVolatile(isVolatile); +} + +LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst) { + return unwrap(CmpXchgInst)->isWeak(); +} + +void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool isWeak) { + return unwrap(CmpXchgInst)->setWeak(isWeak); } LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { @@ -3608,8 +3675,10 @@ LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { AtomicOrdering O; if (LoadInst *LI = dyn_cast(P)) O = LI->getOrdering(); + else if (StoreInst *SI = dyn_cast(P)) + O = SI->getOrdering(); else - O = cast(P)->getOrdering(); + O = cast(P)->getOrdering(); return mapToLLVMOrdering(O); } @@ -3622,6 +3691,14 @@ void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) { return cast(P)->setOrdering(O); } +LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef Inst) { + return mapToLLVMRMWBinOp(unwrap(Inst)->getOperation()); +} + +void LLVMSetAtomicRMWBinOp(LLVMValueRef Inst, LLVMAtomicRMWBinOp BinOp) { + unwrap(Inst)->setOperation(mapFromLLVMRMWBinOp(BinOp)); +} + /*--.. 
Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, @@ -3840,20 +3917,7 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, LLVMValueRef PTR, LLVMValueRef Val, LLVMAtomicOrdering ordering, LLVMBool singleThread) { - AtomicRMWInst::BinOp intop; - switch (op) { - case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break; - case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break; - case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break; - case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break; - case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break; - case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break; - case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break; - case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break; - case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break; - case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break; - case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; - } + AtomicRMWInst::BinOp intop = mapFromLLVMRMWBinOp(op); return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), mapFromLLVMOrdering(ordering), singleThread ? 
SyncScope::SingleThread : SyncScope::System)); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2493c6cbe53..5d567122743 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -25,7 +25,7 @@ using namespace llvm; using namespace llvm::dwarf; -cl::opt +static cl::opt UseDbgAddr("use-dbg-addr", llvm::cl::desc("Use llvm.dbg.addr for all local variables"), cl::init(false), cl::Hidden); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 6e0ebbd4a73..5fe7a2e94b6 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -44,7 +45,6 @@ using namespace llvm; StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); - StructAlignment = 0; StructSize = 0; IsPadded = false; NumElements = ST->getNumElements(); @@ -52,10 +52,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { Type *Ty = ST->getElementType(i); - unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); + const Align TyAlign(ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty)); // Add padding if necessary to align the data element properly. - if ((StructSize & (TyAlign-1)) != 0) { + if (!isAligned(TyAlign, StructSize)) { IsPadded = true; StructSize = alignTo(StructSize, TyAlign); } @@ -67,12 +67,9 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item } - // Empty structures have alignment of 1 byte. - if (StructAlignment == 0) StructAlignment = 1; - // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. 
- if ((StructSize & (StructAlignment-1)) != 0) { + if (!isAligned(StructAlignment, StructSize)) { IsPadded = true; StructSize = alignTo(StructSize, StructAlignment); } @@ -102,9 +99,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { // LayoutAlignElem, LayoutAlign support //===----------------------------------------------------------------------===// -LayoutAlignElem -LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { +LayoutAlignElem LayoutAlignElem::get(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); LayoutAlignElem retval; retval.AlignType = align_type; @@ -126,10 +122,9 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const { // PointerAlignElem, PointerAlign support //===----------------------------------------------------------------------===// -PointerAlignElem -PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, - uint32_t IndexWidth) { +PointerAlignElem PointerAlignElem::get(uint32_t AddressSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, + uint32_t IndexWidth) { assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); PointerAlignElem retval; retval.AddressSpace = AddressSpace; @@ -162,18 +157,18 @@ const char *DataLayout::getManglingComponent(const Triple &T) { } static const LayoutAlignElem DefaultAlignments[] = { - { INTEGER_ALIGN, 1, 1, 1 }, // i1 - { INTEGER_ALIGN, 8, 1, 1 }, // i8 - { INTEGER_ALIGN, 16, 2, 2 }, // i16 - { INTEGER_ALIGN, 32, 4, 4 }, // i32 - { INTEGER_ALIGN, 64, 4, 8 }, // i64 - { FLOAT_ALIGN, 16, 2, 2 }, // half - { FLOAT_ALIGN, 32, 4, 4 }, // float - { FLOAT_ALIGN, 64, 8, 8 }, // double - { FLOAT_ALIGN, 128, 16, 16 }, // ppcf128, quad, ... - { VECTOR_ALIGN, 64, 8, 8 }, // v2i32, v1i64, ... 
- { VECTOR_ALIGN, 128, 16, 16 }, // v16i8, v8i16, v4i32, ... - { AGGREGATE_ALIGN, 0, 0, 8 } // struct + {INTEGER_ALIGN, 1, Align(1), Align(1)}, // i1 + {INTEGER_ALIGN, 8, Align(1), Align(1)}, // i8 + {INTEGER_ALIGN, 16, Align(2), Align(2)}, // i16 + {INTEGER_ALIGN, 32, Align(4), Align(4)}, // i32 + {INTEGER_ALIGN, 64, Align(4), Align(8)}, // i64 + {FLOAT_ALIGN, 16, Align(2), Align(2)}, // half + {FLOAT_ALIGN, 32, Align(4), Align(4)}, // float + {FLOAT_ALIGN, 64, Align(8), Align(8)}, // double + {FLOAT_ALIGN, 128, Align(16), Align(16)}, // ppcf128, quad, ... + {VECTOR_ALIGN, 64, Align(8), Align(8)}, // v2i32, v1i64, ... + {VECTOR_ALIGN, 128, Align(16), Align(16)}, // v16i8, v8i16, v4i32, ... + {AGGREGATE_ALIGN, 0, Align(1), Align(8)} // struct }; void DataLayout::reset(StringRef Desc) { @@ -182,9 +177,9 @@ void DataLayout::reset(StringRef Desc) { LayoutMap = nullptr; BigEndian = false; AllocaAddrSpace = 0; - StackNaturalAlign = 0; + StackNaturalAlign.reset(); ProgramAddrSpace = 0; - FunctionPtrAlign = 0; + FunctionPtrAlign.reset(); TheFunctionPtrAlignType = FunctionPtrAlignType::Independent; ManglingMode = MM_None; NonIntegralAddressSpaces.clear(); @@ -194,7 +189,7 @@ void DataLayout::reset(StringRef Desc) { setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, E.TypeBitWidth); } - setPointerAlignment(0, 8, 8, 8, 8); + setPointerAlignment(0, Align(8), Align(8), 8, 8); parseSpecifier(Desc); } @@ -320,8 +315,9 @@ void DataLayout::parseSpecifier(StringRef Desc) { report_fatal_error("Invalid index size of 0 bytes"); } } - setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign, - PointerMemSize, IndexSize); + setPointerAlignment(AddrSpace, assumeAligned(PointerABIAlign), + assumeAligned(PointerPrefAlign), PointerMemSize, + IndexSize); break; } case 'i': @@ -349,11 +345,16 @@ void DataLayout::parseSpecifier(StringRef Desc) { report_fatal_error( "Missing alignment specification in datalayout string"); Split = split(Rest, ':'); - unsigned ABIAlign = 
inBytes(getInt(Tok)); + const unsigned ABIAlign = inBytes(getInt(Tok)); if (AlignType != AGGREGATE_ALIGN && !ABIAlign) report_fatal_error( "ABI alignment specification must be >0 for non-aggregate types"); + if (!isUInt<16>(ABIAlign)) + report_fatal_error("Invalid ABI alignment, must be a 16bit integer"); + if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) + report_fatal_error("Invalid ABI alignment, must be a power of 2"); + // Preferred alignment. unsigned PrefAlign = ABIAlign; if (!Rest.empty()) { @@ -361,7 +362,14 @@ void DataLayout::parseSpecifier(StringRef Desc) { PrefAlign = inBytes(getInt(Tok)); } - setAlignment(AlignType, ABIAlign, PrefAlign, Size); + if (!isUInt<16>(PrefAlign)) + report_fatal_error( + "Invalid preferred alignment, must be a 16bit integer"); + if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) + report_fatal_error("Invalid preferred alignment, must be a power of 2"); + + setAlignment(AlignType, assumeAligned(ABIAlign), assumeAligned(PrefAlign), + Size); break; } @@ -378,7 +386,10 @@ void DataLayout::parseSpecifier(StringRef Desc) { } break; case 'S': { // Stack natural alignment. - StackNaturalAlign = inBytes(getInt(Tok)); + uint64_t Alignment = inBytes(getInt(Tok)); + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + report_fatal_error("Alignment is neither 0 nor a power of 2"); + StackNaturalAlign = MaybeAlign(Alignment); break; } case 'F': { @@ -394,7 +405,10 @@ void DataLayout::parseSpecifier(StringRef Desc) { "datalayout string"); } Tok = Tok.substr(1); - FunctionPtrAlign = inBytes(getInt(Tok)); + uint64_t Alignment = inBytes(getInt(Tok)); + if (Alignment != 0 && !llvm::isPowerOf2_64(Alignment)) + report_fatal_error("Alignment is neither 0 nor a power of 2"); + FunctionPtrAlign = MaybeAlign(Alignment); break; } case 'P': { // Function address space. 
@@ -468,20 +482,15 @@ DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType, }); } -void -DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { +void DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width) { + // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as + // uint16_t, it is unclear if there are requirements for alignment to be less + // than 2^16 other than storage. In the meantime we leave the restriction as + // an assert. See D67400 for context. + assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big"); if (!isUInt<24>(bit_width)) report_fatal_error("Invalid bit width, must be a 24bit integer"); - if (!isUInt<16>(abi_align)) - report_fatal_error("Invalid ABI alignment, must be a 16bit integer"); - if (!isUInt<16>(pref_align)) - report_fatal_error("Invalid preferred alignment, must be a 16bit integer"); - if (abi_align != 0 && !isPowerOf2_64(abi_align)) - report_fatal_error("Invalid ABI alignment, must be a power of 2"); - if (pref_align != 0 && !isPowerOf2_64(pref_align)) - report_fatal_error("Invalid preferred alignment, must be a power of 2"); - if (pref_align < abi_align) report_fatal_error( "Preferred alignment cannot be less than the ABI alignment"); @@ -507,8 +516,8 @@ DataLayout::findPointerLowerBound(uint32_t AddressSpace) { }); } -void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, - unsigned PrefAlign, uint32_t TypeByteWidth, +void DataLayout::setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, + Align PrefAlign, uint32_t TypeByteWidth, uint32_t IndexWidth) { if (PrefAlign < ABIAlign) report_fatal_error( @@ -528,9 +537,8 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, /// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or /// preferred if ABIInfo = false) the layout wants for the specified datatype. 
-unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, - uint32_t BitWidth, bool ABIInfo, - Type *Ty) const { +Align DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, + bool ABIInfo, Type *Ty) const { AlignmentsTy::const_iterator I = findAlignmentLowerBound(AlignType, BitWidth); // See if we found an exact match. Of if we are looking for an integer type, // but don't have an exact match take the next largest integer. This is where @@ -549,10 +557,11 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, } else if (AlignType == VECTOR_ALIGN) { // By default, use natural alignment for vector types. This is consistent // with what clang and llvm-gcc do. - unsigned Align = getTypeAllocSize(cast(Ty)->getElementType()); - Align *= cast(Ty)->getNumElements(); - Align = PowerOf2Ceil(Align); - return Align; + unsigned Alignment = + getTypeAllocSize(cast(Ty)->getElementType()); + Alignment *= cast(Ty)->getNumElements(); + Alignment = PowerOf2Ceil(Alignment); + return Align(Alignment); } // If we still couldn't find a reasonable default alignment, fall back @@ -561,9 +570,9 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, // approximation of reality, and if the user wanted something less // less conservative, they should have specified it explicitly in the data // layout. 
- unsigned Align = getTypeStoreSize(Ty); - Align = PowerOf2Ceil(Align); - return Align; + unsigned Alignment = getTypeStoreSize(Ty); + Alignment = PowerOf2Ceil(Alignment); + return Align(Alignment); } namespace { @@ -624,7 +633,7 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { return L; } -unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { +Align DataLayout::getPointerABIAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); if (I == Pointers.end() || I->AddressSpace != AS) { I = findPointerLowerBound(0); @@ -633,7 +642,7 @@ unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { return I->ABIAlign; } -unsigned DataLayout::getPointerPrefAlignment(unsigned AS) const { +Align DataLayout::getPointerPrefAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); if (I == Pointers.end() || I->AddressSpace != AS) { I = findPointerLowerBound(0); @@ -690,21 +699,18 @@ unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. */ -unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { +Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { AlignTypeEnum AlignType; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { // Early escape for the non-numeric types. case Type::LabelTyID: - return (abi_or_pref - ? getPointerABIAlignment(0) - : getPointerPrefAlignment(0)); + return abi_or_pref ? getPointerABIAlignment(0) : getPointerPrefAlignment(0); case Type::PointerTyID: { unsigned AS = cast(Ty)->getAddressSpace(); - return (abi_or_pref - ? getPointerABIAlignment(AS) - : getPointerPrefAlignment(AS)); + return abi_or_pref ? 
getPointerABIAlignment(AS) + : getPointerPrefAlignment(AS); } case Type::ArrayTyID: return getAlignment(cast(Ty)->getElementType(), abi_or_pref); @@ -712,11 +718,11 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::StructTyID: { // Packed structure types always have an ABI alignment of one. if (cast(Ty)->isPacked() && abi_or_pref) - return 1; + return Align::None(); // Get the layout annotation... which is lazily created on demand. const StructLayout *Layout = getStructLayout(cast(Ty)); - unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); + const Align Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); return std::max(Align, Layout->getAlignment()); } case Type::IntegerTyID: @@ -740,27 +746,24 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { llvm_unreachable("Bad type for getAlignment!!!"); } - return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty); + // If we're dealing with a scalable vector, we just need the known minimum + // size for determining alignment. If not, we'll get the exact size. + return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty).getKnownMinSize(), + abi_or_pref, Ty); } unsigned DataLayout::getABITypeAlignment(Type *Ty) const { - return getAlignment(Ty, true); + return getAlignment(Ty, true).value(); } /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. 
-unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { +Align DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr); } unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const { - return getAlignment(Ty, false); -} - -unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const { - unsigned Align = getPrefTypeAlignment(Ty); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - return Log2_32(Align); + return getAlignment(Ty, false).value(); } IntegerType *DataLayout::getIntPtrType(LLVMContext &C, diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index ce47ef20743..1bbe6b85d26 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -279,7 +279,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { } static MDNode *stripDebugLocFromLoopID(MDNode *N) { - assert(!empty(N->operands()) && "Missing self reference?"); + assert(!N->operands().empty() && "Missing self reference?"); // if there is no debug location, we do not have to rewrite this MDNode. 
if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) { @@ -929,6 +929,26 @@ const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) { return ""; } +LLVMMetadataRef LLVMDIBuilderCreateMacro(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, + unsigned Line, + LLVMDWARFMacinfoRecordType RecordType, + const char *Name, size_t NameLen, + const char *Value, size_t ValueLen) { + return wrap( + unwrap(Builder)->createMacro(unwrapDI(ParentMacroFile), Line, + static_cast(RecordType), + {Name, NameLen}, {Value, ValueLen})); +} + +LLVMMetadataRef +LLVMDIBuilderCreateTempMacroFile(LLVMDIBuilderRef Builder, + LLVMMetadataRef ParentMacroFile, unsigned Line, + LLVMMetadataRef File) { + return wrap(unwrap(Builder)->createTempMacroFile( + unwrapDI(ParentMacroFile), Line, unwrapDI(File))); +} + LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder, const char *Name, size_t NameLen, int64_t Value, diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 900df27d1d3..94ec3abfa7a 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -828,15 +828,23 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context, } unsigned DIExpression::ExprOperand::getSize() const { - switch (getOp()) { + uint64_t Op = getOp(); + + if (Op >= dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31) + return 2; + + switch (Op) { case dwarf::DW_OP_LLVM_convert: case dwarf::DW_OP_LLVM_fragment: + case dwarf::DW_OP_bregx: return 3; case dwarf::DW_OP_constu: + case dwarf::DW_OP_consts: case dwarf::DW_OP_deref_size: case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_LLVM_tag_offset: - case dwarf::DW_OP_entry_value: + case dwarf::DW_OP_LLVM_entry_value: + case dwarf::DW_OP_regx: return 2; default: return 1; @@ -849,8 +857,13 @@ bool DIExpression::isValid() const { if (I->get() + I->getSize() > E->get()) return false; + uint64_t Op = I->getOp(); + if ((Op >= dwarf::DW_OP_reg0 && Op <= dwarf::DW_OP_reg31) || + (Op >= 
dwarf::DW_OP_breg0 && Op <= dwarf::DW_OP_breg31)) + return true; + // Check that the operand is valid. - switch (I->getOp()) { + switch (Op) { default: return false; case dwarf::DW_OP_LLVM_fragment: @@ -877,10 +890,12 @@ bool DIExpression::isValid() const { return false; break; } - case dwarf::DW_OP_entry_value: { - // An entry value operator must appear at the begin and the size - // of following expression should be 1, because we support only - // entry values of a simple register location. + case dwarf::DW_OP_LLVM_entry_value: { + // An entry value operator must appear at the beginning and the number of + // operations it cover can currently only be 1, because we support only + // entry values of a simple register location. One reason for this is that + // we currently can't calculate the size of the resulting DWARF block for + // other expressions. return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 && getNumElements() == 2; } @@ -905,6 +920,8 @@ bool DIExpression::isValid() const { case dwarf::DW_OP_lit0: case dwarf::DW_OP_not: case dwarf::DW_OP_dup: + case dwarf::DW_OP_regx: + case dwarf::DW_OP_bregx: break; } } @@ -1035,7 +1052,7 @@ DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr, assert(Expr && "Can't prepend ops to this expression"); if (EntryValue) { - Ops.push_back(dwarf::DW_OP_entry_value); + Ops.push_back(dwarf::DW_OP_LLVM_entry_value); // Add size info needed for entry value expression. // Add plus one for target register operand. 
Ops.push_back(Expr->getNumElements() + 1); @@ -1146,6 +1163,7 @@ Optional DIExpression::createFragmentExpression( Op.appendToVector(Ops); } } + assert(Expr && "Unknown DIExpression"); Ops.push_back(dwarf::DW_OP_LLVM_fragment); Ops.push_back(OffsetInBits); Ops.push_back(SizeInBits); diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 4a8e3cca349..99d5aec3f04 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -370,5 +370,16 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const { return OS.str(); } +DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst, + Twine &Msg) + : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning, + *Inst->getParent()->getParent(), + Inst->getDebugLoc()), + Msg(Msg) {} + +void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsg(); +} + void OptimizationRemarkAnalysisFPCommute::anchor() {} void OptimizationRemarkAnalysisAliasing::anchor() {} diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index dc28d22548d..3f70d2c904e 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -251,7 +251,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, // We only need a symbol table for a function if the context keeps value names if (!getContext().shouldDiscardValueNames()) - SymTab = make_unique(); + SymTab = std::make_unique(); // If the function has arguments, mark them as lazily built. if (Ty->getNumParams()) @@ -293,7 +293,8 @@ void Function::BuildLazyArguments() const { // Clear the lazy arguments bit. unsigned SDC = getSubclassDataFromValue(); - const_cast(this)->setValueSubclassData(SDC &= ~(1<<0)); + SDC &= ~(1 << 0); + const_cast(this)->setValueSubclassData(SDC); assert(!hasLazyArguments()); } @@ -611,9 +612,11 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "vararg"; // Ensure nested function types are distinguishable. 
Result += "f"; - } else if (isa(Ty)) { - Result += "v" + utostr(Ty->getVectorNumElements()) + - getMangledTypeStr(Ty->getVectorElementType()); + } else if (VectorType* VTy = dyn_cast(Ty)) { + if (VTy->isScalable()) + Result += "nx"; + Result += "v" + utostr(VTy->getVectorNumElements()) + + getMangledTypeStr(VTy->getVectorElementType()); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); @@ -700,7 +703,11 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45, + IIT_VEC_OF_BITCASTS_TO_INT = 46 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -865,12 +872,36 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, DecodeIITType(NextElt, Infos, OutputTable); return; } + case IIT_SUBDIVIDE2_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide2Argument, + ArgInfo)); + return; + } + case IIT_SUBDIVIDE4_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Subdivide4Argument, + ArgInfo)); + return; + } case IIT_VEC_ELEMENT: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument, ArgInfo)); return; } + case IIT_SCALABLE_VEC: { + OutputTable.push_back(IITDescriptor::get(IITDescriptor::ScalableVecArgument, + 0)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + } + case IIT_VEC_OF_BITCASTS_TO_INT: { + unsigned ArgInfo = (NextElt == Infos.size() ? 
0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfBitcastsToInt, + ArgInfo)); + return; + } } llvm_unreachable("unhandled"); } @@ -961,6 +992,14 @@ static Type *DecodeFixedType(ArrayRef &Infos, assert(ITy->getBitWidth() % 2 == 0); return IntegerType::get(Context, ITy->getBitWidth() / 2); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + Type *Ty = Tys[D.getArgumentNumber()]; + VectorType *VTy = dyn_cast(Ty); + assert(VTy && "Expected an argument of Vector Type"); + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 1 : 2; + return VectorType::getSubdividedVectorType(VTy, SubDivs); + } case IITDescriptor::HalfVecArgument: return VectorType::getHalfElementsVectorType(cast( Tys[D.getArgumentNumber()])); @@ -968,7 +1007,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, Type *EltTy = DecodeFixedType(Infos, Tys, Context); Type *Ty = Tys[D.getArgumentNumber()]; if (auto *VTy = dyn_cast(Ty)) - return VectorType::get(EltTy, VTy->getNumElements()); + return VectorType::get(EltTy, VTy->getElementCount()); return EltTy; } case IITDescriptor::PtrToArgument: { @@ -989,9 +1028,20 @@ static Type *DecodeFixedType(ArrayRef &Infos, return VTy->getElementType(); llvm_unreachable("Expected an argument of Vector Type"); } + case IITDescriptor::VecOfBitcastsToInt: { + Type *Ty = Tys[D.getArgumentNumber()]; + VectorType *VTy = dyn_cast(Ty); + assert(VTy && "Expected an argument of Vector Type"); + return VectorType::getInteger(VTy); + } case IITDescriptor::VecOfAnyPtrsToElt: // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; + case IITDescriptor::ScalableVecArgument: { + Type *Ty = DecodeFixedType(Infos, Tys, Context); + return VectorType::get(Ty->getVectorElementType(), + { Ty->getVectorNumElements(), true }); + } } llvm_unreachable("unhandled"); } @@ -1174,8 +1224,9 @@ static bool matchIntrinsicType( } case IITDescriptor::HalfVecArgument: 
// If this is a forward reference, defer the check for later. - return D.getArgumentNumber() >= ArgTys.size() || - !isa(ArgTys[D.getArgumentNumber()]) || + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + return !isa(ArgTys[D.getArgumentNumber()]) || VectorType::getHalfElementsVectorType( cast(ArgTys[D.getArgumentNumber()])) != Ty; case IITDescriptor::SameVecWidthArgument: { @@ -1191,8 +1242,8 @@ static bool matchIntrinsicType( return true; Type *EltTy = Ty; if (ThisArgType) { - if (ReferenceType->getVectorNumElements() != - ThisArgType->getVectorNumElements()) + if (ReferenceType->getElementCount() != + ThisArgType->getElementCount()) return true; EltTy = ThisArgType->getVectorElementType(); } @@ -1255,6 +1306,36 @@ static bool matchIntrinsicType( auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); return !ReferenceType || Ty != ReferenceType->getElementType(); } + case IITDescriptor::Subdivide2Argument: + case IITDescriptor::Subdivide4Argument: { + // If this is a forward reference, defer the check for later. + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + + Type *NewTy = ArgTys[D.getArgumentNumber()]; + if (auto *VTy = dyn_cast(NewTy)) { + int SubDivs = D.Kind == IITDescriptor::Subdivide2Argument ? 
1 : 2; + NewTy = VectorType::getSubdividedVectorType(VTy, SubDivs); + return Ty != NewTy; + } + return true; + } + case IITDescriptor::ScalableVecArgument: { + VectorType *VTy = dyn_cast(Ty); + if (!VTy || !VTy->isScalable()) + return true; + return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks, + IsDeferredCheck); + } + case IITDescriptor::VecOfBitcastsToInt: { + if (D.getArgumentNumber() >= ArgTys.size()) + return IsDeferredCheck || DeferCheck(Ty); + auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); + auto *ThisArgVecTy = dyn_cast(Ty); + if (!ThisArgVecTy || !ReferenceType) + return true; + return ThisArgVecTy != VectorType::getInteger(ReferenceType); + } } llvm_unreachable("unhandled"); } diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index e2bfc0420bc..46a9696b294 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -114,18 +114,22 @@ unsigned GlobalValue::getAddressSpace() const { } void GlobalObject::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && + setAlignment(MaybeAlign(Align)); +} + +void GlobalObject::setAlignment(MaybeAlign Align) { + assert((!Align || Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); - unsigned AlignmentData = Log2_32(Align) + 1; + unsigned AlignmentData = encode(Align); unsigned OldData = getGlobalValueSubClassData(); setGlobalValueSubClassData((OldData & ~AlignmentMask) | AlignmentData); - assert(getAlignment() == Align && "Alignment representation error!"); + assert(MaybeAlign(getAlignment()) == Align && + "Alignment representation error!"); } void GlobalObject::copyAttributesFrom(const GlobalObject *Src) { GlobalValue::copyAttributesFrom(Src); - setAlignment(Src->getAlignment()); + setAlignment(MaybeAlign(Src->getAlignment())); setSection(Src->getSection()); } @@ -427,6 +431,43 @@ GlobalIndirectSymbol::GlobalIndirectSymbol(Type *Ty, ValueTy VTy, Op<0>() = Symbol; } +static const 
GlobalObject * +findBaseObject(const Constant *C, DenseSet &Aliases) { + if (auto *GO = dyn_cast(C)) + return GO; + if (auto *GA = dyn_cast(C)) + if (Aliases.insert(GA).second) + return findBaseObject(GA->getOperand(0), Aliases); + if (auto *CE = dyn_cast(C)) { + switch (CE->getOpcode()) { + case Instruction::Add: { + auto *LHS = findBaseObject(CE->getOperand(0), Aliases); + auto *RHS = findBaseObject(CE->getOperand(1), Aliases); + if (LHS && RHS) + return nullptr; + return LHS ? LHS : RHS; + } + case Instruction::Sub: { + if (findBaseObject(CE->getOperand(1), Aliases)) + return nullptr; + return findBaseObject(CE->getOperand(0), Aliases); + } + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::GetElementPtr: + return findBaseObject(CE->getOperand(0), Aliases); + default: + break; + } + } + return nullptr; +} + +const GlobalObject *GlobalIndirectSymbol::getBaseObject() const { + DenseSet Aliases; + return findBaseObject(getOperand(0), Aliases); +} //===----------------------------------------------------------------------===// // GlobalAlias Implementation diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 0c6461c9078..b782012e973 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -49,7 +49,7 @@ GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, nullptr, GlobalVariable::NotThreadLocal, AddressSpace); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(1); + GV->setAlignment(Align::None()); return GV; } @@ -289,8 +289,10 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove( CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the alignment of the pointer args. 
- CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); - CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign)); + CI->addParamAttr( + 0, Attribute::getWithAlignment(CI->getContext(), Align(DstAlign))); + CI->addParamAttr( + 1, Attribute::getWithAlignment(CI->getContext(), Align(SrcAlign))); // Set the TBAA info if present. if (TBAATag) diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 35b06135a82..953cf941016 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -26,14 +26,22 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) { - if (!Banner.empty()) - OS << Banner << "\n"; - if (llvm::isFunctionInPrintList("*")) + if (llvm::isFunctionInPrintList("*")) { + if (!Banner.empty()) + OS << Banner << "\n"; M.print(OS, nullptr, ShouldPreserveUseListOrder); + } else { - for(const auto &F : M.functions()) - if (llvm::isFunctionInPrintList(F.getName())) + bool BannerPrinted = false; + for(const auto &F : M.functions()) { + if (llvm::isFunctionInPrintList(F.getName())) { + if (!BannerPrinted && !Banner.empty()) { + OS << Banner << "\n"; + BannerPrinted = true; + } F.print(OS); + } + } } return PreservedAnalyses::all(); } diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 99da7caaccf..fd732f9eda8 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -181,6 +181,16 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // FIXME: For now assuming these are 2-character constraints. 
pCodes->push_back(StringRef(I+1, 2)); I += 3; + } else if (*I == '@') { + // Multi-letter constraint + ++I; + unsigned char C = static_cast(*I); + assert(isdigit(C) && "Expected a digit!"); + int N = C - '0'; + assert(N > 0 && "Found a zero letter constraint!"); + ++I; + pCodes->push_back(StringRef(I, N)); + I += N; } else { // Single letter constraint. pCodes->push_back(StringRef(I, 1)); diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index ba5629d1662..b157c7bb34b 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -524,7 +524,7 @@ bool Instruction::mayReadFromMemory() const { case Instruction::Call: case Instruction::Invoke: case Instruction::CallBr: - return !cast(this)->doesNotAccessMemory(); + return !cast(this)->doesNotReadMemory(); case Instruction::Store: return !cast(this)->isUnordered(); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 2e7cad103c1..245c7628b08 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" #include #include #include @@ -45,12 +46,6 @@ using namespace llvm; -static cl::opt SwitchInstProfUpdateWrapperStrict( - "switch-inst-prof-update-wrapper-strict", cl::Hidden, - cl::desc("Assert that prof branch_weights metadata is valid when creating " - "an instance of SwitchInstProfUpdateWrapper"), - cl::init(false)); - //===----------------------------------------------------------------------===// // AllocaInst Class //===----------------------------------------------------------------------===// @@ -822,6 +817,17 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, setName(NameStr); } +void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) { + assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr"); + if (BasicBlock *OldBB = getIndirectDest(i)) { + 
BlockAddress *Old = BlockAddress::get(OldBB); + BlockAddress *New = BlockAddress::get(B); + for (unsigned ArgNo = 0, e = getNumArgOperands(); ArgNo != e; ++ArgNo) + if (dyn_cast(getArgOperand(ArgNo)) == Old) + setArgOperand(ArgNo, New); + } +} + CallBrInst::CallBrInst(const CallBrInst &CBI) : CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr, OperandTraits::op_end(this) - CBI.getNumOperands(), @@ -1223,7 +1229,7 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca, getAISize(Ty->getContext(), ArraySize), InsertBefore), AllocatedType(Ty) { - setAlignment(Align); + setAlignment(MaybeAlign(Align)); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } @@ -1234,18 +1240,21 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca, getAISize(Ty->getContext(), ArraySize), InsertAtEnd), AllocatedType(Ty) { - setAlignment(Align); + setAlignment(MaybeAlign(Align)); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); } -void AllocaInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && +void AllocaInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~31) | - (Log2_32(Align) + 1)); - assert(getAlignment() == Align && "Alignment representation error!"); + encode(Align)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } bool AllocaInst::isArrayAllocation() const { @@ -1287,36 +1296,36 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, 
Instruction *InsertBef) - : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {} + : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertBef) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, BasicBlock *InsertAE) - : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {} + : LoadInst(Ty, Ptr, Name, isVolatile, /*Align=*/None, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, Instruction *InsertBef) + MaybeAlign Align, Instruction *InsertBef) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertBef) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, BasicBlock *InsertAE) + MaybeAlign Align, BasicBlock *InsertAE) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, Instruction *InsertBef) + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, + Instruction *InsertBef) : UnaryInstruction(Ty, Load, Ptr, InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); - setAlignment(Align); + setAlignment(MaybeAlign(Align)); setAtomic(Order, SSID); AssertOK(); setName(Name); } LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, - unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, + MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) : UnaryInstruction(Ty, Load, Ptr, InsertAE) { assert(Ty == cast(Ptr->getType())->getElementType()); @@ -1327,13 +1336,16 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, setName(Name); } -void LoadInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= 
MaximumAlignment && +void LoadInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | - ((Log2_32(Align)+1)<<1)); - assert(getAlignment() == Align && "Alignment representation error!"); + (encode(Align) << 1)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1359,30 +1371,28 @@ StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd) StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Instruction *InsertBefore) - : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {} + : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, BasicBlock *InsertAtEnd) - : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {} + : StoreInst(val, addr, isVolatile, /*Align=*/None, InsertAtEnd) {} -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, Instruction *InsertBefore) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertBefore) {} -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, BasicBlock *InsertAtEnd) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, SyncScope::System, InsertAtEnd) {} -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID 
SSID, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), - InsertBefore) { + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits::op_begin(this), + OperandTraits::operands(this), InsertBefore) { Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1391,14 +1401,12 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, AssertOK(); } -StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SyncScope::ID SSID, +StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, MaybeAlign Align, + AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), - InsertAtEnd) { + : Instruction(Type::getVoidTy(val->getContext()), Store, + OperandTraits::op_begin(this), + OperandTraits::operands(this), InsertAtEnd) { Op<0>() = val; Op<1>() = addr; setVolatile(isVolatile); @@ -1407,13 +1415,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, AssertOK(); } -void StoreInst::setAlignment(unsigned Align) { - assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); - assert(Align <= MaximumAlignment && +void StoreInst::setAlignment(MaybeAlign Align) { + assert((!Align || *Align <= MaximumAlignment) && "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) | - ((Log2_32(Align)+1) << 1)); - assert(getAlignment() == Align && "Alignment representation error!"); + (encode(Align) << 1)); + if (Align) + assert(getAlignment() == Align->value() && + "Alignment representation error!"); + else + assert(getAlignment() == 0 && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1778,7 +1789,7 @@ 
ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, Instruction *InsertBefore) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -1795,7 +1806,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(VectorType::get(cast(V1->getType())->getElementType(), - cast(Mask->getType())->getNumElements()), + cast(Mask->getType())->getElementCount()), ShuffleVector, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2968,8 +2979,8 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { } // Get the bit sizes, we'll need these - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Run through the possibilities ... if (DestTy->isIntegerTy()) { // Casting to integral @@ -3016,7 +3027,7 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { if (VectorType *SrcVecTy = dyn_cast(SrcTy)) { if (VectorType *DestVecTy = dyn_cast(DestTy)) { - if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { + if (SrcVecTy->getElementCount() == DestVecTy->getElementCount()) { // An element by element cast. Valid if casting the elements is valid. 
SrcTy = SrcVecTy->getElementType(); DestTy = DestVecTy->getElementType(); @@ -3030,12 +3041,12 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { } } - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + TypeSize DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr // Could still have vectors of pointers if the number of elements doesn't // match - if (SrcBits == 0 || DestBits == 0) + if (SrcBits.getKnownMinSize() == 0 || DestBits.getKnownMinSize() == 0) return false; if (SrcBits != DestBits) @@ -3886,7 +3897,7 @@ SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) { } MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() { - assert(State == Changed && "called only if metadata has changed"); + assert(Changed && "called only if metadata has changed"); if (!Weights) return nullptr; @@ -3905,17 +3916,12 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() { void SwitchInstProfUpdateWrapper::init() { MDNode *ProfileData = getProfBranchWeightsMD(SI); - if (!ProfileData) { - State = Initialized; + if (!ProfileData) return; - } if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) { - State = Invalid; - if (SwitchInstProfUpdateWrapperStrict) - llvm_unreachable("number of prof branch_weights metadata operands does " - "not correspond to number of succesors"); - return; + llvm_unreachable("number of prof branch_weights metadata operands does " + "not correspond to number of succesors"); } SmallVector Weights; @@ -3924,7 +3930,6 @@ void SwitchInstProfUpdateWrapper::init() { uint32_t CW = C->getValue().getZExtValue(); Weights.push_back(CW); } - State = Initialized; this->Weights = std::move(Weights); } @@ -3933,7 +3938,7 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) { if (Weights) { assert(SI.getNumSuccessors() == Weights->size() && "num 
of prof branch_weights must accord with num of successors"); - State = Changed; + Changed = true; // Copy the last case to the place of the removed one and shrink. // This is tightly coupled with the way SwitchInst::removeCase() removes // the cases in SwitchInst::removeCase(CaseIt). @@ -3948,15 +3953,12 @@ void SwitchInstProfUpdateWrapper::addCase( SwitchInstProfUpdateWrapper::CaseWeightOpt W) { SI.addCase(OnVal, Dest); - if (State == Invalid) - return; - if (!Weights && W && *W) { - State = Changed; + Changed = true; Weights = SmallVector(SI.getNumSuccessors(), 0); Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { - State = Changed; + Changed = true; Weights.getValue().push_back(W ? *W : 0); } if (Weights) @@ -3967,11 +3969,9 @@ void SwitchInstProfUpdateWrapper::addCase( SymbolTableList::iterator SwitchInstProfUpdateWrapper::eraseFromParent() { // Instruction is erased. Mark as unchanged to not touch it in the destructor. - if (State != Invalid) { - State = Initialized; - if (Weights) - Weights->resize(0); - } + Changed = false; + if (Weights) + Weights->resize(0); return SI.eraseFromParent(); } @@ -3984,7 +3984,7 @@ SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) { void SwitchInstProfUpdateWrapper::setSuccessorWeight( unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) { - if (!W || State == Invalid) + if (!W) return; if (!Weights && *W) @@ -3993,7 +3993,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight( if (Weights) { auto &OldW = Weights.getValue()[idx]; if (*W != OldW) { - State = Changed; + Changed = true; OldW = *W; } } @@ -4136,13 +4136,14 @@ AllocaInst *AllocaInst::cloneImpl() const { LoadInst *LoadInst::cloneImpl() const { return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), - getAlignment(), getOrdering(), getSyncScopeID()); + MaybeAlign(getAlignment()), getOrdering(), + getSyncScopeID()); } StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), 
isVolatile(), - getAlignment(), getOrdering(), getSyncScopeID()); - + MaybeAlign(getAlignment()), getOrdering(), + getSyncScopeID()); } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index 7a042326f67..26ed46a9cd9 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -67,13 +67,12 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, // size 1. During the search, we can skip the prefix that we already know is // identical. By using strncmp we consider names with differing suffixes to // be part of the equal range. - size_t CmpStart = 0; size_t CmpEnd = 4; // Skip the "llvm" component. const char *const *Low = NameTable.begin(); const char *const *High = NameTable.end(); const char *const *LastLow = Low; while (CmpEnd < Name.size() && High - Low > 0) { - CmpStart = CmpEnd; + size_t CmpStart = CmpEnd; CmpEnd = Name.find('.', CmpStart + 1); CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd; auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) { @@ -107,7 +106,7 @@ Optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = getNumArgOperands(); Metadata *MD = - dyn_cast(getArgOperand(NumOperands - 2))->getMetadata(); + cast(getArgOperand(NumOperands - 2))->getMetadata(); if (!MD || !isa(MD)) return None; return StrToRoundingMode(cast(MD)->getString()); @@ -143,7 +142,7 @@ ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) { RoundingStr = "round.upward"; break; case ConstrainedFPIntrinsic::rmTowardZero: - RoundingStr = "round.tozero"; + RoundingStr = "round.towardzero"; break; } return RoundingStr; @@ -153,7 +152,7 @@ Optional ConstrainedFPIntrinsic::getExceptionBehavior() const { unsigned NumOperands = getNumArgOperands(); Metadata *MD = - dyn_cast(getArgOperand(NumOperands - 1))->getMetadata(); + cast(getArgOperand(NumOperands - 1))->getMetadata(); if (!MD || !isa(MD)) return None; return 
StrToExceptionBehavior(cast(MD)->getString()); @@ -189,6 +188,8 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -199,10 +200,14 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: return true; diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index e1cdf6b539d..5e8772186a2 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -36,34 +36,9 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { // Create the fixed metadata kinds. This is done in the same order as the // MD_* enum values so that they correspond. 
std::pair MDKinds[] = { - {MD_dbg, "dbg"}, - {MD_tbaa, "tbaa"}, - {MD_prof, "prof"}, - {MD_fpmath, "fpmath"}, - {MD_range, "range"}, - {MD_tbaa_struct, "tbaa.struct"}, - {MD_invariant_load, "invariant.load"}, - {MD_alias_scope, "alias.scope"}, - {MD_noalias, "noalias"}, - {MD_nontemporal, "nontemporal"}, - {MD_mem_parallel_loop_access, "llvm.mem.parallel_loop_access"}, - {MD_nonnull, "nonnull"}, - {MD_dereferenceable, "dereferenceable"}, - {MD_dereferenceable_or_null, "dereferenceable_or_null"}, - {MD_make_implicit, "make.implicit"}, - {MD_unpredictable, "unpredictable"}, - {MD_invariant_group, "invariant.group"}, - {MD_align, "align"}, - {MD_loop, "llvm.loop"}, - {MD_type, "type"}, - {MD_section_prefix, "section_prefix"}, - {MD_absolute_symbol, "absolute_symbol"}, - {MD_associated, "associated"}, - {MD_callees, "callees"}, - {MD_irr_loop, "irr_loop"}, - {MD_access_group, "llvm.access.group"}, - {MD_callback, "callback"}, - {MD_preserve_access_index, "llvm.preserve.access.index"}, +#define LLVM_FIXED_MD_KIND(EnumID, Name, Value) {EnumID, Name}, +#include "llvm/IR/FixedMetadataKinds.def" +#undef LLVM_FIXED_MD_KIND }; for (auto &MDKind : MDKinds) { diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index c6ab2c6f213..5f978271417 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -21,7 +21,7 @@ using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) - : DiagHandler(llvm::make_unique()), + : DiagHandler(std::make_unique()), VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID), HalfTy(C, Type::HalfTyID), diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index c575d6e782b..3a03c493100 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -1680,7 +1680,6 @@ bool FPPassManager::runOnFunction(Function &F) { bool FPPassManager::runOnModule(Module &M) { bool Changed = false; - llvm::TimeTraceScope TimeScope("OptModule", M.getName()); for (Function &F : M) Changed |= 
runOnFunction(F); @@ -1999,10 +1998,28 @@ void FunctionPass::assignPassManager(PMStack &PMS, FPP->add(this); } +void BasicBlockPass::preparePassManager(PMStack &PMS) { + // Find BBPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager) + PMS.pop(); + + // If this pass is destroying high level information that is used + // by other passes that are managed by BBPM then do not insert + // this pass in current BBPM. Use new BBPassManager. + if (PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + /// Find appropriate Basic Pass Manager or Call Graph Pass Manager /// in the PM Stack and add self into that manager. void BasicBlockPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_BasicBlockPassManager) + PMS.pop(); + BBPassManager *BBP; // Basic Pass Manager is a leaf pass manager. It does not handle @@ -2018,6 +2035,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS, // [1] Create new Basic Block Manager BBP = new BBPassManager(); + BBP->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager // Basic Block Pass Manager does not live by itself diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index 14bcb3a29b0..7bdb85ace52 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -309,3 +309,15 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) { }; return MDNode::get(Context, Vals); } + +MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikleyWeight, + uint64_t UnlikleyWeight) { + auto *IntType = Type::getInt64Ty(Context); + Metadata *Vals[] = { + createString("misexpect"), + createConstant(ConstantInt::get(IntType, Index)), + createConstant(ConstantInt::get(IntType, LikleyWeight)), + createConstant(ConstantInt::get(IntType, UnlikleyWeight)), + }; + return MDNode::get(Context, Vals); +} diff --git 
a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 748a2238e64..62c2aa86f3b 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -1497,6 +1497,24 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) { TypeID})); } +void GlobalObject::addVCallVisibilityMetadata(VCallVisibility Visibility) { + addMetadata(LLVMContext::MD_vcall_visibility, + *MDNode::get(getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(getContext()), Visibility))})); +} + +GlobalObject::VCallVisibility GlobalObject::getVCallVisibility() const { + if (MDNode *MD = getMetadata(LLVMContext::MD_vcall_visibility)) { + uint64_t Val = cast( + cast(MD->getOperand(0))->getValue()) + ->getZExtValue(); + assert(Val <= 2 && "unknown vcall visibility!"); + return (VCallVisibility)Val; + } + return VCallVisibility::VCallVisibilityPublic; +} + void Function::setSubprogram(DISubprogram *SP) { setMetadata(LLVMContext::MD_dbg, SP); } diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index dbf4035ac7c..25efd009194 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -604,7 +604,7 @@ GlobalVariable *llvm::collectUsedGlobalVariables( const ConstantArray *Init = cast(GV->getInitializer()); for (Value *Op : Init->operands()) { - GlobalValue *G = cast(Op->stripPointerCastsNoFollowAliases()); + GlobalValue *G = cast(Op->stripPointerCasts()); Set.insert(G); } return GV; diff --git a/lib/IR/RemarkStreamer.cpp b/lib/IR/RemarkStreamer.cpp index 5b4c7e72b47..0fcc06b961f 100644 --- a/lib/IR/RemarkStreamer.cpp +++ b/lib/IR/RemarkStreamer.cpp @@ -15,15 +15,17 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Remarks/BitstreamRemarkSerializer.h" #include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkSerializer.h" using namespace llvm; -RemarkStreamer::RemarkStreamer(StringRef Filename, - std::unique_ptr Serializer) - : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) { - 
assert(!Filename.empty() && "This needs to be a real filename."); -} +RemarkStreamer::RemarkStreamer( + std::unique_ptr RemarkSerializer, + Optional FilenameIn) + : PassFilter(), RemarkSerializer(std::move(RemarkSerializer)), + Filename(FilenameIn ? Optional(FilenameIn->str()) : None) {} Error RemarkStreamer::setFilter(StringRef Filter) { Regex R = Regex(Filter); @@ -99,24 +101,13 @@ void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) { // First, convert the diagnostic to a remark. remarks::Remark R = toRemark(Diag); // Then, emit the remark through the serializer. - Serializer->emit(R); + RemarkSerializer->emit(R); } char RemarkSetupFileError::ID = 0; char RemarkSetupPatternError::ID = 0; char RemarkSetupFormatError::ID = 0; -static std::unique_ptr -formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) { - switch (RemarksFormat) { - default: - llvm_unreachable("Unknown remark serializer format."); - return nullptr; - case remarks::Format::YAML: - return llvm::make_unique(OS); - }; -} - Expected> llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, @@ -131,20 +122,28 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, if (RemarksFilename.empty()) return nullptr; + Expected Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error(std::move(E)); + std::error_code EC; + auto Flags = *Format == remarks::Format::YAML ? sys::fs::OF_Text + : sys::fs::OF_None; auto RemarksFile = - llvm::make_unique(RemarksFilename, EC, sys::fs::F_None); + std::make_unique(RemarksFilename, EC, Flags); // We don't use llvm::FileError here because some diagnostics want the file // name separately. 
if (EC) return make_error(errorCodeToError(EC)); - Expected Format = remarks::parseFormat(RemarksFormat); - if (Error E = Format.takeError()) + Expected> RemarkSerializer = + remarks::createRemarkSerializer( + *Format, remarks::SerializerMode::Separate, RemarksFile->os()); + if (Error E = RemarkSerializer.takeError()) return make_error(std::move(E)); - Context.setRemarkStreamer(llvm::make_unique( - RemarksFilename, formatToSerializer(*Format, RemarksFile->os()))); + Context.setRemarkStreamer(std::make_unique( + std::move(*RemarkSerializer), RemarksFilename)); if (!RemarksPasses.empty()) if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) @@ -152,3 +151,34 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, return std::move(RemarksFile); } + +Error llvm::setupOptimizationRemarks(LLVMContext &Context, raw_ostream &OS, + StringRef RemarksPasses, + StringRef RemarksFormat, + bool RemarksWithHotness, + unsigned RemarksHotnessThreshold) { + if (RemarksWithHotness) + Context.setDiagnosticsHotnessRequested(true); + + if (RemarksHotnessThreshold) + Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold); + + Expected Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error(std::move(E)); + + Expected> RemarkSerializer = + remarks::createRemarkSerializer(*Format, + remarks::SerializerMode::Separate, OS); + if (Error E = RemarkSerializer.takeError()) + return make_error(std::move(E)); + + Context.setRemarkStreamer( + std::make_unique(std::move(*RemarkSerializer))); + + if (!RemarksPasses.empty()) + if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) + return make_error(std::move(E)); + + return Error::success(); +} diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp index 7f3dea5e6a6..c90347ec48f 100644 --- a/lib/IR/SafepointIRVerifier.cpp +++ b/lib/IR/SafepointIRVerifier.cpp @@ -102,11 +102,11 @@ public: } bool isDeadEdge(const Use 
*U) const { - assert(dyn_cast(U->getUser())->isTerminator() && + assert(cast(U->getUser())->isTerminator() && "edge must be operand of terminator"); assert(cast_or_null(U->get()) && "edge must refer to basic block"); - assert(!isDeadBlock(dyn_cast(U->getUser())->getParent()) && + assert(!isDeadBlock(cast(U->getUser())->getParent()) && "isDeadEdge() must be applied to edge from live block"); return DeadEdges.count(U); } diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 8ece7f223dd..3eab5042b54 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" #include #include @@ -111,18 +112,22 @@ bool Type::isEmptyTy() const { return false; } -unsigned Type::getPrimitiveSizeInBits() const { +TypeSize Type::getPrimitiveSizeInBits() const { switch (getTypeID()) { - case Type::HalfTyID: return 16; - case Type::FloatTyID: return 32; - case Type::DoubleTyID: return 64; - case Type::X86_FP80TyID: return 80; - case Type::FP128TyID: return 128; - case Type::PPC_FP128TyID: return 128; - case Type::X86_MMXTyID: return 64; - case Type::IntegerTyID: return cast(this)->getBitWidth(); - case Type::VectorTyID: return cast(this)->getBitWidth(); - default: return 0; + case Type::HalfTyID: return TypeSize::Fixed(16); + case Type::FloatTyID: return TypeSize::Fixed(32); + case Type::DoubleTyID: return TypeSize::Fixed(64); + case Type::X86_FP80TyID: return TypeSize::Fixed(80); + case Type::FP128TyID: return TypeSize::Fixed(128); + case Type::PPC_FP128TyID: return TypeSize::Fixed(128); + case Type::X86_MMXTyID: return TypeSize::Fixed(64); + case Type::IntegerTyID: + return TypeSize::Fixed(cast(this)->getBitWidth()); + case Type::VectorTyID: { + const VectorType *VTy = cast(this); + return TypeSize(VTy->getBitWidth(), VTy->isScalable()); + } + default: return TypeSize::Fixed(0); } } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 
b7f77dc3043..3c8a5b53669 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -444,15 +444,11 @@ void Value::replaceUsesOutsideBlock(Value *New, BasicBlock *BB) { "replaceUses of value with new value of different type!"); assert(BB && "Basic block that may contain a use of 'New' must be defined\n"); - use_iterator UI = use_begin(), E = use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - auto *Usr = dyn_cast(U.getUser()); - if (Usr && Usr->getParent() == BB) - continue; - U.set(New); - } + replaceUsesWithIf(New, [BB](Use &U) { + auto *I = dyn_cast(U.getUser()); + // Don't replace if it's an instruction in the BB basic block. + return !I || I->getParent() != BB; + }); } namespace { @@ -460,8 +456,8 @@ namespace { enum PointerStripKind { PSK_ZeroIndices, PSK_ZeroIndicesAndAliases, - PSK_ZeroIndicesAndAliasesSameRepresentation, - PSK_ZeroIndicesAndAliasesAndInvariantGroups, + PSK_ZeroIndicesSameRepresentation, + PSK_ZeroIndicesAndInvariantGroups, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -479,10 +475,10 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { do { if (auto *GEP = dyn_cast(V)) { switch (StripKind) { - case PSK_ZeroIndicesAndAliases: - case PSK_ZeroIndicesAndAliasesSameRepresentation: - case PSK_ZeroIndicesAndAliasesAndInvariantGroups: case PSK_ZeroIndices: + case PSK_ZeroIndicesAndAliases: + case PSK_ZeroIndicesSameRepresentation: + case PSK_ZeroIndicesAndInvariantGroups: if (!GEP->hasAllZeroIndices()) return V; break; @@ -498,15 +494,13 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); - } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation && + } else if (StripKind != PSK_ZeroIndicesSameRepresentation && Operator::getOpcode(V) == Instruction::AddrSpaceCast) { // TODO: If we know an address space cast will not change the // representation we could look through it here as well. 
V = cast(V)->getOperand(0); - } else if (auto *GA = dyn_cast(V)) { - if (StripKind == PSK_ZeroIndices || GA->isInterposable()) - return V; - V = GA->getAliasee(); + } else if (StripKind == PSK_ZeroIndicesAndAliases && isa(V)) { + V = cast(V)->getAliasee(); } else { if (const auto *Call = dyn_cast(V)) { if (const Value *RV = Call->getReturnedArgOperand()) { @@ -516,7 +510,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { // The result of launder.invariant.group must alias it's argument, // but it can't be marked with returned attribute, that's why it needs // special case. - if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups && + if (StripKind == PSK_ZeroIndicesAndInvariantGroups && (Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group)) { V = Call->getArgOperand(0); @@ -533,16 +527,15 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { } // end anonymous namespace const Value *Value::stripPointerCasts() const { + return stripPointerCastsAndOffsets(this); +} + +const Value *Value::stripPointerCastsAndAliases() const { return stripPointerCastsAndOffsets(this); } const Value *Value::stripPointerCastsSameRepresentation() const { - return stripPointerCastsAndOffsets< - PSK_ZeroIndicesAndAliasesSameRepresentation>(this); -} - -const Value *Value::stripPointerCastsNoFollowAliases() const { - return stripPointerCastsAndOffsets(this); + return stripPointerCastsAndOffsets(this); } const Value *Value::stripInBoundsConstantOffsets() const { @@ -550,8 +543,7 @@ const Value *Value::stripInBoundsConstantOffsets() const { } const Value *Value::stripPointerCastsAndInvariantGroups() const { - return stripPointerCastsAndOffsets( - this); + return stripPointerCastsAndOffsets(this); } const Value * @@ -650,6 +642,19 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, } CanBeNull = true; } + } else if (auto *IP = dyn_cast(this)) { + if (MDNode *MD = 
IP->getMetadata(LLVMContext::MD_dereferenceable)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + if (DerefBytes == 0) { + if (MDNode *MD = + IP->getMetadata(LLVMContext::MD_dereferenceable_or_null)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + DerefBytes = CI->getLimitedValue(); + } + CanBeNull = true; + } } else if (auto *AI = dyn_cast(this)) { if (!AI->isArrayAllocation()) { DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()); @@ -666,21 +671,21 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, return DerefBytes; } -unsigned Value::getPointerAlignment(const DataLayout &DL) const { +MaybeAlign Value::getPointerAlignment(const DataLayout &DL) const { assert(getType()->isPointerTy() && "must be pointer"); - - unsigned Align = 0; if (auto *GO = dyn_cast(this)) { if (isa(GO)) { + const MaybeAlign FunctionPtrAlign = DL.getFunctionPtrAlign(); switch (DL.getFunctionPtrAlignType()) { case DataLayout::FunctionPtrAlignType::Independent: - return DL.getFunctionPtrAlign(); + return FunctionPtrAlign; case DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign: - return std::max(DL.getFunctionPtrAlign(), GO->getAlignment()); + return std::max(FunctionPtrAlign, MaybeAlign(GO->getAlignment())); } + llvm_unreachable("Unhandled FunctionPtrAlignType"); } - Align = GO->getAlignment(); - if (Align == 0) { + const MaybeAlign Alignment(GO->getAlignment()); + if (!Alignment) { if (auto *GVar = dyn_cast(GO)) { Type *ObjectType = GVar->getValueType(); if (ObjectType->isSized()) { @@ -688,37 +693,43 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const { // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. 
if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); + return MaybeAlign(DL.getPreferredAlignment(GVar)); else - Align = DL.getABITypeAlignment(ObjectType); + return Align(DL.getABITypeAlignment(ObjectType)); } } } + return Alignment; } else if (const Argument *A = dyn_cast(this)) { - Align = A->getParamAlignment(); - - if (!Align && A->hasStructRetAttr()) { + const MaybeAlign Alignment(A->getParamAlignment()); + if (!Alignment && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. Type *EltTy = cast(A->getType())->getElementType(); if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); + return Align(DL.getABITypeAlignment(EltTy)); } + return Alignment; } else if (const AllocaInst *AI = dyn_cast(this)) { - Align = AI->getAlignment(); - if (Align == 0) { + const MaybeAlign Alignment(AI->getAlignment()); + if (!Alignment) { Type *AllocatedType = AI->getAllocatedType(); if (AllocatedType->isSized()) - Align = DL.getPrefTypeAlignment(AllocatedType); + return MaybeAlign(DL.getPrefTypeAlignment(AllocatedType)); } - } else if (const auto *Call = dyn_cast(this)) - Align = Call->getAttributes().getRetAlignment(); - else if (const LoadInst *LI = dyn_cast(this)) + return Alignment; + } else if (const auto *Call = dyn_cast(this)) { + const MaybeAlign Alignment(Call->getRetAlignment()); + if (!Alignment && Call->getCalledFunction()) + return MaybeAlign( + Call->getCalledFunction()->getAttributes().getRetAlignment()); + return Alignment; + } else if (const LoadInst *LI = dyn_cast(this)) { if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { ConstantInt *CI = mdconst::extract(MD->getOperand(0)); - Align = CI->getLimitedValue(); + return MaybeAlign(CI->getLimitedValue()); } - - return Align; + } + return llvm::None; } const Value *Value::DoPHITranslation(const BasicBlock *CurBB, diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 9346c8bda75..b17fc433ed7 100644 --- 
a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -119,6 +119,7 @@ struct VerifierSupport { raw_ostream *OS; const Module &M; ModuleSlotTracker MST; + Triple TT; const DataLayout &DL; LLVMContext &Context; @@ -130,7 +131,8 @@ struct VerifierSupport { bool TreatBrokenDebugInfoAsError = true; explicit VerifierSupport(raw_ostream *OS, const Module &M) - : OS(OS), M(M), MST(&M), DL(M.getDataLayout()), Context(M.getContext()) {} + : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()), + Context(M.getContext()) {} private: void Write(const Module *M) { @@ -416,6 +418,7 @@ private: void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty); void visitDereferenceableMetadata(Instruction &I, MDNode *MD); + void visitProfMetadata(Instruction &I, MDNode *MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -515,6 +518,7 @@ private: DIExpression::FragmentInfo Fragment, ValueOrMetadata *Desc); void verifyFnArgs(const DbgVariableIntrinsic &I); + void verifyNotEntryValue(const DbgVariableIntrinsic &I); /// Module-level debug info verification... 
void verifyCompileUnits(); @@ -670,7 +674,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { Assert(InitArray, "wrong initalizer for intrinsic global variable", Init); for (Value *Op : InitArray->operands()) { - Value *V = Op->stripPointerCastsNoFollowAliases(); + Value *V = Op->stripPointerCasts(); Assert(isa(V) || isa(V) || isa(V), "invalid llvm.used member", V); @@ -979,6 +983,9 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { N.getRawVTableHolder()); AssertDI(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); + unsigned DIBlockByRefStruct = 1 << 4; + AssertDI((N.getFlags() & DIBlockByRefStruct) == 0, + "DIBlockByRefStruct on DICompositeType is no longer supported", &N); if (N.isVector()) { const DINodeArray Elements = N.getElements(); @@ -1306,11 +1313,12 @@ void Verifier::visitDIImportedEntity(const DIImportedEntity &N) { } void Verifier::visitComdat(const Comdat &C) { - // The Module is invalid if the GlobalValue has private linkage. Entities - // with private linkage don't have entries in the symbol table. - if (const GlobalValue *GV = M.getNamedValue(C.getName())) - Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage", - GV); + // In COFF the Module is invalid if the GlobalValue has private linkage. + // Entities with private linkage don't have entries in the symbol table. 
+ if (TT.isOSBinFormatCOFF()) + if (const GlobalValue *GV = M.getNamedValue(C.getName())) + Assert(!GV->hasPrivateLinkage(), + "comdat global value has private linkage", GV); } void Verifier::visitModuleIdents(const Module &M) { @@ -2497,6 +2505,15 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { Assert(CBI.getOperand(i) != CBI.getOperand(j), "Duplicate callbr destination!", &CBI); } + { + SmallPtrSet ArgBBs; + for (Value *V : CBI.args()) + if (auto *BA = dyn_cast(V)) + ArgBBs.insert(BA->getBasicBlock()); + for (BasicBlock *BB : CBI.getIndirectDests()) + Assert(ArgBBs.find(BB) != ArgBBs.end(), + "Indirect label missing from arglist.", &CBI); + } visitTerminator(CBI); } @@ -2715,8 +2732,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { &I); if (SrcTy->isVectorTy()) { - VectorType *VSrc = dyn_cast(SrcTy); - VectorType *VDest = dyn_cast(DestTy); + VectorType *VSrc = cast(SrcTy); + VectorType *VDest = cast(DestTy); Assert(VSrc->getNumElements() == VDest->getNumElements(), "PtrToInt Vector width mismatch", &I); } @@ -2740,8 +2757,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch", &I); if (SrcTy->isVectorTy()) { - VectorType *VSrc = dyn_cast(SrcTy); - VectorType *VDest = dyn_cast(DestTy); + VectorType *VSrc = cast(SrcTy); + VectorType *VDest = cast(DestTy); Assert(VSrc->getNumElements() == VDest->getNumElements(), "IntToPtr Vector width mismatch", &I); } @@ -3983,9 +4000,9 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null " "apply only to pointer types", &I); - Assert(isa(I), + Assert((isa(I) || isa(I)), "dereferenceable, dereferenceable_or_null apply only to load" - " instructions, use attributes for calls or invokes", &I); + " and inttoptr instructions, use attributes for calls or invokes", &I); 
Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null " "take one operand!", &I); ConstantInt *CI = mdconst::dyn_extract(MD->getOperand(0)); @@ -3993,6 +4010,45 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { "dereferenceable_or_null metadata value must be an i64!", &I); } +void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + Assert(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); + + // Check first operand. + Assert(MD->getOperand(0) != nullptr, "first operand should not be null", MD); + Assert(isa(MD->getOperand(0)), + "expected string with name of the !prof annotation", MD); + MDString *MDS = cast(MD->getOperand(0)); + StringRef ProfName = MDS->getString(); + + // Check consistency of !prof branch_weights metadata. + if (ProfName.equals("branch_weights")) { + unsigned ExpectedNumOperands = 0; + if (BranchInst *BI = dyn_cast(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); + else if (SwitchInst *SI = dyn_cast(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); + else if (isa(&I) || isa(&I)) + ExpectedNumOperands = 1; + else if (IndirectBrInst *IBI = dyn_cast(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); + else if (isa(&I)) + ExpectedNumOperands = 2; + else + CheckFailed("!prof branch_weights are not allowed for this instruction", + MD); + + Assert(MD->getNumOperands() == 1 + ExpectedNumOperands, + "Wrong number of operands", MD); + for (unsigned i = 1; i < MD->getNumOperands(); ++i) { + auto &MDO = MD->getOperand(i); + Assert(MDO, "second operand should not be null", MD); + Assert(mdconst::dyn_extract(MDO), + "!prof brunch_weights operand is not a const int"); + } + } +} + /// verifyInstruction - Verify that an instruction is well formed. 
/// void Verifier::visitInstruction(Instruction &I) { @@ -4150,13 +4206,18 @@ void Verifier::visitInstruction(Instruction &I) { "alignment is larger that implementation defined limit", &I); } + if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof)) + visitProfMetadata(I, MD); + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { AssertDI(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N); } - if (auto *DII = dyn_cast(&I)) + if (auto *DII = dyn_cast(&I)) { verifyFragmentExpression(*DII); + verifyNotEntryValue(*DII); + } InstsInThisBlock.insert(&I); } @@ -4236,6 +4297,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -4248,12 +4311,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(Call)); @@ -4623,7 +4690,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase 
&Call) { } case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: - case Intrinsic::umul_fix: { + case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: { Value *Op1 = Call.getArgOperand(0); Value *Op2 = Call.getArgOperand(1); Assert(Op1->getType()->isIntOrIntVectorTy(), @@ -4705,6 +4773,31 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: { + Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + } + break; + + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: { + Assert((NumOperands == 2), "invalid arguments for constrained FP intrinsic", + &FPI); + Type *ValTy = FPI.getArgOperand(0)->getType(); + Type *ResultTy = FPI.getType(); + Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(), + "Intrinsic does not support vectors", &FPI); + HasExceptionMD = true; + break; + } + case Intrinsic::experimental_constrained_fma: Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic", &FPI); @@ -4727,6 +4820,33 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + + Value *Operand = FPI.getArgOperand(0); + uint64_t NumSrcElem = 0; + Assert(Operand->getType()->isFPOrFPVectorTy(), + "Intrinsic first argument must be floating point", &FPI); + if (auto *OperandT = dyn_cast(Operand->getType())) { + NumSrcElem = 
OperandT->getNumElements(); + } + + Operand = &FPI; + Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + Assert(Operand->getType()->isIntOrIntVectorTy(), + "Intrinsic result must be an integer", &FPI); + if (auto *OperandT = dyn_cast(Operand->getType())) { + Assert(NumSrcElem == OperandT->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + } + } + break; + case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: { if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { @@ -4826,11 +4946,6 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { // This check is redundant with one in visitLocalVariable(). AssertDI(isType(Var->getRawType()), "invalid type ref", Var, Var->getRawType()); - if (auto *Type = dyn_cast_or_null(Var->getRawType())) - if (Type->isBlockByrefStruct()) - AssertDI(DII.getExpression() && DII.getExpression()->getNumElements(), - "BlockByRef variable without complex expression", Var, &DII); - verifyFnArgs(DII); } @@ -4935,6 +5050,16 @@ void Verifier::verifyFnArgs(const DbgVariableIntrinsic &I) { Prev, Var); } +void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) { + DIExpression *E = dyn_cast_or_null(I.getRawExpression()); + + // We don't know whether this intrinsic verified correctly. 
+ if (!E || !E->isValid()) + return; + + AssertDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I); +} + void Verifier::verifyCompileUnits() { // When more than one Module is imported into the same context, such as during // an LTO build before linking the modules, ODR type uniquing may cause types @@ -5021,7 +5146,7 @@ struct VerifierLegacyPass : public FunctionPass { } bool doInitialization(Module &M) override { - V = llvm::make_unique( + V = std::make_unique( &dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false, M); return false; } diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp index 000ab91dba7..12dcd182de2 100644 --- a/lib/LTO/Caching.cpp +++ b/lib/LTO/Caching.cpp @@ -142,8 +142,8 @@ Expected lto::localCache(StringRef CacheDirectoryPath, } // This CacheStream will move the temporary file into the cache when done. - return llvm::make_unique( - llvm::make_unique(Temp->FD, /* ShouldClose */ false), + return std::make_unique( + std::make_unique(Temp->FD, /* ShouldClose */ false), AddBuffer, std::move(*Temp), EntryPath.str(), Task); }; }; diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 64506890956..1e345e7dd89 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -44,6 +44,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -383,7 +384,9 @@ static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) { static void thinLTOInternalizeAndPromoteGUID( GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID, - function_ref isExported) { + function_ref isExported, + function_ref + isPrevailing) { for (auto &S : GVSummaryList) { if (isExported(S->modulePath(), GUID)) { if (GlobalValue::isLocalLinkage(S->linkage())) @@ -392,6 +395,8 @@ static void thinLTOInternalizeAndPromoteGUID( // Ignore local and 
appending linkage values since the linker // doesn't resolve them. !GlobalValue::isLocalLinkage(S->linkage()) && + (!GlobalValue::isInterposableLinkage(S->linkage()) || + isPrevailing(GUID, S.get())) && S->linkage() != GlobalValue::AppendingLinkage && // We can't internalize available_externally globals because this // can break function pointer equality. @@ -410,9 +415,12 @@ static void thinLTOInternalizeAndPromoteGUID( // as external and non-exported values as internal. void llvm::thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, - function_ref isExported) { + function_ref isExported, + function_ref + isPrevailing) { for (auto &I : Index) - thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported); + thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported, + isPrevailing); } // Requires a destructor for std::vector. @@ -459,8 +467,8 @@ BitcodeModule &InputFile::getSingleBitcodeModule() { LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf) : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel), - Ctx(Conf), CombinedModule(llvm::make_unique("ld-temp.o", Ctx)), - Mover(llvm::make_unique(*CombinedModule)) {} + Ctx(Conf), CombinedModule(std::make_unique("ld-temp.o", Ctx)), + Mover(std::make_unique(*CombinedModule)) {} LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) : Backend(Backend), CombinedIndex(/*HaveGVs*/ false) { @@ -754,7 +762,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, // For now they aren't reported correctly by ModuleSymbolTable. 
auto &CommonRes = RegularLTO.Commons[Sym.getIRName()]; CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize()); - CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment()); + CommonRes.Align = + std::max(CommonRes.Align, MaybeAlign(Sym.getCommonAlignment())); CommonRes.Prevailing |= Res.Prevailing; } @@ -899,8 +908,7 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); if (Res.second.VisibleOutsideSummary && Res.second.Prevailing) - GUIDPreservedSymbols.insert(GlobalValue::getGUID( - GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); + GUIDPreservedSymbols.insert(GUID); GUIDPrevailingResolutions[GUID] = Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No; @@ -996,6 +1004,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { GV->setLinkage(GlobalValue::InternalLinkage); } + RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + if (Conf.PostInternalizeModuleHook && !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule)) return Error::success(); @@ -1004,6 +1014,16 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex); } +static const char *libcallRoutineNames[] = { +#define HANDLE_LIBCALL(code, name) name, +#include "llvm/IR/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL +}; + +ArrayRef LTO::getRuntimeLibcallSymbols() { + return makeArrayRef(libcallRoutineNames); +} + /// This class defines the interface to the ThinLTO backend. 
class lto::ThinBackendProc { protected: @@ -1141,7 +1161,7 @@ ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) { return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) { - return llvm::make_unique( + return std::make_unique( Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries, AddStream, Cache); }; @@ -1204,7 +1224,7 @@ public: std::error_code EC; raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC, - sys::fs::OpenFlags::F_None); + sys::fs::OpenFlags::OF_None); if (EC) return errorCodeToError(EC); WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex); @@ -1231,7 +1251,7 @@ ThinBackend lto::createWriteIndexesThinBackend( return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, AddStreamFn AddStream, NativeObjectCache Cache) { - return llvm::make_unique( + return std::make_unique( Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); }; @@ -1274,6 +1294,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, if (DumpThinCGSCCs) ThinLTO.CombinedIndex.dumpSCCs(outs()); + std::set ExportedGUIDs; + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). 
+ std::map> LocalWPDTargetsMap; + runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, + LocalWPDTargetsMap); + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); @@ -1282,7 +1311,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. @@ -1308,12 +1336,19 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportList->second.count(GUID)) || ExportedGUIDs.count(GUID); }; - thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); + + // Update local devirtualized targets that were exported by cross-module + // importing or by other devirtualizations marked in the ExportedGUIDs set. 
+ updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported, + LocalWPDTargetsMap); auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); }; + thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported, + isPrevailing); + auto recordNewLinkage = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID, GlobalValue::LinkageTypes NewLinkage) { @@ -1368,7 +1403,7 @@ lto::setupStatsFile(StringRef StatsFilename) { llvm::EnableStatistics(false); std::error_code EC; auto StatsFile = - llvm::make_unique(StatsFilename, EC, sys::fs::F_None); + std::make_unique(StatsFilename, EC, sys::fs::OF_None); if (EC) return errorCodeToError(EC); diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 7456e717516..2761f8367b0 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/StandardInstrumentations.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -57,8 +58,8 @@ Error Config::addSaveTemps(std::string OutputFileName, ShouldDiscardValueNames = false; std::error_code EC; - ResolutionFile = llvm::make_unique( - OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text); + ResolutionFile = std::make_unique( + OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::OF_Text); if (EC) return errorCodeToError(EC); @@ -83,7 +84,7 @@ Error Config::addSaveTemps(std::string OutputFileName, PathPrefix = M.getModuleIdentifier() + "."; std::string Path = PathPrefix + PathSuffix + ".bc"; std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); // Because -save-temps is a debugging feature, we report the error // directly and exit. 
if (EC) @@ -103,7 +104,7 @@ Error Config::addSaveTemps(std::string OutputFileName, CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { std::string Path = OutputFileName + "index.bc"; std::error_code EC; - raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None); // Because -save-temps is a debugging feature, we report the error // directly and exit. if (EC) @@ -111,7 +112,7 @@ Error Config::addSaveTemps(std::string OutputFileName, WriteIndexToFile(Index, OS); Path = OutputFileName + "index.dot"; - raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None); if (EC) reportOpenError(Path, EC.message()); Index.exportToDot(OSDot); @@ -165,7 +166,10 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, PGOOptions::IRUse, PGOOptions::CSIRUse); } - PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt); + PassInstrumentationCallbacks PIC; + StandardInstrumentations SI; + SI.registerCallbacks(PIC); + PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC); AAManager AA; // Parse a custom AA pipeline if asked to. 
@@ -329,7 +333,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, if (!DwoFile.empty()) { std::error_code EC; - DwoOut = llvm::make_unique(DwoFile, EC, sys::fs::F_None); + DwoOut = std::make_unique(DwoFile, EC, sys::fs::OF_None); if (EC) report_fatal_error("Failed to open " + DwoFile + ": " + EC.message()); } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 6bb3bfaefc9..88219289286 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -151,7 +151,7 @@ void LTOCodeGenerator::initializeLTOPasses() { void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) { const std::vector &undefs = Mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) - AsmUndefinedRefs[undefs[i]] = 1; + AsmUndefinedRefs.insert(undefs[i]); } bool LTOCodeGenerator::addModule(LTOModule *Mod) { @@ -174,7 +174,7 @@ void LTOCodeGenerator::setModule(std::unique_ptr Mod) { AsmUndefinedRefs.clear(); MergedModule = Mod->takeModule(); - TheLinker = make_unique(*MergedModule); + TheLinker = std::make_unique(*MergedModule); setAsmUndefinedRefs(&*Mod); // We've just changed the input, so let's make sure we verify it. 
@@ -229,7 +229,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) { // create output file std::error_code EC; - ToolOutputFile Out(Path, EC, sys::fs::F_None); + ToolOutputFile Out(Path, EC, sys::fs::OF_None); if (EC) { std::string ErrMsg = "could not open bitcode file for writing: "; ErrMsg += Path.str() + ": " + EC.message(); @@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) MCpu = "cyclone"; } @@ -462,6 +463,8 @@ void LTOCodeGenerator::applyScopeRestrictions() { internalizeModule(*MergedModule, mustPreserveGV); + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + ScopeRestrictionsDone = true; } @@ -690,7 +693,7 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, return Context.setDiagnosticHandler(nullptr); // Register the LTOCodeGenerator stub in the LLVMContext to forward the // diagnostic to the external DiagHandler. 
- Context.setDiagnosticHandler(llvm::make_unique(this), + Context.setDiagnosticHandler(std::make_unique(this), true); } diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 7ffe7bf84ba..587b332e706 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) CPU = "cyclone"; } diff --git a/lib/LTO/SummaryBasedOptimizations.cpp b/lib/LTO/SummaryBasedOptimizations.cpp index e919fd530fb..6db495de003 100644 --- a/lib/LTO/SummaryBasedOptimizations.cpp +++ b/lib/LTO/SummaryBasedOptimizations.cpp @@ -18,7 +18,7 @@ using namespace llvm; -cl::opt ThinLTOSynthesizeEntryCounts( +static cl::opt ThinLTOSynthesizeEntryCounts( "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden, cl::desc("Synthesize entry counts based on the summary")); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 1c52218836c..d151de17896 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" @@ -52,6 +53,7 @@ #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -89,7 +91,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir, // User asked to save temps, let dump 
the bitcode file after import. std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); std::error_code EC; - raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); @@ -224,7 +226,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, } static void optimizeModule(Module &TheModule, TargetMachine &TM, - unsigned OptLevel, bool Freestanding) { + unsigned OptLevel, bool Freestanding, + ModuleSummaryIndex *Index) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); @@ -238,6 +241,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM, // Already did this in verifyLoadedModule(). PMB.VerifyInput = false; PMB.VerifyOutput = false; + PMB.ImportSummary = Index; legacy::PassManager PM; @@ -295,7 +299,7 @@ std::unique_ptr codegenModule(Module &TheModule, // Run codegen now. resulting binary is in OutputBuffer. PM.run(TheModule); } - return make_unique(std::move(OutputBuffer)); + return std::make_unique(std::move(OutputBuffer)); } /// Manage caching for a single Module. 
@@ -368,23 +372,26 @@ public: // Write to a temporary to avoid race condition SmallString<128> TempFilename; SmallString<128> CachePath(EntryPath); - int TempFD; llvm::sys::path::remove_filename(CachePath); sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o"); - std::error_code EC = - sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename); - if (EC) { - errs() << "Error: " << EC.message() << "\n"; - report_fatal_error("ThinLTO: Can't get a temporary file"); + + if (auto Err = handleErrors( + llvm::writeFileAtomically(TempFilename, EntryPath, + OutputBuffer.getBuffer()), + [](const llvm::AtomicFileWriteError &E) { + std::string ErrorMsgBuffer; + llvm::raw_string_ostream S(ErrorMsgBuffer); + E.log(S); + + if (E.Error == + llvm::atomic_write_error::failed_to_create_uniq_file) { + errs() << "Error: " << ErrorMsgBuffer << "\n"; + report_fatal_error("ThinLTO: Can't get a temporary file"); + } + })) { + // FIXME + consumeError(std::move(Err)); } - { - raw_fd_ostream OS(TempFD, /* ShouldClose */ true); - OS << OutputBuffer.getBuffer(); - } - // Rename temp file to final destination; rename is atomic - EC = sys::fs::rename(TempFilename, EntryPath); - if (EC) - sys::fs::remove(TempFilename); } }; @@ -429,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - optimizeModule(TheModule, TM, OptLevel, Freestanding); + optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -442,7 +449,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI); WriteBitcodeToFile(TheModule, OS, true, &Index); } - return make_unique(std::move(OutputBuffer)); + return std::make_unique(std::move(OutputBuffer)); } return codegenModule(TheModule, TM); @@ -457,10 +464,9 @@ static void resolvePrevailingInIndex( ModuleSummaryIndex &Index, 
StringMap> &ResolvedODR, - const DenseSet &GUIDPreservedSymbols) { - - DenseMap PrevailingCopy; - computePrevailingCopies(Index, PrevailingCopy); + const DenseSet &GUIDPreservedSymbols, + const DenseMap + &PrevailingCopy) { auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { const auto &Prevailing = PrevailingCopy.find(GUID); @@ -490,7 +496,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, TMBuilder.MCpu = "core2"; else if (TheTriple.getArch() == llvm::Triple::x86) TMBuilder.MCpu = "yonah"; - else if (TheTriple.getArch() == llvm::Triple::aarch64) + else if (TheTriple.getArch() == llvm::Triple::aarch64 || + TheTriple.getArch() == llvm::Triple::aarch64_32) TMBuilder.MCpu = "cyclone"; } TMBuilder.TheTriple = std::move(TheTriple); @@ -557,7 +564,7 @@ std::unique_ptr TargetMachineBuilder::create() const { */ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { std::unique_ptr CombinedIndex = - llvm::make_unique(/*HaveGVs=*/false); + std::make_unique(/*HaveGVs=*/false); uint64_t NextModuleId = 0; for (auto &Mod : Modules) { auto &M = Mod->getSingleBitcodeModule(); @@ -573,19 +580,36 @@ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { return CombinedIndex; } -static void internalizeAndPromoteInIndex( - const StringMap &ExportLists, - const DenseSet &GUIDPreservedSymbols, - ModuleSummaryIndex &Index) { - auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { +struct IsExported { + const StringMap &ExportLists; + const DenseSet &GUIDPreservedSymbols; + + IsExported(const StringMap &ExportLists, + const DenseSet &GUIDPreservedSymbols) + : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} + + bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); - }; + } +}; - 
thinLTOInternalizeAndPromoteInIndex(Index, isExported); -} +struct IsPrevailing { + const DenseMap &PrevailingCopy; + IsPrevailing(const DenseMap + &PrevailingCopy) + : PrevailingCopy(PrevailingCopy) {} + + bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { + const auto &Prevailing = PrevailingCopy.find(GUID); + // Not in map means that there was only one copy, which must be prevailing. + if (Prevailing == PrevailingCopy.end()) + return true; + return Prevailing->second == S; + }; +}; static void computeDeadSymbolsInIndex( ModuleSummaryIndex &Index, @@ -629,16 +653,22 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists); + DenseMap PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + // Resolve prevailing symbols StringMap> ResolvedODR; - resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); thinLTOResolvePrevailingInModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); // Promote the exported values in the index, so that they are promoted // in the module. 
- internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); } @@ -785,13 +815,19 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, if (ExportList.empty() && GUIDPreservedSymbols.empty()) return; + DenseMap PrevailingCopy; + computePrevailingCopies(Index, PrevailingCopy); + // Resolve prevailing symbols StringMap> ResolvedODR; - resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); // Promote the exported values in the index, so that they are promoted // in the module. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); @@ -810,7 +846,8 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) { initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now - optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding); + optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, + nullptr); } /// Write out the generated object file, either from CacheEntryPath or from @@ -845,7 +882,7 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath, } // No cache entry, just write out the buffer. 
std::error_code Err; - raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None); + raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None); if (Err) report_fatal_error("Can't open output '" + OutputPath + "'\n"); OS << OutputBuffer.getBuffer(); @@ -900,7 +937,7 @@ void ThinLTOCodeGenerator::run() { if (!SaveTempsDir.empty()) { auto SaveTempPath = SaveTempsDir + "index.bc"; std::error_code EC; - raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); + raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None); if (EC) report_fatal_error(Twine("Failed to open ") + SaveTempPath + " to save optimized bitcode\n"); @@ -931,6 +968,15 @@ void ThinLTOCodeGenerator::run() { // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map> LocalWPDTargetsMap; + std::set ExportedGUIDs; + runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap); + for (auto GUID : ExportedGUIDs) + GUIDPreservedSymbols.insert(GUID); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); @@ -944,14 +990,23 @@ void ThinLTOCodeGenerator::run() { // on the index, and nuke this map. StringMap> ResolvedODR; + DenseMap PrevailingCopy; + computePrevailingCopies(*Index, PrevailingCopy); + // Resolve prevailing symbols, this has to be computed early because it // impacts the caching. - resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols); + resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols, + PrevailingCopy); // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback). // Changes are made in the index, consumed in the ThinLTO backends. 
- internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index); + updateIndexWPDForExports(*Index, + IsExported(ExportLists, GUIDPreservedSymbols), + LocalWPDTargetsMap); + thinLTOInternalizeAndPromoteInIndex( + *Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); // Make sure that every module has an entry in the ExportLists, ImportList, // GVSummary and ResolvedODR maps to enable threaded access to these maps diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index 37515d93ed5..6784d81595e 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -398,7 +398,7 @@ class IRLinker { /// due to the use of Value handles which the Linker doesn't actually need, /// but this allows us to reuse the ValueMapper code. ValueToValueMapTy ValueMap; - ValueToValueMapTy AliasValueMap; + ValueToValueMapTy IndirectSymbolValueMap; DenseSet ValuesToLink; std::vector Worklist; @@ -437,7 +437,7 @@ class IRLinker { /// Entry point for mapping values and alternate context for mapping aliases. ValueMapper Mapper; - unsigned AliasMCID; + unsigned IndirectSymbolMCID; /// Handles cloning of a global values from the source module into /// the destination module, including setting the attributes and visibility. @@ -480,13 +480,15 @@ class IRLinker { /// /// Note this code may call the client-provided \p AddLazyFor. 
bool shouldLink(GlobalValue *DGV, GlobalValue &SGV); - Expected linkGlobalValueProto(GlobalValue *GV, bool ForAlias); + Expected linkGlobalValueProto(GlobalValue *GV, + bool ForIndirectSymbol); Error linkModuleFlagsMetadata(); void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src); Error linkFunctionBody(Function &Dst, Function &Src); - void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); + void linkIndirectSymbolBody(GlobalIndirectSymbol &Dst, + GlobalIndirectSymbol &Src); Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); /// Replace all types in the source AttributeList with the @@ -497,7 +499,7 @@ class IRLinker { /// into the destination module. GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); Function *copyFunctionProto(const Function *SF); - GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); + GlobalValue *copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS); /// Perform "replace all uses with" operations. These work items need to be /// performed as part of materialization, but we postpone them to happen after @@ -524,8 +526,8 @@ public: SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport), Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap, &GValMaterializer), - AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap, - &LValMaterializer)) { + IndirectSymbolMCID(Mapper.registerAlternateMappingContext( + IndirectSymbolValueMap, &LValMaterializer)) { ValueMap.getMDMap() = std::move(SharedMDs); for (GlobalValue *GV : ValuesToLink) maybeAdd(GV); @@ -535,7 +537,7 @@ public: ~IRLinker() { SharedMDs = std::move(*ValueMap.getMDMap()); } Error run(); - Value *materialize(Value *V, bool ForAlias); + Value *materialize(Value *V, bool ForIndirectSymbol); }; } @@ -568,12 +570,12 @@ Value *LocalValueMaterializer::materialize(Value *SGV) { return TheIRLinker.materialize(SGV, true); } -Value *IRLinker::materialize(Value *V, bool ForAlias) { +Value 
*IRLinker::materialize(Value *V, bool ForIndirectSymbol) { auto *SGV = dyn_cast(V); if (!SGV) return nullptr; - Expected NewProto = linkGlobalValueProto(SGV, ForAlias); + Expected NewProto = linkGlobalValueProto(SGV, ForIndirectSymbol); if (!NewProto) { setError(NewProto.takeError()); return nullptr; @@ -593,23 +595,23 @@ Value *IRLinker::materialize(Value *V, bool ForAlias) { if (V->hasInitializer() || V->hasAppendingLinkage()) return New; } else { - auto *A = cast(New); - if (A->getAliasee()) + auto *IS = cast(New); + if (IS->getIndirectSymbol()) return New; } - // When linking a global for an alias, it will always be linked. However we - // need to check if it was not already scheduled to satisfy a reference from a - // regular global value initializer. We know if it has been schedule if the - // "New" GlobalValue that is mapped here for the alias is the same as the one - // already mapped. If there is an entry in the ValueMap but the value is - // different, it means that the value already had a definition in the - // destination module (linkonce for instance), but we need a new definition - // for the alias ("New" will be different. - if (ForAlias && ValueMap.lookup(SGV) == New) + // When linking a global for an indirect symbol, it will always be linked. + // However we need to check if it was not already scheduled to satisfy a + // reference from a regular global value initializer. We know if it has been + // schedule if the "New" GlobalValue that is mapped here for the indirect + // symbol is the same as the one already mapped. If there is an entry in the + // ValueMap but the value is different, it means that the value already had a + // definition in the destination module (linkonce for instance), but we need a + // new definition for the indirect symbol ("New" will be different. 
+ if (ForIndirectSymbol && ValueMap.lookup(SGV) == New) return New; - if (ForAlias || shouldLink(New, *SGV)) + if (ForIndirectSymbol || shouldLink(New, *SGV)) setError(linkGlobalValueBody(*New, *SGV)); return New; @@ -627,7 +629,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { /*init*/ nullptr, SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), SGVar->getType()->getAddressSpace()); - NewDGV->setAlignment(SGVar->getAlignment()); + NewDGV->setAlignment(MaybeAlign(SGVar->getAlignment())); NewDGV->copyAttributesFrom(SGVar); return NewDGV; } @@ -660,16 +662,24 @@ Function *IRLinker::copyFunctionProto(const Function *SF) { return F; } -/// Set up prototypes for any aliases that come over from the source module. -GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) { +/// Set up prototypes for any indirect symbols that come over from the source +/// module. +GlobalValue * +IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) { // If there is no linkage to be performed or we're linking from the source, // bring over SGA. 
- auto *Ty = TypeMap.get(SGA->getValueType()); - auto *GA = - GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), - GlobalValue::ExternalLinkage, SGA->getName(), &DstM); - GA->copyAttributesFrom(SGA); - return GA; + auto *Ty = TypeMap.get(SGIS->getValueType()); + GlobalIndirectSymbol *GIS; + if (isa(SGIS)) + GIS = GlobalAlias::create(Ty, SGIS->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGIS->getName(), + &DstM); + else + GIS = GlobalIFunc::create(Ty, SGIS->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGIS->getName(), + nullptr, &DstM); + GIS->copyAttributesFrom(SGIS); + return GIS; } GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, @@ -681,7 +691,7 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, NewGV = copyFunctionProto(SF); } else { if (ForDefinition) - NewGV = copyGlobalAliasProto(cast(SGV)); + NewGV = copyGlobalIndirectSymbolProto(cast(SGV)); else if (SGV->getValueType()->isFunctionTy()) NewGV = Function::Create(cast(TypeMap.get(SGV->getValueType())), @@ -748,8 +758,18 @@ void IRLinker::computeTypeMapping() { } for (GlobalValue &SGV : *SrcM) - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) + if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) { + if (DGV->getType() == SGV.getType()) { + // If the types of DGV and SGV are the same, it means that DGV is from + // the source module and got added to DstM from a shared metadata. We + // shouldn't map this type to itself in case the type's components get + // remapped to a new type from DstM (for instance, during the loop over + // SrcM->getIdentifiedStructTypes() below). 
+ continue; + } + TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); + } for (GlobalValue &SGV : SrcM->aliases()) if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) @@ -940,7 +960,7 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) { } Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, - bool ForAlias) { + bool ForIndirectSymbol) { GlobalValue *DGV = getLinkedToGlobal(SGV); bool ShouldLink = shouldLink(DGV, *SGV); @@ -951,12 +971,12 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (I != ValueMap.end()) return cast(I->second); - I = AliasValueMap.find(SGV); - if (I != AliasValueMap.end()) + I = IndirectSymbolValueMap.find(SGV); + if (I != IndirectSymbolValueMap.end()) return cast(I->second); } - if (!ShouldLink && ForAlias) + if (!ShouldLink && ForIndirectSymbol) DGV = nullptr; // Handle the ultra special appending linkage case first. @@ -975,8 +995,8 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (DoneLinkingBodies) return nullptr; - NewGV = copyGlobalValueProto(SGV, ShouldLink || ForAlias); - if (ShouldLink || !ForAlias) + NewGV = copyGlobalValueProto(SGV, ShouldLink || ForIndirectSymbol); + if (ShouldLink || !ForIndirectSymbol) forceRenaming(NewGV, SGV->getName()); } @@ -987,7 +1007,7 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) NewGV = Remangled.getValue(); - if (ShouldLink || ForAlias) { + if (ShouldLink || ForIndirectSymbol) { if (const Comdat *SC = SGV->getComdat()) { if (auto *GO = dyn_cast(NewGV)) { Comdat *DC = DstM.getOrInsertComdat(SC->getName()); @@ -997,7 +1017,7 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, } } - if (!ShouldLink && ForAlias) + if (!ShouldLink && ForIndirectSymbol) NewGV->setLinkage(GlobalValue::InternalLinkage); Constant *C = NewGV; @@ -1060,8 +1080,10 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) { return Error::success(); } -void IRLinker::linkAliasBody(GlobalAlias 
&Dst, GlobalAlias &Src) { - Mapper.scheduleMapGlobalAliasee(Dst, *Src.getAliasee(), AliasMCID); +void IRLinker::linkIndirectSymbolBody(GlobalIndirectSymbol &Dst, + GlobalIndirectSymbol &Src) { + Mapper.scheduleMapGlobalIndirectSymbol(Dst, *Src.getIndirectSymbol(), + IndirectSymbolMCID); } Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { @@ -1071,7 +1093,7 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { linkGlobalVariable(cast(Dst), *GVar); return Error::success(); } - linkAliasBody(cast(Dst), cast(Src)); + linkIndirectSymbolBody(cast(Dst), cast(Src)); return Error::success(); } @@ -1411,7 +1433,7 @@ Error IRLinker::run() { // Already mapped. if (ValueMap.find(GV) != ValueMap.end() || - AliasValueMap.find(GV) != AliasValueMap.end()) + IndirectSymbolValueMap.find(GV) != IndirectSymbolValueMap.end()) continue; assert(!GV->isDeclaration()); diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a18f4cc25bc..35d6290e901 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -351,7 +351,8 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { SGVar->setConstant(false); } if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { - unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment()); + MaybeAlign Align( + std::max(DGVar->getAlignment(), SGVar->getAlignment())); SGVar->setAlignment(Align); DGVar->setAlignment(Align); } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 2c68723a12f..6f160e491ce 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -36,6 +36,7 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" @@ -336,7 +337,7 @@ public: } // end anonymous namespace void ELFWriter::align(unsigned Alignment) { - uint64_t 
Padding = OffsetToAlignment(W.OS.tell(), Alignment); + uint64_t Padding = offsetToAlignment(W.OS.tell(), Align(Alignment)); W.OS.write_zeros(Padding); } @@ -511,6 +512,19 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) { return Type; } +static bool isIFunc(const MCSymbolELF *Symbol) { + while (Symbol->getType() != ELF::STT_GNU_IFUNC) { + const MCSymbolRefExpr *Value; + if (!Symbol->isVariable() || + !(Value = dyn_cast(Symbol->getVariableValue())) || + Value->getKind() != MCSymbolRefExpr::VK_None || + mergeTypeForSet(Symbol->getType(), ELF::STT_GNU_IFUNC) != ELF::STT_GNU_IFUNC) + return false; + Symbol = &cast(Value->getSymbol()); + } + return true; +} + void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, ELFSymbolData &MSD, const MCAsmLayout &Layout) { const auto &Symbol = cast(*MSD.Symbol); @@ -524,6 +538,8 @@ void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, // Binding and Type share the same byte as upper and lower nibbles uint8_t Binding = Symbol.getBinding(); uint8_t Type = Symbol.getType(); + if (isIFunc(&Symbol)) + Type = ELF::STT_GNU_IFUNC; if (Base) { Type = mergeTypeForSet(Type, Base->getType()); } @@ -622,7 +638,7 @@ void ELFWriter::computeSymbolTable( unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; MCSectionELF *SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, EntrySize, ""); - SymtabSection->setAlignment(is64Bit() ? 8 : 4); + SymtabSection->setAlignment(is64Bit() ? 
Align(8) : Align(4)); SymbolTableIndex = addToSectionTable(SymtabSection); align(SymtabSection->getAlignment()); @@ -720,7 +736,7 @@ void ELFWriter::computeSymbolTable( MCSectionELF *SymtabShndxSection = Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, 4, ""); SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection); - SymtabShndxSection->setAlignment(4); + SymtabShndxSection->setAlignment(Align(4)); } ArrayRef FileNames = Asm.getFileNames(); @@ -808,7 +824,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, MCSectionELF *RelaSection = Ctx.createELFRelSection( RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL, Flags, EntrySize, Sec.getGroup(), &Sec); - RelaSection->setAlignment(is64Bit() ? 8 : 4); + RelaSection->setAlignment(is64Bit() ? Align(8) : Align(4)); return RelaSection; } @@ -895,7 +911,7 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); // Alignment field should reflect the requirements of // the compressed section header. - Section.setAlignment(is64Bit() ? 8 : 4); + Section.setAlignment(is64Bit() ? Align(8) : Align(4)); } else { // Add "z" prefix to section name. This is zlib-gnu style. MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); @@ -1119,7 +1135,7 @@ uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { if (!GroupIdx) { MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol); GroupIdx = addToSectionTable(Group); - Group->setAlignment(4); + Group->setAlignment(Align(4)); Groups.push_back(Group); } std::vector &Members = @@ -1437,22 +1453,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, MCContext &Ctx = Asm.getContext(); if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - // Let A, B and C being the components of Target and R be the location of - // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). 
- // If it is pcrel, we want to compute (A - B + C - R). - - // In general, ELF has no relocations for -B. It can only represent (A + C) - // or (A + C - R). If B = R + K and the relocation is not pcrel, we can - // replace B to implement it: (A - R - K + C) - if (IsPCRel) { - Ctx.reportError( - Fixup.getLoc(), - "No relocation available to represent this relative expression"); - return; - } - const auto &SymB = cast(RefB->getSymbol()); - if (SymB.isUndefined()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + @@ -1468,10 +1469,9 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, return; } - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); - uint64_t K = SymBOffset - FixupOffset; + assert(!IsPCRel && "should have been folded"); IsPCRel = true; - C -= K; + C += FixupOffset - Layout.getSymbolOffset(SymB); } // We either rejected the fixup or folded B into C at this point. @@ -1489,38 +1489,35 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, } } - unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); - uint64_t OriginalC = C; - bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); - if (!RelocateWithSymbol && SymA && !SymA->isUndefined()) - C += Layout.getSymbolOffset(*SymA); - - uint64_t Addend = 0; - if (hasRelocationAddend()) { - Addend = C; - C = 0; - } - - FixedValue = C; - const MCSectionELF *SecA = (SymA && SymA->isInSection()) ? cast(&SymA->getSection()) : nullptr; if (!checkRelocation(Ctx, Fixup.getLoc(), &FixupSection, SecA)) return; + unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); + bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); + uint64_t Addend = 0; + + FixedValue = !RelocateWithSymbol && SymA && !SymA->isUndefined() + ? C + Layout.getSymbolOffset(*SymA) + : C; + if (hasRelocationAddend()) { + Addend = FixedValue; + FixedValue = 0; + } + if (!RelocateWithSymbol) { const auto *SectionSymbol = SecA ? 
cast(SecA->getBeginSymbol()) : nullptr; if (SectionSymbol) SectionSymbol->setUsedInReloc(); - ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, - OriginalC); + ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); return; } - const auto *RenamedSymA = SymA; + const MCSymbolELF *RenamedSymA = SymA; if (SymA) { if (const MCSymbolELF *R = Renames.lookup(SymA)) RenamedSymA = R; @@ -1530,8 +1527,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, else RenamedSymA->setUsedInReloc(); } - ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, - OriginalC); + ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); } @@ -1551,7 +1547,7 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( std::unique_ptr llvm::createELFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, + return std::make_unique(std::move(MOTW), OS, IsLittleEndian); } @@ -1559,6 +1555,6 @@ std::unique_ptr llvm::createELFDwoObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, DwoOS, + return std::make_unique(std::move(MOTW), OS, DwoOS, IsLittleEndian); } diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 9b1102cbe7d..b800e9caee2 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -73,6 +73,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"FK_Data_2", 0, 16, 0}, {"FK_Data_4", 0, 32, 0}, {"FK_Data_8", 0, 64, 0}, + {"FK_Data_6b", 0, 6, 0}, {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, @@ -93,10 +94,12 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) 
const { {"FK_Data_Add_2", 0, 16, 0}, {"FK_Data_Add_4", 0, 32, 0}, {"FK_Data_Add_8", 0, 64, 0}, + {"FK_Data_Add_6b", 0, 6, 0}, {"FK_Data_Sub_1", 0, 8, 0}, {"FK_Data_Sub_2", 0, 16, 0}, {"FK_Data_Sub_4", 0, 32, 0}, - {"FK_Data_Sub_8", 0, 64, 0}}; + {"FK_Data_Sub_8", 0, 64, 0}, + {"FK_Data_Sub_6b", 0, 6, 0}}; assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind"); return Builtins[Kind]; diff --git a/lib/MC/MCAsmInfoXCOFF.cpp b/lib/MC/MCAsmInfoXCOFF.cpp index 74c21f0c9e6..65fe8848e20 100644 --- a/lib/MC/MCAsmInfoXCOFF.cpp +++ b/lib/MC/MCAsmInfoXCOFF.cpp @@ -15,4 +15,21 @@ void MCAsmInfoXCOFF::anchor() {} MCAsmInfoXCOFF::MCAsmInfoXCOFF() { IsLittleEndian = false; HasDotTypeDotSizeDirective = false; + COMMDirectiveAlignmentIsInBytes = false; + LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; + UseDotAlignForAlignment = true; + AsciiDirective = nullptr; // not supported + AscizDirective = nullptr; // not supported + NeedsFunctionDescriptors = true; + HasDotLGloblDirective = true; + Data64bitsDirective = "\t.llong\t"; + SupportsQuotedNames = false; +} + +bool MCAsmInfoXCOFF::isValidUnquotedName(StringRef Name) const { + // FIXME: Remove this function when we stop using "TOC[TC0]" as a symbol name. 
+ if (Name.equals("TOC[TC0]")) + return true; + + return MCAsmInfo::isValidUnquotedName(Name); } diff --git a/lib/MC/MCAsmMacro.cpp b/lib/MC/MCAsmMacro.cpp index ba4fb7d4f38..186a68b02a2 100644 --- a/lib/MC/MCAsmMacro.cpp +++ b/lib/MC/MCAsmMacro.cpp @@ -11,6 +11,7 @@ using namespace llvm; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MCAsmMacroParameter::dump(raw_ostream &OS) const { OS << "\"" << Name << "\""; if (Required) @@ -39,3 +40,4 @@ void MCAsmMacro::dump(raw_ostream &OS) const { } OS << " (BEGIN BODY)" << Body << "(END BODY)\n"; } +#endif diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7a2b0b8a122..2d9c2cb2125 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -23,6 +24,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" @@ -66,7 +68,7 @@ public: std::unique_ptr asmbackend, bool showInst) : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner), MAI(Context.getAsmInfo()), InstPrinter(printer), - Assembler(llvm::make_unique( + Assembler(std::make_unique( Context, std::move(asmbackend), std::move(emitter), (asmbackend) ? 
asmbackend->createObjectWriter(NullStream) : nullptr)), @@ -162,6 +164,8 @@ public: void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; void EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override; + void EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) override; void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; @@ -254,9 +258,26 @@ public: unsigned SourceLineNum, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) override; + + void PrintCVDefRangePrefix( + ArrayRef> Ranges); + void EmitCVDefRangeDirective( ArrayRef> Ranges, - StringRef FixedSizePortion) override; + codeview::DefRangeRegisterRelHeader DRHdr) override; + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) override; + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) override; + + void EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr) override; + void EmitCVStringTableDirective() override; void EmitCVFileChecksumsDirective() override; void EmitCVFileChecksumOffsetDirective(unsigned FileNo) override; @@ -291,13 +312,13 @@ public: void EmitWinCFIFuncletOrFuncEnd(SMLoc Loc) override; void EmitWinCFIStartChained(SMLoc Loc) override; void EmitWinCFIEndChained(SMLoc Loc) override; - void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override; - void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + void EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) override; + void EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) override; void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; - void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + void EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc 
Loc) override; - void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + void EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) override; void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override; void EmitWinCFIEndProlog(SMLoc Loc) override; @@ -630,6 +651,7 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Global: // .globl/.global OS << MAI->getGlobalDirective(); break; + case MCSA_LGlobal: OS << "\t.lglobl\t"; break; case MCSA_Hidden: OS << "\t.hidden\t"; break; case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; case MCSA_Internal: OS << "\t.internal\t"; break; @@ -740,6 +762,24 @@ void MCAsmStreamer::EmitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) { EmitEOL(); } +// We need an XCOFF-specific version of this directive as the AIX syntax +// requires a QualName argument identifying the csect name and storage mapping +// class to appear before the alignment if we are specifying it. +void MCAsmStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + assert(MAI->getLCOMMDirectiveAlignmentType() == LCOMM::Log2Alignment && + "We only support writing log base-2 alignment format with XCOFF."); + assert(isPowerOf2_32(ByteAlignment) && "Alignment must be a power of 2."); + + OS << "\t.lcomm\t"; + Symbol->print(OS, MAI); + OS << ',' << Size; + OS << ',' << Symbol->getName(); + OS << ',' << Log2_32(ByteAlignment); + + EmitEOL(); +} + void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(MAI->hasDotTypeDotSizeDirective()); OS << "\t.size\t"; @@ -1082,6 +1122,16 @@ void MCAsmStreamer::emitFill(const MCExpr &NumValues, int64_t Size, void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { + if (MAI->useDotAlignForAlignment()) { + if (!isPowerOf2_32(ByteAlignment)) + report_fatal_error("Only power-of-two alignments are supported " + "with .align."); + OS << "\t.align\t"; + OS << 
Log2_32(ByteAlignment); + EmitEOL(); + return; + } + // Some assemblers don't support non-power of two alignments, so we always // emit alignments as a power of two if possible. if (isPowerOf2_32(ByteAlignment)) { @@ -1376,9 +1426,8 @@ void MCAsmStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId, PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym); } -void MCAsmStreamer::EmitCVDefRangeDirective( - ArrayRef> Ranges, - StringRef FixedSizePortion) { +void MCAsmStreamer::PrintCVDefRangePrefix( + ArrayRef> Ranges) { OS << "\t.cv_def_range\t"; for (std::pair Range : Ranges) { OS << ' '; @@ -1386,10 +1435,43 @@ void MCAsmStreamer::EmitCVDefRangeDirective( OS << ' '; Range.second->print(OS, MAI); } - OS << ", "; - PrintQuotedString(FixedSizePortion, OS); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", reg_rel, "; + OS << DRHdr.Register << ", " << DRHdr.Flags << ", " + << DRHdr.BasePointerOffset; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", subfield_reg, "; + OS << DRHdr.Register << ", " << DRHdr.OffsetInParent; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", reg, "; + OS << DRHdr.Register; + EmitEOL(); +} + +void MCAsmStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr) { + PrintCVDefRangePrefix(Ranges); + OS << ", frame_ptr_rel, "; + OS << DRHdr.Offset; EmitEOL(); - this->MCStreamer::EmitCVDefRangeDirective(Ranges, FixedSizePortion); } void MCAsmStreamer::EmitCVStringTableDirective() { @@ -1453,9 +1535,8 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) { // just ones that map to LLVM register numbers and have 
known names. // Fall back to using the original number directly if no name is known. const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - int LLVMRegister = MRI->getLLVMRegNumFromEH(Register); - if (LLVMRegister != -1) { - InstPrinter->printRegName(OS, LLVMRegister); + if (Optional LLVMRegister = MRI->getLLVMRegNum(Register, true)) { + InstPrinter->printRegName(OS, *LLVMRegister); return; } } @@ -1668,6 +1749,12 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { // We only do this so the section switch that terminates the handler // data block is visible. WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo(); + + // Do nothing if no frame is open. MCStreamer should've already reported an + // error. + if (!CurFrame) + return; + MCSection *TextSec = &CurFrame->Function->getSection(); MCSection *XData = getAssociatedXDataSection(TextSec); SwitchSectionNoChange(XData); @@ -1676,18 +1763,21 @@ void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { +void MCAsmStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { MCStreamer::EmitWinCFIPushReg(Register, Loc); - OS << "\t.seh_pushreg " << Register; + OS << "\t.seh_pushreg "; + InstPrinter->printRegName(OS, Register); EmitEOL(); } -void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc); - OS << "\t.seh_setframe " << Register << ", " << Offset; + OS << "\t.seh_setframe "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } @@ -1698,19 +1788,23 @@ void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISaveReg(Register, 
Offset, Loc); - OS << "\t.seh_savereg " << Register << ", " << Offset; + OS << "\t.seh_savereg "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, +void MCAsmStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc); - OS << "\t.seh_savexmm " << Register << ", " << Offset; + OS << "\t.seh_savexmm "; + InstPrinter->printRegName(OS, Register); + OS << ", " << Offset; EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index c4f4d4c2870..cf42fe85b8e 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -321,7 +322,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Align: { const MCAlignFragment &AF = cast(F); unsigned Offset = Layout.getFragmentOffset(&AF); - unsigned Size = OffsetToAlignment(Offset, AF.getAlignment()); + unsigned Size = offsetToAlignment(Offset, Align(AF.getAlignment())); // Insert extra Nops for code alignment if the target define // shouldInsertExtraNopBytesForCodeAlign target hook. 
@@ -840,6 +841,10 @@ void MCAssembler::layout(MCAsmLayout &Layout) { getBackend().shouldInsertFixupForCodeAlign(*this, Layout, *AF); } continue; + } else if (auto *FragWithFixups = + dyn_cast(&Frag)) { + Fixups = FragWithFixups->getFixups(); + Contents = FragWithFixups->getContents(); } else llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { @@ -969,13 +974,9 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, MCContext &Context = Layout.getAssembler().getContext(); uint64_t OldSize = DF.getContents().size(); int64_t AddrDelta; - bool Abs; - if (getBackend().requiresDiffExpressionRelocations()) - Abs = DF.getAddrDelta().evaluateAsAbsolute(AddrDelta, Layout); - else { - Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); - assert(Abs && "We created a line delta with an invalid expression"); - } + bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); + assert(Abs && "We created a line delta with an invalid expression"); + (void)Abs; int64_t LineDelta; LineDelta = DF.getLineDelta(); SmallVectorImpl &Data = DF.getContents(); @@ -983,7 +984,7 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, raw_svector_ostream OSE(Data); DF.getFixups().clear(); - if (Abs) { + if (!getBackend().requiresDiffExpressionRelocations()) { MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, AddrDelta, OSE); } else { @@ -1017,10 +1018,25 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout); assert(Abs && "We created call frame with an invalid expression"); (void) Abs; - SmallString<8> &Data = DF.getContents(); + SmallVectorImpl &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); + DF.getFixups().clear(); + + if (getBackend().requiresDiffExpressionRelocations()) { + uint32_t Offset; + uint32_t Size; + 
MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE, &Offset, + &Size); + if (Size) { + DF.getFixups().push_back(MCFixup::create( + Offset, &DF.getAddrDelta(), + MCFixup::getKindForSizeInBits(Size /*In bits.*/, false /*isPCRel*/))); + } + } else { + MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); + } + return OldSize != Data.size(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 0dc2e2d37ca..a69ee19e1a1 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -58,11 +58,12 @@ AsSecureLogFileName("as-secure-log-file-name", MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCObjectFileInfo *mofi, const SourceMgr *mgr, - bool DoAutoReset) + MCTargetOptions const *TargetOpts, bool DoAutoReset) : SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi), Symbols(Allocator), UsedNames(Allocator), + InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), - AutoReset(DoAutoReset) { + AutoReset(DoAutoReset), TargetOptions(TargetOpts) { SecureLogFile = AsSecureLogFileName; if (SrcMgr && SrcMgr->getNumBuffers()) @@ -90,6 +91,7 @@ void MCContext::reset() { XCOFFAllocator.DestroyAll(); MCSubtargetAllocator.DestroyAll(); + InlineAsmUsedLabelNames.clear(); UsedNames.clear(); Symbols.clear(); Allocator.Reset(); @@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer, Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this)); } +void MCContext::registerInlineAsmLabel(MCSymbol *Sym) { + InlineAsmUsedLabelNames[Sym->getName()] = Sym; +} + //===----------------------------------------------------------------------===// // Section Management //===----------------------------------------------------------------------===// @@ -531,6 +537,8 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass SMC, + XCOFF::SymbolType Type, + 
XCOFF::StorageClass SC, SectionKind Kind, const char *BeginSymName) { // Do the lookup. If we have a hit, return it. @@ -548,7 +556,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section, Begin = createTempSymbol(BeginSymName, false); MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate()) - MCSectionXCOFF(CachedName, SMC, Kind, Begin); + MCSectionXCOFF(CachedName, SMC, Type, SC, Kind, Begin); Entry.second = Result; auto *F = new MCDataFragment(); @@ -690,6 +698,21 @@ void MCContext::reportError(SMLoc Loc, const Twine &Msg) { report_fatal_error(Msg, false); } +void MCContext::reportWarning(SMLoc Loc, const Twine &Msg) { + if (TargetOptions && TargetOptions->MCNoWarn) + return; + if (TargetOptions && TargetOptions->MCFatalWarnings) + reportError(Loc, Msg); + else { + // If we have a source manager use it. Otherwise, try using the inline + // source manager. + if (SrcMgr) + SrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + else if (InlineSrcMgr) + InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + } +} + void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) { reportError(Loc, Msg); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index aae6fdf9093..bcc7c45afc0 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -544,8 +544,8 @@ Expected MCDwarfLineTable::tryGetFile(StringRef &Directory, FileNumber); } -bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory, - StringRef &FileName, Optional Checksum) { +static bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory, + StringRef &FileName, Optional Checksum) { if (RootFile.Name.empty() || RootFile.Name != FileName.data()) return false; return RootFile.Checksum == Checksum; @@ -1897,26 +1897,54 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer, } void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context, - uint64_t AddrDelta, - raw_ostream &OS) { + uint64_t AddrDelta, raw_ostream &OS, + uint32_t *Offset, uint32_t *Size) { // 
Scale the address delta by the minimum instruction length. AddrDelta = ScaleAddrDelta(Context, AddrDelta); + bool WithFixups = false; + if (Offset && Size) + WithFixups = true; + support::endianness E = Context.getAsmInfo()->isLittleEndian() ? support::little : support::big; if (AddrDelta == 0) { + if (WithFixups) { + *Offset = 0; + *Size = 0; + } } else if (isUIntN(6, AddrDelta)) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; - OS << Opcode; + if (WithFixups) { + *Offset = OS.tell(); + *Size = 6; + OS << uint8_t(dwarf::DW_CFA_advance_loc); + } else + OS << Opcode; } else if (isUInt<8>(AddrDelta)) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); - OS << uint8_t(AddrDelta); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 8; + OS.write_zeros(1); + } else + OS << uint8_t(AddrDelta); } else if (isUInt<16>(AddrDelta)) { OS << uint8_t(dwarf::DW_CFA_advance_loc2); - support::endian::write(OS, AddrDelta, E); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 16; + OS.write_zeros(2); + } else + support::endian::write(OS, AddrDelta, E); } else { assert(isUInt<32>(AddrDelta)); OS << uint8_t(dwarf::DW_CFA_advance_loc4); - support::endian::write(OS, AddrDelta, E); + if (WithFixups) { + *Offset = OS.tell(); + *Size = 32; + OS.write_zeros(4); + } else + support::endian::write(OS, AddrDelta, E); } } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 245dd063004..fa2133078bf 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -139,7 +139,7 @@ static void setSectionAlignmentForBundling(const MCAssembler &Assembler, MCSection *Section) { if (Section && Assembler.isBundlingEnabled() && Section->hasInstructions() && Section->getAlignment() < Assembler.getBundleAlignSize()) - Section->setAlignment(Assembler.getBundleAlignSize()); + Section->setAlignment(Align(Assembler.getBundleAlignSize())); } void MCELFStreamer::ChangeSection(MCSection *Section, @@ -277,6 +277,9 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr 
Attribute) { case MCSA_AltEntry: llvm_unreachable("ELF doesn't support the .alt_entry attribute"); + + case MCSA_LGlobal: + llvm_unreachable("ELF doesn't support the .lglobl attribute"); } return true; @@ -306,7 +309,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size, // Update the maximum alignment of the section if necessary. if (ByteAlignment > Section.getAlignment()) - Section.setAlignment(ByteAlignment); + Section.setAlignment(Align(ByteAlignment)); SwitchSection(P.first, P.second); } else { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ab53ed42778..813c00f6f3b 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -259,6 +259,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_TOC_LO: return "toc@l"; case VK_PPC_TOC_HI: return "toc@h"; case VK_PPC_TOC_HA: return "toc@ha"; + case VK_PPC_U: return "u"; + case VK_PPC_L: return "l"; case VK_PPC_DTPMOD: return "dtpmod"; case VK_PPC_TPREL_LO: return "tprel@l"; case VK_PPC_TPREL_HI: return "tprel@h"; @@ -373,6 +375,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("toc@l", VK_PPC_TOC_LO) .Case("toc@h", VK_PPC_TOC_HI) .Case("toc@ha", VK_PPC_TOC_HA) + .Case("u", VK_PPC_U) + .Case("l", VK_PPC_L) .Case("tls", VK_PPC_TLS) .Case("dtpmod", VK_PPC_DTPMOD) .Case("tprel@l", VK_PPC_TPREL_LO) @@ -453,26 +457,28 @@ void MCTargetExpr::anchor() {} /* *** */ bool MCExpr::evaluateAsAbsolute(int64_t &Res) const { - return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr); + return evaluateAsAbsolute(Res, nullptr, nullptr, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const { - return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr); + return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout, const SectionAddrMap &Addrs) const { - return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 
&Addrs); + // Setting InSet causes us to absolutize differences across sections and that + // is what the MachO writer uses Addrs for. + return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs, true); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const { - return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr); + return evaluateAsAbsolute(Res, &Asm, nullptr, nullptr, false); } bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm) const { - return evaluateAsAbsolute(Res, Asm, nullptr, nullptr); + return evaluateAsAbsolute(Res, Asm, nullptr, nullptr, false); } bool MCExpr::evaluateKnownAbsolute(int64_t &Res, @@ -481,15 +487,6 @@ bool MCExpr::evaluateKnownAbsolute(int64_t &Res, true); } -bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, - const MCAsmLayout *Layout, - const SectionAddrMap *Addrs) const { - // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us - // absolutize differences across sections and that is what the MachO writer - // uses Addrs for. - return evaluateAsAbsolute(Res, Asm, Layout, Addrs, Addrs); -} - bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, const SectionAddrMap *Addrs, bool InSet) const { @@ -577,6 +574,24 @@ static void AttemptToFoldSymbolOffsetDifference( A = B = nullptr; } +static bool canFold(const MCAssembler *Asm, const MCSymbolRefExpr *A, + const MCSymbolRefExpr *B, bool InSet) { + if (InSet) + return true; + + if (!Asm->getBackend().requiresDiffExpressionRelocations()) + return true; + + const MCSymbol &CheckSym = A ? A->getSymbol() : B->getSymbol(); + if (!CheckSym.isInSection()) + return true; + + if (!CheckSym.getSection().hasInstructions()) + return true; + + return false; +} + /// Evaluate the result of an add between (conceptually) two MCValues. 
/// /// This routine conceptually attempts to construct an MCValue: @@ -617,8 +632,7 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout, // the backend requires this to be emitted as individual relocations, unless // the InSet flag is set to get the current difference anyway (used for // example to calculate symbol sizes). - if (Asm && - (InSet || !Asm->getBackend().requiresDiffExpressionRelocations())) { + if (Asm && canFold(Asm, LHS_A, LHS_B, InSet)) { // First, fold out any differences which are fully resolved. By // reassociating terms in // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 159f4070fe9..c5c06f323e6 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -64,12 +64,6 @@ StringRef MCInstPrinter::markup(StringRef s) const { else return ""; } -StringRef MCInstPrinter::markup(StringRef a, StringRef b) const { - if (getUseMarkup()) - return a; - else - return b; -} // For asm-style hex (e.g. 0ffh) the first digit always has to be a number. 
static bool needsLeadingZero(uint64_t Value) @@ -89,24 +83,25 @@ format_object MCInstPrinter::formatDec(int64_t Value) const { } format_object MCInstPrinter::formatHex(int64_t Value) const { - switch(PrintHexStyle) { + switch (PrintHexStyle) { case HexStyle::C: - if (Value < 0) + if (Value < 0) { + if (Value == std::numeric_limits::min()) + return format("-0x8000000000000000", Value); return format("-0x%" PRIx64, -Value); - else - return format("0x%" PRIx64, Value); + } + return format("0x%" PRIx64, Value); case HexStyle::Asm: if (Value < 0) { - if (needsLeadingZero((uint64_t)(-Value))) + if (Value == std::numeric_limits::min()) + return format("-8000000000000000h", Value); + if (needsLeadingZero(-(uint64_t)(Value))) return format("-0%" PRIx64 "h", -Value); - else - return format("-%" PRIx64 "h", -Value); - } else { - if (needsLeadingZero((uint64_t)(Value))) - return format("0%" PRIx64 "h", Value); - else - return format("%" PRIx64 "h", Value); + return format("-%" PRIx64 "h", -Value); } + if (needsLeadingZero((uint64_t)(Value))) + return format("0%" PRIx64 "h", Value); + return format("%" PRIx64 "h", Value); } llvm_unreachable("unsupported print style"); } diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp index eca87f940bf..54741fdd686 100644 --- a/lib/MC/MCInstrAnalysis.cpp +++ b/lib/MC/MCInstrAnalysis.cpp @@ -33,3 +33,9 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, Target = Addr+Size+Imm; return true; } + +Optional +MCInstrAnalysis::evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr, + uint64_t Size) const { + return None; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 613f255a4ea..8e558a36b7a 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -330,6 +330,7 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym, case MCSA_Protected: case MCSA_Weak: case MCSA_Local: + case MCSA_LGlobal: return false; case MCSA_Global: diff --git 
a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 9f555abe140..70c0409ece7 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) { return false; // aarch64 always has it. - if (T.getArch() == Triple::aarch64) + if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) return true; // armv7k always has it. @@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, SectionKind::getReadOnly()); - if (T.isOSDarwin() && T.getArch() == Triple::aarch64) + if (T.isOSDarwin() && + (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) SupportsCompactUnwindWithoutEHFrame = true; if (T.isWatchABI()) @@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF - else if (T.getArch() == Triple::aarch64) + else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF @@ -768,7 +769,12 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) { // the ABI or object file format. For example, the XL compiler uses an unnamed // csect for program code. 
TextSection = Ctx->getXCOFFSection( - ".text", XCOFF::StorageMappingClass::XMC_PR, SectionKind::getText()); + ".text", XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getText()); + + DataSection = Ctx->getXCOFFSection( + ".data", XCOFF::StorageMappingClass::XMC_RW, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getData()); } void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 1587d849866..83f6ab8fe33 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -27,7 +27,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, std::unique_ptr OW, std::unique_ptr Emitter) : MCStreamer(Context), - Assembler(llvm::make_unique( + Assembler(std::make_unique( Context, std::move(TAB), std::move(Emitter), std::move(OW))), EmitEHFrame(true), EmitDebugFrame(false) {} @@ -539,7 +539,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, // Update the maximum alignment on the current section if necessary. MCSection *CurSec = getCurrentSectionOnly(); if (ByteAlignment > CurSec->getAlignment()) - CurSec->setAlignment(ByteAlignment); + CurSec->setAlignment(Align(ByteAlignment)); } void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 084f6a7a2e1..b59ac08ad6c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -524,6 +525,19 @@ private: /// directives parsed by this class. StringMap DirectiveKindMap; + // Codeview def_range type parsing. 
+ enum CVDefRangeType { + CVDR_DEFRANGE = 0, // Placeholder + CVDR_DEFRANGE_REGISTER, + CVDR_DEFRANGE_FRAMEPOINTER_REL, + CVDR_DEFRANGE_SUBFIELD_REGISTER, + CVDR_DEFRANGE_REGISTER_REL + }; + + /// Maps Codeview def_range types --> CVDefRangeType enum, for + /// Codeview def_range types parsed by this class. + StringMap CVDefRangeTypeMap; + // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" @@ -671,6 +685,7 @@ private: bool parseDirectiveAddrsigSym(); void initializeDirectiveKindMap(); + void initializeCVDefRangeTypeMap(); }; } // end anonymous namespace @@ -714,12 +729,14 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, PlatformParser.reset(createWasmAsmParser()); break; case MCObjectFileInfo::IsXCOFF: - // TODO: Need to implement createXCOFFAsmParser for XCOFF format. + report_fatal_error( + "Need to implement createXCOFFAsmParser for XCOFF format."); break; } PlatformParser->Initialize(*this); initializeDirectiveKindMap(); + initializeCVDefRangeTypeMap(); NumOfMacroInstantiations = 0; } @@ -1142,7 +1159,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } } - MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName); + MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); + if (!Sym) + Sym = getContext().getOrCreateSymbol(SymbolName); // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. @@ -1737,6 +1756,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, StringMap::const_iterator DirKindIt = DirectiveKindMap.find(IDVal); DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) + ? DK_NO_DIRECTIVE : DirKindIt->getValue(); switch (DirKind) { @@ -2895,11 +2915,27 @@ bool AsmParser::parseEscapedString(std::string &Data) { } // Recognize escaped characters. Note that this escape semantics currently - // loosely follows Darwin 'as'. 
Notably, it doesn't support hex escapes. + // loosely follows Darwin 'as'. ++i; if (i == e) return TokError("unexpected backslash at end of string"); + // Recognize hex sequences similarly to GNU 'as'. + if (Str[i] == 'x' || Str[i] == 'X') { + size_t length = Str.size(); + if (i + 1 >= length || !isHexDigit(Str[i + 1])) + return TokError("invalid hexadecimal escape sequence"); + + // Consume hex characters. GNU 'as' reads all hexadecimal characters and + // then truncates to the lower 16 bits. Seems reasonable. + unsigned Value = 0; + while (i + 1 < length && isHexDigit(Str[i + 1])) + Value = Value * 16 + hexDigitValue(Str[++i]); + + Data += (unsigned char)(Value & 0xFF); + continue; + } + // Recognize octal sequences. if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. @@ -3825,6 +3861,13 @@ bool AsmParser::parseDirectiveCVInlineLinetable() { return false; } +void AsmParser::initializeCVDefRangeTypeMap() { + CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER; + CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL; + CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER; + CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL; +} + /// parseDirectiveCVDefRange /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes* bool AsmParser::parseDirectiveCVDefRange() { @@ -3846,13 +3889,92 @@ bool AsmParser::parseDirectiveCVDefRange() { Ranges.push_back({GapStartSym, GapEndSym}); } - std::string FixedSizePortion; - if (parseToken(AsmToken::Comma, "unexpected token in directive") || - parseEscapedString(FixedSizePortion)) - return true; + StringRef CVDefRangeTypeStr; + if (parseToken( + AsmToken::Comma, + "expected comma before def_range type in .cv_def_range directive") || + parseIdentifier(CVDefRangeTypeStr)) + return Error(Loc, "expected def_range type in directive"); - getStreamer().EmitCVDefRangeDirective(Ranges, FixedSizePortion); - return false; + StringMap::const_iterator CVTypeIt = + 
CVDefRangeTypeMap.find(CVDefRangeTypeStr); + CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end()) + ? CVDR_DEFRANGE + : CVTypeIt->getValue(); + switch (CVDRType) { + case CVDR_DEFRANGE_REGISTER: { + int64_t DRRegister; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register number"); + + codeview::DefRangeRegisterHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.MayHaveNoName = 0; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_FRAMEPOINTER_REL: { + int64_t DROffset; + if (parseToken(AsmToken::Comma, + "expected comma before offset in .cv_def_range directive") || + parseAbsoluteExpression(DROffset)) + return Error(Loc, "expected offset value"); + + codeview::DefRangeFramePointerRelHeader DRHdr; + DRHdr.Offset = DROffset; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_SUBFIELD_REGISTER: { + int64_t DRRegister; + int64_t DROffsetInParent; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register number"); + if (parseToken(AsmToken::Comma, + "expected comma before offset in .cv_def_range directive") || + parseAbsoluteExpression(DROffsetInParent)) + return Error(Loc, "expected offset value"); + + codeview::DefRangeSubfieldRegisterHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.MayHaveNoName = 0; + DRHdr.OffsetInParent = DROffsetInParent; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + case CVDR_DEFRANGE_REGISTER_REL: { + int64_t DRRegister; + int64_t DRFlags; + int64_t DRBasePointerOffset; + if (parseToken(AsmToken::Comma, "expected comma before register number in " + ".cv_def_range directive") || + parseAbsoluteExpression(DRRegister)) + return Error(Loc, "expected register value"); + if 
(parseToken( + AsmToken::Comma, + "expected comma before flag value in .cv_def_range directive") || + parseAbsoluteExpression(DRFlags)) + return Error(Loc, "expected flag value"); + if (parseToken(AsmToken::Comma, "expected comma before base pointer offset " + "in .cv_def_range directive") || + parseAbsoluteExpression(DRBasePointerOffset)) + return Error(Loc, "expected base pointer offset value"); + + codeview::DefRangeRegisterRelHeader DRHdr; + DRHdr.Register = DRRegister; + DRHdr.Flags = DRFlags; + DRHdr.BasePointerOffset = DRBasePointerOffset; + getStreamer().EmitCVDefRangeDirective(Ranges, DRHdr); + break; + } + default: + return Error(Loc, "unexpected def_range type in .cv_def_range directive"); + } + return true; } /// parseDirectiveCVString diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index 1217ea99e46..06f8310ae06 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -69,6 +69,7 @@ class COFFAsmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveRVA>(".rva"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); // Win64 EH directives. 
addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>( @@ -83,21 +84,10 @@ class COFFAsmParser : public MCAsmParserExtension { ".seh_handler"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>( ".seh_handlerdata"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>( - ".seh_pushreg"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>( - ".seh_setframe"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>( ".seh_stackalloc"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>( - ".seh_savereg"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>( - ".seh_savexmm"); - addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>( - ".seh_pushframe"); addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>( ".seh_endprologue"); - addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); } bool ParseSectionDirectiveText(StringRef, SMLoc) { @@ -143,12 +133,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseSEHDirectiveEndChained(StringRef, SMLoc); bool ParseSEHDirectiveHandler(StringRef, SMLoc); bool ParseSEHDirectiveHandlerData(StringRef, SMLoc); - bool ParseSEHDirectivePushReg(StringRef, SMLoc); - bool ParseSEHDirectiveSetFrame(StringRef, SMLoc); bool ParseSEHDirectiveAllocStack(StringRef, SMLoc); - bool ParseSEHDirectiveSaveReg(StringRef, SMLoc); - bool ParseSEHDirectiveSaveXMM(StringRef, SMLoc); - bool ParseSEHDirectivePushFrame(StringRef, SMLoc); bool ParseSEHDirectiveEndProlog(StringRef, SMLoc); bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except); @@ -682,39 +667,6 @@ bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) { return false; } -bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc Loc) { - unsigned Reg = 0; - if (ParseSEHRegisterNumber(Reg)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - 
getStreamer().EmitWinCFIPushReg(Reg, Loc); - return false; -} - -bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify a stack pointer offset"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); - return false; -} - bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { int64_t Size; if (getParser().parseAbsoluteExpression(Size)) @@ -728,71 +680,6 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { return false; } -bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify an offset on the stack"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - // FIXME: Err on %xmm* registers - getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); - return false; -} - -// FIXME: This method is inherently x86-specific. It should really be in the -// x86 backend. 
-bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc Loc) { - unsigned Reg = 0; - int64_t Off; - if (ParseSEHRegisterNumber(Reg)) - return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("you must specify an offset on the stack"); - - Lex(); - if (getParser().parseAbsoluteExpression(Off)) - return true; - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - // FIXME: Err on non-%xmm* registers - getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); - return false; -} - -bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc Loc) { - bool Code = false; - StringRef CodeID; - if (getLexer().is(AsmToken::At)) { - SMLoc startLoc = getLexer().getLoc(); - Lex(); - if (!getParser().parseIdentifier(CodeID)) { - if (CodeID != "code") - return Error(startLoc, "expected @code"); - Code = true; - } - } - - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in directive"); - - Lex(); - getStreamer().EmitWinCFIPushFrame(Code, Loc); - return false; -} - bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) { Lex(); getStreamer().EmitWinCFIEndProlog(Loc); @@ -816,46 +703,6 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { return false; } -bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { - SMLoc startLoc = getLexer().getLoc(); - if (getLexer().is(AsmToken::Percent)) { - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - SMLoc endLoc; - unsigned LLVMRegNo; - if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc)) - return true; - -#if 0 - // FIXME: TargetAsmInfo::getCalleeSavedRegs() commits a serious layering - // violation so this validation code is disabled. - - // Check that this is a non-volatile register. 
- const unsigned *NVRegs = TAI.getCalleeSavedRegs(); - unsigned i; - for (i = 0; NVRegs[i] != 0; ++i) - if (NVRegs[i] == LLVMRegNo) - break; - if (NVRegs[i] == 0) - return Error(startLoc, "expected non-volatile register"); -#endif - - int SEHRegNo = MRI->getSEHRegNum(LLVMRegNo); - if (SEHRegNo < 0) - return Error(startLoc,"register can't be represented in SEH unwind info"); - RegNo = SEHRegNo; - } - else { - int64_t n; - if (getParser().parseAbsoluteExpression(n)) - return true; - if (n > 15) - return Error(startLoc, "register number is too high"); - RegNo = n; - } - - return false; -} - namespace llvm { MCAsmParserExtension *createCOFFAsmParser() { diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 1160934dc62..bd66e5f39c0 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -778,8 +778,8 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { raw_fd_ostream *OS = getContext().getSecureLog(); if (!OS) { std::error_code EC; - auto NewOS = llvm::make_unique( - StringRef(SecureLogFile), EC, sys::fs::F_Append | sys::fs::F_Text); + auto NewOS = std::make_unique( + StringRef(SecureLogFile), EC, sys::fs::OF_Append | sys::fs::OF_Text); if (EC) return Error(IDLoc, Twine("can't open secure log file: ") + SecureLogFile + " (" + EC.message() + ")"); diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp index 28d4459fecd..0c242aed706 100644 --- a/lib/MC/MCParser/WasmAsmParser.cpp +++ b/lib/MC/MCParser/WasmAsmParser.cpp @@ -123,6 +123,7 @@ public: // See use of .init_array in WasmObjectWriter and // TargetLoweringObjectFileWasm .StartsWith(".init_array", SectionKind::getData()) + .StartsWith(".debug_", SectionKind::getMetadata()) .Default(Optional()); if (!Kind.hasValue()) return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name); diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp index 4273b876b7b..d491c0eb7e0 
100644 --- a/lib/MC/MCRegisterInfo.cpp +++ b/lib/MC/MCRegisterInfo.cpp @@ -20,15 +20,16 @@ using namespace llvm; -unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx, - const MCRegisterClass *RC) const { +MCRegister +MCRegisterInfo::getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers) if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx)) return *Supers; return 0; } -unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { +MCRegister MCRegisterInfo::getSubReg(MCRegister Reg, unsigned Idx) const { assert(Idx && Idx < getNumSubRegIndices() && "This is not a subregister index"); // Get a pointer to the corresponding SubRegIndices list. This list has the @@ -40,7 +41,8 @@ unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { return 0; } -unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { +unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg, + MCRegister SubReg) const { assert(SubReg && SubReg < getNumRegs() && "This is not a register"); // Get a pointer to the corresponding SubRegIndices list. This list has the // name of each sub-register in the same order as MCSubRegIterator. @@ -63,7 +65,7 @@ unsigned MCRegisterInfo::getSubRegIdxOffset(unsigned Idx) const { return SubRegIdxRanges[Idx].Offset; } -int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { +int MCRegisterInfo::getDwarfRegNum(MCRegister RegNum, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; @@ -76,29 +78,18 @@ int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return I->ToReg; } -int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { +Optional MCRegisterInfo::getLLVMRegNum(unsigned RegNum, + bool isEH) const { const DwarfLLVMRegPair *M = isEH ? 
EHDwarf2LRegs : Dwarf2LRegs; unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; if (!M) - return -1; + return None; DwarfLLVMRegPair Key = { RegNum, 0 }; const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); - assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); - return I->ToReg; -} - -int MCRegisterInfo::getLLVMRegNumFromEH(unsigned RegNum) const { - const DwarfLLVMRegPair *M = EHDwarf2LRegs; - unsigned Size = EHDwarf2LRegsSize; - - if (!M) - return -1; - DwarfLLVMRegPair Key = { RegNum, 0 }; - const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); - if (I == M+Size || I->FromReg != RegNum) - return -1; - return I->ToReg; + if (I != M + Size && I->FromReg == RegNum) + return I->ToReg; + return None; } int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { @@ -110,22 +101,21 @@ int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { // a corresponding LLVM register number at all. So if we can't map the // EH register number to an LLVM register number, assume it's just a // valid DWARF register number as is. 
- int LRegNum = getLLVMRegNumFromEH(RegNum); - if (LRegNum != -1) - return getDwarfRegNum(LRegNum, false); + if (Optional LRegNum = getLLVMRegNum(RegNum, true)) + return getDwarfRegNum(*LRegNum, false); return RegNum; } -int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { - const DenseMap::const_iterator I = L2SEHRegs.find(RegNum); +int MCRegisterInfo::getSEHRegNum(MCRegister RegNum) const { + const DenseMap::const_iterator I = L2SEHRegs.find(RegNum); if (I == L2SEHRegs.end()) return (int)RegNum; return I->second; } -int MCRegisterInfo::getCodeViewRegNum(unsigned RegNum) const { +int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const { if (L2CVRegs.empty()) report_fatal_error("target does not implement codeview register mapping"); - const DenseMap::const_iterator I = L2CVRegs.find(RegNum); + const DenseMap::const_iterator I = L2CVRegs.find(RegNum); if (I == L2CVRegs.end()) report_fatal_error("unknown codeview register " + (RegNum < getNumRegs() ? getName(RegNum) diff --git a/lib/MC/MCSectionXCOFF.cpp b/lib/MC/MCSectionXCOFF.cpp index d1a63734502..d52959f15f9 100644 --- a/lib/MC/MCSectionXCOFF.cpp +++ b/lib/MC/MCSectionXCOFF.cpp @@ -15,19 +15,65 @@ using namespace llvm; MCSectionXCOFF::~MCSectionXCOFF() = default; +static StringRef getMappingClassString(XCOFF::StorageMappingClass SMC) { + switch (SMC) { + case XCOFF::XMC_DS: + return "DS"; + case XCOFF::XMC_RW: + return "RW"; + case XCOFF::XMC_PR: + return "PR"; + default: + report_fatal_error("Unhandled storage-mapping class."); + } +} + void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, const MCExpr *Subsection) const { if (getKind().isText()) { + if (getMappingClass() != XCOFF::XMC_PR) + report_fatal_error("Unhandled storage-mapping class for .text csect"); + OS << "\t.csect " << getSectionName() << "[" - << "PR" + << getMappingClassString(getMappingClass()) << "]" << '\n'; return; } + if (getKind().isData()) { + switch (getMappingClass()) { + case 
XCOFF::XMC_RW: + case XCOFF::XMC_DS: + OS << "\t.csect " << getSectionName() << "[" + << getMappingClassString(getMappingClass()) << "]" << '\n'; + break; + case XCOFF::XMC_TC0: + OS << "\t.toc\n"; + break; + default: + report_fatal_error( + "Unhandled storage-mapping class for .data csect."); + } + return; + } + + if (getKind().isBSSLocal() || getKind().isCommon()) { + assert((getMappingClass() == XCOFF::XMC_RW || + getMappingClass() == XCOFF::XMC_BS) && + "Generated a storage-mapping class for a common/bss csect we don't " + "understand how to switch to."); + assert(getCSectType() == XCOFF::XTY_CM && + "wrong csect type for .bss csect"); + // Don't have to print a directive for switching to section for commons. + // '.comm' and '.lcomm' directives for the variable will create the needed + // csect. + return; + } + report_fatal_error("Printing for this SectionKind is unimplemented."); } bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); } -bool MCSectionXCOFF::isVirtualSection() const { return !getKind().isCommon(); } +bool MCSectionXCOFF::isVirtualSection() const { return XCOFF::XTY_CM == Type; } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index decbb96817e..b8278cb1107 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -21,6 +22,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -327,10 +330,56 @@ void MCStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId, const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) {} +/// 
Only call this on endian-specific types like ulittle16_t and little32_t, or +/// structs composed of them. +template +static void copyBytesForDefRange(SmallString<20> &BytePrefix, + codeview::SymbolKind SymKind, + const T &DefRangeHeader) { + BytePrefix.resize(2 + sizeof(T)); + codeview::ulittle16_t SymKindLE = codeview::ulittle16_t(SymKind); + memcpy(&BytePrefix[0], &SymKindLE, 2); + memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T)); +} + void MCStreamer::EmitCVDefRangeDirective( ArrayRef> Ranges, StringRef FixedSizePortion) {} +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterRelHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER_REL, DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeSubfieldRegisterHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_SUBFIELD_REGISTER, + DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeRegisterHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_REGISTER, DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + +void MCStreamer::EmitCVDefRangeDirective( + ArrayRef> Ranges, + codeview::DefRangeFramePointerRelHeader DRHdr) { + SmallString<20> BytePrefix; + copyBytesForDefRange(BytePrefix, codeview::S_DEFRANGE_FRAMEPOINTER_REL, + DRHdr); + EmitCVDefRangeDirective(Ranges, BytePrefix); +} + void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) { } @@ -631,7 +680,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { MCSymbol *StartProc = EmitCFILabel(); WinFrameInfos.emplace_back( - llvm::make_unique(Symbol, StartProc)); + std::make_unique(Symbol, StartProc)); CurrentWinFrameInfo = 
WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } @@ -665,7 +714,7 @@ void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { MCSymbol *StartProc = EmitCFILabel(); - WinFrameInfos.emplace_back(llvm::make_unique( + WinFrameInfos.emplace_back(std::make_unique( CurFrame->Function, StartProc, CurFrame)); CurrentWinFrameInfo = WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); @@ -763,18 +812,23 @@ MCSection *MCStreamer::getAssociatedXDataSection(const MCSection *TextSec) { void MCStreamer::EmitSyntaxDirective() {} -void MCStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { +static unsigned encodeSEHRegNum(MCContext &Ctx, MCRegister Reg) { + return Ctx.getRegisterInfo()->getSEHRegNum(Reg); +} + +void MCStreamer::EmitWinCFIPushReg(MCRegister Register, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) return; MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register); + WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol( + Label, encodeSEHRegNum(Context, Register)); CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISetFrame(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -790,8 +844,8 @@ void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SetFPReg(Label, Register, Offset); + WinEH::Instruction Inst = Win64EH::Instruction::SetFPReg( + Label, encodeSEHRegNum(getContext(), Register), Offset); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -813,7 +867,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { CurFrame->Instructions.push_back(Inst); } -void 
MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISaveReg(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -825,12 +879,12 @@ void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SaveNonVol(Label, Register, Offset); + WinEH::Instruction Inst = Win64EH::Instruction::SaveNonVol( + Label, encodeSEHRegNum(Context, Register), Offset); CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, +void MCStreamer::EmitWinCFISaveXMM(MCRegister Register, unsigned Offset, SMLoc Loc) { WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); if (!CurFrame) @@ -840,8 +894,8 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, MCSymbol *Label = EmitCFILabel(); - WinEH::Instruction Inst = - Win64EH::Instruction::SaveXMM(Label, Register, Offset); + WinEH::Instruction Inst = Win64EH::Instruction::SaveXMM( + Label, encodeSEHRegNum(Context, Register), Offset); CurFrame->Instructions.push_back(Inst); } @@ -1009,6 +1063,10 @@ void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { void MCStreamer::EmitCOFFSymbolType(int Type) { llvm_unreachable("this directive only supported on COFF targets"); } +void MCStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlign) { + llvm_unreachable("this directive only supported on XCOFF targets"); +} void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void MCStreamer::emitELFSymverDirective(StringRef AliasName, const MCSymbol *Aliasee) {} diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 5fd48d9e101..c8678df02bf 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -315,3 +315,28 @@ void MCSubtargetInfo::initInstrItins(InstrItineraryData 
&InstrItins) const { InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles, ForwardingPaths); } + +Optional MCSubtargetInfo::getCacheSize(unsigned Level) const { + return Optional(); +} + +Optional +MCSubtargetInfo::getCacheAssociativity(unsigned Level) const { + return Optional(); +} + +Optional MCSubtargetInfo::getCacheLineSize(unsigned Level) const { + return Optional(); +} + +unsigned MCSubtargetInfo::getPrefetchDistance() const { + return 0; +} + +unsigned MCSubtargetInfo::getMaxPrefetchIterationsAhead() const { + return UINT_MAX; +} + +unsigned MCSubtargetInfo::getMinPrefetchStride() const { + return 1; +} diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp index e46257823e3..1ccb3a58d5c 100644 --- a/lib/MC/MCWasmObjectTargetWriter.cpp +++ b/lib/MC/MCWasmObjectTargetWriter.cpp @@ -10,8 +10,9 @@ using namespace llvm; -MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit) - : Is64Bit(Is64Bit) {} +MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit, + bool IsEmscripten) + : Is64Bit(Is64Bit), IsEmscripten(IsEmscripten) {} // Pin the vtable to this object file MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default; diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp index 86fa7219785..e7e96ecbb3a 100644 --- a/lib/MC/MCWasmStreamer.cpp +++ b/lib/MC/MCWasmStreamer.cpp @@ -122,7 +122,7 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { break; case MCSA_NoDeadStrip: - Symbol->setExported(); + Symbol->setNoStrip(); break; default: diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index 04d5f100a2f..c5a21312140 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -88,7 +88,19 @@ void MCWinCOFFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) { } void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { - llvm_unreachable("not implemented"); + // Let the target do whatever target specific 
stuff it needs to do. + getAssembler().getBackend().handleAssemblerFlag(Flag); + + switch (Flag) { + // None of these require COFF specific handling. + case MCAF_SyntaxUnified: + case MCAF_Code16: + case MCAF_Code32: + case MCAF_Code64: + break; + case MCAF_SubsectionsViaSymbols: + llvm_unreachable("COFF doesn't support .subsections_via_symbols"); + } } void MCWinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { @@ -180,7 +192,7 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { MCSection *SXData = getContext().getObjectFileInfo()->getSXDataSection(); getAssembler().registerSection(*SXData); if (SXData->getAlignment() < 4) - SXData->setAlignment(4); + SXData->setAlignment(Align(4)); new MCSymbolIdFragment(Symbol, SXData); @@ -197,7 +209,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { MCSection *Sec = getCurrentSectionOnly(); getAssembler().registerSection(*Sec); if (Sec->getAlignment() < 4) - Sec->setAlignment(4); + Sec->setAlignment(Align(4)); new MCSymbolIdFragment(Symbol, getCurrentSectionOnly()); diff --git a/lib/MC/MCXCOFFStreamer.cpp b/lib/MC/MCXCOFFStreamer.cpp index 071de024a3f..50937d6adc0 100644 --- a/lib/MC/MCXCOFFStreamer.cpp +++ b/lib/MC/MCXCOFFStreamer.cpp @@ -10,10 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCXCOFFStreamer.h" +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolXCOFF.h" +#include "llvm/MC/MCXCOFFStreamer.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -25,14 +27,38 @@ MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context, : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)) {} -bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, +bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Sym, MCSymbolAttr Attribute) { - report_fatal_error("Symbol attributes not 
implemented for XCOFF."); + auto *Symbol = cast(Sym); + getAssembler().registerSymbol(*Symbol); + + switch (Attribute) { + case MCSA_Global: + Symbol->setStorageClass(XCOFF::C_EXT); + Symbol->setExternal(true); + break; + default: + report_fatal_error("Not implemented yet."); + } + return true; } void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - report_fatal_error("Emiting common symbols not implemented for XCOFF."); + getAssembler().registerSymbol(*Symbol); + Symbol->setExternal(cast(Symbol)->getStorageClass() != + XCOFF::C_HIDEXT); + Symbol->setCommon(Size, ByteAlignment); + + // Need to add this symbol to the current Fragment which will belong to the + // containing CSECT. + auto *F = dyn_cast_or_null(getCurrentFragment()); + assert(F && "Expected a valid section with a fragment set."); + Symbol->setFragment(F); + + // Emit the alignment and storage for the variable to the section. + EmitValueToAlignment(ByteAlignment); + EmitZeros(Size); } void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, @@ -42,8 +68,18 @@ void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, } void MCXCOFFStreamer::EmitInstToData(const MCInst &Inst, - const MCSubtargetInfo &) { - report_fatal_error("Instruction emission not implemented for XCOFF."); + const MCSubtargetInfo &STI) { + MCAssembler &Assembler = getAssembler(); + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); + + // TODO: Handle Fixups later + + MCDataFragment *DF = getOrCreateDataFragment(&STI); + DF->setHasInstructions(STI); + DF->getContents().append(Code.begin(), Code.end()); } MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, @@ -57,3 +93,9 @@ MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, S->getAssembler().setRelaxAll(true); return S; } + +void MCXCOFFStreamer::EmitXCOFFLocalCommonSymbol(MCSymbol *Symbol, + 
uint64_t Size, + unsigned ByteAlignment) { + EmitCommonSymbol(Symbol, Size, ByteAlignment); +} diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index f0ceb86b25a..9f6af981aca 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -25,6 +25,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -126,7 +127,7 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, const MCSection &NextSec = *Layout.getSectionOrder()[Next]; if (NextSec.isVirtualSection()) return 0; - return OffsetToAlignment(EndAddr, NextSec.getAlignment()); + return offsetToAlignment(EndAddr, Align(NextSec.getAlignment())); } void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, @@ -444,7 +445,8 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( } // Pad to a multiple of the pointer size. - W.OS.write_zeros(OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4)); + W.OS.write_zeros( + offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4))); assert(W.OS.tell() - Start == Size); } @@ -832,7 +834,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, // The section data is padded to 4 bytes. // // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + unsigned SectionDataPadding = + offsetToAlignment(SectionDataFileSize, Align(4)); SectionDataFileSize += SectionDataPadding; // Write the prolog, starting with the header and load command... @@ -997,7 +1000,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, #endif Asm.getLOHContainer().emit(*this, Layout); // Pad to a multiple of the pointer size. - W.OS.write_zeros(OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); + W.OS.write_zeros( + offsetToAlignment(LOHRawSize, is64Bit() ? 
Align(8) : Align(4))); assert(W.OS.tell() - Start == LOHSize); } @@ -1043,6 +1047,6 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, std::unique_ptr llvm::createMachObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) { - return llvm::make_unique(std::move(MOTW), OS, + return std::make_unique(std::move(MOTW), OS, IsLittleEndian); } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index cb3db8e2268..c9c88ec5843 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -38,6 +38,7 @@ void StringTableBuilder::initSize() { // Start the table with a NUL byte. Size = 1; break; + case XCOFF: case WinCOFF: // Make room to write the table size later. Size = 4; @@ -67,9 +68,12 @@ void StringTableBuilder::write(uint8_t *Buf) const { if (!Data.empty()) memcpy(Buf + P.second, Data.data(), Data.size()); } - if (K != WinCOFF) - return; - support::endian::write32le(Buf, Size); + // The COFF formats store the size of the string table in the first 4 bytes. + // For Windows, the format is little-endian; for AIX, it is big-endian. + if (K == WinCOFF) + support::endian::write32le(Buf, Size); + else if (K == XCOFF) + support::endian::write32be(Buf, Size); } // Returns the character at Pos from end of a string. diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 098343cd010..c1ff3cc2480 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -258,6 +258,7 @@ class WasmObjectWriter : public MCObjectWriter { // TargetObjectWriter wrappers. 
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } + bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); } void startSection(SectionBookkeeping &Section, unsigned SectionId); void startCustomSection(SectionBookkeeping &Section, StringRef Name); @@ -426,9 +427,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - MCAsmBackend &Backend = Asm.getBackend(); - bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & - MCFixupKindInfo::FKF_IsPCRel; + // The WebAssembly backend should never generate FKF_IsPCRel fixups + assert(!(Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel)); + const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -439,51 +441,22 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, return; if (const MCSymbolRefExpr *RefB = Target.getSymB()) { - assert(RefB->getKind() == MCSymbolRefExpr::VK_None && - "Should not have constructed this"); - - // Let A, B and C being the components of Target and R be the location of - // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). - // If it is pcrel, we want to compute (A - B + C - R). - - // In general, Wasm has no relocations for -B. It can only represent (A + C) - // or (A + C - R). If B = R + K and the relocation is not pcrel, we can - // replace B to implement it: (A - R - K + C) - if (IsPCRel) { - Ctx.reportError( - Fixup.getLoc(), - "No relocation available to represent this relative expression"); - return; - } - + // To get here the A - B expression must have failed evaluateAsRelocatable. + // This means either A or B must be undefined and in WebAssembly we can't + // support either of those cases. 
const auto &SymB = cast(RefB->getSymbol()); - - if (SymB.isUndefined()) { - Ctx.reportError(Fixup.getLoc(), - Twine("symbol '") + SymB.getName() + - "' can not be undefined in a subtraction expression"); - return; - } - - assert(!SymB.isAbsolute() && "Should have been folded"); - const MCSection &SecB = SymB.getSection(); - if (&SecB != &FixupSection) { - Ctx.reportError(Fixup.getLoc(), - "Cannot represent a difference across sections"); - return; - } - - uint64_t SymBOffset = Layout.getSymbolOffset(SymB); - uint64_t K = SymBOffset - FixupOffset; - IsPCRel = true; - C -= K; + Ctx.reportError( + Fixup.getLoc(), + Twine("symbol '") + SymB.getName() + + "': unsupported subtraction expression used in relocation."); + return; } // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); - const auto *SymA = RefA ? cast(&RefA->getSymbol()) : nullptr; + const auto *SymA = cast(&RefA->getSymbol()); - if (SymA && SymA->isVariable()) { + if (SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); const auto *Inner = cast(Expr); if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) @@ -496,8 +469,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, FixedValue = 0; unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup); - assert(!IsPCRel); - assert(SymA); // Absolute offset within a section or a function. // Currently only supported for for metadata sections. 
@@ -1296,12 +1267,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, // Separate out the producers and target features sections if (Name == "producers") { - ProducersSection = llvm::make_unique(Name, &Section); + ProducersSection = std::make_unique(Name, &Section); continue; } if (Name == "target_features") { TargetFeaturesSection = - llvm::make_unique(Name, &Section); + std::make_unique(Name, &Section); continue; } @@ -1379,7 +1350,9 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, report_fatal_error(".size expression must be evaluatable"); auto &DataSection = static_cast(WS.getSection()); - assert(DataSection.isWasmData()); + if (!DataSection.isWasmData()) + report_fatal_error("data symbols must live in a data section: " + + WS.getName()); // For each data symbol, export it in the symtab as a reference to the // corresponding Wasm data segment. @@ -1473,8 +1446,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL; if (WS.isUndefined()) Flags |= wasm::WASM_SYMBOL_UNDEFINED; - if (WS.isExported()) - Flags |= wasm::WASM_SYMBOL_EXPORTED; + if (WS.isNoStrip()) { + Flags |= wasm::WASM_SYMBOL_NO_STRIP; + if (isEmscripten()) { + Flags |= wasm::WASM_SYMBOL_EXPORTED; + } + } if (WS.getName() != WS.getImportName()) Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME; @@ -1618,5 +1595,5 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, std::unique_ptr llvm::createWasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 0e6c05bc726..749ed8badfa 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -31,10 +31,10 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" 
#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -239,7 +239,7 @@ WinCOFFObjectWriter::WinCOFFObjectWriter( } COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) { - Symbols.push_back(make_unique(Name)); + Symbols.push_back(std::make_unique(Name)); return Symbols.back().get(); } @@ -251,7 +251,7 @@ COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) { } COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) { - Sections.emplace_back(make_unique(Name)); + Sections.emplace_back(std::make_unique(Name)); return Sections.back().get(); } @@ -605,7 +605,7 @@ uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm, // Calculate our CRC with an initial value of '0', this is not how // JamCRC is specified but it aligns with the expected output. JamCRC JC(/*Init=*/0); - JC.update(Buf); + JC.update(makeArrayRef(reinterpret_cast(Buf.data()), Buf.size())); return JC.getCRC(); } @@ -1098,5 +1098,5 @@ void MCWinCOFFObjectTargetWriter::anchor() {} std::unique_ptr llvm::createWinCOFFObjectWriter( std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MC/XCOFFObjectWriter.cpp b/lib/MC/XCOFFObjectWriter.cpp index 9b9a7b6c118..353c2106873 100644 --- a/lib/MC/XCOFFObjectWriter.cpp +++ b/lib/MC/XCOFFObjectWriter.cpp @@ -10,18 +10,135 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCXCOFFObjectWriter.h" +#include "llvm/MC/StringTableBuilder.h" +#include 
"llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" + +#include using namespace llvm; +// An XCOFF object file has a limited set of predefined sections. The most +// important ones for us (right now) are: +// .text --> contains program code and read-only data. +// .data --> contains initialized data, function descriptors, and the TOC. +// .bss --> contains uninitialized data. +// Each of these sections is composed of 'Control Sections'. A Control Section +// is more commonly referred to as a csect. A csect is an indivisible unit of +// code or data, and acts as a container for symbols. A csect is mapped +// into a section based on its storage-mapping class, with the exception of +// XMC_RW which gets mapped to either .data or .bss based on whether it's +// explicitly initialized or not. +// +// We don't represent the sections in the MC layer as there is nothing +// interesting about them at at that level: they carry information that is +// only relevant to the ObjectWriter, so we materialize them in this class. namespace { +constexpr unsigned DefaultSectionAlign = 4; + +// Packs the csect's alignment and type into a byte. +uint8_t getEncodedType(const MCSectionXCOFF *); + +// Wrapper around an MCSymbolXCOFF. +struct Symbol { + const MCSymbolXCOFF *const MCSym; + uint32_t SymbolTableIndex; + + XCOFF::StorageClass getStorageClass() const { + return MCSym->getStorageClass(); + } + StringRef getName() const { return MCSym->getName(); } + Symbol(const MCSymbolXCOFF *MCSym) : MCSym(MCSym), SymbolTableIndex(-1) {} +}; + +// Wrapper for an MCSectionXCOFF. 
+struct ControlSection { + const MCSectionXCOFF *const MCCsect; + uint32_t SymbolTableIndex; + uint32_t Address; + uint32_t Size; + + SmallVector Syms; + StringRef getName() const { return MCCsect->getSectionName(); } + ControlSection(const MCSectionXCOFF *MCSec) + : MCCsect(MCSec), SymbolTableIndex(-1), Address(-1), Size(0) {} +}; + +// Represents the data related to a section excluding the csects that make up +// the raw data of the section. The csects are stored separately as not all +// sections contain csects, and some sections contain csects which are better +// stored separately, e.g. the .data section containing read-write, descriptor, +// TOCBase and TOC-entry csects. +struct Section { + char Name[XCOFF::NameSize]; + // The physical/virtual address of the section. For an object file + // these values are equivalent. + uint32_t Address; + uint32_t Size; + uint32_t FileOffsetToData; + uint32_t FileOffsetToRelocations; + uint32_t RelocationCount; + int32_t Flags; + + int16_t Index; + + // Virtual sections do not need storage allocated in the object file. + const bool IsVirtual; + + void reset() { + Address = 0; + Size = 0; + FileOffsetToData = 0; + FileOffsetToRelocations = 0; + RelocationCount = 0; + Index = -1; + } + + Section(const char *N, XCOFF::SectionTypeFlags Flags, bool IsVirtual) + : Address(0), Size(0), FileOffsetToData(0), FileOffsetToRelocations(0), + RelocationCount(0), Flags(Flags), Index(-1), IsVirtual(IsVirtual) { + strncpy(Name, N, XCOFF::NameSize); + } +}; + class XCOFFObjectWriter : public MCObjectWriter { + // Type to be used for a container representing a set of csects with + // (approximately) the same storage mapping class. For example all the csects + // with a storage mapping class of `xmc_pr` will get placed into the same + // container. 
+ using CsectGroup = std::deque; + support::endian::Writer W; std::unique_ptr TargetObjectWriter; + StringTableBuilder Strings; + + // The non-empty sections, in the order they will appear in the section header + // table. + std::vector
Sections; + + // The Predefined sections. + Section Text; + Section BSS; + + // CsectGroups. These store the csects which make up different parts of + // the sections. Should have one for each set of csects that get mapped into + // the same section and get handled in a 'similar' way. + CsectGroup ProgramCodeCsects; + CsectGroup BSSCsects; + + uint32_t SymbolTableEntryCount = 0; + uint32_t SymbolTableOffset = 0; + + virtual void reset() override; void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override; @@ -30,6 +147,40 @@ class XCOFFObjectWriter : public MCObjectWriter { uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override; + static bool nameShouldBeInStringTable(const StringRef &); + void writeSymbolName(const StringRef &); + void writeSymbolTableEntryForCsectMemberLabel(const Symbol &, + const ControlSection &, int16_t, + uint64_t); + void writeSymbolTableEntryForControlSection(const ControlSection &, int16_t, + XCOFF::StorageClass); + void writeFileHeader(); + void writeSectionHeaderTable(); + void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout); + void writeSymbolTable(const MCAsmLayout &Layout); + + // Called after all the csects and symbols have been processed by + // `executePostLayoutBinding`, this function handles building up the majority + // of the structures in the object file representation. Namely: + // *) Calculates physical/virtual addresses, raw-pointer offsets, and section + // sizes. + // *) Assigns symbol table indices. + // *) Builds up the section header table by adding any non-empty sections to + // `Sections`. + void assignAddressesAndIndices(const MCAsmLayout &); + + bool + needsAuxiliaryHeader() const { /* TODO aux header support not implemented. */ + return false; + } + + // Returns the size of the auxiliary header to be written to the object file. 
+ size_t auxiliaryHeaderSize() const { + assert(!needsAuxiliaryHeader() && + "Auxiliary header support not implemented."); + return 0; + } + public: XCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS); @@ -37,11 +188,100 @@ public: XCOFFObjectWriter::XCOFFObjectWriter( std::unique_ptr MOTW, raw_pwrite_stream &OS) - : W(OS, support::big), TargetObjectWriter(std::move(MOTW)) {} + : W(OS, support::big), TargetObjectWriter(std::move(MOTW)), + Strings(StringTableBuilder::XCOFF), + Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false), + BSS(".bss", XCOFF::STYP_BSS, /* IsVirtual */ true) {} -void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &, - const MCAsmLayout &) { - // TODO Implement once we have sections and symbols to handle. +void XCOFFObjectWriter::reset() { + // Reset any sections we have written to, and empty the section header table. + for (auto *Sec : Sections) + Sec->reset(); + Sections.clear(); + + // Clear any csects we have stored. + ProgramCodeCsects.clear(); + BSSCsects.clear(); + + // Reset the symbol table and string table. + SymbolTableEntryCount = 0; + SymbolTableOffset = 0; + Strings.clear(); + + MCObjectWriter::reset(); +} + +void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) { + if (TargetObjectWriter->is64Bit()) + report_fatal_error("64-bit XCOFF object files are not supported yet."); + + // Maps the MC Section representation to its corresponding ControlSection + // wrapper. Needed for finding the ControlSection to insert an MCSymbol into + // from its containing MCSectionXCOFF. + DenseMap WrapperMap; + + for (const auto &S : Asm) { + const auto *MCSec = cast(&S); + assert(WrapperMap.find(MCSec) == WrapperMap.end() && + "Cannot add a csect twice."); + + // If the name does not fit in the storage provided in the symbol table + // entry, add it to the string table. 
+ if (nameShouldBeInStringTable(MCSec->getSectionName())) + Strings.add(MCSec->getSectionName()); + + switch (MCSec->getMappingClass()) { + case XCOFF::XMC_PR: + assert(XCOFF::XTY_SD == MCSec->getCSectType() && + "Only an initialized csect can contain program code."); + ProgramCodeCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &ProgramCodeCsects.back(); + break; + case XCOFF::XMC_RW: + if (XCOFF::XTY_CM == MCSec->getCSectType()) { + BSSCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &BSSCsects.back(); + break; + } + report_fatal_error("Unhandled mapping of read-write csect to section."); + case XCOFF::XMC_TC0: + // TODO FIXME Handle emiting the TOC base. + break; + case XCOFF::XMC_BS: + assert(XCOFF::XTY_CM == MCSec->getCSectType() && + "Mapping invalid csect. CSECT with bss storage class must be " + "common type."); + BSSCsects.emplace_back(MCSec); + WrapperMap[MCSec] = &BSSCsects.back(); + break; + default: + report_fatal_error("Unhandled mapping of csect to section."); + } + } + + for (const MCSymbol &S : Asm.symbols()) { + // Nothing to do for temporary symbols. + if (S.isTemporary()) + continue; + const MCSymbolXCOFF *XSym = cast(&S); + + // Map the symbol into its containing csect. + const MCSectionXCOFF *ContainingCsect = XSym->getContainingCsect(); + assert(WrapperMap.find(ContainingCsect) != WrapperMap.end() && + "Expected containing csect to exist in map"); + + // Lookup the containing csect and add the symbol to it. + WrapperMap[ContainingCsect]->Syms.emplace_back(XSym); + + // If the name does not fit in the storage provided in the symbol table + // entry, add it to the string table. 
+ if (nameShouldBeInStringTable(XSym->getName())) + Strings.add(XSym->getName()); + } + + Strings.finalize(); + assignAddressesAndIndices(Layout); } void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &, @@ -50,7 +290,29 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &, report_fatal_error("XCOFF relocations not supported."); } -uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { +void XCOFFObjectWriter::writeSections(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + // Write the program code control sections one at a time. + uint32_t CurrentAddressLocation = Text.Address; + for (const auto &Csect : ProgramCodeCsects) { + if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation) + W.OS.write_zeros(PaddingSize); + Asm.writeSectionData(W.OS, Csect.MCCsect, Layout); + CurrentAddressLocation = Csect.Address + Csect.Size; + } + + if (Text.Index != -1) { + // The size of the tail padding in a section is the end virtual address of + // the current section minus the the end virtual address of the last csect + // in that section. + if (uint32_t PaddingSize = + Text.Address + Text.Size - CurrentAddressLocation) + W.OS.write_zeros(PaddingSize); + } +} + +uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { // We always emit a timestamp of 0 for reproducibility, so ensure incremental // linking is not enabled, in case, like with Windows COFF, such a timestamp // is incompatible with incremental linking of XCOFF. @@ -62,27 +324,274 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { uint64_t StartOffset = W.OS.tell(); - // TODO FIXME Assign section numbers/finalize sections. + writeFileHeader(); + writeSectionHeaderTable(); + writeSections(Asm, Layout); + // TODO writeRelocations(); - // TODO FIXME Finalize symbols. + writeSymbolTable(Layout); + // Write the string table. 
+ Strings.write(W.OS); + return W.OS.tell() - StartOffset; +} + +bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) { + return SymbolName.size() > XCOFF::NameSize; +} + +void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) { + if (nameShouldBeInStringTable(SymbolName)) { + W.write(0); + W.write(Strings.getOffset(SymbolName)); + } else { + char Name[XCOFF::NameSize]; + std::strncpy(Name, SymbolName.data(), XCOFF::NameSize); + ArrayRef NameRef(Name, XCOFF::NameSize); + W.write(NameRef); + } +} + +void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel( + const Symbol &SymbolRef, const ControlSection &CSectionRef, + int16_t SectionIndex, uint64_t SymbolOffset) { + // Name or Zeros and string table offset + writeSymbolName(SymbolRef.getName()); + assert(SymbolOffset <= UINT32_MAX - CSectionRef.Address && + "Symbol address overflows."); + W.write(CSectionRef.Address + SymbolOffset); + W.write(SectionIndex); + // Basic/Derived type. See the description of the n_type field for symbol + // table entries for a detailed description. Since we don't yet support + // visibility, and all other bits are either optionally set or reserved, this + // is always zero. + // TODO FIXME How to assert a symbol's visibilty is default? + // TODO Set the function indicator (bit 10, 0x0020) for functions + // when debugging is enabled. + W.write(0); + W.write(SymbolRef.getStorageClass()); + // Always 1 aux entry for now. + W.write(1); + + // Now output the auxiliary entry. + W.write(CSectionRef.SymbolTableIndex); + // Parameter typecheck hash. Not supported. + W.write(0); + // Typecheck section number. Not supported. + W.write(0); + // Symbol type: Label + W.write(XCOFF::XTY_LD); + // Storage mapping class. + W.write(CSectionRef.MCCsect->getMappingClass()); + // Reserved (x_stab). + W.write(0); + // Reserved (x_snstab). 
+ W.write(0); +} + +void XCOFFObjectWriter::writeSymbolTableEntryForControlSection( + const ControlSection &CSectionRef, int16_t SectionIndex, + XCOFF::StorageClass StorageClass) { + // n_name, n_zeros, n_offset + writeSymbolName(CSectionRef.getName()); + // n_value + W.write(CSectionRef.Address); + // n_scnum + W.write(SectionIndex); + // Basic/Derived type. See the description of the n_type field for symbol + // table entries for a detailed description. Since we don't yet support + // visibility, and all other bits are either optionally set or reserved, this + // is always zero. + // TODO FIXME How to assert a symbol's visibilty is default? + // TODO Set the function indicator (bit 10, 0x0020) for functions + // when debugging is enabled. + W.write(0); + // n_sclass + W.write(StorageClass); + // Always 1 aux entry for now. + W.write(1); + + // Now output the auxiliary entry. + W.write(CSectionRef.Size); + // Parameter typecheck hash. Not supported. + W.write(0); + // Typecheck section number. Not supported. + W.write(0); + // Symbol type. + W.write(getEncodedType(CSectionRef.MCCsect)); + // Storage mapping class. + W.write(CSectionRef.MCCsect->getMappingClass()); + // Reserved (x_stab). + W.write(0); + // Reserved (x_snstab). + W.write(0); +} + +void XCOFFObjectWriter::writeFileHeader() { // Magic. W.write(0x01df); // Number of sections. - W.write(0); + W.write(Sections.size()); // Timestamp field. For reproducible output we write a 0, which represents no // timestamp. W.write(0); // Byte Offset to the start of the symbol table. - W.write(0); + W.write(SymbolTableOffset); // Number of entries in the symbol table. - W.write(0); + W.write(SymbolTableEntryCount); // Size of the optional header. W.write(0); // Flags. W.write(0); +} - return W.OS.tell() - StartOffset; +void XCOFFObjectWriter::writeSectionHeaderTable() { + for (const auto *Sec : Sections) { + // Write Name. 
+ ArrayRef NameRef(Sec->Name, XCOFF::NameSize); + W.write(NameRef); + + // Write the Physical Address and Virtual Address. In an object file these + // are the same. + W.write(Sec->Address); + W.write(Sec->Address); + + W.write(Sec->Size); + W.write(Sec->FileOffsetToData); + + // Relocation pointer and Lineno pointer. Not supported yet. + W.write(0); + W.write(0); + + // Relocation and line-number counts. Not supported yet. + W.write(0); + W.write(0); + + W.write(Sec->Flags); + } +} + +void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) { + // Print out symbol table for the program code. + for (const auto &Csect : ProgramCodeCsects) { + // Write out the control section first and then each symbol in it. + writeSymbolTableEntryForControlSection(Csect, Text.Index, + Csect.MCCsect->getStorageClass()); + for (const auto &Sym : Csect.Syms) + writeSymbolTableEntryForCsectMemberLabel( + Sym, Csect, Text.Index, Layout.getSymbolOffset(*Sym.MCSym)); + } + + // The BSS Section is special in that the csects must contain a single symbol, + // and the contained symbol cannot be represented in the symbol table as a + // label definition. + for (auto &Csect : BSSCsects) { + assert(Csect.Syms.size() == 1 && + "Uninitialized csect cannot contain more then 1 symbol."); + Symbol &Sym = Csect.Syms.back(); + writeSymbolTableEntryForControlSection(Csect, BSS.Index, + Sym.getStorageClass()); + } +} + +void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { + // The address corrresponds to the address of sections and symbols in the + // object file. We place the shared address 0 immediately after the + // section header table. + uint32_t Address = 0; + // Section indices are 1-based in XCOFF. + int16_t SectionIndex = 1; + // The first symbol table entry is for the file name. We are not emitting it + // yet, so start at index 0. + uint32_t SymbolTableIndex = 0; + + // Text section comes first. 
+ if (!ProgramCodeCsects.empty()) { + Sections.push_back(&Text); + Text.Index = SectionIndex++; + for (auto &Csect : ProgramCodeCsects) { + const MCSectionXCOFF *MCSec = Csect.MCCsect; + Csect.Address = alignTo(Address, MCSec->getAlignment()); + Csect.Size = Layout.getSectionAddressSize(MCSec); + Address = Csect.Address + Csect.Size; + Csect.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for the csect. + SymbolTableIndex += 2; + for (auto &Sym : Csect.Syms) { + Sym.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for each contained symbol + SymbolTableIndex += 2; + } + } + Address = alignTo(Address, DefaultSectionAlign); + + // The first csect of a section can be aligned by adjusting the virtual + // address of its containing section instead of writing zeroes into the + // object file. + Text.Address = ProgramCodeCsects.front().Address; + + Text.Size = Address - Text.Address; + } + + // Data section Second. TODO + + // BSS Section third. + if (!BSSCsects.empty()) { + Sections.push_back(&BSS); + BSS.Index = SectionIndex++; + for (auto &Csect : BSSCsects) { + const MCSectionXCOFF *MCSec = Csect.MCCsect; + Csect.Address = alignTo(Address, MCSec->getAlignment()); + Csect.Size = Layout.getSectionAddressSize(MCSec); + Address = Csect.Address + Csect.Size; + Csect.SymbolTableIndex = SymbolTableIndex; + // 1 main and 1 auxiliary symbol table entry for the csect. + SymbolTableIndex += 2; + + assert(Csect.Syms.size() == 1 && + "csect in the BSS can only contain a single symbol."); + Csect.Syms[0].SymbolTableIndex = Csect.SymbolTableIndex; + } + // Pad out Address to the default alignment. This is to match how the system + // assembler handles the .bss section. Its size is always a multiple of 4. 
+ Address = alignTo(Address, DefaultSectionAlign); + + BSS.Address = BSSCsects.front().Address; + BSS.Size = Address - BSS.Address; + } + + SymbolTableEntryCount = SymbolTableIndex; + + // Calculate the RawPointer value for each section. + uint64_t RawPointer = sizeof(XCOFF::FileHeader32) + auxiliaryHeaderSize() + + Sections.size() * sizeof(XCOFF::SectionHeader32); + for (auto *Sec : Sections) { + if (!Sec->IsVirtual) { + Sec->FileOffsetToData = RawPointer; + RawPointer += Sec->Size; + } + } + + // TODO Add in Relocation storage to the RawPointer Calculation. + // TODO What to align the SymbolTable to? + // TODO Error check that the number of symbol table entries fits in 32-bits + // signed ... + if (SymbolTableEntryCount) + SymbolTableOffset = RawPointer; +} + +// Takes the log base 2 of the alignment and shifts the result into the 5 most +// significant bits of a byte, then or's in the csect type into the least +// significant 3 bits. +uint8_t getEncodedType(const MCSectionXCOFF *Sec) { + unsigned Align = Sec->getAlignment(); + assert(isPowerOf2_32(Align) && "Alignment must be a power of 2."); + unsigned Log2Align = Log2_32(Align); + // Result is a number in the range [0, 31] which fits in the 5 least + // significant bits. Shift this value into the 5 most significant bits, and + // bitwise-or in the csect type. 
+ uint8_t EncodedAlign = Log2Align << 3; + return EncodedAlign | Sec->getCSectType(); } } // end anonymous namespace @@ -90,5 +599,5 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) { std::unique_ptr llvm::createXCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return llvm::make_unique(std::move(MOTW), OS); + return std::make_unique(std::move(MOTW), OS); } diff --git a/lib/MCA/CodeEmitter.cpp b/lib/MCA/CodeEmitter.cpp new file mode 100644 index 00000000000..294107219cb --- /dev/null +++ b/lib/MCA/CodeEmitter.cpp @@ -0,0 +1,37 @@ +//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CodeEmitter API. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/CodeEmitter.h" + +namespace llvm { +namespace mca { + +CodeEmitter::EncodingInfo +CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) { + EncodingInfo &EI = Encodings[MCID]; + if (EI.second) + return EI; + + SmallVector Fixups; + const MCInst &Inst = Sequence[MCID]; + MCInst Relaxed(Sequence[MCID]); + if (MAB.mayNeedRelaxation(Inst, STI)) + MAB.relaxInstruction(Inst, STI, Relaxed); + + EI.first = Code.size(); + MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI); + EI.second = Code.size() - EI.first; + return EI; +} + +} // namespace mca +} // namespace llvm diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp index f0e8dfab868..0160e1f9f78 100644 --- a/lib/MCA/Context.cpp +++ b/lib/MCA/Context.cpp @@ -28,24 +28,23 @@ namespace llvm { namespace mca { std::unique_ptr -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { +Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) { const MCSchedModel &SM = STI.getSchedModel(); // Create the hardware units defining the backend. - auto RCU = llvm::make_unique(SM); - auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique(SM, Opts.LoadQueueSize, + auto RCU = std::make_unique(SM); + auto PRF = std::make_unique(SM, MRI, Opts.RegisterFileSize); + auto LSU = std::make_unique(SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias); - auto HWS = llvm::make_unique(SM, *LSU); + auto HWS = std::make_unique(SM, *LSU); // Create the pipeline stages. 
- auto Fetch = llvm::make_unique(SrcMgr); - auto Dispatch = llvm::make_unique(STI, MRI, Opts.DispatchWidth, + auto Fetch = std::make_unique(SrcMgr); + auto Dispatch = std::make_unique(STI, MRI, Opts.DispatchWidth, *RCU, *PRF); auto Execute = - llvm::make_unique(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = llvm::make_unique(*RCU, *PRF); + std::make_unique(*HWS, Opts.EnableBottleneckAnalysis); + auto Retire = std::make_unique(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); @@ -54,10 +53,10 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, addHardwareUnit(std::move(HWS)); // Build the pipeline. - auto StagePipeline = llvm::make_unique(); + auto StagePipeline = std::make_unique(); StagePipeline->appendStage(std::move(Fetch)); if (Opts.MicroOpQueueSize) - StagePipeline->appendStage(llvm::make_unique( + StagePipeline->appendStage(std::make_unique( Opts.MicroOpQueueSize, Opts.DecodersThroughput)); StagePipeline->appendStage(std::move(Dispatch)); StagePipeline->appendStage(std::move(Execute)); diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp index ac1a6a36547..0ee084c7ce1 100644 --- a/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/lib/MCA/HardwareUnits/LSUnit.cpp @@ -29,12 +29,12 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (!LQSize && EPI.LoadQueueID) { const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); - LQSize = LdQDesc.BufferSize; + LQSize = std::max(0, LdQDesc.BufferSize); } if (!SQSize && EPI.StoreQueueID) { const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); - SQSize = StQDesc.BufferSize; + SQSize = std::max(0, StQDesc.BufferSize); } } } @@ -72,9 +72,9 @@ unsigned LSUnit::dispatch(const InstRef &IR) { assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); if (Desc.MayLoad) - 
assignLQSlot(); + acquireLQSlot(); if (Desc.MayStore) - assignSQSlot(); + acquireSQSlot(); if (Desc.MayStore) { // Always create a new group for store operations. @@ -160,26 +160,28 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { } void LSUnitBase::onInstructionExecuted(const InstRef &IR) { + unsigned GroupID = IR.getInstruction()->getLSUTokenID(); + auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); + It->second->onInstructionExecuted(); + if (It->second->isExecuted()) + Groups.erase(It); +} + +void LSUnitBase::onInstructionRetired(const InstRef &IR) { const InstrDesc &Desc = IR.getInstruction()->getDesc(); bool IsALoad = Desc.MayLoad; bool IsAStore = Desc.MayStore; assert((IsALoad || IsAStore) && "Expected a memory operation!"); - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); - auto It = Groups.find(GroupID); - It->second->onInstructionExecuted(); - if (It->second->isExecuted()) { - Groups.erase(It); - } - if (IsALoad) { - UsedLQEntries--; + releaseLQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the load queue.\n"); } if (IsAStore) { - UsedSQEntries--; + releaseSQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the store queue.\n"); } diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp index 86a888ea8ca..7ea5506f11d 100644 --- a/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -147,7 +147,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, void RegisterFile::addRegisterWrite(WriteRef Write, MutableArrayRef UsedPhysRegs) { WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID && "Adding an invalid register definition?"); LLVM_DEBUG({ @@ -194,7 +194,7 @@ void 
RegisterFile::addRegisterWrite(WriteRef Write, } // Update zero registers. - unsigned ZeroRegisterID = + MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); if (IsWriteZero) { ZeroRegisters.setBit(ZeroRegisterID); @@ -247,7 +247,7 @@ void RegisterFile::removeRegisterWrite( if (WS.isEliminated()) return; - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID != 0 && "Invalidating an already invalid register?"); assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && @@ -255,7 +255,7 @@ void RegisterFile::removeRegisterWrite( assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); bool ShouldFreePhysRegs = !WS.isWriteZero(); - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + MCPhysReg RenameAs = RegisterMappings[RegID].second.RenameAs; if (RenameAs && RenameAs != RegID) { RegID = RenameAs; @@ -355,7 +355,7 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { void RegisterFile::collectWrites(const ReadState &RS, SmallVectorImpl &Writes) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); assert(RegID && RegID < RegisterMappings.size()); LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " << MRI.getName(RegID) << '\n'); @@ -397,7 +397,7 @@ void RegisterFile::collectWrites(const ReadState &RS, void RegisterFile::addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; RS.setPRF(RRI.IndexPlusCost.first); if (RS.isIndependentFromDef()) @@ -424,11 +424,11 @@ void RegisterFile::addRegisterRead(ReadState &RS, } } -unsigned RegisterFile::isAvailable(ArrayRef Regs) const { +unsigned RegisterFile::isAvailable(ArrayRef Regs) const { SmallVector NumPhysRegs(getNumRegisterFiles()); // Find how many new mappings must be created for each register file. 
- for (const unsigned RegID : Regs) { + for (const MCPhysReg RegID : Regs) { const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; if (Entry.first) diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp index 06f2476353d..088aea3e23c 100644 --- a/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -104,7 +104,7 @@ void ResourceState::dump() const { static std::unique_ptr getStrategyFor(const ResourceState &RS) { if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique(RS.getReadyMask()); + return std::make_unique(RS.getReadyMask()); return std::unique_ptr(nullptr); } @@ -114,7 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) Resource2Groups(SM.getNumProcResourceKinds() - 1, 0), ProcResID2Mask(SM.getNumProcResourceKinds(), 0), ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0), - ProcResUnitMask(0), ReservedResourceGroups(0) { + ProcResUnitMask(0), ReservedResourceGroups(0), + AvailableBuffers(~0ULL), ReservedBuffers(0) { computeProcResourceMasks(SM, ProcResID2Mask); // initialize vector ResIndex2ProcResID. 
@@ -127,7 +128,7 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) uint64_t Mask = ProcResID2Mask[I]; unsigned Index = getResourceStateIndex(Mask); Resources[Index] = - llvm::make_unique(*SM.getProcResource(I), I, Mask); + std::make_unique(*SM.getProcResource(I), I, Mask); Strategies[Index] = getStrategyFor(*Resources[Index]); } @@ -241,33 +242,41 @@ void ResourceManager::release(const ResourceRef &RR) { } ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; +ResourceManager::canBeDispatched(uint64_t ConsumedBuffers) const { + if (ConsumedBuffers & ReservedBuffers) + return ResourceStateEvent::RS_RESERVED; + if (ConsumedBuffers & (~AvailableBuffers)) + return ResourceStateEvent::RS_BUFFER_UNAVAILABLE; + return ResourceStateEvent::RS_BUFFER_AVAILABLE; } -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; +void ResourceManager::reserveBuffers(uint64_t ConsumedBuffers) { + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - + if (!RS.reserveBuffer()) + AvailableBuffers ^= CurrentBuffer; if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); + // Reserve this buffer now, and release it once pipeline resources + // consumed by the instruction become available again. + // We do this to simulate an in-order dispatch/issue of instructions. 
+ ReservedBuffers ^= CurrentBuffer; } } } -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); +void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) { + AvailableBuffers |= ConsumedBuffers; + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; + RS.releaseBuffer(); + // Do not unreserve dispatch hazard resource buffers. Wait until all + // pipeline resources have been freed too. + } } uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { @@ -322,7 +331,6 @@ void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { if (countPopulation(RR.first) == 1) release(RR); - releaseResource(RR.first); ResourcesFreed.push_back(RR); } @@ -336,7 +344,7 @@ void ResourceManager::reserveResource(uint64_t ResourceID) { const unsigned Index = getResourceStateIndex(ResourceID); ResourceState &Resource = *Resources[Index]; assert(Resource.isAResourceGroup() && !Resource.isReserved() && - "Unexpected resource found!"); + "Unexpected resource state found!"); Resource.setReserved(); ReservedResourceGroups ^= 1ULL << Index; } @@ -347,6 +355,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { Resource.clearReserved(); if (Resource.isAResourceGroup()) ReservedResourceGroups ^= 1ULL << Index; + // Now it is safe to release dispatch/issue resources. 
+ if (Resource.isADispatchHazard()) + ReservedBuffers ^= 1ULL << Index; } } // namespace mca diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp index 068c5062ccd..de519d7fd94 100644 --- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -21,65 +21,78 @@ namespace mca { RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + NumROBEntries(SM.MicroOpBufferSize), + AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; + AvailableEntries = EPI.ReorderBufferSize; MaxRetirePerCycle = EPI.MaxRetirePerCycle; } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); + NumROBEntries = AvailableEntries; + assert(NumROBEntries && "Invalid reorder buffer size!"); + Queue.resize(2 * NumROBEntries); } // Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. 
- NormalizedQuantity = std::max(NormalizedQuantity, 1U); +unsigned RetireControlUnit::dispatch(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + unsigned Entries = normalizeQuantity(Inst.getNumMicroOps()); + assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!"); + unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; + Queue[NextAvailableSlotIdx] = {IR, Entries, false}; + NextAvailableSlotIdx += std::max(1U, Entries); NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; + + AvailableEntries -= Entries; return TokenID; } -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; +const RetireControlUnit::RUToken &RetireControlUnit::getCurrentToken() const { + const RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; +#ifndef NDEBUG + const Instruction *Inst = Current.IR.getInstruction(); + assert(Inst && "Invalid RUToken in the RCU queue."); +#endif + return Current; +} + +unsigned RetireControlUnit::computeNextSlotIdx() const { + const RetireControlUnit::RUToken &Current = getCurrentToken(); + unsigned NextSlotIdx = CurrentInstructionSlotIdx + std::max(1U, Current.NumSlots); + return NextSlotIdx % Queue.size(); +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekNextToken() const { + return Queue[computeNextSlotIdx()]; } void RetireControlUnit::consumeCurrentToken() { RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); Current.IR.getInstruction()->retire(); // Update the slot index to be the next item in the circular queue. 
- CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx += std::max(1U, Current.NumSlots); CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; + AvailableEntries += Current.NumSlots; + Current = { InstRef(), 0U, false }; } void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + assert(Queue[TokenID].IR.getInstruction() && "Instruction was not dispatched!"); + assert(Queue[TokenID].Executed == false && "Instruction already executed!"); Queue[TokenID].Executed = true; } #ifndef NDEBUG void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; + dbgs() << "Retire Unit: { Total ROB Entries =" << NumROBEntries + << ", Available ROB entries=" << AvailableEntries << " }\n"; } #endif diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp index 0f0f2ffb832..8730336c666 100644 --- a/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/lib/MCA/HardwareUnits/Scheduler.cpp @@ -21,7 +21,7 @@ namespace mca { void Scheduler::initializeStrategy(std::unique_ptr S) { // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique(); + Strategy = S ? std::move(S) : std::make_unique(); } // Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
@@ -38,9 +38,8 @@ void Scheduler::dump() const { #endif Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers); + ResourceStateEvent RSE = + Resources->canBeDispatched(IR.getInstruction()->getUsedBuffers()); HadTokenStall = RSE != RS_BUFFER_AVAILABLE; switch (RSE) { @@ -106,7 +105,7 @@ void Scheduler::issueInstruction( bool HasDependentUsers = Inst.hasDependentUsers(); HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); - Resources->releaseBuffers(Inst.getDesc().Buffers); + Resources->releaseBuffers(Inst.getUsedBuffers()); issueInstructionImpl(IR, UsedResources); // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -300,8 +299,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const { bool Scheduler::dispatch(InstRef &IR) { Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - Resources->reserveBuffers(Desc.Buffers); + Resources->reserveBuffers(IS.getUsedBuffers()); // If necessary, reserve queue entries in the load-store unit (LSU). if (IS.isMemOp()) diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp index 829920366c9..bd28c733535 100644 --- a/lib/MCA/InstrBuilder.cpp +++ b/lib/MCA/InstrBuilder.cpp @@ -80,7 +80,7 @@ static void initializeUsedResources(InstrDesc &ID, if (PR.BufferSize < 0) { AllInOrderResources = false; } else { - Buffers.setBit(PRE->ProcResourceIdx); + Buffers.setBit(getResourceStateIndex(Mask)); AnyDispatchHazards |= (PR.BufferSize == 0); AllInOrderResources &= (PR.BufferSize <= 1); } @@ -139,9 +139,6 @@ static void initializeUsedResources(InstrDesc &ID, } } - ID.UsedProcResUnits = UsedResourceUnits; - ID.UsedProcResGroups = UsedResourceGroups; - // A SchedWrite may specify a number of cycles in which a resource group // is reserved. 
For example (on target x86; cpu Haswell): // @@ -177,20 +174,13 @@ static void initializeUsedResources(InstrDesc &ID, uint64_t Mask = ProcResourceMasks[I]; if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); + Buffers.setBit(getResourceStateIndex(Mask)); } } - // Now set the buffers. - if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } + ID.UsedBuffers = Buffers.getZExtValue(); + ID.UsedProcResUnits = UsedResourceUnits; + ID.UsedProcResGroups = UsedResourceGroups; LLVM_DEBUG({ for (const std::pair &R : ID.Resources) @@ -198,8 +188,12 @@ static void initializeUsedResources(InstrDesc &ID, << "Reserved=" << R.second.isReserved() << ", " << "#Units=" << R.second.NumUnits << ", " << "cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; + uint64_t BufferIDs = ID.UsedBuffers; + while (BufferIDs) { + uint64_t Current = BufferIDs & (-BufferIDs); + dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; + BufferIDs ^= Current; + } dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) << '\n'; @@ -464,9 +458,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no // "unmodeledSideEffects", then this logic optimistically assumes that any - // extra register operands in the variadic sequence are not register + // extra register operand in the variadic sequence is not a register // definition. 
- bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && !MCDesc.hasUnmodeledSideEffects(); for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); @@ -493,7 +486,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, return ErrorSuccess(); bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); + bool UsesBuffers = ID.UsedBuffers; bool UsesResources = !ID.Resources.empty(); if (!UsesMemory && !UsesBuffers && !UsesResources) return ErrorSuccess(); @@ -550,7 +543,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); // Create a new empty descriptor. - std::unique_ptr ID = llvm::make_unique(); + std::unique_ptr ID = std::make_unique(); ID->NumMicroOps = SCDesc.NumMicroOps; ID->SchedClassID = SchedClassID; @@ -619,7 +612,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) { if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = llvm::make_unique(D); + std::unique_ptr NewIS = std::make_unique(D); // Check if this is a dependency breaking instruction. APInt Mask; @@ -636,8 +629,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { } // Initialize Reads first. + MCPhysReg RegID = 0; for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; if (!RD.isImplicitRead()) { // explicit read. const MCOperand &Op = MCI.getOperand(RD.OpIndex); @@ -655,7 +648,6 @@ InstrBuilder::createInstruction(const MCInst &MCI) { continue; // Okay, this is a register operand. Create a ReadState for it. - assert(RegID > 0 && "Invalid register ID found!"); NewIS->getUses().emplace_back(RD, RegID); ReadState &RS = NewIS->getUses().back(); @@ -696,8 +688,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // Initialize writes. unsigned WriteIndex = 0; for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? 
WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); + RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg. if (WD.IsOptionalDef && !RegID) { ++WriteIndex; diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp index 001842bca31..e5f2c4fd1ee 100644 --- a/lib/MCA/Instruction.cpp +++ b/lib/MCA/Instruction.cpp @@ -18,7 +18,7 @@ namespace llvm { namespace mca { -void WriteState::writeStartEvent(unsigned IID, unsigned RegID, +void WriteState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { CRD.IID = IID; CRD.RegID = RegID; @@ -27,7 +27,7 @@ void WriteState::writeStartEvent(unsigned IID, unsigned RegID, DependentWrite = nullptr; } -void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) { +void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { assert(DependentWrites); assert(CyclesLeft == UNKNOWN_CYCLES); diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp index 7334a268e9a..3a3d8225916 100644 --- a/lib/MCA/Stages/DispatchStage.cpp +++ b/lib/MCA/Stages/DispatchStage.cpp @@ -44,7 +44,7 @@ void DispatchStage::notifyInstructionDispatched(const InstRef &IR, } bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector RegDefs; + SmallVector RegDefs; for (const WriteState &RegDef : IR.getInstruction()->getDefs()) RegDefs.emplace_back(RegDef.getRegisterID()); @@ -60,7 +60,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) const { } bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + const unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); if (RCU.isAvailable(NumMicroOps)) return true; notifyEvent( @@ -79,7 +79,7 @@ Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); const InstrDesc 
&Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; + const unsigned NumMicroOps = IS.getNumMicroOps(); if (NumMicroOps > DispatchWidth) { assert(AvailableEntries == DispatchWidth); AvailableEntries = 0; @@ -123,9 +123,10 @@ Error DispatchStage::dispatch(InstRef IR) { for (WriteState &WS : IS.getDefs()) PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + // Reserve entries in the reorder buffer. + unsigned RCUTokenID = RCU.dispatch(IR); + // Notify the instruction that it has been dispatched. + IS.dispatch(RCUTokenID); // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. @@ -155,8 +156,10 @@ Error DispatchStage::cycleStart() { } bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + const Instruction &Inst = *IR.getInstruction(); + unsigned NumMicroOps = Inst.getNumMicroOps(); + const InstrDesc &Desc = Inst.getDesc(); + unsigned Required = std::min(NumMicroOps, DispatchWidth); if (Required > AvailableEntries) return false; diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp index d2f5613a0fb..66135790a4c 100644 --- a/lib/MCA/Stages/EntryStage.cpp +++ b/lib/MCA/Stages/EntryStage.cpp @@ -33,7 +33,7 @@ void EntryStage::getNextInstruction() { if (!SM.hasNext()) return; SourceRef SR = SM.peekNext(); - std::unique_ptr Inst = llvm::make_unique(SR.second); + std::unique_ptr Inst = std::make_unique(SR.second); CurrentInstruction = InstRef(SR.first, Inst.get()); Instructions.emplace_back(std::move(Inst)); SM.updateNext(); diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp index a2b361fcd1b..2284ed7f281 100644 --- a/lib/MCA/Stages/ExecuteStage.cpp 
+++ b/lib/MCA/Stages/ExecuteStage.cpp @@ -56,12 +56,13 @@ Error ExecuteStage::issueInstruction(InstRef &IR) { SmallVector Ready; HWS.issueInstruction(IR, Used, Pending, Ready); - NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps; + Instruction &IS = *IR.getInstruction(); + NumIssuedOpcodes += IS.getNumMicroOps(); notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { + if (IS.isExecuted()) { notifyInstructionExecuted(IR); // FIXME: add a buffer of executed instructions. if (Error S = moveToTheNextStage(IR)) @@ -199,7 +200,8 @@ Error ExecuteStage::execute(InstRef &IR) { // units have been consumed. bool IsReadyInstruction = HWS.dispatch(IR); const Instruction &Inst = *IR.getInstruction(); - NumDispatchedOpcodes += Inst.getDesc().NumMicroOps; + unsigned NumMicroOps = Inst.getNumMicroOps(); + NumDispatchedOpcodes += NumMicroOps; notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); if (!IsReadyInstruction) { @@ -269,13 +271,17 @@ void ExecuteStage::notifyInstructionIssued( void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) + uint64_t UsedBuffers = IR.getInstruction()->getDesc().UsedBuffers; + if (!UsedBuffers) return; - SmallVector BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); + SmallVector BufferIDs(countPopulation(UsedBuffers), 0); + for (unsigned I = 0, E = BufferIDs.size(); I < E; ++I) { + uint64_t CurrentBufferMask = UsedBuffers & (-UsedBuffers); + BufferIDs[I] = HWS.getResourceID(CurrentBufferMask); + UsedBuffers ^= CurrentBufferMask; + } + if (Reserved) { for (HWEventListener *Listener : getListeners()) Listener->onReservedBuffers(IR, BufferIDs); diff --git a/lib/MCA/Stages/RetireStage.cpp 
b/lib/MCA/Stages/RetireStage.cpp index e1789dd7fa2..f792af748bc 100644 --- a/lib/MCA/Stages/RetireStage.cpp +++ b/lib/MCA/Stages/RetireStage.cpp @@ -31,11 +31,11 @@ llvm::Error RetireStage::cycleStart() { while (!RCU.isEmpty()) { if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); if (!Current.Executed) break; - RCU.consumeCurrentToken(); notifyInstructionRetired(Current.IR); + RCU.consumeCurrentToken(); NumRetired++; } @@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const { llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); + // Release the load/store queue entries. + if (Inst.isMemOp()) + LSU.onInstructionRetired(IR); + for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 49e66f46ab3..148c011d9cd 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -223,8 +223,8 @@ Expected ArchiveMemberHeader::getName(uint64_t Size) const { return Name.drop_back(1); } -Expected ArchiveMemberHeader::getSize() const { - uint32_t Ret; +Expected ArchiveMemberHeader::getSize() const { + uint64_t Ret; if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { std::string Buf; @@ -550,7 +550,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err) } else if (Buffer.startswith(Magic)) { IsThin = false; } else { - Err = make_error("File too small to be an archive", + Err = make_error("file too small to be an archive", object_error::invalid_file_type); return; } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 228f6b40c5e..5234b0e1823 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -16,8 
+16,10 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" @@ -147,7 +149,7 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { static void printRestOfMemberHeader( raw_ostream &Out, const sys::TimePoint &ModTime, - unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // The format has only 6 chars for uid and gid. Truncate if the provided @@ -164,7 +166,7 @@ static void printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, const sys::TimePoint &ModTime, unsigned UID, unsigned GID, unsigned Perms, - unsigned Size) { + uint64_t Size) { printWithSpacePadding(Out, Twine(Name) + "/", 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } @@ -172,11 +174,10 @@ printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, static void printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, const sys::TimePoint &ModTime, - unsigned UID, unsigned GID, unsigned Perms, - unsigned Size) { + unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { uint64_t PosAfterHeader = Pos + 60 + Name.size(); // Pad so that even 64 bit object files are aligned. 
- unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); unsigned NameWithPadding = Name.size() + Pad; printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, @@ -208,7 +209,7 @@ static void printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, StringMap &MemberNames, object::Archive::Kind Kind, bool Thin, const NewArchiveMember &M, - sys::TimePoint ModTime, unsigned Size) { + sys::TimePoint ModTime, uint64_t Size) { if (isBSDLike(Kind)) return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, M.Perms, Size); @@ -243,7 +244,7 @@ struct MemberData { static MemberData computeStringTable(StringRef Names) { unsigned Size = Names.size(); - unsigned Pad = OffsetToAlignment(Size, 2); + unsigned Pad = offsetToAlignment(Size, Align(2)); std::string Header; raw_string_ostream Out(Header); printWithSpacePadding(Out, "//", 48); @@ -307,8 +308,8 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. // We do this for all bsd formats because it simplifies aligning members. - unsigned Alignment = isBSDLike(Kind) ? 8 : 2; - unsigned Pad = OffsetToAlignment(Size, Alignment); + const Align Alignment(isBSDLike(Kind) ? 8 : 2); + unsigned Pad = offsetToAlignment(Size, Alignment); Size += Pad; if (isBSDLike(Kind)) { @@ -464,8 +465,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // uniformly. This matches the behaviour with cctools and ensures that ld64 // is happy with archives that we generate. unsigned MemberPadding = - isDarwin(Kind) ? OffsetToAlignment(Data.size(), 8) : 0; - unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); + isDarwin(Kind) ? 
offsetToAlignment(Data.size(), Align(8)) : 0; + unsigned TailPadding = + offsetToAlignment(Data.size() + MemberPadding, Align(2)); StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); sys::TimePoint ModTime; @@ -474,8 +476,17 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); else ModTime = M.ModTime; + + uint64_t Size = Buf.getBufferSize() + MemberPadding; + if (Size > object::Archive::MaxMemberSize) { + std::string StringMsg = + "File " + M.MemberName.str() + " exceeds size limit"; + return make_error( + std::move(StringMsg), object::object_error::parse_failed); + } + printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, - ModTime, Buf.getBufferSize() + MemberPadding); + ModTime, Size); Out.flush(); Expected> Symbols = diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index a953c1d8cb8..944d2bc1bca 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -18,6 +18,7 @@ #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/Minidump.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/TapiUniversal.h" #include "llvm/Object/WindowsResource.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -86,6 +87,8 @@ Expected> object::createBinary(MemoryBufferRef Buffer, return errorCodeToError(object_error::invalid_file_type); case file_magic::minidump: return MinidumpFile::create(Buffer); + case file_magic::tapi_file: + return TapiUniversal::create(Buffer); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 854664e679d..2c0f6dc2b1e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -936,29 +936,6 @@ iterator_range COFFObjectFile::base_relocs() const { return make_range(base_reloc_begin(), base_reloc_end()); } -std::error_code -COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const { 
- Res = COFFHeader; - return std::error_code(); -} - -std::error_code -COFFObjectFile::getCOFFBigObjHeader(const coff_bigobj_file_header *&Res) const { - Res = COFFBigObjHeader; - return std::error_code(); -} - -std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { - Res = PE32Header; - return std::error_code(); -} - -std::error_code -COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { - Res = PE32PlusHeader; - return std::error_code(); -} - std::error_code COFFObjectFile::getDataDirectory(uint32_t Index, const data_directory *&Res) const { @@ -994,11 +971,12 @@ std::error_code COFFObjectFile::getSection(int32_t Index, std::error_code COFFObjectFile::getSection(StringRef SectionName, const coff_section *&Result) const { Result = nullptr; - StringRef SecName; for (const SectionRef &Section : sections()) { - if (std::error_code E = Section.getName(SecName)) - return E; - if (SecName == SectionName) { + auto NameOrErr = Section.getName(); + if (!NameOrErr) + return errorToErrorCode(NameOrErr.takeError()); + + if (*NameOrErr == SectionName) { Result = getCOFFSection(Section); return std::error_code(); } @@ -1684,9 +1662,12 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { return std::error_code(); } -#define RETURN_IF_ERROR(E) \ - if (E) \ - return E; +#define RETURN_IF_ERROR(Expr) \ + do { \ + Error E = (Expr); \ + if (E) \ + return std::move(E); \ + } while (0) Expected> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { @@ -1715,11 +1696,168 @@ ResourceSectionRef::getTableAtOffset(uint32_t Offset) { return *Table; } +Expected +ResourceSectionRef::getTableEntryAtOffset(uint32_t Offset) { + const coff_resource_dir_entry *Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + +Expected +ResourceSectionRef::getDataEntryAtOffset(uint32_t Offset) { + const coff_resource_data_entry 
*Entry = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Entry)); + assert(Entry != nullptr); + return *Entry; +} + Expected ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + assert(Entry.Offset.isSubDir()); return getTableAtOffset(Entry.Offset.value()); } +Expected +ResourceSectionRef::getEntryData(const coff_resource_dir_entry &Entry) { + assert(!Entry.Offset.isSubDir()); + return getDataEntryAtOffset(Entry.Offset.value()); +} + Expected ResourceSectionRef::getBaseTable() { return getTableAtOffset(0); } + +Expected +ResourceSectionRef::getTableEntry(const coff_resource_dir_table &Table, + uint32_t Index) { + if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries)) + return createStringError(object_error::parse_failed, "index out of range"); + const uint8_t *TablePtr = reinterpret_cast(&Table); + ptrdiff_t TableOffset = TablePtr - BBS.data().data(); + return getTableEntryAtOffset(TableOffset + sizeof(Table) + + Index * sizeof(coff_resource_dir_entry)); +} + +Error ResourceSectionRef::load(const COFFObjectFile *O) { + for (const SectionRef &S : O->sections()) { + Expected Name = S.getName(); + if (!Name) + return Name.takeError(); + + if (*Name == ".rsrc" || *Name == ".rsrc$01") + return load(O, S); + } + return createStringError(object_error::parse_failed, + "no resource section found"); +} + +Error ResourceSectionRef::load(const COFFObjectFile *O, const SectionRef &S) { + Obj = O; + Section = S; + Expected Contents = Section.getContents(); + if (!Contents) + return Contents.takeError(); + BBS = BinaryByteStream(*Contents, support::little); + const coff_section *COFFSect = Obj->getCOFFSection(Section); + ArrayRef OrigRelocs = Obj->getRelocations(COFFSect); + Relocs.reserve(OrigRelocs.size()); + for (const coff_relocation &R : OrigRelocs) + Relocs.push_back(&R); + std::sort(Relocs.begin(), Relocs.end(), + [](const coff_relocation *A, const coff_relocation 
*B) { + return A->VirtualAddress < B->VirtualAddress; + }); + return Error::success(); +} + +Expected +ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) { + if (!Obj) + return createStringError(object_error::parse_failed, "no object provided"); + + // Find a potential relocation at the DataRVA field (first member of + // the coff_resource_data_entry struct). + const uint8_t *EntryPtr = reinterpret_cast(&Entry); + ptrdiff_t EntryOffset = EntryPtr - BBS.data().data(); + coff_relocation RelocTarget{ulittle32_t(EntryOffset), ulittle32_t(0), + ulittle16_t(0)}; + auto RelocsForOffset = + std::equal_range(Relocs.begin(), Relocs.end(), &RelocTarget, + [](const coff_relocation *A, const coff_relocation *B) { + return A->VirtualAddress < B->VirtualAddress; + }); + + if (RelocsForOffset.first != RelocsForOffset.second) { + // We found a relocation with the right offset. Check that it does have + // the expected type. + const coff_relocation &R = **RelocsForOffset.first; + uint16_t RVAReloc; + switch (Obj->getMachine()) { + case COFF::IMAGE_FILE_MACHINE_I386: + RVAReloc = COFF::IMAGE_REL_I386_DIR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + RVAReloc = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + RVAReloc = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_ARM64: + RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB; + break; + default: + return createStringError(object_error::parse_failed, + "unsupported architecture"); + } + if (R.Type != RVAReloc) + return createStringError(object_error::parse_failed, + "unexpected relocation type"); + // Get the relocation's symbol + Expected Sym = Obj->getSymbol(R.SymbolTableIndex); + if (!Sym) + return Sym.takeError(); + const coff_section *Section = nullptr; + // And the symbol's section + if (std::error_code EC = Obj->getSection(Sym->getSectionNumber(), Section)) + return errorCodeToError(EC); + // Add the initial value of DataRVA to the symbol's offset to 
find the + // data it points at. + uint64_t Offset = Entry.DataRVA + Sym->getValue(); + ArrayRef Contents; + if (Error E = Obj->getSectionContents(Section, Contents)) + return std::move(E); + if (Offset + Entry.DataSize > Contents.size()) + return createStringError(object_error::parse_failed, + "data outside of section"); + // Return a reference to the data inside the section. + return StringRef(reinterpret_cast(Contents.data()) + Offset, + Entry.DataSize); + } else { + // Relocatable objects need a relocation for the DataRVA field. + if (Obj->isRelocatableObject()) + return createStringError(object_error::parse_failed, + "no relocation found for DataRVA"); + + // Locate the section that contains the address that DataRVA points at. + uint64_t VA = Entry.DataRVA + Obj->getImageBase(); + for (const SectionRef &S : Obj->sections()) { + if (VA >= S.getAddress() && + VA + Entry.DataSize <= S.getAddress() + S.getSize()) { + uint64_t Offset = VA - S.getAddress(); + Expected Contents = S.getContents(); + if (!Contents) + return Contents.takeError(); + return Contents->slice(Offset, Offset + Entry.DataSize); + } + } + return createStringError(object_error::parse_failed, + "address not found in image"); + } +} diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp index ec15e6f69ad..11efd857d1a 100644 --- a/lib/Object/Decompressor.cpp +++ b/lib/Object/Decompressor.cpp @@ -56,7 +56,7 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, return createError("corrupted compressed section header"); DataExtractor Extractor(SectionData, IsLittleEndian, 0); - uint32_t Offset = 0; + uint64_t Offset = 0; if (Extractor.getUnsigned(&Offset, Is64Bit ? 
sizeof(Elf64_Word) : sizeof(Elf32_Word)) != ELFCOMPRESS_ZLIB) @@ -77,10 +77,15 @@ bool Decompressor::isGnuStyle(StringRef Name) { } bool Decompressor::isCompressed(const object::SectionRef &Section) { - StringRef Name; - if (Section.getName(Name)) - return false; - return Section.isCompressed() || isGnuStyle(Name); + if (Section.isCompressed()) + return true; + + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + return isGnuStyle(*SecNameOrErr); + + consumeError(SecNameOrErr.takeError()); + return false; } bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 8660b1a64bd..d491288579d 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -255,6 +255,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES); STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_EHDR); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_PART_PHDR); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index c7b71579304..bf6ffd6c37b 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -43,7 +43,16 @@ const EnumEntry llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = { {"File", "FILE", ELF::STT_FILE}, {"Common", "COMMON", ELF::STT_COMMON}, {"TLS", "TLS", ELF::STT_TLS}, - {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}}; + {"Unknown", ": 7", 7}, + {"Unknown", ": 8", 8}, + {"Unknown", ": 9", 9}, + {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}, + {"OS Specific", ": 11", 11}, + {"OS Specific", ": 12", 12}, + {"Proc Specific", ": 13", 13}, + {"Proc Specific", ": 14", 14}, + {"Proc Specific", ": 15", 15} +}; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) : 
ObjectFile(Type, Source) {} @@ -54,7 +63,7 @@ createPtr(MemoryBufferRef Object) { auto Ret = ELFObjectFile::create(Object); if (Error E = Ret.takeError()) return std::move(E); - return make_unique>(std::move(*Ret)); + return std::make_unique>(std::move(*Ret)); } Expected> @@ -194,7 +203,7 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const { default: break; case ARMBuildAttrs::Not_Allowed: - Features.AddFeature("vfp2d16sp", false); + Features.AddFeature("vfp2sp", false); Features.AddFeature("vfp3d16sp", false); Features.AddFeature("vfp4d16sp", false); break; @@ -347,6 +356,21 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { case ARMBuildAttrs::v7E_M: Triple += "v7em"; break; + case ARMBuildAttrs::v8_A: + Triple += "v8a"; + break; + case ARMBuildAttrs::v8_R: + Triple += "v8r"; + break; + case ARMBuildAttrs::v8_M_Base: + Triple += "v8m.base"; + break; + case ARMBuildAttrs::v8_M_Main: + Triple += "v8m.main"; + break; + case ARMBuildAttrs::v8_1_M_Main: + Triple += "v8.1m.main"; + break; } } if (!isLittleEndian()) @@ -383,9 +407,13 @@ ELFObjectFileBase::getPltAddresses() const { return {}; Optional Plt = None, RelaPlt = None, GotPlt = None; for (const SectionRef &Section : sections()) { - StringRef Name; - if (Section.getName(Name)) + Expected NameOrErr = Section.getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); continue; + } + StringRef Name = *NameOrErr; + if (Name == ".plt") Plt = Section; else if (Name == ".rela.plt" || Name == ".rel.plt") diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 5aec844003c..c0c873f9735 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -57,12 +57,6 @@ namespace { } // end anonymous namespace -static const std::array validArchs = { - "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e", - "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m", - "armv7s", "arm64", "arm64_32", "ppc", "ppc64", -}; - static Error 
malformedError(const Twine &Msg) { return make_error("truncated or malformed object (" + Msg + ")", @@ -1951,6 +1945,11 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { return SectSize; } +ArrayRef MachOObjectFile::getSectionContents(uint32_t Offset, + uint64_t Size) const { + return arrayRefFromStringRef(getData().substr(Offset, Size)); +} + Expected> MachOObjectFile::getSectionContents(DataRefImpl Sec) const { uint32_t Offset; @@ -1966,7 +1965,7 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec) const { Size = Sect.size; } - return arrayRefFromStringRef(getData().substr(Offset, Size)); + return getSectionContents(Offset, Size); } uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const { @@ -1992,13 +1991,12 @@ Expected MachOObjectFile::getSection(unsigned SectionIndex) const { } Expected MachOObjectFile::getSection(StringRef SectionName) const { - StringRef SecName; for (const SectionRef &Section : sections()) { - if (std::error_code E = Section.getName(SecName)) - return errorCodeToError(E); - if (SecName == SectionName) { + auto NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + if (*NameOrErr == SectionName) return Section; - } } return errorCodeToError(object_error::parse_failed); } @@ -2724,11 +2722,19 @@ Triple MachOObjectFile::getHostArch() { } bool MachOObjectFile::isValidArch(StringRef ArchFlag) { - return std::find(validArchs.cbegin(), validArchs.cend(), ArchFlag) != - validArchs.cend(); + auto validArchs = getValidArchs(); + return llvm::find(validArchs, ArchFlag) != validArchs.end(); } -ArrayRef MachOObjectFile::getValidArchs() { return validArchs; } +ArrayRef MachOObjectFile::getValidArchs() { + static const std::array validArchs = {{ + "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e", + "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m", + "armv7s", "arm64", "arm64_32", "ppc", "ppc64", + }}; + + return validArchs; +} Triple::ArchType MachOObjectFile::getArch() const { 
return getArch(getCPUType(*this)); @@ -3427,7 +3433,7 @@ iterator_range MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O, ArrayRef Opcodes, bool is64) { if (O->BindRebaseSectionTable == nullptr) - O->BindRebaseSectionTable = llvm::make_unique(O); + O->BindRebaseSectionTable = std::make_unique(O); MachORebaseEntry Start(&Err, O, Opcodes, is64); Start.moveToFirst(); @@ -3993,7 +3999,11 @@ BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) { uint64_t CurSegAddress; for (const SectionRef &Section : Obj->sections()) { SectionInfo Info; - Section.getName(Info.SectionName); + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + consumeError(NameOrErr.takeError()); + else + Info.SectionName = *NameOrErr; Info.Address = Section.getAddress(); Info.Size = Section.getSize(); Info.SegmentName = @@ -4094,7 +4104,7 @@ MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O, ArrayRef Opcodes, bool is64, MachOBindEntry::Kind BKind) { if (O->BindRebaseSectionTable == nullptr) - O->BindRebaseSectionTable = llvm::make_unique(O); + O->BindRebaseSectionTable = std::make_unique(O); MachOBindEntry Start(&Err, O, Opcodes, is64, BKind); Start.moveToFirst(); @@ -4610,7 +4620,7 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index, SmallVectorImpl &Out) const { DataExtractor extractor(ObjectFile::getData(), true, 0); - uint32_t offset = Index; + uint64_t offset = Index; uint64_t data = 0; while (uint64_t delta = extractor.getULEB128(&offset)) { data += delta; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index b3f0993412c..a178ecde949 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -155,15 +155,16 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) ") extends past the end of the file"); return; } -#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */ - if (A.getAlign() > MAXSECTALIGN) { - Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large " - "for 
cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + - Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + - ") (maximum 2^" + Twine(MAXSECTALIGN) + ")"); + + if (A.getAlign() > MaxSectionAlignment) { + Err = malformedError("align (2^" + Twine(A.getAlign()) + + ") too large for cputype (" + Twine(A.getCPUType()) + + ") cpusubtype (" + + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + + ") (maximum 2^" + Twine(MaxSectionAlignment) + ")"); return; } - if(A.getOffset() % (1 << A.getAlign()) != 0){ + if(A.getOffset() % (1ull << A.getAlign()) != 0){ Err = malformedError("offset: " + Twine(A.getOffset()) + " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + @@ -209,19 +210,34 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err) Err = Error::success(); } -Expected> +Expected MachOUniversalBinary::getObjectForArch(StringRef ArchName) const { if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch) return make_error("Unknown architecture " "named: " + ArchName, object_error::arch_not_found); - - for (auto &Obj : objects()) + for (const auto &Obj : objects()) if (Obj.getArchFlagName() == ArchName) - return Obj.getAsObjectFile(); + return Obj; return make_error("fat file does not " "contain " + ArchName, object_error::arch_not_found); } + +Expected> +MachOUniversalBinary::getMachOObjectForArch(StringRef ArchName) const { + Expected O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsObjectFile(); +} + +Expected> +MachOUniversalBinary::getArchiveForArch(StringRef ArchName) const { + Expected O = getObjectForArch(ArchName); + if (!O) + return O.takeError(); + return O->getAsArchive(); +} diff --git a/lib/Object/Minidump.cpp b/lib/Object/Minidump.cpp index 7b5b2155869..3e932fe7be2 100644 --- a/lib/Object/Minidump.cpp +++ b/lib/Object/Minidump.cpp @@ -53,13 +53,30 @@ Expected MinidumpFile::getString(size_t Offset) const { return Result; } 
-template -Expected> MinidumpFile::getListStream(StreamType Stream) const { - auto OptionalStream = getRawStream(Stream); - if (!OptionalStream) +Expected> +MinidumpFile::getMemoryInfoList() const { + Optional> Stream = getRawStream(StreamType::MemoryInfoList); + if (!Stream) return createError("No such stream"); - auto ExpectedSize = - getDataSliceAs(*OptionalStream, 0, 1); + auto ExpectedHeader = + getDataSliceAs(*Stream, 0, 1); + if (!ExpectedHeader) + return ExpectedHeader.takeError(); + const minidump::MemoryInfoListHeader &H = ExpectedHeader.get()[0]; + Expected> Data = + getDataSlice(*Stream, H.SizeOfHeader, H.SizeOfEntry * H.NumberOfEntries); + if (!Data) + return Data.takeError(); + return make_range(MemoryInfoIterator(*Data, H.SizeOfEntry), + MemoryInfoIterator({}, H.SizeOfEntry)); +} + +template +Expected> MinidumpFile::getListStream(StreamType Type) const { + Optional> Stream = getRawStream(Type); + if (!Stream) + return createError("No such stream"); + auto ExpectedSize = getDataSliceAs(*Stream, 0, 1); if (!ExpectedSize) return ExpectedSize.takeError(); @@ -69,10 +86,10 @@ Expected> MinidumpFile::getListStream(StreamType Stream) const { // Some producers insert additional padding bytes to align the list to an // 8-byte boundary. Check for that by comparing the list size with the overall // stream size. 
- if (ListOffset + sizeof(T) * ListSize < OptionalStream->size()) + if (ListOffset + sizeof(T) * ListSize < Stream->size()) ListOffset = 8; - return getDataSliceAs(*OptionalStream, ListOffset, ListSize); + return getDataSliceAs(*Stream, ListOffset, ListSize); } template Expected> MinidumpFile::getListStream(StreamType) const; @@ -109,13 +126,14 @@ MinidumpFile::create(MemoryBufferRef Source) { return ExpectedStreams.takeError(); DenseMap StreamMap; - for (const auto &Stream : llvm::enumerate(*ExpectedStreams)) { - StreamType Type = Stream.value().Type; - const LocationDescriptor &Loc = Stream.value().Location; + for (const auto &StreamDescriptor : llvm::enumerate(*ExpectedStreams)) { + StreamType Type = StreamDescriptor.value().Type; + const LocationDescriptor &Loc = StreamDescriptor.value().Location; - auto ExpectedStream = getDataSlice(Data, Loc.RVA, Loc.DataSize); - if (!ExpectedStream) - return ExpectedStream.takeError(); + Expected> Stream = + getDataSlice(Data, Loc.RVA, Loc.DataSize); + if (!Stream) + return Stream.takeError(); if (Type == StreamType::Unused && Loc.DataSize == 0) { // Ignore dummy streams. This is technically ill-formed, but a number of @@ -128,7 +146,7 @@ MinidumpFile::create(MemoryBufferRef Source) { return createError("Cannot handle one of the minidump streams"); // Update the directory map, checking for duplicate stream types. 
- if (!StreamMap.try_emplace(Type, Stream.index()).second) + if (!StreamMap.try_emplace(Type, StreamDescriptor.index()).second) return createError("Duplicate stream type"); } diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index d84798cc6dd..b486e9f5c9a 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -138,7 +138,7 @@ LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR, char **ErrorMessage) { auto universal = cast(unwrap(BR)); Expected> ObjOrErr( - universal->getObjectForArch({Arch, ArchLen})); + universal->getMachOObjectForArch({Arch, ArchLen})); if (!ObjOrErr) { *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str()); return nullptr; @@ -251,10 +251,10 @@ void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { // SectionRef accessors const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { - StringRef ret; - if (std::error_code ec = (*unwrap(SI))->getName(ret)) - report_fatal_error(ec.message()); - return ret.data(); + auto NameOrErr = (*unwrap(SI))->getName(); + if (!NameOrErr) + report_fatal_error(NameOrErr.takeError()); + return NameOrErr->data(); } uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 101f5dcc082..e0e63a5a7d7 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -67,8 +67,10 @@ Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; } bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { - if (Expected NameOrErr = getSectionName(Sec)) + Expected NameOrErr = getSectionName(Sec); + if (NameOrErr) return *NameOrErr == ".llvmbc"; + consumeError(NameOrErr.takeError()); return false; } @@ -82,7 +84,8 @@ bool ObjectFile::isBerkeleyData(DataRefImpl Sec) const { return isSectionData(Sec); } -section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { +Expected 
+ObjectFile::getRelocatedSection(DataRefImpl Sec) const { return section_iterator(SectionRef(Sec, this)); } @@ -103,7 +106,7 @@ Triple ObjectFile::makeTriple() const { TheTriple.setObjectFormat(Triple::MachO); if (isCOFF()) { - const auto COFFObj = dyn_cast(this); + const auto COFFObj = cast(this); if (COFFObj->getArch() == Triple::thumb) TheTriple.setTriple("thumbv7-windows"); } @@ -127,6 +130,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { case file_magic::pdb: case file_magic::minidump: return errorCodeToError(object_error::invalid_file_type); + case file_magic::tapi_file: + return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_relocatable: case file_magic::elf_executable: diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp index 0a243f32e12..ca89f5671b8 100644 --- a/lib/Object/RelocationResolver.cpp +++ b/lib/Object/RelocationResolver.cpp @@ -30,6 +30,7 @@ static bool supportsX86_64(uint64_t Type) { case ELF::R_X86_64_DTPOFF32: case ELF::R_X86_64_DTPOFF64: case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: case ELF::R_X86_64_32: case ELF::R_X86_64_32S: return true; @@ -47,6 +48,7 @@ static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) { case ELF::R_X86_64_DTPOFF64: return S + getELFAddend(R); case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: return S + getELFAddend(R) - R.getOffset(); case ELF::R_X86_64_32: case ELF::R_X86_64_32S: @@ -90,9 +92,9 @@ static bool supportsBPF(uint64_t Type) { static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) { switch (R.getType()) { case ELF::R_BPF_64_32: - return S & 0xFFFFFFFF; + return (S + A) & 0xFFFFFFFF; case ELF::R_BPF_64_64: - return S; + return S + A; default: llvm_unreachable("Invalid relocation type"); } @@ -335,6 +337,8 @@ static bool supportsRISCV(uint64_t Type) { case ELF::R_RISCV_NONE: case ELF::R_RISCV_32: case ELF::R_RISCV_64: + case ELF::R_RISCV_SET6: + case 
ELF::R_RISCV_SUB6: case ELF::R_RISCV_ADD8: case ELF::R_RISCV_SUB8: case ELF::R_RISCV_ADD16: @@ -358,6 +362,10 @@ static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) { return (S + RA) & 0xFFFFFFFF; case ELF::R_RISCV_64: return S + RA; + case ELF::R_RISCV_SET6: + return (A + (S + RA)) & 0xFF; + case ELF::R_RISCV_SUB6: + return (A - (S + RA)) & 0xFF; case ELF::R_RISCV_ADD8: return (A + (S + RA)) & 0xFF; case ELF::R_RISCV_SUB8: @@ -420,6 +428,47 @@ static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) { } } +static bool supportsCOFFARM(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM(RelocationRef R, uint64_t S, uint64_t A) { + switch (R.getType()) { + case COFF::IMAGE_REL_ARM_SECREL: + case COFF::IMAGE_REL_ARM_ADDR32: + return (S + A) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } +} + +static bool supportsCOFFARM64(uint64_t Type) { + switch (Type) { + case COFF::IMAGE_REL_ARM64_SECREL: + case COFF::IMAGE_REL_ARM64_ADDR64: + return true; + default: + return false; + } +} + +static uint64_t resolveCOFFARM64(RelocationRef R, uint64_t S, uint64_t A) { + switch (R.getType()) { + case COFF::IMAGE_REL_ARM64_SECREL: + return (S + A) & 0xFFFFFFFF; + case COFF::IMAGE_REL_ARM64_ADDR64: + return S + A; + default: + llvm_unreachable("Invalid relocation type"); + } +} + static bool supportsMachOX86_64(uint64_t Type) { return Type == MachO::X86_64_RELOC_UNSIGNED; } @@ -472,9 +521,19 @@ static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) { std::pair getRelocationResolver(const ObjectFile &Obj) { if (Obj.isCOFF()) { - if (Obj.getBytesInAddress() == 8) + switch (Obj.getArch()) { + case Triple::x86_64: return {supportsCOFFX86_64, resolveCOFFX86_64}; - return {supportsCOFFX86, resolveCOFFX86}; + case Triple::x86: + return {supportsCOFFX86, resolveCOFFX86}; + case 
Triple::arm: + case Triple::thumb: + return {supportsCOFFARM, resolveCOFFARM}; + case Triple::aarch64: + return {supportsCOFFARM64, resolveCOFFARM64}; + default: + return {nullptr, nullptr}; + } } else if (Obj.isELF()) { if (Obj.getBytesInAddress() == 8) { switch (Obj.getArch()) { diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 2b152b7d8da..3db4ad9ed14 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -53,6 +53,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, case file_magic::windows_resource: case file_magic::pdb: case file_magic::minidump: + case file_magic::tapi_file: return errorCodeToError(object_error::invalid_file_type); case file_magic::elf: case file_magic::elf_executable: diff --git a/lib/Object/TapiFile.cpp b/lib/Object/TapiFile.cpp new file mode 100644 index 00000000000..c409bd8e599 --- /dev/null +++ b/lib/Object/TapiFile.cpp @@ -0,0 +1,104 @@ +//===- TapiFile.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamic Library Stub format. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +static constexpr StringLiteral ObjC1ClassNamePrefix = ".objc_class_name_"; +static constexpr StringLiteral ObjC2ClassNamePrefix = "_OBJC_CLASS_$_"; +static constexpr StringLiteral ObjC2MetaClassNamePrefix = "_OBJC_METACLASS_$_"; +static constexpr StringLiteral ObjC2EHTypePrefix = "_OBJC_EHTYPE_$_"; +static constexpr StringLiteral ObjC2IVarPrefix = "_OBJC_IVAR_$_"; + +static uint32_t getFlags(const Symbol *Sym) { + uint32_t Flags = BasicSymbolRef::SF_Global; + if (Sym->isUndefined()) + Flags |= BasicSymbolRef::SF_Undefined; + else + Flags |= BasicSymbolRef::SF_Exported; + + if (Sym->isWeakDefined() || Sym->isWeakReferenced()) + Flags |= BasicSymbolRef::SF_Weak; + + return Flags; +} + +TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &interface, + Architecture Arch) + : SymbolicFile(ID_TapiFile, Source) { + for (const auto *Symbol : interface.symbols()) { + if (!Symbol->getArchitectures().has(Arch)) + continue; + + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCClass: + if (interface.getPlatforms().count(PlatformKind::macOS) && + Arch == AK_i386) { + Symbols.emplace_back(ObjC1ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } else { + Symbols.emplace_back(ObjC2ClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + Symbols.emplace_back(ObjC2MetaClassNamePrefix, Symbol->getName(), + getFlags(Symbol)); + } + break; + case SymbolKind::ObjectiveCClassEHType: + Symbols.emplace_back(ObjC2EHTypePrefix, Symbol->getName(), + getFlags(Symbol)); + break; + case SymbolKind::ObjectiveCInstanceVariable: + 
Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(), + getFlags(Symbol)); + break; + } + } +} + +TapiFile::~TapiFile() = default; + +void TapiFile::moveSymbolNext(DataRefImpl &DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + DRI.p = reinterpret_cast(++Sym); +} + +Error TapiFile::printSymbolName(raw_ostream &OS, DataRefImpl DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + OS << Sym->Prefix << Sym->Name; + return Error::success(); +} + +uint32_t TapiFile::getSymbolFlags(DataRefImpl DRI) const { + const auto *Sym = reinterpret_cast(DRI.p); + return Sym->Flags; +} + +basic_symbol_iterator TapiFile::symbol_begin() const { + DataRefImpl DRI; + DRI.p = reinterpret_cast(&*Symbols.begin()); + return BasicSymbolRef{DRI, this}; +} + +basic_symbol_iterator TapiFile::symbol_end() const { + DataRefImpl DRI; + DRI.p = reinterpret_cast(&*Symbols.end()); + return BasicSymbolRef{DRI, this}; +} diff --git a/lib/Object/TapiUniversal.cpp b/lib/Object/TapiUniversal.cpp new file mode 100644 index 00000000000..b3273e345a6 --- /dev/null +++ b/lib/Object/TapiUniversal.cpp @@ -0,0 +1,54 @@ +//===- TapiUniversal.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Text-based Dynamic Library Stub format. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/TapiUniversal.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" + +using namespace llvm; +using namespace MachO; +using namespace object; + +TapiUniversal::TapiUniversal(MemoryBufferRef Source, Error &Err) + : Binary(ID_TapiUniversal, Source) { + auto Result = TextAPIReader::get(Source); + ErrorAsOutParameter ErrAsOuParam(&Err); + if (!Result) { + Err = Result.takeError(); + return; + } + ParsedFile = std::move(Result.get()); + + auto Archs = ParsedFile->getArchitectures(); + for (auto Arch : Archs) + Architectures.emplace_back(Arch); +} + +TapiUniversal::~TapiUniversal() = default; + +Expected> +TapiUniversal::ObjectForArch::getAsObjectFile() const { + return std::unique_ptr(new TapiFile(Parent->getMemoryBufferRef(), + *Parent->ParsedFile.get(), + Parent->Architectures[Index])); +} + +Expected> +TapiUniversal::create(MemoryBufferRef Source) { + Error Err = Error::success(); + std::unique_ptr Ret(new TapiUniversal(Source, Err)); + if (Err) + return std::move(Err); + return std::move(Ret); +} diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 82aa1830dce..014b403556d 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void WasmSymbol::dump() const { print(dbgs()); } Expected> ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) { Error Err = Error::success(); - auto ObjectFile = llvm::make_unique(Buffer, Err); + auto ObjectFile = std::make_unique(Buffer, Err); if (Err) return std::move(Err); @@ -781,7 +781,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { break; case wasm::R_WASM_GLOBAL_INDEX_LEB: // R_WASM_GLOBAL_INDEX_LEB are can be used against function and data - // symbols to refer to thier GOT enties. 
+ // symbols to refer to their GOT entries. if (!isValidGlobalSymbol(Reloc.Index) && !isValidDataSymbol(Reloc.Index) && !isValidFunctionSymbol(Reloc.Index)) @@ -881,12 +881,9 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) { Sig.Params.push_back(wasm::ValType(ParamType)); } uint32_t ReturnCount = readVaruint32(Ctx); - if (ReturnCount) { - if (ReturnCount != 1) { - return make_error( - "Multiple return types not supported", object_error::parse_failed); - } - Sig.Returns.push_back(wasm::ValType(readUint8(Ctx))); + while (ReturnCount--) { + uint32_t ReturnType = readUint8(Ctx); + Sig.Returns.push_back(wasm::ValType(ReturnType)); } Signatures.push_back(std::move(Sig)); } diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index d76e1231684..10717718b20 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -30,15 +30,24 @@ namespace object { if (auto EC = X) \ return EC; +#define UNWRAP_REF_OR_RETURN(Name, Expr) \ + auto Name##OrErr = Expr; \ + if (!Name##OrErr) \ + return Name##OrErr.takeError(); \ + const auto &Name = *Name##OrErr; + +#define UNWRAP_OR_RETURN(Name, Expr) \ + auto Name##OrErr = Expr; \ + if (!Name##OrErr) \ + return Name##OrErr.takeError(); \ + auto Name = *Name##OrErr; + const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); // COFF files seem to be inconsistent with alignment between sections, just use // 8-byte because it makes everyone happy. 
const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); -uint32_t WindowsResourceParser::TreeNode::StringCount = 0; -uint32_t WindowsResourceParser::TreeNode::DataCount = 0; - WindowsResource::WindowsResource(MemoryBufferRef Source) : Binary(Binary::ID_WinRes, Source) { size_t LeadingSize = WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE; @@ -128,7 +137,8 @@ Error ResourceEntryRef::loadNext() { return Error::success(); } -WindowsResourceParser::WindowsResourceParser() : Root(false) {} +WindowsResourceParser::WindowsResourceParser(bool MinGW) + : Root(false), MinGW(MinGW) {} void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) { switch (TypeID) { @@ -200,6 +210,122 @@ static std::string makeDuplicateResourceError( return OS.str(); } +static void printStringOrID(const WindowsResourceParser::StringOrID &S, + raw_string_ostream &OS, bool IsType, bool IsID) { + if (S.IsString) { + std::string UTF8; + if (!convertUTF16LEToUTF8String(S.String, UTF8)) + UTF8 = "(failed conversion from UTF16)"; + OS << '\"' << UTF8 << '\"'; + } else if (IsType) + printResourceTypeName(S.ID, OS); + else if (IsID) + OS << "ID " << S.ID; + else + OS << S.ID; +} + +static std::string makeDuplicateResourceError( + const std::vector &Context, + StringRef File1, StringRef File2) { + std::string Ret; + raw_string_ostream OS(Ret); + + OS << "duplicate resource:"; + + if (Context.size() >= 1) { + OS << " type "; + printStringOrID(Context[0], OS, /* IsType */ true, /* IsID */ true); + } + + if (Context.size() >= 2) { + OS << "/name "; + printStringOrID(Context[1], OS, /* IsType */ false, /* IsID */ true); + } + + if (Context.size() >= 3) { + OS << "/language "; + printStringOrID(Context[2], OS, /* IsType */ false, /* IsID */ false); + } + OS << ", in " << File1 << " and in " << File2; + + return OS.str(); +} + +// MinGW specific. 
Remove default manifests (with language zero) if there are +// other manifests present, and report an error if there are more than one +// manifest with a non-zero language code. +// GCC has the concept of a default manifest resource object, which gets +// linked in implicitly if present. This default manifest has got language +// id zero, and should be dropped silently if there's another manifest present. +// If the user resources surprisignly had a manifest with language id zero, +// we should also ignore the duplicate default manifest. +void WindowsResourceParser::cleanUpManifests( + std::vector &Duplicates) { + auto TypeIt = Root.IDChildren.find(/* RT_MANIFEST */ 24); + if (TypeIt == Root.IDChildren.end()) + return; + + TreeNode *TypeNode = TypeIt->second.get(); + auto NameIt = + TypeNode->IDChildren.find(/* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1); + if (NameIt == TypeNode->IDChildren.end()) + return; + + TreeNode *NameNode = NameIt->second.get(); + if (NameNode->IDChildren.size() <= 1) + return; // None or one manifest present, all good. + + // If we have more than one manifest, drop the language zero one if present, + // and check again. + auto LangZeroIt = NameNode->IDChildren.find(0); + if (LangZeroIt != NameNode->IDChildren.end() && + LangZeroIt->second->IsDataNode) { + uint32_t RemovedIndex = LangZeroIt->second->DataIndex; + NameNode->IDChildren.erase(LangZeroIt); + Data.erase(Data.begin() + RemovedIndex); + Root.shiftDataIndexDown(RemovedIndex); + + // If we're now down to one manifest, all is good. 
+ if (NameNode->IDChildren.size() <= 1) + return; + } + + // More than one non-language-zero manifest + auto FirstIt = NameNode->IDChildren.begin(); + uint32_t FirstLang = FirstIt->first; + TreeNode *FirstNode = FirstIt->second.get(); + auto LastIt = NameNode->IDChildren.rbegin(); + uint32_t LastLang = LastIt->first; + TreeNode *LastNode = LastIt->second.get(); + Duplicates.push_back( + ("duplicate non-default manifests with languages " + Twine(FirstLang) + + " in " + InputFilenames[FirstNode->Origin] + " and " + Twine(LastLang) + + " in " + InputFilenames[LastNode->Origin]) + .str()); +} + +// Ignore duplicates of manifests with language zero (the default manifest), +// in case the user has provided a manifest with that language id. See +// the function comment above for context. Only returns true if MinGW is set +// to true. +bool WindowsResourceParser::shouldIgnoreDuplicate( + const ResourceEntryRef &Entry) const { + return MinGW && !Entry.checkTypeString() && + Entry.getTypeID() == /* RT_MANIFEST */ 24 && + !Entry.checkNameString() && + Entry.getNameID() == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + Entry.getLanguage() == 0; +} + +bool WindowsResourceParser::shouldIgnoreDuplicate( + const std::vector &Context) const { + return MinGW && Context.size() == 3 && !Context[0].IsString && + Context[0].ID == /* RT_MANIFEST */ 24 && !Context[1].IsString && + Context[1].ID == /* CREATEPROCESS_MANIFEST_RESOURCE_ID */ 1 && + !Context[2].IsString && Context[2].ID == 0; +} + Error WindowsResourceParser::parse(WindowsResource *WR, std::vector &Duplicates) { auto EntryOrErr = WR->getHeadEntry(); @@ -219,112 +345,176 @@ Error WindowsResourceParser::parse(WindowsResource *WR, } ResourceEntryRef Entry = EntryOrErr.get(); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(WR->getFileName()); bool End = false; while (!End) { - Data.push_back(Entry.getData()); - bool IsNewTypeString = false; - bool IsNewNameString = false; - - TreeNode* Node; - bool IsNewNode 
= Root.addEntry(Entry, InputFilenames.size(), - IsNewTypeString, IsNewNameString, Node); - InputFilenames.push_back(WR->getFileName()); + TreeNode *Node; + bool IsNewNode = Root.addEntry(Entry, Origin, Data, StringTable, Node); if (!IsNewNode) { - Duplicates.push_back(makeDuplicateResourceError( - Entry, InputFilenames[Node->Origin], WR->getFileName())); + if (!shouldIgnoreDuplicate(Entry)) + Duplicates.push_back(makeDuplicateResourceError( + Entry, InputFilenames[Node->Origin], WR->getFileName())); } - if (IsNewTypeString) - StringTable.push_back(Entry.getTypeString()); - - if (IsNewNameString) - StringTable.push_back(Entry.getNameString()); - RETURN_IF_ERROR(Entry.moveNext(End)); } return Error::success(); } +Error WindowsResourceParser::parse(ResourceSectionRef &RSR, StringRef Filename, + std::vector &Duplicates) { + UNWRAP_REF_OR_RETURN(BaseTable, RSR.getBaseTable()); + uint32_t Origin = InputFilenames.size(); + InputFilenames.push_back(Filename); + std::vector Context; + return addChildren(Root, RSR, BaseTable, Origin, Context, Duplicates); +} + void WindowsResourceParser::printTree(raw_ostream &OS) const { ScopedPrinter Writer(OS); Root.print(Writer, "Resource Tree"); } -bool WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, - uint32_t Origin, - bool &IsNewTypeString, - bool &IsNewNameString, - TreeNode *&Result) { - TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); - TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); - return NameNode.addLanguageNode(Entry, Origin, Result); +bool WindowsResourceParser::TreeNode::addEntry( + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, + std::vector> &StringTable, TreeNode *&Result) { + TreeNode &TypeNode = addTypeNode(Entry, StringTable); + TreeNode &NameNode = TypeNode.addNameNode(Entry, StringTable); + return NameNode.addLanguageNode(Entry, Origin, Data, Result); } -WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { - if (IsStringNode) - 
StringIndex = StringCount++; +Error WindowsResourceParser::addChildren(TreeNode &Node, + ResourceSectionRef &RSR, + const coff_resource_dir_table &Table, + uint32_t Origin, + std::vector &Context, + std::vector &Duplicates) { + + for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; + i++) { + UNWRAP_REF_OR_RETURN(Entry, RSR.getTableEntry(Table, i)); + TreeNode *Child; + + if (Entry.Offset.isSubDir()) { + + // Create a new subdirectory and recurse + if (i < Table.NumberOfNameEntries) { + UNWRAP_OR_RETURN(NameString, RSR.getEntryNameString(Entry)); + Child = &Node.addNameChild(NameString, StringTable); + Context.push_back(StringOrID(NameString)); + } else { + Child = &Node.addIDChild(Entry.Identifier.ID); + Context.push_back(StringOrID(Entry.Identifier.ID)); + } + + UNWRAP_REF_OR_RETURN(NextTable, RSR.getEntrySubDir(Entry)); + Error E = + addChildren(*Child, RSR, NextTable, Origin, Context, Duplicates); + if (E) + return E; + Context.pop_back(); + + } else { + + // Data leaves are supposed to have a numeric ID as identifier (language). 
+ if (Table.NumberOfNameEntries > 0) + return createStringError(object_error::parse_failed, + "unexpected string key for data object"); + + // Try adding a data leaf + UNWRAP_REF_OR_RETURN(DataEntry, RSR.getEntryData(Entry)); + TreeNode *Child; + Context.push_back(StringOrID(Entry.Identifier.ID)); + bool Added = Node.addDataChild(Entry.Identifier.ID, Table.MajorVersion, + Table.MinorVersion, Table.Characteristics, + Origin, Data.size(), Child); + if (Added) { + UNWRAP_OR_RETURN(Contents, RSR.getContents(DataEntry)); + Data.push_back(ArrayRef( + reinterpret_cast(Contents.data()), + Contents.size())); + } else { + if (!shouldIgnoreDuplicate(Context)) + Duplicates.push_back(makeDuplicateResourceError( + Context, InputFilenames[Child->Origin], InputFilenames.back())); + } + Context.pop_back(); + + } + } + return Error::success(); } +WindowsResourceParser::TreeNode::TreeNode(uint32_t StringIndex) + : StringIndex(StringIndex) {} + WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin) - : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), - Characteristics(Characteristics), Origin(Origin) { - DataIndex = DataCount++; -} + uint32_t Origin, uint32_t DataIndex) + : IsDataNode(true), DataIndex(DataIndex), MajorVersion(MajorVersion), + MinorVersion(MinorVersion), Characteristics(Characteristics), + Origin(Origin) {} std::unique_ptr -WindowsResourceParser::TreeNode::createStringNode() { - return std::unique_ptr(new TreeNode(true)); +WindowsResourceParser::TreeNode::createStringNode(uint32_t Index) { + return std::unique_ptr(new TreeNode(Index)); } std::unique_ptr WindowsResourceParser::TreeNode::createIDNode() { - return std::unique_ptr(new TreeNode(false)); + return std::unique_ptr(new TreeNode(0)); } std::unique_ptr WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, uint16_t MinorVersion, uint32_t Characteristics, - uint32_t Origin) { - return 
std::unique_ptr( - new TreeNode(MajorVersion, MinorVersion, Characteristics, Origin)); + uint32_t Origin, + uint32_t DataIndex) { + return std::unique_ptr(new TreeNode( + MajorVersion, MinorVersion, Characteristics, Origin, DataIndex)); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, - bool &IsNewTypeString) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addTypeNode( + const ResourceEntryRef &Entry, + std::vector> &StringTable) { if (Entry.checkTypeString()) - return addNameChild(Entry.getTypeString(), IsNewTypeString); + return addNameChild(Entry.getTypeString(), StringTable); else return addIDChild(Entry.getTypeID()); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, - bool &IsNewNameString) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameNode( + const ResourceEntryRef &Entry, + std::vector> &StringTable) { if (Entry.checkNameString()) - return addNameChild(Entry.getNameString(), IsNewNameString); + return addNameChild(Entry.getNameString(), StringTable); else return addIDChild(Entry.getNameID()); } bool WindowsResourceParser::TreeNode::addLanguageNode( - const ResourceEntryRef &Entry, uint32_t Origin, TreeNode *&Result) { - return addDataChild(Entry.getLanguage(), Entry.getMajorVersion(), - Entry.getMinorVersion(), Entry.getCharacteristics(), - Origin, Result); + const ResourceEntryRef &Entry, uint32_t Origin, + std::vector> &Data, TreeNode *&Result) { + bool Added = addDataChild(Entry.getLanguage(), Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics(), + Origin, Data.size(), Result); + if (Added) + Data.push_back(Entry.getData()); + return Added; } bool WindowsResourceParser::TreeNode::addDataChild( uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion, - uint32_t Characteristics, uint32_t Origin, TreeNode *&Result) { - auto NewChild = - 
createDataNode(MajorVersion, MinorVersion, Characteristics, Origin); + uint32_t Characteristics, uint32_t Origin, uint32_t DataIndex, + TreeNode *&Result) { + auto NewChild = createDataNode(MajorVersion, MinorVersion, Characteristics, + Origin, DataIndex); auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild)); Result = ElementInserted.first->second.get(); return ElementInserted.second; @@ -342,16 +532,15 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild( return *(Child->second); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addNameChild(ArrayRef NameRef, - bool &IsNewString) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addNameChild( + ArrayRef NameRef, std::vector> &StringTable) { std::string NameString; convertUTF16LEToUTF8String(NameRef, NameString); auto Child = StringChildren.find(NameString); if (Child == StringChildren.end()) { - auto NewChild = createStringNode(); - IsNewString = true; + auto NewChild = createStringNode(StringTable.size()); + StringTable.push_back(NameRef); WindowsResourceParser::TreeNode &Node = *NewChild; StringChildren.emplace(NameString, std::move(NewChild)); return Node; @@ -396,6 +585,19 @@ uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { return Size; } +// Shift DataIndex of all data children with an Index greater or equal to the +// given one, to fill a gap from removing an entry from the Data vector. 
+void WindowsResourceParser::TreeNode::shiftDataIndexDown(uint32_t Index) { + if (IsDataNode && DataIndex >= Index) { + DataIndex--; + } else { + for (auto &Child : IDChildren) + Child.second->shiftDataIndexDown(Index); + for (auto &Child : StringChildren) + Child.second->shiftDataIndexDown(Index); + } +} + class WindowsResourceCOFFWriter { public: WindowsResourceCOFFWriter(COFF::MachineTypes MachineType, @@ -515,6 +717,14 @@ WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) { return std::move(OutputBuffer); } +// According to COFF specification, if the Src has a size equal to Dest, +// it's okay to *not* copy the trailing zero. +static void coffnamecpy(char (&Dest)[COFF::NameSize], StringRef Src) { + assert(Src.size() <= COFF::NameSize && + "Src is not larger than COFF::NameSize"); + strncpy(Dest, Src.data(), (size_t)COFF::NameSize); +} + void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) { // Write the COFF header. auto *Header = reinterpret_cast(BufferStart); @@ -534,7 +744,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionHeader() { CurrentOffset += sizeof(coff_file_header); auto *SectionOneHeader = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + coffnamecpy(SectionOneHeader->Name, ".rsrc$01"); SectionOneHeader->VirtualSize = 0; SectionOneHeader->VirtualAddress = 0; SectionOneHeader->SizeOfRawData = SectionOneSize; @@ -552,7 +762,7 @@ void WindowsResourceCOFFWriter::writeSecondSectionHeader() { CurrentOffset += sizeof(coff_section); auto *SectionTwoHeader = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + coffnamecpy(SectionTwoHeader->Name, ".rsrc$02"); SectionTwoHeader->VirtualSize = 0; SectionTwoHeader->VirtualAddress = 0; SectionTwoHeader->SizeOfRawData = SectionTwoSize; @@ -590,7 +800,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the symbol table. 
// First, the feat symbol. auto *Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, "@feat.00"); Symbol->Value = 0x11; Symbol->SectionNumber = 0xffff; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -600,7 +810,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the .rsrc1 symbol + aux. Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$01"); Symbol->Value = 0; Symbol->SectionNumber = 1; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -619,7 +829,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write the .rsrc2 symbol + aux. Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, ".rsrc$02"); Symbol->Value = 0; Symbol->SectionNumber = 2; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; @@ -640,7 +850,7 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { for (unsigned i = 0; i < Data.size(); i++) { auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr(); Symbol = reinterpret_cast(BufferStart + CurrentOffset); - memcpy(Symbol->Name.ShortName, RelocationName.data(), (size_t) COFF::NameSize); + coffnamecpy(Symbol->Name.ShortName, RelocationName); Symbol->Value = DataOffsets[i]; Symbol->SectionNumber = 2; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; diff --git a/lib/Object/XCOFFObjectFile.cpp b/lib/Object/XCOFFObjectFile.cpp index 602b7357986..98782c2701c 100644 --- a/lib/Object/XCOFFObjectFile.cpp +++ b/lib/Object/XCOFFObjectFile.cpp @@ -11,17 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/XCOFFObjectFile.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" -#include 
"llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include #include namespace llvm { namespace object { +enum { FUNCTION_SYM = 0x20, SYM_TYPE_MASK = 0x07, RELOC_OVERFLOW = 65535 }; + // Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer // 'M'. Returns a pointer to the underlying object on success. template @@ -42,10 +39,25 @@ template static const T *viewAs(uintptr_t in) { return reinterpret_cast(in); } -static StringRef generateStringRef(const char *Name, uint64_t Size) { - auto NulCharPtr = static_cast(memchr(Name, '\0', Size)); +static StringRef generateXCOFFFixedNameStringRef(const char *Name) { + auto NulCharPtr = + static_cast(memchr(Name, '\0', XCOFF::NameSize)); return NulCharPtr ? StringRef(Name, NulCharPtr - Name) - : StringRef(Name, Size); + : StringRef(Name, XCOFF::NameSize); +} + +bool XCOFFRelocation32::isRelocationSigned() const { + return Info & XR_SIGN_INDICATOR_MASK; +} + +bool XCOFFRelocation32::isFixupIndicated() const { + return Info & XR_FIXUP_INDICATOR_MASK; +} + +uint8_t XCOFFRelocation32::getRelocatedLength() const { + // The relocation encodes the bit length being relocated minus 1. Add back + // the 1 to get the actual length being relocated. 
+ return (Info & XR_BIASED_LENGTH_MASK) + 1; } void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr, @@ -83,6 +95,9 @@ XCOFFObjectFile::toSection64(DataRefImpl Ref) const { const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const { assert(!is64Bit() && "Symbol table support not implemented for 64-bit."); assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!"); +#ifndef NDEBUG + checkSymbolEntryPointer(Ref.p); +#endif auto SymEntPtr = viewAs(Ref.p); return SymEntPtr; } @@ -112,23 +127,19 @@ XCOFFObjectFile::sectionHeaderTable64() const { void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const { const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); SymEntPtr += SymEntPtr->NumberOfAuxEntries + 1; +#ifndef NDEBUG + // This function is used by basic_symbol_iterator, which allows to + // point to the end-of-symbol-table address. + if (reinterpret_cast(SymEntPtr) != getEndOfSymbolTableAddress()) + checkSymbolEntryPointer(reinterpret_cast(SymEntPtr)); +#endif Symb.p = reinterpret_cast(SymEntPtr); } -Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { - const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); - - if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) - return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize); - - // A storage class value with the high-order bit on indicates that the name is - // a symbolic debugger stabstring. - if (SymEntPtr->StorageClass & 0x80) - return StringRef("Unimplemented Debug Name"); - - uint32_t Offset = SymEntPtr->NameInStrTbl.Offset; - // The byte offset is relative to the start of the string table - // or .debug section. A byte offset value of 0 is a null or zero-length symbol +Expected +XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const { + // The byte offset is relative to the start of the string table. + // A byte offset value of 0 is a null or zero-length symbol // name. 
A byte offset in the range 1 to 3 (inclusive) points into the length // field; as a soft-error recovery mechanism, we treat such cases as having an // offset of 0. @@ -138,10 +149,32 @@ Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { if (StringTable.Data != nullptr && StringTable.Size > Offset) return (StringTable.Data + Offset); - return make_error("Symbol Name parse failed", + return make_error("Bad offset for string table entry", object_error::parse_failed); } +Expected +XCOFFObjectFile::getCFileName(const XCOFFFileAuxEnt *CFileEntPtr) const { + if (CFileEntPtr->NameInStrTbl.Magic != + XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(CFileEntPtr->Name); + return getStringTableEntry(CFileEntPtr->NameInStrTbl.Offset); +} + +Expected XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const { + const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb); + + // A storage class value with the high-order bit on indicates that the name is + // a symbolic debugger stabstring. 
+ if (SymEntPtr->StorageClass & 0x80) + return StringRef("Unimplemented Debug Name"); + + if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC) + return generateXCOFFFixedNameStringRef(SymEntPtr->SymbolName); + + return getStringTableEntry(SymEntPtr->NameInStrTbl.Offset); +} + Expected XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { uint64_t Result = 0; llvm_unreachable("Not yet implemented!"); @@ -149,6 +182,7 @@ Expected XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { } uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { + assert(!is64Bit() && "Symbol table support not implemented for 64-bit."); return toSymbolEntry(Symb)->Value; } @@ -185,7 +219,7 @@ void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const { } Expected XCOFFObjectFile::getSectionName(DataRefImpl Sec) const { - return generateStringRef(getSectionNameInternal(Sec), XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(getSectionNameInternal(Sec)); } uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { @@ -393,8 +427,8 @@ XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const { default: Expected SecRef = getSectionByNum(SectionNum); if (SecRef) - return generateStringRef(getSectionNameInternal(SecRef.get()), - XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef( + getSectionNameInternal(SecRef.get())); return SecRef.takeError(); } } @@ -442,6 +476,48 @@ uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const { return fileHeader64()->NumberOfSymTableEntries; } +uintptr_t XCOFFObjectFile::getEndOfSymbolTableAddress() const { + uint32_t NumberOfSymTableEntries = + is64Bit() ? 
getNumberOfSymbolTableEntries64() + : getLogicalNumberOfSymbolTableEntries32(); + return getWithOffset(reinterpret_cast(SymbolTblPtr), + XCOFF::SymbolTableEntrySize * NumberOfSymTableEntries); +} + +void XCOFFObjectFile::checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const { + if (SymbolEntPtr < reinterpret_cast(SymbolTblPtr)) + report_fatal_error("Symbol table entry is outside of symbol table."); + + if (SymbolEntPtr >= getEndOfSymbolTableAddress()) + report_fatal_error("Symbol table entry is outside of symbol table."); + + ptrdiff_t Offset = reinterpret_cast(SymbolEntPtr) - + reinterpret_cast(SymbolTblPtr); + + if (Offset % XCOFF::SymbolTableEntrySize != 0) + report_fatal_error( + "Symbol table entry position is not valid inside of symbol table."); +} + +uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const { + return (reinterpret_cast(SymbolEntPtr) - + reinterpret_cast(SymbolTblPtr)) / + XCOFF::SymbolTableEntrySize; +} + +Expected +XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const { + if (is64Bit()) + report_fatal_error("64-bit symbol table support not implemented yet."); + + if (Index >= getLogicalNumberOfSymbolTableEntries32()) + return errorCodeToError(object_error::invalid_symbol_index); + + DataRefImpl SymDRI; + SymDRI.p = reinterpret_cast(getPointerToSymbolTable() + Index); + return getSymbolName(SymDRI); +} + uint16_t XCOFFObjectFile::getFlags() const { return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags; } @@ -477,6 +553,46 @@ ArrayRef XCOFFObjectFile::sections32() const { TablePtr + getNumberOfSections()); } +// In an XCOFF32 file, when the field value is 65535, then an STYP_OVRFLO +// section header contains the actual count of relocation entries in the s_paddr +// field. STYP_OVRFLO headers contain the section index of their corresponding +// sections as their raw "NumberOfRelocations" field value. 
+Expected XCOFFObjectFile::getLogicalNumberOfRelocationEntries( + const XCOFFSectionHeader32 &Sec) const { + + uint16_t SectionIndex = &Sec - sectionHeaderTable32() + 1; + + if (Sec.NumberOfRelocations < RELOC_OVERFLOW) + return Sec.NumberOfRelocations; + for (const auto &Sec : sections32()) { + if (Sec.Flags == XCOFF::STYP_OVRFLO && + Sec.NumberOfRelocations == SectionIndex) + return Sec.PhysicalAddress; + } + return errorCodeToError(object_error::parse_failed); +} + +Expected> +XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const { + uintptr_t RelocAddr = getWithOffset(reinterpret_cast(FileHeader), + Sec.FileOffsetToRelocationInfo); + auto NumRelocEntriesOrErr = getLogicalNumberOfRelocationEntries(Sec); + if (Error E = NumRelocEntriesOrErr.takeError()) + return std::move(E); + + uint32_t NumRelocEntries = NumRelocEntriesOrErr.get(); + + auto RelocationOrErr = + getObject(Data, reinterpret_cast(RelocAddr), + NumRelocEntries * sizeof(XCOFFRelocation32)); + if (Error E = RelocationOrErr.takeError()) + return std::move(E); + + const XCOFFRelocation32 *StartReloc = RelocationOrErr.get(); + + return ArrayRef(StartReloc, StartReloc + NumRelocEntries); +} + Expected XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) { // If there is a string table, then the buffer must contain at least 4 bytes @@ -507,7 +623,7 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) { Expected> XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) { - // Can't use make_unique because of the private constructor. + // Can't use std::make_unique because of the private constructor. 
std::unique_ptr Obj; Obj.reset(new XCOFFObjectFile(Type, MBR)); @@ -573,11 +689,77 @@ ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef, } StringRef XCOFFSectionHeader32::getName() const { - return generateStringRef(Name, XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(Name); } StringRef XCOFFSectionHeader64::getName() const { - return generateStringRef(Name, XCOFF::SectionNameSize); + return generateXCOFFFixedNameStringRef(Name); +} + +XCOFF::StorageClass XCOFFSymbolRef::getStorageClass() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->StorageClass; +} + +uint8_t XCOFFSymbolRef::getNumberOfAuxEntries() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->NumberOfAuxEntries; +} + +const XCOFFCsectAuxEnt32 *XCOFFSymbolRef::getXCOFFCsectAuxEnt32() const { + assert(!OwningObjectPtr->is64Bit() && + "32-bit interface called on 64-bit object file."); + assert(hasCsectAuxEnt() && "No Csect Auxiliary Entry is found."); + + // In XCOFF32, the csect auxilliary entry is always the last auxiliary + // entry for the symbol. 
+ uintptr_t AuxAddr = getWithOffset( + SymEntDataRef.p, XCOFF::SymbolTableEntrySize * getNumberOfAuxEntries()); + +#ifndef NDEBUG + OwningObjectPtr->checkSymbolEntryPointer(AuxAddr); +#endif + + return reinterpret_cast(AuxAddr); +} + +uint16_t XCOFFSymbolRef::getType() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SymbolType; +} + +int16_t XCOFFSymbolRef::getSectionNumber() const { + return OwningObjectPtr->toSymbolEntry(SymEntDataRef)->SectionNumber; +} + +bool XCOFFSymbolRef::hasCsectAuxEnt() const { + XCOFF::StorageClass SC = getStorageClass(); + return (SC == XCOFF::C_EXT || SC == XCOFF::C_WEAKEXT || + SC == XCOFF::C_HIDEXT); +} + +bool XCOFFSymbolRef::isFunction() const { + if (OwningObjectPtr->is64Bit()) + report_fatal_error("64-bit support is unimplemented yet."); + + if (getType() & FUNCTION_SYM) + return true; + + if (!hasCsectAuxEnt()) + return false; + + const XCOFFCsectAuxEnt32 *CsectAuxEnt = getXCOFFCsectAuxEnt32(); + + // A function definition should be a label definition. + if ((CsectAuxEnt->SymbolAlignmentAndType & SYM_TYPE_MASK) != XCOFF::XTY_LD) + return false; + + if (CsectAuxEnt->StorageMappingClass != XCOFF::XMC_PR) + return false; + + int16_t SectNum = getSectionNumber(); + Expected SI = OwningObjectPtr->getSectionByNum(SectNum); + if (!SI) + return false; + + return (OwningObjectPtr->getSectionFlags(SI.get()) & XCOFF::STYP_TEXT); } } // namespace object diff --git a/lib/ObjectYAML/COFFEmitter.cpp b/lib/ObjectYAML/COFFEmitter.cpp new file mode 100644 index 00000000000..efcdc51e167 --- /dev/null +++ b/lib/ObjectYAML/COFFEmitter.cpp @@ -0,0 +1,622 @@ +//===- yaml2coff - Convert YAML to a COFF object file ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The COFF component of yaml2obj. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/Object/COFF.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace { + +/// This parses a yaml stream that represents a COFF object file. +/// See docs/yaml2obj for the yaml scheema. +struct COFFParser { + COFFParser(COFFYAML::Object &Obj, yaml::ErrorHandler EH) + : Obj(Obj), SectionTableStart(0), SectionTableSize(0), ErrHandler(EH) { + // A COFF string table always starts with a 4 byte size field. Offsets into + // it include this size, so allocate it now. + StringTable.append(4, char(0)); + } + + bool useBigObj() const { + return static_cast(Obj.Sections.size()) > + COFF::MaxNumberOfSections16; + } + + bool isPE() const { return Obj.OptionalHeader.hasValue(); } + bool is64Bit() const { + return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 || + Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64; + } + + uint32_t getFileAlignment() const { + return Obj.OptionalHeader->Header.FileAlignment; + } + + unsigned getHeaderSize() const { + return useBigObj() ? COFF::Header32Size : COFF::Header16Size; + } + + unsigned getSymbolSize() const { + return useBigObj() ? 
COFF::Symbol32Size : COFF::Symbol16Size; + } + + bool parseSections() { + for (std::vector::iterator i = Obj.Sections.begin(), + e = Obj.Sections.end(); + i != e; ++i) { + COFFYAML::Section &Sec = *i; + + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Sec.Name; + + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sec.Header.Name); + } else { + // Add string to the string table and format the index for output. + unsigned Index = getStringIndex(Name); + std::string str = utostr(Index); + if (str.size() > 7) { + ErrHandler("string table got too large"); + return false; + } + Sec.Header.Name[0] = '/'; + std::copy(str.begin(), str.end(), Sec.Header.Name + 1); + } + + if (Sec.Alignment) { + if (Sec.Alignment > 8192) { + ErrHandler("section alignment is too large"); + return false; + } + if (!isPowerOf2_32(Sec.Alignment)) { + ErrHandler("section alignment is not a power of 2"); + return false; + } + Sec.Header.Characteristics |= (Log2_32(Sec.Alignment) + 1) << 20; + } + } + return true; + } + + bool parseSymbols() { + for (std::vector::iterator i = Obj.Symbols.begin(), + e = Obj.Symbols.end(); + i != e; ++i) { + COFFYAML::Symbol &Sym = *i; + + // If the name is less than 8 bytes, store it in place, otherwise + // store it in the string table. + StringRef Name = Sym.Name; + if (Name.size() <= COFF::NameSize) { + std::copy(Name.begin(), Name.end(), Sym.Header.Name); + } else { + // Add string to the string table and format the index for output. 
+ unsigned Index = getStringIndex(Name); + *reinterpret_cast(Sym.Header.Name + 4) = + Index; + } + + Sym.Header.Type = Sym.SimpleType; + Sym.Header.Type |= Sym.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT; + } + return true; + } + + bool parse() { + if (!parseSections()) + return false; + if (!parseSymbols()) + return false; + return true; + } + + unsigned getStringIndex(StringRef Str) { + StringMap::iterator i = StringTableMap.find(Str); + if (i == StringTableMap.end()) { + unsigned Index = StringTable.size(); + StringTable.append(Str.begin(), Str.end()); + StringTable.push_back(0); + StringTableMap[Str] = Index; + return Index; + } + return i->second; + } + + COFFYAML::Object &Obj; + + codeview::StringsAndChecksums StringsAndChecksums; + BumpPtrAllocator Allocator; + StringMap StringTableMap; + std::string StringTable; + uint32_t SectionTableStart; + uint32_t SectionTableSize; + + yaml::ErrorHandler ErrHandler; +}; + +enum { DOSStubSize = 128 }; + +} // end anonymous namespace + +// Take a CP and assign addresses and sizes to everything. Returns false if the +// layout is not valid to do. +static bool layoutOptionalHeader(COFFParser &CP) { + if (!CP.isPE()) + return true; + unsigned PEHeaderSize = CP.is64Bit() ? 
sizeof(object::pe32plus_header) + : sizeof(object::pe32_header); + CP.Obj.Header.SizeOfOptionalHeader = + PEHeaderSize + + sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1); + return true; +} + +static yaml::BinaryRef +toDebugS(ArrayRef Subsections, + const codeview::StringsAndChecksums &SC, BumpPtrAllocator &Allocator) { + using namespace codeview; + ExitOnError Err("Error occurred writing .debug$S section"); + auto CVSS = + Err(CodeViewYAML::toCodeViewSubsectionList(Allocator, Subsections, SC)); + + std::vector Builders; + uint32_t Size = sizeof(uint32_t); + for (auto &SS : CVSS) { + DebugSubsectionRecordBuilder B(SS, CodeViewContainer::ObjectFile); + Size += B.calculateSerializedLength(); + Builders.push_back(std::move(B)); + } + uint8_t *Buffer = Allocator.Allocate(Size); + MutableArrayRef Output(Buffer, Size); + BinaryStreamWriter Writer(Output, support::little); + + Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); + for (const auto &B : Builders) { + Err(B.commit(Writer)); + } + return {Output}; +} + +// Take a CP and assign addresses and sizes to everything. Returns false if the +// layout is not valid to do. +static bool layoutCOFF(COFFParser &CP) { + // The section table starts immediately after the header, including the + // optional header. + CP.SectionTableStart = + CP.getHeaderSize() + CP.Obj.Header.SizeOfOptionalHeader; + if (CP.isPE()) + CP.SectionTableStart += DOSStubSize + sizeof(COFF::PEMagic); + CP.SectionTableSize = COFF::SectionSize * CP.Obj.Sections.size(); + + uint32_t CurrentSectionDataOffset = + CP.SectionTableStart + CP.SectionTableSize; + + for (COFFYAML::Section &S : CP.Obj.Sections) { + // We support specifying exactly one of SectionData or Subsections. So if + // there is already some SectionData, then we don't need to do any of this. 
+ if (S.Name == ".debug$S" && S.SectionData.binary_size() == 0) { + CodeViewYAML::initializeStringsAndChecksums(S.DebugS, + CP.StringsAndChecksums); + if (CP.StringsAndChecksums.hasChecksums() && + CP.StringsAndChecksums.hasStrings()) + break; + } + } + + // Assign each section data address consecutively. + for (COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Name == ".debug$S") { + if (S.SectionData.binary_size() == 0) { + assert(CP.StringsAndChecksums.hasStrings() && + "Object file does not have debug string table!"); + + S.SectionData = + toDebugS(S.DebugS, CP.StringsAndChecksums, CP.Allocator); + } + } else if (S.Name == ".debug$T") { + if (S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugT(S.DebugT, CP.Allocator, S.Name); + } else if (S.Name == ".debug$P") { + if (S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugT(S.DebugP, CP.Allocator, S.Name); + } else if (S.Name == ".debug$H") { + if (S.DebugH.hasValue() && S.SectionData.binary_size() == 0) + S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator); + } + + if (S.SectionData.binary_size() > 0) { + CurrentSectionDataOffset = alignTo(CurrentSectionDataOffset, + CP.isPE() ? CP.getFileAlignment() : 4); + S.Header.SizeOfRawData = S.SectionData.binary_size(); + if (CP.isPE()) + S.Header.SizeOfRawData = + alignTo(S.Header.SizeOfRawData, CP.getFileAlignment()); + S.Header.PointerToRawData = CurrentSectionDataOffset; + CurrentSectionDataOffset += S.Header.SizeOfRawData; + if (!S.Relocations.empty()) { + S.Header.PointerToRelocations = CurrentSectionDataOffset; + S.Header.NumberOfRelocations = S.Relocations.size(); + CurrentSectionDataOffset += + S.Header.NumberOfRelocations * COFF::RelocationSize; + } + } else { + // Leave SizeOfRawData unaltered. For .bss sections in object files, it + // carries the section size. + S.Header.PointerToRawData = 0; + } + } + + uint32_t SymbolTableStart = CurrentSectionDataOffset; + + // Calculate number of symbols. 
+ uint32_t NumberOfSymbols = 0; + for (std::vector::iterator i = CP.Obj.Symbols.begin(), + e = CP.Obj.Symbols.end(); + i != e; ++i) { + uint32_t NumberOfAuxSymbols = 0; + if (i->FunctionDefinition) + NumberOfAuxSymbols += 1; + if (i->bfAndefSymbol) + NumberOfAuxSymbols += 1; + if (i->WeakExternal) + NumberOfAuxSymbols += 1; + if (!i->File.empty()) + NumberOfAuxSymbols += + (i->File.size() + CP.getSymbolSize() - 1) / CP.getSymbolSize(); + if (i->SectionDefinition) + NumberOfAuxSymbols += 1; + if (i->CLRToken) + NumberOfAuxSymbols += 1; + i->Header.NumberOfAuxSymbols = NumberOfAuxSymbols; + NumberOfSymbols += 1 + NumberOfAuxSymbols; + } + + // Store all the allocated start addresses in the header. + CP.Obj.Header.NumberOfSections = CP.Obj.Sections.size(); + CP.Obj.Header.NumberOfSymbols = NumberOfSymbols; + if (NumberOfSymbols > 0 || CP.StringTable.size() > 4) + CP.Obj.Header.PointerToSymbolTable = SymbolTableStart; + else + CP.Obj.Header.PointerToSymbolTable = 0; + + *reinterpret_cast(&CP.StringTable[0]) = + CP.StringTable.size(); + + return true; +} + +template struct binary_le_impl { + value_type Value; + binary_le_impl(value_type V) : Value(V) {} +}; + +template +raw_ostream &operator<<(raw_ostream &OS, + const binary_le_impl &BLE) { + char Buffer[sizeof(BLE.Value)]; + support::endian::write( + Buffer, BLE.Value); + OS.write(Buffer, sizeof(BLE.Value)); + return OS; +} + +template +binary_le_impl binary_le(value_type V) { + return binary_le_impl(V); +} + +template struct zeros_impl {}; + +template +raw_ostream &operator<<(raw_ostream &OS, const zeros_impl &) { + char Buffer[NumBytes]; + memset(Buffer, 0, sizeof(Buffer)); + OS.write(Buffer, sizeof(Buffer)); + return OS; +} + +template zeros_impl zeros(const T &) { + return zeros_impl(); +} + +template +static uint32_t initializeOptionalHeader(COFFParser &CP, uint16_t Magic, + T Header) { + memset(Header, 0, sizeof(*Header)); + Header->Magic = Magic; + Header->SectionAlignment = 
CP.Obj.OptionalHeader->Header.SectionAlignment; + Header->FileAlignment = CP.Obj.OptionalHeader->Header.FileAlignment; + uint32_t SizeOfCode = 0, SizeOfInitializedData = 0, + SizeOfUninitializedData = 0; + uint32_t SizeOfHeaders = alignTo(CP.SectionTableStart + CP.SectionTableSize, + Header->FileAlignment); + uint32_t SizeOfImage = alignTo(SizeOfHeaders, Header->SectionAlignment); + uint32_t BaseOfData = 0; + for (const COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_CODE) + SizeOfCode += S.Header.SizeOfRawData; + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + SizeOfInitializedData += S.Header.SizeOfRawData; + if (S.Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + SizeOfUninitializedData += S.Header.SizeOfRawData; + if (S.Name.equals(".text")) + Header->BaseOfCode = S.Header.VirtualAddress; // RVA + else if (S.Name.equals(".data")) + BaseOfData = S.Header.VirtualAddress; // RVA + if (S.Header.VirtualAddress) + SizeOfImage += alignTo(S.Header.VirtualSize, Header->SectionAlignment); + } + Header->SizeOfCode = SizeOfCode; + Header->SizeOfInitializedData = SizeOfInitializedData; + Header->SizeOfUninitializedData = SizeOfUninitializedData; + Header->AddressOfEntryPoint = + CP.Obj.OptionalHeader->Header.AddressOfEntryPoint; // RVA + Header->ImageBase = CP.Obj.OptionalHeader->Header.ImageBase; + Header->MajorOperatingSystemVersion = + CP.Obj.OptionalHeader->Header.MajorOperatingSystemVersion; + Header->MinorOperatingSystemVersion = + CP.Obj.OptionalHeader->Header.MinorOperatingSystemVersion; + Header->MajorImageVersion = CP.Obj.OptionalHeader->Header.MajorImageVersion; + Header->MinorImageVersion = CP.Obj.OptionalHeader->Header.MinorImageVersion; + Header->MajorSubsystemVersion = + CP.Obj.OptionalHeader->Header.MajorSubsystemVersion; + Header->MinorSubsystemVersion = + CP.Obj.OptionalHeader->Header.MinorSubsystemVersion; + Header->SizeOfImage = SizeOfImage; + Header->SizeOfHeaders = 
SizeOfHeaders; + Header->Subsystem = CP.Obj.OptionalHeader->Header.Subsystem; + Header->DLLCharacteristics = CP.Obj.OptionalHeader->Header.DLLCharacteristics; + Header->SizeOfStackReserve = CP.Obj.OptionalHeader->Header.SizeOfStackReserve; + Header->SizeOfStackCommit = CP.Obj.OptionalHeader->Header.SizeOfStackCommit; + Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve; + Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit; + Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1; + return BaseOfData; +} + +static bool writeCOFF(COFFParser &CP, raw_ostream &OS) { + if (CP.isPE()) { + // PE files start with a DOS stub. + object::dos_header DH; + memset(&DH, 0, sizeof(DH)); + + // DOS EXEs start with "MZ" magic. + DH.Magic[0] = 'M'; + DH.Magic[1] = 'Z'; + // Initializing the AddressOfRelocationTable is strictly optional but + // mollifies certain tools which expect it to have a value greater than + // 0x40. + DH.AddressOfRelocationTable = sizeof(DH); + // This is the address of the PE signature. + DH.AddressOfNewExeHeader = DOSStubSize; + + // Write out our DOS stub. + OS.write(reinterpret_cast(&DH), sizeof(DH)); + // Write padding until we reach the position of where our PE signature + // should live. + OS.write_zeros(DOSStubSize - sizeof(DH)); + // Write out the PE signature. 
+ OS.write(COFF::PEMagic, sizeof(COFF::PEMagic)); + } + if (CP.useBigObj()) { + OS << binary_le(static_cast(COFF::IMAGE_FILE_MACHINE_UNKNOWN)) + << binary_le(static_cast(0xffff)) + << binary_le( + static_cast(COFF::BigObjHeader::MinBigObjectVersion)) + << binary_le(CP.Obj.Header.Machine) + << binary_le(CP.Obj.Header.TimeDateStamp); + OS.write(COFF::BigObjMagic, sizeof(COFF::BigObjMagic)); + OS << zeros(uint32_t(0)) << zeros(uint32_t(0)) << zeros(uint32_t(0)) + << zeros(uint32_t(0)) << binary_le(CP.Obj.Header.NumberOfSections) + << binary_le(CP.Obj.Header.PointerToSymbolTable) + << binary_le(CP.Obj.Header.NumberOfSymbols); + } else { + OS << binary_le(CP.Obj.Header.Machine) + << binary_le(static_cast(CP.Obj.Header.NumberOfSections)) + << binary_le(CP.Obj.Header.TimeDateStamp) + << binary_le(CP.Obj.Header.PointerToSymbolTable) + << binary_le(CP.Obj.Header.NumberOfSymbols) + << binary_le(CP.Obj.Header.SizeOfOptionalHeader) + << binary_le(CP.Obj.Header.Characteristics); + } + if (CP.isPE()) { + if (CP.is64Bit()) { + object::pe32plus_header PEH; + initializeOptionalHeader(CP, COFF::PE32Header::PE32_PLUS, &PEH); + OS.write(reinterpret_cast(&PEH), sizeof(PEH)); + } else { + object::pe32_header PEH; + uint32_t BaseOfData = + initializeOptionalHeader(CP, COFF::PE32Header::PE32, &PEH); + PEH.BaseOfData = BaseOfData; + OS.write(reinterpret_cast(&PEH), sizeof(PEH)); + } + for (const Optional &DD : + CP.Obj.OptionalHeader->DataDirectories) { + if (!DD.hasValue()) { + OS << zeros(uint32_t(0)); + OS << zeros(uint32_t(0)); + } else { + OS << binary_le(DD->RelativeVirtualAddress); + OS << binary_le(DD->Size); + } + } + OS << zeros(uint32_t(0)); + OS << zeros(uint32_t(0)); + } + + assert(OS.tell() == CP.SectionTableStart); + // Output section table. 
+ for (std::vector::iterator i = CP.Obj.Sections.begin(), + e = CP.Obj.Sections.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.VirtualSize) + << binary_le(i->Header.VirtualAddress) + << binary_le(i->Header.SizeOfRawData) + << binary_le(i->Header.PointerToRawData) + << binary_le(i->Header.PointerToRelocations) + << binary_le(i->Header.PointerToLineNumbers) + << binary_le(i->Header.NumberOfRelocations) + << binary_le(i->Header.NumberOfLineNumbers) + << binary_le(i->Header.Characteristics); + } + assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize); + + unsigned CurSymbol = 0; + StringMap SymbolTableIndexMap; + for (std::vector::iterator I = CP.Obj.Symbols.begin(), + E = CP.Obj.Symbols.end(); + I != E; ++I) { + SymbolTableIndexMap[I->Name] = CurSymbol; + CurSymbol += 1 + I->Header.NumberOfAuxSymbols; + } + + // Output section data. + for (const COFFYAML::Section &S : CP.Obj.Sections) { + if (S.Header.SizeOfRawData == 0 || S.Header.PointerToRawData == 0) + continue; + assert(S.Header.PointerToRawData >= OS.tell()); + OS.write_zeros(S.Header.PointerToRawData - OS.tell()); + S.SectionData.writeAsBinary(OS); + assert(S.Header.SizeOfRawData >= S.SectionData.binary_size()); + OS.write_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size()); + for (const COFFYAML::Relocation &R : S.Relocations) { + uint32_t SymbolTableIndex; + if (R.SymbolTableIndex) { + if (!R.SymbolName.empty()) + WithColor::error() + << "Both SymbolName and SymbolTableIndex specified\n"; + SymbolTableIndex = *R.SymbolTableIndex; + } else { + SymbolTableIndex = SymbolTableIndexMap[R.SymbolName]; + } + OS << binary_le(R.VirtualAddress) << binary_le(SymbolTableIndex) + << binary_le(R.Type); + } + } + + // Output symbol table. 
+ + for (std::vector::const_iterator i = CP.Obj.Symbols.begin(), + e = CP.Obj.Symbols.end(); + i != e; ++i) { + OS.write(i->Header.Name, COFF::NameSize); + OS << binary_le(i->Header.Value); + if (CP.useBigObj()) + OS << binary_le(i->Header.SectionNumber); + else + OS << binary_le(static_cast(i->Header.SectionNumber)); + OS << binary_le(i->Header.Type) << binary_le(i->Header.StorageClass) + << binary_le(i->Header.NumberOfAuxSymbols); + + if (i->FunctionDefinition) { + OS << binary_le(i->FunctionDefinition->TagIndex) + << binary_le(i->FunctionDefinition->TotalSize) + << binary_le(i->FunctionDefinition->PointerToLinenumber) + << binary_le(i->FunctionDefinition->PointerToNextFunction) + << zeros(i->FunctionDefinition->unused); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->bfAndefSymbol) { + OS << zeros(i->bfAndefSymbol->unused1) + << binary_le(i->bfAndefSymbol->Linenumber) + << zeros(i->bfAndefSymbol->unused2) + << binary_le(i->bfAndefSymbol->PointerToNextFunction) + << zeros(i->bfAndefSymbol->unused3); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->WeakExternal) { + OS << binary_le(i->WeakExternal->TagIndex) + << binary_le(i->WeakExternal->Characteristics) + << zeros(i->WeakExternal->unused); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (!i->File.empty()) { + unsigned SymbolSize = CP.getSymbolSize(); + uint32_t NumberOfAuxRecords = + (i->File.size() + SymbolSize - 1) / SymbolSize; + uint32_t NumberOfAuxBytes = NumberOfAuxRecords * SymbolSize; + uint32_t NumZeros = NumberOfAuxBytes - i->File.size(); + OS.write(i->File.data(), i->File.size()); + OS.write_zeros(NumZeros); + } + if (i->SectionDefinition) { + OS << binary_le(i->SectionDefinition->Length) + << binary_le(i->SectionDefinition->NumberOfRelocations) + << binary_le(i->SectionDefinition->NumberOfLinenumbers) + << binary_le(i->SectionDefinition->CheckSum) + << binary_le(static_cast(i->SectionDefinition->Number)) + << 
binary_le(i->SectionDefinition->Selection) + << zeros(i->SectionDefinition->unused) + << binary_le(static_cast(i->SectionDefinition->Number >> 16)); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + if (i->CLRToken) { + OS << binary_le(i->CLRToken->AuxType) << zeros(i->CLRToken->unused1) + << binary_le(i->CLRToken->SymbolTableIndex) + << zeros(i->CLRToken->unused2); + OS.write_zeros(CP.getSymbolSize() - COFF::Symbol16Size); + } + } + + // Output string table. + if (CP.Obj.Header.PointerToSymbolTable) + OS.write(&CP.StringTable[0], CP.StringTable.size()); + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2coff(llvm::COFFYAML::Object &Doc, raw_ostream &Out, + ErrorHandler ErrHandler) { + COFFParser CP(Doc, ErrHandler); + if (!CP.parse()) { + ErrHandler("failed to parse YAML file"); + return false; + } + + if (!layoutOptionalHeader(CP)) { + ErrHandler("failed to layout optional header for COFF file"); + return false; + } + + if (!layoutCOFF(CP)) { + ErrHandler("failed to layout COFF file"); + return false; + } + if (!writeCOFF(CP, Out)) { + ErrHandler("failed to write COFF file"); + return false; + } + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index 227107c051d..95409fdc330 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -391,7 +391,7 @@ template <> void SymbolRecordImpl::map(IO &IO) { } template <> void SymbolRecordImpl::map(IO &IO) { - IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Offset", Symbol.Hdr.Offset); IO.mapRequired("Range", Symbol.Range); IO.mapRequired("Gaps", Symbol.Gaps); } diff --git a/lib/ObjectYAML/ELFEmitter.cpp b/lib/ObjectYAML/ELFEmitter.cpp new file mode 100644 index 00000000000..e0faed256f6 --- /dev/null +++ b/lib/ObjectYAML/ELFEmitter.cpp @@ -0,0 +1,1152 @@ +//===- yaml2elf - Convert YAML to a ELF object file -----------------------===// 
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The ELF component of yaml2obj. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/ObjectYAML/ELFYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// This class is used to build up a contiguous binary blob while keeping +// track of an offset in the output (which notionally begins at +// `InitialOffset`). +namespace { +class ContiguousBlobAccumulator { + const uint64_t InitialOffset; + SmallVector Buf; + raw_svector_ostream OS; + + /// \returns The new offset. 
+ uint64_t padToAlignment(unsigned Align) { + if (Align == 0) + Align = 1; + uint64_t CurrentOffset = InitialOffset + OS.tell(); + uint64_t AlignedOffset = alignTo(CurrentOffset, Align); + OS.write_zeros(AlignedOffset - CurrentOffset); + return AlignedOffset; // == CurrentOffset; + } + +public: + ContiguousBlobAccumulator(uint64_t InitialOffset_) + : InitialOffset(InitialOffset_), Buf(), OS(Buf) {} + template + raw_ostream &getOSAndAlignedOffset(Integer &Offset, unsigned Align) { + Offset = padToAlignment(Align); + return OS; + } + void writeBlobToStream(raw_ostream &Out) { Out << OS.str(); } +}; + +// Used to keep track of section and symbol names, so that in the YAML file +// sections and symbols can be referenced by name instead of by index. +class NameToIdxMap { + StringMap Map; + +public: + /// \Returns false if name is already present in the map. + bool addName(StringRef Name, unsigned Ndx) { + return Map.insert({Name, Ndx}).second; + } + /// \Returns false if name is not present in the map. + bool lookup(StringRef Name, unsigned &Idx) const { + auto I = Map.find(Name); + if (I == Map.end()) + return false; + Idx = I->getValue(); + return true; + } + /// Asserts if name is not present in the map. + unsigned get(StringRef Name) const { + unsigned Idx; + if (lookup(Name, Idx)) + return Idx; + assert(false && "Expected section not found in index"); + return 0; + } + unsigned size() const { return Map.size(); } +}; + +/// "Single point of truth" for the ELF file construction. +/// TODO: This class still has a ways to go before it is truly a "single +/// point of truth". 
+template class ELFState { + typedef typename ELFT::Ehdr Elf_Ehdr; + typedef typename ELFT::Phdr Elf_Phdr; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Sym Elf_Sym; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + typedef typename ELFT::Relr Elf_Relr; + typedef typename ELFT::Dyn Elf_Dyn; + + enum class SymtabType { Static, Dynamic }; + + /// The future ".strtab" section. + StringTableBuilder DotStrtab{StringTableBuilder::ELF}; + + /// The future ".shstrtab" section. + StringTableBuilder DotShStrtab{StringTableBuilder::ELF}; + + /// The future ".dynstr" section. + StringTableBuilder DotDynstr{StringTableBuilder::ELF}; + + NameToIdxMap SN2I; + NameToIdxMap SymN2I; + NameToIdxMap DynSymN2I; + ELFYAML::Object &Doc; + + bool HasError = false; + yaml::ErrorHandler ErrHandler; + void reportError(const Twine &Msg); + + std::vector toELFSymbols(ArrayRef Symbols, + const StringTableBuilder &Strtab); + unsigned toSectionIndex(StringRef S, StringRef LocSec, StringRef LocSym = ""); + unsigned toSymbolIndex(StringRef S, StringRef LocSec, bool IsDynamic); + + void buildSectionIndex(); + void buildSymbolIndexes(); + void initProgramHeaders(std::vector &PHeaders); + bool initImplicitHeader(ContiguousBlobAccumulator &CBA, Elf_Shdr &Header, + StringRef SecName, ELFYAML::Section *YAMLSec); + void initSectionHeaders(std::vector &SHeaders, + ContiguousBlobAccumulator &CBA); + void initSymtabSectionHeader(Elf_Shdr &SHeader, SymtabType STType, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec); + void initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, + StringTableBuilder &STB, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec); + void setProgramHeaderLayout(std::vector &PHeaders, + std::vector &SHeaders); + void finalizeStrings(); + void writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::RawContentSection &Section, + 
ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::RelocationSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, const ELFYAML::Group &Group, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymtabShndxSection &Shndx, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymverSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerneedSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerdefSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::MipsABIFlags &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::DynamicSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::StackSizesSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::HashSection &Section, + ContiguousBlobAccumulator &CBA); + void writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::AddrsigSection &Section, + ContiguousBlobAccumulator &CBA); + + ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH); + +public: + static bool writeELF(raw_ostream &OS, ELFYAML::Object &Doc, + yaml::ErrorHandler EH); +}; +} // end anonymous namespace + +template static size_t arrayDataSize(ArrayRef A) { + return A.size() * sizeof(T); +} + +template static void writeArrayData(raw_ostream &OS, ArrayRef A) { + OS.write((const char *)A.data(), arrayDataSize(A)); +} + +template static void zero(T &Obj) { memset(&Obj, 0, sizeof(Obj)); } + +template +ELFState::ELFState(ELFYAML::Object &D, yaml::ErrorHandler EH) + : Doc(D), ErrHandler(EH) { + StringSet<> DocSections; + for 
(std::unique_ptr &D : Doc.Sections) { + if (!D->Name.empty()) + DocSections.insert(D->Name); + + // Some sections wants to link to .symtab by default. + // That means we want to create the symbol table for them. + if (D->Type == llvm::ELF::SHT_REL || D->Type == llvm::ELF::SHT_RELA) + if (!Doc.Symbols && D->Link.empty()) + Doc.Symbols.emplace(); + } + + // Insert SHT_NULL section implicitly when it is not defined in YAML. + if (Doc.Sections.empty() || Doc.Sections.front()->Type != ELF::SHT_NULL) + Doc.Sections.insert( + Doc.Sections.begin(), + std::make_unique( + ELFYAML::Section::SectionKind::RawContent, /*IsImplicit=*/true)); + + std::vector ImplicitSections; + if (Doc.Symbols) + ImplicitSections.push_back(".symtab"); + ImplicitSections.insert(ImplicitSections.end(), {".strtab", ".shstrtab"}); + + if (!Doc.DynamicSymbols.empty()) + ImplicitSections.insert(ImplicitSections.end(), {".dynsym", ".dynstr"}); + + // Insert placeholders for implicit sections that are not + // defined explicitly in YAML. + for (StringRef SecName : ImplicitSections) { + if (DocSections.count(SecName)) + continue; + + std::unique_ptr Sec = std::make_unique( + ELFYAML::Section::SectionKind::RawContent, true /*IsImplicit*/); + Sec->Name = SecName; + Doc.Sections.push_back(std::move(Sec)); + } +} + +template +void ELFState::writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream &OS) { + using namespace llvm::ELF; + + Elf_Ehdr Header; + zero(Header); + Header.e_ident[EI_MAG0] = 0x7f; + Header.e_ident[EI_MAG1] = 'E'; + Header.e_ident[EI_MAG2] = 'L'; + Header.e_ident[EI_MAG3] = 'F'; + Header.e_ident[EI_CLASS] = ELFT::Is64Bits ? 
ELFCLASS64 : ELFCLASS32; + Header.e_ident[EI_DATA] = Doc.Header.Data; + Header.e_ident[EI_VERSION] = EV_CURRENT; + Header.e_ident[EI_OSABI] = Doc.Header.OSABI; + Header.e_ident[EI_ABIVERSION] = Doc.Header.ABIVersion; + Header.e_type = Doc.Header.Type; + Header.e_machine = Doc.Header.Machine; + Header.e_version = EV_CURRENT; + Header.e_entry = Doc.Header.Entry; + Header.e_phoff = Doc.ProgramHeaders.size() ? sizeof(Header) : 0; + Header.e_flags = Doc.Header.Flags; + Header.e_ehsize = sizeof(Elf_Ehdr); + Header.e_phentsize = Doc.ProgramHeaders.size() ? sizeof(Elf_Phdr) : 0; + Header.e_phnum = Doc.ProgramHeaders.size(); + + Header.e_shentsize = + Doc.Header.SHEntSize ? (uint16_t)*Doc.Header.SHEntSize : sizeof(Elf_Shdr); + // Immediately following the ELF header and program headers. + // Align the start of the section header and write the ELF header. + uint64_t SHOff; + CBA.getOSAndAlignedOffset(SHOff, sizeof(typename ELFT::uint)); + Header.e_shoff = + Doc.Header.SHOff ? typename ELFT::uint(*Doc.Header.SHOff) : SHOff; + Header.e_shnum = + Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.Sections.size(); + Header.e_shstrndx = Doc.Header.SHStrNdx ? 
(uint16_t)*Doc.Header.SHStrNdx + : SN2I.get(".shstrtab"); + + OS.write((const char *)&Header, sizeof(Header)); +} + +template +void ELFState::initProgramHeaders(std::vector &PHeaders) { + for (const auto &YamlPhdr : Doc.ProgramHeaders) { + Elf_Phdr Phdr; + Phdr.p_type = YamlPhdr.Type; + Phdr.p_flags = YamlPhdr.Flags; + Phdr.p_vaddr = YamlPhdr.VAddr; + Phdr.p_paddr = YamlPhdr.PAddr; + PHeaders.push_back(Phdr); + } +} + +template +unsigned ELFState::toSectionIndex(StringRef S, StringRef LocSec, + StringRef LocSym) { + unsigned Index; + if (SN2I.lookup(S, Index) || to_integer(S, Index)) + return Index; + + assert(LocSec.empty() || LocSym.empty()); + if (!LocSym.empty()) + reportError("unknown section referenced: '" + S + "' by YAML symbol '" + + LocSym + "'"); + else + reportError("unknown section referenced: '" + S + "' by YAML section '" + + LocSec + "'"); + return 0; +} + +template +unsigned ELFState::toSymbolIndex(StringRef S, StringRef LocSec, + bool IsDynamic) { + const NameToIdxMap &SymMap = IsDynamic ? DynSymN2I : SymN2I; + unsigned Index; + // Here we try to look up S in the symbol table. If it is not there, + // treat its value as a symbol index. + if (!SymMap.lookup(S, Index) && !to_integer(S, Index)) { + reportError("unknown symbol referenced: '" + S + "' by YAML section '" + + LocSec + "'"); + return 0; + } + return Index; +} + +template +bool ELFState::initImplicitHeader(ContiguousBlobAccumulator &CBA, + Elf_Shdr &Header, StringRef SecName, + ELFYAML::Section *YAMLSec) { + // Check if the header was already initialized. 
+ if (Header.sh_offset) + return false; + + if (SecName == ".symtab") + initSymtabSectionHeader(Header, SymtabType::Static, CBA, YAMLSec); + else if (SecName == ".strtab") + initStrtabSectionHeader(Header, SecName, DotStrtab, CBA, YAMLSec); + else if (SecName == ".shstrtab") + initStrtabSectionHeader(Header, SecName, DotShStrtab, CBA, YAMLSec); + else if (SecName == ".dynsym") + initSymtabSectionHeader(Header, SymtabType::Dynamic, CBA, YAMLSec); + else if (SecName == ".dynstr") + initStrtabSectionHeader(Header, SecName, DotDynstr, CBA, YAMLSec); + else + return false; + + // Override the fields if requested. + if (YAMLSec) { + if (YAMLSec->ShName) + Header.sh_name = *YAMLSec->ShName; + if (YAMLSec->ShOffset) + Header.sh_offset = *YAMLSec->ShOffset; + if (YAMLSec->ShSize) + Header.sh_size = *YAMLSec->ShSize; + } + + return true; +} + +StringRef llvm::ELFYAML::dropUniqueSuffix(StringRef S) { + size_t SuffixPos = S.rfind(" ["); + if (SuffixPos == StringRef::npos) + return S; + return S.substr(0, SuffixPos); +} + +template +void ELFState::initSectionHeaders(std::vector &SHeaders, + ContiguousBlobAccumulator &CBA) { + // Ensure SHN_UNDEF entry is present. An all-zero section header is a + // valid SHN_UNDEF entry since SHT_NULL == 0. + SHeaders.resize(Doc.Sections.size()); + + for (size_t I = 0; I < Doc.Sections.size(); ++I) { + ELFYAML::Section *Sec = Doc.Sections[I].get(); + if (I == 0 && Sec->IsImplicit) + continue; + + // We have a few sections like string or symbol tables that are usually + // added implicitly to the end. However, if they are explicitly specified + // in the YAML, we need to write them here. This ensures the file offset + // remains correct. + Elf_Shdr &SHeader = SHeaders[I]; + if (initImplicitHeader(CBA, SHeader, Sec->Name, + Sec->IsImplicit ? nullptr : Sec)) + continue; + + assert(Sec && "It can't be null unless it is an implicit section. 
But all " + "implicit sections should already have been handled above."); + + SHeader.sh_name = + DotShStrtab.getOffset(ELFYAML::dropUniqueSuffix(Sec->Name)); + SHeader.sh_type = Sec->Type; + if (Sec->Flags) + SHeader.sh_flags = *Sec->Flags; + SHeader.sh_addr = Sec->Address; + SHeader.sh_addralign = Sec->AddressAlign; + + if (!Sec->Link.empty()) + SHeader.sh_link = toSectionIndex(Sec->Link, Sec->Name); + + if (I == 0) { + if (auto RawSec = dyn_cast(Sec)) { + // We do not write any content for special SHN_UNDEF section. + if (RawSec->Size) + SHeader.sh_size = *RawSec->Size; + if (RawSec->Info) + SHeader.sh_info = *RawSec->Info; + } + if (Sec->EntSize) + SHeader.sh_entsize = *Sec->EntSize; + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + SHeader.sh_entsize = 0; + SHeader.sh_size = S->Size; + // SHT_NOBITS section does not have content + // so just to setup the section offset. 
+ CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else if (auto S = dyn_cast(Sec)) { + writeSectionContent(SHeader, *S, CBA); + } else { + llvm_unreachable("Unknown section type"); + } + + // Override the fields if requested. + if (Sec) { + if (Sec->ShName) + SHeader.sh_name = *Sec->ShName; + if (Sec->ShOffset) + SHeader.sh_offset = *Sec->ShOffset; + if (Sec->ShSize) + SHeader.sh_size = *Sec->ShSize; + } + } +} + +static size_t findFirstNonGlobal(ArrayRef Symbols) { + for (size_t I = 0; I < Symbols.size(); ++I) + if (Symbols[I].Binding.value != ELF::STB_LOCAL) + return I; + return Symbols.size(); +} + +static uint64_t writeContent(raw_ostream &OS, + const Optional &Content, + const Optional &Size) { + size_t ContentSize = 0; + if (Content) { + Content->writeAsBinary(OS); + ContentSize = Content->binary_size(); + } + + if (!Size) + return ContentSize; + + OS.write_zeros(*Size - ContentSize); + return *Size; +} + +template +std::vector +ELFState::toELFSymbols(ArrayRef Symbols, + const StringTableBuilder &Strtab) { + std::vector Ret; + Ret.resize(Symbols.size() + 1); + + size_t I = 0; + for (const auto &Sym : Symbols) { + Elf_Sym &Symbol = Ret[++I]; + + // If NameIndex, which contains the name offset, is explicitly specified, we + // use it. This is useful for preparing broken objects. Otherwise, we add + // the specified Name to the string table builder to get its offset. 
+ if (Sym.NameIndex) + Symbol.st_name = *Sym.NameIndex; + else if (!Sym.Name.empty()) + Symbol.st_name = Strtab.getOffset(ELFYAML::dropUniqueSuffix(Sym.Name)); + + Symbol.setBindingAndType(Sym.Binding, Sym.Type); + if (!Sym.Section.empty()) + Symbol.st_shndx = toSectionIndex(Sym.Section, "", Sym.Name); + else if (Sym.Index) + Symbol.st_shndx = *Sym.Index; + + Symbol.st_value = Sym.Value; + Symbol.st_other = Sym.Other ? *Sym.Other : 0; + Symbol.st_size = Sym.Size; + } + + return Ret; +} + +template +void ELFState::initSymtabSectionHeader(Elf_Shdr &SHeader, + SymtabType STType, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec) { + + bool IsStatic = STType == SymtabType::Static; + ArrayRef Symbols; + if (IsStatic && Doc.Symbols) + Symbols = *Doc.Symbols; + else if (!IsStatic) + Symbols = Doc.DynamicSymbols; + + ELFYAML::RawContentSection *RawSec = + dyn_cast_or_null(YAMLSec); + if (RawSec && !Symbols.empty() && (RawSec->Content || RawSec->Size)) { + if (RawSec->Content) + reportError("cannot specify both `Content` and " + + (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + + " for symbol table section '" + RawSec->Name + "'"); + if (RawSec->Size) + reportError("cannot specify both `Size` and " + + (IsStatic ? Twine("`Symbols`") : Twine("`DynamicSymbols`")) + + " for symbol table section '" + RawSec->Name + "'"); + return; + } + + zero(SHeader); + SHeader.sh_name = DotShStrtab.getOffset(IsStatic ? ".symtab" : ".dynsym"); + + if (YAMLSec) + SHeader.sh_type = YAMLSec->Type; + else + SHeader.sh_type = IsStatic ? ELF::SHT_SYMTAB : ELF::SHT_DYNSYM; + + if (RawSec && !RawSec->Link.empty()) { + // If the Link field is explicitly defined in the document, + // we should use it. + SHeader.sh_link = toSectionIndex(RawSec->Link, RawSec->Name); + } else { + // When we describe the .dynsym section in the document explicitly, it is + // allowed to omit the "DynamicSymbols" tag. 
In this case .dynstr is not + // added implicitly and we should be able to leave the Link zeroed if + // .dynstr is not defined. + unsigned Link = 0; + if (IsStatic) + Link = SN2I.get(".strtab"); + else + SN2I.lookup(".dynstr", Link); + SHeader.sh_link = Link; + } + + if (YAMLSec && YAMLSec->Flags) + SHeader.sh_flags = *YAMLSec->Flags; + else if (!IsStatic) + SHeader.sh_flags = ELF::SHF_ALLOC; + + // If the symbol table section is explicitly described in the YAML + // then we should set the fields requested. + SHeader.sh_info = (RawSec && RawSec->Info) ? (unsigned)(*RawSec->Info) + : findFirstNonGlobal(Symbols) + 1; + SHeader.sh_entsize = (YAMLSec && YAMLSec->EntSize) + ? (uint64_t)(*YAMLSec->EntSize) + : sizeof(Elf_Sym); + SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 8; + SHeader.sh_addr = YAMLSec ? (uint64_t)YAMLSec->Address : 0; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + if (RawSec && (RawSec->Content || RawSec->Size)) { + assert(Symbols.empty()); + SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size); + return; + } + + std::vector Syms = + toELFSymbols(Symbols, IsStatic ? DotStrtab : DotDynstr); + writeArrayData(OS, makeArrayRef(Syms)); + SHeader.sh_size = arrayDataSize(makeArrayRef(Syms)); +} + +template +void ELFState::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, + StringTableBuilder &STB, + ContiguousBlobAccumulator &CBA, + ELFYAML::Section *YAMLSec) { + zero(SHeader); + SHeader.sh_name = DotShStrtab.getOffset(Name); + SHeader.sh_type = YAMLSec ? YAMLSec->Type : ELF::SHT_STRTAB; + SHeader.sh_addralign = YAMLSec ? 
(uint64_t)YAMLSec->AddressAlign : 1; + + ELFYAML::RawContentSection *RawSec = + dyn_cast_or_null(YAMLSec); + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + if (RawSec && (RawSec->Content || RawSec->Size)) { + SHeader.sh_size = writeContent(OS, RawSec->Content, RawSec->Size); + } else { + STB.write(OS); + SHeader.sh_size = STB.getSize(); + } + + if (YAMLSec && YAMLSec->EntSize) + SHeader.sh_entsize = *YAMLSec->EntSize; + + if (RawSec && RawSec->Info) + SHeader.sh_info = *RawSec->Info; + + if (YAMLSec && YAMLSec->Flags) + SHeader.sh_flags = *YAMLSec->Flags; + else if (Name == ".dynstr") + SHeader.sh_flags = ELF::SHF_ALLOC; + + // If the section is explicitly described in the YAML + // then we want to use its section address. + if (YAMLSec) + SHeader.sh_addr = YAMLSec->Address; +} + +template void ELFState::reportError(const Twine &Msg) { + ErrHandler(Msg); + HasError = true; +} + +template +void ELFState::setProgramHeaderLayout(std::vector &PHeaders, + std::vector &SHeaders) { + uint32_t PhdrIdx = 0; + for (auto &YamlPhdr : Doc.ProgramHeaders) { + Elf_Phdr &PHeader = PHeaders[PhdrIdx++]; + + std::vector Sections; + for (const ELFYAML::SectionName &SecName : YamlPhdr.Sections) { + unsigned Index; + if (!SN2I.lookup(SecName.Section, Index)) { + reportError("unknown section referenced: '" + SecName.Section + + "' by program header"); + continue; + } + Sections.push_back(&SHeaders[Index]); + } + + if (YamlPhdr.Offset) { + PHeader.p_offset = *YamlPhdr.Offset; + } else { + if (YamlPhdr.Sections.size()) + PHeader.p_offset = UINT32_MAX; + else + PHeader.p_offset = 0; + + // Find the minimum offset for the program header. + for (Elf_Shdr *SHeader : Sections) + PHeader.p_offset = std::min(PHeader.p_offset, SHeader->sh_offset); + } + + // Find the maximum offset of the end of a section in order to set p_filesz + // and p_memsz. When setting p_filesz, trailing SHT_NOBITS sections are not + // counted. 
+ uint64_t FileOffset = PHeader.p_offset, MemOffset = PHeader.p_offset; + for (Elf_Shdr *SHeader : Sections) { + uint64_t End = SHeader->sh_offset + SHeader->sh_size; + MemOffset = std::max(MemOffset, End); + + if (SHeader->sh_type != llvm::ELF::SHT_NOBITS) + FileOffset = std::max(FileOffset, End); + } + + // Set the file size and the memory size if not set explicitly. + PHeader.p_filesz = YamlPhdr.FileSize ? uint64_t(*YamlPhdr.FileSize) + : FileOffset - PHeader.p_offset; + PHeader.p_memsz = YamlPhdr.MemSize ? uint64_t(*YamlPhdr.MemSize) + : MemOffset - PHeader.p_offset; + + if (YamlPhdr.Align) { + PHeader.p_align = *YamlPhdr.Align; + } else { + // Set the alignment of the segment to be the maximum alignment of the + // sections so that by default the segment has a valid and sensible + // alignment. + PHeader.p_align = 1; + for (Elf_Shdr *SHeader : Sections) + PHeader.p_align = std::max(PHeader.p_align, SHeader->sh_addralign); + } + } +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::RawContentSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else if (Section.Type == llvm::ELF::SHT_RELR) + SHeader.sh_entsize = sizeof(Elf_Relr); + else + SHeader.sh_entsize = 0; + + if (Section.Info) + SHeader.sh_info = *Section.Info; +} + +static bool isMips64EL(const ELFYAML::Object &Doc) { + return Doc.Header.Machine == ELFYAML::ELF_EM(llvm::ELF::EM_MIPS) && + Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64) && + Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB); +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::RelocationSection &Section, + ContiguousBlobAccumulator &CBA) { + assert((Section.Type == llvm::ELF::SHT_REL || + Section.Type == llvm::ELF::SHT_RELA) 
&& + "Section type is not SHT_REL nor SHT_RELA"); + + bool IsRela = Section.Type == llvm::ELF::SHT_RELA; + SHeader.sh_entsize = IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + SHeader.sh_size = SHeader.sh_entsize * Section.Relocations.size(); + + // For relocation section set link to .symtab by default. + if (Section.Link.empty()) + SHeader.sh_link = SN2I.get(".symtab"); + + if (!Section.RelocatableSec.empty()) + SHeader.sh_info = toSectionIndex(Section.RelocatableSec, Section.Name); + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (const auto &Rel : Section.Relocations) { + unsigned SymIdx = Rel.Symbol ? toSymbolIndex(*Rel.Symbol, Section.Name, + Section.Link == ".dynsym") + : 0; + if (IsRela) { + Elf_Rela REntry; + zero(REntry); + REntry.r_offset = Rel.Offset; + REntry.r_addend = Rel.Addend; + REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc)); + OS.write((const char *)&REntry, sizeof(REntry)); + } else { + Elf_Rel REntry; + zero(REntry); + REntry.r_offset = Rel.Offset; + REntry.setSymbolAndType(SymIdx, Rel.Type, isMips64EL(Doc)); + OS.write((const char *)&REntry, sizeof(REntry)); + } + } +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::SymtabShndxSection &Shndx, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + for (uint32_t E : Shndx.Entries) + support::endian::write(OS, E, ELFT::TargetEndianness); + + SHeader.sh_entsize = Shndx.EntSize ? 
(uint64_t)*Shndx.EntSize : 4; + SHeader.sh_size = Shndx.Entries.size() * SHeader.sh_entsize; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::Group &Section, + ContiguousBlobAccumulator &CBA) { + assert(Section.Type == llvm::ELF::SHT_GROUP && + "Section type is not SHT_GROUP"); + + SHeader.sh_entsize = 4; + SHeader.sh_size = SHeader.sh_entsize * Section.Members.size(); + SHeader.sh_info = + toSymbolIndex(Section.Signature, Section.Name, /*IsDynamic=*/false); + + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + for (const ELFYAML::SectionOrType &Member : Section.Members) { + unsigned int SectionIndex = 0; + if (Member.sectionNameOrType == "GRP_COMDAT") + SectionIndex = llvm::ELF::GRP_COMDAT; + else + SectionIndex = toSectionIndex(Member.sectionNameOrType, Section.Name); + support::endian::write(OS, SectionIndex, ELFT::TargetEndianness); + } +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::SymverSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (uint16_t Version : Section.Entries) + support::endian::write(OS, Version, ELFT::TargetEndianness); + + SHeader.sh_entsize = Section.EntSize ? 
(uint64_t)*Section.EntSize : 2; + SHeader.sh_size = Section.Entries.size() * SHeader.sh_entsize; +} + +template +void ELFState::writeSectionContent( + Elf_Shdr &SHeader, const ELFYAML::StackSizesSection &Section, + ContiguousBlobAccumulator &CBA) { + using uintX_t = typename ELFT::uint; + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + for (const ELFYAML::StackSizeEntry &E : *Section.Entries) { + support::endian::write(OS, E.Address, ELFT::TargetEndianness); + SHeader.sh_size += sizeof(uintX_t) + encodeULEB128(E.Size, OS); + } +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::HashSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + unsigned Link = 0; + if (Section.Link.empty() && SN2I.lookup(".dynsym", Link)) + SHeader.sh_link = Link; + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + support::endian::write(OS, Section.Bucket->size(), + ELFT::TargetEndianness); + support::endian::write(OS, Section.Chain->size(), + ELFT::TargetEndianness); + for (uint32_t Val : *Section.Bucket) + support::endian::write(OS, Val, ELFT::TargetEndianness); + for (uint32_t Val : *Section.Chain) + support::endian::write(OS, Val, ELFT::TargetEndianness); + + SHeader.sh_size = (2 + Section.Bucket->size() + Section.Chain->size()) * 4; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerdefSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::Verdef Elf_Verdef; + typedef typename ELFT::Verdaux Elf_Verdaux; + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + uint64_t AuxCnt = 0; + for (size_t I = 0; I < 
Section.Entries.size(); ++I) { + const ELFYAML::VerdefEntry &E = Section.Entries[I]; + + Elf_Verdef VerDef; + VerDef.vd_version = E.Version; + VerDef.vd_flags = E.Flags; + VerDef.vd_ndx = E.VersionNdx; + VerDef.vd_hash = E.Hash; + VerDef.vd_aux = sizeof(Elf_Verdef); + VerDef.vd_cnt = E.VerNames.size(); + if (I == Section.Entries.size() - 1) + VerDef.vd_next = 0; + else + VerDef.vd_next = + sizeof(Elf_Verdef) + E.VerNames.size() * sizeof(Elf_Verdaux); + OS.write((const char *)&VerDef, sizeof(Elf_Verdef)); + + for (size_t J = 0; J < E.VerNames.size(); ++J, ++AuxCnt) { + Elf_Verdaux VernAux; + VernAux.vda_name = DotDynstr.getOffset(E.VerNames[J]); + if (J == E.VerNames.size() - 1) + VernAux.vda_next = 0; + else + VernAux.vda_next = sizeof(Elf_Verdaux); + OS.write((const char *)&VernAux, sizeof(Elf_Verdaux)); + } + } + + SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) + + AuxCnt * sizeof(Elf_Verdaux); + SHeader.sh_info = Section.Info; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::VerneedSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::Verneed Elf_Verneed; + typedef typename ELFT::Vernaux Elf_Vernaux; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + uint64_t AuxCnt = 0; + for (size_t I = 0; I < Section.VerneedV.size(); ++I) { + const ELFYAML::VerneedEntry &VE = Section.VerneedV[I]; + + Elf_Verneed VerNeed; + VerNeed.vn_version = VE.Version; + VerNeed.vn_file = DotDynstr.getOffset(VE.File); + if (I == Section.VerneedV.size() - 1) + VerNeed.vn_next = 0; + else + VerNeed.vn_next = + sizeof(Elf_Verneed) + VE.AuxV.size() * sizeof(Elf_Vernaux); + VerNeed.vn_cnt = VE.AuxV.size(); + VerNeed.vn_aux = sizeof(Elf_Verneed); + OS.write((const char *)&VerNeed, sizeof(Elf_Verneed)); + + for (size_t J = 0; J < VE.AuxV.size(); ++J, ++AuxCnt) { + const ELFYAML::VernauxEntry &VAuxE = VE.AuxV[J]; + + Elf_Vernaux VernAux; + VernAux.vna_hash = VAuxE.Hash; + 
VernAux.vna_flags = VAuxE.Flags; + VernAux.vna_other = VAuxE.Other; + VernAux.vna_name = DotDynstr.getOffset(VAuxE.Name); + if (J == VE.AuxV.size() - 1) + VernAux.vna_next = 0; + else + VernAux.vna_next = sizeof(Elf_Vernaux); + OS.write((const char *)&VernAux, sizeof(Elf_Vernaux)); + } + } + + SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) + + AuxCnt * sizeof(Elf_Vernaux); + SHeader.sh_info = Section.Info; +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::MipsABIFlags &Section, + ContiguousBlobAccumulator &CBA) { + assert(Section.Type == llvm::ELF::SHT_MIPS_ABIFLAGS && + "Section type is not SHT_MIPS_ABIFLAGS"); + + object::Elf_Mips_ABIFlags Flags; + zero(Flags); + SHeader.sh_entsize = sizeof(Flags); + SHeader.sh_size = SHeader.sh_entsize; + + auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + Flags.version = Section.Version; + Flags.isa_level = Section.ISALevel; + Flags.isa_rev = Section.ISARevision; + Flags.gpr_size = Section.GPRSize; + Flags.cpr1_size = Section.CPR1Size; + Flags.cpr2_size = Section.CPR2Size; + Flags.fp_abi = Section.FpABI; + Flags.isa_ext = Section.ISAExtension; + Flags.ases = Section.ASEs; + Flags.flags1 = Section.Flags1; + Flags.flags2 = Section.Flags2; + OS.write((const char *)&Flags, sizeof(Flags)); +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::DynamicSection &Section, + ContiguousBlobAccumulator &CBA) { + typedef typename ELFT::uint uintX_t; + + assert(Section.Type == llvm::ELF::SHT_DYNAMIC && + "Section type is not SHT_DYNAMIC"); + + if (!Section.Entries.empty() && Section.Content) + reportError("cannot specify both raw content and explicit entries " + "for dynamic section '" + + Section.Name + "'"); + + if (Section.Content) + SHeader.sh_size = Section.Content->binary_size(); + else + SHeader.sh_size = 2 * sizeof(uintX_t) * Section.Entries.size(); + if (Section.EntSize) + SHeader.sh_entsize = *Section.EntSize; + else 
+ SHeader.sh_entsize = sizeof(Elf_Dyn); + + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + for (const ELFYAML::DynamicEntry &DE : Section.Entries) { + support::endian::write(OS, DE.Tag, ELFT::TargetEndianness); + support::endian::write(OS, DE.Val, ELFT::TargetEndianness); + } + if (Section.Content) + Section.Content->writeAsBinary(OS); +} + +template +void ELFState::writeSectionContent(Elf_Shdr &SHeader, + const ELFYAML::AddrsigSection &Section, + ContiguousBlobAccumulator &CBA) { + raw_ostream &OS = + CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + + unsigned Link = 0; + if (Section.Link.empty() && SN2I.lookup(".symtab", Link)) + SHeader.sh_link = Link; + + if (Section.Content || Section.Size) { + SHeader.sh_size = writeContent(OS, Section.Content, Section.Size); + return; + } + + for (const ELFYAML::AddrsigSymbol &Sym : *Section.Symbols) { + uint64_t Val = + Sym.Name ? toSymbolIndex(*Sym.Name, Section.Name, /*IsDynamic=*/false) + : (uint32_t)*Sym.Index; + SHeader.sh_size += encodeULEB128(Val, OS); + } +} + +template void ELFState::buildSectionIndex() { + for (unsigned I = 0, E = Doc.Sections.size(); I != E; ++I) { + StringRef Name = Doc.Sections[I]->Name; + if (Name.empty()) + continue; + + DotShStrtab.add(ELFYAML::dropUniqueSuffix(Name)); + if (!SN2I.addName(Name, I)) + reportError("repeated section name: '" + Name + + "' at YAML section number " + Twine(I)); + } + + DotShStrtab.finalize(); +} + +template void ELFState::buildSymbolIndexes() { + auto Build = [this](ArrayRef V, NameToIdxMap &Map) { + for (size_t I = 0, S = V.size(); I < S; ++I) { + const ELFYAML::Symbol &Sym = V[I]; + if (!Sym.Name.empty() && !Map.addName(Sym.Name, I + 1)) + reportError("repeated symbol name: '" + Sym.Name + "'"); + } + }; + + if (Doc.Symbols) + Build(*Doc.Symbols, SymN2I); + Build(Doc.DynamicSymbols, DynSymN2I); +} + +template void ELFState::finalizeStrings() { + // Add the regular symbol names to .strtab section. 
+ if (Doc.Symbols) + for (const ELFYAML::Symbol &Sym : *Doc.Symbols) + DotStrtab.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + DotStrtab.finalize(); + + // Add the dynamic symbol names to .dynstr section. + for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols) + DotDynstr.add(ELFYAML::dropUniqueSuffix(Sym.Name)); + + // SHT_GNU_verdef and SHT_GNU_verneed sections might also + // add strings to .dynstr section. + for (const std::unique_ptr &Sec : Doc.Sections) { + if (auto VerNeed = dyn_cast(Sec.get())) { + for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) { + DotDynstr.add(VE.File); + for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) + DotDynstr.add(Aux.Name); + } + } else if (auto VerDef = dyn_cast(Sec.get())) { + for (const ELFYAML::VerdefEntry &E : VerDef->Entries) + for (StringRef Name : E.VerNames) + DotDynstr.add(Name); + } + } + + DotDynstr.finalize(); +} + +template +bool ELFState::writeELF(raw_ostream &OS, ELFYAML::Object &Doc, + yaml::ErrorHandler EH) { + ELFState State(Doc, EH); + + // Finalize .strtab and .dynstr sections. We do that early because want to + // finalize the string table builders before writing the content of the + // sections that might want to use them. + State.finalizeStrings(); + + State.buildSectionIndex(); + State.buildSymbolIndexes(); + + std::vector PHeaders; + State.initProgramHeaders(PHeaders); + + // XXX: This offset is tightly coupled with the order that we write + // things to `OS`. 
+ const size_t SectionContentBeginOffset = + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * Doc.ProgramHeaders.size(); + ContiguousBlobAccumulator CBA(SectionContentBeginOffset); + + std::vector SHeaders; + State.initSectionHeaders(SHeaders, CBA); + + // Now we can decide segment offsets + State.setProgramHeaderLayout(PHeaders, SHeaders); + + if (State.HasError) + return false; + + State.writeELFHeader(CBA, OS); + writeArrayData(OS, makeArrayRef(PHeaders)); + CBA.writeBlobToStream(OS); + writeArrayData(OS, makeArrayRef(SHeaders)); + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2elf(llvm::ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) { + bool IsLE = Doc.Header.Data == ELFYAML::ELF_ELFDATA(ELF::ELFDATA2LSB); + bool Is64Bit = Doc.Header.Class == ELFYAML::ELF_ELFCLASS(ELF::ELFCLASS64); + if (Is64Bit) { + if (IsLE) + return ELFState::writeELF(Out, Doc, EH); + return ELFState::writeELF(Out, Doc, EH); + } + if (IsLE) + return ELFState::writeELF(Out, Doc, EH); + return ELFState::writeELF(Out, Doc, EH); +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index 7497154c757..29585abe6e8 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -11,12 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/ELFYAML.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MipsABIFlags.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/WithColor.h" #include #include @@ -50,6 +52,8 @@ void ScalarEnumerationTraits::enumeration( ECase(PT_PHDR); ECase(PT_TLS); ECase(PT_GNU_EH_FRAME); + ECase(PT_GNU_STACK); + ECase(PT_GNU_RELRO); #undef ECase IO.enumFallback(Value); } @@ -217,6 +221,7 @@ void ScalarEnumerationTraits::enumeration( ECase(EM_LANAI); ECase(EM_BPF); #undef 
ECase + IO.enumFallback(Value); } void ScalarEnumerationTraits::enumeration( @@ -459,6 +464,9 @@ void ScalarEnumerationTraits::enumeration( ECase(SHT_LLVM_CALL_GRAPH_PROFILE); ECase(SHT_LLVM_ADDRSIG); ECase(SHT_LLVM_DEPENDENT_LIBRARIES); + ECase(SHT_LLVM_SYMPART); + ECase(SHT_LLVM_PART_EHDR); + ECase(SHT_LLVM_PART_PHDR); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); @@ -563,7 +571,7 @@ void ScalarEnumerationTraits::enumeration( ECase(SHN_HEXAGON_SCOMMON_4); ECase(SHN_HEXAGON_SCOMMON_8); #undef ECase - IO.enumFallback(Value); + IO.enumFallback(Value); } void ScalarEnumerationTraits::enumeration( @@ -592,34 +600,6 @@ void ScalarEnumerationTraits::enumeration( IO.enumFallback(Value); } -void ScalarEnumerationTraits::enumeration( - IO &IO, ELFYAML::ELF_STV &Value) { -#define ECase(X) IO.enumCase(Value, #X, ELF::X) - ECase(STV_DEFAULT); - ECase(STV_INTERNAL); - ECase(STV_HIDDEN); - ECase(STV_PROTECTED); -#undef ECase -} - -void ScalarBitSetTraits::bitset(IO &IO, - ELFYAML::ELF_STO &Value) { - const auto *Object = static_cast(IO.getContext()); - assert(Object && "The IO context is not initialized"); -#define BCase(X) IO.bitSetCase(Value, #X, ELF::X) - switch (Object->Header.Machine) { - case ELF::EM_MIPS: - BCase(STO_MIPS_OPTIONAL); - BCase(STO_MIPS_PLT); - BCase(STO_MIPS_PIC); - BCase(STO_MIPS_MICROMIPS); - break; - default: - break; // Nothing to do - } -#undef BCase -#undef BCaseMask -} void ScalarEnumerationTraits::enumeration( IO &IO, ELFYAML::ELF_RSS &Value) { @@ -671,8 +651,12 @@ void ScalarEnumerationTraits::enumeration( case ELF::EM_BPF: #include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; + case ELF::EM_PPC64: +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" + break; default: - llvm_unreachable("Unsupported architecture"); + // Nothing to do. 
+ break; } #undef ELF_RELOC IO.enumFallback(Value); @@ -845,7 +829,7 @@ void MappingTraits::mapping(IO &IO, IO.mapOptional("Entry", FileHdr.Entry, Hex64(0)); IO.mapOptional("SHEntSize", FileHdr.SHEntSize); - IO.mapOptional("SHOffset", FileHdr.SHOffset); + IO.mapOptional("SHOff", FileHdr.SHOff); IO.mapOptional("SHNum", FileHdr.SHNum); IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx); } @@ -863,18 +847,111 @@ void MappingTraits::mapping( IO.mapOptional("Offset", Phdr.Offset); } +LLVM_YAML_STRONG_TYPEDEF(StringRef, StOtherPiece) + +template <> struct ScalarTraits { + static void output(const StOtherPiece &Val, void *, raw_ostream &Out) { + Out << Val; + } + static StringRef input(StringRef Scalar, void *, StOtherPiece &Val) { + Val = Scalar; + return {}; + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; +template <> struct SequenceElementTraits { + static const bool flow = true; +}; + namespace { struct NormalizedOther { - NormalizedOther(IO &) - : Visibility(ELFYAML::ELF_STV(0)), Other(ELFYAML::ELF_STO(0)) {} - NormalizedOther(IO &, uint8_t Original) - : Visibility(Original & 0x3), Other(Original & ~0x3) {} + NormalizedOther(IO &IO) : YamlIO(IO) {} + NormalizedOther(IO &IO, Optional Original) : YamlIO(IO) { + assert(Original && "This constructor is only used for outputting YAML and " + "assumes a non-empty Original"); + std::vector Ret; + const auto *Object = static_cast(YamlIO.getContext()); + for (std::pair &P : + getFlags(Object->Header.Machine).takeVector()) { + uint8_t FlagValue = P.second; + if ((*Original & FlagValue) != FlagValue) + continue; + *Original &= ~FlagValue; + Ret.push_back({P.first}); + } - uint8_t denormalize(IO &) { return Visibility | Other; } + if (*Original != 0) { + UnknownFlagsHolder = std::to_string(*Original); + Ret.push_back({UnknownFlagsHolder}); + } - ELFYAML::ELF_STV Visibility; - ELFYAML::ELF_STO Other; + if (!Ret.empty()) + Other = std::move(Ret); + } + + uint8_t toValue(StringRef Name) { + const auto 
*Object = static_cast(YamlIO.getContext()); + MapVector Flags = getFlags(Object->Header.Machine); + + auto It = Flags.find(Name); + if (It != Flags.end()) + return It->second; + + uint8_t Val; + if (to_integer(Name, Val)) + return Val; + + YamlIO.setError("an unknown value is used for symbol's 'Other' field: " + + Name); + return 0; + } + + Optional denormalize(IO &) { + if (!Other) + return None; + uint8_t Ret = 0; + for (StOtherPiece &Val : *Other) + Ret |= toValue(Val); + return Ret; + } + + // st_other field is used to encode symbol visibility and platform-dependent + // flags and values. This method returns a name to value map that is used for + // parsing and encoding this field. + MapVector getFlags(unsigned EMachine) { + MapVector Map; + // STV_* values are just enumeration values. We add them in a reversed order + // because when we convert the st_other to named constants when printing + // YAML we want to use a maximum number of bits on each step: + // when we have st_other == 3, we want to print it as STV_PROTECTED (3), but + // not as STV_HIDDEN (2) + STV_INTERNAL (1). + Map["STV_PROTECTED"] = ELF::STV_PROTECTED; + Map["STV_HIDDEN"] = ELF::STV_HIDDEN; + Map["STV_INTERNAL"] = ELF::STV_INTERNAL; + // STV_DEFAULT is used to represent the default visibility and has a value + // 0. We want to be able to read it from YAML documents, but there is no + // reason to print it. + if (!YamlIO.outputting()) + Map["STV_DEFAULT"] = ELF::STV_DEFAULT; + + // MIPS is not consistent. All of the STO_MIPS_* values are bit flags, + // except STO_MIPS_MIPS16 which overlaps them. It should be checked and + // consumed first when we print the output, because we do not want to print + // any other flags that have the same bits instead. 
+ if (EMachine == ELF::EM_MIPS) { + Map["STO_MIPS_MIPS16"] = ELF::STO_MIPS_MIPS16; + Map["STO_MIPS_MICROMIPS"] = ELF::STO_MIPS_MICROMIPS; + Map["STO_MIPS_PIC"] = ELF::STO_MIPS_PIC; + Map["STO_MIPS_PLT"] = ELF::STO_MIPS_PLT; + Map["STO_MIPS_OPTIONAL"] = ELF::STO_MIPS_OPTIONAL; + } + return Map; + } + + IO &YamlIO; + Optional> Other; + std::string UnknownFlagsHolder; }; } // end anonymous namespace @@ -888,17 +965,21 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Symbol &Symbol) { IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0)); IO.mapOptional("Value", Symbol.Value, Hex64(0)); IO.mapOptional("Size", Symbol.Size, Hex64(0)); - MappingNormalization Keys(IO, Symbol.Other); - IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0)); - IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0)); + + // Symbol's Other field is a bit special. It is usually a field that + // represents st_other and holds the symbol visibility. However, on some + // platforms, it can contain bit fields and regular values, or even sometimes a + // crazy mix of them (see comments for NormalizedOther). Because of this, we + // need special handling. + MappingNormalization> Keys(IO, + Symbol.Other); + IO.mapOptional("Other", Keys->Other); } StringRef MappingTraits::validate(IO &IO, ELFYAML::Symbol &Symbol) { if (Symbol.Index && Symbol.Section.data()) return "Index and Section cannot both be specified for Symbol"; - if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX)) - return "Large indexes are not supported"; if (Symbol.NameIndex && !Symbol.Name.empty()) return "Name and NameIndex cannot both be specified for Symbol"; return StringRef(); @@ -914,10 +995,11 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) { IO.mapOptional("EntSize", Section.EntSize); // obj2yaml does not dump these fields. 
They are expected to be empty when we - // are producing YAML, because yaml2obj sets appropriate values for sh_offset - // and sh_size automatically when they are not explicitly defined. + // are producing YAML, because yaml2obj sets appropriate values for them + // automatically when they are not explicitly defined. assert(!IO.outputting() || (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue())); + IO.mapOptional("ShName", Section.ShName); IO.mapOptional("ShOffset", Section.ShOffset); IO.mapOptional("ShSize", Section.ShSize); } @@ -935,6 +1017,21 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { IO.mapOptional("Info", Section.Info); } +static void sectionMapping(IO &IO, ELFYAML::StackSizesSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Size", Section.Size); + IO.mapOptional("Entries", Section.Entries); +} + +static void sectionMapping(IO &IO, ELFYAML::HashSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Bucket", Section.Bucket); + IO.mapOptional("Chain", Section.Chain); + IO.mapOptional("Size", Section.Size); +} + static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { commonSectionMapping(IO, Section); IO.mapOptional("Size", Section.Size, Hex64(0)); @@ -969,6 +1066,18 @@ static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) { IO.mapRequired("Members", Group.Members); } +static void sectionMapping(IO &IO, ELFYAML::SymtabShndxSection &Section) { + commonSectionMapping(IO, Section); + IO.mapRequired("Entries", Section.Entries); +} + +static void sectionMapping(IO &IO, ELFYAML::AddrsigSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Size", Section.Size); + IO.mapOptional("Symbols", Section.Symbols); +} + void MappingTraits::mapping( IO &IO, ELFYAML::SectionOrType §ionOrType) { 
IO.mapRequired("SectionOrType", sectionOrType.sectionNameOrType); @@ -1029,6 +1138,11 @@ void MappingTraits>::mapping( Section.reset(new ELFYAML::NoBitsSection()); sectionMapping(IO, *cast(Section.get())); break; + case ELF::SHT_HASH: + if (!IO.outputting()) + Section.reset(new ELFYAML::HashSection()); + sectionMapping(IO, *cast(Section.get())); + break; case ELF::SHT_MIPS_ABIFLAGS: if (!IO.outputting()) Section.reset(new ELFYAML::MipsABIFlags()); @@ -1049,21 +1163,113 @@ void MappingTraits>::mapping( Section.reset(new ELFYAML::VerneedSection()); sectionMapping(IO, *cast(Section.get())); break; - default: + case ELF::SHT_SYMTAB_SHNDX: if (!IO.outputting()) - Section.reset(new ELFYAML::RawContentSection()); - sectionMapping(IO, *cast(Section.get())); + Section.reset(new ELFYAML::SymtabShndxSection()); + sectionMapping(IO, *cast(Section.get())); + break; + case ELF::SHT_LLVM_ADDRSIG: + if (!IO.outputting()) + Section.reset(new ELFYAML::AddrsigSection()); + sectionMapping(IO, *cast(Section.get())); + break; + default: + if (!IO.outputting()) { + StringRef Name; + IO.mapOptional("Name", Name, StringRef()); + Name = ELFYAML::dropUniqueSuffix(Name); + + if (ELFYAML::StackSizesSection::nameMatches(Name)) + Section = std::make_unique(); + else + Section = std::make_unique(); + } + + if (auto S = dyn_cast(Section.get())) + sectionMapping(IO, *S); + else + sectionMapping(IO, *cast(Section.get())); } } StringRef MappingTraits>::validate( IO &io, std::unique_ptr &Section) { - const auto *RawSection = dyn_cast(Section.get()); - if (!RawSection) + if (const auto *RawSection = + dyn_cast(Section.get())) { + if (RawSection->Size && RawSection->Content && + (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size()) + return "Section size must be greater than or equal to the content size"; return {}; - if (RawSection->Size && RawSection->Content && - (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size()) - return "Section size must be greater than or equal to 
the content size"; + } + + if (const auto *SS = dyn_cast(Section.get())) { + if (!SS->Entries && !SS->Content && !SS->Size) + return ".stack_sizes: one of Content, Entries and Size must be specified"; + + if (SS->Size && SS->Content && + (uint64_t)(*SS->Size) < SS->Content->binary_size()) + return ".stack_sizes: Size must be greater than or equal to the content " + "size"; + + // We accept Content, Size or both together when there are no Entries. + if (!SS->Entries) + return {}; + + if (SS->Size) + return ".stack_sizes: Size and Entries cannot be used together"; + if (SS->Content) + return ".stack_sizes: Content and Entries cannot be used together"; + return {}; + } + + if (const auto *HS = dyn_cast(Section.get())) { + if (!HS->Content && !HS->Bucket && !HS->Chain && !HS->Size) + return "one of \"Content\", \"Size\", \"Bucket\" or \"Chain\" must be " + "specified"; + + if (HS->Content || HS->Size) { + if (HS->Size && HS->Content && + (uint64_t)*HS->Size < HS->Content->binary_size()) + return "\"Size\" must be greater than or equal to the content " + "size"; + + if (HS->Bucket) + return "\"Bucket\" cannot be used with \"Content\" or \"Size\""; + if (HS->Chain) + return "\"Chain\" cannot be used with \"Content\" or \"Size\""; + return {}; + } + + if ((HS->Bucket && !HS->Chain) || (!HS->Bucket && HS->Chain)) + return "\"Bucket\" and \"Chain\" must be used together"; + return {}; + } + + if (const auto *Sec = dyn_cast(Section.get())) { + if (!Sec->Symbols && !Sec->Content && !Sec->Size) + return "one of \"Content\", \"Size\" or \"Symbols\" must be specified"; + + if (Sec->Content || Sec->Size) { + if (Sec->Size && Sec->Content && + (uint64_t)*Sec->Size < Sec->Content->binary_size()) + return "\"Size\" must be greater than or equal to the content " + "size"; + + if (Sec->Symbols) + return "\"Symbols\" cannot be used with \"Content\" or \"Size\""; + return {}; + } + + if (!Sec->Symbols) + return {}; + + for (const ELFYAML::AddrsigSymbol &AS : *Sec->Symbols) + if 
(AS.Index && AS.Name) + return "\"Index\" and \"Name\" cannot be used together when defining a " + "symbol"; + return {}; + } + return {}; } @@ -1092,6 +1298,13 @@ struct NormalizedMips64RelType { } // end anonymous namespace +void MappingTraits::mapping( + IO &IO, ELFYAML::StackSizeEntry &E) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("Address", E.Address, Hex64(0)); + IO.mapRequired("Size", E.Size); +} + void MappingTraits::mapping(IO &IO, ELFYAML::DynamicEntry &Rel) { assert(IO.getContext() && "The IO context is not initialized"); @@ -1164,6 +1377,12 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Object &Object) { IO.setContext(nullptr); } +void MappingTraits::mapping(IO &IO, ELFYAML::AddrsigSymbol &Sym) { + assert(IO.getContext() && "The IO context is not initialized"); + IO.mapOptional("Name", Sym.Name); + IO.mapOptional("Index", Sym.Index); +} + LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_AFL_REG) LLVM_YAML_STRONG_TYPEDEF(uint8_t, MIPS_ABI_FP) LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_AFL_EXT) diff --git a/lib/ObjectYAML/MachOEmitter.cpp b/lib/ObjectYAML/MachOEmitter.cpp new file mode 100644 index 00000000000..b56f811ce67 --- /dev/null +++ b/lib/ObjectYAML/MachOEmitter.cpp @@ -0,0 +1,580 @@ +//===- yaml2macho - Convert YAML to a Mach object file --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The Mach component of yaml2obj. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/ObjectYAML/DWARFEmitter.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/Support/Format.h" + +using namespace llvm; + +namespace { + +class MachOWriter { +public: + MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), is64Bit(true), fileStart(0) { + is64Bit = Obj.Header.magic == MachO::MH_MAGIC_64 || + Obj.Header.magic == MachO::MH_CIGAM_64; + memset(reinterpret_cast(&Header), 0, sizeof(MachO::mach_header_64)); + } + + void writeMachO(raw_ostream &OS); + +private: + void writeHeader(raw_ostream &OS); + void writeLoadCommands(raw_ostream &OS); + void writeSectionData(raw_ostream &OS); + void writeLinkEditData(raw_ostream &OS); + + void writeBindOpcodes(raw_ostream &OS, + std::vector &BindOpcodes); + // LinkEdit writers + void writeRebaseOpcodes(raw_ostream &OS); + void writeBasicBindOpcodes(raw_ostream &OS); + void writeWeakBindOpcodes(raw_ostream &OS); + void writeLazyBindOpcodes(raw_ostream &OS); + void writeNameList(raw_ostream &OS); + void writeStringTable(raw_ostream &OS); + void writeExportTrie(raw_ostream &OS); + + void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry); + void ZeroToOffset(raw_ostream &OS, size_t offset); + + MachOYAML::Object &Obj; + bool is64Bit; + uint64_t fileStart; + + MachO::mach_header_64 Header; +}; + +void MachOWriter::writeMachO(raw_ostream &OS) { + fileStart = OS.tell(); + writeHeader(OS); + writeLoadCommands(OS); + writeSectionData(OS); +} + +void MachOWriter::writeHeader(raw_ostream &OS) { + Header.magic = Obj.Header.magic; + Header.cputype = Obj.Header.cputype; + Header.cpusubtype = Obj.Header.cpusubtype; + Header.filetype = Obj.Header.filetype; + Header.ncmds = Obj.Header.ncmds; + Header.sizeofcmds = 
Obj.Header.sizeofcmds; + Header.flags = Obj.Header.flags; + Header.reserved = Obj.Header.reserved; + + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Header); + + auto header_size = + is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + OS.write((const char *)&Header, header_size); +} + +template +SectionType constructSection(MachOYAML::Section Sec) { + SectionType TempSec; + memcpy(reinterpret_cast(&TempSec.sectname[0]), &Sec.sectname[0], 16); + memcpy(reinterpret_cast(&TempSec.segname[0]), &Sec.segname[0], 16); + TempSec.addr = Sec.addr; + TempSec.size = Sec.size; + TempSec.offset = Sec.offset; + TempSec.align = Sec.align; + TempSec.reloff = Sec.reloff; + TempSec.nreloc = Sec.nreloc; + TempSec.flags = Sec.flags; + TempSec.reserved1 = Sec.reserved1; + TempSec.reserved2 = Sec.reserved2; + return TempSec; +} + +template +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, raw_ostream &OS, + bool IsLittleEndian) { + return 0; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(TempSec); + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section)); + BytesWritten += sizeof(MachO::section); + } + return BytesWritten; +} + +template <> +size_t writeLoadCommandData( + MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &Sec : LC.Sections) { + auto TempSec = constructSection(Sec); + TempSec.reserved3 = Sec.reserved3; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(TempSec); + OS.write(reinterpret_cast(&(TempSec)), + sizeof(MachO::section_64)); + BytesWritten += sizeof(MachO::section_64); + } + return BytesWritten; +} + +size_t writePayloadString(MachOYAML::LoadCommand &LC, raw_ostream &OS) { + size_t 
BytesWritten = 0; + if (!LC.PayloadString.empty()) { + OS.write(LC.PayloadString.c_str(), LC.PayloadString.length()); + BytesWritten = LC.PayloadString.length(); + } + return BytesWritten; +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData(MachOYAML::LoadCommand &LC, + raw_ostream &OS, + bool IsLittleEndian) { + return writePayloadString(LC, OS); +} + +template <> +size_t writeLoadCommandData( + MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) { + size_t BytesWritten = 0; + for (const auto &T : LC.Tools) { + struct MachO::build_tool_version tool = T; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(tool); + OS.write(reinterpret_cast(&tool), + sizeof(MachO::build_tool_version)); + BytesWritten += sizeof(MachO::build_tool_version); + } + return BytesWritten; +} + +void ZeroFillBytes(raw_ostream &OS, size_t Size) { + std::vector FillData; + FillData.insert(FillData.begin(), Size, 0); + OS.write(reinterpret_cast(FillData.data()), Size); +} + +void Fill(raw_ostream &OS, size_t Size, uint32_t Data) { + std::vector FillData; + FillData.insert(FillData.begin(), (Size / 4) + 1, Data); + OS.write(reinterpret_cast(FillData.data()), Size); +} + +void MachOWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) { + auto currOffset = OS.tell() - fileStart; + if (currOffset < Offset) + ZeroFillBytes(OS, Offset - currOffset); +} + +void MachOWriter::writeLoadCommands(raw_ostream &OS) { + for (auto &LC : Obj.LoadCommands) { + size_t BytesWritten = 0; + llvm::MachO::macho_load_command Data = LC.Data; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) \ + 
MachO::swapStruct(Data.LCStruct##_data); \ + OS.write(reinterpret_cast(&(Data.LCStruct##_data)), \ + sizeof(MachO::LCStruct)); \ + BytesWritten = sizeof(MachO::LCStruct); \ + BytesWritten += \ + writeLoadCommandData(LC, OS, Obj.IsLittleEndian); \ + break; + + switch (LC.Data.load_command_data.cmd) { + default: + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Data.load_command_data); + OS.write(reinterpret_cast(&(Data.load_command_data)), + sizeof(MachO::load_command)); + BytesWritten = sizeof(MachO::load_command); + BytesWritten += + writeLoadCommandData(LC, OS, Obj.IsLittleEndian); + break; +#include "llvm/BinaryFormat/MachO.def" + } + + if (LC.PayloadBytes.size() > 0) { + OS.write(reinterpret_cast(LC.PayloadBytes.data()), + LC.PayloadBytes.size()); + BytesWritten += LC.PayloadBytes.size(); + } + + if (LC.ZeroPadBytes > 0) { + ZeroFillBytes(OS, LC.ZeroPadBytes); + BytesWritten += LC.ZeroPadBytes; + } + + // Fill remaining bytes with 0. This will only get hit in partially + // specified test cases. + auto BytesRemaining = LC.Data.load_command_data.cmdsize - BytesWritten; + if (BytesRemaining > 0) { + ZeroFillBytes(OS, BytesRemaining); + } + } +} + +void MachOWriter::writeSectionData(raw_ostream &OS) { + bool FoundLinkEditSeg = false; + for (auto &LC : Obj.LoadCommands) { + switch (LC.Data.load_command_data.cmd) { + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + uint64_t segOff = is64Bit ? LC.Data.segment_command_64_data.fileoff + : LC.Data.segment_command_data.fileoff; + if (0 == + strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) { + FoundLinkEditSeg = true; + writeLinkEditData(OS); + } + for (auto &Sec : LC.Sections) { + ZeroToOffset(OS, Sec.offset); + // Zero Fill any data between the end of the last thing we wrote and the + // start of this section. 
+ assert((OS.tell() - fileStart <= Sec.offset || + Sec.offset == (uint32_t)0) && + "Wrote too much data somewhere, section offsets don't line up."); + if (0 == strncmp(&Sec.segname[0], "__DWARF", 16)) { + if (0 == strncmp(&Sec.sectname[0], "__debug_str", 16)) { + DWARFYAML::EmitDebugStr(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_abbrev", 16)) { + DWARFYAML::EmitDebugAbbrev(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_aranges", 16)) { + DWARFYAML::EmitDebugAranges(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) { + DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubNames, + Obj.IsLittleEndian); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubtypes", 16)) { + DWARFYAML::EmitPubSection(OS, Obj.DWARF.PubTypes, + Obj.IsLittleEndian); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_info", 16)) { + DWARFYAML::EmitDebugInfo(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_line", 16)) { + DWARFYAML::EmitDebugLine(OS, Obj.DWARF); + } + + continue; + } + + // Skip if it's a virtual section. + if (MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE)) + continue; + + if (Sec.content) { + yaml::BinaryRef Content = *Sec.content; + Content.writeAsBinary(OS); + ZeroFillBytes(OS, Sec.size - Content.binary_size()); + } else { + // Fill section data with 0xDEADBEEF. + Fill(OS, Sec.size, 0xDEADBEEFu); + } + } + uint64_t segSize = is64Bit ? LC.Data.segment_command_64_data.filesize + : LC.Data.segment_command_data.filesize; + ZeroToOffset(OS, segOff + segSize); + break; + } + } + // Old PPC Object Files didn't have __LINKEDIT segments, the data was just + // stuck at the end of the file. 
+ if (!FoundLinkEditSeg) + writeLinkEditData(OS); +} + +void MachOWriter::writeBindOpcodes( + raw_ostream &OS, std::vector &BindOpcodes) { + + for (auto Opcode : BindOpcodes) { + uint8_t OpByte = Opcode.Opcode | Opcode.Imm; + OS.write(reinterpret_cast(&OpByte), 1); + for (auto Data : Opcode.ULEBExtraData) { + encodeULEB128(Data, OS); + } + for (auto Data : Opcode.SLEBExtraData) { + encodeSLEB128(Data, OS); + } + if (!Opcode.Symbol.empty()) { + OS.write(Opcode.Symbol.data(), Opcode.Symbol.size()); + OS.write('\0'); + } + } +} + +void MachOWriter::dumpExportEntry(raw_ostream &OS, + MachOYAML::ExportEntry &Entry) { + encodeSLEB128(Entry.TerminalSize, OS); + if (Entry.TerminalSize > 0) { + encodeSLEB128(Entry.Flags, OS); + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { + encodeSLEB128(Entry.Other, OS); + OS << Entry.ImportName; + OS.write('\0'); + } else { + encodeSLEB128(Entry.Address, OS); + if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + encodeSLEB128(Entry.Other, OS); + } + } + OS.write(static_cast(Entry.Children.size())); + for (auto EE : Entry.Children) { + OS << EE.Name; + OS.write('\0'); + encodeSLEB128(EE.NodeOffset, OS); + } + for (auto EE : Entry.Children) + dumpExportEntry(OS, EE); +} + +void MachOWriter::writeExportTrie(raw_ostream &OS) { + dumpExportEntry(OS, Obj.LinkEdit.ExportTrie); +} + +template +void writeNListEntry(MachOYAML::NListEntry &NLE, raw_ostream &OS, + bool IsLittleEndian) { + NListType ListEntry; + ListEntry.n_strx = NLE.n_strx; + ListEntry.n_type = NLE.n_type; + ListEntry.n_sect = NLE.n_sect; + ListEntry.n_desc = NLE.n_desc; + ListEntry.n_value = NLE.n_value; + + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(ListEntry); + OS.write(reinterpret_cast(&ListEntry), sizeof(NListType)); +} + +void MachOWriter::writeLinkEditData(raw_ostream &OS) { + typedef void (MachOWriter::*writeHandler)(raw_ostream &); + typedef std::pair writeOperation; + std::vector WriteQueue; + + MachO::dyld_info_command 
*DyldInfoOnlyCmd = 0; + MachO::symtab_command *SymtabCmd = 0; + for (auto &LC : Obj.LoadCommands) { + switch (LC.Data.load_command_data.cmd) { + case MachO::LC_SYMTAB: + SymtabCmd = &LC.Data.symtab_command_data; + WriteQueue.push_back( + std::make_pair(SymtabCmd->symoff, &MachOWriter::writeNameList)); + WriteQueue.push_back( + std::make_pair(SymtabCmd->stroff, &MachOWriter::writeStringTable)); + break; + case MachO::LC_DYLD_INFO_ONLY: + DyldInfoOnlyCmd = &LC.Data.dyld_info_command_data; + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->rebase_off, + &MachOWriter::writeRebaseOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->bind_off, + &MachOWriter::writeBasicBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->weak_bind_off, + &MachOWriter::writeWeakBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->lazy_bind_off, + &MachOWriter::writeLazyBindOpcodes)); + WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off, + &MachOWriter::writeExportTrie)); + break; + } + } + + llvm::sort(WriteQueue, [](const writeOperation &a, const writeOperation &b) { + return a.first < b.first; + }); + + for (auto writeOp : WriteQueue) { + ZeroToOffset(OS, writeOp.first); + (this->*writeOp.second)(OS); + } +} + +void MachOWriter::writeRebaseOpcodes(raw_ostream &OS) { + MachOYAML::LinkEditData &LinkEdit = Obj.LinkEdit; + + for (auto Opcode : LinkEdit.RebaseOpcodes) { + uint8_t OpByte = Opcode.Opcode | Opcode.Imm; + OS.write(reinterpret_cast(&OpByte), 1); + for (auto Data : Opcode.ExtraData) + encodeULEB128(Data, OS); + } +} + +void MachOWriter::writeBasicBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.BindOpcodes); +} + +void MachOWriter::writeWeakBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.WeakBindOpcodes); +} + +void MachOWriter::writeLazyBindOpcodes(raw_ostream &OS) { + writeBindOpcodes(OS, Obj.LinkEdit.LazyBindOpcodes); +} + +void MachOWriter::writeNameList(raw_ostream &OS) { + for 
(auto NLE : Obj.LinkEdit.NameList) { + if (is64Bit) + writeNListEntry(NLE, OS, Obj.IsLittleEndian); + else + writeNListEntry(NLE, OS, Obj.IsLittleEndian); + } +} + +void MachOWriter::writeStringTable(raw_ostream &OS) { + for (auto Str : Obj.LinkEdit.StringTable) { + OS.write(Str.data(), Str.size()); + OS.write('\0'); + } +} + +class UniversalWriter { +public: + UniversalWriter(yaml::YamlObjectFile &ObjectFile) + : ObjectFile(ObjectFile), fileStart(0) {} + + void writeMachO(raw_ostream &OS); + +private: + void writeFatHeader(raw_ostream &OS); + void writeFatArchs(raw_ostream &OS); + + void ZeroToOffset(raw_ostream &OS, size_t offset); + + yaml::YamlObjectFile &ObjectFile; + uint64_t fileStart; +}; + +void UniversalWriter::writeMachO(raw_ostream &OS) { + fileStart = OS.tell(); + if (ObjectFile.MachO) { + MachOWriter Writer(*ObjectFile.MachO); + Writer.writeMachO(OS); + return; + } + + writeFatHeader(OS); + writeFatArchs(OS); + + auto &FatFile = *ObjectFile.FatMachO; + assert(FatFile.FatArchs.size() == FatFile.Slices.size()); + for (size_t i = 0; i < FatFile.Slices.size(); i++) { + ZeroToOffset(OS, FatFile.FatArchs[i].offset); + MachOWriter Writer(FatFile.Slices[i]); + Writer.writeMachO(OS); + + auto SliceEnd = FatFile.FatArchs[i].offset + FatFile.FatArchs[i].size; + ZeroToOffset(OS, SliceEnd); + } +} + +void UniversalWriter::writeFatHeader(raw_ostream &OS) { + auto &FatFile = *ObjectFile.FatMachO; + MachO::fat_header header; + header.magic = FatFile.Header.magic; + header.nfat_arch = FatFile.Header.nfat_arch; + if (sys::IsLittleEndianHost) + swapStruct(header); + OS.write(reinterpret_cast(&header), sizeof(MachO::fat_header)); +} + +template +FatArchType constructFatArch(MachOYAML::FatArch &Arch) { + FatArchType FatArch; + FatArch.cputype = Arch.cputype; + FatArch.cpusubtype = Arch.cpusubtype; + FatArch.offset = Arch.offset; + FatArch.size = Arch.size; + FatArch.align = Arch.align; + return FatArch; +} + +template +void writeFatArch(MachOYAML::FatArch &LC, raw_ostream 
&OS) {} + +template <> +void writeFatArch(MachOYAML::FatArch &Arch, raw_ostream &OS) { + auto FatArch = constructFatArch(Arch); + if (sys::IsLittleEndianHost) + swapStruct(FatArch); + OS.write(reinterpret_cast(&FatArch), sizeof(MachO::fat_arch)); +} + +template <> +void writeFatArch(MachOYAML::FatArch &Arch, + raw_ostream &OS) { + auto FatArch = constructFatArch(Arch); + FatArch.reserved = Arch.reserved; + if (sys::IsLittleEndianHost) + swapStruct(FatArch); + OS.write(reinterpret_cast(&FatArch), + sizeof(MachO::fat_arch_64)); +} + +void UniversalWriter::writeFatArchs(raw_ostream &OS) { + auto &FatFile = *ObjectFile.FatMachO; + bool is64Bit = FatFile.Header.magic == MachO::FAT_MAGIC_64; + for (auto Arch : FatFile.FatArchs) { + if (is64Bit) + writeFatArch(Arch, OS); + else + writeFatArch(Arch, OS); + } +} + +void UniversalWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) { + auto currOffset = OS.tell() - fileStart; + if (currOffset < Offset) + ZeroFillBytes(OS, Offset - currOffset); +} + +} // end anonymous namespace + +namespace llvm { +namespace yaml { + +bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler /*EH*/) { + UniversalWriter Writer(Doc); + Writer.writeMachO(Out); + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp index d12f12cf443..0f7cd1e1495 100644 --- a/lib/ObjectYAML/MachOYAML.cpp +++ b/lib/ObjectYAML/MachOYAML.cpp @@ -287,6 +287,15 @@ void MappingTraits::mapping(IO &IO, IO.mapRequired("reserved1", Section.reserved1); IO.mapRequired("reserved2", Section.reserved2); IO.mapOptional("reserved3", Section.reserved3); + IO.mapOptional("content", Section.content); +} + +StringRef +MappingTraits::validate(IO &IO, + MachOYAML::Section &Section) { + if (Section.content && Section.size < Section.content->binary_size()) + return "Section size must be greater than or equal to the content size"; + return {}; } void MappingTraits::mapping( diff --git 
a/lib/ObjectYAML/MinidumpEmitter.cpp b/lib/ObjectYAML/MinidumpEmitter.cpp new file mode 100644 index 00000000000..bbfd2cd8cba --- /dev/null +++ b/lib/ObjectYAML/MinidumpEmitter.cpp @@ -0,0 +1,247 @@ +//===- yaml2minidump.cpp - Convert a YAML file to a minidump file ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/MinidumpYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::minidump; +using namespace llvm::MinidumpYAML; + +namespace { +/// A helper class to manage the placement of various structures into the final +/// minidump binary. Space for objects can be allocated via various allocate*** +/// methods, while the final minidump file is written by calling the writeTo +/// method. The plain versions of allocation functions take a reference to the +/// data which is to be written (and hence the data must be available until +/// writeTo is called), while the "New" versions allocate the data in an +/// allocator-managed buffer, which is available until the allocator object is +/// destroyed. For both kinds of functions, it is possible to modify the +/// data for which the space has been "allocated" until the final writeTo call. +/// This is useful for "linking" the allocated structures via their offsets. 
+class BlobAllocator { +public: + size_t tell() const { return NextOffset; } + + size_t allocateCallback(size_t Size, + std::function Callback) { + size_t Offset = NextOffset; + NextOffset += Size; + Callbacks.push_back(std::move(Callback)); + return Offset; + } + + size_t allocateBytes(ArrayRef Data) { + return allocateCallback( + Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); + } + + size_t allocateBytes(yaml::BinaryRef Data) { + return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { + Data.writeAsBinary(OS); + }); + } + + template size_t allocateArray(ArrayRef Data) { + return allocateBytes({reinterpret_cast(Data.data()), + sizeof(T) * Data.size()}); + } + + template + std::pair> + allocateNewArray(const iterator_range &Range); + + template size_t allocateObject(const T &Data) { + return allocateArray(makeArrayRef(Data)); + } + + template + std::pair allocateNewObject(Types &&... Args) { + T *Object = new (Temporaries.Allocate()) T(std::forward(Args)...); + return {allocateObject(*Object), Object}; + } + + size_t allocateString(StringRef Str); + + void writeTo(raw_ostream &OS) const; + +private: + size_t NextOffset = 0; + + BumpPtrAllocator Temporaries; + std::vector> Callbacks; +}; +} // namespace + +template +std::pair> +BlobAllocator::allocateNewArray(const iterator_range &Range) { + size_t Num = std::distance(Range.begin(), Range.end()); + MutableArrayRef Array(Temporaries.Allocate(Num), Num); + std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); + return {allocateArray(Array), Array}; +} + +size_t BlobAllocator::allocateString(StringRef Str) { + SmallVector WStr; + bool OK = convertUTF8ToUTF16String(Str, WStr); + assert(OK && "Invalid UTF8 in Str?"); + (void)OK; + + // The utf16 string is null-terminated, but the terminator is not counted in + // the string size. 
+ WStr.push_back(0); + size_t Result = + allocateNewObject(2 * (WStr.size() - 1)).first; + allocateNewArray(make_range(WStr.begin(), WStr.end())); + return Result; +} + +void BlobAllocator::writeTo(raw_ostream &OS) const { + size_t BeginOffset = OS.tell(); + for (const auto &Callback : Callbacks) + Callback(OS); + assert(OS.tell() == BeginOffset + NextOffset && + "Callbacks wrote an unexpected number of bytes."); + (void)BeginOffset; +} + +static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { + return {support::ulittle32_t(Data.binary_size()), + support::ulittle32_t(File.allocateBytes(Data))}; +} + +static size_t layout(BlobAllocator &File, MinidumpYAML::ExceptionStream &S) { + File.allocateObject(S.MDExceptionStream); + + size_t DataEnd = File.tell(); + + // Lay out the thread context data, (which is not a part of the stream). + // TODO: This usually (always?) matches the thread context of the + // corresponding thread, and may overlap memory regions as well. We could + // add a level of indirection to the MinidumpYAML format (like an array of + // Blobs that the LocationDescriptors index into) to be able to distinguish + // the cases where location descriptions overlap vs happen to reference + // identical data. 
+ S.MDExceptionStream.ThreadContext = layout(File, S.ThreadContext); + + return DataEnd; +} + +static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { + Range.Entry.Memory = layout(File, Range.Content); +} + +static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { + M.Entry.ModuleNameRVA = File.allocateString(M.Name); + + M.Entry.CvRecord = layout(File, M.CvRecord); + M.Entry.MiscRecord = layout(File, M.MiscRecord); +} + +static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { + T.Entry.Stack.Memory = layout(File, T.Stack); + T.Entry.Context = layout(File, T.Context); +} + +template +static size_t layout(BlobAllocator &File, + MinidumpYAML::detail::ListStream &S) { + + File.allocateNewObject(S.Entries.size()); + for (auto &E : S.Entries) + File.allocateObject(E.Entry); + + size_t DataEnd = File.tell(); + + // Lay out the auxiliary data, (which is not a part of the stream). + DataEnd = File.tell(); + for (auto &E : S.Entries) + layout(File, E); + + return DataEnd; +} + +static Directory layout(BlobAllocator &File, Stream &S) { + Directory Result; + Result.Type = S.Type; + Result.Location.RVA = File.tell(); + Optional DataEnd; + switch (S.Kind) { + case Stream::StreamKind::Exception: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::MemoryInfoList: { + MemoryInfoListStream &InfoList = cast(S); + File.allocateNewObject( + sizeof(minidump::MemoryInfoListHeader), sizeof(minidump::MemoryInfo), + InfoList.Infos.size()); + File.allocateArray(makeArrayRef(InfoList.Infos)); + break; + } + case Stream::StreamKind::MemoryList: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::ModuleList: + DataEnd = layout(File, cast(S)); + break; + case Stream::StreamKind::RawContent: { + RawContentStream &Raw = cast(S); + File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { + Raw.Content.writeAsBinary(OS); + assert(Raw.Content.binary_size() <= Raw.Size); + OS << std::string(Raw.Size - 
Raw.Content.binary_size(), '\0'); + }); + break; + } + case Stream::StreamKind::SystemInfo: { + SystemInfoStream &SystemInfo = cast(S); + File.allocateObject(SystemInfo.Info); + // The CSD string is not a part of the stream. + DataEnd = File.tell(); + SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); + break; + } + case Stream::StreamKind::TextContent: + File.allocateArray(arrayRefFromStringRef(cast(S).Text)); + break; + case Stream::StreamKind::ThreadList: + DataEnd = layout(File, cast(S)); + break; + } + // If DataEnd is not set, we assume everything we generated is a part of the + // stream. + Result.Location.DataSize = + DataEnd.getValueOr(File.tell()) - Result.Location.RVA; + return Result; +} + +namespace llvm { +namespace yaml { + +bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out, + ErrorHandler /*EH*/) { + BlobAllocator File; + File.allocateObject(Obj.Header); + + std::vector StreamDirectory(Obj.Streams.size()); + Obj.Header.StreamDirectoryRVA = + File.allocateArray(makeArrayRef(StreamDirectory)); + Obj.Header.NumberOfStreams = StreamDirectory.size(); + + for (auto &Stream : enumerate(Obj.Streams)) + StreamDirectory[Stream.index()] = layout(File, *Stream.value()); + + File.writeTo(Out); + return true; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp index f5f2acd0cc4..21b2a4d7862 100644 --- a/lib/ObjectYAML/MinidumpYAML.cpp +++ b/lib/ObjectYAML/MinidumpYAML.cpp @@ -8,110 +8,11 @@ #include "llvm/ObjectYAML/MinidumpYAML.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/ConvertUTF.h" using namespace llvm; using namespace llvm::MinidumpYAML; using namespace llvm::minidump; -namespace { -/// A helper class to manage the placement of various structures into the final -/// minidump binary. Space for objects can be allocated via various allocate*** -/// methods, while the final minidump file is written by calling the writeTo -/// method. 
The plain versions of allocation functions take a reference to the -/// data which is to be written (and hence the data must be available until -/// writeTo is called), while the "New" versions allocate the data in an -/// allocator-managed buffer, which is available until the allocator object is -/// destroyed. For both kinds of functions, it is possible to modify the -/// data for which the space has been "allocated" until the final writeTo call. -/// This is useful for "linking" the allocated structures via their offsets. -class BlobAllocator { -public: - size_t tell() const { return NextOffset; } - - size_t allocateCallback(size_t Size, - std::function Callback) { - size_t Offset = NextOffset; - NextOffset += Size; - Callbacks.push_back(std::move(Callback)); - return Offset; - } - - size_t allocateBytes(ArrayRef Data) { - return allocateCallback( - Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); }); - } - - size_t allocateBytes(yaml::BinaryRef Data) { - return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) { - Data.writeAsBinary(OS); - }); - } - - template size_t allocateArray(ArrayRef Data) { - return allocateBytes({reinterpret_cast(Data.data()), - sizeof(T) * Data.size()}); - } - - template - std::pair> - allocateNewArray(const iterator_range &Range); - - template size_t allocateObject(const T &Data) { - return allocateArray(makeArrayRef(Data)); - } - - template - std::pair allocateNewObject(Types &&... 
Args) { - T *Object = new (Temporaries.Allocate()) T(std::forward(Args)...); - return {allocateObject(*Object), Object}; - } - - size_t allocateString(StringRef Str); - - void writeTo(raw_ostream &OS) const; - -private: - size_t NextOffset = 0; - - BumpPtrAllocator Temporaries; - std::vector> Callbacks; -}; -} // namespace - -template -std::pair> -BlobAllocator::allocateNewArray(const iterator_range &Range) { - size_t Num = std::distance(Range.begin(), Range.end()); - MutableArrayRef Array(Temporaries.Allocate(Num), Num); - std::uninitialized_copy(Range.begin(), Range.end(), Array.begin()); - return {allocateArray(Array), Array}; -} - -size_t BlobAllocator::allocateString(StringRef Str) { - SmallVector WStr; - bool OK = convertUTF8ToUTF16String(Str, WStr); - assert(OK && "Invalid UTF8 in Str?"); - (void)OK; - - // The utf16 string is null-terminated, but the terminator is not counted in - // the string size. - WStr.push_back(0); - size_t Result = - allocateNewObject(2 * (WStr.size() - 1)).first; - allocateNewArray(make_range(WStr.begin(), WStr.end())); - return Result; -} - -void BlobAllocator::writeTo(raw_ostream &OS) const { - size_t BeginOffset = OS.tell(); - for (const auto &Callback : Callbacks) - Callback(OS); - assert(OS.tell() == BeginOffset + NextOffset && - "Callbacks wrote an unexpected number of bytes."); - (void)BeginOffset; -} - /// Perform an optional yaml-mapping of an endian-aware type EndianType. 
The /// only purpose of this function is to avoid casting the Default value to the /// endian type; @@ -168,6 +69,10 @@ Stream::~Stream() = default; Stream::StreamKind Stream::getKind(StreamType Type) { switch (Type) { + case StreamType::Exception: + return StreamKind::Exception; + case StreamType::MemoryInfoList: + return StreamKind::MemoryInfoList; case StreamType::MemoryList: return StreamKind::MemoryList; case StreamType::ModuleList: @@ -192,22 +97,45 @@ Stream::StreamKind Stream::getKind(StreamType Type) { std::unique_ptr Stream::create(StreamType Type) { StreamKind Kind = getKind(Type); switch (Kind) { + case StreamKind::Exception: + return std::make_unique(); + case StreamKind::MemoryInfoList: + return std::make_unique(); case StreamKind::MemoryList: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::ModuleList: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::RawContent: - return llvm::make_unique(Type); + return std::make_unique(Type); case StreamKind::SystemInfo: - return llvm::make_unique(); + return std::make_unique(); case StreamKind::TextContent: - return llvm::make_unique(Type); + return std::make_unique(Type); case StreamKind::ThreadList: - return llvm::make_unique(); + return std::make_unique(); } llvm_unreachable("Unhandled stream kind!"); } +void yaml::ScalarBitSetTraits::bitset( + IO &IO, MemoryProtection &Protect) { +#define HANDLE_MDMP_PROTECT(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Protect, #NATIVENAME, MemoryProtection::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryState &State) { +#define HANDLE_MDMP_MEMSTATE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(State, #NATIVENAME, MemoryState::NAME); +#include "llvm/BinaryFormat/MinidumpConstants.def" +} + +void yaml::ScalarBitSetTraits::bitset(IO &IO, MemoryType &Type) { +#define HANDLE_MDMP_MEMTYPE(CODE, NAME, NATIVENAME) \ + IO.bitSetCase(Type, #NATIVENAME, MemoryType::NAME); 
+#include "llvm/BinaryFormat/MinidumpConstants.def" +} + void yaml::ScalarEnumerationTraits::enumeration( IO &IO, ProcessorArchitecture &Arch) { #define HANDLE_MDMP_ARCH(CODE, NAME) \ @@ -314,6 +242,20 @@ void yaml::MappingTraits::mapping(IO &IO, mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0); } +void yaml::MappingTraits::mapping(IO &IO, MemoryInfo &Info) { + mapRequiredHex(IO, "Base Address", Info.BaseAddress); + mapOptionalHex(IO, "Allocation Base", Info.AllocationBase, Info.BaseAddress); + mapRequiredAs(IO, "Allocation Protect", + Info.AllocationProtect); + mapOptionalHex(IO, "Reserved0", Info.Reserved0, 0); + mapRequiredHex(IO, "Region Size", Info.RegionSize); + mapRequiredAs(IO, "State", Info.State); + mapOptionalAs(IO, "Protect", Info.Protect, + Info.AllocationProtect); + mapRequiredAs(IO, "Type", Info.Type); + mapOptionalHex(IO, "Reserved1", Info.Reserved1, 0); +} + void yaml::MappingTraits::mapping(IO &IO, VSFixedFileInfo &Info) { mapOptionalHex(IO, "Signature", Info.Signature, 0); @@ -336,8 +278,7 @@ void yaml::MappingTraits::mapping( mapRequiredHex(IO, "Base of Image", M.Entry.BaseOfImage); mapRequiredHex(IO, "Size of Image", M.Entry.SizeOfImage); mapOptionalHex(IO, "Checksum", M.Entry.Checksum, 0); - IO.mapOptional("Time Date Stamp", M.Entry.TimeDateStamp, - support::ulittle32_t(0)); + mapOptional(IO, "Time Date Stamp", M.Entry.TimeDateStamp, 0); IO.mapRequired("Module Name", M.Name); IO.mapOptional("Version Info", M.Entry.VersionInfo, VSFixedFileInfo()); IO.mapRequired("CodeView Record", M.CvRecord); @@ -363,6 +304,10 @@ void yaml::MappingTraits::mapping( IO, Range.Entry, Range.Content); } +static void streamMapping(yaml::IO &IO, MemoryInfoListStream &Stream) { + IO.mapRequired("Memory Ranges", Stream.Infos); +} + static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) { IO.mapRequired("Memory Ranges", Stream.Entries); } @@ -425,6 +370,32 @@ static void streamMapping(yaml::IO &IO, ThreadListStream &Stream) { 
IO.mapRequired("Threads", Stream.Entries); } +static void streamMapping(yaml::IO &IO, MinidumpYAML::ExceptionStream &Stream) { + mapRequiredHex(IO, "Thread ID", Stream.MDExceptionStream.ThreadId); + IO.mapRequired("Exception Record", Stream.MDExceptionStream.ExceptionRecord); + IO.mapRequired("Thread Context", Stream.ThreadContext); +} + +void yaml::MappingTraits::mapping( + yaml::IO &IO, minidump::Exception &Exception) { + mapRequiredHex(IO, "Exception Code", Exception.ExceptionCode); + mapOptionalHex(IO, "Exception Flags", Exception.ExceptionFlags, 0); + mapOptionalHex(IO, "Exception Record", Exception.ExceptionRecord, 0); + mapOptionalHex(IO, "Exception Address", Exception.ExceptionAddress, 0); + mapOptional(IO, "Number of Parameters", Exception.NumberParameters, 0); + + for (size_t Index = 0; Index < Exception.MaxParameters; ++Index) { + SmallString<16> Name("Parameter "); + Twine(Index).toVector(Name); + support::ulittle64_t &Field = Exception.ExceptionInformation[Index]; + + if (Index < Exception.NumberParameters) + mapRequiredHex(IO, Name.c_str(), Field); + else + mapOptionalHex(IO, Name.c_str(), Field, 0); + } +} + void yaml::MappingTraits>::mapping( yaml::IO &IO, std::unique_ptr &S) { StreamType Type; @@ -435,6 +406,12 @@ void yaml::MappingTraits>::mapping( if (!IO.outputting()) S = MinidumpYAML::Stream::create(Type); switch (S->Kind) { + case MinidumpYAML::Stream::StreamKind::Exception: + streamMapping(IO, llvm::cast(*S)); + break; + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: + streamMapping(IO, llvm::cast(*S)); + break; case MinidumpYAML::Stream::StreamKind::MemoryList: streamMapping(IO, llvm::cast(*S)); break; @@ -461,6 +438,8 @@ StringRef yaml::MappingTraits>::validate( switch (S->Kind) { case MinidumpYAML::Stream::StreamKind::RawContent: return streamValidate(cast(*S)); + case MinidumpYAML::Stream::StreamKind::Exception: + case MinidumpYAML::Stream::StreamKind::MemoryInfoList: case MinidumpYAML::Stream::StreamKind::MemoryList: case 
MinidumpYAML::Stream::StreamKind::ModuleList: case MinidumpYAML::Stream::StreamKind::SystemInfo: @@ -479,118 +458,28 @@ void yaml::MappingTraits::mapping(IO &IO, Object &O) { IO.mapRequired("Streams", O.Streams); } -static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) { - return {support::ulittle32_t(Data.binary_size()), - support::ulittle32_t(File.allocateBytes(Data))}; -} - -static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) { - Range.Entry.Memory = layout(File, Range.Content); -} - -static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) { - M.Entry.ModuleNameRVA = File.allocateString(M.Name); - - M.Entry.CvRecord = layout(File, M.CvRecord); - M.Entry.MiscRecord = layout(File, M.MiscRecord); -} - -static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) { - T.Entry.Stack.Memory = layout(File, T.Stack); - T.Entry.Context = layout(File, T.Context); -} - -template -static size_t layout(BlobAllocator &File, - MinidumpYAML::detail::ListStream &S) { - - File.allocateNewObject(S.Entries.size()); - for (auto &E : S.Entries) - File.allocateObject(E.Entry); - - size_t DataEnd = File.tell(); - - // Lay out the auxiliary data, (which is not a part of the stream). 
- DataEnd = File.tell(); - for (auto &E : S.Entries) - layout(File, E); - - return DataEnd; -} - -static Directory layout(BlobAllocator &File, Stream &S) { - Directory Result; - Result.Type = S.Type; - Result.Location.RVA = File.tell(); - Optional DataEnd; - switch (S.Kind) { - case Stream::StreamKind::MemoryList: - DataEnd = layout(File, cast(S)); - break; - case Stream::StreamKind::ModuleList: - DataEnd = layout(File, cast(S)); - break; - case Stream::StreamKind::RawContent: { - RawContentStream &Raw = cast(S); - File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) { - Raw.Content.writeAsBinary(OS); - assert(Raw.Content.binary_size() <= Raw.Size); - OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0'); - }); - break; - } - case Stream::StreamKind::SystemInfo: { - SystemInfoStream &SystemInfo = cast(S); - File.allocateObject(SystemInfo.Info); - // The CSD string is not a part of the stream. - DataEnd = File.tell(); - SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion); - break; - } - case Stream::StreamKind::TextContent: - File.allocateArray(arrayRefFromStringRef(cast(S).Text)); - break; - case Stream::StreamKind::ThreadList: - DataEnd = layout(File, cast(S)); - break; - } - // If DataEnd is not set, we assume everything we generated is a part of the - // stream. 
- Result.Location.DataSize = - DataEnd.getValueOr(File.tell()) - Result.Location.RVA; - return Result; -} - -void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) { - BlobAllocator File; - File.allocateObject(Obj.Header); - - std::vector StreamDirectory(Obj.Streams.size()); - Obj.Header.StreamDirectoryRVA = - File.allocateArray(makeArrayRef(StreamDirectory)); - Obj.Header.NumberOfStreams = StreamDirectory.size(); - - for (auto &Stream : enumerate(Obj.Streams)) - StreamDirectory[Stream.index()] = layout(File, *Stream.value()); - - File.writeTo(OS); -} - -Error MinidumpYAML::writeAsBinary(StringRef Yaml, raw_ostream &OS) { - yaml::Input Input(Yaml); - Object Obj; - Input >> Obj; - if (std::error_code EC = Input.error()) - return errorCodeToError(EC); - - writeAsBinary(Obj, OS); - return Error::success(); -} - Expected> Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { StreamKind Kind = getKind(StreamDesc.Type); switch (Kind) { + case StreamKind::Exception: { + Expected ExpectedExceptionStream = + File.getExceptionStream(); + if (!ExpectedExceptionStream) + return ExpectedExceptionStream.takeError(); + Expected> ExpectedThreadContext = + File.getRawData(ExpectedExceptionStream->ThreadContext); + if (!ExpectedThreadContext) + return ExpectedThreadContext.takeError(); + return std::make_unique(*ExpectedExceptionStream, + *ExpectedThreadContext); + } + case StreamKind::MemoryInfoList: { + if (auto ExpectedList = File.getMemoryInfoList()) + return std::make_unique(*ExpectedList); + else + return ExpectedList.takeError(); + } case StreamKind::MemoryList: { auto ExpectedList = File.getMemoryList(); if (!ExpectedList) @@ -602,7 +491,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { return ExpectedContent.takeError(); Ranges.push_back({MD, *ExpectedContent}); } - return llvm::make_unique(std::move(Ranges)); + return std::make_unique(std::move(Ranges)); } case StreamKind::ModuleList: { auto ExpectedList 
= File.getModuleList(); @@ -622,10 +511,10 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { Modules.push_back( {M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc}); } - return llvm::make_unique(std::move(Modules)); + return std::make_unique(std::move(Modules)); } case StreamKind::RawContent: - return llvm::make_unique(StreamDesc.Type, + return std::make_unique(StreamDesc.Type, File.getRawStream(StreamDesc)); case StreamKind::SystemInfo: { auto ExpectedInfo = File.getSystemInfo(); @@ -634,11 +523,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA); if (!ExpectedCSDVersion) return ExpectedInfo.takeError(); - return llvm::make_unique(*ExpectedInfo, + return std::make_unique(*ExpectedInfo, std::move(*ExpectedCSDVersion)); } case StreamKind::TextContent: - return llvm::make_unique( + return std::make_unique( StreamDesc.Type, toStringRef(File.getRawStream(StreamDesc))); case StreamKind::ThreadList: { auto ExpectedList = File.getThreadList(); @@ -654,7 +543,7 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) { return ExpectedContext.takeError(); Threads.push_back({T, *ExpectedStack, *ExpectedContext}); } - return llvm::make_unique(std::move(Threads)); + return std::make_unique(std::move(Threads)); } } llvm_unreachable("Unhandled stream kind!"); diff --git a/lib/ObjectYAML/WasmEmitter.cpp b/lib/ObjectYAML/WasmEmitter.cpp new file mode 100644 index 00000000000..debc040587a --- /dev/null +++ b/lib/ObjectYAML/WasmEmitter.cpp @@ -0,0 +1,633 @@ +//===- yaml2wasm - Convert YAML to a Wasm object file --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// The Wasm component of yaml2obj. +/// +//===----------------------------------------------------------------------===// +// + +#include "llvm/Object/Wasm.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +namespace { +/// This parses a yaml stream that represents a Wasm object file. +/// See docs/yaml2obj for the yaml scheema. +class WasmWriter { +public: + WasmWriter(WasmYAML::Object &Obj, yaml::ErrorHandler EH) + : Obj(Obj), ErrHandler(EH) {} + bool writeWasm(raw_ostream &OS); + +private: + void writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, + uint32_t SectionIndex); + + void writeInitExpr(raw_ostream &OS, const wasm::WasmInitExpr &InitExpr); + + void writeSectionContent(raw_ostream &OS, WasmYAML::CustomSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::TypeSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ImportSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::FunctionSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::TableSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::MemorySection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::GlobalSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::EventSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ExportSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::StartSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ElemSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::CodeSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::DataSection &Section); + void 
writeSectionContent(raw_ostream &OS, WasmYAML::DataCountSection &Section); + + // Custom section types + void writeSectionContent(raw_ostream &OS, WasmYAML::DylinkSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::NameSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::LinkingSection &Section); + void writeSectionContent(raw_ostream &OS, WasmYAML::ProducersSection &Section); + void writeSectionContent(raw_ostream &OS, + WasmYAML::TargetFeaturesSection &Section); + WasmYAML::Object &Obj; + uint32_t NumImportedFunctions = 0; + uint32_t NumImportedGlobals = 0; + uint32_t NumImportedEvents = 0; + + bool HasError = false; + yaml::ErrorHandler ErrHandler; + void reportError(const Twine &Msg); +}; + +class SubSectionWriter { + raw_ostream &OS; + std::string OutString; + raw_string_ostream StringStream; + +public: + SubSectionWriter(raw_ostream &OS) : OS(OS), StringStream(OutString) {} + + void done() { + StringStream.flush(); + encodeULEB128(OutString.size(), OS); + OS << OutString; + OutString.clear(); + } + + raw_ostream &getStream() { return StringStream; } +}; + +} // end anonymous namespace + +static int writeUint64(raw_ostream &OS, uint64_t Value) { + char Data[sizeof(Value)]; + support::endian::write64le(Data, Value); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeUint32(raw_ostream &OS, uint32_t Value) { + char Data[sizeof(Value)]; + support::endian::write32le(Data, Value); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeUint8(raw_ostream &OS, uint8_t Value) { + char Data[sizeof(Value)]; + memcpy(Data, &Value, sizeof(Data)); + OS.write(Data, sizeof(Data)); + return 0; +} + +static int writeStringRef(const StringRef &Str, raw_ostream &OS) { + encodeULEB128(Str.size(), OS); + OS << Str; + return 0; +} + +static int writeLimits(const WasmYAML::Limits &Lim, raw_ostream &OS) { + writeUint8(OS, Lim.Flags); + encodeULEB128(Lim.Initial, OS); + if (Lim.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) + 
encodeULEB128(Lim.Maximum, OS); + return 0; +} + +void WasmWriter::reportError(const Twine &Msg) { + ErrHandler(Msg); + HasError = true; +} + +void WasmWriter::writeInitExpr(raw_ostream &OS, + const wasm::WasmInitExpr &InitExpr) { + writeUint8(OS, InitExpr.Opcode); + switch (InitExpr.Opcode) { + case wasm::WASM_OPCODE_I32_CONST: + encodeSLEB128(InitExpr.Value.Int32, OS); + break; + case wasm::WASM_OPCODE_I64_CONST: + encodeSLEB128(InitExpr.Value.Int64, OS); + break; + case wasm::WASM_OPCODE_F32_CONST: + writeUint32(OS, InitExpr.Value.Float32); + break; + case wasm::WASM_OPCODE_F64_CONST: + writeUint64(OS, InitExpr.Value.Float64); + break; + case wasm::WASM_OPCODE_GLOBAL_GET: + encodeULEB128(InitExpr.Value.Global, OS); + break; + default: + reportError("unknown opcode in init_expr: " + Twine(InitExpr.Opcode)); + return; + } + writeUint8(OS, wasm::WASM_OPCODE_END); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DylinkSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.MemorySize, OS); + encodeULEB128(Section.MemoryAlignment, OS); + encodeULEB128(Section.TableSize, OS); + encodeULEB128(Section.TableAlignment, OS); + encodeULEB128(Section.Needed.size(), OS); + for (StringRef Needed : Section.Needed) + writeStringRef(Needed, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::LinkingSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.Version, OS); + + SubSectionWriter SubSection(OS); + + // SYMBOL_TABLE subsection + if (Section.SymbolTable.size()) { + writeUint8(OS, wasm::WASM_SYMBOL_TABLE); + + encodeULEB128(Section.SymbolTable.size(), SubSection.getStream()); +#ifndef NDEBUG + uint32_t SymbolIndex = 0; +#endif + for (const WasmYAML::SymbolInfo &Info : Section.SymbolTable) { + assert(Info.Index == SymbolIndex++); + writeUint8(SubSection.getStream(), Info.Kind); + encodeULEB128(Info.Flags, SubSection.getStream()); + switch (Info.Kind) { + case 
wasm::WASM_SYMBOL_TYPE_FUNCTION: + case wasm::WASM_SYMBOL_TYPE_GLOBAL: + case wasm::WASM_SYMBOL_TYPE_EVENT: + encodeULEB128(Info.ElementIndex, SubSection.getStream()); + if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || + (Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) + writeStringRef(Info.Name, SubSection.getStream()); + break; + case wasm::WASM_SYMBOL_TYPE_DATA: + writeStringRef(Info.Name, SubSection.getStream()); + if ((Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) { + encodeULEB128(Info.DataRef.Segment, SubSection.getStream()); + encodeULEB128(Info.DataRef.Offset, SubSection.getStream()); + encodeULEB128(Info.DataRef.Size, SubSection.getStream()); + } + break; + case wasm::WASM_SYMBOL_TYPE_SECTION: + encodeULEB128(Info.ElementIndex, SubSection.getStream()); + break; + default: + llvm_unreachable("unexpected kind"); + } + } + + SubSection.done(); + } + + // SEGMENT_NAMES subsection + if (Section.SegmentInfos.size()) { + writeUint8(OS, wasm::WASM_SEGMENT_INFO); + encodeULEB128(Section.SegmentInfos.size(), SubSection.getStream()); + for (const WasmYAML::SegmentInfo &SegmentInfo : Section.SegmentInfos) { + writeStringRef(SegmentInfo.Name, SubSection.getStream()); + encodeULEB128(SegmentInfo.Alignment, SubSection.getStream()); + encodeULEB128(SegmentInfo.Flags, SubSection.getStream()); + } + SubSection.done(); + } + + // INIT_FUNCS subsection + if (Section.InitFunctions.size()) { + writeUint8(OS, wasm::WASM_INIT_FUNCS); + encodeULEB128(Section.InitFunctions.size(), SubSection.getStream()); + for (const WasmYAML::InitFunction &Func : Section.InitFunctions) { + encodeULEB128(Func.Priority, SubSection.getStream()); + encodeULEB128(Func.Symbol, SubSection.getStream()); + } + SubSection.done(); + } + + // COMDAT_INFO subsection + if (Section.Comdats.size()) { + writeUint8(OS, wasm::WASM_COMDAT_INFO); + encodeULEB128(Section.Comdats.size(), SubSection.getStream()); + for (const auto &C : Section.Comdats) { + writeStringRef(C.Name, SubSection.getStream()); + 
encodeULEB128(0, SubSection.getStream()); // flags for future use + encodeULEB128(C.Entries.size(), SubSection.getStream()); + for (const WasmYAML::ComdatEntry &Entry : C.Entries) { + writeUint8(SubSection.getStream(), Entry.Kind); + encodeULEB128(Entry.Index, SubSection.getStream()); + } + } + SubSection.done(); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::NameSection &Section) { + writeStringRef(Section.Name, OS); + if (Section.FunctionNames.size()) { + writeUint8(OS, wasm::WASM_NAMES_FUNCTION); + + SubSectionWriter SubSection(OS); + + encodeULEB128(Section.FunctionNames.size(), SubSection.getStream()); + for (const WasmYAML::NameEntry &NameEntry : Section.FunctionNames) { + encodeULEB128(NameEntry.Index, SubSection.getStream()); + writeStringRef(NameEntry.Name, SubSection.getStream()); + } + + SubSection.done(); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ProducersSection &Section) { + writeStringRef(Section.Name, OS); + int Fields = int(!Section.Languages.empty()) + int(!Section.Tools.empty()) + + int(!Section.SDKs.empty()); + if (Fields == 0) + return; + encodeULEB128(Fields, OS); + for (auto &Field : {std::make_pair(StringRef("language"), &Section.Languages), + std::make_pair(StringRef("processed-by"), &Section.Tools), + std::make_pair(StringRef("sdk"), &Section.SDKs)}) { + if (Field.second->empty()) + continue; + writeStringRef(Field.first, OS); + encodeULEB128(Field.second->size(), OS); + for (auto &Entry : *Field.second) { + writeStringRef(Entry.Name, OS); + writeStringRef(Entry.Version, OS); + } + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TargetFeaturesSection &Section) { + writeStringRef(Section.Name, OS); + encodeULEB128(Section.Features.size(), OS); + for (auto &E : Section.Features) { + writeUint8(OS, E.Prefix); + writeStringRef(E.Name, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::CustomSection &Section) { + if (auto S = 
dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else if (auto S = dyn_cast(&Section)) { + writeSectionContent(OS, *S); + } else { + writeStringRef(Section.Name, OS); + Section.Payload.writeAsBinary(OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TypeSection &Section) { + encodeULEB128(Section.Signatures.size(), OS); + uint32_t ExpectedIndex = 0; + for (const WasmYAML::Signature &Sig : Section.Signatures) { + if (Sig.Index != ExpectedIndex) { + reportError("unexpected type index: " + Twine(Sig.Index)); + return; + } + ++ExpectedIndex; + writeUint8(OS, Sig.Form); + encodeULEB128(Sig.ParamTypes.size(), OS); + for (auto ParamType : Sig.ParamTypes) + writeUint8(OS, ParamType); + encodeULEB128(Sig.ReturnTypes.size(), OS); + for (auto ReturnType : Sig.ReturnTypes) + writeUint8(OS, ReturnType); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ImportSection &Section) { + encodeULEB128(Section.Imports.size(), OS); + for (const WasmYAML::Import &Import : Section.Imports) { + writeStringRef(Import.Module, OS); + writeStringRef(Import.Field, OS); + writeUint8(OS, Import.Kind); + switch (Import.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + encodeULEB128(Import.SigIndex, OS); + NumImportedFunctions++; + break; + case wasm::WASM_EXTERNAL_GLOBAL: + writeUint8(OS, Import.GlobalImport.Type); + writeUint8(OS, Import.GlobalImport.Mutable); + NumImportedGlobals++; + break; + case wasm::WASM_EXTERNAL_EVENT: + writeUint32(OS, Import.EventImport.Attribute); + writeUint32(OS, Import.EventImport.SigIndex); + NumImportedGlobals++; + break; + case wasm::WASM_EXTERNAL_MEMORY: + writeLimits(Import.Memory, OS); + break; + case wasm::WASM_EXTERNAL_TABLE: + writeUint8(OS, Import.TableImport.ElemType); + 
writeLimits(Import.TableImport.TableLimits, OS); + break; + default: + reportError("unknown import type: " +Twine(Import.Kind)); + return; + } + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::FunctionSection &Section) { + encodeULEB128(Section.FunctionTypes.size(), OS); + for (uint32_t FuncType : Section.FunctionTypes) + encodeULEB128(FuncType, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ExportSection &Section) { + encodeULEB128(Section.Exports.size(), OS); + for (const WasmYAML::Export &Export : Section.Exports) { + writeStringRef(Export.Name, OS); + writeUint8(OS, Export.Kind); + encodeULEB128(Export.Index, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::StartSection &Section) { + encodeULEB128(Section.StartFunction, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::TableSection &Section) { + encodeULEB128(Section.Tables.size(), OS); + for (auto &Table : Section.Tables) { + writeUint8(OS, Table.ElemType); + writeLimits(Table.TableLimits, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::MemorySection &Section) { + encodeULEB128(Section.Memories.size(), OS); + for (const WasmYAML::Limits &Mem : Section.Memories) + writeLimits(Mem, OS); +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::GlobalSection &Section) { + encodeULEB128(Section.Globals.size(), OS); + uint32_t ExpectedIndex = NumImportedGlobals; + for (auto &Global : Section.Globals) { + if (Global.Index != ExpectedIndex) { + reportError("unexpected global index: " + Twine(Global.Index)); + return; + } + ++ExpectedIndex; + writeUint8(OS, Global.Type); + writeUint8(OS, Global.Mutable); + writeInitExpr(OS, Global.InitExpr); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::EventSection &Section) { + encodeULEB128(Section.Events.size(), OS); + uint32_t ExpectedIndex = NumImportedEvents; + for (auto &Event : 
Section.Events) { + if (Event.Index != ExpectedIndex) { + reportError("unexpected event index: " + Twine(Event.Index)); + return; + } + ++ExpectedIndex; + encodeULEB128(Event.Attribute, OS); + encodeULEB128(Event.SigIndex, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::ElemSection &Section) { + encodeULEB128(Section.Segments.size(), OS); + for (auto &Segment : Section.Segments) { + encodeULEB128(Segment.TableIndex, OS); + writeInitExpr(OS, Segment.Offset); + + encodeULEB128(Segment.Functions.size(), OS); + for (auto &Function : Segment.Functions) + encodeULEB128(Function, OS); + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::CodeSection &Section) { + encodeULEB128(Section.Functions.size(), OS); + uint32_t ExpectedIndex = NumImportedFunctions; + for (auto &Func : Section.Functions) { + std::string OutString; + raw_string_ostream StringStream(OutString); + if (Func.Index != ExpectedIndex) { + reportError("unexpected function index: " + Twine(Func.Index)); + return; + } + ++ExpectedIndex; + + encodeULEB128(Func.Locals.size(), StringStream); + for (auto &LocalDecl : Func.Locals) { + encodeULEB128(LocalDecl.Count, StringStream); + writeUint8(StringStream, LocalDecl.Type); + } + + Func.Body.writeAsBinary(StringStream); + + // Write the section size followed by the content + StringStream.flush(); + encodeULEB128(OutString.size(), OS); + OS << OutString; + } +} + +void WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DataSection &Section) { + encodeULEB128(Section.Segments.size(), OS); + for (auto &Segment : Section.Segments) { + encodeULEB128(Segment.InitFlags, OS); + if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) + encodeULEB128(Segment.MemoryIndex, OS); + if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) + writeInitExpr(OS, Segment.Offset); + encodeULEB128(Segment.Content.binary_size(), OS); + Segment.Content.writeAsBinary(OS); + } +} + +void 
WasmWriter::writeSectionContent(raw_ostream &OS, + WasmYAML::DataCountSection &Section) { + encodeULEB128(Section.Count, OS); +} + +void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, + uint32_t SectionIndex) { + switch (Sec.Type) { + case wasm::WASM_SEC_CODE: + writeStringRef("reloc.CODE", OS); + break; + case wasm::WASM_SEC_DATA: + writeStringRef("reloc.DATA", OS); + break; + case wasm::WASM_SEC_CUSTOM: { + auto *CustomSection = cast(&Sec); + writeStringRef(("reloc." + CustomSection->Name).str(), OS); + break; + } + default: + llvm_unreachable("not yet implemented"); + } + + encodeULEB128(SectionIndex, OS); + encodeULEB128(Sec.Relocations.size(), OS); + + for (auto Reloc : Sec.Relocations) { + writeUint8(OS, Reloc.Type); + encodeULEB128(Reloc.Offset, OS); + encodeULEB128(Reloc.Index, OS); + switch (Reloc.Type) { + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_SECTION_OFFSET_I32: + encodeULEB128(Reloc.Addend, OS); + } + } +} + +bool WasmWriter::writeWasm(raw_ostream &OS) { + // Write headers + OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic)); + writeUint32(OS, Obj.Header.Version); + + // Write each section + llvm::object::WasmSectionOrderChecker Checker; + for (const std::unique_ptr &Sec : Obj.Sections) { + StringRef SecName = ""; + if (auto S = dyn_cast(Sec.get())) + SecName = S->Name; + if (!Checker.isValidSectionOrder(Sec->Type, SecName)) { + reportError("out of order section type: " + Twine(Sec->Type)); + return false; + } + encodeULEB128(Sec->Type, OS); + std::string OutString; + raw_string_ostream StringStream(OutString); + if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + 
writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else if (auto S = dyn_cast(Sec.get())) + writeSectionContent(StringStream, *S); + else + reportError("unknown section type: " + Twine(Sec->Type)); + + if (HasError) + return false; + + StringStream.flush(); + + // Write the section size followed by the content + encodeULEB128(OutString.size(), OS); + OS << OutString; + } + + // write reloc sections for any section that have relocations + uint32_t SectionIndex = 0; + for (const std::unique_ptr &Sec : Obj.Sections) { + if (Sec->Relocations.empty()) { + SectionIndex++; + continue; + } + + writeUint8(OS, wasm::WASM_SEC_CUSTOM); + std::string OutString; + raw_string_ostream StringStream(OutString); + writeRelocSection(StringStream, *Sec, SectionIndex++); + StringStream.flush(); + + encodeULEB128(OutString.size(), OS); + OS << OutString; + } + + return true; +} + +namespace llvm { +namespace yaml { + +bool yaml2wasm(WasmYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH) { + WasmWriter Writer(Doc, EH); + return Writer.writeWasm(Out); +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 88491d955c4..232d5122004 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp 
@@ -295,8 +295,8 @@ void ScalarEnumerationTraits::enumeration( void MappingTraits::mapping( IO &IO, WasmYAML::Signature &Signature) { IO.mapRequired("Index", Signature.Index); - IO.mapRequired("ReturnType", Signature.ReturnType); IO.mapRequired("ParamTypes", Signature.ParamTypes); + IO.mapRequired("ReturnTypes", Signature.ReturnTypes); } void MappingTraits::mapping(IO &IO, WasmYAML::Table &Table) { @@ -535,6 +535,7 @@ void ScalarBitSetTraits::bitset( BCaseMask(UNDEFINED, UNDEFINED); BCaseMask(EXPORTED, EXPORTED); BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME); + BCaseMask(NO_STRIP, NO_STRIP); #undef BCaseMask } @@ -559,7 +560,6 @@ void ScalarEnumerationTraits::enumeration( ECase(V128); ECase(FUNCREF); ECase(FUNC); - ECase(NORESULT); #undef ECase } diff --git a/lib/ObjectYAML/yaml2obj.cpp b/lib/ObjectYAML/yaml2obj.cpp new file mode 100644 index 00000000000..c18fa5cfdb5 --- /dev/null +++ b/lib/ObjectYAML/yaml2obj.cpp @@ -0,0 +1,77 @@ +//===-- yaml2obj.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" + +namespace llvm { +namespace yaml { + +bool convertYAML(yaml::Input &YIn, raw_ostream &Out, ErrorHandler ErrHandler, + unsigned DocNum) { + unsigned CurDocNum = 0; + do { + if (++CurDocNum != DocNum) + continue; + + yaml::YamlObjectFile Doc; + YIn >> Doc; + if (std::error_code EC = YIn.error()) { + ErrHandler("failed to parse YAML input: " + EC.message()); + return false; + } + + if (Doc.Elf) + return yaml2elf(*Doc.Elf, Out, ErrHandler); + if (Doc.Coff) + return yaml2coff(*Doc.Coff, Out, ErrHandler); + if (Doc.MachO || Doc.FatMachO) + return yaml2macho(Doc, Out, ErrHandler); + if (Doc.Minidump) + return yaml2minidump(*Doc.Minidump, Out, ErrHandler); + if (Doc.Wasm) + return yaml2wasm(*Doc.Wasm, Out, ErrHandler); + + ErrHandler("unknown document type"); + return false; + + } while (YIn.nextDocument()); + + ErrHandler("cannot find the " + Twine(DocNum) + + getOrdinalSuffix(DocNum).data() + " document"); + return false; +} + +std::unique_ptr +yaml2ObjectFile(SmallVectorImpl &Storage, StringRef Yaml, + ErrorHandler ErrHandler) { + Storage.clear(); + raw_svector_ostream OS(Storage); + + yaml::Input YIn(Yaml); + if (!convertYAML(YIn, OS, ErrHandler)) + return {}; + + Expected> ObjOrErr = + object::ObjectFile::createObjectFile( + MemoryBufferRef(OS.str(), "YamlObject")); + if (ObjOrErr) + return std::move(*ObjOrErr); + + ErrHandler(toString(ObjOrErr.takeError())); + return {}; +} + +} // namespace yaml +} // namespace llvm diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index f37c142da69..09e921502eb 100644 --- 
a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -241,7 +241,7 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } @@ -250,7 +250,7 @@ Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } @@ -259,7 +259,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); SynthesizedArgs.push_back( - make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + std::make_unique(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -267,7 +267,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str()); - SynthesizedArgs.push_back(make_unique( + SynthesizedArgs.push_back(std::make_unique( Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index e2b2a2b2526..1aaccb510f8 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -27,6 
+27,7 @@ #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DDG.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" @@ -35,6 +36,7 @@ #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" @@ -51,6 +53,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/IR/Dominators.h" @@ -101,6 +104,7 @@ #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" @@ -138,13 +142,14 @@ #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" -#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include 
"llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" @@ -206,7 +211,7 @@ static cl::opt EnableSyntheticCounts( cl::desc("Run synthetic function entry count generation " "pass")); -static Regex DefaultAliasRegex( +static const Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); // This option is used in simplifying testing SampleFDO optimizations for @@ -466,8 +471,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if ((Phase != ThinLTOPhase::PreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) && PTO.LoopUnrolling) - LPM2.addPass( - LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll)); + LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false, + PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); @@ -475,10 +480,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass(RequireAnalysisPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM1), EnableMSSALoopDependency, DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); + // The loop passes in LPM2 (IndVarSimplifyPass, LoopIdiomRecognizePass, + // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass(createFunctionToLoopPassAdaptor( + std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); // Eliminate redundancies. 
if (Level != O1) { @@ -515,7 +525,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - DebugLogging)); + EnableMSSALoopDependency, DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -540,6 +550,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { + assert(Level != O0 && "Not expecting O0 here!"); // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification @@ -570,34 +581,63 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); + + // Delete anything that is now dead to make sure that we don't instrument + // dead code. Instrumentation can end up keeping dead code around and + // dramatically increase code size. + MPM.addPass(GlobalDCEPass()); } - // Delete anything that is now dead to make sure that we don't instrument - // dead code. Instrumentation can end up keeping dead code around and - // dramatically increase code size. - MPM.addPass(GlobalDCEPass()); - - if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen(IsCS)); - - FunctionPassManager FPM; - FPM.addPass( - createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - - // Add the profile lowering pass. 
- InstrProfOptions Options; - if (!ProfileFile.empty()) - Options.InstrProfileOutput = ProfileFile; - Options.DoCounterPromotion = true; - Options.UseBFIInPromotion = IsCS; - MPM.addPass(InstrProfiling(Options, IsCS)); - } else if (!ProfileFile.empty()) { + if (!RunProfileGen) { + assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); + return; } + + // Perform PGO instrumentation. + MPM.addPass(PGOInstrumentationGen(IsCS)); + + FunctionPassManager FPM; + FPM.addPass(createFunctionToLoopPassAdaptor( + LoopRotatePass(), EnableMSSALoopDependency, DebugLogging)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + // Add the profile lowering pass. + InstrProfOptions Options; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; + // Do counter promotion at Level greater than O0. + Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); +} + +void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, + bool DebugLogging, bool RunProfileGen, + bool IsCS, std::string ProfileFile, + std::string ProfileRemappingFile) { + if (!RunProfileGen) { + assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); + MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass()); + return; + } + + // Perform PGO instrumentation. + MPM.addPass(PGOInstrumentationGen(IsCS)); + // Add the profile lowering pass. 
+ InstrProfOptions Options; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; + // Do not do counter promotion at O0. + Options.DoCounterPromotion = false; + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); } static InlineParams @@ -852,6 +892,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( FunctionPassManager OptimizePM(DebugLogging); OptimizePM.addPass(Float2IntPass()); + OptimizePM.addPass(LowerConstantIntrinsicsPass()); + // FIXME: We need to run some loop optimizations to re-rotate loops after // simplify-cfg and others undo their rotation. @@ -863,8 +905,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( C(OptimizePM, Level); // First rotate loops that may have been un-rotated by prior passes. - OptimizePM.addPass( - createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + LoopRotatePass(), EnableMSSALoopDependency, DebugLogging)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -911,19 +953,19 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. 
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll - if (EnableUnrollAndJam) { + if (EnableUnrollAndJam && PTO.LoopUnrolling) { OptimizePM.addPass( createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); } - if (PTO.LoopUnrolling) - OptimizePM.addPass(LoopUnrollPass( - LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll))); + OptimizePM.addPass(LoopUnrollPass( + LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - DebugLogging)); + EnableMSSALoopDependency, DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. @@ -1422,12 +1464,23 @@ Expected parseLoopUnrollOptions(StringRef Params) { UnrollOpts.setOptLevel(OptLevel); continue; } + if (ParamName.consume_front("full-unroll-max=")) { + int Count; + if (ParamName.getAsInteger(0, Count)) + return make_error( + formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + UnrollOpts.setFullUnrollMaxCount(Count); + continue; + } bool Enable = !ParamName.consume_front("no-"); if (ParamName == "partial") { UnrollOpts.setPartial(Enable); } else if (ParamName == "peeling") { UnrollOpts.setPeeling(Enable); + } else if (ParamName == "profile-peeling") { + UnrollOpts.setProfileBasedPeeling(Enable); } else if (ParamName == "runtime") { UnrollOpts.setRuntime(Enable); } else if (ParamName == "upperbound") { @@ -1542,6 +1595,26 @@ Expected parseLoopUnswitchOptions(StringRef Params) { } return Result; } + +Expected parseMergedLoadStoreMotionOptions(StringRef Params) { + bool Result = false; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = 
Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "split-footer-bb") { + Result = Enable; + } else { + return make_error( + formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ", + ParamName) + .str(), + inconvertibleErrorCode()); + } + } + return Result; +} } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline @@ -1629,7 +1702,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "function") return true; - if (Name == "loop") + if (Name == "loop" || Name == "loop-mssa") return true; // Explicitly handle custom-parsed pass names. @@ -1653,7 +1726,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { template static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. - if (Name == "loop") + if (Name == "loop" || Name == "loop-mssa") return true; // Explicitly handle custom-parsed pass names. @@ -1800,9 +1873,19 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, .Case("O3", O3) .Case("Os", Os) .Case("Oz", Oz); - if (L == O0) - // At O0 we do nothing at all! + if (L == O0) { + // Add instrumentation PGO passes -- at O0 we can still do PGO. + if (PGOOpt && Matches[1] != "thinlto" && + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) + addPGOInstrPassesForO0( + MPM, DebugLogging, + /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), + /* IsCS */ false, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + // Do nothing else at all! 
return Error::success(); + } if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); @@ -1947,14 +2030,15 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, FPM.addPass(std::move(NestedFPM)); return Error::success(); } - if (Name == "loop") { + if (Name == "loop" || Name == "loop-mssa") { LoopPassManager LPM(DebugLogging); if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, DebugLogging)) return Err; // Add the nested pass manager with the appropriate adaptor. - FPM.addPass( - createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); + bool UseMemorySSA = (Name == "loop-mssa"); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, + DebugLogging)); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 347f75870eb..1fa274d172b 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -24,7 +24,6 @@ MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis()) MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) -MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) @@ -87,7 +86,10 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) +MODULE_PASS("msan-module", MemorySanitizerPass({})) +MODULE_PASS("tsan-module", ThreadSanitizerPass()) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, 
false, true, false)) +MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) #undef MODULE_PASS @@ -185,6 +187,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass()) +FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn", GVN()) @@ -195,7 +198,6 @@ FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) -FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) @@ -270,6 +272,11 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize", return LoopVectorizePass(Opts); }, parseLoopVectorizeOptions) +FUNCTION_PASS_WITH_PARAMS("mldst-motion", + [](MergedLoadStoreMotionOptions Opts) { + return MergedLoadStoreMotionPass(Opts); + }, + parseMergedLoadStoreMotionOptions) #undef FUNCTION_PASS_WITH_PARAMS #ifndef LOOP_ANALYSIS @@ -277,6 +284,7 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize", #endif LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) LOOP_ANALYSIS("access-info", LoopAccessAnalysis()) +LOOP_ANALYSIS("ddg", DDGAnalysis()) LOOP_ANALYSIS("ivusers", IVUsersAnalysis()) LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #undef LOOP_ANALYSIS @@ -299,7 +307,9 @@ LOOP_PASS("irce", IRCEPass()) LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) +LOOP_PASS("print", 
DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) +LOOP_PASS("print", LoopCachePrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("guard-widening", GuardWideningPass()) #undef LOOP_PASS diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index afd6618e7cb..8d5e56e26c0 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -194,6 +194,15 @@ void FunctionRecordIterator::skipOtherFiles() { *this = FunctionRecordIterator(); } +ArrayRef CoverageMapping::getImpreciseRecordIndicesForFilename( + StringRef Filename) const { + size_t FilenameHash = hash_value(Filename); + auto RecordIt = FilenameHash2RecordIndices.find(FilenameHash); + if (RecordIt == FilenameHash2RecordIndices.end()) + return {}; + return RecordIt->second; +} + Error CoverageMapping::loadFunctionRecord( const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader) { @@ -249,6 +258,20 @@ Error CoverageMapping::loadFunctionRecord( return Error::success(); Functions.push_back(std::move(Function)); + + // Performance optimization: keep track of the indices of the function records + // which correspond to each filename. This can be used to substantially speed + // up queries for coverage info in a file. + unsigned RecordIndex = Functions.size() - 1; + for (StringRef Filename : Record.Filenames) { + auto &RecordIndices = FilenameHash2RecordIndices[hash_value(Filename)]; + // Note that there may be duplicates in the filename set for a function + // record, because of e.g. macro expansions in the function in which both + // the macro and the function are defined in the same file. 
+ if (RecordIndices.empty() || RecordIndices.back() != RecordIndex) + RecordIndices.push_back(RecordIndex); + } + return Error::success(); } @@ -270,6 +293,16 @@ Expected> CoverageMapping::load( return std::move(Coverage); } +// If E is a no_data_found error, returns success. Otherwise returns E. +static Error handleMaybeNoDataFoundError(Error E) { + return handleErrors( + std::move(E), [](const CoverageMapError &CME) { + if (CME.get() == coveragemap_error::no_data_found) + return static_cast(Error::success()); + return make_error(CME.get()); + }); +} + Expected> CoverageMapping::load(ArrayRef ObjectFilenames, StringRef ProfileFilename, ArrayRef Arches) { @@ -289,12 +322,21 @@ CoverageMapping::load(ArrayRef ObjectFilenames, CovMappingBufOrErr.get()->getMemBufferRef(); auto CoverageReadersOrErr = BinaryCoverageReader::create(CovMappingBufRef, Arch, Buffers); - if (Error E = CoverageReadersOrErr.takeError()) - return std::move(E); + if (Error E = CoverageReadersOrErr.takeError()) { + E = handleMaybeNoDataFoundError(std::move(E)); + if (E) + return std::move(E); + // E == success (originally a no_data_found error). + continue; + } for (auto &Reader : CoverageReadersOrErr.get()) Readers.push_back(std::move(Reader)); Buffers.push_back(std::move(CovMappingBufOrErr.get())); } + // If no readers were created, either no objects were provided or none of them + // had coverage data. Return an error in the latter case. + if (Readers.empty() && !ObjectFilenames.empty()) + return make_error(coveragemap_error::no_data_found); return load(Readers, *ProfileReader); } @@ -607,7 +649,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const { CoverageData FileCoverage(Filename); std::vector Regions; - for (const auto &Function : Functions) { + // Look up the function records in the given file. Due to hash collisions on + // the filename, we may get back some records that are not in the file. 
+ ArrayRef RecordIndices = + getImpreciseRecordIndicesForFilename(Filename); + for (unsigned RecordIndex : RecordIndices) { + const FunctionRecord &Function = Functions[RecordIndex]; auto MainFileID = findMainViewFileID(Filename, Function); auto FileIDs = gatherFileIDs(Filename, Function); for (const auto &CR : Function.CountedRegions) @@ -627,7 +674,12 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const { std::vector CoverageMapping::getInstantiationGroups(StringRef Filename) const { FunctionInstantiationSetCollector InstantiationSetCollector; - for (const auto &Function : Functions) { + // Look up the function records in the given file. Due to hash collisions on + // the filename, we may get back some records that are not in the file. + ArrayRef RecordIndices = + getImpreciseRecordIndicesForFilename(Filename); + for (unsigned RecordIndex : RecordIndices) { + const FunctionRecord &Function = Functions[RecordIndex]; auto MainFileID = findMainViewFileID(Filename, Function); if (!MainFileID) continue; diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp index e193e10f91d..679ff3525ee 100644 --- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -506,7 +506,7 @@ public: return make_error(coveragemap_error::malformed); // Each coverage map has an alignment of 8, so we need to adjust alignment // before reading the next map. 
- Buf += alignmentAdjustment(Buf, 8); + Buf += offsetToAlignedAddr(Buf, Align(8)); auto CFR = reinterpret_cast(FunBuf); while ((const char *)CFR < FunEnd) { @@ -539,7 +539,7 @@ Expected> CovMapFuncRecordReader::get( switch (Version) { case CovMapVersion::Version1: - return llvm::make_unique>(P, R, F); case CovMapVersion::Version2: case CovMapVersion::Version3: @@ -547,10 +547,10 @@ Expected> CovMapFuncRecordReader::get( if (Error E = P.create(P.getNameData())) return std::move(E); if (Version == CovMapVersion::Version2) - return llvm::make_unique>(P, R, F); else - return llvm::make_unique>(P, R, F); } llvm_unreachable("Unsupported version"); @@ -648,7 +648,7 @@ loadTestingFormat(StringRef Data) { // Skip the padding bytes because coverage map data has an alignment of 8. if (CoverageMapping.empty()) return make_error(coveragemap_error::truncated); - size_t Pad = alignmentAdjustment(CoverageMapping.data(), 8); + size_t Pad = offsetToAlignedAddr(CoverageMapping.data(), Align(8)); if (CoverageMapping.size() < Pad) return make_error(coveragemap_error::malformed); CoverageMapping = CoverageMapping.substr(Pad); @@ -666,11 +666,11 @@ static Expected lookupSection(ObjectFile &OF, StringRef Name) { }; Name = stripSuffix(Name); - StringRef FoundName; for (const auto &Section : OF.sections()) { - if (auto EC = Section.getName(FoundName)) - return errorCodeToError(EC); - if (stripSuffix(FoundName) == Name) + Expected NameOrErr = Section.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + if (stripSuffix(*NameOrErr) == Name) return Section; } return make_error(coveragemap_error::no_data_found); @@ -682,7 +682,7 @@ loadBinaryFormat(std::unique_ptr Bin, StringRef Arch) { if (auto *Universal = dyn_cast(Bin.get())) { // If we have a universal binary, try to look up the object for the // appropriate architecture. 
- auto ObjectFileOrErr = Universal->getObjectForArch(Arch); + auto ObjectFileOrErr = Universal->getMachOObjectForArch(Arch); if (!ObjectFileOrErr) return ObjectFileOrErr.takeError(); OF = std::move(ObjectFileOrErr.get()); diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index 432b20f217c..d75854a60d1 100644 --- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -24,6 +24,16 @@ using namespace llvm; using namespace coverage; +CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter( + ArrayRef Filenames) + : Filenames(Filenames) { +#ifndef NDEBUG + StringSet<> NameSet; + for (StringRef Name : Filenames) + assert(NameSet.insert(Name).second && "Duplicate filename"); +#endif +} + void CoverageFilenamesSectionWriter::write(raw_ostream &OS) { encodeULEB128(Filenames.size(), OS); for (const auto &Filename : Filenames) { diff --git a/lib/ProfileData/GCOV.cpp b/lib/ProfileData/GCOV.cpp index fa4e433d7aa..00e6294c57a 100644 --- a/lib/ProfileData/GCOV.cpp +++ b/lib/ProfileData/GCOV.cpp @@ -40,7 +40,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) { while (true) { if (!Buffer.readFunctionTag()) break; - auto GFun = make_unique(*this); + auto GFun = std::make_unique(*this); if (!GFun->readGCNO(Buffer, Version)) return false; Functions.push_back(std::move(GFun)); @@ -164,7 +164,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { for (uint32_t i = 0, e = BlockCount; i != e; ++i) { if (!Buff.readInt(Dummy)) return false; // Block flags; - Blocks.push_back(make_unique(*this, i)); + Blocks.push_back(std::make_unique(*this, i)); } // read edges. 
@@ -185,7 +185,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { uint32_t Dst; if (!Buff.readInt(Dst)) return false; - Edges.push_back(make_unique(*Blocks[BlockNo], *Blocks[Dst])); + Edges.push_back(std::make_unique(*Blocks[BlockNo], *Blocks[Dst])); GCOVEdge *Edge = Edges.back().get(); Blocks[BlockNo]->addDstEdge(Edge); Blocks[Dst]->addSrcEdge(Edge); @@ -702,14 +702,14 @@ std::string FileInfo::getCoveragePath(StringRef Filename, std::unique_ptr FileInfo::openCoveragePath(StringRef CoveragePath) { if (Options.NoOutput) - return llvm::make_unique(); + return std::make_unique(); std::error_code EC; auto OS = - llvm::make_unique(CoveragePath, EC, sys::fs::F_Text); + std::make_unique(CoveragePath, EC, sys::fs::OF_Text); if (EC) { errs() << EC.message() << "\n"; - return llvm::make_unique(); + return std::make_unique(); } return std::move(OS); } diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 510fd9887d9..57d4fbc59f8 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -478,7 +478,7 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return Error::success(); } -void InstrProfRecord::accumuateCounts(CountSumOrPercent &Sum) const { +void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { uint64_t FuncSum = 0; Sum.NumEntries += Counts.size(); for (size_t F = 0, E = Counts.size(); F < E; ++F) @@ -552,7 +552,7 @@ void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap, uint64_t ValueCutoff) { // FuncLevel CountSum for other should already computed and nonzero. assert(FuncLevelOverlap.Test.CountSum >= 1.0f); - accumuateCounts(FuncLevelOverlap.Base); + accumulateCounts(FuncLevelOverlap.Base); bool Mismatch = (Counts.size() != Other.Counts.size()); // Check if the value profiles mismatch. 
@@ -1078,12 +1078,10 @@ bool isIRPGOFlagSet(const Module *M) { if (!IRInstrVar->hasInitializer()) return false; - const Constant *InitVal = IRInstrVar->getInitializer(); + auto *InitVal = dyn_cast_or_null(IRInstrVar->getInitializer()); if (!InitVal) return false; - - return (dyn_cast(InitVal)->getZExtValue() & - VARIANT_MASK_IR_PROF) != 0; + return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0; } // Check if we can safely rename this Comdat function. @@ -1166,9 +1164,9 @@ void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { } } -Error OverlapStats::accumuateCounts(const std::string &BaseFilename, - const std::string &TestFilename, - bool IsCS) { +Error OverlapStats::accumulateCounts(const std::string &BaseFilename, + const std::string &TestFilename, + bool IsCS) { auto getProfileSum = [IsCS](const std::string &Filename, CountSumOrPercent &Sum) -> Error { auto ReaderOrErr = InstrProfReader::create(Filename); @@ -1176,7 +1174,7 @@ Error OverlapStats::accumuateCounts(const std::string &BaseFilename, return E; } auto Reader = std::move(ReaderOrErr.get()); - Reader->accumuateCounts(Sum, IsCS); + Reader->accumulateCounts(Sum, IsCS); return Error::success(); }; auto Ret = getProfileSum(BaseFilename, Base); diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index fec1c152991..23d078a3dde 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -119,7 +119,7 @@ IndexedInstrProfReader::create(std::unique_ptr Buffer, // Create the reader. if (!IndexedInstrProfReader::hasFormat(*Buffer)) return make_error(instrprof_error::bad_magic); - auto Result = llvm::make_unique( + auto Result = std::make_unique( std::move(Buffer), std::move(RemappingBuffer)); // Initialize the reader and return the result. 
@@ -385,7 +385,7 @@ Error RawInstrProfReader::readHeader( NamesStart = Start + NamesOffset; ValueDataStart = reinterpret_cast(Start + ValueDataOffset); - std::unique_ptr NewSymtab = make_unique(); + std::unique_ptr NewSymtab = std::make_unique(); if (Error E = createSymtab(*NewSymtab.get())) return E; @@ -413,13 +413,19 @@ Error RawInstrProfReader::readRawCounts( if (NumCounters == 0) return error(instrprof_error::malformed); - auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); auto *NamesStartAsCounter = reinterpret_cast(NamesStart); + ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; - // Check bounds. - if (RawCounts.data() < CountersStart || - RawCounts.data() + RawCounts.size() > NamesStartAsCounter) + // Check bounds. Note that the counter pointer embedded in the data record + // may itself be corrupt. + if (NumCounters > MaxNumCounters) return error(instrprof_error::malformed); + ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); + if (CounterOffset < 0 || CounterOffset > MaxNumCounters || + (CounterOffset + NumCounters) > MaxNumCounters) + return error(instrprof_error::malformed); + + auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); if (ShouldSwapBytes) { Record.Counts.clear(); @@ -767,7 +773,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, UseCS ? this->CS_Summary : this->Summary; // initialize InstrProfSummary using the SummaryData from disk. - Summary = llvm::make_unique( + Summary = std::make_unique( UseCS ? 
ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, DetailedSummary, SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), @@ -777,13 +783,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, SummaryData->get(Summary::TotalNumFunctions)); return Cur + SummarySize; } else { - // For older version of profile data, we need to compute on the fly: - using namespace IndexedInstrProf; - + // The older versions do not support a profile summary. This just computes + // an empty summary, which will not result in accurate hot/cold detection. + // We would need to call addRecord for all NamedInstrProfRecords to get the + // correct summary. However, this version is old (prior to early 2016) and + // has not been supporting an accurate summary for several years. InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); - // FIXME: This only computes an empty summary. Need to call addRecord for - // all NamedInstrProfRecords to get the correct summary. - this->Summary = Builder.getSummary(); + Summary = Builder.getSummary(); return Cur; } } @@ -827,18 +833,18 @@ Error IndexedInstrProfReader::readHeader() { // The rest of the file is an on disk hash table. auto IndexPtr = - llvm::make_unique>( + std::make_unique>( Start + HashOffset, Cur, Start, HashType, FormatVersion); // Load the remapping table now if requested. 
if (RemappingBuffer) { - Remapper = llvm::make_unique< + Remapper = std::make_unique< InstrProfReaderItaniumRemapper>( std::move(RemappingBuffer), *IndexPtr); if (Error E = Remapper->populateRemappings()) return E; } else { - Remapper = llvm::make_unique(*IndexPtr); + Remapper = std::make_unique(*IndexPtr); } Index = std::move(IndexPtr); @@ -849,7 +855,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { if (Symtab.get()) return *Symtab.get(); - std::unique_ptr NewSymtab = make_unique(); + std::unique_ptr NewSymtab = std::make_unique(); if (Error E = Index->populateSymtab(*NewSymtab.get())) { consumeError(error(InstrProfError::take(std::move(E)))); } @@ -901,7 +907,7 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { return success(); } -void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) { +void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { uint64_t NumFuncs = 0; for (const auto &Func : *this) { if (isIRLevelProfile()) { @@ -909,7 +915,7 @@ void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) { if (FuncIsCS != IsCS) continue; } - Func.accumuateCounts(Sum); + Func.accumulateCounts(Sum); ++NumFuncs; } Sum.NumEntries = NumFuncs; diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 4ca2defd26d..ccb270e0b71 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -193,7 +193,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, const OverlapFuncFilters &FuncFilter) { auto Name = Other.Name; auto Hash = Other.Hash; - Other.accumuateCounts(FuncLevelOverlap.Test); + Other.accumulateCounts(FuncLevelOverlap.Test); if (FunctionData.find(Name) == FunctionData.end()) { Overlap.addOneUnique(FuncLevelOverlap.Test); return; diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp index 4d5b0093574..3299b5f9206 100644 --- 
a/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -93,14 +93,14 @@ void ProfileSummaryBuilder::computeDetailedSummary() { std::unique_ptr SampleProfileSummaryBuilder::getSummary() { computeDetailedSummary(); - return llvm::make_unique( + return std::make_unique( ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, MaxFunctionCount, NumCounts, NumFunctions); } std::unique_ptr InstrProfSummaryBuilder::getSummary() { computeDetailedSummary(); - return llvm::make_unique( + return std::make_unique( ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); } diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp index e17865cd15a..003e8d4d429 100644 --- a/lib/ProfileData/SampleProf.cpp +++ b/lib/ProfileData/SampleProf.cpp @@ -16,7 +16,9 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include @@ -28,8 +30,6 @@ using namespace sampleprof; namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; -DenseMap FunctionSamples::GUIDToFuncNameMap; -Module *FunctionSamples::CurrentModule; } // namespace sampleprof } // namespace llvm @@ -68,6 +68,12 @@ class SampleProfErrorCategoryType : public std::error_category { return "Counter overflow"; case sampleprof_error::ostream_seek_unsupported: return "Ostream does not support seek"; + case sampleprof_error::compress_failed: + return "Compress failure"; + case sampleprof_error::uncompress_failed: + return "Uncompress failure"; + case sampleprof_error::zlib_unavailable: + return "Zlib is unavailable"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -102,8 +108,8 @@ void 
SampleRecord::print(raw_ostream &OS, unsigned Indent) const { OS << NumSamples; if (hasCalls()) { OS << ", calls:"; - for (const auto &I : getCallTargets()) - OS << " " << I.first() << ":" << I.second; + for (const auto &I : getSortedCallTargets()) + OS << " " << I.first << ":" << I.second; } OS << "\n"; } @@ -149,6 +155,7 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { FS.second.print(OS, Indent + 4); } } + OS.indent(Indent); OS << "}\n"; } else { OS << "No inlined callsites in this function\n"; @@ -190,3 +197,44 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); } #endif + +std::error_code ProfileSymbolList::read(const uint8_t *Data, + uint64_t ListSize) { + const char *ListStart = reinterpret_cast(Data); + uint64_t Size = 0; + while (Size < ListSize) { + StringRef Str(ListStart + Size); + add(Str); + Size += Str.size() + 1; + } + if (Size != ListSize) + return sampleprof_error::malformed; + return sampleprof_error::success; +} + +std::error_code ProfileSymbolList::write(raw_ostream &OS) { + // Sort the symbols before output. If doing compression. + // It will make the compression much more effective. 
+ std::vector SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + std::string OutputString; + for (auto &Sym : SortedList) { + OutputString.append(Sym.str()); + OutputString.append(1, '\0'); + } + + OS << OutputString; + return sampleprof_error::success; +} + +void ProfileSymbolList::dump(raw_ostream &OS) const { + OS << "======== Dump profile symbol list ========\n"; + std::vector SortedList; + SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end()); + llvm::sort(SortedList); + + for (auto &Sym : SortedList) + OS << Sym << "\n"; +} diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index 192b6c71156..001aafce7bf 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" @@ -190,7 +191,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, /// the expected format. /// /// \returns true if the file was loaded successfully, false otherwise. 
-std::error_code SampleProfileReaderText::read() { +std::error_code SampleProfileReaderText::readImpl() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); sampleprof_error Result = sampleprof_error::success; @@ -345,7 +346,7 @@ inline ErrorOr SampleProfileReaderBinary::readStringIndex(T &Table) { return *Idx; } -ErrorOr SampleProfileReaderRawBinary::readStringFromTable() { +ErrorOr SampleProfileReaderBinary::readStringFromTable() { auto Idx = readStringIndex(NameTable); if (std::error_code EC = Idx.getError()) return EC; @@ -438,7 +439,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::readFuncProfile() { +std::error_code +SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { + Data = Start; auto NumHeadSamples = readNumber(); if (std::error_code EC = NumHeadSamples.getError()) return EC; @@ -458,25 +461,210 @@ std::error_code SampleProfileReaderBinary::readFuncProfile() { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::read() { +std::error_code SampleProfileReaderBinary::readImpl() { while (!at_eof()) { - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC = readFuncProfile(Data)) return EC; } return sampleprof_error::success; } -std::error_code SampleProfileReaderCompactBinary::read() { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) +std::error_code +SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start, + uint64_t Size, SecType Type) { + Data = Start; + End = Start + Size; + switch (Type) { + case SecProfSummary: + if (std::error_code EC = readSummary()) + return EC; + break; + case SecNameTable: + if (std::error_code EC = readNameTable()) + return EC; + break; + case SecLBRProfile: + if (std::error_code EC = readFuncProfiles()) + return EC; + break; + case 
SecProfileSymbolList: + if (std::error_code EC = readProfileSymbolList()) + return EC; + break; + case SecFuncOffsetTable: + if (std::error_code EC = readFuncOffsetTable()) + return EC; + break; + default: + break; + } + return sampleprof_error::success; +} + +void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) { + UseAllFuncs = false; + FuncsToUse.clear(); + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); +} + +std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() { + auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + + FuncOffsetTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + auto Offset = readNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + + FuncOffsetTable[*FName] = *Offset; + } + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readFuncProfiles() { + const uint8_t *Start = Data; + if (UseAllFuncs) { + while (Data < End) { + if (std::error_code EC = readFuncProfile(Data)) + return EC; + } + assert(Data == End && "More data is read than expected"); + return sampleprof_error::success; + } + + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } + } + + for (auto NameOffset : FuncOffsetTable) { + auto FuncName = NameOffset.first; + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + + Data = End; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() { + if (!ProfSymList) + ProfSymList = std::make_unique(); + + if (std::error_code EC = ProfSymList->read(Data, End - Data)) + 
return EC; + + Data = End; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::decompressSection( + const uint8_t *SecStart, const uint64_t SecSize, + const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { + Data = SecStart; + End = SecStart + SecSize; + auto DecompressSize = readNumber(); + if (std::error_code EC = DecompressSize.getError()) + return EC; + DecompressBufSize = *DecompressSize; + + auto CompressSize = readNumber(); + if (std::error_code EC = CompressSize.getError()) + return EC; + + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + + StringRef CompressedStrings(reinterpret_cast(Data), + *CompressSize); + char *Buffer = Allocator.Allocate(DecompressBufSize); + size_t UCSize = DecompressBufSize; + llvm::Error E = + zlib::uncompress(CompressedStrings, Buffer, UCSize); + if (E) + return sampleprof_error::uncompress_failed; + DecompressBuf = reinterpret_cast(Buffer); + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::readImpl() { + const uint8_t *BufStart = + reinterpret_cast(Buffer->getBufferStart()); + + for (auto &Entry : SecHdrTable) { + // Skip empty section. + if (!Entry.Size) + continue; + + const uint8_t *SecStart = BufStart + Entry.Offset; + uint64_t SecSize = Entry.Size; + + // If the section is compressed, decompress it into a buffer + // DecompressBuf before reading the actual data. The pointee of + // 'Data' will be changed to buffer hold by DecompressBuf + // temporarily when reading the actual data. 
+ bool isCompressed = hasSecFlag(Entry, SecFlagCompress); + if (isCompressed) { + const uint8_t *DecompressBuf; + uint64_t DecompressBufSize; + if (std::error_code EC = decompressSection( + SecStart, SecSize, DecompressBuf, DecompressBufSize)) + return EC; + SecStart = DecompressBuf; + SecSize = DecompressBufSize; + } + + if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type)) + return EC; + if (Data != SecStart + SecSize) + return sampleprof_error::malformed; + + // Change the pointee of 'Data' from DecompressBuf to original Buffer. + if (isCompressed) { + Data = BufStart + Entry.Offset; + End = BufStart + Buffer->getBufferSize(); + } + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderCompactBinary::readImpl() { + std::vector OffsetsToUse; + if (UseAllFuncs) { + for (auto FuncEntry : FuncOffsetTable) { + OffsetsToUse.push_back(FuncEntry.second); + } + } + else { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + OffsetsToUse.push_back(iter->second); + } + } + + for (auto Offset : OffsetsToUse) { const uint8_t *SavedData = Data; - Data = reinterpret_cast(Buffer->getBufferStart()) + - iter->second; - if (std::error_code EC = readFuncProfile()) + if (std::error_code EC = readFuncProfile( + reinterpret_cast(Buffer->getBufferStart()) + + Offset)) return EC; Data = SavedData; } @@ -489,6 +677,12 @@ std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { return sampleprof_error::bad_magic; } +std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { + if (Magic == SPMagic(SPF_Ext_Binary)) + return sampleprof_error::success; + return sampleprof_error::bad_magic; +} + std::error_code SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { if (Magic == SPMagic(SPF_Compact_Binary)) @@ -496,7 +690,7 @@ 
SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) { return sampleprof_error::bad_magic; } -std::error_code SampleProfileReaderRawBinary::readNameTable() { +std::error_code SampleProfileReaderBinary::readNameTable() { auto Size = readNumber(); if (std::error_code EC = Size.getError()) return EC; @@ -525,10 +719,98 @@ std::error_code SampleProfileReaderCompactBinary::readNameTable() { return sampleprof_error::success; } -std::error_code SampleProfileReaderBinary::readHeader() { - Data = reinterpret_cast(Buffer->getBufferStart()); - End = Data + Buffer->getBufferSize(); +std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() { + SecHdrTableEntry Entry; + auto Type = readUnencodedNumber(); + if (std::error_code EC = Type.getError()) + return EC; + Entry.Type = static_cast(*Type); + auto Flags = readUnencodedNumber(); + if (std::error_code EC = Flags.getError()) + return EC; + Entry.Flags = *Flags; + + auto Offset = readUnencodedNumber(); + if (std::error_code EC = Offset.getError()) + return EC; + Entry.Offset = *Offset; + + auto Size = readUnencodedNumber(); + if (std::error_code EC = Size.getError()) + return EC; + Entry.Size = *Size; + + SecHdrTable.push_back(std::move(Entry)); + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { + auto EntryNum = readUnencodedNumber(); + if (std::error_code EC = EntryNum.getError()) + return EC; + + for (uint32_t i = 0; i < (*EntryNum); i++) + if (std::error_code EC = readSecHdrTableEntry()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderExtBinaryBase::readHeader() { + const uint8_t *BufStart = + reinterpret_cast(Buffer->getBufferStart()); + Data = BufStart; + End = BufStart + Buffer->getBufferSize(); + + if (std::error_code EC = readMagicIdent()) + return EC; + + if (std::error_code EC = readSecHdrTable()) + return EC; + + return sampleprof_error::success; +} + +uint64_t 
SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { + for (auto &Entry : SecHdrTable) { + if (Entry.Type == Type) + return Entry.Size; + } + return 0; +} + +uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { + // Sections in SecHdrTable is not necessarily in the same order as + // sections in the profile because section like FuncOffsetTable needs + // to be written after section LBRProfile but needs to be read before + // section LBRProfile, so we cannot simply use the last entry in + // SecHdrTable to calculate the file size. + uint64_t FileSize = 0; + for (auto &Entry : SecHdrTable) { + FileSize = std::max(Entry.Offset + Entry.Size, FileSize); + } + return FileSize; +} + +bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { + uint64_t TotalSecsSize = 0; + for (auto &Entry : SecHdrTable) { + OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset + << ", Size: " << Entry.Size << "\n"; + TotalSecsSize += getSectionSize(Entry.Type); + } + uint64_t HeaderSize = SecHdrTable.front().Offset; + assert(HeaderSize + TotalSecsSize == getFileSize() && + "Size of 'header + sections' doesn't match the total size of profile"); + + OS << "Header Size: " << HeaderSize << "\n"; + OS << "Total Sections Size: " << TotalSecsSize << "\n"; + OS << "File Size: " << getFileSize() << "\n"; + return true; +} + +std::error_code SampleProfileReaderBinary::readMagicIdent() { // Read and check the magic identifier. 
auto Magic = readNumber(); if (std::error_code EC = Magic.getError()) @@ -543,6 +825,16 @@ std::error_code SampleProfileReaderBinary::readHeader() { else if (*Version != SPVersion()) return sampleprof_error::unsupported_version; + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::readHeader() { + Data = reinterpret_cast(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + if (std::error_code EC = readMagicIdent()) + return EC; + if (std::error_code EC = readSummary()) return EC; @@ -590,12 +882,11 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() { return sampleprof_error::success; } -void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) { +void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) { + UseAllFuncs = false; FuncsToUse.clear(); - for (auto &F : M) { - StringRef CanonName = FunctionSamples::getCanonicalFnName(F); - FuncsToUse.insert(CanonName); - } + for (auto &F : M) + FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); } std::error_code SampleProfileReaderBinary::readSummaryEntry( @@ -647,7 +938,7 @@ std::error_code SampleProfileReaderBinary::readSummary() { if (EC != sampleprof_error::success) return EC; } - Summary = llvm::make_unique( + Summary = std::make_unique( ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, *MaxFunctionCount, *NumBlocks, *NumFunctions); @@ -661,6 +952,13 @@ bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { return Magic == SPMagic(); } +bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { + const uint8_t *Data = + reinterpret_cast(Buffer.getBufferStart()); + uint64_t Magic = decodeULEB128(Data); + return Magic == SPMagic(SPF_Ext_Binary); +} + bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) { const uint8_t *Data = reinterpret_cast(Buffer.getBufferStart()); @@ -894,7 +1192,7 @@ std::error_code 
SampleProfileReaderGCC::readOneFunctionProfile( /// /// This format is generated by the Linux Perf conversion tool at /// https://github.com/google/autofdo. -std::error_code SampleProfileReaderGCC::read() { +std::error_code SampleProfileReaderGCC::readImpl() { // Read the string table. if (std::error_code EC = readNameTable()) return EC; @@ -911,38 +1209,31 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { return Magic == "adcg*704"; } -std::error_code SampleProfileReaderItaniumRemapper::read() { - // If the underlying data is in compact format, we can't remap it because +void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { + // If the reader is in compact format, we can't remap it because // we don't know what the original function names were. - if (getFormat() == SPF_Compact_Binary) { + if (Reader.getFormat() == SPF_Compact_Binary) { Ctx.diagnose(DiagnosticInfoSampleProfile( - Buffer->getBufferIdentifier(), + Reader.getBuffer()->getBufferIdentifier(), "Profile data remapping cannot be applied to profile data " "in compact format (original mangled names are not available).", DS_Warning)); - return sampleprof_error::success; + return; } - if (Error E = Remappings.read(*Buffer)) { - handleAllErrors( - std::move(E), [&](const SymbolRemappingParseError &ParseError) { - reportError(ParseError.getLineNum(), ParseError.getMessage()); - }); - return sampleprof_error::malformed; - } - - for (auto &Sample : getProfiles()) - if (auto Key = Remappings.insert(Sample.first())) + assert(Remappings && "should be initialized while creating remapper"); + for (auto &Sample : Reader.getProfiles()) + if (auto Key = Remappings->insert(Sample.first())) SampleMap.insert({Key, &Sample.second}); - return sampleprof_error::success; + RemappingApplied = true; } FunctionSamples * SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname) { - if (auto Key = Remappings.lookup(Fname)) + if (auto Key = Remappings->lookup(Fname)) return 
SampleMap.lookup(Key); - return SampleProfileReader::getSamplesFor(Fname); + return nullptr; } /// Prepare a memory buffer for the contents of \p Filename. @@ -968,13 +1259,16 @@ setupMemoryBuffer(const Twine &Filename) { /// /// \param C The LLVM context to use to emit diagnostics. /// +/// \param RemapFilename The file used for profile remapping. +/// /// \returns an error code indicating the status of the created reader. ErrorOr> -SampleProfileReader::create(const Twine &Filename, LLVMContext &C) { +SampleProfileReader::create(const std::string Filename, LLVMContext &C, + const std::string RemapFilename) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; - return create(BufferOrError.get(), C); + return create(BufferOrError.get(), C, RemapFilename); } /// Create a sample profile remapper from the given input, to remap the @@ -982,20 +1276,48 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) { /// /// \param Filename The file to open. /// +/// \param Reader The profile reader the remapper is going to be applied to. +/// /// \param C The LLVM context to use to emit diagnostics. /// -/// \param Underlying The underlying profile data reader to remap. -/// /// \returns an error code indicating the status of the created reader. -ErrorOr> -SampleProfileReaderItaniumRemapper::create( - const Twine &Filename, LLVMContext &C, - std::unique_ptr Underlying) { +ErrorOr> +SampleProfileReaderItaniumRemapper::create(const std::string Filename, + SampleProfileReader &Reader, + LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; - return llvm::make_unique( - std::move(BufferOrError.get()), C, std::move(Underlying)); + return create(BufferOrError.get(), Reader, C); +} + +/// Create a sample profile remapper from the given input, to remap the +/// function names in the given profile data. 
+/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \param Reader The profile reader the remapper is going to be applied to. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr> +SampleProfileReaderItaniumRemapper::create(std::unique_ptr &B, + SampleProfileReader &Reader, + LLVMContext &C) { + auto Remappings = std::make_unique(); + if (Error E = Remappings->read(*B.get())) { + handleAllErrors( + std::move(E), [&](const SymbolRemappingParseError &ParseError) { + C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), + ParseError.getLineNum(), + ParseError.getMessage())); + }); + return sampleprof_error::malformed; + } + + return std::make_unique( + std::move(B), std::move(Remappings), Reader); } /// Create a sample profile reader based on the format of the input data. @@ -1004,12 +1326,17 @@ SampleProfileReaderItaniumRemapper::create( /// /// \param C The LLVM context to use to emit diagnostics. /// +/// \param RemapFilename The file used for profile remapping. +/// /// \returns an error code indicating the status of the created reader. 
ErrorOr> -SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { +SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C, + const std::string RemapFilename) { std::unique_ptr Reader; if (SampleProfileReaderRawBinary::hasFormat(*B)) Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); + else if (SampleProfileReaderExtBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); else if (SampleProfileReaderCompactBinary::hasFormat(*B)) Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C)); else if (SampleProfileReaderGCC::hasFormat(*B)) @@ -1019,9 +1346,21 @@ SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { else return sampleprof_error::unrecognized_format; + if (!RemapFilename.empty()) { + auto ReaderOrErr = + SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg = "Could not create remapper: " + EC.message(); + C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); + return EC; + } + Reader->Remapper = std::move(ReaderOrErr.get()); + } + FunctionSamples::Format = Reader->getFormat(); - if (std::error_code EC = Reader->readHeader()) + if (std::error_code EC = Reader->readHeader()) { return EC; + } return std::move(Reader); } diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index 8b876e0aa5d..8d09af31f94 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorOr.h" @@ -39,11 +40,8 @@ using namespace llvm; using namespace sampleprof; -std::error_code -SampleProfileWriter::write(const StringMap &ProfileMap) { - if (std::error_code EC = 
writeHeader(ProfileMap)) - return EC; - +std::error_code SampleProfileWriter::writeFuncProfiles( + const StringMap &ProfileMap) { // Sort the ProfileMap by total samples. typedef std::pair NameFunctionSamples; std::vector V; @@ -58,12 +56,161 @@ SampleProfileWriter::write(const StringMap &ProfileMap) { }); for (const auto &I : V) { - if (std::error_code EC = write(*I.second)) + if (std::error_code EC = writeSample(*I.second)) return EC; } return sampleprof_error::success; } +std::error_code +SampleProfileWriter::write(const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; + + if (std::error_code EC = writeFuncProfiles(ProfileMap)) + return EC; + + return sampleprof_error::success; +} + +SecHdrTableEntry & +SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) { + auto SecIt = std::find_if( + SectionHdrLayout.begin(), SectionHdrLayout.end(), + [=](const auto &Entry) -> bool { return Entry.Type == Type; }); + return *SecIt; +} + +/// Return the current position and prepare to use it as the start +/// position of a section. +uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) { + uint64_t SectionStart = OutputStream->tell(); + auto &Entry = getEntryInLayout(Type); + // Use LocalBuf as a temporary output for writting data. 
+ if (hasSecFlag(Entry, SecFlagCompress)) + LocalBufStream.swap(OutputStream); + return SectionStart; +} + +std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() { + if (!llvm::zlib::isAvailable()) + return sampleprof_error::zlib_unavailable; + std::string &UncompressedStrings = + static_cast(LocalBufStream.get())->str(); + if (UncompressedStrings.size() == 0) + return sampleprof_error::success; + auto &OS = *OutputStream; + SmallString<128> CompressedStrings; + llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings, + zlib::BestSizeCompression); + if (E) + return sampleprof_error::compress_failed; + encodeULEB128(UncompressedStrings.size(), OS); + encodeULEB128(CompressedStrings.size(), OS); + OS << CompressedStrings.str(); + UncompressedStrings.clear(); + return sampleprof_error::success; +} + +/// Add a new section into section header table. +std::error_code +SampleProfileWriterExtBinaryBase::addNewSection(SecType Type, + uint64_t SectionStart) { + auto Entry = getEntryInLayout(Type); + if (hasSecFlag(Entry, SecFlagCompress)) { + LocalBufStream.swap(OutputStream); + if (std::error_code EC = compressAndOutput()) + return EC; + } + SecHdrTable.push_back({Type, Entry.Flags, SectionStart - FileStart, + OutputStream->tell() - SectionStart}); + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinaryBase::write( + const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; + + std::string LocalBuf; + LocalBufStream = std::make_unique(LocalBuf); + if (std::error_code EC = writeSections(ProfileMap)) + return EC; + + if (std::error_code EC = writeSecHdrTable()) + return EC; + + return sampleprof_error::success; +} + +std::error_code +SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) { + uint64_t Offset = OutputStream->tell(); + StringRef Name = S.getName(); + FuncOffsetTable[Name] = Offset - SecLBRProfileStart; + encodeULEB128(S.getHeadSamples(), 
*OutputStream); + return writeBody(S); +} + +std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() { + auto &OS = *OutputStream; + + // Write out the table size. + encodeULEB128(FuncOffsetTable.size(), OS); + + // Write out FuncOffsetTable. + for (auto entry : FuncOffsetTable) { + writeNameIdx(entry.first); + encodeULEB128(entry.second, OS); + } + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinary::writeSections( + const StringMap &ProfileMap) { + uint64_t SectionStart = markSectionStart(SecProfSummary); + computeSummary(ProfileMap); + if (auto EC = writeSummary()) + return EC; + if (std::error_code EC = addNewSection(SecProfSummary, SectionStart)) + return EC; + + // Generate the name table for all the functions referenced in the profile. + SectionStart = markSectionStart(SecNameTable); + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + writeNameTable(); + if (std::error_code EC = addNewSection(SecNameTable, SectionStart)) + return EC; + + SectionStart = markSectionStart(SecLBRProfile); + SecLBRProfileStart = OutputStream->tell(); + if (std::error_code EC = writeFuncProfiles(ProfileMap)) + return EC; + if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart)) + return EC; + + if (ProfSymList && ProfSymList->toCompress()) + setToCompressSection(SecProfileSymbolList); + + SectionStart = markSectionStart(SecProfileSymbolList); + if (ProfSymList && ProfSymList->size() > 0) + if (std::error_code EC = ProfSymList->write(*OutputStream)) + return EC; + if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart)) + return EC; + + SectionStart = markSectionStart(SecFuncOffsetTable); + if (std::error_code EC = writeFuncOffsetTable()) + return EC; + if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart)) + return EC; + + return sampleprof_error::success; +} + std::error_code SampleProfileWriterCompactBinary::write( const StringMap &ProfileMap) { if 
(std::error_code EC = SampleProfileWriter::write(ProfileMap)) @@ -81,7 +228,7 @@ std::error_code SampleProfileWriterCompactBinary::write( /// /// The format used here is more structured and deliberate because /// it needs to be parsed by the SampleProfileReaderText class. -std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { +std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) { auto &OS = *OutputStream; OS << S.getName() << ":" << S.getTotalSamples(); if (Indent == 0) @@ -100,8 +247,8 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { OS << Sample.getSamples(); - for (const auto &J : Sample.getCallTargets()) - OS << " " << J.first() << ":" << J.second; + for (const auto &J : Sample.getSortedCallTargets()) + OS << " " << J.first << ":" << J.second; OS << "\n"; } @@ -117,7 +264,7 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { OS << Loc.LineOffset << ": "; else OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; - if (std::error_code EC = write(CalleeSamples)) + if (std::error_code EC = writeSample(CalleeSamples)) return EC; } Indent -= 1; @@ -163,7 +310,7 @@ void SampleProfileWriterBinary::stablizeNameTable(std::set &V) { NameTable[N] = i++; } -std::error_code SampleProfileWriterRawBinary::writeNameTable() { +std::error_code SampleProfileWriterBinary::writeNameTable() { auto &OS = *OutputStream; std::set V; stablizeNameTable(V); @@ -214,25 +361,18 @@ std::error_code SampleProfileWriterCompactBinary::writeNameTable() { return sampleprof_error::success; } -std::error_code SampleProfileWriterRawBinary::writeMagicIdent() { +std::error_code +SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) { auto &OS = *OutputStream; // Write file magic identifier. 
- encodeULEB128(SPMagic(), OS); - encodeULEB128(SPVersion(), OS); - return sampleprof_error::success; -} - -std::error_code SampleProfileWriterCompactBinary::writeMagicIdent() { - auto &OS = *OutputStream; - // Write file magic identifier. - encodeULEB128(SPMagic(SPF_Compact_Binary), OS); + encodeULEB128(SPMagic(Format), OS); encodeULEB128(SPVersion(), OS); return sampleprof_error::success; } std::error_code SampleProfileWriterBinary::writeHeader( const StringMap &ProfileMap) { - writeMagicIdent(); + writeMagicIdent(Format); computeSummary(ProfileMap); if (auto EC = writeSummary()) @@ -248,6 +388,82 @@ std::error_code SampleProfileWriterBinary::writeHeader( return sampleprof_error::success; } +void SampleProfileWriterExtBinaryBase::setToCompressAllSections() { + for (auto &Entry : SectionHdrLayout) + addSecFlags(Entry, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) { + addSectionFlags(Type, SecFlagCompress); +} + +void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type, + SecFlags Flags) { + for (auto &Entry : SectionHdrLayout) { + if (Entry.Type == Type) + addSecFlags(Entry, Flags); + } +} + +void SampleProfileWriterExtBinaryBase::allocSecHdrTable() { + support::endian::Writer Writer(*OutputStream, support::little); + + Writer.write(static_cast(SectionHdrLayout.size())); + SecHdrTableOffset = OutputStream->tell(); + for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + Writer.write(static_cast(-1)); + } +} + +std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { + auto &OFS = static_cast(*OutputStream); + uint64_t Saved = OutputStream->tell(); + + // Set OutputStream to the location saved in SecHdrTableOffset. 
+ if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + support::endian::Writer Writer(*OutputStream, support::little); + + DenseMap IndexMap; + for (uint32_t i = 0; i < SecHdrTable.size(); i++) { + IndexMap.insert({static_cast(SecHdrTable[i].Type), i}); + } + + // Write the section header table in the order specified in + // SectionHdrLayout. That is the sections order Reader will see. + // Note that the sections order in which Reader expects to read + // may be different from the order in which Writer is able to + // write, so we need to adjust the order in SecHdrTable to be + // consistent with SectionHdrLayout when we write SecHdrTable + // to the memory. + for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) { + uint32_t idx = IndexMap[static_cast(SectionHdrLayout[i].Type)]; + Writer.write(static_cast(SecHdrTable[idx].Type)); + Writer.write(static_cast(SecHdrTable[idx].Flags)); + Writer.write(static_cast(SecHdrTable[idx].Offset)); + Writer.write(static_cast(SecHdrTable[idx].Size)); + } + + // Reset OutputStream. 
+ if (OFS.seek(Saved) == (uint64_t)-1) + return sampleprof_error::ostream_seek_unsupported; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterExtBinaryBase::writeHeader( + const StringMap &ProfileMap) { + auto &OS = *OutputStream; + FileStart = OS.tell(); + writeMagicIdent(Format); + + allocSecHdrTable(); + return sampleprof_error::success; +} + std::error_code SampleProfileWriterCompactBinary::writeHeader( const StringMap &ProfileMap) { support::endian::Writer Writer(*OutputStream, support::little); @@ -294,8 +510,8 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { encodeULEB128(Loc.Discriminator, OS); encodeULEB128(Sample.getSamples(), OS); encodeULEB128(Sample.getCallTargets().size(), OS); - for (const auto &J : Sample.getCallTargets()) { - StringRef Callee = J.first(); + for (const auto &J : Sample.getSortedCallTargets()) { + StringRef Callee = J.first; uint64_t CalleeSamples = J.second; if (std::error_code EC = writeNameIdx(Callee)) return EC; @@ -324,13 +540,14 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { /// Write samples of a top-level function to a binary file. /// /// \returns true if the samples were written successfully, false otherwise. 
-std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) { +std::error_code +SampleProfileWriterBinary::writeSample(const FunctionSamples &S) { encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); } std::error_code -SampleProfileWriterCompactBinary::write(const FunctionSamples &S) { +SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); StringRef Name = S.getName(); FuncOffsetTable[Name] = Offset; @@ -349,10 +566,11 @@ ErrorOr> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; std::unique_ptr OS; - if (Format == SPF_Binary || Format == SPF_Compact_Binary) - OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + if (Format == SPF_Binary || Format == SPF_Ext_Binary || + Format == SPF_Compact_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None)); else - OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_Text)); if (EC) return EC; @@ -374,6 +592,8 @@ SampleProfileWriter::create(std::unique_ptr &OS, if (Format == SPF_Binary) Writer.reset(new SampleProfileWriterRawBinary(OS)); + else if (Format == SPF_Ext_Binary) + Writer.reset(new SampleProfileWriterExtBinary(OS)); else if (Format == SPF_Compact_Binary) Writer.reset(new SampleProfileWriterCompactBinary(OS)); else if (Format == SPF_Text) @@ -386,6 +606,7 @@ SampleProfileWriter::create(std::unique_ptr &OS, if (EC) return EC; + Writer->Format = Format; return std::move(Writer); } diff --git a/lib/Remarks/BitstreamRemarkParser.cpp b/lib/Remarks/BitstreamRemarkParser.cpp new file mode 100644 index 00000000000..99a82e1ee3a --- /dev/null +++ b/lib/Remarks/BitstreamRemarkParser.cpp @@ -0,0 +1,597 @@ +//===- BitstreamRemarkParser.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides utility methods used by clients that want to use the +// parser for remark diagnostics in LLVM. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/BitstreamRemarkParser.h" +#include "BitstreamRemarkParser.h" +#include "llvm/Remarks/BitstreamRemarkContainer.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::remarks; + +static Error unknownRecord(const char *BlockName, unsigned RecordID) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: unknown record entry (%lu).", BlockName, + RecordID); +} + +static Error malformedRecord(const char *BlockName, const char *RecordName) { + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: malformed record entry (%s).", BlockName, + RecordName); +} + +BitstreamMetaParserHelper::BitstreamMetaParserHelper( + BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo) + : Stream(Stream), BlockInfo(BlockInfo) {} + +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamMetaParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; + // Note: 2 is used here because it's the max number of fields we have per + // record. 
+ SmallVector Record; + StringRef Blob; + Expected RecordID = Stream.readRecord(Code, Record, &Blob); + if (!RecordID) + return RecordID.takeError(); + + switch (*RecordID) { + case RECORD_META_CONTAINER_INFO: { + if (Record.size() != 2) + return malformedRecord("BLOCK_META", "RECORD_META_CONTAINER_INFO"); + Parser.ContainerVersion = Record[0]; + Parser.ContainerType = Record[1]; + break; + } + case RECORD_META_REMARK_VERSION: { + if (Record.size() != 1) + return malformedRecord("BLOCK_META", "RECORD_META_REMARK_VERSION"); + Parser.RemarkVersion = Record[0]; + break; + } + case RECORD_META_STRTAB: { + if (Record.size() != 0) + return malformedRecord("BLOCK_META", "RECORD_META_STRTAB"); + Parser.StrTabBuf = Blob; + break; + } + case RECORD_META_EXTERNAL_FILE: { + if (Record.size() != 0) + return malformedRecord("BLOCK_META", "RECORD_META_EXTERNAL_FILE"); + Parser.ExternalFilePath = Blob; + break; + } + default: + return unknownRecord("BLOCK_META", *RecordID); + } + return Error::success(); +} + +BitstreamRemarkParserHelper::BitstreamRemarkParserHelper( + BitstreamCursor &Stream) + : Stream(Stream) {} + +/// Parse a record and fill in the fields in the parser. +static Error parseRecord(BitstreamRemarkParserHelper &Parser, unsigned Code) { + BitstreamCursor &Stream = Parser.Stream; + // Note: 5 is used here because it's the max number of fields we have per + // record. 
+ SmallVector Record; + StringRef Blob; + Expected RecordID = Stream.readRecord(Code, Record, &Blob); + if (!RecordID) + return RecordID.takeError(); + + switch (*RecordID) { + case RECORD_REMARK_HEADER: { + if (Record.size() != 4) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HEADER"); + Parser.Type = Record[0]; + Parser.RemarkNameIdx = Record[1]; + Parser.PassNameIdx = Record[2]; + Parser.FunctionNameIdx = Record[3]; + break; + } + case RECORD_REMARK_DEBUG_LOC: { + if (Record.size() != 3) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_DEBUG_LOC"); + Parser.SourceFileNameIdx = Record[0]; + Parser.SourceLine = Record[1]; + Parser.SourceColumn = Record[2]; + break; + } + case RECORD_REMARK_HOTNESS: { + if (Record.size() != 1) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_HOTNESS"); + Parser.Hotness = Record[0]; + break; + } + case RECORD_REMARK_ARG_WITH_DEBUGLOC: { + if (Record.size() != 5) + return malformedRecord("BLOCK_REMARK", "RECORD_REMARK_ARG_WITH_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. + Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.TmpArgs.back().SourceFileNameIdx = Record[2]; + Parser.TmpArgs.back().SourceLine = Record[3]; + Parser.TmpArgs.back().SourceColumn = Record[4]; + Parser.Args = + ArrayRef(Parser.TmpArgs); + break; + } + case RECORD_REMARK_ARG_WITHOUT_DEBUGLOC: { + if (Record.size() != 2) + return malformedRecord("BLOCK_REMARK", + "RECORD_REMARK_ARG_WITHOUT_DEBUGLOC"); + // Create a temporary argument. Use that as a valid memory location for this + // argument entry. 
+ Parser.TmpArgs.emplace_back(); + Parser.TmpArgs.back().KeyIdx = Record[0]; + Parser.TmpArgs.back().ValueIdx = Record[1]; + Parser.Args = + ArrayRef(Parser.TmpArgs); + break; + } + default: + return unknownRecord("BLOCK_REMARK", *RecordID); + } + return Error::success(); +} + +template +static Error parseBlock(T &ParserHelper, unsigned BlockID, + const char *BlockName) { + BitstreamCursor &Stream = ParserHelper.Stream; + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + if (Next->Kind != BitstreamEntry::SubBlock || Next->ID != BlockID) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting [ENTER_SUBBLOCK, %s, ...].", + BlockName, BlockName); + if (Stream.EnterSubBlock(BlockID)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while entering %s.", BlockName); + + // Stop when there is nothing to read anymore or when we encounter an + // END_BLOCK. + while (!Stream.AtEndOfStream()) { + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Error: + case BitstreamEntry::SubBlock: + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: expecting records.", BlockName); + case BitstreamEntry::Record: + if (Error E = parseRecord(ParserHelper, Next->ID)) + return E; + continue; + } + } + // If we're here, it means we didn't get an END_BLOCK yet, but we're at the + // end of the stream. In this case, error. 
+ return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing %s: unterminated block.", BlockName); +} + +Error BitstreamMetaParserHelper::parse() { + return parseBlock(*this, META_BLOCK_ID, "META_BLOCK"); +} + +Error BitstreamRemarkParserHelper::parse() { + return parseBlock(*this, REMARK_BLOCK_ID, "REMARK_BLOCK"); +} + +BitstreamParserHelper::BitstreamParserHelper(StringRef Buffer) + : Stream(Buffer) {} + +Expected> BitstreamParserHelper::parseMagic() { + std::array Result; + for (unsigned i = 0; i < 4; ++i) + if (Expected R = Stream.Read(8)) + Result[i] = *R; + else + return R.takeError(); + return Result; +} + +Error BitstreamParserHelper::parseBlockInfoBlock() { + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + if (Next->Kind != BitstreamEntry::SubBlock || + Next->ID != llvm::bitc::BLOCKINFO_BLOCK_ID) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCKINFO_BLOCK: expecting [ENTER_SUBBLOCK, " + "BLOCKINFO_BLOCK, ...]."); + + Expected> MaybeBlockInfo = + Stream.ReadBlockInfoBlock(); + if (!MaybeBlockInfo) + return MaybeBlockInfo.takeError(); + + if (!*MaybeBlockInfo) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCKINFO_BLOCK."); + + BlockInfo = **MaybeBlockInfo; + + Stream.setBlockInfo(&BlockInfo); + return Error::success(); +} + +static Expected isBlock(BitstreamCursor &Stream, unsigned BlockID) { + bool Result = false; + uint64_t PreviousBitNo = Stream.GetCurrentBitNo(); + Expected Next = Stream.advance(); + if (!Next) + return Next.takeError(); + switch (Next->Kind) { + case BitstreamEntry::SubBlock: + // Check for the block id. 
+ Result = Next->ID == BlockID; + break; + case BitstreamEntry::Error: + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Unexpected error while parsing bitstream."); + default: + Result = false; + break; + } + if (Error E = Stream.JumpToBit(PreviousBitNo)) + return std::move(E); + return Result; +} + +Expected BitstreamParserHelper::isMetaBlock() { + return isBlock(Stream, META_BLOCK_ID); +} + +Expected BitstreamParserHelper::isRemarkBlock() { + return isBlock(Stream, META_BLOCK_ID); +} + +static Error validateMagicNumber(StringRef Magic) { + if (Magic != remarks::ContainerMagic) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown magic number: expecting %s, got %.4s.", + remarks::ContainerMagic.data(), Magic.data()); + return Error::success(); +} + +static Error advanceToMetaBlock(BitstreamParserHelper &Helper) { + Expected> Magic = Helper.parseMagic(); + if (!Magic) + return Magic.takeError(); + if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size()))) + return E; + if (Error E = Helper.parseBlockInfoBlock()) + return E; + Expected isMetaBlock = Helper.isMetaBlock(); + if (!isMetaBlock) + return isMetaBlock.takeError(); + if (!*isMetaBlock) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Expecting META_BLOCK after the BLOCKINFO_BLOCK."); + return Error::success(); +} + +Expected> +remarks::createBitstreamParserFromMeta( + StringRef Buf, Optional StrTab, + Optional ExternalFilePrependPath) { + BitstreamParserHelper Helper(Buf); + Expected> Magic = Helper.parseMagic(); + if (!Magic) + return Magic.takeError(); + + if (Error E = validateMagicNumber(StringRef(Magic->data(), Magic->size()))) + return std::move(E); + + auto Parser = + StrTab ? 
std::make_unique(Buf, std::move(*StrTab)) + : std::make_unique(Buf); + + if (ExternalFilePrependPath) + Parser->ExternalFilePrependPath = *ExternalFilePrependPath; + + return std::move(Parser); +} + +Expected> BitstreamRemarkParser::next() { + if (ParserHelper.atEndOfStream()) + return make_error(); + + if (!ReadyToParseRemarks) { + if (Error E = parseMeta()) + return std::move(E); + ReadyToParseRemarks = true; + } + + return parseRemark(); +} + +Error BitstreamRemarkParser::parseMeta() { + // Advance and to the meta block. + if (Error E = advanceToMetaBlock(ParserHelper)) + return E; + + BitstreamMetaParserHelper MetaHelper(ParserHelper.Stream, + ParserHelper.BlockInfo); + if (Error E = MetaHelper.parse()) + return E; + + if (Error E = processCommonMeta(MetaHelper)) + return E; + + switch (ContainerType) { + case BitstreamRemarkContainerType::Standalone: + return processStandaloneMeta(MetaHelper); + case BitstreamRemarkContainerType::SeparateRemarksFile: + return processSeparateRemarksFileMeta(MetaHelper); + case BitstreamRemarkContainerType::SeparateRemarksMeta: + return processSeparateRemarksMetaMeta(MetaHelper); + } + llvm_unreachable("Unknown BitstreamRemarkContainerType enum"); +} + +Error BitstreamRemarkParser::processCommonMeta( + BitstreamMetaParserHelper &MetaHelper) { + if (Optional Version = MetaHelper.ContainerVersion) + ContainerVersion = *Version; + else + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing container version."); + + if (Optional Type = MetaHelper.ContainerType) { + // Always >= BitstreamRemarkContainerType::First since it's unsigned. 
+ if (*Type > static_cast(BitstreamRemarkContainerType::Last)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: invalid container type."); + + ContainerType = static_cast(*Type); + } else + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing container type."); + + return Error::success(); +} + +static Error processStrTab(BitstreamRemarkParser &P, + Optional StrTabBuf) { + if (!StrTabBuf) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing string table."); + // Parse and assign the string table. + P.StrTab.emplace(*StrTabBuf); + return Error::success(); +} + +static Error processRemarkVersion(BitstreamRemarkParser &P, + Optional RemarkVersion) { + if (!RemarkVersion) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing remark version."); + P.RemarkVersion = *RemarkVersion; + return Error::success(); +} + +Error BitstreamRemarkParser::processExternalFilePath( + Optional ExternalFilePath) { + if (!ExternalFilePath) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_META: missing external file path."); + + SmallString<80> FullPath(ExternalFilePrependPath); + sys::path::append(FullPath, *ExternalFilePath); + + // External file: open the external file, parse it, check if its metadata + // matches the one from the separate metadata, then replace the current parser + // with the one parsing the remarks. + ErrorOr> BufferOrErr = + MemoryBuffer::getFile(FullPath); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(FullPath, EC); + TmpRemarkBuffer = std::move(*BufferOrErr); + + // Create a separate parser used for parsing the separate file. 
+ ParserHelper = BitstreamParserHelper(TmpRemarkBuffer->getBuffer()); + // Advance and check until we can parse the meta block. + if (Error E = advanceToMetaBlock(ParserHelper)) + return E; + // Parse the meta from the separate file. + // Note: here we overwrite the BlockInfo with the one from the file. This will + // be used to parse the rest of the file. + BitstreamMetaParserHelper SeparateMetaHelper(ParserHelper.Stream, + ParserHelper.BlockInfo); + if (Error E = SeparateMetaHelper.parse()) + return E; + + uint64_t PreviousContainerVersion = ContainerVersion; + if (Error E = processCommonMeta(SeparateMetaHelper)) + return E; + + if (ContainerType != BitstreamRemarkContainerType::SeparateRemarksFile) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing external file's BLOCK_META: wrong container " + "type."); + + if (PreviousContainerVersion != ContainerVersion) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing external file's BLOCK_META: mismatching versions: " + "original meta: %lu, external file meta: %lu.", + PreviousContainerVersion, ContainerVersion); + + // Process the meta from the separate file. 
+ return processSeparateRemarksFileMeta(SeparateMetaHelper); +} + +Error BitstreamRemarkParser::processStandaloneMeta( + BitstreamMetaParserHelper &Helper) { + if (Error E = processStrTab(*this, Helper.StrTabBuf)) + return E; + return processRemarkVersion(*this, Helper.RemarkVersion); +} + +Error BitstreamRemarkParser::processSeparateRemarksFileMeta( + BitstreamMetaParserHelper &Helper) { + return processRemarkVersion(*this, Helper.RemarkVersion); +} + +Error BitstreamRemarkParser::processSeparateRemarksMetaMeta( + BitstreamMetaParserHelper &Helper) { + if (Error E = processStrTab(*this, Helper.StrTabBuf)) + return E; + return processExternalFilePath(Helper.ExternalFilePath); +} + +Expected> BitstreamRemarkParser::parseRemark() { + BitstreamRemarkParserHelper RemarkHelper(ParserHelper.Stream); + if (Error E = RemarkHelper.parse()) + return std::move(E); + + return processRemark(RemarkHelper); +} + +Expected> +BitstreamRemarkParser::processRemark(BitstreamRemarkParserHelper &Helper) { + std::unique_ptr Result = std::make_unique(); + Remark &R = *Result; + + if (StrTab == None) + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Error while parsing BLOCK_REMARK: missing string table."); + + if (!Helper.Type) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark type."); + + // Always >= Type::First since it's unsigned. 
+ if (*Helper.Type > static_cast(Type::Last)) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: unknown remark type."); + + R.RemarkType = static_cast(*Helper.Type); + + if (!Helper.RemarkNameIdx) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark name."); + + if (Expected RemarkName = (*StrTab)[*Helper.RemarkNameIdx]) + R.RemarkName = *RemarkName; + else + return RemarkName.takeError(); + + if (!Helper.PassNameIdx) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark pass."); + + if (Expected PassName = (*StrTab)[*Helper.PassNameIdx]) + R.PassName = *PassName; + else + return PassName.takeError(); + + if (!Helper.FunctionNameIdx) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing remark function name."); + if (Expected FunctionName = (*StrTab)[*Helper.FunctionNameIdx]) + R.FunctionName = *FunctionName; + else + return FunctionName.takeError(); + + if (Helper.SourceFileNameIdx && Helper.SourceLine && Helper.SourceColumn) { + Expected SourceFileName = (*StrTab)[*Helper.SourceFileNameIdx]; + if (!SourceFileName) + return SourceFileName.takeError(); + R.Loc.emplace(); + R.Loc->SourceFilePath = *SourceFileName; + R.Loc->SourceLine = *Helper.SourceLine; + R.Loc->SourceColumn = *Helper.SourceColumn; + } + + if (Helper.Hotness) + R.Hotness = *Helper.Hotness; + + if (!Helper.Args) + return std::move(Result); + + for (const BitstreamRemarkParserHelper::Argument &Arg : *Helper.Args) { + if (!Arg.KeyIdx) + return createStringError( + std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing key in remark argument."); + if (!Arg.ValueIdx) + return createStringError( + 
std::make_error_code(std::errc::illegal_byte_sequence), + "Error while parsing BLOCK_REMARK: missing value in remark " + "argument."); + + // We have at least a key and a value, create an entry. + R.Args.emplace_back(); + + if (Expected Key = (*StrTab)[*Arg.KeyIdx]) + R.Args.back().Key = *Key; + else + return Key.takeError(); + + if (Expected Value = (*StrTab)[*Arg.ValueIdx]) + R.Args.back().Val = *Value; + else + return Value.takeError(); + + if (Arg.SourceFileNameIdx && Arg.SourceLine && Arg.SourceColumn) { + if (Expected SourceFileName = + (*StrTab)[*Arg.SourceFileNameIdx]) { + R.Args.back().Loc.emplace(); + R.Args.back().Loc->SourceFilePath = *SourceFileName; + R.Args.back().Loc->SourceLine = *Arg.SourceLine; + R.Args.back().Loc->SourceColumn = *Arg.SourceColumn; + } else + return SourceFileName.takeError(); + } + } + + return std::move(Result); +} diff --git a/lib/Remarks/BitstreamRemarkParser.h b/lib/Remarks/BitstreamRemarkParser.h new file mode 100644 index 00000000000..7c9cc2f1e7d --- /dev/null +++ b/lib/Remarks/BitstreamRemarkParser.h @@ -0,0 +1,83 @@ +//===-- BitstreamRemarkParser.h - Parser for Bitstream remarks --*- C++/-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the impementation of the Bitstream remark parser. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H +#define LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Remarks/BitstreamRemarkParser.h" +#include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkParser.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { +namespace remarks { +/// Parses and holds the state of the latest parsed remark. +struct BitstreamRemarkParser : public RemarkParser { + /// The buffer to parse. + BitstreamParserHelper ParserHelper; + /// The string table used for parsing strings. + Optional StrTab; + /// Temporary remark buffer used when the remarks are stored separately. + std::unique_ptr TmpRemarkBuffer; + /// The common metadata used to decide how to parse the buffer. + /// This is filled when parsing the metadata block. + uint64_t ContainerVersion; + uint64_t RemarkVersion; + BitstreamRemarkContainerType ContainerType; + /// Wether the parser is ready to parse remarks. + bool ReadyToParseRemarks = false; + + /// Create a parser that expects to find a string table embedded in the + /// stream. + BitstreamRemarkParser(StringRef Buf) + : RemarkParser(Format::Bitstream), ParserHelper(Buf) {} + + /// Create a parser that uses a pre-parsed string table. + BitstreamRemarkParser(StringRef Buf, ParsedStringTable StrTab) + : RemarkParser(Format::Bitstream), ParserHelper(Buf), + StrTab(std::move(StrTab)) {} + + Expected> next() override; + + static bool classof(const RemarkParser *P) { + return P->ParserFormat == Format::Bitstream; + } + + /// Parse and process the metadata of the buffer. + Error parseMeta(); + + /// Parse a Bitstream remark. + Expected> parseRemark(); + +private: + /// Helper functions. 
+ Error processCommonMeta(BitstreamMetaParserHelper &Helper); + Error processStandaloneMeta(BitstreamMetaParserHelper &Helper); + Error processSeparateRemarksFileMeta(BitstreamMetaParserHelper &Helper); + Error processSeparateRemarksMetaMeta(BitstreamMetaParserHelper &Helper); + Expected> + processRemark(BitstreamRemarkParserHelper &Helper); + Error processExternalFilePath(Optional ExternalFilePath); +}; + +Expected> createBitstreamParserFromMeta( + StringRef Buf, Optional StrTab = None, + Optional ExternalFilePrependPath = None); + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_LIB_REMARKS_BITSTREAM_REMARK_PARSER_H */ diff --git a/lib/Remarks/BitstreamRemarkSerializer.cpp b/lib/Remarks/BitstreamRemarkSerializer.cpp new file mode 100644 index 00000000000..d02782c7954 --- /dev/null +++ b/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -0,0 +1,386 @@ +//===- BitstreamRemarkSerializer.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the LLVM bitstream remark serializer +// using LLVM's bitstream writer. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/BitstreamRemarkSerializer.h" + +using namespace llvm; +using namespace llvm::remarks; + +BitstreamRemarkSerializerHelper::BitstreamRemarkSerializerHelper( + BitstreamRemarkContainerType ContainerType) + : Encoded(), R(), Bitstream(Encoded), ContainerType(ContainerType) {} + +static void push(SmallVectorImpl &R, StringRef Str) { + for (const char C : Str) + R.push_back(C); +} + +static void setRecordName(unsigned RecordID, BitstreamWriter &Bitstream, + SmallVectorImpl &R, StringRef Str) { + R.clear(); + R.push_back(RecordID); + push(R, Str); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETRECORDNAME, R); +} + +static void initBlock(unsigned BlockID, BitstreamWriter &Bitstream, + SmallVectorImpl &R, StringRef Str) { + R.clear(); + R.push_back(BlockID); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_SETBID, R); + + R.clear(); + push(R, Str); + Bitstream.EmitRecord(bitc::BLOCKINFO_CODE_BLOCKNAME, R); +} + +void BitstreamRemarkSerializerHelper::setupMetaBlockInfo() { + // Setup the metadata block. + initBlock(META_BLOCK_ID, Bitstream, R, MetaBlockName); + + // The container information. + setRecordName(RECORD_META_CONTAINER_INFO, Bitstream, R, + MetaContainerInfoName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_CONTAINER_INFO)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Type. + RecordMetaContainerInfoAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::setupMetaRemarkVersion() { + setRecordName(RECORD_META_REMARK_VERSION, Bitstream, R, + MetaRemarkVersionName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_REMARK_VERSION)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Version. 
+ RecordMetaRemarkVersionAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaRemarkVersion( + uint64_t RemarkVersion) { + // The remark version is emitted only if we emit remarks. + R.clear(); + R.push_back(RECORD_META_REMARK_VERSION); + R.push_back(RemarkVersion); + Bitstream.EmitRecordWithAbbrev(RecordMetaRemarkVersionAbbrevID, R); +} + +void BitstreamRemarkSerializerHelper::setupMetaStrTab() { + setRecordName(RECORD_META_STRTAB, Bitstream, R, MetaStrTabName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_STRTAB)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Raw table. + RecordMetaStrTabAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaStrTab( + const StringTable &StrTab) { + // The string table is not emitted if we emit remarks separately. + R.clear(); + R.push_back(RECORD_META_STRTAB); + + // Serialize to a blob. + std::string Buf; + raw_string_ostream OS(Buf); + StrTab.serialize(OS); + StringRef Blob = OS.str(); + Bitstream.EmitRecordWithBlob(RecordMetaStrTabAbbrevID, R, Blob); +} + +void BitstreamRemarkSerializerHelper::setupMetaExternalFile() { + setRecordName(RECORD_META_EXTERNAL_FILE, Bitstream, R, MetaExternalFileName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_META_EXTERNAL_FILE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Filename. + RecordMetaExternalFileAbbrevID = + Bitstream.EmitBlockInfoAbbrev(META_BLOCK_ID, Abbrev); +} + +void BitstreamRemarkSerializerHelper::emitMetaExternalFile(StringRef Filename) { + // The external file is emitted only if we emit the separate metadata. + R.clear(); + R.push_back(RECORD_META_EXTERNAL_FILE); + Bitstream.EmitRecordWithBlob(RecordMetaExternalFileAbbrevID, R, Filename); +} + +void BitstreamRemarkSerializerHelper::setupRemarkBlockInfo() { + // Setup the remark block. 
+ initBlock(REMARK_BLOCK_ID, Bitstream, R, RemarkBlockName); + + // The header of a remark. + { + setRecordName(RECORD_REMARK_HEADER, Bitstream, R, RemarkHeaderName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HEADER)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Type + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Remark Name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Pass name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Function name + RecordRemarkHeaderAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // The location of a remark. + { + setRecordName(RECORD_REMARK_DEBUG_LOC, Bitstream, R, RemarkDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_DEBUG_LOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column + RecordRemarkDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // The hotness of a remark. + { + setRecordName(RECORD_REMARK_HOTNESS, Bitstream, R, RemarkHotnessName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_HOTNESS)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Hotness + RecordRemarkHotnessAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // An argument entry with a debug location attached. 
+ { + setRecordName(RECORD_REMARK_ARG_WITH_DEBUGLOC, Bitstream, R, + RemarkArgWithDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITH_DEBUGLOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // File + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Column + RecordRemarkArgWithDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } + + // An argument entry with no debug location attached. + { + setRecordName(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC, Bitstream, R, + RemarkArgWithoutDebugLocName); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(RECORD_REMARK_ARG_WITHOUT_DEBUGLOC)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Key + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 7)); // Value + RecordRemarkArgWithoutDebugLocAbbrevID = + Bitstream.EmitBlockInfoAbbrev(REMARK_BLOCK_ID, Abbrev); + } +} + +void BitstreamRemarkSerializerHelper::setupBlockInfo() { + // Emit magic number. + for (const char C : ContainerMagic) + Bitstream.Emit(static_cast(C), 8); + + Bitstream.EnterBlockInfoBlock(); + + // Setup the main metadata. Depending on the container type, we'll setup the + // required records next. + setupMetaBlockInfo(); + + switch (ContainerType) { + case BitstreamRemarkContainerType::SeparateRemarksMeta: + // Needs a string table that the separate remark file is using. + setupMetaStrTab(); + // Needs to know where the external remarks file is. + setupMetaExternalFile(); + break; + case BitstreamRemarkContainerType::SeparateRemarksFile: + // Contains remarks: emit the version. + setupMetaRemarkVersion(); + // Contains remarks: emit the remark abbrevs. 
+ setupRemarkBlockInfo(); + break; + case BitstreamRemarkContainerType::Standalone: + // Contains remarks: emit the version. + setupMetaRemarkVersion(); + // Needs a string table. + setupMetaStrTab(); + // Contains remarks: emit the remark abbrevs. + setupRemarkBlockInfo(); + break; + } + + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::emitMetaBlock( + uint64_t ContainerVersion, Optional RemarkVersion, + Optional StrTab, Optional Filename) { + // Emit the meta block + Bitstream.EnterSubblock(META_BLOCK_ID, 3); + + // The container version and type. + R.clear(); + R.push_back(RECORD_META_CONTAINER_INFO); + R.push_back(ContainerVersion); + R.push_back(static_cast(ContainerType)); + Bitstream.EmitRecordWithAbbrev(RecordMetaContainerInfoAbbrevID, R); + + switch (ContainerType) { + case BitstreamRemarkContainerType::SeparateRemarksMeta: + assert(StrTab != None && *StrTab != nullptr); + emitMetaStrTab(**StrTab); + assert(Filename != None); + emitMetaExternalFile(*Filename); + break; + case BitstreamRemarkContainerType::SeparateRemarksFile: + assert(RemarkVersion != None); + emitMetaRemarkVersion(*RemarkVersion); + break; + case BitstreamRemarkContainerType::Standalone: + assert(RemarkVersion != None); + emitMetaRemarkVersion(*RemarkVersion); + assert(StrTab != None && *StrTab != nullptr); + emitMetaStrTab(**StrTab); + break; + } + + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, + StringTable &StrTab) { + Bitstream.EnterSubblock(REMARK_BLOCK_ID, 4); + + R.clear(); + R.push_back(RECORD_REMARK_HEADER); + R.push_back(static_cast(Remark.RemarkType)); + R.push_back(StrTab.add(Remark.RemarkName).first); + R.push_back(StrTab.add(Remark.PassName).first); + R.push_back(StrTab.add(Remark.FunctionName).first); + Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); + + if (const Optional &Loc = Remark.Loc) { + R.clear(); + R.push_back(RECORD_REMARK_DEBUG_LOC); + 
R.push_back(StrTab.add(Loc->SourceFilePath).first); + R.push_back(Loc->SourceLine); + R.push_back(Loc->SourceColumn); + Bitstream.EmitRecordWithAbbrev(RecordRemarkDebugLocAbbrevID, R); + } + + if (Optional Hotness = Remark.Hotness) { + R.clear(); + R.push_back(RECORD_REMARK_HOTNESS); + R.push_back(*Hotness); + Bitstream.EmitRecordWithAbbrev(RecordRemarkHotnessAbbrevID, R); + } + + for (const Argument &Arg : Remark.Args) { + R.clear(); + unsigned Key = StrTab.add(Arg.Key).first; + unsigned Val = StrTab.add(Arg.Val).first; + bool HasDebugLoc = Arg.Loc != None; + R.push_back(HasDebugLoc ? RECORD_REMARK_ARG_WITH_DEBUGLOC + : RECORD_REMARK_ARG_WITHOUT_DEBUGLOC); + R.push_back(Key); + R.push_back(Val); + if (HasDebugLoc) { + R.push_back(StrTab.add(Arg.Loc->SourceFilePath).first); + R.push_back(Arg.Loc->SourceLine); + R.push_back(Arg.Loc->SourceColumn); + } + Bitstream.EmitRecordWithAbbrev(HasDebugLoc + ? RecordRemarkArgWithDebugLocAbbrevID + : RecordRemarkArgWithoutDebugLocAbbrevID, + R); + } + Bitstream.ExitBlock(); +} + +void BitstreamRemarkSerializerHelper::flushToStream(raw_ostream &OS) { + OS.write(Encoded.data(), Encoded.size()); + Encoded.clear(); +} + +StringRef BitstreamRemarkSerializerHelper::getBuffer() { + return StringRef(Encoded.data(), Encoded.size()); +} + +BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS, + SerializerMode Mode) + : RemarkSerializer(Format::Bitstream, OS, Mode), + Helper(BitstreamRemarkContainerType::SeparateRemarksFile) { + assert(Mode == SerializerMode::Separate && + "For SerializerMode::Standalone, a pre-filled string table needs to " + "be provided."); + // We always use a string table with bitstream. + StrTab.emplace(); +} + +BitstreamRemarkSerializer::BitstreamRemarkSerializer(raw_ostream &OS, + SerializerMode Mode, + StringTable StrTabIn) + : RemarkSerializer(Format::Bitstream, OS, Mode), + Helper(Mode == SerializerMode::Separate + ? 
BitstreamRemarkContainerType::SeparateRemarksFile + : BitstreamRemarkContainerType::Standalone) { + StrTab = std::move(StrTabIn); +} + +void BitstreamRemarkSerializer::emit(const Remark &Remark) { + if (!DidSetUp) { + // Emit the metadata that is embedded in the remark file. + // If we're in standalone mode, serialize the string table as well. + bool IsStandalone = + Helper.ContainerType == BitstreamRemarkContainerType::Standalone; + BitstreamMetaSerializer MetaSerializer( + OS, Helper, + IsStandalone ? &*StrTab : Optional(None)); + MetaSerializer.emit(); + DidSetUp = true; + } + + assert(DidSetUp && + "The Block info block and the meta block were not emitted yet."); + Helper.emitRemarkBlock(Remark, *StrTab); + + Helper.flushToStream(OS); +} + +std::unique_ptr BitstreamRemarkSerializer::metaSerializer( + raw_ostream &OS, Optional ExternalFilename) { + assert(Helper.ContainerType != + BitstreamRemarkContainerType::SeparateRemarksMeta); + bool IsStandalone = + Helper.ContainerType == BitstreamRemarkContainerType::Standalone; + return std::make_unique( + OS, + IsStandalone ? 
BitstreamRemarkContainerType::Standalone + : BitstreamRemarkContainerType::SeparateRemarksMeta, + &*StrTab, ExternalFilename); +} + +void BitstreamMetaSerializer::emit() { + Helper->setupBlockInfo(); + Helper->emitMetaBlock(CurrentContainerVersion, CurrentRemarkVersion, StrTab, + ExternalFilename); + Helper->flushToStream(OS); +} diff --git a/lib/Remarks/RemarkFormat.cpp b/lib/Remarks/RemarkFormat.cpp index bcd0f753ff6..f2d0331ec6a 100644 --- a/lib/Remarks/RemarkFormat.cpp +++ b/lib/Remarks/RemarkFormat.cpp @@ -19,11 +19,13 @@ using namespace llvm::remarks; Expected llvm::remarks::parseFormat(StringRef FormatStr) { auto Result = StringSwitch(FormatStr) .Cases("", "yaml", Format::YAML) + .Case("yaml-strtab", Format::YAMLStrTab) + .Case("bitstream", Format::Bitstream) .Default(Format::Unknown); if (Result == Format::Unknown) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark serializer format: '%s'", + "Unknown remark format: '%s'", FormatStr.data()); return Result; diff --git a/lib/Remarks/RemarkParser.cpp b/lib/Remarks/RemarkParser.cpp index f67464073bd..c5c3d0badd3 100644 --- a/lib/Remarks/RemarkParser.cpp +++ b/lib/Remarks/RemarkParser.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Remarks/RemarkParser.h" +#include "BitstreamRemarkParser.h" #include "YAMLRemarkParser.h" #include "llvm-c/Remarks.h" #include "llvm/ADT/STLExtras.h" @@ -47,32 +48,81 @@ Expected ParsedStringTable::operator[](size_t Index) const { return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1); } -Expected> -llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, - Optional StrTab) { +Expected> +llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) { switch (ParserFormat) { case Format::YAML: - return llvm::make_unique(Buf, StrTab); + return std::make_unique(Buf); + case Format::YAMLStrTab: + return createStringError( + 
std::make_error_code(std::errc::invalid_argument), + "The YAML with string table format requires a parsed string table."); + case Format::Bitstream: + return std::make_unique(Buf); case Format::Unknown: return createStringError(std::make_error_code(std::errc::invalid_argument), "Unknown remark parser format."); } - llvm_unreachable("unknown format"); + llvm_unreachable("unhandled ParseFormat"); } +Expected> +llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, + ParsedStringTable StrTab) { + switch (ParserFormat) { + case Format::YAML: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "The YAML format can't be used with a string " + "table. Use yaml-strtab instead."); + case Format::YAMLStrTab: + return std::make_unique(Buf, std::move(StrTab)); + case Format::Bitstream: + return std::make_unique(Buf, std::move(StrTab)); + case Format::Unknown: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + } + llvm_unreachable("unhandled ParseFormat"); +} + +Expected> +llvm::remarks::createRemarkParserFromMeta( + Format ParserFormat, StringRef Buf, Optional StrTab, + Optional ExternalFilePrependPath) { + switch (ParserFormat) { + // Depending on the metadata, the format can be either yaml or yaml-strtab, + // regardless of the input argument. + case Format::YAML: + case Format::YAMLStrTab: + return createYAMLParserFromMeta(Buf, std::move(StrTab), + std::move(ExternalFilePrependPath)); + case Format::Bitstream: + return createBitstreamParserFromMeta(Buf, std::move(StrTab), + std::move(ExternalFilePrependPath)); + case Format::Unknown: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + } + llvm_unreachable("unhandled ParseFormat"); +} + +namespace { // Wrapper that holds the state needed to interact with the C API. 
struct CParser { - std::unique_ptr TheParser; + std::unique_ptr TheParser; Optional Err; CParser(Format ParserFormat, StringRef Buf, - Optional StrTab = None) - : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {} + Optional StrTab = None) + : TheParser(cantFail( + StrTab ? createRemarkParser(ParserFormat, Buf, std::move(*StrTab)) + : createRemarkParser(ParserFormat, Buf))) {} void handleError(Error E) { Err.emplace(toString(std::move(E))); } bool hasError() const { return Err.hasValue(); } const char *getMessage() const { return Err ? Err->c_str() : nullptr; }; }; +} // namespace // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef) @@ -83,10 +133,16 @@ extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, StringRef(static_cast(Buf), Size))); } +extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateBitstream(const void *Buf, + uint64_t Size) { + return wrap(new CParser(Format::Bitstream, + StringRef(static_cast(Buf), Size))); +} + extern "C" LLVMRemarkEntryRef LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) { CParser &TheCParser = *unwrap(Parser); - remarks::Parser &TheParser = *TheCParser.TheParser; + remarks::RemarkParser &TheParser = *TheCParser.TheParser; Expected> MaybeRemark = TheParser.next(); if (Error E = MaybeRemark.takeError()) { diff --git a/lib/Remarks/RemarkSerializer.cpp b/lib/Remarks/RemarkSerializer.cpp new file mode 100644 index 00000000000..ab19c84bbad --- /dev/null +++ b/lib/Remarks/RemarkSerializer.cpp @@ -0,0 +1,54 @@ +//===- RemarkSerializer.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides tools for serializing remarks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Remarks/BitstreamRemarkSerializer.h" +#include "llvm/Remarks/YAMLRemarkSerializer.h" + +using namespace llvm; +using namespace llvm::remarks; + +Expected> +remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS) { + switch (RemarksFormat) { + case Format::Unknown: + return createStringError(std::errc::invalid_argument, + "Unknown remark serializer format."); + case Format::YAML: + return std::make_unique(OS, Mode); + case Format::YAMLStrTab: + return std::make_unique(OS, Mode); + case Format::Bitstream: + return std::make_unique(OS, Mode); + } + llvm_unreachable("Unknown remarks::Format enum"); +} + +Expected> +remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, + raw_ostream &OS, remarks::StringTable StrTab) { + switch (RemarksFormat) { + case Format::Unknown: + return createStringError(std::errc::invalid_argument, + "Unknown remark serializer format."); + case Format::YAML: + return std::make_unique(OS, Mode, std::move(StrTab)); + case Format::YAMLStrTab: + return std::make_unique(OS, Mode, + std::move(StrTab)); + case Format::Bitstream: + return std::make_unique(OS, Mode, + std::move(StrTab)); + } + llvm_unreachable("Unknown remarks::Format enum"); +} diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp index 984aa5b33b4..51156465be5 100644 --- a/lib/Remarks/RemarkStringTable.cpp +++ b/lib/Remarks/RemarkStringTable.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Remarks/RemarkStringTable.h" +#include "llvm/Remarks/Remark.h" +#include 
"llvm/Remarks/RemarkParser.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include @@ -18,6 +20,14 @@ using namespace llvm; using namespace llvm::remarks; +StringTable::StringTable(const ParsedStringTable &Other) : StrTab() { + for (unsigned i = 0, e = Other.size(); i < e; ++i) + if (Expected MaybeStr = Other[i]) + add(*MaybeStr); + else + llvm_unreachable("Unexpected error while building remarks string table."); +} + std::pair StringTable::add(StringRef Str) { size_t NextID = StrTab.size(); auto KV = StrTab.insert({Str, NextID}); @@ -28,10 +38,22 @@ std::pair StringTable::add(StringRef Str) { return {KV.first->second, KV.first->first()}; } +void StringTable::internalize(Remark &R) { + auto Impl = [&](StringRef &S) { S = add(S).second; }; + Impl(R.PassName); + Impl(R.RemarkName); + Impl(R.FunctionName); + if (R.Loc) + Impl(R.Loc->SourceFilePath); + for (Argument &Arg : R.Args) { + Impl(Arg.Key); + Impl(Arg.Val); + if (Arg.Loc) + Impl(Arg.Loc->SourceFilePath); + } +} + void StringTable::serialize(raw_ostream &OS) const { - // Emit the number of strings. - uint64_t StrTabSize = SerializedSize; - support::endian::write(OS, StrTabSize, support::little); // Emit the sequence of strings. for (StringRef Str : serialize()) { OS << Str; diff --git a/lib/Remarks/YAMLRemarkParser.cpp b/lib/Remarks/YAMLRemarkParser.cpp index ed78b7ba5d9..dd834d85676 100644 --- a/lib/Remarks/YAMLRemarkParser.cpp +++ b/lib/Remarks/YAMLRemarkParser.cpp @@ -14,6 +14,8 @@ #include "YAMLRemarkParser.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Remarks/RemarkParser.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::remarks; @@ -54,9 +56,123 @@ static SourceMgr setupSM(std::string &LastErrorMessage) { return SM; } +// Parse the magic number. This function returns true if this represents remark +// metadata, false otherwise. 
+static Expected parseMagic(StringRef &Buf) { + if (!Buf.consume_front(remarks::Magic)) + return false; + + if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1))) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting \\0 after magic number."); + return true; +} + +static Expected parseVersion(StringRef &Buf) { + if (Buf.size() < sizeof(uint64_t)) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting version number."); + + uint64_t Version = + support::endian::read( + Buf.data()); + if (Version != remarks::CurrentRemarkVersion) + return createStringError(std::errc::illegal_byte_sequence, + "Mismatching remark version. Got %" PRId64 + ", expected %" PRId64 ".", + Version, remarks::CurrentRemarkVersion); + Buf = Buf.drop_front(sizeof(uint64_t)); + return Version; +} + +static Expected parseStrTabSize(StringRef &Buf) { + if (Buf.size() < sizeof(uint64_t)) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting string table size."); + uint64_t StrTabSize = + support::endian::read( + Buf.data()); + Buf = Buf.drop_front(sizeof(uint64_t)); + return StrTabSize; +} + +static Expected parseStrTab(StringRef &Buf, + uint64_t StrTabSize) { + if (Buf.size() < StrTabSize) + return createStringError(std::errc::illegal_byte_sequence, + "Expecting string table."); + + // Attach the string table to the parser. + ParsedStringTable Result(StringRef(Buf.data(), StrTabSize)); + Buf = Buf.drop_front(StrTabSize); + return Expected(std::move(Result)); +} + +Expected> +remarks::createYAMLParserFromMeta(StringRef Buf, + Optional StrTab, + Optional ExternalFilePrependPath) { + // We now have a magic number. The metadata has to be correct. + Expected isMeta = parseMagic(Buf); + if (!isMeta) + return isMeta.takeError(); + // If it's not recognized as metadata, roll back. 
+ std::unique_ptr SeparateBuf; + if (*isMeta) { + Expected Version = parseVersion(Buf); + if (!Version) + return Version.takeError(); + + Expected StrTabSize = parseStrTabSize(Buf); + if (!StrTabSize) + return StrTabSize.takeError(); + + // If the size of string table is not 0, try to build one. + if (*StrTabSize != 0) { + if (StrTab) + return createStringError(std::errc::illegal_byte_sequence, + "String table already provided."); + Expected MaybeStrTab = parseStrTab(Buf, *StrTabSize); + if (!MaybeStrTab) + return MaybeStrTab.takeError(); + StrTab = std::move(*MaybeStrTab); + } + // If it starts with "---", there is no external file. + if (!Buf.startswith("---")) { + // At this point, we expect Buf to contain the external file path. + StringRef ExternalFilePath = Buf; + SmallString<80> FullPath; + if (ExternalFilePrependPath) + FullPath = *ExternalFilePrependPath; + sys::path::append(FullPath, ExternalFilePath); + + // Try to open the file and start parsing from there. + ErrorOr> BufferOrErr = + MemoryBuffer::getFile(FullPath); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(FullPath, EC); + + // Keep the buffer alive. + SeparateBuf = std::move(*BufferOrErr); + Buf = SeparateBuf->getBuffer(); + } + } + + std::unique_ptr Result = + StrTab + ? 
std::make_unique(Buf, std::move(*StrTab)) + : std::make_unique(Buf); + if (SeparateBuf) + Result->SeparateBuf = std::move(SeparateBuf); + return std::move(Result); +} + +YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) + : YAMLRemarkParser(Buf, None) {} + YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, - Optional StrTab) - : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(), + Optional StrTab) + : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), LastErrorMessage(), SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { @@ -86,7 +202,7 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { if (!Root) return error("document root is not of mapping type.", *YAMLRoot); - std::unique_ptr Result = llvm::make_unique(); + std::unique_ptr Result = std::make_unique(); Remark &TheRemark = *Result; // First, the type. It needs special handling since is not part of the @@ -179,22 +295,7 @@ Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { auto *Value = dyn_cast(Node.getValue()); if (!Value) return error("expected a value of scalar type.", Node); - StringRef Result; - if (!StrTab) { - Result = Value->getRawValue(); - } else { - // If we have a string table, parse it as an unsigned. 
- unsigned StrID = 0; - if (Expected MaybeStrID = parseUnsigned(Node)) - StrID = *MaybeStrID; - else - return MaybeStrID.takeError(); - - if (Expected Str = (**StrTab)[StrID]) - Result = *Str; - else - return Str.takeError(); - } + StringRef Result = Value->getRawValue(); if (Result.front() == '\'') Result = Result.drop_front(); @@ -325,3 +426,29 @@ Expected> YAMLRemarkParser::next() { return std::move(*MaybeResult); } + +Expected YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) { + auto *Value = dyn_cast(Node.getValue()); + if (!Value) + return error("expected a value of scalar type.", Node); + StringRef Result; + // If we have a string table, parse it as an unsigned. + unsigned StrID = 0; + if (Expected MaybeStrID = parseUnsigned(Node)) + StrID = *MaybeStrID; + else + return MaybeStrID.takeError(); + + if (Expected Str = (*StrTab)[StrID]) + Result = *Str; + else + return Str.takeError(); + + if (Result.front() == '\'') + Result = Result.drop_front(); + + if (Result.back() == '\'') + Result = Result.drop_back(); + + return Result; +} diff --git a/lib/Remarks/YAMLRemarkParser.h b/lib/Remarks/YAMLRemarkParser.h index cea76e63e75..03707433bc0 100644 --- a/lib/Remarks/YAMLRemarkParser.h +++ b/lib/Remarks/YAMLRemarkParser.h @@ -18,6 +18,7 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/YAMLTraits.h" @@ -46,9 +47,9 @@ private: }; /// Regular YAML to Remark parser. -struct YAMLRemarkParser : public Parser { +struct YAMLRemarkParser : public RemarkParser { /// The string table used for parsing strings. - Optional StrTab; + Optional StrTab; /// Last error message that can come from the YAML parser diagnostics. /// We need this for catching errors in the constructor. 
std::string LastErrorMessage; @@ -58,17 +59,20 @@ struct YAMLRemarkParser : public Parser { yaml::Stream Stream; /// Iterator in the YAML stream. yaml::document_iterator YAMLIt; + /// If we parse remark metadata in separate mode, we need to open a new file + /// and parse that. + std::unique_ptr SeparateBuf; - YAMLRemarkParser(StringRef Buf, - Optional StrTab = None); + YAMLRemarkParser(StringRef Buf); Expected> next() override; - static bool classof(const Parser *P) { + static bool classof(const RemarkParser *P) { return P->ParserFormat == Format::YAML; } -private: +protected: + YAMLRemarkParser(StringRef Buf, Optional StrTab); /// Create a YAMLParseError error from an existing error generated by the YAML /// parser. /// If there is no error, this returns Success. @@ -82,7 +86,7 @@ private: /// Parse one key to a string. Expected parseKey(yaml::KeyValueNode &Node); /// Parse one value to a string. - Expected parseStr(yaml::KeyValueNode &Node); + virtual Expected parseStr(yaml::KeyValueNode &Node); /// Parse one value to an unsigned. Expected parseUnsigned(yaml::KeyValueNode &Node); /// Parse a debug location. @@ -90,6 +94,26 @@ private: /// Parse an argument. Expected parseArg(yaml::Node &Node); }; + +/// YAML with a string table to Remark parser. +struct YAMLStrTabRemarkParser : public YAMLRemarkParser { + YAMLStrTabRemarkParser(StringRef Buf, ParsedStringTable StrTab) + : YAMLRemarkParser(Buf, std::move(StrTab)) {} + + static bool classof(const RemarkParser *P) { + return P->ParserFormat == Format::YAMLStrTab; + } + +protected: + /// Parse one value to a string. 
+ Expected parseStr(yaml::KeyValueNode &Node) override; +}; + +Expected> +createYAMLParserFromMeta(StringRef Buf, + Optional StrTab = None, + Optional ExternalFilePrependPath = None); + } // end namespace remarks } // end namespace llvm diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp index d64ae8e12ab..3a42fe0678e 100644 --- a/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/lib/Remarks/YAMLRemarkSerializer.cpp @@ -11,16 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Remarks/RemarkSerializer.h" +#include "llvm/Remarks/YAMLRemarkSerializer.h" #include "llvm/Support/CommandLine.h" using namespace llvm; using namespace llvm::remarks; -cl::opt RemarksYAMLStringTable( - "remarks-yaml-string-table", cl::init(false), cl::Hidden, - cl::desc("Enable the usage of a string table with YAML remarks.")); - // Use the same keys whether we use a string table or not (respectively, T is an // unsigned or a StringRef). 
template @@ -60,11 +56,14 @@ template <> struct MappingTraits { else llvm_unreachable("Unknown remark type"); - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - unsigned PassID = StrTab->add(Remark->PassName).first; - unsigned NameID = StrTab->add(Remark->RemarkName).first; - unsigned FunctionID = StrTab->add(Remark->FunctionName).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + unsigned PassID = StrTab.add(Remark->PassName).first; + unsigned NameID = StrTab.add(Remark->RemarkName).first; + unsigned FunctionID = StrTab.add(Remark->FunctionName).first; mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, Remark->Hotness, Remark->Args); } else { @@ -82,9 +81,12 @@ template <> struct MappingTraits { unsigned Line = RL.SourceLine; unsigned Col = RL.SourceColumn; - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - unsigned FileID = StrTab->add(File).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + unsigned FileID = StrTab.add(File).first; io.mapRequired("File", FileID); } else { io.mapRequired("File", File); @@ -101,7 +103,7 @@ template <> struct MappingTraits { /// newlines in strings. 
struct StringBlockVal { StringRef Value; - StringBlockVal(const std::string &Value) : Value(Value) {} + StringBlockVal(StringRef R) : Value(R) {} }; template <> struct BlockScalarTraits { @@ -134,9 +136,12 @@ template <> struct MappingTraits { static void mapping(IO &io, Argument &A) { assert(io.outputting() && "input not yet implemented"); - if (Optional &StrTab = - reinterpret_cast(io.getContext())->StrTab) { - auto ValueID = StrTab->add(A.Val).first; + if (auto *Serializer = dyn_cast( + reinterpret_cast(io.getContext()))) { + assert(Serializer->StrTab.hasValue() && + "YAMLStrTabSerializer with no StrTab."); + StringTable &StrTab = *Serializer->StrTab; + auto ValueID = StrTab.add(A.Val).first; io.mapRequired(A.Key.data(), ValueID); } else if (StringRef(A.Val).count('\n') > 1) { StringBlockVal S(A.Val); @@ -153,15 +158,100 @@ template <> struct MappingTraits { LLVM_YAML_IS_SEQUENCE_VECTOR(Argument) -YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable) - : Serializer(OS), YAMLOutput(OS, reinterpret_cast(this)) { - if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable) - StrTab.emplace(); +YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, + Optional StrTabIn) + : YAMLRemarkSerializer(Format::YAML, OS, Mode, std::move(StrTabIn)) {} + +YAMLRemarkSerializer::YAMLRemarkSerializer(Format SerializerFormat, + raw_ostream &OS, SerializerMode Mode, + Optional StrTabIn) + : RemarkSerializer(SerializerFormat, OS, Mode), + YAMLOutput(OS, reinterpret_cast(this)) { + StrTab = std::move(StrTabIn); } -void YAMLSerializer::emit(const Remark &Remark) { +void YAMLRemarkSerializer::emit(const Remark &Remark) { // Again, YAMLTraits expect a non-const object for inputting, but we're not // using that here. 
auto R = const_cast(&Remark); YAMLOutput << R; } + +std::unique_ptr +YAMLRemarkSerializer::metaSerializer(raw_ostream &OS, + Optional ExternalFilename) { + return std::make_unique(OS, ExternalFilename); +} + +void YAMLStrTabRemarkSerializer::emit(const Remark &Remark) { + // In standalone mode, for the serializer with a string table, emit the + // metadata first and set DidEmitMeta to avoid emitting it again. + if (Mode == SerializerMode::Standalone && !DidEmitMeta) { + std::unique_ptr MetaSerializer = + metaSerializer(OS, /*ExternalFilename=*/None); + MetaSerializer->emit(); + DidEmitMeta = true; + } + + // Then do the usual remark emission. + YAMLRemarkSerializer::emit(Remark); +} + +std::unique_ptr YAMLStrTabRemarkSerializer::metaSerializer( + raw_ostream &OS, Optional ExternalFilename) { + assert(StrTab); + return std::make_unique(OS, ExternalFilename, + *StrTab); +} + +static void emitMagic(raw_ostream &OS) { + // Emit the magic number. + OS << remarks::Magic; + // Explicitly emit a '\0'. + OS.write('\0'); +} + +static void emitVersion(raw_ostream &OS) { + // Emit the version number: little-endian uint64_t. + std::array Version; + support::endian::write64le(Version.data(), remarks::CurrentRemarkVersion); + OS.write(Version.data(), Version.size()); +} + +static void emitStrTab(raw_ostream &OS, Optional StrTab) { + // Emit the string table in the section. + uint64_t StrTabSize = StrTab ? (*StrTab)->SerializedSize : 0; + // Emit the total size of the string table (the size itself excluded): + // little-endian uint64_t. + // Note: even if no string table is used, emit 0. + std::array StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + OS.write(StrTabSizeBuf.data(), StrTabSizeBuf.size()); + if (StrTab) + (*StrTab)->serialize(OS); +} + +static void emitExternalFile(raw_ostream &OS, StringRef Filename) { + // Emit the null-terminated absolute path to the remark file. 
+ SmallString<128> FilenameBuf = Filename; + sys::fs::make_absolute(FilenameBuf); + assert(!FilenameBuf.empty() && "The filename can't be empty."); + OS.write(FilenameBuf.data(), FilenameBuf.size()); + OS.write('\0'); +} + +void YAMLMetaSerializer::emit() { + emitMagic(OS); + emitVersion(OS); + emitStrTab(OS, None); + if (ExternalFilename) + emitExternalFile(OS, *ExternalFilename); +} + +void YAMLStrTabMetaSerializer::emit() { + emitMagic(OS); + emitVersion(OS); + emitStrTab(OS, &StrTab); + if (ExternalFilename) + emitExternalFile(OS, *ExternalFilename); +} diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp index df4caa1f07f..6f1d6d50eee 100644 --- a/lib/Support/AArch64TargetParser.cpp +++ b/lib/Support/AArch64TargetParser.cpp @@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions, Features.push_back("+sve2-sm4"); if (Extensions & AEK_SVE2SHA3) Features.push_back("+sve2-sha3"); - if (Extensions & AEK_BITPERM) - Features.push_back("+bitperm"); + if (Extensions & AEK_SVE2BITPERM) + Features.push_back("+sve2-bitperm"); if (Extensions & AEK_RCPC) Features.push_back("+rcpc"); diff --git a/lib/Support/ABIBreak.cpp b/lib/Support/ABIBreak.cpp new file mode 100644 index 00000000000..247b635e02b --- /dev/null +++ b/lib/Support/ABIBreak.cpp @@ -0,0 +1,24 @@ +//===----- lib/Support/ABIBreak.cpp - EnableABIBreakingChecks -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/abi-breaking.h" + +#ifndef _MSC_VER +namespace llvm { + +// One of these two variables will be referenced by a symbol defined in +// llvm-config.h. 
We provide a link-time (or load time for DSO) failure when +// there is a mismatch in the build configuration of the API client and LLVM. +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +int EnableABIBreakingChecks; +#else +int DisableABIBreakingChecks; +#endif + +} // end namespace llvm +#endif diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 43173311cd8..758fe8b4f86 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -401,6 +401,33 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { } } +void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { + uint64_t maskBits = maskTrailingOnes(numBits); + subBits &= maskBits; + if (isSingleWord()) { + U.VAL &= ~(maskBits << bitPosition); + U.VAL |= subBits << bitPosition; + return; + } + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hiWord = whichWord(bitPosition + numBits - 1); + if (loWord == hiWord) { + U.pVal[loWord] &= ~(maskBits << loBit); + U.pVal[loWord] |= subBits << loBit; + return; + } + + static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); + unsigned wordBits = 8 * sizeof(WordType); + U.pVal[loWord] &= ~(maskBits << loBit); + U.pVal[loWord] |= subBits << loBit; + + U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); + U.pVal[hiWord] |= subBits >> (wordBits - loBit); +} + APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && @@ -438,6 +465,31 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { return Result.clearUnusedBits(); } +uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, + unsigned bitPosition) const { + assert(numBits > 0 && "Can't extract zero bits"); + assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && + "Illegal bit extraction"); + assert(numBits <= 64 && "Illegal 
bit extraction"); + + uint64_t maskBits = maskTrailingOnes(numBits); + if (isSingleWord()) + return (U.VAL >> bitPosition) & maskBits; + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hiWord = whichWord(bitPosition + numBits - 1); + if (loWord == hiWord) + return (U.pVal[loWord] >> loBit) & maskBits; + + static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); + unsigned wordBits = 8 * sizeof(WordType); + uint64_t retBits = U.pVal[loWord] >> loBit; + retBits |= U.pVal[hiWord] << (wordBits - loBit); + retBits &= maskBits; + return retBits; +} + unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || diff --git a/lib/Support/ARMTargetParser.cpp b/lib/Support/ARMTargetParser.cpp index be948cfc95d..ce5daa7fe58 100644 --- a/lib/Support/ARMTargetParser.cpp +++ b/lib/Support/ARMTargetParser.cpp @@ -176,10 +176,8 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector &Features) { // exist). 
{"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16}, - {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::None}, - {"+vfp2d16", "-vfp2d16", FPUVersion::VFPV2, FPURestriction::D16}, - {"+vfp2d16sp", "-vfp2d16sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, - {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::None}, + {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16}, + {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None}, {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16}, {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16}, @@ -195,7 +193,7 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector &Features) { {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None}, {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16}, {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16}, - {"+d32", "-d32", FPUVersion::VFPV2, FPURestriction::None}, + {"+d32", "-d32", FPUVersion::VFPV3, FPURestriction::None}, }; for (const auto &Info: FPUFeatureInfoList) { diff --git a/lib/Support/CRC.cpp b/lib/Support/CRC.cpp index fd98f3a2400..7c008d3b599 100644 --- a/lib/Support/CRC.cpp +++ b/lib/Support/CRC.cpp @@ -6,63 +6,94 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm::crc32 function. +// This file contains implementations of CRC functions. +// +// The implementation technique is the one mentioned in: +// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table +// look-up. Commun. ACM 31, 8 (August 1988) +// +// See also Ross N. Williams "A Painless Guide to CRC Error Detection +// Algorithms" (https://zlib.net/crc_v3.txt) or Hacker's Delight (2nd ed.) +// Chapter 14 (Figure 14-7 in particular) for how the algorithm works. 
// //===----------------------------------------------------------------------===// #include "llvm/Support/CRC.h" + +#include "llvm/ADT/ArrayRef.h" #include "llvm/Config/config.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Threading.h" -#include using namespace llvm; #if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H -using CRC32Table = std::array; -static void initCRC32Table(CRC32Table *Tbl) { - auto Shuffle = [](uint32_t V) { - return (V & 1) ? (V >> 1) ^ 0xEDB88320U : V >> 1; - }; +static const uint32_t CRCTable[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 
0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; - for (size_t I = 0; I < Tbl->size(); ++I) { - uint32_t V = Shuffle(I); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - V = Shuffle(V); - (*Tbl)[I] = Shuffle(V); - } -} - -uint32_t llvm::crc32(uint32_t CRC, StringRef S) { - static llvm::once_flag 
InitFlag; - static CRC32Table Tbl; - llvm::call_once(InitFlag, initCRC32Table, &Tbl); - - const uint8_t *P = reinterpret_cast(S.data()); - size_t Len = S.size(); +uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { CRC ^= 0xFFFFFFFFU; - for (; Len >= 8; Len -= 8) { - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); + for (uint8_t Byte : Data) { + int TableIdx = (CRC ^ Byte) & 0xff; + CRC = CRCTable[TableIdx] ^ (CRC >> 8); } - while (Len--) - CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8); return CRC ^ 0xFFFFFFFFU; } + #else + #include -uint32_t llvm::crc32(uint32_t CRC, StringRef S) { - return ::crc32(CRC, (const Bytef *)S.data(), S.size()); +uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { + return ::crc32(CRC, (const Bytef *)Data.data(), Data.size()); } + #endif + +uint32_t llvm::crc32(ArrayRef Data) { return crc32(0, Data); } + +void JamCRC::update(ArrayRef Data) { + CRC ^= 0xFFFFFFFFU; // Undo CRC-32 Init. + CRC = crc32(CRC, Data); + CRC ^= 0xFFFFFFFFU; // Undo CRC-32 XorOut. +} diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index 9813eec0e43..7a2f6c53435 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -45,7 +45,7 @@ struct FileInfo { /// interval option. 
static void writeTimestampFile(StringRef TimestampFile) { std::error_code EC; - raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::F_None); + raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::OF_None); } static Expected parseDuration(StringRef Duration) { diff --git a/lib/Support/CodeGenCoverage.cpp b/lib/Support/CodeGenCoverage.cpp index f39eb7533b4..2db4193ce38 100644 --- a/lib/Support/CodeGenCoverage.cpp +++ b/lib/Support/CodeGenCoverage.cpp @@ -101,9 +101,9 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix, std::string CoverageFilename = (CoveragePrefix + Pid).str(); std::error_code EC; - sys::fs::OpenFlags OpenFlags = sys::fs::F_Append; + sys::fs::OpenFlags OpenFlags = sys::fs::OF_Append; std::unique_ptr CoverageFile = - llvm::make_unique(CoverageFilename, EC, OpenFlags); + std::make_unique(CoverageFilename, EC, OpenFlags); if (EC) return false; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 25510fa58ff..620f7ffd4c9 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -692,7 +692,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, return false; } -static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { +bool llvm::cl::ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy); } diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index c2459256f8f..9d13fce9cc5 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -10,8 +10,8 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" +#include #include using namespace llvm; @@ -71,7 +71,7 @@ public: } -static ManagedStatic gCrashRecoveryContextMutex; +static ManagedStatic gCrashRecoveryContextMutex; static 
bool gCrashRecoveryEnabled = false; static ManagedStatic> @@ -116,7 +116,7 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { } void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); + std::lock_guard L(*gCrashRecoveryContextMutex); // FIXME: Shouldn't this be a refcount or something? if (gCrashRecoveryEnabled) return; @@ -125,7 +125,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); + std::lock_guard L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; gCrashRecoveryEnabled = false; diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 673bbb4d06f..a98297cdb35 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -7,111 +7,137 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" -#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/SwapByteOrder.h" + using namespace llvm; -template -static T getU(uint32_t *offset_ptr, const DataExtractor *de, - bool isLittleEndian, const char *Data) { - T val = 0; - uint32_t offset = *offset_ptr; - if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { - std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::IsLittleEndianHost != isLittleEndian) - sys::swapByteOrder(val); +static void unexpectedEndReached(Error *E) { + if (E) + *E = createStringError(errc::illegal_byte_sequence, + "unexpected end of data"); +} - // Advance the offset - *offset_ptr += sizeof(val); +static bool isError(Error *E) { return E && *E; } + +template +static T getU(uint64_t *offset_ptr, const DataExtractor *de, + bool isLittleEndian, const char *Data, llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + T val = 0; + if (isError(Err)) + 
return val; + + uint64_t offset = *offset_ptr; + if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) { + unexpectedEndReached(Err); + return val; } + std::memcpy(&val, &Data[offset], sizeof(val)); + if (sys::IsLittleEndianHost != isLittleEndian) + sys::swapByteOrder(val); + + // Advance the offset + *offset_ptr += sizeof(val); return val; } template -static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, - const DataExtractor *de, bool isLittleEndian, const char *Data){ - uint32_t offset = *offset_ptr; +static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count, + const DataExtractor *de, bool isLittleEndian, const char *Data, + llvm::Error *Err) { + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return nullptr; - if (count > 0 && de->isValidOffsetForDataOfSize(offset, sizeof(*dst)*count)) { - for (T *value_ptr = dst, *end = dst + count; value_ptr != end; - ++value_ptr, offset += sizeof(*dst)) - *value_ptr = getU(offset_ptr, de, isLittleEndian, Data); - // Advance the offset - *offset_ptr = offset; - // Return a non-NULL pointer to the converted data as an indicator of - // success - return dst; + uint64_t offset = *offset_ptr; + + if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) { + unexpectedEndReached(Err); + return nullptr; } - return nullptr; + for (T *value_ptr = dst, *end = dst + count; value_ptr != end; + ++value_ptr, offset += sizeof(*dst)) + *value_ptr = getU(offset_ptr, de, isLittleEndian, Data, Err); + // Advance the offset + *offset_ptr = offset; + // Return a non-NULL pointer to the converted data as an indicator of + // success + return dst; } -uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } uint8_t * -DataExtractor::getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const { 
+DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } - -uint16_t DataExtractor::getU16(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { + return getUs(&C.Offset, Dst, Count, this, IsLittleEndian, + Data.data(), &C.Err); } -uint16_t *DataExtractor::getU16(uint32_t *offset_ptr, uint16_t *dst, +uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); +} + +uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const { +uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const { uint24_t ExtractedVal = - getU(offset_ptr, this, IsLittleEndian, Data.data()); + getU(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); // The 3 bytes are in the correct byte order for the host. 
return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); } -uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } -uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst, +uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const { - return getU(offset_ptr, this, IsLittleEndian, Data.data()); +uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const { + return getU(offset_ptr, this, IsLittleEndian, Data.data(), Err); } -uint64_t *DataExtractor::getU64(uint32_t *offset_ptr, uint64_t *dst, +uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const { return getUs(offset_ptr, dst, count, this, IsLittleEndian, - Data.data()); + Data.data(), nullptr); } -uint64_t -DataExtractor::getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const { +uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, + llvm::Error *Err) const { switch (byte_size) { case 1: - return getU8(offset_ptr); + return getU8(offset_ptr, Err); case 2: - return getU16(offset_ptr); + return getU16(offset_ptr, Err); case 4: - return getU32(offset_ptr); + return getU32(offset_ptr, Err); case 8: - return getU64(offset_ptr); + return getU64(offset_ptr, Err); } llvm_unreachable("getUnsigned unhandled case!"); } int64_t -DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) const { +DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const { switch (byte_size) { case 1: return (int8_t)getU8(offset_ptr); @@ -125,8 +151,8 @@ DataExtractor::getSigned(uint32_t *offset_ptr, uint32_t byte_size) 
const { llvm_unreachable("getSigned unhandled case!"); } -const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { - uint32_t offset = *offset_ptr; +const char *DataExtractor::getCStr(uint64_t *offset_ptr) const { + uint64_t offset = *offset_ptr; StringRef::size_type pos = Data.find('\0', offset); if (pos != StringRef::npos) { *offset_ptr = pos + 1; @@ -135,31 +161,38 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { return nullptr; } -StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const { - uint32_t Start = *OffsetPtr; +StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const { + uint64_t Start = *offset_ptr; StringRef::size_type Pos = Data.find('\0', Start); if (Pos != StringRef::npos) { - *OffsetPtr = Pos + 1; + *offset_ptr = Pos + 1; return StringRef(Data.data() + Start, Pos - Start); } return StringRef(); } -uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { +uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, + llvm::Error *Err) const { assert(*offset_ptr <= Data.size()); + ErrorAsOutParameter ErrAsOut(Err); + if (isError(Err)) + return 0; const char *error; unsigned bytes_read; uint64_t result = decodeULEB128( reinterpret_cast(Data.data() + *offset_ptr), &bytes_read, reinterpret_cast(Data.data() + Data.size()), &error); - if (error) + if (error) { + if (Err) + *Err = createStringError(errc::illegal_byte_sequence, error); return 0; + } *offset_ptr += bytes_read; return result; } -int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { +int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const { assert(*offset_ptr <= Data.size()); const char *error; @@ -172,3 +205,14 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { *offset_ptr += bytes_read; return result; } + +void DataExtractor::skip(Cursor &C, uint64_t Length) const { + ErrorAsOutParameter ErrAsOut(&C.Err); + if (isError(&C.Err)) + return; + + if (isValidOffsetForDataOfSize(C.Offset, Length)) + C.Offset += Length; + 
else + unexpectedEndReached(&C.Err); +} diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp index 72bc08af2dd..9ea08c37478 100644 --- a/lib/Support/Error.cpp +++ b/lib/Support/Error.cpp @@ -87,7 +87,7 @@ std::error_code FileError::convertToErrorCode() const { Error errorCodeToError(std::error_code EC) { if (!EC) return Error::success(); - return Error(llvm::make_unique(ECError(EC))); + return Error(std::make_unique(ECError(EC))); } std::error_code errorToErrorCode(Error Err) { @@ -167,18 +167,3 @@ void LLVMDisposeErrorMessage(char *ErrMsg) { delete[] ErrMsg; } LLVMErrorTypeId LLVMGetStringErrorTypeId() { return reinterpret_cast(&StringError::ID); } - -#ifndef _MSC_VER -namespace llvm { - -// One of these two variables will be referenced by a symbol defined in -// llvm-config.h. We provide a link-time (or load time for DSO) failure when -// there is a mismatch in the build configuration of the API client and LLVM. -#if LLVM_ENABLE_ABI_BREAKING_CHECKS -int EnableABIBreakingChecks; -#else -int DisableABIBreakingChecks; -#endif - -} // end namespace llvm -#endif diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp index e0f17787bdf..841e406a7b6 100644 --- a/lib/Support/FileCheck.cpp +++ b/lib/Support/FileCheck.cpp @@ -14,31 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileCheck.h" +#include "FileCheckImpl.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/FormatVariadic.h" #include #include -#include #include #include using namespace llvm; -void FileCheckNumericVariable::setValue(uint64_t NewValue) { - assert(!Value && "Overwriting numeric variable's value is not allowed"); - Value = NewValue; -} - -void FileCheckNumericVariable::clearValue() { - if (!Value) - return; - Value = None; -} - Expected FileCheckNumericVariableUse::eval() const { Optional Value = NumericVariable->getValue(); if (Value) return *Value; + return make_error(Name); } @@ 
-109,7 +100,7 @@ FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { // StringRef holding all characters considered as horizontal whitespaces by // FileCheck input canonicalization. -StringRef SpaceChars = " \t"; +constexpr StringLiteral SpaceChars = " \t"; // Parsing helper function that strips the first character in S and returns it. static char popFront(StringRef &S) { @@ -159,7 +150,9 @@ FileCheckPattern::parseNumericVariableDefinition( Expected> FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, - const SourceMgr &SM) const { + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { if (IsPseudo && !Name.equals("@LINE")) return FileCheckErrorDiagnostic::get( SM, Name, "invalid pseudo numeric variable '" + Name + "'"); @@ -185,21 +178,25 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber) return FileCheckErrorDiagnostic::get( SM, Name, - "numeric variable '" + Name + "' defined on the same line as used"); + "numeric variable '" + Name + + "' defined earlier in the same CHECK directive"); - return llvm::make_unique(Name, NumericVariable); + return std::make_unique(Name, NumericVariable); } Expected> FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, - const SourceMgr &SM) const { + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM) { if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { // Try to parse as a numeric variable use. Expected ParseVarResult = parseVariable(Expr, SM); if (ParseVarResult) return parseNumericVariableUse(ParseVarResult->Name, - ParseVarResult->IsPseudo, SM); + ParseVarResult->IsPseudo, LineNumber, + Context, SM); if (AO == AllowedOperand::LineVar) return ParseVarResult.takeError(); // Ignore the error and retry parsing as a literal. 
@@ -209,7 +206,7 @@ FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, // Otherwise, parse it as a literal. uint64_t LiteralValue; if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) - return llvm::make_unique(LiteralValue); + return std::make_unique(LiteralValue); return FileCheckErrorDiagnostic::get(SM, Expr, "invalid operand format '" + Expr + "'"); @@ -223,10 +220,10 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { return LeftOp - RightOp; } -Expected> -FileCheckPattern::parseBinop(StringRef &Expr, - std::unique_ptr LeftOp, - bool IsLegacyLineExpr, const SourceMgr &SM) const { +Expected> FileCheckPattern::parseBinop( + StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { Expr = Expr.ltrim(SpaceChars); if (Expr.empty()) return std::move(LeftOp); @@ -257,12 +254,12 @@ FileCheckPattern::parseBinop(StringRef &Expr, AllowedOperand AO = IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any; Expected> RightOpResult = - parseNumericOperand(Expr, AO, SM); + parseNumericOperand(Expr, AO, LineNumber, Context, SM); if (!RightOpResult) return RightOpResult; Expr = Expr.ltrim(SpaceChars); - return llvm::make_unique(EvalBinop, std::move(LeftOp), + return std::make_unique(EvalBinop, std::move(LeftOp), std::move(*RightOpResult)); } @@ -270,56 +267,60 @@ Expected> FileCheckPattern::parseNumericSubstitutionBlock( StringRef Expr, Optional &DefinedNumericVariable, - bool IsLegacyLineExpr, const SourceMgr &SM) const { - // Parse the numeric variable definition. + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM) { + std::unique_ptr ExpressionAST = nullptr; + StringRef DefExpr = StringRef(); DefinedNumericVariable = None; + // Save variable definition expression if any. 
size_t DefEnd = Expr.find(':'); if (DefEnd != StringRef::npos) { - StringRef DefExpr = Expr.substr(0, DefEnd); - StringRef UseExpr = Expr.substr(DefEnd + 1); - - UseExpr = UseExpr.ltrim(SpaceChars); - if (!UseExpr.empty()) - return FileCheckErrorDiagnostic::get( - SM, UseExpr, - "unexpected string after variable definition: '" + UseExpr + "'"); - - DefExpr = DefExpr.ltrim(SpaceChars); - Expected ParseResult = - parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); - if (!ParseResult) - return ParseResult.takeError(); - DefinedNumericVariable = *ParseResult; - - return nullptr; + DefExpr = Expr.substr(0, DefEnd); + Expr = Expr.substr(DefEnd + 1); } // Parse the expression itself. Expr = Expr.ltrim(SpaceChars); - // The first operand in a legacy @LINE expression is always the @LINE pseudo - // variable. - AllowedOperand AO = - IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; - Expected> ParseResult = - parseNumericOperand(Expr, AO, SM); - while (ParseResult && !Expr.empty()) { - ParseResult = - parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM); - // Legacy @LINE expressions only allow 2 operands. - if (ParseResult && IsLegacyLineExpr && !Expr.empty()) - return FileCheckErrorDiagnostic::get( - SM, Expr, - "unexpected characters at end of expression '" + Expr + "'"); + if (!Expr.empty()) { + // The first operand in a legacy @LINE expression is always the @LINE + // pseudo variable. + AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; + Expected> ParseResult = + parseNumericOperand(Expr, AO, LineNumber, Context, SM); + while (ParseResult && !Expr.empty()) { + ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, + LineNumber, Context, SM); + // Legacy @LINE expressions only allow 2 operands. 
+ if (ParseResult && IsLegacyLineExpr && !Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, + "unexpected characters at end of expression '" + Expr + "'"); + } + if (!ParseResult) + return ParseResult; + ExpressionAST = std::move(*ParseResult); } - if (!ParseResult) - return ParseResult; - return std::move(*ParseResult); + + // Parse the numeric variable definition. + if (DefEnd != StringRef::npos) { + DefExpr = DefExpr.ltrim(SpaceChars); + Expected ParseResult = + parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM); + + if (!ParseResult) + return ParseResult.takeError(); + DefinedNumericVariable = *ParseResult; + } + + return std::move(ExpressionAST); } bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, const FileCheckRequest &Req) { bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot; + IgnoreCase = Req.IgnoreCase; PatternLoc = SMLoc::getFromPointer(PatternStr.data()); @@ -396,14 +397,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, continue; } - // String and numeric substitution blocks. String substitution blocks come + // String and numeric substitution blocks. Pattern substitution blocks come // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some // other regex) and assigns it to the string variable 'foo'. The latter - // substitutes foo's value. Numeric substitution blocks work the same way - // as string ones, but start with a '#' sign after the double brackets. - // Both string and numeric variable names must satisfy the regular - // expression "[a-zA-Z_][0-9a-zA-Z_]*" to be valid, as this helps catch - // some common errors. + // substitutes foo's value. Numeric substitution blocks recognize the same + // form as string ones, but start with a '#' sign after the double + // brackets. They also accept a combined form which sets a numeric variable + // to the evaluation of an expression. 
Both string and numeric variable + // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be + // valid, as this helps catch some common errors. if (PatternStr.startswith("[[")) { StringRef UnparsedPatternStr = PatternStr.substr(2); // Find the closing bracket pair ending the match. End is going to be an @@ -424,6 +426,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, PatternStr = UnparsedPatternStr.substr(End + 2); bool IsDefinition = false; + bool SubstNeeded = false; // Whether the substitution block is a legacy use of @LINE with string // substitution block syntax. bool IsLegacyLineExpr = false; @@ -454,6 +457,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, bool IsPseudo = ParseVarResult->IsPseudo; IsDefinition = (VarEndIdx != StringRef::npos); + SubstNeeded = !IsDefinition; if (IsDefinition) { if ((IsPseudo || !MatchStr.consume_front(":"))) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), @@ -488,22 +492,61 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, if (IsNumBlock) { Expected> ParseResult = parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, - IsLegacyLineExpr, SM); + IsLegacyLineExpr, LineNumber, Context, + SM); if (!ParseResult) { logAllUnhandledErrors(ParseResult.takeError(), errs()); return true; } ExpressionAST = std::move(*ParseResult); + SubstNeeded = ExpressionAST != nullptr; if (DefinedNumericVariable) { IsDefinition = true; DefName = (*DefinedNumericVariable)->getName(); - MatchRegexp = StringRef("[0-9]+"); - } else + } + if (SubstNeeded) SubstStr = MatchStr; + else + MatchRegexp = "[0-9]+"; } + // Handle variable definition: [[:(...)]] and [[#(...):(...)]]. 
+ if (IsDefinition) { + RegExStr += '('; + ++SubstInsertIdx; + + if (IsNumBlock) { + FileCheckNumericVariableMatch NumericVariableDefinition = { + *DefinedNumericVariable, CurParen}; + NumericVariableDefs[DefName] = NumericVariableDefinition; + // This store is done here rather than in match() to allow + // parseNumericVariableUse() to get the pointer to the class instance + // of the right variable definition corresponding to a given numeric + // variable use. + Context->GlobalNumericVariableTable[DefName] = + *DefinedNumericVariable; + } else { + VariableDefs[DefName] = CurParen; + // Mark string variable as defined to detect collisions between + // string and numeric variables in parseNumericVariableUse() and + // defineCmdlineVariables() when the latter is created later than the + // former. We cannot reuse GlobalVariableTable for this by populating + // it with an empty string since we would then lose the ability to + // detect the use of an undefined variable in match(). + Context->DefinedVariableTable[DefName] = true; + } + + ++CurParen; + } + + if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM)) + return true; + + if (IsDefinition) + RegExStr += ')'; + // Handle substitutions: [[foo]] and [[#]]. - if (!IsDefinition) { + if (SubstNeeded) { // Handle substitution of string variables that were defined earlier on // the same line by emitting a backreference. Expressions do not // support substituting a numeric variable defined on the same line. @@ -526,37 +569,7 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); Substitutions.push_back(Substitution); } - continue; } - - // Handle variable definitions: [[:(...)]] and - // [[#(...):(...)]]. 
- if (IsNumBlock) { - FileCheckNumericVariableMatch NumericVariableDefinition = { - *DefinedNumericVariable, CurParen}; - NumericVariableDefs[DefName] = NumericVariableDefinition; - // This store is done here rather than in match() to allow - // parseNumericVariableUse() to get the pointer to the class instance - // of the right variable definition corresponding to a given numeric - // variable use. - Context->GlobalNumericVariableTable[DefName] = *DefinedNumericVariable; - } else { - VariableDefs[DefName] = CurParen; - // Mark the string variable as defined to detect collisions between - // string and numeric variables in parseNumericVariableUse() and - // DefineCmdlineVariables() when the latter is created later than the - // former. We cannot reuse GlobalVariableTable for this by populating - // it with an empty string since we would then lose the ability to - // detect the use of an undefined variable in match(). - Context->DefinedVariableTable[DefName] = true; - } - RegExStr += '('; - ++CurParen; - - if (AddRegExToRegEx(MatchRegexp, CurParen, SM)) - return true; - - RegExStr += ')'; } // Handle fixed string matches. @@ -607,7 +620,8 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, // If this is a fixed string pattern, just match it now. if (!FixedStr.empty()) { MatchLen = FixedStr.size(); - size_t Pos = Buffer.find(FixedStr); + size_t Pos = IgnoreCase ? Buffer.find_lower(FixedStr) + : Buffer.find(FixedStr); if (Pos == StringRef::npos) return make_error(); return Pos; @@ -631,10 +645,8 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, for (const auto &Substitution : Substitutions) { // Substitute and check for failure (e.g. use of undefined variable). Expected Value = Substitution->getResult(); - if (!Value) { - Context->LineVariable->clearValue(); + if (!Value) return Value.takeError(); - } // Plop it into the regex at the adjusted offset. 
TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, @@ -644,11 +656,13 @@ Expected FileCheckPattern::match(StringRef Buffer, size_t &MatchLen, // Match the newly constructed regex. RegExToMatch = TmpStr; - Context->LineVariable->clearValue(); } SmallVector MatchInfo; - if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) + unsigned int Flags = Regex::Newline; + if (IgnoreCase) + Flags |= Regex::IgnoreCase; + if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo)) return make_error(); // Successful regex match. @@ -824,7 +838,7 @@ template FileCheckNumericVariable * FileCheckPatternContext::makeNumericVariable(Types... args) { NumericVariables.push_back( - llvm::make_unique(args...)); + std::make_unique(args...)); return NumericVariables.back().get(); } @@ -832,14 +846,14 @@ FileCheckSubstitution * FileCheckPatternContext::makeStringSubstitution(StringRef VarName, size_t InsertIdx) { Substitutions.push_back( - llvm::make_unique(this, VarName, InsertIdx)); + std::make_unique(this, VarName, InsertIdx)); return Substitutions.back().get(); } FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( StringRef ExpressionStr, std::unique_ptr ExpressionAST, size_t InsertIdx) { - Substitutions.push_back(llvm::make_unique( + Substitutions.push_back(std::make_unique( this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); return Substitutions.back().get(); } @@ -1108,16 +1122,22 @@ void FileCheckPatternContext::createLineVariable() { GlobalNumericVariableTable[LineName] = LineVariable; } -bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, - std::vector &CheckStrings) { +FileCheck::FileCheck(FileCheckRequest Req) + : Req(Req), PatternContext(std::make_unique()), + CheckStrings(std::make_unique>()) {} + +FileCheck::~FileCheck() = default; + +bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, + Regex &PrefixRE) { Error DefineError = - 
PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM); + PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); if (DefineError) { logAllUnhandledErrors(std::move(DefineError), errs()); return true; } - PatternContext.createLineVariable(); + PatternContext->createLineVariable(); std::vector ImplicitNegativeChecks; for (const auto &PatternString : Req.ImplicitCheckNot) { @@ -1133,7 +1153,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); ImplicitNegativeChecks.push_back( - FileCheckPattern(Check::CheckNot, &PatternContext)); + FileCheckPattern(Check::CheckNot, PatternContext.get())); ImplicitNegativeChecks.back().parsePattern(PatternInBuffer, "IMPLICIT-CHECK", SM, Req); } @@ -1196,7 +1216,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); // Parse the pattern. - FileCheckPattern P(CheckTy, &PatternContext, LineNumber); + FileCheckPattern P(CheckTy, PatternContext.get(), LineNumber); if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req)) return true; @@ -1214,7 +1234,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them. if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame || CheckTy == Check::CheckEmpty) && - CheckStrings.empty()) { + CheckStrings->empty()) { StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME"; @@ -1232,21 +1252,21 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, } // Okay, add the string we captured to the output vector and move on. 
- CheckStrings.emplace_back(P, UsedPrefix, PatternLoc); - std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); + CheckStrings->emplace_back(P, UsedPrefix, PatternLoc); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); DagNotMatches = ImplicitNegativeChecks; } // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first // prefix as a filler for the error message. if (!DagNotMatches.empty()) { - CheckStrings.emplace_back( - FileCheckPattern(Check::CheckEOF, &PatternContext, LineNumber + 1), + CheckStrings->emplace_back( + FileCheckPattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); - std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); } - if (CheckStrings.empty()) { + if (CheckStrings->empty()) { errs() << "error: no check strings found with prefix" << (Req.CheckPrefixes.size() > 1 ? "es " : " "); auto I = Req.CheckPrefixes.begin(); @@ -1704,7 +1724,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, // A check prefix must contain only alphanumeric, hyphens and underscores. 
static bool ValidateCheckPrefix(StringRef CheckPrefix) { - Regex Validator("^[a-zA-Z0-9_-]*$"); + static const Regex Validator("^[a-zA-Z0-9_-]*$"); return Validator.match(CheckPrefix); } @@ -1759,11 +1779,32 @@ Error FileCheckPatternContext::defineCmdlineVariables( unsigned I = 0; Error Errs = Error::success(); std::string CmdlineDefsDiag; - StringRef Prefix1 = "Global define #"; - StringRef Prefix2 = ": "; - for (StringRef CmdlineDef : CmdlineDefines) - CmdlineDefsDiag += - (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str(); + SmallVector, 4> CmdlineDefsIndices; + for (StringRef CmdlineDef : CmdlineDefines) { + std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str(); + size_t EqIdx = CmdlineDef.find('='); + if (EqIdx == StringRef::npos) { + CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0)); + continue; + } + // Numeric variable definition. + if (CmdlineDef[0] == '#') { + // Append a copy of the command-line definition adapted to use the same + // format as in the input file to be able to reuse + // parseNumericSubstitutionBlock. 
+ CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str(); + std::string SubstitutionStr = CmdlineDef; + SubstitutionStr[EqIdx] = ':'; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size())); + CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str(); + } else { + CmdlineDefsDiag += DefPrefix; + CmdlineDefsIndices.push_back( + std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size())); + CmdlineDefsDiag += (CmdlineDef + "\n").str(); + } + } // Create a buffer with fake command line content in order to display // parsing diagnostic with location information and point to the @@ -1773,14 +1814,10 @@ Error FileCheckPatternContext::defineCmdlineVariables( StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer(); SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc()); - SmallVector CmdlineDefsDiagVec; - CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/, - false /*KeepEmpty*/); - for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) { - unsigned DefStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size(); - StringRef CmdlineDef = CmdlineDefDiag.substr(DefStart); - size_t EqIdx = CmdlineDef.find('='); - if (EqIdx == StringRef::npos) { + for (std::pair CmdlineDefIndices : CmdlineDefsIndices) { + StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first, + CmdlineDefIndices.second); + if (CmdlineDef.empty()) { Errs = joinErrors( std::move(Errs), FileCheckErrorDiagnostic::get( @@ -1790,31 +1827,35 @@ Error FileCheckPatternContext::defineCmdlineVariables( // Numeric variable definition. 
if (CmdlineDef[0] == '#') { - StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1); - Expected ParseResult = - FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this, - None, SM); - if (!ParseResult) { - Errs = joinErrors(std::move(Errs), ParseResult.takeError()); + // Now parse the definition both to check that the syntax is correct and + // to create the necessary class instance. + StringRef CmdlineDefExpr = CmdlineDef.substr(1); + Optional DefinedNumericVariable; + Expected> ExpressionASTResult = + FileCheckPattern::parseNumericSubstitutionBlock( + CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM); + if (!ExpressionASTResult) { + Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError()); + continue; + } + std::unique_ptr ExpressionAST = + std::move(*ExpressionASTResult); + // Now evaluate the expression whose value this variable should be set + // to, since the expression of a command-line variable definition should + // only use variables defined earlier on the command-line. If not, this + // is an error and we report it. + Expected Value = ExpressionAST->eval(); + if (!Value) { + Errs = joinErrors(std::move(Errs), Value.takeError()); continue; } - StringRef CmdlineVal = CmdlineDef.substr(EqIdx + 1); - uint64_t Val; - if (CmdlineVal.getAsInteger(10, Val)) { - Errs = joinErrors(std::move(Errs), - FileCheckErrorDiagnostic::get( - SM, CmdlineVal, - "invalid value in numeric variable definition '" + - CmdlineVal + "'")); - continue; - } - FileCheckNumericVariable *DefinedNumericVariable = *ParseResult; - DefinedNumericVariable->setValue(Val); + assert(DefinedNumericVariable && "No variable defined"); + (*DefinedNumericVariable)->setValue(*Value); // Record this variable definition. - GlobalNumericVariableTable[DefinedNumericVariable->getName()] = - DefinedNumericVariable; + GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] = + *DefinedNumericVariable; } else { // String variable definition. 
std::pair CmdlineNameVal = CmdlineDef.split('='); @@ -1851,7 +1892,7 @@ Error FileCheckPatternContext::defineCmdlineVariables( } GlobalVariableTable.insert(CmdlineNameVal); // Mark the string variable as defined to detect collisions between - // string and numeric variables in DefineCmdlineVariables when the latter + // string and numeric variables in defineCmdlineVariables when the latter // is created later than the former. We cannot reuse GlobalVariableTable // for this by populating it with an empty string since we would then // lose the ability to detect the use of an undefined variable in @@ -1887,18 +1928,17 @@ void FileCheckPatternContext::clearLocalVars() { GlobalNumericVariableTable.erase(Var); } -bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer, - ArrayRef CheckStrings, +bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer, std::vector *Diags) { bool ChecksFailed = false; - unsigned i = 0, j = 0, e = CheckStrings.size(); + unsigned i = 0, j = 0, e = CheckStrings->size(); while (true) { StringRef CheckRegion; if (j == e) { CheckRegion = Buffer; } else { - const FileCheckString &CheckLabelStr = CheckStrings[j]; + const FileCheckString &CheckLabelStr = (*CheckStrings)[j]; if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) { ++j; continue; @@ -1921,10 +1961,10 @@ bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer, // CHECK-LABEL and it would clear variables defined on the command-line // before they get used. 
if (i != 0 && Req.EnableVarScope) - PatternContext.clearLocalVars(); + PatternContext->clearLocalVars(); for (; i != j; ++i) { - const FileCheckString &CheckStr = CheckStrings[i]; + const FileCheckString &CheckStr = (*CheckStrings)[i]; // Check each string within the scanned region, including a second check // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) diff --git a/lib/Support/FileCheckImpl.h b/lib/Support/FileCheckImpl.h new file mode 100644 index 00000000000..06ce8301cec --- /dev/null +++ b/lib/Support/FileCheckImpl.h @@ -0,0 +1,624 @@ +//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the private interfaces of FileCheck. Its purpose is to +// allow unit testing of FileCheck and to separate the interface from the +// implementation. It is only meant to be used by FileCheck. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H +#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/SourceMgr.h" +#include +#include +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +// Numeric substitution handling code. +//===----------------------------------------------------------------------===// + +/// Base class representing the AST of a given expression. 
+class FileCheckExpressionAST { +public: + virtual ~FileCheckExpressionAST() = default; + + /// Evaluates and \returns the value of the expression represented by this + /// AST or an error if evaluation fails. + virtual Expected eval() const = 0; +}; + +/// Class representing an unsigned literal in the AST of an expression. +class FileCheckExpressionLiteral : public FileCheckExpressionAST { +private: + /// Actual value of the literal. + uint64_t Value; + +public: + /// Constructs a literal with the specified value. + FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} + + /// \returns the literal's value. + Expected eval() const { return Value; } +}; + +/// Class to represent an undefined variable error, which quotes that +/// variable's name when printed. +class FileCheckUndefVarError : public ErrorInfo { +private: + StringRef VarName; + +public: + static char ID; + + FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + + StringRef getVarName() const { return VarName; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print name of variable associated with this error. + void log(raw_ostream &OS) const override { + OS << "\""; + OS.write_escaped(VarName) << "\""; + } +}; + +/// Class representing a numeric variable and its associated current value. +class FileCheckNumericVariable { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Value of numeric variable, if defined, or None otherwise. + Optional Value; + + /// Line number where this variable is defined, or None if defined before + /// input is parsed. Used to determine whether a variable is defined on the + /// same line as a given use. + Optional DefLineNumber; + +public: + /// Constructor for a variable \p Name defined at line \p DefLineNumber or + /// defined before input is parsed if \p DefLineNumber is None. 
+ explicit FileCheckNumericVariable(StringRef Name, + Optional DefLineNumber = None) + : Name(Name), DefLineNumber(DefLineNumber) {} + + /// \returns name of this numeric variable. + StringRef getName() const { return Name; } + + /// \returns this variable's value. + Optional getValue() const { return Value; } + + /// Sets value of this numeric variable to \p NewValue. + void setValue(uint64_t NewValue) { Value = NewValue; } + + /// Clears value of this numeric variable, regardless of whether it is + /// currently defined or not. + void clearValue() { Value = None; } + + /// \returns the line number where this variable is defined, if any, or None + /// if defined before input is parsed. + Optional getDefLineNumber() { return DefLineNumber; } +}; + +/// Class representing the use of a numeric variable in the AST of an +/// expression. +class FileCheckNumericVariableUse : public FileCheckExpressionAST { +private: + /// Name of the numeric variable. + StringRef Name; + + /// Pointer to the class instance for the variable this use is about. + FileCheckNumericVariable *NumericVariable; + +public: + FileCheckNumericVariableUse(StringRef Name, + FileCheckNumericVariable *NumericVariable) + : Name(Name), NumericVariable(NumericVariable) {} + + /// \returns the value of the variable referenced by this instance. + Expected eval() const; +}; + +/// Type of functions evaluating a given binary operation. +using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); + +/// Class representing a single binary operation in the AST of an expression. +class FileCheckASTBinop : public FileCheckExpressionAST { +private: + /// Left operand. + std::unique_ptr LeftOperand; + + /// Right operand. + std::unique_ptr RightOperand; + + /// Pointer to function that can evaluate this binary operation. 
+ binop_eval_t EvalBinop; + +public: + FileCheckASTBinop(binop_eval_t EvalBinop, + std::unique_ptr LeftOp, + std::unique_ptr RightOp) + : EvalBinop(EvalBinop) { + LeftOperand = std::move(LeftOp); + RightOperand = std::move(RightOp); + } + + /// Evaluates the value of the binary operation represented by this AST, + /// using EvalBinop on the result of recursively evaluating the operands. + /// \returns the expression value or an error if an undefined numeric + /// variable is used in one of the operands. + Expected eval() const; +}; + +class FileCheckPatternContext; + +/// Class representing a substitution to perform in the RegExStr string. +class FileCheckSubstitution { +protected: + /// Pointer to a class instance holding, among other things, the table with + /// the values of live string variables at the start of any given CHECK line. + /// Used for substituting string variables with the text they were defined + /// as. Expressions are linked to the numeric variables they use at + /// parse time and directly access the value of the numeric variable to + /// evaluate their value. + FileCheckPatternContext *Context; + + /// The string that needs to be substituted for something else. For a + /// string variable this is its name, otherwise this is the whole expression. + StringRef FromStr; + + // Index in RegExStr of where to do the substitution. + size_t InsertIdx; + +public: + FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName, + size_t InsertIdx) + : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {} + + virtual ~FileCheckSubstitution() = default; + + /// \returns the string to be substituted for something else. + StringRef getFromString() const { return FromStr; } + + /// \returns the index where the substitution is to be performed in RegExStr. 
+ size_t getIndex() const { return InsertIdx; } + + /// \returns a string containing the result of the substitution represented + /// by this class instance or an error if substitution failed. + virtual Expected getResult() const = 0; +}; + +class FileCheckStringSubstitution : public FileCheckSubstitution { +public: + FileCheckStringSubstitution(FileCheckPatternContext *Context, + StringRef VarName, size_t InsertIdx) + : FileCheckSubstitution(Context, VarName, InsertIdx) {} + + /// \returns the text that the string variable in this substitution matched + /// when defined, or an error if the variable is undefined. + Expected getResult() const override; +}; + +class FileCheckNumericSubstitution : public FileCheckSubstitution { +private: + /// Pointer to the class representing the expression whose value is to be + /// substituted. + std::unique_ptr ExpressionAST; + +public: + FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, + std::unique_ptr ExprAST, + size_t InsertIdx) + : FileCheckSubstitution(Context, Expr, InsertIdx) { + ExpressionAST = std::move(ExprAST); + } + + /// \returns a string containing the result of evaluating the expression in + /// this substitution, or an error if evaluation failed. + Expected getResult() const override; +}; + +//===----------------------------------------------------------------------===// +// Pattern handling code. +//===----------------------------------------------------------------------===// + +struct FileCheckDiag; + +/// Class holding the FileCheckPattern global state, shared by all patterns: +/// tables holding values of variables and whether they are defined or not at +/// any given time in the matching process. +class FileCheckPatternContext { + friend class FileCheckPattern; + +private: + /// When matching a given pattern, this holds the value of all the string + /// variables defined in previous patterns. 
In a pattern, only the last + /// definition for a given variable is recorded in this table. + /// Back-references are used for uses after any other definition. + StringMap GlobalVariableTable; + + /// Map of all string variables defined so far. Used at parse time to detect + /// a name conflict between a numeric variable and a string variable when + /// the former is defined on a later line than the latter. + StringMap DefinedVariableTable; + + /// When matching a given pattern, this holds the pointers to the classes + /// representing the numeric variables defined in previous patterns. When + /// matching a pattern all definitions for that pattern are recorded in the + /// NumericVariableDefs table in the FileCheckPattern instance of that + /// pattern. + StringMap GlobalNumericVariableTable; + + /// Pointer to the class instance representing the @LINE pseudo variable for + /// easily updating its value. + FileCheckNumericVariable *LineVariable = nullptr; + + /// Vector holding pointers to all parsed numeric variables. Used to + /// automatically free them once they are guaranteed to no longer be used. + std::vector> NumericVariables; + + /// Vector holding pointers to all substitutions. Used to automatically free + /// them once they are guaranteed to no longer be used. + std::vector> Substitutions; + +public: + /// \returns the value of string variable \p VarName or an error if no such + /// variable has been defined. + Expected getPatternVarValue(StringRef VarName); + + /// Defines string and numeric variables from definitions given on the + /// command line, passed as a vector of [#]VAR=VAL strings in + /// \p CmdlineDefines. \returns an error list containing diagnostics against + /// \p SM for all definition parsing failures, if any, or Success otherwise. + Error defineCmdlineVariables(std::vector &CmdlineDefines, + SourceMgr &SM); + + /// Create @LINE pseudo variable. Value is set when patterns are being + /// matched.
+ void createLineVariable(); + + /// Undefines local variables (variables whose name does not start with a '$' + /// sign), i.e. removes them from GlobalVariableTable and from + /// GlobalNumericVariableTable and also clears the value of numeric + /// variables. + void clearLocalVars(); + +private: + /// Makes a new numeric variable and registers it for destruction when the + /// context is destroyed. + template + FileCheckNumericVariable *makeNumericVariable(Types... args); + + /// Makes a new string substitution and registers it for destruction when the + /// context is destroyed. + FileCheckSubstitution *makeStringSubstitution(StringRef VarName, + size_t InsertIdx); + + /// Makes a new numeric substitution and registers it for destruction when + /// the context is destroyed. + FileCheckSubstitution * + makeNumericSubstitution(StringRef ExpressionStr, + std::unique_ptr ExpressionAST, + size_t InsertIdx); +}; + +/// Class to represent an error holding a diagnostic with location information +/// used when printing it. +class FileCheckErrorDiagnostic : public ErrorInfo { +private: + SMDiagnostic Diagnostic; + +public: + static char ID; + + FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {} + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. 
+ void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); } + + static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) { + return make_error( + SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg)); + } + + static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) { + return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg); + } +}; + +class FileCheckNotFoundError : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print diagnostic associated with this error when printing the error. + void log(raw_ostream &OS) const override { + OS << "String not found in input"; + } +}; + +class FileCheckPattern { + SMLoc PatternLoc; + + /// A fixed string to match as the pattern or empty if this pattern requires + /// a regex match. + StringRef FixedStr; + + /// A regex string to match as the pattern or empty if this pattern requires + /// a fixed string to match. + std::string RegExStr; + + /// Entries in this vector represent a substitution of a string variable or + /// an expression in the RegExStr regex at match time. For example, in the + /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]", + /// RegExStr will contain "foobaz" and we'll get two entries in this vector + /// that tells us to insert the value of string variable "bar" at offset 3 + /// and the value of expression "N+1" at offset 6. + std::vector Substitutions; + + /// Maps names of string variables defined in a pattern to the number of + /// their parenthesis group in RegExStr capturing their last definition. + /// + /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])", + /// RegExStr will be "foo(.*)baz(\1(.*))" where is + /// the value captured for QUUX on the earlier line where it was defined, and + /// VariableDefs will map "bar" to the third parenthesis group which captures + /// the second definition of "bar". 
+ /// + /// Note: uses std::map rather than StringMap to be able to get the key when + /// iterating over values. + std::map VariableDefs; + + /// Structure representing the definition of a numeric variable in a pattern. + /// It holds the pointer to the class representing the numeric variable whose + /// value is being defined and the number of the parenthesis group in + /// RegExStr to capture that value. + struct FileCheckNumericVariableMatch { + /// Pointer to class representing the numeric variable whose value is being + /// defined. + FileCheckNumericVariable *DefinedNumericVariable; + + /// Number of the parenthesis group in RegExStr that captures the value of + /// this numeric variable definition. + unsigned CaptureParenGroup; + }; + + /// Holds the number of the parenthesis group in RegExStr and pointer to the + /// corresponding FileCheckNumericVariable class instance of all numeric + /// variable definitions. Used to set the matched value of all those + /// variables. + StringMap NumericVariableDefs; + + /// Pointer to a class instance holding the global state shared by all + /// patterns: + /// - separate tables with the values of live string and numeric variables + /// respectively at the start of any given CHECK line; + /// - table holding whether a string variable has been defined at any given + /// point during the parsing phase. + FileCheckPatternContext *Context; + + Check::FileCheckType CheckTy; + + /// Line number for this CHECK pattern or None if it is an implicit pattern. + /// Used to determine whether a variable definition is made on an earlier + /// line to the one with this CHECK. + Optional LineNumber; + + /// Ignore case while matching if set to true. + bool IgnoreCase = false; + +public: + FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context, + Optional Line = None) + : Context(Context), CheckTy(Ty), LineNumber(Line) {} + + /// \returns the location in source code. 
+ SMLoc getLoc() const { return PatternLoc; } + + /// \returns the pointer to the global state for all patterns in this + /// FileCheck instance. + FileCheckPatternContext *getContext() const { return Context; } + + /// \returns whether \p C is a valid first character for a variable name. + static bool isValidVarNameStart(char C); + + /// Parsing information about a variable. + struct VariableProperties { + StringRef Name; + bool IsPseudo; + }; + + /// Parses the string at the start of \p Str for a variable name. \returns + /// a VariableProperties structure holding the variable name and whether it + /// is the name of a pseudo variable, or an error holding a diagnostic + /// against \p SM if parsing fail. If parsing was successful, also strips + /// \p Str from the variable name. + static Expected parseVariable(StringRef &Str, + const SourceMgr &SM); + /// Parses \p Expr for a numeric substitution block at line \p LineNumber, + /// or before input is parsed if \p LineNumber is None. Parameter + /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE + /// expression and \p Context points to the class instance holding the live + /// string and numeric variables. \returns a pointer to the class instance + /// representing the AST of the expression whose value must be substitued, or + /// an error holding a diagnostic against \p SM if parsing fails. If + /// substitution was successful, sets \p DefinedNumericVariable to point to + /// the class representing the numeric variable defined in this numeric + /// substitution block, or None if this block does not define any variable. + static Expected> + parseNumericSubstitutionBlock( + StringRef Expr, + Optional &DefinedNumericVariable, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern + /// instance accordingly. 
+ /// + /// \p Prefix provides which prefix is being matched, \p Req describes the + /// global options that influence the parsing such as whitespace + /// canonicalization, \p SM provides the SourceMgr used for error reports. + /// \returns true in case of an error, false otherwise. + bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, + const FileCheckRequest &Req); + /// Matches the pattern string against the input buffer \p Buffer + /// + /// \returns the position that is matched or an error indicating why matching + /// failed. If there is a match, updates \p MatchLen with the size of the + /// matched string. + /// + /// The GlobalVariableTable StringMap in the FileCheckPatternContext class + /// instance provides the current values of FileCheck string variables and + /// is updated if this match defines new values. Likewise, the + /// GlobalNumericVariableTable StringMap in the same class provides the + /// current values of FileCheck numeric variables and is updated if this + /// match defines new numeric values. + Expected match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const; + /// Prints the value of successful substitutions or the name of the undefined + /// string or numeric variables preventing a successful substitution. 
+ void printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange = None) const; + void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, + std::vector *Diags) const; + + bool hasVariable() const { + return !(Substitutions.empty() && VariableDefs.empty()); + } + + Check::FileCheckType getCheckTy() const { return CheckTy; } + + int getCount() const { return CheckTy.getCount(); } + +private: + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); + /// Computes an arbitrary estimate for the quality of matching this pattern + /// at the start of \p Buffer; a distance of zero should correspond to a + /// perfect match. + unsigned computeMatchDistance(StringRef Buffer) const; + /// Finds the closing sequence of a regex variable usage or definition. + /// + /// \p Str has to point in the beginning of the definition (right after the + /// opening sequence). \p SM holds the SourceMgr used for error repporting. + /// \returns the offset of the closing sequence within Str, or npos if it + /// was not found. + size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); + + /// Parses \p Expr for the name of a numeric variable to be defined at line + /// \p LineNumber, or before input is parsed if \p LineNumber is None. + /// \returns a pointer to the class instance representing that variable, + /// creating it if needed, or an error holding a diagnostic against \p SM + /// should defining such a variable be invalid. + static Expected parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional LineNumber, const SourceMgr &SM); + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use + /// at line \p LineNumber, or before input is parsed if \p LineNumber is + /// None. Parameter \p Context points to the class instance holding the live + /// string and numeric variables. 
\returns the pointer to the class instance + /// representing that variable if successful, or an error holding a + /// diagnostic against \p SM otherwise. + static Expected> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM); + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or + /// before input is parsed if \p LineNumber is None. Accepts both literal + /// values and numeric variables, depending on the value of \p AO. Parameter + /// \p Context points to the class instance holding the live string and + /// numeric variables. \returns the class representing that operand in the + /// AST of the expression or an error holding a diagnostic against \p SM + /// otherwise. + static Expected> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses \p Expr for a binary operation at line \p LineNumber, or before + /// input is parsed if \p LineNumber is None. The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. Parameter \p Context points to + /// the class instance holding the live string and numeric variables. + /// \returns the class representing the binary operation in the AST of the + /// expression, or an error holding a diagnostic against \p SM otherwise. + static Expected> + parseBinop(StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, Optional LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); +}; + +//===----------------------------------------------------------------------===// +// Check Strings. +//===----------------------------------------------------------------------===// + +/// A check that we found in the input file. 
+struct FileCheckString { + /// The pattern to match. + FileCheckPattern Pat; + + /// Which prefix name this check matched. + StringRef Prefix; + + /// The location in the match file that the check string was specified. + SMLoc Loc; + + /// All of the strings that are disallowed from occurring between this match + /// string and the previous one (or start of file). + std::vector DagNotStrings; + + FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) + : Pat(P), Prefix(S), Loc(L) {} + + /// Matches check string and its "not strings" and/or "dag strings". + size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, + size_t &MatchLen, FileCheckRequest &Req, + std::vector *Diags) const; + + /// Verifies that there is a single line in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that there is no newline in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that none of the strings in \p NotStrings are found in the given + /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in + /// \p Diags according to the verbosity level set in \p Req. + bool CheckNot(const SourceMgr &SM, StringRef Buffer, + const std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; + /// Matches "dag strings" and their mixed "not strings". 
+ size_t CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector &NotStrings, + const FileCheckRequest &Req, + std::vector *Diags) const; +}; + +} // namespace llvm + +#endif diff --git a/lib/Support/FileCollector.cpp b/lib/Support/FileCollector.cpp new file mode 100644 index 00000000000..47fca641372 --- /dev/null +++ b/lib/Support/FileCollector.cpp @@ -0,0 +1,268 @@ +//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileCollector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" + +using namespace llvm; + +static bool isCaseSensitivePath(StringRef Path) { + SmallString<256> TmpDest = Path, UpperDest, RealDest; + + // Remove component traversals, links, etc. + if (!sys::fs::real_path(Path, TmpDest)) + return true; // Current default value in vfs.yaml + Path = TmpDest; + + // Change path to all upper case and ask for its real path, if the latter + // exists and is equal to path, it's not case sensitive. Default to case + // sensitive in the absence of real_path, since this is the YAMLVFSWriter + // default. 
+ UpperDest = Path.upper(); + if (sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest)) + return false; + return true; +} + +FileCollector::FileCollector(std::string Root, std::string OverlayRoot) + : Root(std::move(Root)), OverlayRoot(std::move(OverlayRoot)) { + sys::fs::create_directories(this->Root, true); +} + +bool FileCollector::getRealPath(StringRef SrcPath, + SmallVectorImpl &Result) { + SmallString<256> RealPath; + StringRef FileName = sys::path::filename(SrcPath); + std::string Directory = sys::path::parent_path(SrcPath).str(); + auto DirWithSymlink = SymlinkMap.find(Directory); + + // Use real_path to fix any symbolic link component present in a path. + // Computing the real path is expensive, cache the search through the parent + // path Directory. + if (DirWithSymlink == SymlinkMap.end()) { + auto EC = sys::fs::real_path(Directory, RealPath); + if (EC) + return false; + SymlinkMap[Directory] = RealPath.str(); + } else { + RealPath = DirWithSymlink->second; + } + + sys::path::append(RealPath, FileName); + Result.swap(RealPath); + return true; +} + +void FileCollector::addFile(const Twine &file) { + std::lock_guard lock(Mutex); + std::string FileStr = file.str(); + if (markAsSeen(FileStr)) + addFileImpl(FileStr); +} + +void FileCollector::addFileImpl(StringRef SrcPath) { + // We need an absolute src path to append to the root. + SmallString<256> AbsoluteSrc = SrcPath; + sys::fs::make_absolute(AbsoluteSrc); + + // Canonicalize src to a native path to avoid mixed separator styles. + sys::path::native(AbsoluteSrc); + + // Remove redundant leading "./" pieces and consecutive separators. + AbsoluteSrc = sys::path::remove_leading_dotslash(AbsoluteSrc); + + // Canonicalize the source path by removing "..", "." components. + SmallString<256> VirtualPath = AbsoluteSrc; + sys::path::remove_dots(VirtualPath, /*remove_dot_dot=*/true); + + // If a ".." 
component is present after a symlink component, remove_dots may + // lead to the wrong real destination path. Let the source be canonicalized + // like that but make sure we always use the real path for the destination. + SmallString<256> CopyFrom; + if (!getRealPath(AbsoluteSrc, CopyFrom)) + CopyFrom = VirtualPath; + + SmallString<256> DstPath = StringRef(Root); + sys::path::append(DstPath, sys::path::relative_path(CopyFrom)); + + // Always map a canonical src path to its real path into the YAML, by doing + // this we map different virtual src paths to the same entry in the VFS + // overlay, which is a way to emulate symlink inside the VFS; this is also + // needed for correctness, not doing that can lead to module redefinition + // errors. + addFileToMapping(VirtualPath, DstPath); +} + +/// Set the access and modification time for the given file from the given +/// status object. +static std::error_code +copyAccessAndModificationTime(StringRef Filename, + const sys::fs::file_status &Stat) { + int FD; + + if (auto EC = + sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting)) + return EC; + + if (auto EC = sys::fs::setLastAccessAndModificationTime( + FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime())) + return EC; + + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + return EC; + + return {}; +} + +std::error_code FileCollector::copyFiles(bool StopOnError) { + for (auto &entry : VFSWriter.getMappings()) { + // Create directory tree. + if (std::error_code EC = + sys::fs::create_directories(sys::path::parent_path(entry.RPath), + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + + // Get the status of the original file/directory. + sys::fs::file_status Stat; + if (std::error_code EC = sys::fs::status(entry.VPath, Stat)) { + if (StopOnError) + return EC; + continue; + } + + if (Stat.type() == sys::fs::file_type::directory_file) { + // Construct a directory when it's just a directory entry. 
+ if (std::error_code EC = + sys::fs::create_directories(entry.RPath, + /*IgnoreExisting=*/true)) { + if (StopOnError) + return EC; + } + continue; + } + + // Copy file over. + if (std::error_code EC = sys::fs::copy_file(entry.VPath, entry.RPath)) { + if (StopOnError) + return EC; + } + + // Copy over permissions. + if (auto perms = sys::fs::getPermissions(entry.VPath)) { + if (std::error_code EC = sys::fs::setPermissions(entry.RPath, *perms)) { + if (StopOnError) + return EC; + } + } + + // Copy over modification time. + copyAccessAndModificationTime(entry.RPath, Stat); + } + return {}; +} + +std::error_code FileCollector::writeMapping(StringRef mapping_file) { + std::lock_guard lock(Mutex); + + VFSWriter.setOverlayDir(OverlayRoot); + VFSWriter.setCaseSensitivity(isCaseSensitivePath(OverlayRoot)); + VFSWriter.setUseExternalNames(false); + + std::error_code EC; + raw_fd_ostream os(mapping_file, EC, sys::fs::OF_Text); + if (EC) + return EC; + + VFSWriter.write(os); + + return {}; +} + +namespace { + +class FileCollectorFileSystem : public vfs::FileSystem { +public: + explicit FileCollectorFileSystem(IntrusiveRefCntPtr FS, + std::shared_ptr Collector) + : FS(std::move(FS)), Collector(std::move(Collector)) {} + + llvm::ErrorOr status(const Twine &Path) override { + auto Result = FS->status(Path); + if (Result && Result->exists()) + Collector->addFile(Path); + return Result; + } + + llvm::ErrorOr> + openFileForRead(const Twine &Path) override { + auto Result = FS->openFileForRead(Path); + if (Result && *Result) + Collector->addFile(Path); + return Result; + } + + llvm::vfs::directory_iterator dir_begin(const llvm::Twine &Dir, + std::error_code &EC) override { + auto It = FS->dir_begin(Dir, EC); + if (EC) + return It; + // Collect everything that's listed in case the user needs it. 
+ Collector->addFile(Dir); + for (; !EC && It != llvm::vfs::directory_iterator(); It.increment(EC)) { + if (It->type() == sys::fs::file_type::regular_file || + It->type() == sys::fs::file_type::directory_file || + It->type() == sys::fs::file_type::symlink_file) { + Collector->addFile(It->path()); + } + } + if (EC) + return It; + // Return a new iterator. + return FS->dir_begin(Dir, EC); + } + + std::error_code getRealPath(const Twine &Path, + SmallVectorImpl &Output) const override { + auto EC = FS->getRealPath(Path, Output); + if (!EC) { + Collector->addFile(Path); + if (Output.size() > 0) + Collector->addFile(Output); + } + return EC; + } + + std::error_code isLocal(const Twine &Path, bool &Result) override { + return FS->isLocal(Path, Result); + } + + llvm::ErrorOr getCurrentWorkingDirectory() const override { + return FS->getCurrentWorkingDirectory(); + } + + std::error_code setCurrentWorkingDirectory(const llvm::Twine &Path) override { + return FS->setCurrentWorkingDirectory(Path); + } + +private: + IntrusiveRefCntPtr FS; + std::shared_ptr Collector; +}; + +} // end anonymous namespace + +IntrusiveRefCntPtr +FileCollector::createCollectorVFS(IntrusiveRefCntPtr BaseFS, + std::shared_ptr Collector) { + return new FileCollectorFileSystem(std::move(BaseFS), std::move(Collector)); +} diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 3d6b569f299..024dd3e57a4 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -121,7 +121,7 @@ createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) { Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); if (EC) return errorCodeToError(EC); - return llvm::make_unique(Path, MB, Size, Mode); + return std::make_unique(Path, MB, Size, Mode); } static Expected> @@ -146,7 +146,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) { // Mmap it. 
std::error_code EC; - auto MappedFile = llvm::make_unique( + auto MappedFile = std::make_unique( fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite, Size, 0, EC); @@ -157,7 +157,7 @@ createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) { return createInMemoryBuffer(Path, Size, Mode); } - return llvm::make_unique(Path, std::move(File), + return std::make_unique(Path, std::move(File), std::move(MappedFile)); } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 62eb7bfda19..d11fbb54dc0 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileUtilities.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -264,3 +267,66 @@ int llvm::DiffFilesWithTolerance(StringRef NameA, return CompareFailed; } + +void llvm::AtomicFileWriteError::log(raw_ostream &OS) const { + OS << "atomic_write_error: "; + switch (Error) { + case atomic_write_error::failed_to_create_uniq_file: + OS << "failed_to_create_uniq_file"; + return; + case atomic_write_error::output_stream_error: + OS << "output_stream_error"; + return; + case atomic_write_error::failed_to_rename_temp_file: + OS << "failed_to_rename_temp_file"; + return; + } + llvm_unreachable("unknown atomic_write_error value in " + "failed_to_rename_temp_file::log()"); +} + +llvm::Error llvm::writeFileAtomically(StringRef TempPathModel, + StringRef FinalPath, StringRef Buffer) { + return writeFileAtomically(TempPathModel, FinalPath, + [&Buffer](llvm::raw_ostream &OS) { + OS.write(Buffer.data(), Buffer.size()); + return llvm::Error::success(); + }); +} + +llvm::Error llvm::writeFileAtomically( + StringRef TempPathModel, 
StringRef FinalPath, + std::function Writer) { + SmallString<128> GeneratedUniqPath; + int TempFD; + if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD, + GeneratedUniqPath)) { + return llvm::make_error( + atomic_write_error::failed_to_create_uniq_file); + } + llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath); + + raw_fd_ostream OS(TempFD, /*shouldClose=*/true); + if (llvm::Error Err = Writer(OS)) { + return Err; + } + + OS.close(); + if (OS.has_error()) { + OS.clear_error(); + return llvm::make_error( + atomic_write_error::output_stream_error); + } + + if (const std::error_code Error = + sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(), + /*to=*/FinalPath.str().c_str())) { + return llvm::make_error( + atomic_write_error::failed_to_rename_temp_file); + } + + RemoveTmpFileOnFail.releaseFile(); + return Error::success(); +} + +char llvm::AtomicFileWriteError::ID; diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp index 6011be86d77..8dae6941ec7 100644 --- a/lib/Support/GlobPattern.cpp +++ b/lib/Support/GlobPattern.cpp @@ -19,7 +19,7 @@ using namespace llvm; static bool hasWildcard(StringRef S) { - return S.find_first_of("?*[") != StringRef::npos; + return S.find_first_of("?*[\\") != StringRef::npos; } // Expands character ranges and returns a bitmap. @@ -60,8 +60,9 @@ static Expected expand(StringRef S, StringRef Original) { } // This is a scanner for the glob pattern. -// A glob pattern token is one of "*", "?", "[]", "[^]" -// (which is a negative form of "[]"), or a non-meta character. +// A glob pattern token is one of "*", "?", "\", "[]", "[^]" +// (which is a negative form of "[]"), "[!]" (which is +// equivalent to "[^]"), or a non-meta character. // This function returns the first token in S. 
static Expected scan(StringRef &S, StringRef Original) { switch (S[0]) { @@ -74,14 +75,16 @@ static Expected scan(StringRef &S, StringRef Original) { S = S.substr(1); return BitVector(256, true); case '[': { - size_t End = S.find(']', 1); + // ']' is allowed as the first character of a character class. '[]' is + // invalid. So, just skip the first character. + size_t End = S.find(']', 2); if (End == StringRef::npos) return make_error("invalid glob pattern: " + Original, errc::invalid_argument); StringRef Chars = S.substr(1, End - 1); S = S.substr(End + 1); - if (Chars.startswith("^")) { + if (Chars.startswith("^") || Chars.startswith("!")) { Expected BV = expand(Chars.substr(1), Original); if (!BV) return BV.takeError(); @@ -89,6 +92,11 @@ static Expected scan(StringRef &S, StringRef Original) { } return expand(Chars, Original); } + case '\\': + // Eat this character and fall through below to treat it like a non-meta + // character. + S = S.substr(1); + LLVM_FALLTHROUGH; default: BitVector BV(256, false); BV[(uint8_t)S[0]] = true; @@ -107,8 +115,9 @@ Expected GlobPattern::create(StringRef S) { return Pat; } - // S is something like "foo*". We can use startswith(). - if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*", and the "* is not escaped. We can use + // startswith(). 
+ if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) { Pat.Prefix = S.drop_back(); return Pat; } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index d491912bdc0..2a473a1994c 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -316,7 +316,7 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { if (Id >= 8561 && HaveVectorSupport) - return "arch13"; + return "z15"; if (Id >= 3906 && HaveVectorSupport) return "z14"; if (Id >= 2964 && HaveVectorSupport) @@ -680,7 +680,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; - if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" @@ -746,6 +746,13 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; default: // Unknown family 6 CPU, try to guess. 
+ // TODO detect tigerlake host + if (Features3 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 64))) { + *Type = X86::INTEL_COREI7; + *Subtype = X86::INTEL_COREI7_TIGERLAKE; + break; + } + if (Features & (1 << X86::FEATURE_AVX512VBMI2)) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; @@ -758,7 +765,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; } - if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) { + if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) { *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_COOPERLAKE; break; @@ -1034,7 +1041,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_BMI); if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) setFeature(X86::FEATURE_AVX2); - if (HasLeaf7 && ((EBX >> 9) & 1)) + if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(X86::FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512F); @@ -1078,6 +1085,13 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512VP2INTERSECT); + + bool HasLeaf7Subleaf1 = + MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) + setFeature(X86::FEATURE_AVX512BF16); unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -1369,7 +1383,6 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); - Features["mpx"] = HasLeaf7 && ((EBX >> 14) & 1); // AVX512 is only supported if the OS supports the context save for it. 
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; @@ -1499,6 +1512,17 @@ bool sys::getHostCPUFeatures(StringMap &Features) { return true; } +#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) +bool sys::getHostCPUFeatures(StringMap &Features) { + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) + Features["neon"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) + Features["crc"] = true; + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + Features["crypto"] = true; + + return true; +} #else bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp index 95e5ed65427..16b1d11efd0 100644 --- a/lib/Support/JSON.cpp +++ b/lib/Support/JSON.cpp @@ -502,7 +502,7 @@ bool Parser::parseError(const char *Msg) { } } Err.emplace( - llvm::make_unique(Msg, Line, P - StartOfLine, P - Start)); + std::make_unique(Msg, Line, P - StartOfLine, P - Start)); return false; } } // namespace diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp deleted file mode 100644 index e043a3c33c2..00000000000 --- a/lib/Support/JamCRC.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains an implementation of JamCRC. -// -//===----------------------------------------------------------------------===// -// -// The implementation technique is the one mentioned in: -// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table -// look-up. Commun. 
ACM 31, 8 (August 1988) -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/JamCRC.h" -#include "llvm/ADT/ArrayRef.h" - -using namespace llvm; - -static const uint32_t CRCTable[256] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -void JamCRC::update(ArrayRef Data) { - for (char Byte : Data) { - int TableIdx = (CRC ^ Byte) & 0xff; - CRC = CRCTable[TableIdx] ^ (CRC >> 8); - } -} diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 28ceb1a70e4..053493f72fb 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -12,21 +12,20 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Threading.h" #include 
+#include using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; -static sys::Mutex *ManagedStaticMutex = nullptr; +static std::recursive_mutex *ManagedStaticMutex = nullptr; static llvm::once_flag mutex_init_flag; static void initializeMutex() { - ManagedStaticMutex = new sys::Mutex(); + ManagedStaticMutex = new std::recursive_mutex(); } -static sys::Mutex* getManagedStaticMutex() { +static std::recursive_mutex *getManagedStaticMutex() { llvm::call_once(mutex_init_flag, initializeMutex); return ManagedStaticMutex; } @@ -35,7 +34,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - MutexGuard Lock(*getManagedStaticMutex()); + std::lock_guard Lock(*getManagedStaticMutex()); if (!Ptr.load(std::memory_order_relaxed)) { void *Tmp = Creator(); @@ -77,7 +76,7 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { - MutexGuard Lock(*getManagedStaticMutex()); + std::lock_guard Lock(*getManagedStaticMutex()); while (StaticList) StaticList->destroy(); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index d0e5bb154c1..e4027ca7bbf 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -211,15 +211,17 @@ static ErrorOr> getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) { const ssize_t ChunkSize = 4096*4; SmallString Buffer; - size_t ReadBytes; // Read into Buffer until we hit EOF. 
- do { + for (;;) { Buffer.reserve(Buffer.size() + ChunkSize); - if (auto EC = sys::fs::readNativeFile( - FD, makeMutableArrayRef(Buffer.end(), ChunkSize), &ReadBytes)) - return EC; - Buffer.set_size(Buffer.size() + ReadBytes); - } while (ReadBytes != 0); + Expected ReadBytes = sys::fs::readNativeFile( + FD, makeMutableArrayRef(Buffer.end(), ChunkSize)); + if (!ReadBytes) + return errorToErrorCode(ReadBytes.takeError()); + if (*ReadBytes == 0) + break; + Buffer.set_size(Buffer.size() + *ReadBytes); + } return getMemBufferCopyImpl(Buffer, BufferName); } @@ -458,7 +460,20 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize, return make_error_code(errc::not_enough_memory); } - sys::fs::readNativeFileSlice(FD, Buf->getBuffer(), Offset); + // Read until EOF, zero-initialize the rest. + MutableArrayRef ToRead = Buf->getBuffer(); + while (!ToRead.empty()) { + Expected ReadBytes = + sys::fs::readNativeFileSlice(FD, ToRead, Offset); + if (!ReadBytes) + return errorToErrorCode(ReadBytes.takeError()); + if (*ReadBytes == 0) { + std::memset(ToRead.data(), 0, ToRead.size()); + break; + } + ToRead = ToRead.drop_front(*ReadBytes); + Offset += *ReadBytes; + } return std::move(Buf); } diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp deleted file mode 100644 index 69b7b8126ab..00000000000 --- a/lib/Support/Mutex.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the llvm::sys::Mutex class. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Mutex.h" -#include "llvm/Config/config.h" -#include "llvm/Support/ErrorHandling.h" - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// - -#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -// Define all methods as no-ops if threading is explicitly disabled -namespace llvm { -using namespace sys; -MutexImpl::MutexImpl( bool recursive) { } -MutexImpl::~MutexImpl() { } -bool MutexImpl::acquire() { return true; } -bool MutexImpl::release() { return true; } -bool MutexImpl::tryacquire() { return true; } -} -#else - -#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK) - -#include -#include -#include - -namespace llvm { -using namespace sys; - -// Construct a Mutex using pthread calls -MutexImpl::MutexImpl( bool recursive) - : data_(nullptr) -{ - // Declare the pthread_mutex data structures - pthread_mutex_t* mutex = - static_cast(safe_malloc(sizeof(pthread_mutex_t))); - - pthread_mutexattr_t attr; - - // Initialize the mutex attributes - int errorcode = pthread_mutexattr_init(&attr); - assert(errorcode == 0); (void)errorcode; - - // Initialize the mutex as a recursive mutex, if requested, or normal - // otherwise. - int kind = ( recursive ? 
PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL ); - errorcode = pthread_mutexattr_settype(&attr, kind); - assert(errorcode == 0); - - // Initialize the mutex - errorcode = pthread_mutex_init(mutex, &attr); - assert(errorcode == 0); - - // Destroy the attributes - errorcode = pthread_mutexattr_destroy(&attr); - assert(errorcode == 0); - - // Assign the data member - data_ = mutex; -} - -// Destruct a Mutex -MutexImpl::~MutexImpl() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - pthread_mutex_destroy(mutex); - free(mutex); -} - -bool -MutexImpl::acquire() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_lock(mutex); - return errorcode == 0; -} - -bool -MutexImpl::release() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_unlock(mutex); - return errorcode == 0; -} - -bool -MutexImpl::tryacquire() -{ - pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != nullptr); - - int errorcode = pthread_mutex_trylock(mutex); - return errorcode == 0; -} - -} - -#elif defined(LLVM_ON_UNIX) -#include "Unix/Mutex.inc" -#elif defined( _WIN32) -#include "Windows/Mutex.inc" -#else -#warning Neither LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp -#endif -#endif diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp index 621bccbf2a4..355c64b7d07 100644 --- a/lib/Support/Parallel.cpp +++ b/lib/Support/Parallel.cpp @@ -32,34 +32,6 @@ public: static Executor *getDefaultExecutor(); }; -#if defined(_MSC_VER) -/// An Executor that runs tasks via ConcRT. 
-class ConcRTExecutor : public Executor { - struct Taskish { - Taskish(std::function Task) : Task(Task) {} - - std::function Task; - - static void run(void *P) { - Taskish *Self = static_cast(P); - Self->Task(); - concurrency::Free(Self); - } - }; - -public: - virtual void add(std::function F) { - Concurrency::CurrentScheduler::ScheduleTask( - Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); - } -}; - -Executor *Executor::getDefaultExecutor() { - static ConcRTExecutor exec; - return &exec; -} - -#else /// An implementation of an Executor that runs closures on a thread pool /// in filo order. class ThreadPoolExecutor : public Executor { @@ -117,8 +89,7 @@ Executor *Executor::getDefaultExecutor() { static ThreadPoolExecutor exec; return &exec; } -#endif -} +} // namespace static std::atomic TaskGroupInstances; diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index c49260125db..14def83802d 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -855,11 +855,11 @@ void make_absolute(const Twine ¤t_directory, StringRef p(path.data(), path.size()); bool rootDirectory = path::has_root_directory(p); - bool rootName = - (real_style(Style::native) != Style::windows) || path::has_root_name(p); + bool rootName = path::has_root_name(p); // Already absolute. - if (rootName && rootDirectory) + if ((rootName || real_style(Style::native) != Style::windows) && + rootDirectory) return; // All of the following conditions will need the current directory. diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index aec00baec0e..bfb238cc853 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -121,31 +121,63 @@ extern "C" const char *__crashreporter_info__ asm(".desc ___crashreporter_info__, 0x10"); #endif -/// CrashHandler - This callback is run if a fatal signal is delivered to the -/// process, it prints the pretty stack trace. 
+static void setCrashLogMessage(const char *msg) LLVM_ATTRIBUTE_UNUSED; +static void setCrashLogMessage(const char *msg) { +#ifdef HAVE_CRASHREPORTERCLIENT_H + (void)CRSetCrashLogMessage(msg); +#elif HAVE_CRASHREPORTER_INFO + __crashreporter_info__ = msg; +#endif + // Don't reorder subsequent operations: whatever comes after might crash and + // we want the system crash handling to see the message we just set. + std::atomic_signal_fence(std::memory_order_seq_cst); +} + +#ifdef __APPLE__ +using CrashHandlerString = SmallString<2048>; +using CrashHandlerStringStorage = + std::aligned_storage::type; +static CrashHandlerStringStorage crashHandlerStringStorage; +#endif + +/// This callback is run if a fatal signal is delivered to the process, it +/// prints the pretty stack trace. static void CrashHandler(void *) { #ifndef __APPLE__ // On non-apple systems, just emit the crash stack trace to stderr. PrintCurStackTrace(errs()); #else - // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also - // put it into __crashreporter_info__. - SmallString<2048> TmpStr; + // Emit the crash stack trace to a SmallString, put it where the system crash + // handling will find it, and also send it to stderr. + // + // The SmallString is fairly large in the hope that we don't allocate (we're + // handling a fatal signal, something is already pretty wrong, allocation + // might not work). Further, we don't use a magic static in case that's also + // borked. We leak any allocation that does occur because the program is about + // to die anyways. This is technically racy if we were handling two fatal + // signals, however if we're in that situation a race is the least of our + // worries. + auto &crashHandlerString = + *new (&crashHandlerStringStorage) CrashHandlerString; + + // If we crash while trying to print the stack trace, we still want the system + // crash handling to have some partial information. 
That'll work out as long + // as the SmallString doesn't allocate. If it does allocate then the system + // crash handling will see some garbage because the inline buffer now contains + // a pointer. + setCrashLogMessage(crashHandlerString.c_str()); + { - raw_svector_ostream Stream(TmpStr); + raw_svector_ostream Stream(crashHandlerString); PrintCurStackTrace(Stream); } - if (!TmpStr.empty()) { -#ifdef HAVE_CRASHREPORTERCLIENT_H - // Cast to void to avoid warning. - (void)CRSetCrashLogMessage(TmpStr.c_str()); -#elif HAVE_CRASHREPORTER_INFO - __crashreporter_info__ = strdup(TmpStr.c_str()); -#endif - errs() << TmpStr.str(); - } - + if (!crashHandlerString.empty()) { + setCrashLogMessage(crashHandlerString.c_str()); + errs() << crashHandlerString.str(); + } else + setCrashLogMessage("No crash information."); #endif } diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index 7ce856b716c..5accf73e5f9 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -14,24 +14,20 @@ #include "llvm/Support/RWMutex.h" #include "llvm/Config/config.h" -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. 
-//===----------------------------------------------------------------------===// +#if defined(LLVM_USE_RW_MUTEX_IMPL) +using namespace llvm; +using namespace sys; #if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 // Define all methods as no-ops if threading is explicitly disabled -using namespace llvm; -using namespace sys; - RWMutexImpl::RWMutexImpl() = default; RWMutexImpl::~RWMutexImpl() = default; -bool RWMutexImpl::reader_acquire() { return true; } -bool RWMutexImpl::reader_release() { return true; } -bool RWMutexImpl::writer_acquire() { return true; } -bool RWMutexImpl::writer_release() { return true; } +bool RWMutexImpl::lock_shared() { return true; } +bool RWMutexImpl::unlock_shared() { return true; } +bool RWMutexImpl::lock() { return true; } +bool RWMutexImpl::unlock() { return true; } #else @@ -41,9 +37,6 @@ bool RWMutexImpl::writer_release() { return true; } #include #include -using namespace llvm; -using namespace sys; - // Construct a RWMutex using pthread calls RWMutexImpl::RWMutexImpl() { @@ -75,7 +68,7 @@ RWMutexImpl::~RWMutexImpl() } bool -RWMutexImpl::reader_acquire() +RWMutexImpl::lock_shared() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -85,7 +78,7 @@ RWMutexImpl::reader_acquire() } bool -RWMutexImpl::reader_release() +RWMutexImpl::unlock_shared() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -95,7 +88,7 @@ RWMutexImpl::reader_release() } bool -RWMutexImpl::writer_acquire() +RWMutexImpl::lock() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -105,7 +98,7 @@ RWMutexImpl::writer_acquire() } bool -RWMutexImpl::writer_release() +RWMutexImpl::unlock() { pthread_rwlock_t* rwlock = static_cast(data_); assert(rwlock != nullptr); @@ -114,11 +107,30 @@ RWMutexImpl::writer_release() return errorcode == 0; } -#elif defined(LLVM_ON_UNIX) -#include "Unix/RWMutex.inc" -#elif defined( _WIN32) -#include "Windows/RWMutex.inc" #else -#warning Neither 
LLVM_ON_UNIX nor _WIN32 was set in Support/Mutex.cpp + +RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { } + +RWMutexImpl::~RWMutexImpl() { + delete static_cast(data_); +} + +bool RWMutexImpl::lock_shared() { + return static_cast(data_)->acquire(); +} + +bool RWMutexImpl::unlock_shared() { + return static_cast(data_)->release(); +} + +bool RWMutexImpl::lock() { + return static_cast(data_)->acquire(); +} + +bool RWMutexImpl::unlock() { + return static_cast(data_)->release(); +} + +#endif #endif #endif diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 4c1b0703802..8da345d4f14 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -52,14 +52,24 @@ Regex::~Regex() { } } -bool Regex::isValid(std::string &Error) const { - if (!error) - return true; +namespace { +/// Utility to convert a regex error code into a human-readable string. +void RegexErrorToString(int error, struct llvm_regex *preg, + std::string &Error) { size_t len = llvm_regerror(error, preg, nullptr, 0); Error.resize(len - 1); llvm_regerror(error, preg, &Error[0], len); +} + +} // namespace + +bool Regex::isValid(std::string &Error) const { + if (!error) + return true; + + RegexErrorToString(error, preg, Error); return false; } @@ -69,8 +79,14 @@ unsigned Regex::getNumMatches() const { return preg->re_nsub; } -bool Regex::match(StringRef String, SmallVectorImpl *Matches){ - if (error) +bool Regex::match(StringRef String, SmallVectorImpl *Matches, + std::string *Error) const { + // Reset error, if given. + if (Error && !Error->empty()) + *Error = ""; + + // Check if the regex itself didn't successfully compile. + if (Error ? !isValid(*Error) : !isValid()) return false; unsigned nmatch = Matches ? preg->re_nsub+1 : 0; @@ -83,11 +99,13 @@ bool Regex::match(StringRef String, SmallVectorImpl *Matches){ int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + // Failure to match is not an error, it's just a normal return value. 
+ // Any other error code is considered abnormal, and is logged in the Error. if (rc == REG_NOMATCH) return false; if (rc != 0) { - // regexec can fail due to invalid pattern or running out of memory. - error = rc; + if (Error) + RegexErrorToString(error, preg, *Error); return false; } @@ -112,14 +130,11 @@ bool Regex::match(StringRef String, SmallVectorImpl *Matches){ } std::string Regex::sub(StringRef Repl, StringRef String, - std::string *Error) { + std::string *Error) const { SmallVector Matches; - // Reset error, if given. - if (Error && !Error->empty()) *Error = ""; - // Return the input if there was no match. - if (!match(String, &Matches)) + if (!match(String, &Matches, Error)) return String; // Otherwise splice in the replacement string, starting with the prefix before diff --git a/lib/Support/Signposts.cpp b/lib/Support/Signposts.cpp index d456f41d2fa..aa159e1da2a 100644 --- a/lib/Support/Signposts.cpp +++ b/lib/Support/Signposts.cpp @@ -78,6 +78,8 @@ public: #if LLVM_SUPPORT_XCODE_SIGNPOSTS #define HAVE_ANY_SIGNPOST_IMPL 1 +#else +#define HAVE_ANY_SIGNPOST_IMPL 0 #endif SignpostEmitter::SignpostEmitter() { diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index 96e09f9552b..9bd1f18a4ee 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -53,7 +53,7 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, return false; RegExes.emplace_back( - std::make_pair(make_unique(std::move(CheckRE)), LineNumber)); + std::make_pair(std::make_unique(std::move(CheckRE)), LineNumber)); return true; } @@ -175,7 +175,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, // Create this section if it has not been seen before. 
if (SectionsMap.find(Section) == SectionsMap.end()) { - std::unique_ptr M = make_unique(); + std::unique_ptr M = std::make_unique(); std::string REError; if (!M->insert(Section, LineNo, REError)) { Error = (Twine("malformed section ") + Section + ": '" + REError).str(); diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index e4f0535d21a..8b4177c7fba 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -57,7 +57,7 @@ namespace { /// This class is also used to look up statistic values from applications that /// use LLVM. class StatisticInfo { - std::vector Stats; + std::vector Stats; friend void llvm::PrintStatistics(); friend void llvm::PrintStatistics(raw_ostream &OS); @@ -66,14 +66,12 @@ class StatisticInfo { /// Sort statistics by debugtype,name,description. void sort(); public: - using const_iterator = std::vector::const_iterator; + using const_iterator = std::vector::const_iterator; StatisticInfo(); ~StatisticInfo(); - void addStatistic(Statistic *S) { - Stats.push_back(S); - } + void addStatistic(TrackingStatistic *S) { Stats.push_back(S); } const_iterator begin() const { return Stats.begin(); } const_iterator end() const { return Stats.end(); } @@ -90,7 +88,7 @@ static ManagedStatic > StatLock; /// RegisterStatistic - The first time a statistic is bumped, this method is /// called. -void Statistic::RegisterStatistic() { +void TrackingStatistic::RegisterStatistic() { // If stats are enabled, inform StatInfo that this statistic should be // printed. // llvm_shutdown calls destructors while holding the ManagedStatic mutex. 
@@ -135,15 +133,16 @@ bool llvm::AreStatisticsEnabled() { } void StatisticInfo::sort() { - llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) { - if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType())) - return Cmp < 0; + llvm::stable_sort( + Stats, [](const TrackingStatistic *LHS, const TrackingStatistic *RHS) { + if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType())) + return Cmp < 0; - if (int Cmp = std::strcmp(LHS->getName(), RHS->getName())) - return Cmp < 0; + if (int Cmp = std::strcmp(LHS->getName(), RHS->getName())) + return Cmp < 0; - return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; - }); + return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; + }); } void StatisticInfo::reset() { @@ -207,7 +206,7 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) { // Print all of the statistics. OS << "{\n"; const char *delim = ""; - for (const Statistic *Stat : Stats.Stats) { + for (const TrackingStatistic *Stat : Stats.Stats) { OS << delim; assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None && "Statistic group/type name is simple."); diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index bf28b2be565..af8dd463e12 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -60,7 +60,9 @@ void llvm::SplitString(StringRef Source, void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { unsigned char C = Name[i]; - if (isPrint(C) && C != '\\' && C != '"') + if (C == '\\') + Out << '\\' << C; + else if (isPrint(C) && C != '"') Out << C; else Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); diff --git a/lib/Support/TimeProfiler.cpp b/lib/Support/TimeProfiler.cpp index bc234081564..ca9119e30b6 100644 --- a/lib/Support/TimeProfiler.cpp +++ b/lib/Support/TimeProfiler.cpp @@ -24,29 +24,38 @@ using namespace std::chrono; namespace llvm { -static cl::opt TimeTraceGranularity( - 
"time-trace-granularity", - cl::desc( - "Minimum time granularity (in microseconds) traced by time profiler"), - cl::init(500)); - TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; typedef duration DurationType; +typedef time_point TimePointType; typedef std::pair CountAndDurationType; typedef std::pair NameAndCountAndDurationType; struct Entry { - time_point Start; - DurationType Duration; + TimePointType Start; + TimePointType End; std::string Name; std::string Detail; - Entry(time_point &&S, DurationType &&D, std::string &&N, - std::string &&Dt) - : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)), + Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) + : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Detail(std::move(Dt)){}; + + // Calculate timings for FlameGraph. Cast time points to microsecond precision + // rather than casting duration. This avoid truncation issues causing inner + // scopes overruning outer scopes. + steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { + return (time_point_cast(Start) - + time_point_cast(StartTime)) + .count(); + } + + steady_clock::rep getFlameGraphDurUs() const { + return (time_point_cast(End) - + time_point_cast(Start)) + .count(); + } }; struct TimeTraceProfiler { @@ -55,17 +64,27 @@ struct TimeTraceProfiler { } void begin(std::string Name, llvm::function_ref Detail) { - Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name), + Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), Detail()); } void end() { assert(!Stack.empty() && "Must call begin() first"); auto &E = Stack.back(); - E.Duration = steady_clock::now() - E.Start; + E.End = steady_clock::now(); - // Only include sections longer than TimeTraceGranularity msec. - if (duration_cast(E.Duration).count() > TimeTraceGranularity) + // Check that end times monotonically increase. 
+ assert((Entries.empty() || + (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + Entries.back().getFlameGraphStartUs(StartTime) + + Entries.back().getFlameGraphDurUs())) && + "TimeProfiler scope ended earlier than previous scope"); + + // Calculate duration at full precision for overall counts. + DurationType Duration = E.End - E.Start; + + // Only include sections longer or equal to TimeTraceGranularity msec. + if (duration_cast(Duration).count() >= TimeTraceGranularity) Entries.emplace_back(E); // Track total time taken by each "name", but only the topmost levels of @@ -78,7 +97,7 @@ struct TimeTraceProfiler { }) == Stack.rend()) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; - CountAndTotal.second += E.Duration; + CountAndTotal.second += Duration; } Stack.pop_back(); @@ -94,8 +113,8 @@ struct TimeTraceProfiler { // Emit all events for the main flame graph. for (const auto &E : Entries) { - auto StartUs = duration_cast(E.Start - StartTime).count(); - auto DurUs = duration_cast(E.Duration).count(); + auto StartUs = E.getFlameGraphStartUs(StartTime); + auto DurUs = E.getFlameGraphDurUs(); J.object([&]{ J.attribute("pid", 1); @@ -160,13 +179,17 @@ struct TimeTraceProfiler { SmallVector Stack; SmallVector Entries; StringMap CountAndTotalPerName; - time_point StartTime; + TimePointType StartTime; + + // Minimum time granularity (in microseconds) + unsigned TimeTraceGranularity; }; -void timeTraceProfilerInitialize() { +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) { assert(TimeTraceProfilerInstance == nullptr && "Profiler should not be initialized"); TimeTraceProfilerInstance = new TimeTraceProfiler(); + TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity; } void timeTraceProfilerCleanup() { diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 2a7ff1eaaf6..10c9b8e0b32 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -58,23 +58,23 @@ namespace { 
std::unique_ptr llvm::CreateInfoOutputFile() { const std::string &OutputFilename = getLibSupportInfoOutputFilename(); if (OutputFilename.empty()) - return llvm::make_unique(2, false); // stderr. + return std::make_unique(2, false); // stderr. if (OutputFilename == "-") - return llvm::make_unique(1, false); // stdout. + return std::make_unique(1, false); // stdout. // Append mode is used because the info output file is opened and closed // each time -stats or -time-passes wants to print output to it. To // compensate for this, the test-suite Makefiles have code to delete the // info output file before running commands which write to it. std::error_code EC; - auto Result = llvm::make_unique( - OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text); + auto Result = std::make_unique( + OutputFilename, EC, sys::fs::OF_Append | sys::fs::OF_Text); if (!EC) return Result; errs() << "Error opening info-output-file '" << OutputFilename << " for appending!\n"; - return llvm::make_unique(2, false); // stderr. + return std::make_unique(2, false); // stderr. 
} namespace { diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index a0927da50e4..05f8e32896f 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -176,7 +176,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) { std::error_code Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { - static const size_t PageSize = Process::getPageSizeEstimate(); + static const Align PageSize = Align(Process::getPageSizeEstimate()); if (M.Address == nullptr || M.AllocatedSize == 0) return std::error_code(); @@ -184,8 +184,8 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { return std::error_code(EINVAL, std::generic_category()); int Protect = getPosixProtectionFlags(Flags); - uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize); - uintptr_t End = alignAddr((uint8_t *)M.Address + M.AllocatedSize, PageSize); + uintptr_t Start = alignAddr((const uint8_t *)M.Address - PageSize.value() + 1, PageSize); + uintptr_t End = alignAddr((const uint8_t *)M.Address + M.AllocatedSize, PageSize); bool InvalidateCache = (Flags & MF_EXEC); diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc deleted file mode 100644 index 2c982b38d6f..00000000000 --- a/lib/Support/Unix/Mutex.inc +++ /dev/null @@ -1,42 +0,0 @@ -//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Unix specific (non-pthread) Mutex class. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -namespace llvm -{ -using namespace sys; - -MutexImpl::MutexImpl( bool recursive) -{ -} - -MutexImpl::~MutexImpl() -{ -} - -bool -MutexImpl::release() -{ - return true; -} - -bool -MutexImpl::tryacquire( void ) -{ - return true; -} - -} diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index e80880c6b3c..a617eca3566 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -186,12 +186,12 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__minix) || defined(__DragonFly__) || \ defined(__FreeBSD_kernel__) || defined(_AIX) - StringRef curproc("/proc/curproc/file"); + const char *curproc = "/proc/curproc/file"; char exe_path[PATH_MAX]; // /proc is not mounted by default under FreeBSD, but gives more accurate // information than argv[0] when it is. if (sys::fs::exists(curproc)) { - ssize_t len = readlink(curproc.str().c_str(), exe_path, sizeof(exe_path)); + ssize_t len = readlink(curproc, exe_path, sizeof(exe_path)); if (len > 0) { // Null terminate the string for realpath. readlink never null // terminates its output. @@ -205,10 +205,10 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { return exe_path; #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; - StringRef aPath("/proc/self/exe"); + const char *aPath = "/proc/self/exe"; if (sys::fs::exists(aPath)) { // /proc is not always mounted under Linux (chroot for example). 
- ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path)); + ssize_t len = readlink(aPath, exe_path, sizeof(exe_path)); if (len < 0) return ""; @@ -443,7 +443,7 @@ static bool is_local_impl(struct STATVFS &Vfs) { std::unique_ptr Buf; int Tries = 3; while (Tries--) { - Buf = llvm::make_unique(BufSize); + Buf = std::make_unique(BufSize); Ret = mntctl(MCTL_QUERY, BufSize, Buf.get()); if (Ret != 0) break; @@ -833,7 +833,10 @@ std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) { static file_type direntType(dirent* Entry) { // Most platforms provide the file type in the dirent: Linux/BSD/Mac. // The DTTOIF macro lets us reuse our status -> type conversion. -#if defined(_DIRENT_HAVE_D_TYPE) && defined(DTTOIF) + // Note that while glibc provides a macro to see if this is supported, + // _DIRENT_HAVE_D_TYPE, it's not defined on BSD/Mac, so we test for the + // d_type-to-mode_t conversion macro instead. +#if defined(DTTOIF) return typeForMode(DTTOIF(Entry->d_type)); #else // Other platforms such as Solaris require a stat() to get the type. @@ -884,9 +887,9 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags, else if (Access == (FA_Read | FA_Write)) Result |= O_RDWR; - // This is for compatibility with old code that assumed F_Append implied + // This is for compatibility with old code that assumed OF_Append implied // would open an existing file. See Windows/Path.inc for a longer comment. - if (Flags & F_Append) + if (Flags & OF_Append) Disp = CD_OpenAlways; if (Disp == CD_CreateNew) { @@ -901,7 +904,7 @@ static int nativeOpenFlags(CreationDisposition Disp, OpenFlags Flags, // Nothing special, just don't add O_CREAT and we get these semantics. 
} - if (Flags & F_Append) + if (Flags & OF_Append) Result |= O_APPEND; #ifdef O_CLOEXEC @@ -996,44 +999,28 @@ file_t getStdinHandle() { return 0; } file_t getStdoutHandle() { return 1; } file_t getStderrHandle() { return 2; } -std::error_code readNativeFile(file_t FD, MutableArrayRef Buf, - size_t *BytesRead) { - *BytesRead = sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); - if (ssize_t(*BytesRead) == -1) - return std::error_code(errno, std::generic_category()); - return std::error_code(); +Expected readNativeFile(file_t FD, MutableArrayRef Buf) { + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); + if (ssize_t(NumRead) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; } -std::error_code readNativeFileSlice(file_t FD, MutableArrayRef Buf, - size_t Offset) { - char *BufPtr = Buf.data(); - size_t BytesLeft = Buf.size(); - -#ifndef HAVE_PREAD - // If we don't have pread, seek to Offset. - if (lseek(FD, Offset, SEEK_SET) == -1) - return std::error_code(errno, std::generic_category()); -#endif - - while (BytesLeft) { +Expected readNativeFileSlice(file_t FD, MutableArrayRef Buf, + uint64_t Offset) { #ifdef HAVE_PREAD - ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft, - Buf.size() - BytesLeft + Offset); + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::pread, FD, Buf.data(), Buf.size(), Offset); #else - ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft); + if (lseek(FD, Offset, SEEK_SET) == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + ssize_t NumRead = + sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size()); #endif - if (NumRead == -1) { - // Error while reading. - return std::error_code(errno, std::generic_category()); - } - if (NumRead == 0) { - memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer. 
- break; - } - BytesLeft -= NumRead; - BufPtr += NumRead; - } - return std::error_code(); + if (NumRead == -1) + return errorCodeToError(std::error_code(errno, std::generic_category())); + return NumRead; } std::error_code closeFile(file_t &F) { @@ -1200,7 +1187,7 @@ namespace fs { /// implementation. std::error_code copy_file(const Twine &From, const Twine &To) { uint32_t Flag = COPYFILE_DATA; -#if __has_builtin(__builtin_available) +#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE) if (__builtin_available(macos 10.12, *)) { bool IsSymlink; if (std::error_code Error = is_symlink_file(From, IsSymlink)) diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 4115ee39658..dfe81d7e283 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -15,8 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" +#include #if HAVE_FCNTL_H #include #endif @@ -327,13 +326,13 @@ extern "C" int tigetnum(char *capname); #endif #ifdef HAVE_TERMINFO -static ManagedStatic TermColorMutex; +static ManagedStatic TermColorMutex; #endif static bool terminalHasColors(int fd) { #ifdef HAVE_TERMINFO // First, acquire a global lock because these C routines are thread hostile. - MutexGuard G(*TermColorMutex); + std::lock_guard G(*TermColorMutex); int errret = 0; if (setupterm(nullptr, fd, &errret) != 0) diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index c4123a64046..520685a0e98 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -136,7 +136,7 @@ static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg, if (int Err = posix_spawn_file_actions_addopen( FileActions, FD, File, FD == 0 ? 
O_RDONLY : O_WRONLY | O_CREAT, 0666)) - return MakeErrMsg(ErrMsg, "Cannot dup2", Err); + return MakeErrMsg(ErrMsg, "Cannot posix_spawn_file_actions_addopen", Err); return false; } #endif @@ -444,7 +444,7 @@ std::error_code llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, WindowsEncodingMethod Encoding /*unused*/) { std::error_code EC; - llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::F_Text); + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OpenFlags::OF_Text); if (EC) return EC; diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc deleted file mode 100644 index 8b47dfa0f85..00000000000 --- a/lib/Support/Unix/RWMutex.inc +++ /dev/null @@ -1,50 +0,0 @@ -//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Unix specific (non-pthread) RWMutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/Mutex.h" - -namespace llvm { - -using namespace sys; - -// This naive implementation treats readers the same as writers. This -// will therefore deadlock if a thread tries to acquire a read lock -// multiple times. 
- -RWMutexImpl::RWMutexImpl() : data_(new MutexImpl(false)) { } - -RWMutexImpl::~RWMutexImpl() { - delete static_cast(data_); -} - -bool RWMutexImpl::reader_acquire() { - return static_cast(data_)->acquire(); -} - -bool RWMutexImpl::reader_release() { - return static_cast(data_)->release(); -} - -bool RWMutexImpl::writer_acquire() { - return static_cast(data_)->acquire(); -} - -bool RWMutexImpl::writer_release() { - return static_cast(data_)->release(); -} - -} diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 634c16aa36c..5e0cde4a81e 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -43,7 +43,6 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" #include "llvm/Support/SaveAndRestore.h" -#include "llvm/Support/UniqueLock.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -83,12 +82,18 @@ using namespace llvm; static RETSIGTYPE SignalHandler(int Sig); // defined below. static RETSIGTYPE InfoSignalHandler(int Sig); // defined below. +static void DefaultPipeSignalFunction() { + exit(EX_IOERR); +} + using SignalHandlerFunctionType = void (*)(); /// The function to call if ctrl-c is pressed. static std::atomic InterruptFunction = ATOMIC_VAR_INIT(nullptr); static std::atomic InfoSignalFunction = ATOMIC_VAR_INIT(nullptr); +static std::atomic PipeSignalFunction = + ATOMIC_VAR_INIT(DefaultPipeSignalFunction); namespace { /// Signal-safe removal of files. @@ -364,7 +369,8 @@ static RETSIGTYPE SignalHandler(int Sig) { // Send a special return code that drivers can check for, from sysexits.h. if (Sig == SIGPIPE) - exit(EX_IOERR); + if (SignalHandlerFunctionType CurrentPipeFunction = PipeSignalFunction) + CurrentPipeFunction(); raise(Sig); // Execute the default handler. 
return; @@ -404,6 +410,11 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { RegisterHandlers(); } +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + PipeSignalFunction.exchange(Handler); + RegisterHandlers(); +} + // The public API bool llvm::sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp index 5d3480e9714..c390cb1b222 100644 --- a/lib/Support/VirtualFileSystem.cpp +++ b/lib/Support/VirtualFileSystem.cpp @@ -176,9 +176,9 @@ class RealFile : public File { Status S; std::string RealName; - RealFile(file_t FD, StringRef NewName, StringRef NewRealPathName) - : FD(FD), S(NewName, {}, {}, {}, {}, {}, - llvm::sys::fs::file_type::status_error, {}), + RealFile(file_t RawFD, StringRef NewName, StringRef NewRealPathName) + : FD(RawFD), S(NewName, {}, {}, {}, {}, {}, + llvm::sys::fs::file_type::status_error, {}), RealName(NewRealPathName.str()) { assert(FD != kInvalidFile && "Invalid or inactive file descriptor"); } @@ -349,7 +349,7 @@ IntrusiveRefCntPtr vfs::getRealFileSystem() { } std::unique_ptr vfs::createPhysicalFileSystem() { - return llvm::make_unique(false); + return std::make_unique(false); } namespace { @@ -754,7 +754,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file, NewDirectoryPerms); Dir = cast(Dir->addChild( - Name, llvm::make_unique(std::move(Stat)))); + Name, std::make_unique(std::move(Stat)))); continue; } @@ -989,6 +989,16 @@ std::error_code InMemoryFileSystem::isLocal(const Twine &Path, bool &Result) { // RedirectingFileSystem implementation //===-----------------------------------------------------------------------===/ +RedirectingFileSystem::RedirectingFileSystem(IntrusiveRefCntPtr FS) + : ExternalFS(std::move(FS)) { + if (ExternalFS) + if (auto ExternalWorkingDirectory = + ExternalFS->getCurrentWorkingDirectory()) { + WorkingDirectory 
= *ExternalWorkingDirectory; + ExternalFSValidWD = true; + } +} + // FIXME: reuse implementation common with OverlayFSDirIterImpl as these // iterators are conceptually similar. class llvm::vfs::VFSFromYamlDirIterImpl @@ -1035,12 +1045,27 @@ public: llvm::ErrorOr RedirectingFileSystem::getCurrentWorkingDirectory() const { - return ExternalFS->getCurrentWorkingDirectory(); + return WorkingDirectory; } std::error_code RedirectingFileSystem::setCurrentWorkingDirectory(const Twine &Path) { - return ExternalFS->setCurrentWorkingDirectory(Path); + // Don't change the working directory if the path doesn't exist. + if (!exists(Path)) + return errc::no_such_file_or_directory; + + // Always change the external FS but ignore its result. + if (ExternalFS) { + auto EC = ExternalFS->setCurrentWorkingDirectory(Path); + ExternalFSValidWD = !static_cast(EC); + } + + SmallString<128> AbsolutePath; + Path.toVector(AbsolutePath); + if (std::error_code EC = makeAbsolute(AbsolutePath)) + return EC; + WorkingDirectory = AbsolutePath.str(); + return {}; } std::error_code RedirectingFileSystem::isLocal(const Twine &Path, @@ -1053,7 +1078,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, ErrorOr E = lookupPath(Dir); if (!E) { EC = E.getError(); - if (IsFallthrough && EC == errc::no_such_file_or_directory) + if (shouldUseExternalFS() && EC == errc::no_such_file_or_directory) return ExternalFS->dir_begin(Dir, EC); return {}; } @@ -1071,7 +1096,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, auto *D = cast(*E); return directory_iterator(std::make_shared( Dir, D->contents_begin(), D->contents_end(), - /*IterateExternalFS=*/IsFallthrough, *ExternalFS, EC)); + /*IterateExternalFS=*/shouldUseExternalFS(), *ExternalFS, EC)); } void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) { @@ -1082,20 +1107,19 @@ StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { return ExternalContentsPrefixDir; } -#if 
!defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { +void RedirectingFileSystem::dump(raw_ostream &OS) const { for (const auto &Root : Roots) - dumpEntry(Root.get()); + dumpEntry(OS, Root.get()); } -LLVM_DUMP_METHOD void -RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E, - int NumSpaces) const { +void RedirectingFileSystem::dumpEntry(raw_ostream &OS, + RedirectingFileSystem::Entry *E, + int NumSpaces) const { StringRef Name = E->getName(); for (int i = 0, e = NumSpaces; i < e; ++i) - dbgs() << " "; - dbgs() << "'" << Name.str().c_str() << "'" - << "\n"; + OS << " "; + OS << "'" << Name.str().c_str() << "'" + << "\n"; if (E->getKind() == RedirectingFileSystem::EK_Directory) { auto *DE = dyn_cast(E); @@ -1103,9 +1127,12 @@ RedirectingFileSystem::dumpEntry(RedirectingFileSystem::Entry *E, for (std::unique_ptr &SubEntry : llvm::make_range(DE->contents_begin(), DE->contents_end())) - dumpEntry(SubEntry.get(), NumSpaces + 2); + dumpEntry(OS, SubEntry.get(), NumSpaces + 2); } } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RedirectingFileSystem::dump() const { dump(dbgs()); } #endif /// A helper class to hold the common YAML parsing state. @@ -1209,7 +1236,7 @@ class llvm::vfs::RedirectingFileSystemParser { // ... 
or create a new one std::unique_ptr E = - llvm::make_unique( + std::make_unique( Name, Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, file_type::directory_file, sys::fs::all_all)); @@ -1221,7 +1248,7 @@ class llvm::vfs::RedirectingFileSystemParser { } auto *DE = - dyn_cast(ParentEntry); + cast(ParentEntry); DE->addContent(std::move(E)); return DE->getLastContent(); } @@ -1232,9 +1259,7 @@ class llvm::vfs::RedirectingFileSystemParser { StringRef Name = SrcE->getName(); switch (SrcE->getKind()) { case RedirectingFileSystem::EK_Directory: { - auto *DE = - dyn_cast(SrcE); - assert(DE && "Must be a directory"); + auto *DE = cast(SrcE); // Empty directories could be present in the YAML as a way to // describe a file for a current directory after some of its subdir // is parsed. This only leads to redundant walks, ignore it. @@ -1246,13 +1271,12 @@ class llvm::vfs::RedirectingFileSystemParser { break; } case RedirectingFileSystem::EK_File: { - auto *FE = dyn_cast(SrcE); - assert(FE && "Must be a file"); assert(NewParentE && "Parent entry must exist"); - auto *DE = dyn_cast( - NewParentE); + auto *FE = cast(SrcE); + auto *DE = + cast(NewParentE); DE->addContent( - llvm::make_unique( + std::make_unique( Name, FE->getExternalContentsPath(), FE->getUseName())); break; } @@ -1423,12 +1447,12 @@ class llvm::vfs::RedirectingFileSystemParser { std::unique_ptr Result; switch (Kind) { case RedirectingFileSystem::EK_File: - Result = llvm::make_unique( + Result = std::make_unique( LastComponent, std::move(ExternalContentsPath), UseExternalName); break; case RedirectingFileSystem::EK_Directory: Result = - llvm::make_unique( + std::make_unique( LastComponent, std::move(EntryArrayContents), Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, @@ -1447,7 +1471,7 @@ class llvm::vfs::RedirectingFileSystemParser { std::vector> Entries; Entries.push_back(std::move(Result)); Result = - llvm::make_unique( + std::make_unique( *I, 
std::move(Entries), Status("", getNextVirtualUniqueID(), std::chrono::system_clock::now(), 0, 0, 0, @@ -1573,7 +1597,7 @@ RedirectingFileSystem::create(std::unique_ptr Buffer, RedirectingFileSystemParser P(Stream); std::unique_ptr FS( - new RedirectingFileSystem(std::move(ExternalFS))); + new RedirectingFileSystem(ExternalFS)); if (!YAMLFilePath.empty()) { // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed @@ -1702,7 +1726,7 @@ ErrorOr RedirectingFileSystem::status(const Twine &Path, ErrorOr RedirectingFileSystem::status(const Twine &Path) { ErrorOr Result = lookupPath(Path); if (!Result) { - if (IsFallthrough && + if (shouldUseExternalFS() && Result.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->status(Path); } @@ -1740,7 +1764,7 @@ ErrorOr> RedirectingFileSystem::openFileForRead(const Twine &Path) { ErrorOr E = lookupPath(Path); if (!E) { - if (IsFallthrough && + if (shouldUseExternalFS() && E.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->openFileForRead(Path); } @@ -1763,7 +1787,7 @@ RedirectingFileSystem::openFileForRead(const Twine &Path) { Status S = getRedirectedFileStatus(Path, F->useExternalName(UseExternalNames), *ExternalStatus); return std::unique_ptr( - llvm::make_unique(std::move(*Result), S)); + std::make_unique(std::move(*Result), S)); } std::error_code @@ -1771,7 +1795,7 @@ RedirectingFileSystem::getRealPath(const Twine &Path, SmallVectorImpl &Output) const { ErrorOr Result = lookupPath(Path); if (!Result) { - if (IsFallthrough && + if (shouldUseExternalFS() && Result.getError() == llvm::errc::no_such_file_or_directory) { return ExternalFS->getRealPath(Path, Output); } @@ -1784,8 +1808,8 @@ RedirectingFileSystem::getRealPath(const Twine &Path, } // Even if there is a directory entry, fall back to ExternalFS if allowed, // because directories don't have a single external contents path. - return IsFallthrough ? 
ExternalFS->getRealPath(Path, Output) - : llvm::errc::invalid_argument; + return shouldUseExternalFS() ? ExternalFS->getRealPath(Path, Output) + : llvm::errc::invalid_argument; } IntrusiveRefCntPtr diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc deleted file mode 100644 index b55b14febf2..00000000000 --- a/lib/Support/Windows/Mutex.inc +++ /dev/null @@ -1,56 +0,0 @@ -//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Win32 specific (non-pthread) Mutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Win32 code that -//=== is guaranteed to work on *all* Win32 variants. 
-//===----------------------------------------------------------------------===// - -#include "WindowsSupport.h" -#include "llvm/Support/Mutex.h" - -namespace llvm { - -sys::MutexImpl::MutexImpl(bool /*recursive*/) -{ - data_ = new CRITICAL_SECTION; - InitializeCriticalSection((LPCRITICAL_SECTION)data_); -} - -sys::MutexImpl::~MutexImpl() -{ - DeleteCriticalSection((LPCRITICAL_SECTION)data_); - delete (LPCRITICAL_SECTION)data_; - data_ = 0; -} - -bool -sys::MutexImpl::acquire() -{ - EnterCriticalSection((LPCRITICAL_SECTION)data_); - return true; -} - -bool -sys::MutexImpl::release() -{ - LeaveCriticalSection((LPCRITICAL_SECTION)data_); - return true; -} - -bool -sys::MutexImpl::tryacquire() -{ - return TryEnterCriticalSection((LPCRITICAL_SECTION)data_); -} - -} diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 5704930aeec..c3b13abef5d 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -371,13 +371,19 @@ static std::error_code realPathFromHandle(HANDLE H, if (std::error_code EC = realPathFromHandle(H, Buffer)) return EC; - const wchar_t *Data = Buffer.data(); + // Strip the \\?\ prefix. We don't want it ending up in output, and such + // paths don't get canonicalized by file APIs. + wchar_t *Data = Buffer.data(); DWORD CountChars = Buffer.size(); - if (CountChars >= 4) { - if (0 == ::memcmp(Data, L"\\\\?\\", 8)) { - CountChars -= 4; - Data += 4; - } + if (CountChars >= 8 && ::memcmp(Data, L"\\\\?\\UNC\\", 16) == 0) { + // Convert \\?\UNC\foo\bar to \\foo\bar + CountChars -= 6; + Data += 6; + Data[0] = '\\'; + } else if (CountChars >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) { + // Convert \\?\c:\foo to c:\foo + CountChars -= 4; + Data += 4; } // Convert the result from UTF-16 to UTF-8. 
@@ -1217,57 +1223,34 @@ file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); } file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); } file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); } -std::error_code readNativeFileImpl(file_t FileHandle, char *BufPtr, size_t BytesToRead, - size_t *BytesRead, OVERLAPPED *Overlap) { +Expected readNativeFileImpl(file_t FileHandle, + MutableArrayRef Buf, + OVERLAPPED *Overlap) { // ReadFile can only read 2GB at a time. The caller should check the number of // bytes and read in a loop until termination. - DWORD BytesToRead32 = - std::min(size_t(std::numeric_limits::max()), BytesToRead); - DWORD BytesRead32 = 0; - bool Success = - ::ReadFile(FileHandle, BufPtr, BytesToRead32, &BytesRead32, Overlap); - *BytesRead = BytesRead32; - if (!Success) { - DWORD Err = ::GetLastError(); - // Pipe EOF is not an error. - if (Err == ERROR_BROKEN_PIPE) - return std::error_code(); - return mapWindowsError(Err); - } - return std::error_code(); + DWORD BytesToRead = + std::min(size_t(std::numeric_limits::max()), Buf.size()); + DWORD BytesRead = 0; + if (::ReadFile(FileHandle, Buf.data(), BytesToRead, &BytesRead, Overlap)) + return BytesRead; + DWORD Err = ::GetLastError(); + // EOF is not an error. 
+ if (Err == ERROR_BROKEN_PIPE || Err == ERROR_HANDLE_EOF) + return BytesRead; + return errorCodeToError(mapWindowsError(Err)); } -std::error_code readNativeFile(file_t FileHandle, MutableArrayRef Buf, - size_t *BytesRead) { - return readNativeFileImpl(FileHandle, Buf.data(), Buf.size(), BytesRead, - /*Overlap=*/nullptr); +Expected readNativeFile(file_t FileHandle, MutableArrayRef Buf) { + return readNativeFileImpl(FileHandle, Buf, /*Overlap=*/nullptr); } -std::error_code readNativeFileSlice(file_t FileHandle, - MutableArrayRef Buf, size_t Offset) { - char *BufPtr = Buf.data(); - size_t BytesLeft = Buf.size(); - - while (BytesLeft) { - uint64_t CurOff = Buf.size() - BytesLeft + Offset; - OVERLAPPED Overlapped = {}; - Overlapped.Offset = uint32_t(CurOff); - Overlapped.OffsetHigh = uint32_t(uint64_t(CurOff) >> 32); - - size_t BytesRead = 0; - if (auto EC = readNativeFileImpl(FileHandle, BufPtr, BytesLeft, &BytesRead, - &Overlapped)) - return EC; - - // Once we reach EOF, zero the remaining bytes in the buffer. 
- if (BytesRead == 0) { - memset(BufPtr, 0, BytesLeft); - break; - } - BytesLeft -= BytesRead; - BufPtr += BytesRead; - } - return std::error_code(); +Expected readNativeFileSlice(file_t FileHandle, + MutableArrayRef Buf, + uint64_t Offset) { + OVERLAPPED Overlapped = {}; + Overlapped.Offset = uint32_t(Offset); + Overlapped.OffsetHigh = uint32_t(Offset >> 32); + return readNativeFileImpl(FileHandle, Buf, &Overlapped); } std::error_code closeFile(file_t &F) { diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 0f54e59ee55..a23ed95fc39 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -470,7 +470,7 @@ std::error_code llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents, WindowsEncodingMethod Encoding) { std::error_code EC; - llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::F_Text); + llvm::raw_fd_ostream OS(FileName, EC, llvm::sys::fs::OF_Text); if (EC) return EC; diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc deleted file mode 100644 index 8df9bc39416..00000000000 --- a/lib/Support/Windows/RWMutex.inc +++ /dev/null @@ -1,128 +0,0 @@ -//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the Win32 specific (non-pthread) RWMutex class. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Win32 code that -//=== is guaranteed to work on *all* Win32 variants. 
-//===----------------------------------------------------------------------===// - -#include "WindowsSupport.h" - -namespace llvm { - -// Windows has slim read-writer lock support on Vista and higher, so we -// will attempt to load the APIs. If they exist, we will use them, and -// if not, we will fall back on critical sections. When we drop support -// for XP, we can stop lazy-loading these APIs and just use them directly. -#if defined(__MINGW32__) - // Taken from WinNT.h - typedef struct _RTL_SRWLOCK { - PVOID Ptr; - } RTL_SRWLOCK, *PRTL_SRWLOCK; - - // Taken from WinBase.h - typedef RTL_SRWLOCK SRWLOCK, *PSRWLOCK; -#endif - -static VOID (WINAPI *fpInitializeSRWLock)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpAcquireSRWLockExclusive)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpAcquireSRWLockShared)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpReleaseSRWLockExclusive)(PSRWLOCK lock) = NULL; -static VOID (WINAPI *fpReleaseSRWLockShared)(PSRWLOCK lock) = NULL; - -static bool sHasSRW = false; - -static bool loadSRW() { - static bool sChecked = false; - if (!sChecked) { - sChecked = true; - - if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) { - fpInitializeSRWLock = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "InitializeSRWLock"); - fpAcquireSRWLockExclusive = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "AcquireSRWLockExclusive"); - fpAcquireSRWLockShared = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "AcquireSRWLockShared"); - fpReleaseSRWLockExclusive = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "ReleaseSRWLockExclusive"); - fpReleaseSRWLockShared = - (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, - "ReleaseSRWLockShared"); - - if (fpInitializeSRWLock != NULL) { - sHasSRW = true; - } - } - } - return sHasSRW; -} - -sys::RWMutexImpl::RWMutexImpl() { - if (loadSRW()) { - data_ = safe_calloc(1, sizeof(SRWLOCK)); - fpInitializeSRWLock(static_cast(data_)); - } else { - data_ = safe_calloc(1, 
sizeof(CRITICAL_SECTION)); - InitializeCriticalSection(static_cast(data_)); - } -} - -sys::RWMutexImpl::~RWMutexImpl() { - if (!sHasSRW) - DeleteCriticalSection(static_cast(data_)); - // Nothing to do in the case of slim reader/writers except free the memory. - free(data_); -} - -bool sys::RWMutexImpl::reader_acquire() { - if (sHasSRW) { - fpAcquireSRWLockShared(static_cast(data_)); - } else { - EnterCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::reader_release() { - if (sHasSRW) { - fpReleaseSRWLockShared(static_cast(data_)); - } else { - LeaveCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::writer_acquire() { - if (sHasSRW) { - fpAcquireSRWLockExclusive(static_cast(data_)); - } else { - EnterCriticalSection(static_cast(data_)); - } - return true; -} - -bool sys::RWMutexImpl::writer_release() { - if (sHasSRW) { - fpReleaseSRWLockExclusive(static_cast(data_)); - } else { - LeaveCriticalSection(static_cast(data_)); - } - return true; -} - - -} diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 6a820ef22b1..d962daf7934 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -560,6 +560,9 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { // Unimplemented. } +void llvm::sys::SetPipeSignalFunction(void (*Handler)()) { + // Unimplemented. +} /// Add a function to be called when a signal is delivered to the process. 
The /// handler can have a cookie passed to it to identify what instance of the diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index fed9b2f462e..2e2e97430b7 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -38,6 +38,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Allocator.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/VersionTuple.h" diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc index bbbf7ea6a77..0a4fda1d4e8 100644 --- a/lib/Support/Windows/explicit_symbols.inc +++ b/lib/Support/Windows/explicit_symbols.inc @@ -90,12 +90,6 @@ INLINE_DEF_FLOAT_SYMBOL(tanf, 1) INLINE_DEF_FLOAT_SYMBOL(tanhf, 1) - // These were added in VS 2013. -#if (1800 <= _MSC_VER && _MSC_VER < 1900) - INLINE_DEF_FLOAT_SYMBOL(copysignf, 2) - INLINE_DEF_FLOAT_SYMBOL(fminf, 2) - INLINE_DEF_FLOAT_SYMBOL(fmaxf, 2) -#endif #undef INLINE_DEF_FLOAT_SYMBOL #endif diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 09eb36943de..eba22fd1472 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -40,7 +40,7 @@ IO::IO(void *Context) : Ctxt(Context) {} IO::~IO() = default; -void *IO::getContext() { +void *IO::getContext() const { return Ctxt; } @@ -79,7 +79,7 @@ void Input::ScalarHNode::anchor() {} void Input::MapHNode::anchor() {} void Input::SequenceHNode::anchor() {} -bool Input::outputting() { +bool Input::outputting() const { return false; } @@ -377,12 +377,12 @@ std::unique_ptr Input::createHNodes(Node *N) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } - return llvm::make_unique(N, KeyStr); + return std::make_unique(N, KeyStr); } else if (BlockScalarNode *BSN = dyn_cast(N)) { StringRef ValueCopy = 
BSN->getValue().copy(StringAllocator); - return llvm::make_unique(N, ValueCopy); + return std::make_unique(N, ValueCopy); } else if (SequenceNode *SQ = dyn_cast(N)) { - auto SQHNode = llvm::make_unique(N); + auto SQHNode = std::make_unique(N); for (Node &SN : *SQ) { auto Entry = createHNodes(&SN); if (EC) @@ -391,7 +391,7 @@ std::unique_ptr Input::createHNodes(Node *N) { } return std::move(SQHNode); } else if (MappingNode *Map = dyn_cast(N)) { - auto mapHNode = llvm::make_unique(N); + auto mapHNode = std::make_unique(N); for (KeyValueNode &KVN : *Map) { Node *KeyNode = KVN.getKey(); ScalarNode *Key = dyn_cast(KeyNode); @@ -416,7 +416,7 @@ std::unique_ptr Input::createHNodes(Node *N) { } return std::move(mapHNode); } else if (isa(N)) { - return llvm::make_unique(N); + return std::make_unique(N); } else { setError(N, "unknown node kind"); return nullptr; @@ -440,7 +440,7 @@ Output::Output(raw_ostream &yout, void *context, int WrapColumn) Output::~Output() = default; -bool Output::outputting() { +bool Output::outputting() const { return true; } diff --git a/lib/Support/Z3Solver.cpp b/lib/Support/Z3Solver.cpp index f1a6fdf87cf..a83d0f441a4 100644 --- a/lib/Support/Z3Solver.cpp +++ b/lib/Support/Z3Solver.cpp @@ -886,7 +886,7 @@ public: llvm::SMTSolverRef llvm::CreateZ3Solver() { #if LLVM_WITH_Z3 - return llvm::make_unique(); + return std::make_unique(); #else llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild " "with -DLLVM_ENABLE_Z3_SOLVER=ON", diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 2baccaa0cbd..b9989371f5e 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -65,6 +65,17 @@ using namespace llvm; +const raw_ostream::Colors raw_ostream::BLACK; +const raw_ostream::Colors raw_ostream::RED; +const raw_ostream::Colors raw_ostream::GREEN; +const raw_ostream::Colors raw_ostream::YELLOW; +const raw_ostream::Colors raw_ostream::BLUE; +const raw_ostream::Colors raw_ostream::MAGENTA; +const 
raw_ostream::Colors raw_ostream::CYAN; +const raw_ostream::Colors raw_ostream::WHITE; +const raw_ostream::Colors raw_ostream::SAVEDCOLOR; +const raw_ostream::Colors raw_ostream::RESET; + raw_ostream::~raw_ostream() { // raw_ostream's subclasses should take care to flush the buffer // in their destructors. @@ -133,6 +144,14 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) { return *this; } +raw_ostream &raw_ostream::operator<<(Colors C) { + if (C == Colors::RESET) + resetColor(); + else + changeColor(C); + return *this; +} + raw_ostream &raw_ostream::write_uuid(const uuid_t UUID) { for (int Idx = 0; Idx < 16; ++Idx) { *this << format("%02" PRIX32, UUID[Idx]); @@ -784,11 +803,15 @@ size_t raw_fd_ostream::preferred_buffer_size() const { raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, bool bg) { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = - (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg) - : sys::Process::OutputColor(colors, bold, bg); + (colors == SAVEDCOLOR) + ? sys::Process::OutputBold(bg) + : sys::Process::OutputColor(static_cast(colors), bold, bg); if (colorcode) { size_t len = strlen(colorcode); write(colorcode, len); @@ -799,6 +822,9 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, } raw_ostream &raw_fd_ostream::resetColor() { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = sys::Process::ResetColor(); @@ -812,6 +838,9 @@ raw_ostream &raw_fd_ostream::resetColor() { } raw_ostream &raw_fd_ostream::reverseColor() { + if (!ColorEnabled) + return *this; + if (sys::Process::ColorNeedsFlush()) flush(); const char *colorcode = sys::Process::OutputReverse(); @@ -843,7 +872,7 @@ void raw_fd_ostream::anchor() {} raw_ostream &llvm::outs() { // Set buffer settings to model stdout behavior. 
std::error_code EC; - static raw_fd_ostream S("-", EC, sys::fs::F_None); + static raw_fd_ostream S("-", EC, sys::fs::OF_None); assert(!EC); return S; } diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index 12669ab75d1..ee2a1d87a26 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -48,6 +48,7 @@ #include "regex2.h" #include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" /* character-class table */ static struct cclass { @@ -537,7 +538,7 @@ p_ere_exp(struct parse *p) break; case '{': /* okay as ordinary except if digit follows */ REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); - /* FALLTHROUGH */ + LLVM_FALLTHROUGH; default: ordinary(p, c); break; @@ -733,7 +734,7 @@ p_simp_re(struct parse *p, break; case '*': REQUIRE(starordinary, REG_BADRPT); - /* FALLTHROUGH */ + LLVM_FALLTHROUGH; default: ordinary(p, (char)c); break; @@ -1635,7 +1636,7 @@ findmust(struct parse *p, struct re_guts *g) return; } } while (OP(s) != O_QUEST && OP(s) != O_CH); - /* fallthrough */ + LLVM_FALLTHROUGH; default: /* things that break a sequence */ if (newlen > g->mlen) { /* ends one */ start = newstart; diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 7523b32ca0e..54b063cb4f8 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -39,6 +39,8 @@ static void PrintMessage(ArrayRef Loc, SourceMgr::DiagKind Kind, "instantiated from multiclass"); } +void PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; } + void PrintNote(ArrayRef NoteLoc, const Twine &Msg) { PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg); } diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index bcd39584e45..48ded6c45a4 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -49,6 +49,9 @@ static cl::list MacroNames("D", cl::desc("Name of the macro to be defined"), cl::value_desc("macro name"), cl::Prefix); +static cl::opt +WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); + static int 
reportError(const char *ProgName, Twine Msg) { errs() << ProgName << ": " << Msg; errs().flush(); @@ -64,7 +67,7 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) { return reportError(argv0, "the option -d must be used together with -o\n"); std::error_code EC; - ToolOutputFile DepOut(DependFilename, EC, sys::fs::F_Text); + ToolOutputFile DepOut(DependFilename, EC, sys::fs::OF_None); if (EC) return reportError(argv0, "error opening " + DependFilename + ":" + EC.message() + "\n"); @@ -114,15 +117,17 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) { return Ret; } - // Only updates the real output file if there are any differences. - // This prevents recompilation of all the files depending on it if there - // aren't any. - if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename)) - if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) - return 0; + if (WriteIfChanged) { + // Only updates the real output file if there are any differences. + // This prevents recompilation of all the files depending on it if there + // aren't any. 
+ if (auto ExistingOrErr = MemoryBuffer::getFile(OutputFilename)) + if (std::move(ExistingOrErr.get())->getBuffer() == Out.str()) + return 0; + } std::error_code EC; - ToolOutputFile OutFile(OutputFilename, EC, sys::fs::F_Text); + ToolOutputFile OutFile(OutputFilename, EC, sys::fs::OF_None); if (EC) return reportError(argv0, "error opening " + OutputFilename + ":" + EC.message() + "\n"); diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 27d1bdc7f4c..835ef8c7141 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -438,7 +438,7 @@ Init *BitsInit::resolveReferences(Resolver &R) const { CachedBitVarRef = CurBitVar->getBitVar(); CachedBitVarResolved = CachedBitVarRef->resolveReferences(R); } - + assert(CachedBitVarResolved && "Unresolved bitvar reference"); NewBit = CachedBitVarResolved->getBit(CurBitVar->getBitNum()); } else { // getBit(0) implicitly converts int and bits<1> values to bit. @@ -1616,7 +1616,7 @@ void VarDefInit::Profile(FoldingSetNodeID &ID) const { DefInit *VarDefInit::instantiate() { if (!Def) { RecordKeeper &Records = Class->getRecords(); - auto NewRecOwner = make_unique(Records.getNewAnonymousName(), + auto NewRecOwner = std::make_unique(Records.getNewAnonymousName(), Class->getLoc(), Records, /*IsAnonymous=*/true); Record *NewRec = NewRecOwner.get(); @@ -1930,6 +1930,13 @@ void DagInit::Profile(FoldingSetNodeID &ID) const { ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects(), NumArgs), makeArrayRef(getTrailingObjects(), NumArgNames)); } +Record *DagInit::getOperatorAsDef(ArrayRef Loc) const { + if (DefInit *DefI = dyn_cast(Val)) + return DefI->getDef(); + PrintFatalError(Loc, "Expected record as operator"); + return nullptr; +} + Init *DagInit::resolveReferences(Resolver &R) const { SmallVector NewArgs; NewArgs.reserve(arg_size()); diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index a870e41d58f..5a30ee98cce 100644 --- a/lib/TableGen/SetTheory.cpp +++ 
b/lib/TableGen/SetTheory.cpp @@ -255,16 +255,16 @@ void SetTheory::Operator::anchor() {} void SetTheory::Expander::anchor() {} SetTheory::SetTheory() { - addOperator("add", llvm::make_unique()); - addOperator("sub", llvm::make_unique()); - addOperator("and", llvm::make_unique()); - addOperator("shl", llvm::make_unique()); - addOperator("trunc", llvm::make_unique()); - addOperator("rotl", llvm::make_unique(false)); - addOperator("rotr", llvm::make_unique(true)); - addOperator("decimate", llvm::make_unique()); - addOperator("interleave", llvm::make_unique()); - addOperator("sequence", llvm::make_unique()); + addOperator("add", std::make_unique()); + addOperator("sub", std::make_unique()); + addOperator("and", std::make_unique()); + addOperator("shl", std::make_unique()); + addOperator("trunc", std::make_unique()); + addOperator("rotl", std::make_unique(false)); + addOperator("rotr", std::make_unique(true)); + addOperator("decimate", std::make_unique()); + addOperator("interleave", std::make_unique()); + addOperator("sequence", std::make_unique()); } void SetTheory::addOperator(StringRef Name, std::unique_ptr Op) { @@ -276,7 +276,7 @@ void SetTheory::addExpander(StringRef ClassName, std::unique_ptr E) { } void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) { - addExpander(ClassName, llvm::make_unique(FieldName)); + addExpander(ClassName, std::make_unique(FieldName)); } void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef Loc) { diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index d28c62b3133..da2286e41fe 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -51,7 +51,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { // Pretend that we enter the "top-level" include file. PrepIncludeStack.push_back( - make_unique>()); + std::make_unique>()); // Put all macros defined in the command line into the DefinedMacros set. 
std::for_each(Macros.begin(), Macros.end(), @@ -393,7 +393,7 @@ bool TGLexer::LexInclude() { CurPtr = CurBuf.begin(); PrepIncludeStack.push_back( - make_unique>()); + std::make_unique>()); return false; } diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index a9ace152d59..c373e2899a5 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -378,7 +378,7 @@ bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs, auto LI = dyn_cast(List); if (!LI) { if (!Final) { - Dest->emplace_back(make_unique(Loop.Loc, Loop.IterVar, + Dest->emplace_back(std::make_unique(Loop.Loc, Loop.IterVar, List)); return resolve(Loop.Entries, Substs, Final, &Dest->back().Loop->Entries, Loc); @@ -413,7 +413,7 @@ bool TGParser::resolve(const std::vector &Source, if (E.Loop) { Error = resolve(*E.Loop, Substs, Final, Dest); } else { - auto Rec = make_unique(*E.Rec); + auto Rec = std::make_unique(*E.Rec); if (Loc) Rec->appendLoc(*Loc); @@ -1147,9 +1147,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { if (!InitList.back()) return nullptr; // All BinOps require their arguments to be of compatible types. 
- TypedInit *TI = dyn_cast(InitList.back()); + RecTy *ListType = cast(InitList.back())->getType(); if (!ArgType) { - ArgType = TI->getType(); + ArgType = ListType; switch (Code) { case BinOpInit::LISTCONCAT: @@ -1198,11 +1198,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { default: llvm_unreachable("other ops have fixed argument types"); } } else { - RecTy *Resolved = resolveTypes(ArgType, TI->getType()); + RecTy *Resolved = resolveTypes(ArgType, ListType); if (!Resolved) { Error(InitLoc, Twine("expected value of type '") + - ArgType->getAsString() + "', got '" + - TI->getType()->getAsString() + "'"); + ArgType->getAsString() + "', got '" + + ListType->getAsString() + "'"); return nullptr; } if (Code != BinOpInit::ADD && Code != BinOpInit::AND && @@ -1330,7 +1330,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { std::unique_ptr ParseRecTmp; Record *ParseRec = CurRec; if (!ParseRec) { - ParseRecTmp = make_unique(".parse", ArrayRef{}, Records); + ParseRecTmp = std::make_unique(".parse", ArrayRef{}, Records); ParseRec = ParseRecTmp.get(); } @@ -1597,7 +1597,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { std::unique_ptr ParseRecTmp; Record *ParseRec = CurRec; if (!ParseRec) { - ParseRecTmp = make_unique(".parse", ArrayRef{}, Records); + ParseRecTmp = std::make_unique(".parse", ArrayRef{}, Records); ParseRec = ParseRecTmp.get(); } @@ -2702,10 +2702,10 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { return true; if (isa(Name)) - CurRec = make_unique(Records.getNewAnonymousName(), DefLoc, Records, + CurRec = std::make_unique(Records.getNewAnonymousName(), DefLoc, Records, /*Anonymous=*/true); else - CurRec = make_unique(Name, DefLoc, Records); + CurRec = std::make_unique(Name, DefLoc, Records); if (ParseObjectBody(CurRec.get())) return true; @@ -2783,7 +2783,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) { Lex.Lex(); // Eat the in // Create a loop object and remember it. 
- Loops.push_back(llvm::make_unique(Loc, IterName, ListValue)); + Loops.push_back(std::make_unique(Loc, IterName, ListValue)); if (Lex.getCode() != tgtok::l_brace) { // FOREACH Declaration IN Object @@ -2834,7 +2834,7 @@ bool TGParser::ParseClass() { } else { // If this is the first reference to this class, create and add it. auto NewRec = - llvm::make_unique(Lex.getCurStrVal(), Lex.getLoc(), Records, + std::make_unique(Lex.getCurStrVal(), Lex.getLoc(), Records, /*Class=*/true); CurRec = NewRec.get(); Records.addClass(std::move(NewRec)); @@ -2963,7 +2963,7 @@ bool TGParser::ParseMultiClass() { auto Result = MultiClasses.insert(std::make_pair(Name, - llvm::make_unique(Name, Lex.getLoc(),Records))); + std::make_unique(Name, Lex.getLoc(),Records))); if (!Result.second) return TokError("multiclass '" + Name + "' already defined"); diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index 6965403a25a..ac765ebcddc 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -55,8 +55,9 @@ FunctionPass *createAArch64CollectLOHPass(); InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &); -FunctionPass *createAArch64PreLegalizeCombiner(); -FunctionPass *createAArch64StackTaggingPass(); +FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone); +FunctionPass *createAArch64StackTaggingPass(bool MergeInit); +FunctionPass *createAArch64StackTaggingPreRAPass(); void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); @@ -80,6 +81,7 @@ void initializeFalkorHWPFFixPass(PassRegistry&); void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry&); void initializeAArch64StackTaggingPass(PassRegistry&); +void initializeAArch64StackTaggingPreRAPass(PassRegistry&); } // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64.td 
b/lib/Target/AArch64/AArch64.td index e39c6995e36..5b4c9e2149d 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -115,11 +115,12 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; -def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true", +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; + def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true", "Has zero-cycle zeroing instructions for generic registers">; @@ -284,6 +285,10 @@ def FeatureSEL2 : SubtargetFeature< "sel2", "HasSEL2", "true", "Enable v8.4-A Secure Exception Level 2 extension">; +def FeaturePMU : SubtargetFeature< + "pmu", "HasPMU", "true", + "Enable v8.4-A PMU extension">; + def FeatureTLB_RMI : SubtargetFeature< "tlb-rmi", "HasTLB_RMI", "true", "Enable v8.4-A TLB Range and Maintenance Instructions">; @@ -345,6 +350,21 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", def FeatureMTE : SubtargetFeature<"mte", "HasMTE", "true", "Enable Memory Tagging Extension" >; +def FeatureTRBE : SubtargetFeature<"trbe", "HasTRBE", + "true", "Enable Trace Buffer Extension">; + +def FeatureETE : SubtargetFeature<"ete", "HasETE", + "true", "Enable Embedded Trace Extension", + [FeatureTRBE]>; + +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + +def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", + "AllowTaggedGlobals", + "true", "Use an instruction sequence for taking the address of a global " + "that allows a memory tag in the upper address bits">; + 
//===----------------------------------------------------------------------===// // Architectures. // @@ -354,7 +374,7 @@ def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", FeaturePAN, FeatureLOR, FeatureVH]>; def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", - "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, + "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", @@ -364,7 +384,7 @@ def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT, - FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, + FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI, FeatureFMI, FeatureRCPC_IMMO]>; def HasV8_5aOps : SubtargetFeature< @@ -390,6 +410,7 @@ include "AArch64Schedule.td" include "AArch64InstrInfo.td" include "AArch64SchedPredicates.td" include "AArch64SchedPredExynos.td" +include "AArch64Combine.td" def AArch64InstrInfo : InstrInfo; @@ -484,6 +505,19 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeaturePredictableSelectIsExpensive ]>; +def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", + "Cortex-A65 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRAS, + FeatureRCPC, + FeatureSSBS, + ]>; + def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", [ FeatureCRC, @@ -641,6 +675,33 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeatureSlowSTRQro ]>; +def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", + "NeoverseE1", + "Neoverse E1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + 
FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSSBS, + ]>; + +def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", + "NeoverseN1", + "Neoverse N1 ARM processors", [ + HasV8_2aOps, + FeatureCrypto, + FeatureDotProd, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeatureRCPC, + FeatureSPE, + FeatureSSBS, + ]>; + def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ FeatureCrypto, @@ -732,19 +793,28 @@ def : ProcessorModel<"generic", NoSchedModel, [ FeatureFuseAES, FeatureNEON, FeaturePerfMon, - FeaturePostRAScheduler + FeaturePostRAScheduler, +// ETE and TRBE are future architecture extensions. We temporariliy enable them +// by default for users targeting generic AArch64, until it is decided in which +// armv8.x-a architecture revision they will end up. The extensions do not +// affect code generated by the compiler and can be used only by explicitly +// mentioning the new system register names in assembly. + FeatureETE ]>; -// FIXME: Cortex-A35 and Cortex-A55 are currently modeled as a Cortex-A53. 
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; +def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>; +def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>; def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>; def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>; def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>; +def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; +def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 92c8c4955d5..13d389cec7a 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -552,7 +552,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, std::vector ToErase; for (auto &U : I.operands()) { if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) { - unsigned OrigReg = U.getReg(); + Register OrigReg = U.getReg(); U.setReg(Substs[OrigReg]); if (U.isKill()) // Don't erase straight away, because there may be other operands @@ -611,12 +611,12 @@ void AArch64A57FPLoadBalancing::scanInstruction( // Create a new chain. Multiplies don't require forwarding so can go on any // unit. 
- unsigned DestReg = MI->getOperand(0).getReg(); + Register DestReg = MI->getOperand(0).getReg(); LLVM_DEBUG(dbgs() << "New chain started for register " << printReg(DestReg, TRI) << " at " << *MI); - auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); + auto G = std::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); AllChains.push_back(std::move(G)); @@ -624,8 +624,8 @@ void AArch64A57FPLoadBalancing::scanInstruction( // It is beneficial to keep MLAs on the same functional unit as their // accumulator operand. - unsigned DestReg = MI->getOperand(0).getReg(); - unsigned AccumReg = MI->getOperand(3).getReg(); + Register DestReg = MI->getOperand(0).getReg(); + Register AccumReg = MI->getOperand(3).getReg(); maybeKillChain(MI->getOperand(1), Idx, ActiveChains); maybeKillChain(MI->getOperand(2), Idx, ActiveChains); @@ -661,7 +661,7 @@ void AArch64A57FPLoadBalancing::scanInstruction( LLVM_DEBUG(dbgs() << "Creating new chain for dest register " << printReg(DestReg, TRI) << "\n"); - auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); + auto G = std::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); AllChains.push_back(std::move(G)); diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 89404463e1f..981b366c14b 100644 --- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -105,14 +105,14 @@ static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (SubReg) return false; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); return AArch64::GPR64RegClass.contains(Reg); } static bool isFPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return 
(MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && SubReg == 0) || (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && @@ -201,8 +201,8 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform( unsigned NumNewCopies = 3; unsigned NumRemovableCopies = 0; - unsigned OrigSrc0 = MI.getOperand(1).getReg(); - unsigned OrigSrc1 = MI.getOperand(2).getReg(); + Register OrigSrc0 = MI.getOperand(1).getReg(); + Register OrigSrc1 = MI.getOperand(2).getReg(); unsigned SubReg0; unsigned SubReg1; if (!MRI->def_empty(OrigSrc0)) { @@ -236,7 +236,7 @@ bool AArch64AdvSIMDScalar::isProfitableToTransform( // any of the uses is a transformable instruction, it's likely the tranforms // will chain, enabling us to save a copy there, too. This is an aggressive // heuristic that approximates the graph based cost analysis described above. - unsigned Dst = MI.getOperand(0).getReg(); + Register Dst = MI.getOperand(0).getReg(); bool AllUsesAreCopies = true; for (MachineRegisterInfo::use_instr_nodbg_iterator Use = MRI->use_instr_nodbg_begin(Dst), @@ -293,8 +293,8 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { assert(OldOpc != NewOpc && "transform an instruction to itself?!"); // Check if we need a copy for the source registers. - unsigned OrigSrc0 = MI.getOperand(1).getReg(); - unsigned OrigSrc1 = MI.getOperand(2).getReg(); + Register OrigSrc0 = MI.getOperand(1).getReg(); + Register OrigSrc1 = MI.getOperand(2).getReg(); unsigned Src0 = 0, SubReg0; unsigned Src1 = 0, SubReg1; bool KillSrc0 = false, KillSrc1 = false; @@ -354,7 +354,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { // Create a vreg for the destination. // FIXME: No need to do this if the ultimate user expects an FPR64. // Check for that and avoid the copy if possible. 
- unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + Register Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); // For now, all of the new instructions have the same simple three-register // form, so no need to special case based on what instruction we're diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 094fbd99952..7ea7915c2ca 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -99,7 +99,8 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - std::map, MCSymbol *> HwasanMemaccessSymbols; + typedef std::tuple HwasanMemaccessTuple; + std::map HwasanMemaccessSymbols; void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI); void EmitHwasanMemaccessSymbols(Module &M); @@ -150,7 +151,7 @@ private: void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); bool printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, bool isVector, + const TargetRegisterClass *RC, unsigned AltName, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -236,9 +237,12 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) } void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); + bool IsShort = + MI.getOpcode() == AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES; uint32_t AccessInfo = MI.getOperand(1).getImm(); - MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}]; + MCSymbol *&Sym = + HwasanMemaccessSymbols[HwasanMemaccessTuple(Reg, IsShort, AccessInfo)]; if (!Sym) { // FIXME: Make this work on non-ELF. 
if (!TM.getTargetTriple().isOSBinFormatELF()) @@ -246,6 +250,8 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" + utostr(AccessInfo); + if (IsShort) + SymName += "_short"; Sym = OutContext.getOrCreateSymbol(SymName); } @@ -263,15 +269,22 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { std::unique_ptr STI( TM.getTarget().createMCSubtargetInfo(TT.str(), "", "")); - MCSymbol *HwasanTagMismatchSym = + MCSymbol *HwasanTagMismatchV1Sym = OutContext.getOrCreateSymbol("__hwasan_tag_mismatch"); + MCSymbol *HwasanTagMismatchV2Sym = + OutContext.getOrCreateSymbol("__hwasan_tag_mismatch_v2"); - const MCSymbolRefExpr *HwasanTagMismatchRef = - MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext); + const MCSymbolRefExpr *HwasanTagMismatchV1Ref = + MCSymbolRefExpr::create(HwasanTagMismatchV1Sym, OutContext); + const MCSymbolRefExpr *HwasanTagMismatchV2Ref = + MCSymbolRefExpr::create(HwasanTagMismatchV2Sym, OutContext); for (auto &P : HwasanMemaccessSymbols) { - unsigned Reg = P.first.first; - uint32_t AccessInfo = P.first.second; + unsigned Reg = std::get<0>(P.first); + bool IsShort = std::get<1>(P.first); + uint32_t AccessInfo = std::get<2>(P.first); + const MCSymbolRefExpr *HwasanTagMismatchRef = + IsShort ? 
HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref; MCSymbol *Sym = P.second; OutStreamer->SwitchSection(OutContext.getELFSection( @@ -304,82 +317,86 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { .addReg(Reg) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), *STI); - MCSymbol *HandlePartialSym = OutContext.createTempSymbol(); + MCSymbol *HandleMismatchOrPartialSym = OutContext.createTempSymbol(); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::Bcc) .addImm(AArch64CC::NE) - .addExpr(MCSymbolRefExpr::create(HandlePartialSym, OutContext)), + .addExpr(MCSymbolRefExpr::create(HandleMismatchOrPartialSym, + OutContext)), *STI); MCSymbol *ReturnSym = OutContext.createTempSymbol(); OutStreamer->EmitLabel(ReturnSym); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI); + OutStreamer->EmitLabel(HandleMismatchOrPartialSym); - OutStreamer->EmitLabel(HandlePartialSym); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri) - .addReg(AArch64::WZR) - .addReg(AArch64::W16) - .addImm(15) - .addImm(0), - *STI); - MCSymbol *HandleMismatchSym = OutContext.createTempSymbol(); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::HI) - .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), - *STI); - - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::ANDXri) - .addReg(AArch64::X17) - .addReg(Reg) - .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), - *STI); - unsigned Size = 1 << (AccessInfo & 0xf); - if (Size != 1) - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri) - .addReg(AArch64::X17) - .addReg(AArch64::X17) - .addImm(Size - 1) + if (IsShort) { + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri) + .addReg(AArch64::WZR) + .addReg(AArch64::W16) + .addImm(15) .addImm(0), *STI); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs) - .addReg(AArch64::WZR) - .addReg(AArch64::W16) - .addReg(AArch64::W17) - .addImm(0), - *STI); - 
OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::LS) - .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), - *STI); + MCSymbol *HandleMismatchSym = OutContext.createTempSymbol(); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::HI) + .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), + *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::ORRXri) - .addReg(AArch64::X16) - .addReg(Reg) - .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), - *STI); - OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui) - .addReg(AArch64::W16) - .addReg(AArch64::X16) - .addImm(0), - *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::SUBSXrs) - .addReg(AArch64::XZR) - .addReg(AArch64::X16) - .addReg(Reg) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), - *STI); - OutStreamer->EmitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), - *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::ANDXri) + .addReg(AArch64::X17) + .addReg(Reg) + .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), + *STI); + unsigned Size = 1 << (AccessInfo & 0xf); + if (Size != 1) + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri) + .addReg(AArch64::X17) + .addReg(AArch64::X17) + .addImm(Size - 1) + .addImm(0), + *STI); + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs) + .addReg(AArch64::WZR) + .addReg(AArch64::W16) + .addReg(AArch64::W17) + .addImm(0), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::LS) + .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)), + *STI); + + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::ORRXri) + .addReg(AArch64::X16) + .addReg(Reg) + .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)), + *STI); + OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui) + 
.addReg(AArch64::W16) + .addReg(AArch64::X16) + .addImm(0), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::SUBSXrs) + .addReg(AArch64::XZR) + .addReg(AArch64::X16) + .addReg(Reg) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)), + *STI); + OutStreamer->EmitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), + *STI); + + OutStreamer->EmitLabel(HandleMismatchSym); + } - OutStreamer->EmitLabel(HandleMismatchSym); OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre) .addReg(AArch64::SP) .addReg(AArch64::X0) @@ -414,16 +431,16 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { MCInstBuilder(AArch64::ADRP) .addReg(AArch64::X16) .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, - AArch64MCExpr::VariantKind::VK_GOT_PAGE, OutContext)), + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE, + OutContext)), *STI); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::LDRXui) .addReg(AArch64::X16) .addReg(AArch64::X16) .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, - AArch64MCExpr::VariantKind::VK_GOT_LO12, OutContext)), + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12, + OutContext)), *STI); OutStreamer->EmitInstruction( MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); @@ -485,15 +502,14 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, default: llvm_unreachable(""); case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); O << AArch64InstPrinter::getRegisterName(Reg); break; } case MachineOperand::MO_Immediate: { - int64_t Imm = MO.getImm(); - O << '#' << Imm; + O << MO.getImm(); break; } case MachineOperand::MO_GlobalAddress: { @@ -510,7 +526,7 @@ void 
AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); switch (Mode) { default: return true; // Unknown mode. @@ -531,14 +547,13 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, // printing. bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { + unsigned AltName, raw_ostream &O) { assert(MO.isReg() && "Should only get here with a register!"); const TargetRegisterInfo *RI = STI->getRegisterInfo(); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); assert(RI->regsOverlap(RegToPrint, Reg)); - O << AArch64InstPrinter::getRegisterName( - RegToPrint, isVector ? AArch64::vreg : AArch64::NoRegAltName); + O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName); return false; } @@ -574,6 +589,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 's': // Print S register. case 'd': // Print D register. case 'q': // Print Q register. + case 'z': // Print Z register. if (MO.isReg()) { const TargetRegisterClass *RC; switch (ExtraCode[0]) { @@ -592,10 +608,13 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'q': RC = &AArch64::FPR128RegClass; break; + case 'z': + RC = &AArch64::ZPRRegClass; + break; default: return true; } - return printAsmRegInClass(MO, RC, false /* vector */, O); + return printAsmRegInClass(MO, RC, AArch64::NoRegAltName, O); } printOperand(MI, OpNum, O); return false; @@ -605,16 +624,26 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // According to ARM, we should emit x and v registers unless we have a // modifier. 
if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If this is a w or x register, print an x register. if (AArch64::GPR32allRegClass.contains(Reg) || AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); + unsigned AltName = AArch64::NoRegAltName; + const TargetRegisterClass *RegClass; + if (AArch64::ZPRRegClass.contains(Reg)) { + RegClass = &AArch64::ZPRRegClass; + } else if (AArch64::PPRRegClass.contains(Reg)) { + RegClass = &AArch64::PPRRegClass; + } else { + RegClass = &AArch64::FPR128RegClass; + AltName = AArch64::vreg; + } + // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */, - O); + return printAsmRegInClass(MO, RegClass, AltName, O); } printOperand(MI, OpNum, O); @@ -682,7 +711,7 @@ void AArch64AsmPrinter::EmitJumpTableInfo() { if (JTBBs.empty()) continue; unsigned Size = AFI->getJumpTableEntrySize(JTI); - EmitAlignment(Log2_32(Size)); + EmitAlignment(Align(Size)); OutStreamer->EmitLabel(GetJTISymbol(JTI)); for (auto *JTBB : JTBBs) @@ -725,12 +754,12 @@ void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI, /// add xDest, xDest, xScratch, lsl #2 void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer, const llvm::MachineInstr &MI) { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned ScratchRegW = + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register ScratchRegW = STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32); - unsigned TableReg = MI.getOperand(2).getReg(); - unsigned EntryReg = MI.getOperand(3).getReg(); + Register TableReg = MI.getOperand(2).getReg(); + Register EntryReg = MI.getOperand(3).getReg(); int JTIdx = MI.getOperand(4).getIndex(); bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8; @@ -800,7 +829,7 @@ void 
AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, if (CallTarget) { assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 16; // Materialize the jump address: EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZXi) @@ -830,7 +859,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, } void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { // Convert H/S/D register to corresponding Q register if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) @@ -894,32 +923,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { default: break; case AArch64::MOVMCSym: { - unsigned DestReg = MI->getOperand(0).getReg(); - const MachineOperand &MO_Sym = MI->getOperand(1); - MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); - MCOperand Hi_MCSym, Lo_MCSym; + Register DestReg = MI->getOperand(0).getReg(); + const MachineOperand &MO_Sym = MI->getOperand(1); + MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym); + MCOperand Hi_MCSym, Lo_MCSym; - Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); - Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); + Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S); + Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC); - MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); - MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); + MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym); + MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym); - MCInst MovZ; - MovZ.setOpcode(AArch64::MOVZXi); - MovZ.addOperand(MCOperand::createReg(DestReg)); - MovZ.addOperand(Hi_MCSym); - 
MovZ.addOperand(MCOperand::createImm(16)); - EmitToStreamer(*OutStreamer, MovZ); + MCInst MovZ; + MovZ.setOpcode(AArch64::MOVZXi); + MovZ.addOperand(MCOperand::createReg(DestReg)); + MovZ.addOperand(Hi_MCSym); + MovZ.addOperand(MCOperand::createImm(16)); + EmitToStreamer(*OutStreamer, MovZ); - MCInst MovK; - MovK.setOpcode(AArch64::MOVKXi); - MovK.addOperand(MCOperand::createReg(DestReg)); - MovK.addOperand(MCOperand::createReg(DestReg)); - MovK.addOperand(Lo_MCSym); - MovK.addOperand(MCOperand::createImm(0)); - EmitToStreamer(*OutStreamer, MovK); - return; + MCInst MovK; + MovK.setOpcode(AArch64::MOVKXi); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(MCOperand::createReg(DestReg)); + MovK.addOperand(Lo_MCSym); + MovK.addOperand(MCOperand::createImm(0)); + EmitToStreamer(*OutStreamer, MovK); + return; } case AArch64::MOVIv2d_ns: // If the target has , lower this @@ -1084,6 +1113,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; case AArch64::HWASAN_CHECK_MEMACCESS: + case AArch64::HWASAN_CHECK_MEMACCESS_SHORTGRANULES: LowerHWASAN_CHECK_MEMACCESS(*MI); return; @@ -1193,4 +1223,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() { RegisterAsmPrinter X(getTheAArch64leTarget()); RegisterAsmPrinter Y(getTheAArch64beTarget()); RegisterAsmPrinter Z(getTheARM64Target()); + RegisterAsmPrinter W(getTheARM64_32Target()); + RegisterAsmPrinter V(getTheAArch64_32Target()); } diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index 59757769c89..ed93d02aa61 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -99,7 +99,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler { /// (it's an implicit-def of the BL). 
virtual void markPhysRegUsed(unsigned PhysReg) = 0; - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } uint64_t StackUsed; }; @@ -110,6 +110,7 @@ struct FormalArgHandler : public IncomingArgHandler { : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -129,14 +130,29 @@ struct CallReturnHandler : public IncomingArgHandler { struct OutgoingArgHandler : public CallLowering::ValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, - CCAssignFn *AssignFnVarArg) + CCAssignFn *AssignFnVarArg, bool IsTailCall = false, + int FPDiff = 0) : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), - AssignFnVarArg(AssignFnVarArg), StackSize(0) {} + AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), + StackSize(0) {} + + bool isIncomingArgumentHandler() const override { return false; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { + MachineFunction &MF = MIRBuilder.getMF(); LLT p0 = LLT::pointer(0, 64); LLT s64 = LLT::scalar(64); + + if (IsTailCall) { + Offset += FPDiff; + int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); + Register FIReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildFrameIndex(FIReg, FI); + MPO = MachinePointerInfo::getFixedStack(MF, FI); + return FIReg; + } + Register SPReg = MRI.createGenericVirtualRegister(p0); MIRBuilder.buildCopy(SPReg, Register(AArch64::SP)); @@ -146,7 +162,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { Register AddrReg = MRI.createGenericVirtualRegister(p0); MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg); - MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset); + MPO = MachinePointerInfo::getStack(MF, Offset); return AddrReg; } 
@@ -173,12 +189,13 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { bool Res; if (Info.IsFixed) - Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); else - Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); return Res; @@ -186,10 +203,19 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { MachineInstrBuilder MIB; CCAssignFn *AssignFnVarArg; + bool IsTailCall; + + /// For tail calls, the byte offset of the call's argument area from the + /// callee's. Unused elsewhere. + int FPDiff; uint64_t StackSize; }; } // namespace +static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { + return CallConv == CallingConv::Fast && TailCallOpt; +} + void AArch64CallLowering::splitToValueTypes( const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const { @@ -207,7 +233,7 @@ void AArch64CallLowering::splitToValueTypes( // No splitting to do, but we want to replace the original type (e.g. [1 x // double] -> double). 
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + OrigArg.Flags[0], OrigArg.IsFixed); return; } @@ -218,13 +244,13 @@ void AArch64CallLowering::splitToValueTypes( OrigArg.Ty, CallConv, false); for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); - SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags, + SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags[0], OrigArg.IsFixed); if (NeedsRegBlock) - SplitArgs.back().Flags.setInConsecutiveRegs(); + SplitArgs.back().Flags[0].setInConsecutiveRegs(); } - SplitArgs.back().Flags.setInConsecutiveRegsLast(); + SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); } bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -344,6 +370,49 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return Success; } +/// Helper function to compute forwarded registers for musttail calls. Computes +/// the forwarded registers, sets MBB liveness, and emits COPY instructions that +/// can be used to save + restore registers later. +static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, + CCAssignFn *AssignFn) { + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + MachineFunction &MF = MIRBuilder.getMF(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (!MFI.hasMustTailInVarArgFunc()) + return; + + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + const Function &F = MF.getFunction(); + assert(F.isVarArg() && "Expected F to be vararg?"); + + // Compute the set of forwarded registers. The rest are scratch. + SmallVector ArgLocs; + CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, + F.getContext()); + SmallVector RegParmTypes; + RegParmTypes.push_back(MVT::i64); + RegParmTypes.push_back(MVT::f128); + + // Later on, we can use this vector to restore the registers if necessary. 
+ SmallVectorImpl &Forwards = + FuncInfo->getForwardedMustTailRegParms(); + CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn); + + // Conservatively forward X8, since it might be used for an aggregate + // return. + if (!CCInfo.isAllocated(AArch64::X8)) { + unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); + Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); + } + + // Add the forwards to the MachineBasicBlock and MachineFunction. + for (const auto &F : Forwards) { + MBB.addLiveIn(F.PReg); + MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg)); + } +} + bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const { @@ -376,64 +445,530 @@ bool AArch64CallLowering::lowerFormalArguments( if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) return false; + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + uint64_t StackOffset = Handler.StackUsed; if (F.isVarArg()) { - if (!MF.getSubtarget().isTargetDarwin()) { - // FIXME: we need to reimplement saveVarArgsRegisters from + auto &Subtarget = MF.getSubtarget(); + if (!Subtarget.isTargetDarwin()) { + // FIXME: we need to reimplement saveVarArgsRegisters from // AArch64ISelLowering. return false; } - // We currently pass all varargs at 8-byte alignment. - uint64_t StackOffset = alignTo(Handler.StackUsed, 8); + // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. + StackOffset = alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8); auto &MFI = MIRBuilder.getMF().getFrameInfo(); - AArch64FunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); } + if (doesCalleeRestoreStack(F.getCallingConv(), + MF.getTarget().Options.GuaranteedTailCallOpt)) { + // We have a non-standard ABI, so why not make full use of the stack that + // we're going to pop? It must be aligned to 16 B in any case. 
+ StackOffset = alignTo(StackOffset, 16); + + // If we're expected to restore the stack (e.g. fastcc), then we'll be + // adding a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackOffset); + + // Our own callers will guarantee that the space is free by giving an + // aligned value to CALLSEQ_START. + } + + // When we tail call, we need to check if the callee's arguments + // will fit on the caller's stack. So, whenever we lower formal arguments, + // we should keep track of this information, since we might lower a tail call + // in this function later. + FuncInfo->setBytesInStackArgArea(StackOffset); + auto &Subtarget = MF.getSubtarget(); if (Subtarget.hasCustomCallingConv()) Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); + handleMustTailForwardedRegisters(MIRBuilder, AssignFn); + // Move back to the end of the basic block. MIRBuilder.setMBB(MBB); return true; } +/// Return true if the calling convention is one that we can guarantee TCO for. +static bool canGuaranteeTCO(CallingConv::ID CC) { + return CC == CallingConv::Fast; +} + +/// Return true if we might ever do TCO for calls with this calling convention. +static bool mayTailCallThisCC(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::PreserveMost: + case CallingConv::Swift: + return true; + default: + return canGuaranteeTCO(CC); + } +} + +/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for +/// CC. 
+static std::pair +getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { + return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; +} + +bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &InArgs) const { + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + + // If the calling conventions match, then everything must be the same. + if (CalleeCC == CallerCC) + return true; + + // Check if the caller and callee will handle arguments in the same way. + const AArch64TargetLowering &TLI = *getTLI(); + CCAssignFn *CalleeAssignFnFixed; + CCAssignFn *CalleeAssignFnVarArg; + std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = + getAssignFnsForCC(CalleeCC, TLI); + + CCAssignFn *CallerAssignFnFixed; + CCAssignFn *CallerAssignFnVarArg; + std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = + getAssignFnsForCC(CallerCC, TLI); + + if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed, + *CalleeAssignFnVarArg, *CallerAssignFnFixed, + *CallerAssignFnVarArg)) + return false; + + // Make sure that the caller and callee preserve all of the same registers. + auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (MF.getSubtarget().hasCustomCallingConv()) { + TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); + TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); + } + + return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved); +} + +bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const { + // If there are no outgoing arguments, then we are done. 
+ if (OutArgs.empty()) + return true; + + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + const AArch64TargetLowering &TLI = *getTLI(); + + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + // We have outgoing arguments. Make sure that we can tail call with them. + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext()); + + if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) { + LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); + return false; + } + + // Make sure that they can fit on the caller's stack. + const AArch64FunctionInfo *FuncInfo = MF.getInfo(); + if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) { + LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n"); + return false; + } + + // Verify that the parameters in callee-saved registers match. + // TODO: Port this over to CallLowering as general code once swiftself is + // supported. + auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) { + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + LLVM_DEBUG( + dbgs() + << "... Cannot tail call vararg function with stack arguments\n"); + return false; + } + continue; + } + + Register Reg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. 
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. + MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. + Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != Reg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + +bool AArch64CallLowering::isEligibleForTailCallOptimization( + MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const { + + // Must pass all target-independent checks in order to tail call optimize. + if (!Info.IsTailCall) + return false; + + CallingConv::ID CalleeCC = Info.CallConv; + MachineFunction &MF = MIRBuilder.getMF(); + const Function &CallerF = MF.getFunction(); + + LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n"); + + if (Info.SwiftErrorVReg) { + // TODO: We should handle this. + // Note that this is also handled by the check for no outgoing arguments. + // Proactively disabling this though, because the swifterror handling in + // lowerCall inserts a COPY *after* the location of the call. + LLVM_DEBUG(dbgs() << "... 
Cannot handle tail calls with swifterror yet.\n"); + return false; + } + + if (!mayTailCallThisCC(CalleeCC)) { + LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n"); + return false; + } + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. Working around this *is* possible (see + // X86). + // + // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try + // it? + // + // On Windows, "inreg" attributes signify non-aggregate indirect returns. + // In this case, it is necessary to save/restore X0 in the callee. Tail + // call opt interferes with this. So we disable tail call opt when the + // caller has an argument with "inreg" attribute. + // + // FIXME: Check whether the callee also has an "inreg" argument. + // + // When the caller has a swifterror argument, we don't want to tail call + // because would have to move into the swifterror register before the + // tail call. + if (any_of(CallerF.args(), [](const Argument &A) { + return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); + })) { + LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " + "inreg, or swifterror arguments\n"); + return false; + } + + // Externally-defined functions with weak linkage should not be + // tail-called on AArch64 when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. 
+ if (Info.Callee.isGlobal()) { + const GlobalValue *GV = Info.Callee.getGlobal(); + const Triple &TT = MF.getTarget().getTargetTriple(); + if (GV->hasExternalWeakLinkage() && + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " + "with weak linkage for this OS.\n"); + return false; + } + } + + // If we have -tailcallopt, then we're done. + if (MF.getTarget().Options.GuaranteedTailCallOpt) + return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv(); + + // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). + // Try to find cases where we can do that. + + // I want anyone implementing a new calling convention to think long and hard + // about this assert. + assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && + "Unexpected variadic calling convention"); + + // Verify that the incoming and outgoing arguments from the callee are + // safe to tail call. + if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { + LLVM_DEBUG( + dbgs() + << "... Caller and callee have incompatible calling conventions.\n"); + return false; + } + + if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs)) + return false; + + LLVM_DEBUG( + dbgs() << "... Call is eligible for tail call optimization.\n"); + return true; +} + +static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect, + bool IsTailCall) { + if (!IsTailCall) + return IsIndirect ? AArch64::BLR : AArch64::BL; + + if (!IsIndirect) + return AArch64::TCRETURNdi; + + // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use + // x16 or x17. 
+ if (CallerF.hasFnAttribute("branch-target-enforcement")) + return AArch64::TCRETURNriBTI; + + return AArch64::TCRETURNri; +} + +bool AArch64CallLowering::lowerTailCall( + MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const AArch64TargetLowering &TLI = *getTLI(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + + // True when we're tail calling, but without -tailcallopt. + bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt; + + // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 + // register class. Until we can do that, we should fall back here. + if (F.hasFnAttribute("branch-target-enforcement")) { + LLVM_DEBUG( + dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); + return false; + } + + // Find out which ABI gets to decide where things go. + CallingConv::ID CalleeCC = Info.CallConv; + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + MachineInstrBuilder CallSeqStart; + if (!IsSibCall) + CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); + + unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true); + auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + MIB.add(Info.Callee); + + // Byte offset for the tail call. When we are sibcalling, this will always + // be 0. + MIB.addImm(0); + + // Tell the call which registers are clobbered. + auto TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv()); + if (MF.getSubtarget().hasCustomCallingConv()) + TRI->UpdateCustomCallPreservedMask(MF, &Mask); + MIB.addRegMask(Mask); + + if (TRI->isAnyArgRegReserved(MF)) + TRI->emitReservedArgRegCallError(MF); + + // FPDiff is the byte offset of the call's argument area from the callee's. 
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. + int FPDiff = 0; + + // This will be 0 for sibcalls, potentially nonzero for tail calls produced + // by -tailcallopt. For sibcalls, the memory operands for the call are + // already available in the caller's incoming argument space. + unsigned NumBytes = 0; + if (!IsSibCall) { + // We aren't sibcalling, so we need to compute FPDiff. We need to do this + // before handling assignments, because FPDiff must be known for memory + // arguments. + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); + analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg); + + // The callee will pop the argument stack as a tail call. Thus, we must + // keep it 16-byte aligned. + NumBytes = alignTo(OutInfo.getNextStackOffset(), 16); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. + assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); + + // Do the actual argument marshalling. 
+ SmallVector PhysRegs; + OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, + AssignFnVarArg, true, FPDiff); + if (!handleAssignments(MIRBuilder, OutArgs, Handler)) + return false; + + if (Info.IsVarArg && Info.IsMustTailCall) { + // Now we know what's being passed to the function. Add uses to the call for + // the forwarded registers that we *aren't* passing as parameters. This will + // preserve the copies we build earlier. + for (const auto &F : Forwards) { + Register ForwardedReg = F.PReg; + // If the register is already passed, or aliases a register which is + // already being passed, then skip it. + if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) { + if (!Use.isReg()) + return false; + return TRI->regsOverlap(Use.getReg(), ForwardedReg); + })) + continue; + + // We aren't passing it already, so we should add it to the call. + MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg)); + MIB.addReg(ForwardedReg, RegState::Implicit); + } + } + + // If we have -tailcallopt, we need to adjust the stack. We'll do the call + // sequence start and end here. + if (!IsSibCall) { + MIB->getOperand(1).setImm(FPDiff); + CallSeqStart.addImm(NumBytes).addImm(0); + // End the call sequence *before* emitting the call. Normally, we would + // tidy the frame up after the call. However, here, we've laid out the + // parameters so that when SP is reset, they will be in the correct + // location. + MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0); + } + + // Now we can add the actual call instruction to the correct basic block. + MIRBuilder.insertInstr(MIB); + + // If Callee is a reg, since it is used by a target specific instruction, + // it must have a register class matching the constraint of that instruction. 
+ if (Info.Callee.isReg()) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); + + MF.getFrameInfo().setHasTailCall(); + Info.LoweredTailCall = true; + return true; +} + bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const { + CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); auto &DL = F.getParent()->getDataLayout(); + const AArch64TargetLowering &TLI = *getTLI(); - SmallVector SplitArgs; - for (auto &OrigArg : OrigArgs) { - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv); + SmallVector OutArgs; + for (auto &OrigArg : Info.OrigArgs) { + splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv); // AAPCS requires that we zero-extend i1 to 8 bits by the caller. if (OrigArg.Ty->isIntegerTy(1)) - SplitArgs.back().Flags.setZExt(); + OutArgs.back().Flags[0].setZExt(); } - // Find out which ABI gets to decide where things go. - const AArch64TargetLowering &TLI = *getTLI(); - CCAssignFn *AssignFnFixed = - TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - CCAssignFn *AssignFnVarArg = - TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true); + SmallVector InArgs; + if (!Info.OrigRet.Ty->isVoidTy()) + splitToValueTypes(Info.OrigRet, InArgs, DL, MRI, F.getCallingConv()); - auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); + // If we can lower as a tail call, do that instead. + bool CanTailCallOpt = + isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs); + + // We must emit a tail call if we have musttail. 
+ if (Info.IsMustTailCall && !CanTailCallOpt) { + // There are types of incoming/outgoing arguments we can't handle yet, so + // it doesn't make sense to actually die here like in ISelLowering. Instead, + // fall back to SelectionDAG and let it try to handle this. + LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n"); + return false; + } + + if (CanTailCallOpt) + return lowerTailCall(MIRBuilder, Info, OutArgs); + + // Find out which ABI gets to decide where things go. + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = + getAssignFnsForCC(Info.CallConv, TLI); + + MachineInstrBuilder CallSeqStart; + CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. - auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR - : AArch64::BL); - MIB.add(Callee); + unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), false); + + auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + MIB.add(Info.Callee); // Tell the call which registers are clobbered. auto TRI = MF.getSubtarget().getRegisterInfo(); @@ -448,8 +983,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Do the actual argument marshalling. SmallVector PhysRegs; OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, - AssignFnVarArg); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) + AssignFnVarArg, false); + if (!handleAssignments(MIRBuilder, OutArgs, Handler)) return false; // Now we can add the actual call instruction to the correct basic block. @@ -458,34 +993,37 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // If Callee is a reg, since it is used by a target specific // instruction, it must have a register class matching the // constraint of that instruction. 
- if (Callee.isReg()) + if (Info.Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arugments, the physical register must be an // implicit-define of the call instruction. - CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); - if (!OrigRet.Ty->isVoidTy()) { - SplitArgs.clear(); - - splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv()); - + if (!Info.OrigRet.Ty->isVoidTy()) { + CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) + if (!handleAssignments(MIRBuilder, InArgs, Handler)) return false; } - if (SwiftErrorVReg) { + if (Info.SwiftErrorVReg) { MIB.addDef(AArch64::X21, RegState::Implicit); - MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21)); + MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21)); } + uint64_t CalleePopBytes = + doesCalleeRestoreStack(Info.CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt) + ? 
alignTo(Handler.StackSize, 16) + : 0; + CallSeqStart.addImm(Handler.StackSize).addImm(0); MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP) .addImm(Handler.StackSize) - .addImm(0); + .addImm(CalleePopBytes); return true; } diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h index 4f428f25453..b0c601c7062 100644 --- a/lib/Target/AArch64/AArch64CallLowering.h +++ b/lib/Target/AArch64/AArch64CallLowering.h @@ -40,16 +40,15 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs, - Register SwiftErrorVReg) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override { - return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0); - } + /// Returns true if the call can be lowered as a tail call. 
+ bool + isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const; bool supportSwiftError() const override { return true; } @@ -64,6 +63,18 @@ private: SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const; + + bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const; + + bool + doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl &InArgs) const; + + bool + areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64CallingConvention.cpp b/lib/Target/AArch64/AArch64CallingConvention.cpp index 02538a18761..a0695cef615 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.cpp +++ b/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -40,12 +40,14 @@ static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, unsigned SlotAlign) { unsigned Size = LocVT.getSizeInBits() / 8; - unsigned StackAlign = + const Align StackAlign = State.getMachineFunction().getDataLayout().getStackAlignment(); - unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign); + const Align OrigAlign(ArgFlags.getOrigAlign()); + const Align Align = std::min(OrigAlign, StackAlign); for (auto &It : PendingMembers) { - It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign))); + It.convertToMem(State.AllocateStack( + Size, std::max((unsigned)Align.value(), SlotAlign))); State.addLoc(It); SlotAlign = 1; } @@ -79,10 +81,14 @@ static bool CC_AArch64_Custom_Stack_Block( static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { + const AArch64Subtarget &Subtarget = static_cast( + 
State.getMachineFunction().getSubtarget()); + bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); + // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. ArrayRef RegList; - if (LocVT.SimpleTy == MVT::i64) + if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) RegList = HRegList; @@ -107,8 +113,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, if (!ArgFlags.isInConsecutiveRegsLast()) return true; - unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); - if (RegResult) { + // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 + // because that's how the armv7k Clang front-end emits small structs. + unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; + unsigned RegResult = State.AllocateRegBlock( + RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg); + if (RegResult && EltsPerReg == 1) { for (auto &It : PendingMembers) { It.convertToReg(RegResult); State.addLoc(It); @@ -116,14 +126,26 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } PendingMembers.clear(); return true; + } else if (RegResult) { + assert(EltsPerReg == 2 && "unexpected ABI"); + bool UseHigh = false; + CCValAssign::LocInfo Info; + for (auto &It : PendingMembers) { + Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; + State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult, + MVT::i64, Info)); + UseHigh = !UseHigh; + if (!UseHigh) + ++RegResult; + } + PendingMembers.clear(); + return true; } // Mark all regs in the class as unavailable for (auto Reg : RegList) State.AllocateReg(Reg); - const AArch64Subtarget &Subtarget = static_cast( - State.getMachineFunction().getSubtarget()); unsigned SlotAlign = Subtarget.isTargetDarwin() ? 
1 : 8; return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 13cc0c583fd..5a55d090d7c 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); +bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index d969a9e1ab3..bccbbd4591e 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -17,6 +17,10 @@ class CCIfAlign : class CCIfBigEndian : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; +class CCIfILP32 : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -70,6 +74,18 @@ def CC_AArch64_AAPCS : CallingConv<[ CCIfConsecutiveRegs>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCPassIndirect>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, 
P2, P3]>>, + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCPassIndirect>, + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. CCIfType<[i1, i8, i16], CCPromoteToType>, @@ -111,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType>, CCIfType<[v2f64, v4f32], CCBitConvertToType>, + CCIfConsecutiveRegs>, CCIfSwiftError>>, // Big endian vectors must be passed as if they were 1-element vectors so that @@ -135,7 +152,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16, + nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64], + CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, + + CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCAssignToReg<[P0, P1, P2, P3]>> ]>; // Vararg functions on windows pass floats in integer registers @@ -202,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr>>, + CCIfPtr>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], @@ -229,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCAssignToStack<16, 16>> ]>; +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. 
+let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[f16], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + + // The WebKit_JS calling convention only passes the first argument (the callee) // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other @@ -298,6 +351,12 @@ def CC_AArch64_GHC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> ]>; +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a @@ -306,7 +365,13 @@ def CC_AArch64_GHC : CallingConv<[ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... 
-def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Darwin puts the frame-record at the top of the callee-save area. +def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; @@ -314,17 +379,24 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, // and not (LR,FP) pairs. -def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, D8, D9, D10, D11, D12, D13, D14, D15)>; // AArch64 PCS for vector functions (VPCS) // must (additionally) preserve full Q8-Q23 registers -def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, (sequence "Q%u", 8, 23))>; +// Functions taking SVE arguments or returning an SVE type +// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + (sequence "Z%u", 8, 23), + (sequence "P%u", 4, 15))>; + // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; @@ -336,7 +408,7 @@ def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, def CSR_AArch64_AAPCS_ThisReturn : 
CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the @@ -352,7 +424,7 @@ def CSR_AArch64_TLS_Darwin // fast path calls a function that follows CSR_AArch64_TLS_Darwin, // CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. def CSR_AArch64_CXX_TLS_Darwin - : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), (sequence "D%u", 0, 31))>; diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 9f324b43320..35e6fef2436 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -103,6 +103,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) { case AArch64::ADDXri: return canAddBePartOfLOH(MI); case AArch64::LDRXui: + case AArch64::LDRWui: // Check immediate to see if the immediate is an address. 
switch (MI.getOperand(2).getType()) { default: @@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO, Info.Type = MCLOH_AdrpAdd; Info.IsCandidate = true; Info.MI0 = &MI; - } else if (MI.getOpcode() == AArch64::LDRXui && + } else if ((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { Info.Type = MCLOH_AdrpLdrGot; Info.IsCandidate = true; @@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, return true; } } else { - assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && + "Expect LDRXui or LDRWui"); assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && "Expected GOT relocation"); if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { @@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { handleClobber(LOHInfos[Idx]); } // Handle uses. + + SmallSet UsesSeen; for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; int Idx = mapRegToGPRIndex(MO.getReg()); if (Idx < 0) continue; - handleUse(MI, MO, LOHInfos[Idx]); + + // Multiple uses of the same register within a single instruction don't + // count as MultiUser or block optimization. This is especially important on + // arm64_32, where any memory operation is likely to be an explicit use of + // xN and an implicit use of wN (the base address register). 
+ if (!UsesSeen.count(Idx)) { + handleUse(MI, MO, LOHInfos[Idx]); + UsesSeen.insert(Idx); + } } } @@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { switch (Opcode) { case AArch64::ADDXri: case AArch64::LDRXui: + case AArch64::LDRWui: if (canDefBePartOfLOH(MI)) { const MachineOperand &Def = MI.getOperand(0); const MachineOperand &Op = MI.getOperand(1); diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td new file mode 100644 index 00000000000..bb99f2516ec --- /dev/null +++ b/lib/Target/AArch64/AArch64Combine.td @@ -0,0 +1,18 @@ +//=- AArch64.td - Define AArch64 Combine Rules ---------------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/GlobalISel/Combine.td" + +def AArch64PreLegalizerCombinerHelper: GICombinerHelper< + "AArch64GenPreLegalizerCombinerHelper", [all_combines, + elide_br_by_inverting_cond]> { + let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule"; +} diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index 453132e0966..25e23e4623d 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -78,7 +78,7 @@ void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { } MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { - if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!Register::isVirtualRegister(MO.getReg())) return nullptr; return MRI->getUniqueVRegDef(MO.getReg()); } @@ -98,7 +98,7 @@ MachineInstr 
*AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, } bool Is64Bit; unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); - unsigned NewDestReg = MI.getOperand(0).getReg(); + Register NewDestReg = MI.getOperand(0).getReg(); if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2cfbcc592d6..43ae9f8ec47 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -220,7 +220,7 @@ bool SSACCmpConv::trivialTailPHIs() { // PHI operands come in (VReg, MBB) pairs. for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) { MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB(); - unsigned Reg = I.getOperand(oi).getReg(); + Register Reg = I.getOperand(oi).getReg(); if (MBB == Head) { assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands"); HeadReg = Reg; @@ -259,7 +259,7 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) { // Writes to the zero register are dead. if (DstReg == AArch64::WZR || DstReg == AArch64::XZR) return true; - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + if (!Register::isVirtualRegister(DstReg)) return false; // A virtual register def without any uses will be marked dead later, and // eventually replaced by the zero register. @@ -631,7 +631,7 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { } const MCInstrDesc &MCID = TII->get(Opc); // Create a dummy virtual register for the SUBS def. - unsigned DestReg = + Register DestReg = MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. 
BuildMI(*Head, Head->end(), TermDL, MCID) diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index a43077cb88e..bc3808df1db 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -145,8 +145,8 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( continue; // We should not have any relevant physreg defs that are replacable by // zero before register allocation. So we just check for dead vreg defs. - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg) || + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg) || (!MO.isDead() && !MRI->use_nodbg_empty(Reg))) continue; assert(!MO.isImplicit() && "Unexpected implicit def!"); diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 210c10eb184..082e17e44d0 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -109,7 +109,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned BitSize) { MachineInstr &MI = *MBBI; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); uint64_t Imm = MI.getOperand(1).getImm(); if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) { @@ -150,7 +150,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, } break; case AArch64::MOVKWi: case AArch64::MOVKXi: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode)) .addReg(DstReg, @@ -174,14 +174,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - 
unsigned StatusReg = MI.getOperand(1).getReg(); + Register StatusReg = MI.getOperand(1).getReg(); bool StatusDead = MI.getOperand(1).isDead(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); - unsigned NewReg = MI.getOperand(4).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); + Register NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -254,16 +254,16 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( DebugLoc DL = MI.getDebugLoc(); MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestHi = MI.getOperand(1); - unsigned StatusReg = MI.getOperand(2).getReg(); + Register StatusReg = MI.getOperand(2).getReg(); bool StatusDead = MI.getOperand(2).isDead(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. 
assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(3).getReg(); - unsigned DesiredLoReg = MI.getOperand(4).getReg(); - unsigned DesiredHiReg = MI.getOperand(5).getReg(); - unsigned NewLoReg = MI.getOperand(6).getReg(); - unsigned NewHiReg = MI.getOperand(7).getReg(); + Register AddrReg = MI.getOperand(3).getReg(); + Register DesiredLoReg = MI.getOperand(4).getReg(); + Register DesiredHiReg = MI.getOperand(5).getReg(); + Register NewLoReg = MI.getOperand(6).getReg(); + Register NewHiReg = MI.getOperand(7).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -475,7 +475,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case AArch64::LOADgot: { MachineFunction *MF = MBB.getParent(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const MachineOperand &MO1 = MI.getOperand(1); unsigned Flags = MO1.getTargetFlags(); @@ -495,12 +495,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, } } else { // Small codemodel expand into ADRP + LDR. 
+ MachineFunction &MF = *MI.getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) - .add(MI.getOperand(0)) - .addReg(DstReg); + + MachineInstrBuilder MIB2; + if (MF.getSubtarget().isTargetILP32()) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); + unsigned DstFlags = MI.getOperand(0).getTargetFlags(); + MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) + .addDef(Reg32) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, DstFlags | RegState::Implicit); + } else { + unsigned DstReg = MI.getOperand(0).getReg(); + MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) + .add(MI.getOperand(0)) + .addUse(DstReg, RegState::Kill); + } if (MO1.isGlobal()) { MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); @@ -534,11 +548,28 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, case AArch64::MOVaddrTLS: case AArch64::MOVaddrEXT: { // Expand into ADRP + ADD. - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg) .add(MI.getOperand(1)); + if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) { + // MO_TAGGED on the page indicates a tagged address. Set the tag now. + // We do so by creating a MOVK that sets bits 48-63 of the register to + // (global address + 0x100000000 - PC) >> 48. This assumes that we're in + // the small code model so we can assume a binary size of <= 4GB, which + // makes the untagged PC relative offset positive. The binary must also be + // loaded into address range [0, 2^48). Both of these properties need to + // be ensured at runtime when using tagged addresses. 
+ auto Tag = MI.getOperand(1); + Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3); + Tag.setOffset(0x100000000); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg) + .addReg(DstReg) + .add(Tag) + .addImm(48); + } + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) .add(MI.getOperand(0)) @@ -561,7 +592,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; case AArch64::MOVbaseTLS: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); auto SysReg = AArch64SysReg::TPIDR_EL0; MachineFunction *MF = MBB.getParent(); if (MF->getTarget().getTargetTriple().isOSFuchsia() && @@ -642,11 +673,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, // instruction sequence. int BaseOffset = -AFI->getTaggedBasePointerOffset(); unsigned FrameReg; - int FrameRegOffset = TFI->resolveFrameOffsetReference( - MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false, + StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( + MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg, + /*PreferFP=*/false, /*ForSimm=*/true); Register SrcReg = FrameReg; - if (FrameRegOffset != 0) { + if (FrameRegOffset) { // Use output register as temporary. SrcReg = MI.getOperand(0).getReg(); emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg, diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 3b3182128c4..b54fc2e51ba 100644 --- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -642,7 +642,7 @@ static Optional getLoadInfo(const MachineInstr &MI) { } // Loads from the stack pointer don't get prefetched. 
- unsigned BaseReg = MI.getOperand(BaseRegIdx).getReg(); + Register BaseReg = MI.getOperand(BaseRegIdx).getReg(); if (BaseReg == AArch64::SP || BaseReg == AArch64::WSP) return None; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 8dc2768b959..277a3052f1e 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) return 0; - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); + unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) @@ -474,12 +474,32 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { ADRPReg) .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); - ResultReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), + unsigned LdrOpc; + if (Subtarget->isTargetILP32()) { + ResultReg = createResultReg(&AArch64::GPR32RegClass); + LdrOpc = AArch64::LDRWui; + } else { + ResultReg = createResultReg(&AArch64::GPR64RegClass); + LdrOpc = AArch64::LDRXui; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC | OpFlags); + if (!Subtarget->isTargetILP32()) + return ResultReg; + + // LDRWui produces a 32-bit register, but pointers in-register are 64-bits + // so we must extend the result on ILP32. 
+ unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(Result64) + .addImm(0) + .addReg(ResultReg, RegState::Kill) + .addImm(AArch64::sub_32); + return Result64; } else { // ADRP + ADDX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), @@ -504,6 +524,15 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); + // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, + // 'null' pointers need to have a somewhat special treatment. + if (const auto *CPN = dyn_cast(C)) { + (void)CPN; + assert(CPN->getType()->getPointerAddressSpace() == 0 && + "Unexpected address space"); + assert(VT == MVT::i64 && "Expected 64-bit pointers"); + return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); + } if (const auto *CI = dyn_cast(C)) return materializeInt(CI, VT); @@ -946,6 +975,9 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); + if (Subtarget->isTargetILP32() && Ty->isPointerTy()) + return false; + // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -988,6 +1020,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const { } bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { + if (Subtarget->isTargetILP32()) + return false; + unsigned ScaleFactor = getImplicitScaleFactor(VT); if (!ScaleFactor) return false; @@ -3165,6 +3200,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (IsTailCall) return false; + // FIXME: we could and should support this, but for now correctness at -O0 is + // more important. 
+ if (Subtarget->isTargetILP32()) + return false; + CodeModel::Model CM = TM.getCodeModel(); // Only support the small-addressing and large code models. if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) @@ -3434,8 +3474,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { MFI.setFrameAddressIsTaken(true); const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); - unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); + Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); // Recursively load frame address @@ -3796,6 +3836,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // FIXME: in principle it could. Mostly just a case of zero extending outgoing + // pointers. + if (Subtarget->isTargetILP32()) + return false; + if (F.isVarArg()) return false; @@ -3842,7 +3887,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); + Register DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; @@ -3970,7 +4015,7 @@ unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { if (DestVT == MVT::i64) { // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 
- unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), Reg64) .addImm(0) @@ -4123,7 +4168,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4244,7 +4289,7 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4353,7 +4398,7 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, }; unsigned Opc = OpcTable[IsZExt][Is64Bit]; if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { - unsigned TmpReg = MRI.createVirtualRegister(RC); + Register TmpReg = MRI.createVirtualRegister(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), TmpReg) .addImm(0) @@ -4412,7 +4457,7 @@ unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (DestVT == MVT::i8 || DestVT == MVT::i16) DestVT = MVT::i32; else if (DestVT == MVT::i64) { - unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBREG_TO_REG), Src64) .addImm(0) @@ -4495,7 +4540,7 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, const auto *LoadMI = MI; 
if (LoadMI->getOpcode() == TargetOpcode::COPY && LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { - unsigned LoadReg = MI->getOperand(1).getReg(); + Register LoadReg = MI->getOperand(1).getReg(); LoadMI = MRI.getUniqueVRegDef(LoadReg); assert(LoadMI && "Expected valid instruction"); } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 8c6e5cbd5c1..68e1e6a3022 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -44,11 +44,19 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | callee-saved gpr registers | <--. +// | | | On Darwin platforms these +// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, +// | | | (frame record first) +// | prev_fp, prev_lr | <--' // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) // | | -// | other callee-saved registers | +// | callee-saved fp/simd/SVE regs | +// | | +// |-----------------------------------| +// | | +// | SVE stack objects | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| @@ -80,6 +88,20 @@ // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // +// For Darwin platforms the frame-record (fp, lr) is stored at the top of the +// callee-saved area, since the unwind encoding does not allow for encoding +// this dynamically and existing tools depend on this layout. For other +// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved +// area to allow SVE stack objects (allocated directly below the callee-saves, +// if available) to be accessed directly from the framepointer. 
+// The SVE spill/fill instructions have VL-scaled addressing modes such +// as: +// ldr z8, [fp, #-7 mul vl] +// For SVE the size of the vector length (VL) is not known at compile-time, so +// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this +// layout, we don't need to add an unscaled offset to the framepointer before +// accessing the SVE object in the frame. +// // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads @@ -94,6 +116,7 @@ #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" +#include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -173,7 +196,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { if (!MO.isFI()) continue; - int Offset = 0; + StackOffset Offset; if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == AArch64FrameOffsetCannotUpdate) return 0; @@ -183,6 +206,12 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { return DefaultSafeSPDisplacement; } +/// Returns the size of the entire SVE stackframe (calleesaves + spills). 
+static StackOffset getSVEStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8}; +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -195,7 +224,8 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo(); unsigned NumBytes = AFI->getLocalStackSize(); - return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128); + return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 || + getSVEStackSize(MF)); } /// hasFP - Return true if the specified function should have a dedicated frame @@ -273,14 +303,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8}, + TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. 
assert(CalleePopAmount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, - TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, + {-(int64_t)CalleePopAmount, MVT::i8}, TII); } return MBB.erase(I); } @@ -416,6 +447,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + if (MF.getFunction().hasOptSize()) + return false; + if (AFI->getLocalStackSize() == 0) return false; @@ -436,6 +470,11 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (canUseRedZone(MF)) return false; + // When there is an SVE area on the stack, always allocate the + // callee-saves and spills/locals separately. + if (getSVEStackSize(MF)) + return false; + return true; } @@ -474,8 +513,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, Imm = -Imm; LLVM_FALLTHROUGH; case AArch64::STPXpre: { - unsigned Reg0 = MBBI->getOperand(1).getReg(); - unsigned Reg1 = MBBI->getOperand(2).getReg(); + Register Reg0 = MBBI->getOperand(1).getReg(); + Register Reg1 = MBBI->getOperand(2).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) .addImm(Imm * 8) @@ -523,8 +562,8 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, } case AArch64::STPXi: case AArch64::LDPXi: { - unsigned Reg0 = MBBI->getOperand(0).getReg(); - unsigned Reg1 = MBBI->getOperand(1).getReg(); + Register Reg0 = MBBI->getOperand(0).getReg(); + Register Reg1 = MBBI->getOperand(1).getReg(); if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) .addImm(Imm * 8) @@ -791,6 +830,10 @@ static bool needsWinCFI(const MachineFunction &MF) { F.needsUnwindTableEntry(); } +static bool isTargetDarwin(const MachineFunction &MF) { + return MF.getSubtarget().isTargetDarwin(); +} + 
void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -846,6 +889,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Ideally it should match SP value after prologue. AFI->setTaggedBasePointerOffset(MFI.getStackSize()); + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // getStackSize() includes all the locals in its size calculation. We don't // include these locals when computing the stack size of a funclet, as they // are allocated in the parent's stack frame and accessed via the frame @@ -856,6 +901,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, : (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); + assert(!SVEStackSize && + "unexpected function without stack frame but with SVE objects"); // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); if (!NumBytes) @@ -866,8 +913,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, AFI->setHasRedZone(true); ++NumRedZoneFunctions; } else { - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. 
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -901,8 +949,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false, + NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( @@ -948,9 +998,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { - // Only set up FP if we actually need to. Frame pointer is fp = - // sp - fixedobject - 16. - int FPOffset = AFI->getCalleeSavedStackSize() - 16; + // Only set up FP if we actually need to. + int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; + if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -958,8 +1008,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // mov fp,sp when FPOffset is zero. // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, + {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false, + NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -1056,6 +1107,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, NumBytes = 0; } + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII, + MachineInstr::FrameSetup); + // Allocate space for the rest of the frame. 
if (NumBytes) { const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); @@ -1071,8 +1125,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. - emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, + {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, + false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned Alignment = MFI.getMaxAlignment(); @@ -1130,8 +1185,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); - const int StackGrowth = -TD.getPointerSize(0); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + const int StackGrowth = isTargetDarwin(MF) + ? (2 * -TD.getPointerSize(0)) + : -AFI->getCalleeSavedStackSize(); + Register FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // // .globl __foo @@ -1202,7 +1259,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, Reg, 2 * StackGrowth - FixedObject)); + nullptr, Reg, StackGrowth - FixedObject)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1401,11 +1458,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameDestroy); } + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // If there is a single SP update, insert it before the ret and we're done. 
if (CombineSPBump) { + assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, - false, NeedsWinCFI, &HasWinCFI); + {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1416,6 +1476,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); + // Deallocate the SVE area. + if (SVEStackSize) + if (!AFI->isStackRealigned()) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize, + TII, MachineInstr::FrameDestroy); + if (!hasFP(MF)) { bool RedZone = canUseRedZone(MF); // If this was a redzone leaf function, we don't need to restore the @@ -1437,8 +1503,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + {StackRestoreBytes, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { if (NeedsWinCFI) { HasWinCFI = true; @@ -1456,13 +1522,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. - if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { + int64_t OffsetToFrameRecord = + isTargetDarwin(MF) ? 
(-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - -AFI->getCalleeSavedStackSize() + 16, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); - else if (NumBytes) - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); + {OffsetToFrameRecord, MVT::i8}, + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + } else if (NumBytes) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1483,8 +1552,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + {(int64_t)AfterCSRPopSize, MVT::i8}, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } if (NeedsWinCFI && HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) @@ -1501,10 +1570,11 @@ int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { return resolveFrameIndexReference( - MF, FI, FrameReg, - /*PreferFP=*/ - MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), - /*ForSimm=*/false); + MF, FI, FrameReg, + /*PreferFP=*/ + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), + /*ForSimm=*/false) + .getBytes(); } int AArch64FrameLowering::getNonLocalFrameIndexReference( @@ -1512,18 +1582,19 @@ int AArch64FrameLowering::getNonLocalFrameIndexReference( return getSEHFrameIndexOffset(MF, FI); } -static int getFPOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getFPOffset(const 
MachineFunction &MF, int ObjectOffset) { const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - return ObjectOffset + FixedObject + 16; + unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(); + return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } -static int getStackOffset(const MachineFunction &MF, int ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return ObjectOffset + MFI.getStackSize(); + return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8}; } int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, @@ -1532,23 +1603,23 @@ int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP - ? getFPOffset(MF, ObjectOffset) - : getStackOffset(MF, ObjectOffset); + ? 
getFPOffset(MF, ObjectOffset).getBytes() + : getStackOffset(MF, ObjectOffset).getBytes(); } -int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP, - bool ForSimm) const { +StackOffset AArch64FrameLowering::resolveFrameIndexReference( + const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP, + bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); int ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); - return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg, + bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector; + return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, PreferFP, ForSimm); } -int AArch64FrameLowering::resolveFrameOffsetReference( - const MachineFunction &MF, int ObjectOffset, bool isFixed, +StackOffset AArch64FrameLowering::resolveFrameOffsetReference( + const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE, unsigned &FrameReg, bool PreferFP, bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); const auto *RegInfo = static_cast( @@ -1556,17 +1627,23 @@ int AArch64FrameLowering::resolveFrameOffsetReference( const auto *AFI = MF.getInfo(); const auto &Subtarget = MF.getSubtarget(); - int FPOffset = getFPOffset(MF, ObjectOffset); - int Offset = getStackOffset(MF, ObjectOffset); + int FPOffset = getFPOffset(MF, ObjectOffset).getBytes(); + int Offset = getStackOffset(MF, ObjectOffset).getBytes(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize()); + const StackOffset &SVEStackSize = getSVEStackSize(MF); + // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't // reliable as a base). Make sure useFPForScavengingIndex() does the // right thing for the emergency spill slot. 
bool UseFP = false; - if (AFI->hasStackFrame()) { + if (AFI->hasStackFrame() && !isSVE) { + // We shouldn't prefer using the FP when there is an SVE area + // in between the FP and the non-SVE locals/spills. + PreferFP &= !SVEStackSize; + // Note: Keeping the following as multiple 'if' statements rather than // merging to a single expression for readability. // @@ -1594,8 +1671,10 @@ int AArch64FrameLowering::resolveFrameOffsetReference( bool CanUseBP = RegInfo->hasBasePointer(MF); if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. UseFP = PreferFP; - else if (!CanUseBP) // Can't use BP. Forced to use FP. + else if (!CanUseBP) { // Can't use BP. Forced to use FP. + assert(!SVEStackSize && "Expected BP to be available"); UseFP = true; + } // else we can use BP and FP, but the offset from FP won't fit. // That will make us scavenge registers which we can probably avoid by // using BP. If it won't fit for BP either, we'll scavenge anyway. @@ -1625,9 +1704,36 @@ int AArch64FrameLowering::resolveFrameOffsetReference( "In the presence of dynamic stack pointer realignment, " "non-argument/CSR objects cannot be accessed through the frame pointer"); + if (isSVE) { + int64_t OffsetToSVEArea = + MFI.getStackSize() - AFI->getCalleeSavedStackSize(); + StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8}; + StackOffset SPOffset = SVEStackSize + + StackOffset(ObjectOffset, MVT::nxv1i8) + + StackOffset(OffsetToSVEArea, MVT::i8); + // Always use the FP for SVE spills if available and beneficial. + if (hasFP(MF) && + (SPOffset.getBytes() || + FPOffset.getScalableBytes() < SPOffset.getScalableBytes() || + RegInfo->needsStackRealignment(MF))) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + + FrameReg = RegInfo->hasBasePointer(MF) ? 
RegInfo->getBaseRegister() + : (unsigned)AArch64::SP; + return SPOffset; + } + + StackOffset ScalableOffset = {}; + if (UseFP && !(isFixed || isCSR)) + ScalableOffset = -SVEStackSize; + if (!UseFP && (isFixed || isCSR)) + ScalableOffset = SVEStackSize; + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return FPOffset; + return StackOffset(FPOffset, MVT::i8) + ScalableOffset; } // Use the base pointer if we have one. @@ -1644,7 +1750,7 @@ int AArch64FrameLowering::resolveFrameOffsetReference( Offset -= AFI->getLocalStackSize(); } - return Offset; + return StackOffset(Offset, MVT::i8) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -1682,6 +1788,23 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return true; } +/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. +/// WindowsCFI requires that only consecutive registers can be paired. +/// LR and FP need to be allocated together when the frame needs to save +/// the frame-record. This means any other register pairing with LR is invalid. +static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI, bool NeedsFrameRecord) { + if (NeedsWinCFI) + return invalidateWindowsRegisterPairing(Reg1, Reg2, true); + + // If we need to store the frame record, don't pair any register + // with LR other than FP. 
+ if (NeedsFrameRecord) + return Reg2 == AArch64::LR; + + return false; +} + namespace { struct RegPairInfo { @@ -1701,7 +1824,7 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs, - bool &NeedShadowCallStackProlog) { + bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { if (CSI.empty()) return; @@ -1743,7 +1866,8 @@ static void computeCalleeSaveRegisterPairs( switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + NeedsFrameRecord)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: @@ -1777,6 +1901,10 @@ static void computeCalleeSaveRegisterPairs( (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); + assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || + RPI.Reg1 == AArch64::LR) && + "FrameRecord must be allocated together with LR"); + // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. 
assert((!produceCompactUnwindFrame(MF) || @@ -1825,7 +1953,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (NeedShadowCallStackProlog) { @@ -1955,7 +2083,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; @@ -2113,19 +2241,26 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(AArch64::LR); } - LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; + LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:"; for (unsigned Reg : SavedRegs.set_bits()) dbgs() << ' ' << printReg(Reg, RegInfo); dbgs() << "\n";); // If any callee-saved registers are used, the frame cannot be eliminated. - bool CanEliminateFrame = SavedRegs.count() == 0; + unsigned MaxAlign = getStackAlignment(); + int64_t SVEStackSize = + alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign); + assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes"); + bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); - bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; + + // Conservatively always assume BigStack when there are SVE spills. 
+ bool BigStack = SVEStackSize || + (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); @@ -2145,7 +2280,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = UnspilledCSGPRPaired; + ExtraCSSpill = UnspilledCSGPR; } // If we didn't find an extra callee-saved register to spill, create @@ -2181,14 +2316,42 @@ bool AArch64FrameLowering::enableStackSlotScavenging( return AFI->hasCalleeSaveStackFreeSpace(); } +int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI, + unsigned &MaxAlign) const { + // Process all fixed stack objects. + int64_t Offset = 0; + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getStackID(I) == TargetStackID::SVEVector) { + int64_t FixedOffset = -MFI.getObjectOffset(I); + if (FixedOffset > Offset) + Offset = FixedOffset; + } + + // Note: We don't take allocatable stack objects into + // account yet, because allocation for those is not yet + // implemented. + return Offset; +} + void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + + assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && + "Upwards growing stack unsupported"); + + unsigned MaxAlign = getStackAlignment(); + int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign); + + AArch64FunctionInfo *AFI = MF.getInfo(); + AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign)); + assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes"); + // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. 
if (!MF.hasEHFunclets()) return; const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - MachineFrameInfo &MFI = MF.getFrameInfo(); WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); MachineBasicBlock &MBB = MF.front(); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 6dbd34b2189..ac150e86c9e 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H +#include "AArch64StackOffset.h" #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { @@ -20,7 +21,7 @@ namespace llvm { class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() - : TargetFrameLowering(StackGrowsDown, 16, 0, 16, + : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16), true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, @@ -39,12 +40,13 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - int resolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, bool PreferFP, - bool ForSimm) const; - int resolveFrameOffsetReference(const MachineFunction &MF, int ObjectOffset, - bool isFixed, unsigned &FrameReg, - bool PreferFP, bool ForSimm) const; + StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, bool PreferFP, + bool ForSimm) const; + StackOffset resolveFrameOffsetReference(const MachineFunction &MF, + int ObjectOffset, bool isFixed, + bool isSVE, unsigned &FrameReg, + bool PreferFP, bool ForSimm) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -85,9 +87,21 @@ public: int FI) const override; int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const; + bool 
isSupportedStackID(TargetStackID::Value ID) const override { + switch (ID) { + default: + return false; + case TargetStackID::Default: + case TargetStackID::SVEVector: + case TargetStackID::NoAlloc: + return true; + } + } + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; + int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const; }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index cd7e927ac80..1f08505f37e 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2053,7 +2053,7 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, } static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { - if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return; // Initialize UsefulBits if (!Depth) { @@ -2913,49 +2913,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; break; - case ISD::EXTRACT_VECTOR_ELT: { - // Extracting lane zero is a special case where we can just use a plain - // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for - // the rest of the compiler, especially the register allocator and copyi - // propagation, to reason about, so is preferred when it's possible to - // use it. - ConstantSDNode *LaneNode = cast(Node->getOperand(1)); - // Bail and use the default Select() for non-zero lanes. - if (LaneNode->getZExtValue() != 0) - break; - // If the element type is not the same as the result type, likewise - // bail and use the default Select(), as there's more to do than just - // a cross-class COPY. This catches extracts of i8 and i16 elements - // since they will need an explicit zext. 
- if (VT != Node->getOperand(0).getValueType().getVectorElementType()) - break; - unsigned SubReg; - switch (Node->getOperand(0) - .getValueType() - .getVectorElementType() - .getSizeInBits()) { - default: - llvm_unreachable("Unexpected vector element type!"); - case 64: - SubReg = AArch64::dsub; - break; - case 32: - SubReg = AArch64::ssub; - break; - case 16: - SubReg = AArch64::hsub; - break; - case 8: - llvm_unreachable("unexpected zext-requiring extract element!"); - } - SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, - Node->getOperand(0)); - LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); - LLVM_DEBUG(Extract->dumpr(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); - ReplaceNode(Node, Extract.getNode()); - return; - } case ISD::Constant: { // Materialize zero constants as copies from WZR/XZR. This allows // the coalescer to propagate these into other instructions. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 7becc99fb5c..2746117e8ee 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -161,6 +162,29 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addQRTypeForNEON(MVT::v8f16); } + if (Subtarget->hasSVE()) { + // Add legal sve predicate types + addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); + addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); + + // Add legal sve data types + addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); + 
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); + + addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass); + addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); + } + // Compute derived properties from the register classes computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -283,7 +307,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // AArch64 lacks both left-rotate and popcount instructions. setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } @@ -297,7 +321,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); } @@ -606,6 +630,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxLoadsPerMemcmpOptSize = 4; + MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() + ? MaxLoadsPerMemcmpOptSize : 8; + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -613,10 +641,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, EnableExtLdPromotion = true; // Set required alignment. 
- setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); // Set preferred alignments. - setPrefFunctionAlignment(STI.getPrefFunctionAlignment()); - setPrefLoopAlignment(STI.getPrefLoopAlignment()); + setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); + setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); // Only change the limit for entries in a jump table if specified by // the sub target, but not at the command line. @@ -725,7 +753,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); // Likewise, narrowing and extending vector loads/stores aren't handled // directly. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { @@ -741,7 +769,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -773,6 +801,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); } + if (Subtarget->hasSVE()) { + for (MVT VT : MVT::integer_scalable_vector_valuetypes()) { + if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + } + } + PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); } @@ -1025,6 +1060,14 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( Known.One &= Known2.One; break; } + case AArch64ISD::LOADgot: + case AArch64ISD::ADDlow: { + if (!Subtarget->isTargetILP32()) + 
break; + // In ILP32 mode all valid pointers are in the low 4GB of the address-space. + Known.Zero = APInt::getHighBitsSet(64, 32); + break; + } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); @@ -1100,6 +1143,32 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( return true; } +// Same as above but handling LLTs instead. +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( + LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (Subtarget->requiresStrictAlign()) + return false; + + if (Fast) { + // Some CPUs are fine with unaligned stores except for 128-bit ones. + *Fast = !Subtarget->isMisaligned128StoreSlow() || + Ty.getSizeInBytes() != 16 || + // See comments in performSTORECombine() for more details about + // these conditions. + + // Code that uses clang vector extensions can mark that it + // wants unaligned accesses to be treated as fast by + // underspecifying alignment to be 1 or 2. + Align <= 2 || + + // Disregard v2i64. Memcpy lowering produces those and splitting + // them regresses performance on micro-benchmarks and olden/bh. 
+ Ty == LLT::vector(2, 64); + } + return true; +} + FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { @@ -1238,6 +1307,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::STZG: return "AArch64ISD::STZG"; case AArch64ISD::ST2G: return "AArch64ISD::ST2G"; case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G"; + case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI"; + case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; + case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; + case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; } return nullptr; } @@ -1263,9 +1336,9 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); MachineFunction::iterator It = ++MBB->getIterator(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned IfTrueReg = MI.getOperand(1).getReg(); - unsigned IfFalseReg = MI.getOperand(2).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register IfTrueReg = MI.getOperand(1).getReg(); + Register IfFalseReg = MI.getOperand(2).getReg(); unsigned CondCode = MI.getOperand(3).getImm(); bool NZCVKilled = MI.getOperand(4).isKill(); @@ -2140,7 +2213,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; } // Returns true if the given Op is the overflow flag result of an overflow @@ -2349,7 +2423,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. 
SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions, SDLoc(Op)).first; } @@ -2419,7 +2494,8 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first; + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -2773,6 +2849,19 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_sunpkhi: + return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_sunpklo: + return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uunpkhi: + return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uunpklo: + return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); const auto *RegInfo = Subtarget->getRegisterInfo(); @@ -2937,6 +3026,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SPLAT_VECTOR: + return LowerSPLAT_VECTOR(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SRA: @@ -3014,8 +3105,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, return CC_AArch64_Win64_VarArg; if 
(!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; - return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; - case CallingConv::Win64: + if (!IsVarArg) + return CC_AArch64_DarwinPCS; + return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg + : CC_AArch64_DarwinPCS_VarArg; + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; case CallingConv::AArch64_VectorCall: return CC_AArch64_AAPCS; @@ -3038,6 +3132,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector ArgLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); @@ -3094,11 +3189,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments( continue; } + SDValue ArgValue; if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; const TargetRegisterClass *RC; if (RegVT == MVT::i32) @@ -3113,6 +3207,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments( RC = &AArch64::FPR64RegClass; else if (RegVT == MVT::f128 || RegVT.is128BitVector()) RC = &AArch64::FPR128RegClass; + else if (RegVT.isScalableVector() && + RegVT.getVectorElementType() == MVT::i1) + RC = &AArch64::PPRRegClass; + else if (RegVT.isScalableVector()) + RC = &AArch64::ZPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -3128,20 +3227,23 @@ SDValue AArch64TargetLowering::LowerFormalArguments( llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; + case CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); break; case CCValAssign::AExt: case CCValAssign::SExt: case CCValAssign::ZExt: - // SelectionDAGBuilder will insert appropriate 
AssertZExt & AssertSExt - // nodes after our lowering. - assert(RegVT == Ins[i].VT && "incorrect register location selected"); + break; + case CCValAssign::AExtUpper: + ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue, + DAG.getConstant(32, DL, RegVT)); + ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT()); break; } - - InVals.push_back(ArgValue); - } else { // VA.isRegLoc() assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); @@ -3156,7 +3258,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -3165,9 +3266,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments( switch (VA.getLocInfo()) { default: break; + case CCValAssign::Trunc: case CCValAssign::BCvt: MemVT = VA.getLocVT(); break; + case CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; break; @@ -3184,8 +3290,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); - InVals.push_back(ArgValue); } + if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) + ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), + ArgValue, DAG.getValueType(MVT::i32)); + InVals.push_back(ArgValue); } // varargs @@ -3202,8 +3311,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. 
- StackOffset = ((StackOffset + 7) & ~7); + // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 + StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); if (MFI.hasMustTailInVarArgFunc()) { @@ -3233,8 +3342,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( assert(!FuncInfo->getSRetReturnReg()); MVT PtrTy = getPointerTy(DAG.getDataLayout()); - unsigned Reg = - MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); + Register Reg = + MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]); @@ -3366,6 +3475,7 @@ SDValue AArch64TargetLowering::LowerCallResult( : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; + DenseMap CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC); @@ -3383,10 +3493,16 @@ SDValue AArch64TargetLowering::LowerCallResult( continue; } - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); + // Avoid copying a physreg twice since RegAllocFast is incompetent and only + // allows one use of a physreg per block. 
+ SDValue Val = CopiedRegs.lookup(VA.getLocReg()); + if (!Val) { + Val = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + CopiedRegs[VA.getLocReg()] = Val; + } switch (VA.getLocInfo()) { default: @@ -3396,6 +3512,15 @@ SDValue AArch64TargetLowering::LowerCallResult( case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; + case CCValAssign::AExtUpper: + Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val, + DAG.getConstant(32, DL, VA.getLocVT())); + LLVM_FALLTHROUGH; + case CCValAssign::AExt: + LLVM_FALLTHROUGH; + case CCValAssign::ZExt: + Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT()); + break; } InVals.push_back(Val); @@ -3593,6 +3718,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool IsThisReturn = false; AArch64FunctionInfo *FuncInfo = MF.getInfo(); @@ -3709,6 +3835,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, getPointerTy(DAG.getDataLayout())); SmallVector, 8> RegsToPass; + SmallSet RegsUsed; SmallVector MemOpChains; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3716,7 +3843,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); + RegsToPass.emplace_back(F.PReg, Val); } } @@ -3747,12 +3874,25 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, 
VA.getLocVT())); + break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + Arg = DAG.getBitcast(VA.getLocVT(), Arg); + break; + case CCValAssign::Trunc: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); break; case CCValAssign::FPExt: Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::Indirect: + assert(VA.getValVT().isScalableVector() && + "Only scalable vectors can be passed indirectly"); + llvm_unreachable("Spilling of SVE vectors not yet implemented"); } if (VA.isRegLoc()) { @@ -3764,7 +3904,33 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, "unexpected use of 'returned'"); IsThisReturn = true; } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (RegsUsed.count(VA.getLocReg())) { + // If this register has already been used then we're trying to pack + // parts of an [N x i32] into an X-register. The extension type will + // take care of putting the two halves in the right place but we have to + // combine them. + SDValue &Bits = + std::find_if(RegsToPass.begin(), RegsToPass.end(), + [=](const std::pair &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + // Call site info is used for function's parameter entry value + // tracking. For now we track only simple cases when parameter + // is transferred through whole register. 
+ CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(), + [&VA](MachineFunction::ArgRegPair ArgReg) { + return ArgReg.Reg == VA.getLocReg(); + }), + CSInfo.end()); + } else { + RegsToPass.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); + } } else { assert(VA.isMemLoc()); @@ -3899,6 +4065,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); + // Check callee args/returns for SVE registers and set calling convention + // accordingly. + if (CallConv == CallingConv::C) { + bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ + return Out.VT.isScalableVector(); + }); + bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ + return In.VT.isScalableVector(); + }); + + if (CalleeInSVE || CalleeOutSVE) + CallConv = CallingConv::AArch64_SVE_VectorCall; + } + // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -3930,12 +4110,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // actual call instruction. if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0; @@ -3983,7 +4166,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Copy the result values into the output registers. 
SDValue Flag; - SmallVector RetOps(1, Chain); + SmallVector, 4> RetVals; + SmallSet RegsUsed; for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; @@ -4005,11 +4189,38 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExt: + case CCValAssign::ZExt: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; } - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + if (RegsUsed.count(VA.getLocReg())) { + SDValue &Bits = + std::find_if(RetVals.begin(), RetVals.end(), + [=](const std::pair &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + } else { + RetVals.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + } + } + + SmallVector RetOps(1, Chain); + for (auto &RetVal : RetVals) { + Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + RetOps.push_back( + DAG.getRegister(RetVal.first, RetVal.second.getValueType())); } // Windows AArch64 ABIs require that for returning structs by value we copy @@ -4139,8 +4350,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); - unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); + unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); if (OpFlags != AArch64II::MO_NO_FLAG) 
assert(cast(Op)->getOffset() == 0 && @@ -4204,6 +4414,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SDLoc DL(Op); MVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout()); const GlobalValue *GV = cast(Op)->getGlobal(); SDValue TLVPAddr = @@ -4214,13 +4425,15 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( - MVT::i64, DL, Chain, DescAddr, + PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 8, - MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant | - MachineMemOperand::MODereferenceable); + /* Alignment = */ PtrMemVT.getSizeInBits() / 8, + MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); + // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer. + FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); @@ -4470,7 +4683,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // value of a libcall against zero, which is just what the rest of LowerBR_CC // is expecting to deal with. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -4736,7 +4949,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. 
if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, use it. if (!RHS.getNode()) { @@ -4798,7 +5011,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -5096,6 +5309,7 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SDLoc DL(Op); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy(DAG.getDataLayout())); + FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout())); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); @@ -5202,15 +5416,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = - Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; + unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; + unsigned VaListSize = (Subtarget->isTargetDarwin() || + Subtarget->isTargetWindows()) ? 
PtrSize : 32; const Value *DestSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), - Op.getOperand(2), - DAG.getConstant(VaListSize, DL, MVT::i32), - 8, false, false, false, MachinePointerInfo(DestSV), + return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize, + false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -5224,12 +5438,15 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8; auto PtrVT = getPointerTy(DAG.getDataLayout()); - - SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V)); + auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); + SDValue VAList = + DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V)); Chain = VAList.getValue(1); + VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT); - if (Align > 8) { + if (Align > MinSlotSize) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); @@ -5238,14 +5455,14 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); + unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the // vaargs list to match this, and for FP values we need to introduce // FP_ROUND nodes as well. 
if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; + ArgSize = std::max(ArgSize, MinSlotSize); bool NeedFPTrunc = false; if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { ArgSize = 8; @@ -5255,6 +5472,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(ArgSize, DL, PtrVT)); + VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT); + // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V)); @@ -5284,10 +5503,15 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = - DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); + + if (Subtarget->isTargetILP32()) + FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr, + DAG.getValueType(VT)); + return FrameAddr; } @@ -5306,9 +5530,9 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = MatchRegisterName(RegName); +Register AArch64TargetLowering:: +getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const { + Register Reg = MatchRegisterName(RegName); if (AArch64::X1 <= Reg && Reg <= AArch64::X28) { const MCRegisterInfo *MRI = Subtarget->getRegisterInfo(); unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false); @@ -5653,6 +5877,21 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const { return "r"; } +enum PredicateConstraint { + Upl, + Upa, + Invalid +}; + +static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { + PredicateConstraint P = PredicateConstraint::Invalid; + if (Constraint == "Upa") + P = PredicateConstraint::Upa; + if (Constraint == "Upl") + P = PredicateConstraint::Upl; + return P; +} + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. AArch64TargetLowering::ConstraintType @@ -5661,19 +5900,30 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { switch (Constraint[0]) { default: break; - case 'z': - return C_Other; case 'x': case 'w': + case 'y': return C_RegisterClass; // An address with a single base register. Due to the way we // currently handle addresses it is the same as 'r'. 
case 'Q': return C_Memory; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'Y': + case 'Z': + return C_Immediate; + case 'z': case 'S': // A symbolic address return C_Other; } - } + } else if (parsePredicateConstraint(Constraint) != + PredicateConstraint::Invalid) + return C_RegisterClass; return TargetLowering::getConstraintType(Constraint); } @@ -5697,12 +5947,17 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( break; case 'x': case 'w': + case 'y': if (type->isFloatingPointTy() || type->isVectorTy()) weight = CW_Register; break; case 'z': weight = CW_Constant; break; + case 'U': + if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid) + weight = CW_Register; + break; } return weight; } @@ -5719,6 +5974,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'w': if (!Subtarget->hasFPARMv8()) break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPRRegClass); if (VT.getSizeInBits() == 16) return std::make_pair(0U, &AArch64::FPR16RegClass); if (VT.getSizeInBits() == 32) @@ -5733,9 +5990,25 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'x': if (!Subtarget->hasFPARMv8()) break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPR_4bRegClass); if (VT.getSizeInBits() == 128) return std::make_pair(0U, &AArch64::FPR128_loRegClass); break; + case 'y': + if (!Subtarget->hasFPARMv8()) + break; + if (VT.isScalableVector()) + return std::make_pair(0U, &AArch64::ZPR_3bRegClass); + break; + } + } else { + PredicateConstraint PC = parsePredicateConstraint(Constraint); + if (PC != PredicateConstraint::Invalid) { + assert(VT.isScalableVector()); + bool restricted = (PC == PredicateConstraint::Upl); + return restricted ? 
std::make_pair(0U, &AArch64::PPR_3bRegClass) + : std::make_pair(0U, &AArch64::PPRRegClass); } } if (StringRef("{cc}").equals_lower(Constraint)) @@ -6279,6 +6552,8 @@ static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { static bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned NumElts = VT.getVectorNumElements(); + if (NumElts % 2 != 0) + return false; WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { @@ -6446,8 +6721,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) { if (!isConcatMask(Mask, VT, SplitV0)) return SDValue(); - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements() / 2); + EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); if (SplitV0) { V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, DAG.getConstant(0, DL, MVT::i64)); @@ -6790,6 +7064,41 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return GenerateTBL(Op, ShuffleMask, DAG); } +SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT ElemVT = VT.getScalarType(); + + SDValue SplatVal = Op.getOperand(0); + + // Extend input splat value where needed to fit into a GPR (32b or 64b only) + // FPRs don't have this restriction. + switch (ElemVT.getSimpleVT().SimpleTy) { + case MVT::i8: + case MVT::i16: + SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32); + break; + case MVT::i64: + SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64); + break; + case MVT::i32: + // Fine as is + break; + // TODO: we can support splats of i1s and float types, but haven't added + // patterns yet. 
+ case MVT::i1: + case MVT::f16: + case MVT::f32: + case MVT::f64: + default: + llvm_unreachable("Unsupported SPLAT_VECTOR input operand type"); + break; + } + + return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal); +} + static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits) { EVT VT = BVN->getValueType(0); @@ -8063,7 +8372,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -8089,7 +8398,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; @@ -8101,7 +8410,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } @@ -8112,7 +8421,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -8122,7 +8431,7 @@ bool 
AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 16; + Info.align = Align(16); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; case Intrinsic::aarch64_stlxp: @@ -8131,7 +8440,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; - Info.align = 16; + Info.align = Align(16); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; default: @@ -8278,7 +8587,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) @@ -8739,6 +9048,39 @@ EVT AArch64TargetLowering::getOptimalMemOpType( return MVT::Other; } +LLT AArch64TargetLowering::getOptimalMemOpLLT( + uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const { + bool CanImplicitFloat = + !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat); + bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; + bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; + // Only use AdvSIMD to implement memset of 32-byte and above. It would have + // taken one instruction to materialize the v2i64 zero and one store (with + // restrictive addressing mode). Just do i64 stores. 
+ bool IsSmallMemset = IsMemset && Size < 32; + auto AlignmentIsAcceptable = [&](EVT VT, unsigned AlignCheck) { + if (memOpAlign(SrcAlign, DstAlign, AlignCheck)) + return true; + bool Fast; + return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone, + &Fast) && + Fast; + }; + + if (CanUseNEON && IsMemset && !IsSmallMemset && + AlignmentIsAcceptable(MVT::v2i64, 16)) + return LLT::vector(2, 64); + if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, 16)) + return LLT::scalar(128); + if (Size >= 8 && AlignmentIsAcceptable(MVT::i64, 8)) + return LLT::scalar(64); + if (Size >= 4 && AlignmentIsAcceptable(MVT::i32, 4)) + return LLT::scalar(32); + return LLT(); +} + // 12-bit optionally shifted immediates are legal for adds. bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { if (Immed == std::numeric_limits::min()) { @@ -10065,6 +10407,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { Opcode = AArch64ISD::SQSHLU_I; IsRightShift = false; break; + case Intrinsic::aarch64_neon_sshl: + case Intrinsic::aarch64_neon_ushl: + // For positive shift amounts we can use SHL, as ushl/sshl perform a regular + // left shift for positive shift amounts. Below, we only replace the current + // node with VSHL, if this condition is met. 
+ Opcode = AArch64ISD::VSHL; + IsRightShift = false; + break; } if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { @@ -10151,6 +10501,8 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_sqshlu: case Intrinsic::aarch64_neon_srshl: case Intrinsic::aarch64_neon_urshl: + case Intrinsic::aarch64_neon_sshl: + case Intrinsic::aarch64_neon_ushl: return tryCombineShiftImm(IID, N, DAG); case Intrinsic::aarch64_crc32b: case Intrinsic::aarch64_crc32cb: @@ -10482,10 +10834,10 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return ReplacedSplat; SDLoc DL(S); - unsigned NumElts = VT.getVectorNumElements() / 2; + // Split VT into two. - EVT HalfVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts); + EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NumElts = HalfVT.getVectorNumElements(); SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, DAG.getConstant(0, DL, MVT::i64)); SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal, @@ -10567,7 +10919,7 @@ static SDValue performPostLD1Combine(SDNode *N, // are predecessors to each other or the Vector. SmallPtrSet Visited; SmallVector Worklist; - Visited.insert(N); + Visited.insert(Addr.getNode()); Worklist.push_back(User); Worklist.push_back(LD); Worklist.push_back(Vector.getNode()); @@ -11983,6 +12335,27 @@ bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( return Mask->getValue().isPowerOf2(); } +bool AArch64TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does baseline recommend not to perform the fold by default? 
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // Else, if this is a vector shift, prefer 'shl'. + return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; +} + +bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG, + SDNode *N) const { + if (DAG.getMachineFunction().getFunction().hasMinSize() && + !Subtarget->isTargetWindows()) + return false; + return true; +} + void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in AArch64unctionInfo. AArch64FunctionInfo *AFI = Entry->getParent()->getInfo(); @@ -12009,7 +12382,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 4421c31f65c..00fa96bc4e6 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -191,6 +191,11 @@ enum NodeType : unsigned { FRECPE, FRECPS, FRSQRTE, FRSQRTS, + SUNPKHI, + SUNPKLO, + UUNPKHI, + UUNPKLO, + // NEON Load/Store with post-increment base updates LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, LD3post, @@ -261,6 +266,14 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const override; + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { + // Returning i64 unconditionally here (i.e. even for ILP32) means that the + // *DAG* representation of pointers will always be 64-bits. 
They will be + // truncated and extended when transferred to memory, but the 64-bit DAG + // allows us to use AArch64's addressing modes much more easily. + return MVT::getIntegerVT(64); + } + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override; @@ -272,6 +285,10 @@ public: EVT VT, unsigned AddrSpace = 0, unsigned Align = 1, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, bool *Fast = nullptr) const override; + /// LLT variant. + bool allowsMisalignedMemoryAccesses( + LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast = nullptr) const override; /// Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -358,6 +375,10 @@ public: bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, const AttributeList &FuncAttributes) const override; + LLT getOptimalMemOpLLT(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const override; + /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. 
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, @@ -480,11 +501,12 @@ public: return VT.getSizeInBits() >= 64; // vector 'bic' } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { - if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; - } + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { @@ -655,6 +677,7 @@ private: SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; @@ -690,8 +713,8 @@ private: unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. 
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index e22cb44d81a..459b5392362 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -204,19 +204,27 @@ def : Pat<(relaxed_store def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(ldxr_1 GPR64sp:$addr), (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>; @@ -237,19 +245,27 @@ def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff), def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let 
GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(ldaxr_1 GPR64sp:$addr), (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>; @@ -271,22 +287,30 @@ def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff), def stxr_1 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def stxr_2 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def stxr_4 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def stxr_8 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr), @@ -317,22 +341,30 @@ def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr), def stlxr_1 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i8; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 1); }]; +} def stlxr_2 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i16; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 2); }]; +} def stlxr_4 : PatFrag<(ops node:$val, node:$ptr), (int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 4); }]; +} def stlxr_8 : PatFrag<(ops node:$val, node:$ptr), 
(int_aarch64_stlxr node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; -}]>; +}]> { + let GISelPredicateCode = [{ return isLoadStoreOfNumBytes(MI, 8); }]; +} def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr), @@ -422,4 +454,3 @@ let Predicates = [HasLSE] in { defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; } - diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d619137b55c..f555e412330 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -480,76 +480,40 @@ def BranchTarget14Operand : BranchTarget<14>; def BranchTarget26Operand : BranchTarget<26>; def PCRelLabel19Operand : PCRelLabel<19>; -def MovZSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG3"; +def MovWSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG3"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g3 : Operand { - let ParserMatchClass = MovZSymbolG3AsmOperand; +def movw_symbol_g3 : Operand { + let ParserMatchClass = MovWSymbolG3AsmOperand; } -def MovZSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG2"; +def MovWSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG2"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g2 : Operand { - let ParserMatchClass = MovZSymbolG2AsmOperand; +def movw_symbol_g2 : Operand { + let ParserMatchClass = MovWSymbolG2AsmOperand; } -def MovZSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG1"; +def MovWSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovWSymbolG1"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g1 : Operand { - let ParserMatchClass = MovZSymbolG1AsmOperand; +def movw_symbol_g1 : Operand { + let ParserMatchClass = MovWSymbolG1AsmOperand; } -def MovZSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG0"; +def MovWSymbolG0AsmOperand : AsmOperandClass { 
+ let Name = "MovWSymbolG0"; let RenderMethod = "addImmOperands"; } -def movz_symbol_g0 : Operand { - let ParserMatchClass = MovZSymbolG0AsmOperand; -} - -def MovKSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g3 : Operand { - let ParserMatchClass = MovKSymbolG3AsmOperand; -} - -def MovKSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g2 : Operand { - let ParserMatchClass = MovKSymbolG2AsmOperand; -} - -def MovKSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g1 : Operand { - let ParserMatchClass = MovKSymbolG1AsmOperand; -} - -def MovKSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g0 : Operand { - let ParserMatchClass = MovKSymbolG0AsmOperand; +def movw_symbol_g0 : Operand { + let ParserMatchClass = MovWSymbolG0AsmOperand; } class fixedpoint_i32 @@ -673,6 +637,11 @@ def logical_imm64_XFORM : SDNodeXFormgetTargetConstant(enc, SDLoc(N), MVT::i32); }]>; +def gi_logical_imm32_XFORM : GICustomOperandRenderer<"renderLogicalImm32">, + GISDNodeXFormEquiv; +def gi_logical_imm64_XFORM : GICustomOperandRenderer<"renderLogicalImm64">, + GISDNodeXFormEquiv; + let DiagnosticType = "LogicalSecondSource" in { def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; @@ -714,12 +683,15 @@ def logical_imm64_not : Operand { let ParserMatchClass = LogicalImm64NotOperand; } -// imm0_65535 predicate - True if the immediate is in the range [0,65535]. 
-def imm0_65535 : Operand, ImmLeaf, PrintMethod = "printImmHex" in { +def i32_imm0_65535 : Operand, TImmLeaf { - let ParserMatchClass = AsmImmRange<0, 65535>; - let PrintMethod = "printImmHex"; +}]>; + +def i64_imm0_65535 : Operand, TImmLeaf; } // imm0_255 predicate - True if the immediate is in the range [0,255]. @@ -815,6 +787,14 @@ class arith_shifted_reg def arith_shifted_reg32 : arith_shifted_reg; def arith_shifted_reg64 : arith_shifted_reg; +def gi_arith_shifted_reg32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_arith_shifted_reg64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror // {5-0} - imm6 @@ -837,6 +817,14 @@ class logical_shifted_reg def logical_shifted_reg32 : logical_shifted_reg; def logical_shifted_reg64 : logical_shifted_reg; +def gi_logical_shifted_reg32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_logical_shifted_reg64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // A logical vector shifter operand: // {7-6} - shift type: 00 = lsl // {5-0} - imm6: #0, #8, #16, or #24 @@ -918,6 +906,14 @@ class neg_addsub_shifted_imm def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm; def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; +def gi_neg_addsub_shifted_imm32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_neg_addsub_shifted_imm64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // An extend operand: // {5-3} - extend type // {2-0} - imm3 @@ -948,6 +944,21 @@ class arith_extended_reg32to64 : Operand, let MIOperandInfo = (ops GPR32, arith_extend64); } +def arith_extended_reg32_i32 : arith_extended_reg32; +def gi_arith_extended_reg32_i32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32_i64 : arith_extended_reg32; +def gi_arith_extended_reg32_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32to64_i64 : 
arith_extended_reg32to64; +def gi_arith_extended_reg32to64_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // Floating-point immediate. def fpimm16 : Operand, FPImmLeaf : AsmOperandClass { let RenderMethod = "addVectorIndexOperands"; } -class AsmVectorIndexOpnd - : Operand, ImmLeaf { +class AsmVectorIndexOpnd + : Operand, ImmLeaf { let ParserMatchClass = mc; let PrintMethod = "printVectorIndex"; } @@ -1012,11 +1023,17 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>; def VectorIndexSOperand : AsmVectorIndex<0, 3>; def VectorIndexDOperand : AsmVectorIndex<0, 1>; -def VectorIndex1 : AsmVectorIndexOpnd; -def VectorIndexB : AsmVectorIndexOpnd; -def VectorIndexH : AsmVectorIndexOpnd; -def VectorIndexS : AsmVectorIndexOpnd; -def VectorIndexD : AsmVectorIndexOpnd; +def VectorIndex1 : AsmVectorIndexOpnd; +def VectorIndexB : AsmVectorIndexOpnd; +def VectorIndexH : AsmVectorIndexOpnd; +def VectorIndexS : AsmVectorIndexOpnd; +def VectorIndexD : AsmVectorIndexOpnd; + +def VectorIndex132b : AsmVectorIndexOpnd; +def VectorIndexB32b : AsmVectorIndexOpnd; +def VectorIndexH32b : AsmVectorIndexOpnd; +def VectorIndexS32b : AsmVectorIndexOpnd; +def VectorIndexD32b : AsmVectorIndexOpnd; def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">; def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">; @@ -1025,15 +1042,15 @@ def SVEVectorIndexExtDupDOperand : AsmVectorIndex<0, 7, "SVE">; def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">; def sve_elm_idx_extdup_b - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_h - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_s - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_d - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; def sve_elm_idx_extdup_q - : AsmVectorIndexOpnd; + : AsmVectorIndexOpnd; // 8-bit immediate for AdvSIMD where 64-bit values of the form: // aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh @@ -1082,6 
+1099,45 @@ class RtSystemI let Inst{4-0} = Rt; } +// System instructions for transactional memory extension +class TMBaseSystemI CRm, bits<3> op2, dag oops, dag iops, + string asm, string operands, list pattern> + : BaseSystemI, + Sched<[WriteSys]> { + let Inst{20-12} = 0b000110011; + let Inst{11-8} = CRm; + let Inst{7-5} = op2; + let DecoderMethod = ""; + + let mayLoad = 1; + let mayStore = 1; +} + +// System instructions for transactional memory - single input operand +class TMSystemI CRm, string asm, list pattern> + : TMBaseSystemI<0b1, CRm, 0b011, + (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> { + bits<5> Rt; + let Inst{4-0} = Rt; +} + +// System instructions for transactional memory - no operand +class TMSystemINoOperand CRm, string asm, list pattern> + : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> { + let Inst{4-0} = 0b11111; +} + +// System instructions for exit from transactions +class TMSystemException op1, string asm, list pattern> + : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-0} = 0b00000; +} + // Hint instructions that take both a CRm and a 3-bit immediate. 
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot // model patterns with sufficiently fine granularity @@ -2180,11 +2236,11 @@ multiclass AddSub, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32to64_i64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -2254,11 +2310,11 @@ multiclass AddSubS, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32_i64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -2969,6 +3025,22 @@ def ro_Xindexed32 : ComplexPattern", []>; def ro_Xindexed64 : ComplexPattern", []>; def ro_Xindexed128 : ComplexPattern", []>; +def gi_ro_Xindexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_ro_Xindexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + def ro_Windexed8 : ComplexPattern", []>; def ro_Windexed16 : ComplexPattern", []>; def ro_Windexed32 : ComplexPattern", []>; @@ -4086,7 +4158,7 @@ multiclass MemTagStore opc1, string insn> { let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>, Sched<[WriteSys]> { bits<16> imm; let Inst{31-24} = 0b11010100; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 215e96a82d0..5c35e5bcdd3 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include 
"llvm/IR/GlobalValue.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Casting.h" @@ -82,6 +83,10 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); } + // Meta-instructions emit no code. + if (MI.isMetaInstruction()) + return 0; + // FIXME: We currently only handle pseudoinstructions that don't get expanded // before the assembly printer. unsigned NumBytes = 0; @@ -91,12 +96,6 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { // Anything not explicitly designated otherwise is a normal 4-byte insn. NumBytes = 4; break; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - NumBytes = 0; - break; case TargetOpcode::STACKMAP: // The upper bound for a stackmap intrinsic is the full length of its shadow NumBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -416,7 +415,7 @@ unsigned AArch64InstrInfo::insertBranch( // Find the original register that VReg is copied from. 
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { + while (Register::isVirtualRegister(VReg)) { const MachineInstr *DefMI = MRI.getVRegDef(VReg); if (!DefMI->isFullCopy()) return VReg; @@ -431,7 +430,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg = nullptr) { VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + if (!Register::isVirtualRegister(VReg)) return 0; bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); @@ -574,7 +573,7 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, CC = AArch64CC::NE; break; } - unsigned SrcReg = Cond[2].getReg(); + Register SrcReg = Cond[2].getReg(); if (Is64Bit) { // cmp reg, #0 is actually subs xzr, reg, #0. MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); @@ -930,7 +929,7 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, } bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { const TargetRegisterInfo *TRI = &getRegisterInfo(); const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; int64_t OffsetA = 0, OffsetB = 0; @@ -1071,8 +1070,8 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { assert(MO.isReg() && "Operand has register constraints without being a register!"); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) { if (!OpRegCstraints->contains(Reg)) return false; } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && @@ -1472,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; MachineBasicBlock &MBB = 
*MI.getParent(); + auto &Subtarget = MBB.getParent()->getSubtarget(); + auto TRI = Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); if (MI.getOpcode() == AArch64::CATCHRET) { @@ -1497,21 +1498,32 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); const GlobalValue *GV = cast((*MI.memoperands_begin())->getValue()); const TargetMachine &TM = MBB.getParent()->getTarget(); - unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); + unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM); const unsigned char MO_NC = AArch64II::MO_NC; if ((OpFlags & AArch64II::MO_GOT) != 0) { BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) .addGlobalAddress(GV, 0, OpFlags); - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addImm(0) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()); + } } else if (TM.getCodeModel() == CodeModel::Large) { + assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) .addImm(0); @@ -1538,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, 
LoFlags) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()); + } } MBB.erase(MI); @@ -1581,7 +1603,7 @@ bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // GPR32 copies will by lowered to ORRXrs - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::GPR32RegClass.contains(DstReg) || AArch64::GPR64RegClass.contains(DstReg)); } @@ -1611,7 +1633,7 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) { break; case TargetOpcode::COPY: { // FPR64 copies will by lowered to ORR.16b - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); return (AArch64::FPR64RegClass.contains(DstReg) || AArch64::FPR128RegClass.contains(DstReg)); } @@ -1917,7 +1939,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { // e.g., ldr x0, [x0] // This case will never occur with an FI base. 
if (MI.getOperand(1).isReg()) { - unsigned BaseReg = MI.getOperand(1).getReg(); + Register BaseReg = MI.getOperand(1).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (MI.modifiesRegister(BaseReg, TRI)) return false; @@ -1928,6 +1950,17 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const { if (isLdStPairSuppressed(MI)) return false; + // Do not pair any callee-save store/reload instructions in the + // prologue/epilogue if the CFI information encoded the operations as separate + // instructions, as that will cause the size of the actual prologue to mismatch + // with the prologue size recorded in the Windows CFI. + const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo(); + bool NeedsWinCFI = MAI->usesWindowsCFI() && + MI.getMF()->getFunction().needsUnwindTableEntry(); + if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy))) + return false; + // On some CPUs quad load/store pairs are slower than two single load/stores. 
if (Subtarget.isPaired128Slow()) { switch (MI.getOpcode()) { @@ -2165,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, MinOffset = -256; MaxOffset = 255; break; + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + Scale = Width = 2; + MinOffset = -256; + MaxOffset = 255; + break; + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + Scale = Width = 16; + MinOffset = -256; + MaxOffset = 255; + break; case AArch64::ST2GOffset: case AArch64::STZ2GOffset: Scale = 16; @@ -2350,7 +2395,7 @@ static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, if (!SubIdx) return MIB.addReg(Reg, State); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -2474,6 +2519,27 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // Copy a Predicate register by ORRing with itself. + if (AArch64::PPRRegClass.contains(DestReg) && + AArch64::PPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg) + .addReg(SrcReg) // Pg + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + // Copy a Z register by ORRing with itself. 
+ if (AArch64::ZPRRegClass.contains(DestReg) && + AArch64::ZPRRegClass.contains(SrcReg)) { + assert(Subtarget.hasSVE() && "Unexpected SVE register."); + BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (AArch64::GPR64spRegClass.contains(DestReg) && (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { @@ -2722,7 +2788,7 @@ static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineMemOperand *MMO) { unsigned SrcReg0 = SrcReg; unsigned SrcReg1 = SrcReg; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0); SubIdx0 = 0; SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1); @@ -2761,7 +2827,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); else assert(SrcReg != AArch64::WSP); @@ -2771,7 +2837,7 @@ void AArch64InstrInfo::storeRegToStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (Register::isVirtualRegister(SrcReg)) MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); else assert(SrcReg != AArch64::SP); @@ -2852,7 +2918,7 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, unsigned DestReg0 = DestReg; unsigned DestReg1 = DestReg; bool IsUndef = true; - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) { + if (Register::isPhysicalRegister(DestReg)) { DestReg0 = TRI.getSubReg(DestReg, SubIdx0); SubIdx0 = 0; DestReg1 = TRI.getSubReg(DestReg, SubIdx1); @@ -2892,7 +2958,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 4: if 
(AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); else assert(DestReg != AArch64::WSP); @@ -2902,7 +2968,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 8: if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) + if (Register::isVirtualRegister(DestReg)) MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); else assert(DestReg != AArch64::SP); @@ -2972,21 +3038,39 @@ void AArch64InstrInfo::loadRegFromStackSlot( MI.addMemOperand(MMO); } -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const TargetInstrInfo *TII, - MachineInstr::MIFlag Flag, bool SetNZCV, - bool NeedsWinCFI, bool *HasWinCFI) { - if (DestReg == SrcReg && Offset == 0) - return; - - assert((DestReg != AArch64::SP || Offset % 16 == 0) && - "SP increment/decrement not 16-byte aligned"); - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; +// Helper function to emit a frame offset adjustment from a given +// pointer (SrcReg), stored into DestReg. This function is explicit +// in that it requires the opcode. 
+static void emitFrameOffsetAdj(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, int64_t Offset, unsigned Opc, + const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool NeedsWinCFI, + bool *HasWinCFI) { + int Sign = 1; + unsigned MaxEncoding, ShiftSize; + switch (Opc) { + case AArch64::ADDXri: + case AArch64::ADDSXri: + case AArch64::SUBXri: + case AArch64::SUBSXri: + MaxEncoding = 0xfff; + ShiftSize = 12; + break; + case AArch64::ADDVL_XXI: + case AArch64::ADDPL_XXI: + MaxEncoding = 31; + ShiftSize = 0; + if (Offset < 0) { + MaxEncoding = 32; + Sign = -1; + Offset = -Offset; + } + break; + default: + llvm_unreachable("Unsupported opcode"); + } // FIXME: If the offset won't fit in 24-bits, compute the offset into a // scratch register. If DestReg is a virtual register, use it as the @@ -2999,65 +3083,94 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, // of code. // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - unsigned Opc; - if (SetNZCV) - Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; - else - Opc = isSub ? 
AArch64::SUBXri : AArch64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; + do { + unsigned ThisVal = std::min(Offset, MaxEncodableValue); + unsigned LocalShiftSize = 0; + if (ThisVal > MaxEncoding) { + ThisVal = ThisVal >> ShiftSize; + LocalShiftSize = ShiftSize; } assert((ThisVal >> ShiftSize) <= MaxEncoding && "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); + auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addImm(Sign * (int)ThisVal); + if (ShiftSize) + MBI = MBI.addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize)); + MBI = MBI.setMIFlag(Flag); - if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) { + if (NeedsWinCFI) { + assert(Sign == 1 && "SEH directives should always have a positive sign"); + int Imm = (int)(ThisVal << LocalShiftSize); + if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || + (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { + if (HasWinCFI) + *HasWinCFI = true; + if (Imm == 0) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag); + else + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)) + .addImm(Imm) + .setMIFlag(Flag); + assert((Offset - Imm) == 0 && "Expected remaining offset to be zero to " + "emit a single SEH directive"); + } else if (DestReg == AArch64::SP) { + if (HasWinCFI) + *HasWinCFI = true; + assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc"); + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(Imm) + .setMIFlag(Flag); + } if 
(HasWinCFI) *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) - .addImm(ThisVal) - .setMIFlag(Flag); } SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) - .setMIFlag(Flag); + Offset -= ThisVal << LocalShiftSize; + } while (Offset); +} - if (NeedsWinCFI) { - if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || - (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { - if (HasWinCFI) - *HasWinCFI = true; - if (Offset == 0) - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). - setMIFlag(Flag); - else - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). - addImm(Offset).setMIFlag(Flag); - } else if (DestReg == AArch64::SP) { - if (HasWinCFI) - *HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). - addImm(Offset).setMIFlag(Flag); +void llvm::emitFrameOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, + StackOffset Offset, const TargetInstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV, + bool NeedsWinCFI, bool *HasWinCFI) { + int64_t Bytes, NumPredicateVectors, NumDataVectors; + Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors); + + // First emit non-scalable frame offsets, or a simple 'mov'. + if (Bytes || (!Offset && SrcReg != DestReg)) { + assert((DestReg != AArch64::SP || Bytes % 16 == 0) && + "SP increment/decrement not 16-byte aligned"); + unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; + if (Bytes < 0) { + Bytes = -Bytes; + Opc = SetNZCV ? 
AArch64::SUBSXri : AArch64::SUBXri; } + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag, + NeedsWinCFI, HasWinCFI); + SrcReg = DestReg; + } + + assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && + "SetNZCV not supported with SVE vectors"); + assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) && + "WinCFI not supported with SVE vectors"); + + if (NumDataVectors) { + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors, + AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr); + SrcReg = DestReg; + } + + if (NumPredicateVectors) { + assert(DestReg != AArch64::SP && "Unaligned access to SP"); + emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors, + AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr); } } @@ -3079,15 +3192,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // if (MI.isFullCopy()) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - if (SrcReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) { MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); return nullptr; } - if (DstReg == AArch64::SP && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) { MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); return nullptr; } @@ -3127,14 +3238,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineBasicBlock &MBB = *MI.getParent(); const MachineOperand &DstMO = MI.getOperand(0); const MachineOperand &SrcMO = MI.getOperand(1); - unsigned DstReg = DstMO.getReg(); - unsigned SrcReg = SrcMO.getReg(); + Register DstReg = DstMO.getReg(); + Register SrcReg = SrcMO.getReg(); // This is slightly expensive to compute for physical regs since // 
getMinimalPhysRegClass is slow. auto getRegClass = [&](unsigned Reg) { - return TargetRegisterInfo::isVirtualRegister(Reg) - ? MRI.getRegClass(Reg) - : TRI.getMinimalPhysRegClass(Reg); + return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg) + : TRI.getMinimalPhysRegClass(Reg); }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { @@ -3159,8 +3269,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // STRXui %xzr, %stack.0 // - if (IsSpill && DstMO.isUndef() && - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) { assert(SrcMO.getSubReg() == 0 && "Unexpected subreg on physical register"); const TargetRegisterClass *SpillRC; @@ -3243,10 +3352,23 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( return nullptr; } -int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +static bool isSVEScaledImmInstruction(unsigned Opcode) { + switch (Opcode) { + case AArch64::LDR_ZXI: + case AArch64::STR_ZXI: + case AArch64::LDR_PXI: + case AArch64::STR_PXI: + return true; + default: + return false; + } +} + +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, + StackOffset &SOffset, bool *OutUseUnscaledOp, unsigned *OutUnscaledOp, - int *EmittableOffset) { + int64_t *EmittableOffset) { // Set output values in case of early exit. if (EmittableOffset) *EmittableOffset = 0; @@ -3285,6 +3407,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal"); // Construct the complete offset. + bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode()); + int64_t Offset = + IsMulVL ? 
(SOffset.getScalableBytes()) : (SOffset.getBytes()); + const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); Offset += ImmOpnd.getImm() * Scale; @@ -3304,7 +3430,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, "Cannot have remainder when using unscaled op"); assert(MinOff < MaxOff && "Unexpected Min/Max offsets"); - int NewOffset = Offset / Scale; + int64_t NewOffset = Offset / Scale; if (MinOff <= NewOffset && NewOffset <= MaxOff) Offset = Remainder; else { @@ -3319,27 +3445,33 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, if (OutUnscaledOp && UnscaledOp) *OutUnscaledOp = *UnscaledOp; + if (IsMulVL) + SOffset = StackOffset(Offset, MVT::nxv1i8) + + StackOffset(SOffset.getBytes(), MVT::i8); + else + SOffset = StackOffset(Offset, MVT::i8) + + StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8); return AArch64FrameOffsetCanUpdate | - (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); + (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); + Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); MI.eraseFromParent(); - Offset = 0; + Offset = StackOffset(); return true; } - int NewOffset; + int64_t NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, @@ -3352,7 +3484,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.setDesc(TII->get(UnscaledOp)); MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; + return !Offset; } return false; @@ -3428,13 +3560,19 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: break; + case AArch64::FADDHrr: case AArch64::FADDSrr: case AArch64::FADDDrr: + case AArch64::FADDv4f16: + case AArch64::FADDv8f16: case AArch64::FADDv2f32: case AArch64::FADDv2f64: case AArch64::FADDv4f32: + case AArch64::FSUBHrr: case AArch64::FSUBSrr: case AArch64::FSUBDrr: + case AArch64::FSUBv4f16: + case AArch64::FSUBv8f16: case AArch64::FSUBv2f32: case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: @@ -3459,7 +3597,7 @@ static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineInstr *MI = nullptr; - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) MI = MRI.getUniqueVRegDef(MO.getReg()); // And it needs to be in the trace 
(otherwise, it won't have a depth). if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) @@ -3544,86 +3682,48 @@ static bool getMaddPatterns(MachineInstr &Root, Opc = NewOpc; } + auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg, + MachineCombinerPattern Pattern) { + if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) { + Patterns.push_back(Pattern); + Found = true; + } + }; + + typedef MachineCombinerPattern MCP; + switch (Opc) { default: break; case AArch64::ADDWrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && "ADDWrr does not have register operands"); - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2); break; case AArch64::ADDXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2); break; case AArch64::SUBWrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, 
AArch64::WZR, MCP::MULSUBW_OP1); + setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2); break; case AArch64::SUBXrr: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); - Found = true; - } - if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1); + setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2); break; case AArch64::ADDWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1); break; case AArch64::ADDXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1); break; case AArch64::SUBWri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, - AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); - Found = true; - } + setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1); break; case AArch64::SUBXri: - if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, - AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); - Found = true; - } + setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1); break; } return Found; @@ -3640,204 +3740,135 @@ static bool getFMAPatterns(MachineInstr &Root, MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; + auto Match = [&](int Opcode, int Operand, + MachineCombinerPattern Pattern) -> bool { + if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { + Patterns.push_back(Pattern); + return 
true; + } + return false; + }; + + typedef MachineCombinerPattern MCP; + switch (Root.getOpcode()) { default: assert(false && "Unsupported FP instruction in combiner\n"); break; + case AArch64::FADDHrr: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDHrr does not have register operands"); + + Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2); + break; case AArch64::FADDSrr: assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && - "FADDWrr does not have register operands"); - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2); - Found = true; - } + "FADDSrr does not have register operands"); + + Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) || + Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2); break; case AArch64::FADDDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - 
Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2); - Found = true; - } + Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) || + Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2); + break; + case AArch64::FADDv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1); + + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2); + break; + case AArch64::FADDv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1); + + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2); break; case AArch64::FADDv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1); + + Found |= Match(AArch64::FMULv2i32_indexed, 2, 
MCP::FMLAv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2); break; case AArch64::FADDv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2); - Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1); + + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2); break; case AArch64::FADDv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2); - Found = true; - } - break; + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1); + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, 
MCP::FMLAv4f32_OP2); + break; + case AArch64::FSUBHrr: + Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1); + Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2); + Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1); + break; case AArch64::FSUBSrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); - Found = true; - } + Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1); + + Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) || + Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2); + + Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1); break; case AArch64::FSUBDrr: - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv1i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { - Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); - Found = true; - } + Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1); + + Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) || + Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2); + + Found 
|= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1); + break; + case AArch64::FSUBv4f16: + Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) || + Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2); + + Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) || + Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1); + break; + case AArch64::FSUBv8f16: + Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) || + Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2); + + Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) || + Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1); break; case AArch64::FSUBv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) || + Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2); + + Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) || + Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1); break; case AArch64::FSUBv2f64: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); - Found = true; - } - if 
(canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) || + Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2); + + Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) || + Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1); break; case AArch64::FSUBv4f32: - if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(2), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); - Found = true; - } - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); - Found = true; - } + Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) || + Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2); + + Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) || + Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1); break; } return Found; @@ -3851,6 +3882,10 @@ bool AArch64InstrInfo::isThroughputPattern( switch (Pattern) { default: break; + case MachineCombinerPattern::FMULADDH_OP1: + case MachineCombinerPattern::FMULADDH_OP2: + case MachineCombinerPattern::FMULSUBH_OP1: + case MachineCombinerPattern::FMULSUBH_OP2: case MachineCombinerPattern::FMULADDS_OP1: case MachineCombinerPattern::FMULADDS_OP2: case MachineCombinerPattern::FMULSUBS_OP1: @@ -3859,12 
+3894,21 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMULADDD_OP2: case MachineCombinerPattern::FMULSUBD_OP1: case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBH_OP1: case MachineCombinerPattern::FNMULSUBS_OP1: case MachineCombinerPattern::FNMULSUBD_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: case MachineCombinerPattern::FMLAv1i32_indexed_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP2: case MachineCombinerPattern::FMLAv1i64_indexed_OP1: case MachineCombinerPattern::FMLAv1i64_indexed_OP2: + case MachineCombinerPattern::FMLAv4f16_OP2: + case MachineCombinerPattern::FMLAv4f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP1: + case MachineCombinerPattern::FMLAv8f16_OP2: case MachineCombinerPattern::FMLAv2f32_OP2: case MachineCombinerPattern::FMLAv2f32_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: @@ -3877,10 +3921,18 @@ bool AArch64InstrInfo::isThroughputPattern( case MachineCombinerPattern::FMLAv4f32_OP2: case MachineCombinerPattern::FMLAv4i32_indexed_OP1: case MachineCombinerPattern::FMLAv4i32_indexed_OP2: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: case MachineCombinerPattern::FMLSv1i32_indexed_OP2: case MachineCombinerPattern::FMLSv1i64_indexed_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4f16_OP2: + case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8f16_OP2: case MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2f64_OP2: case 
MachineCombinerPattern::FMLSv4i32_indexed_OP2: @@ -3933,15 +3985,15 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind = FMAInstKind::Default, - const unsigned *ReplacedAddend = nullptr) { + const Register *ReplacedAddend = nullptr) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + Register ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); unsigned SrcReg2; @@ -3955,13 +4007,13 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); } - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg2)) + if (Register::isVirtualRegister(SrcReg2)) MRI.constrainRegClass(SrcReg2, RC); MachineInstrBuilder MIB; @@ -4015,19 +4067,19 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); - unsigned ResultReg = Root.getOperand(0).getReg(); - unsigned SrcReg0 = MUL->getOperand(1).getReg(); + Register 
ResultReg = Root.getOperand(0).getReg(); + Register SrcReg0 = MUL->getOperand(1).getReg(); bool Src0IsKill = MUL->getOperand(1).isKill(); - unsigned SrcReg1 = MUL->getOperand(2).getReg(); + Register SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); - if (TargetRegisterInfo::isVirtualRegister(ResultReg)) + if (Register::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg0)) + if (Register::isVirtualRegister(SrcReg0)) MRI.constrainRegClass(SrcReg0, RC); - if (TargetRegisterInfo::isVirtualRegister(SrcReg1)) + if (Register::isVirtualRegister(SrcReg1)) MRI.constrainRegClass(SrcReg1, RC); - if (TargetRegisterInfo::isVirtualRegister(VR)) + if (Register::isVirtualRegister(VR)) MRI.constrainRegClass(VR, RC); MachineInstrBuilder MIB = @@ -4116,7 +4168,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { @@ -4158,7 +4210,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(SubRC); + Register NewVR = MRI.createVirtualRegister(SubRC); // SUB NewVR, 0, C MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) @@ -4208,7 +4260,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( Opc = AArch64::MADDXrrr; RC = &AArch64::GPR64RegClass; } - unsigned NewVR = MRI.createVirtualRegister(OrrRC); + Register NewVR = MRI.createVirtualRegister(OrrRC); uint64_t Imm = Root.getOperand(2).getImm(); if (Root.getOperand(3).isImm()) { unsigned Val = Root.getOperand(3).getImm(); @@ -4228,34 +4280,35 @@ void AArch64InstrInfo::genAlternativeCodeSequence( break; } // Floating Point Support - case 
MachineCombinerPattern::FMULADDS_OP1: - case MachineCombinerPattern::FMULADDD_OP1: - // MUL I=A,B,0 - // ADD R,I,C - // ==> MADD R,A,B,C - // --- Create(MADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + case MachineCombinerPattern::FMULADDH_OP1: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; + case MachineCombinerPattern::FMULADDS_OP1: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULADDD_OP1: + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + + case MachineCombinerPattern::FMULADDH_OP2: + Opc = AArch64::FMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDS_OP2: + Opc = AArch64::FMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; case MachineCombinerPattern::FMULADDD_OP2: - // FMUL I=A,B,0 - // FADD R,C,I - // ==> FMADD R,A,B,C - // --- Create(FMADD); - if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { - Opc = AArch64::FMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; @@ -4285,6 +4338,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP1: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, 
Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP1: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i32_indexed_OP1: case MachineCombinerPattern::FMLAv2f32_OP1: RC = &AArch64::FPR64RegClass; @@ -4312,6 +4390,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP1: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLAv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLAv2i64_indexed_OP1: case MachineCombinerPattern::FMLAv2f64_OP1: RC = &AArch64::FPR128RegClass; @@ -4367,56 +4470,53 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case 
MachineCombinerPattern::FMULSUBH_OP1: + Opc = AArch64::FNMSUBHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FMULSUBS_OP1: - case MachineCombinerPattern::FMULSUBD_OP1: { - // FMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMSUB R,A,B,C // = -C + A*B - // --- Create(FNMSUB); - if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { - Opc = AArch64::FNMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP1: + Opc = AArch64::FNMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } + case MachineCombinerPattern::FNMULSUBH_OP1: + Opc = AArch64::FNMADDHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; case MachineCombinerPattern::FNMULSUBS_OP1: - case MachineCombinerPattern::FNMULSUBD_OP1: { - // FNMUL I=A,B,0 - // FSUB R,I,C - // ==> FNMADD R,A,B,C // = -A*B - C - // --- Create(FNMADD); - if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { - Opc = AArch64::FNMADDSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FNMADDDrrr; - RC = &AArch64::FPR64RegClass; - } + Opc = AArch64::FNMADDSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FNMULSUBD_OP1: + Opc = AArch64::FNMADDDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - } - case MachineCombinerPattern::FMULSUBS_OP2: - case MachineCombinerPattern::FMULSUBD_OP2: { - // FMUL I=A,B,0 - // FSUB R,C,I - // ==> FMSUB R,A,B,C (computes C - A*B) - // --- Create(FMSUB); - if (Pattern == 
MachineCombinerPattern::FMULSUBS_OP2) { - Opc = AArch64::FMSUBSrrr; - RC = &AArch64::FPR32RegClass; - } else { - Opc = AArch64::FMSUBDrrr; - RC = &AArch64::FPR64RegClass; - } + case MachineCombinerPattern::FMULSUBH_OP2: + Opc = AArch64::FMSUBHrrr; + RC = &AArch64::FPR16RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBS_OP2: + Opc = AArch64::FMSUBSrrr; + RC = &AArch64::FPR32RegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + case MachineCombinerPattern::FMULSUBD_OP2: + Opc = AArch64::FMSUBDrrr; + RC = &AArch64::FPR64RegClass; MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4432,6 +4532,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Indexed); break; + case MachineCombinerPattern::FMLSv4f16_OP1: + case MachineCombinerPattern::FMLSv4i16_indexed_OP1: { + RC = &AArch64::FPR64RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) { + Opc = AArch64::FMLAv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv4i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv4f16_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv4i16_indexed_OP2: + RC = &AArch64::FPR64RegClass; + Opc = AArch64::FMLSv4i16_indexed; 
+ MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLSv2f32_OP2: case MachineCombinerPattern::FMLSv2i32_indexed_OP2: RC = &AArch64::FPR64RegClass; @@ -4446,6 +4579,39 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; + case MachineCombinerPattern::FMLSv8f16_OP1: + case MachineCombinerPattern::FMLSv8i16_indexed_OP1: { + RC = &AArch64::FPR128RegClass; + Register NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) { + Opc = AArch64::FMLAv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } else { + Opc = AArch64::FMLAv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv8f16_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8f16; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Accumulator); + break; + case MachineCombinerPattern::FMLSv8i16_indexed_OP2: + RC = &AArch64::FPR128RegClass; + Opc = AArch64::FMLSv8i16_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC, + FMAInstKind::Indexed); + break; + case MachineCombinerPattern::FMLSv2f64_OP2: case MachineCombinerPattern::FMLSv2i64_indexed_OP2: RC = &AArch64::FPR128RegClass; @@ -4476,7 +4642,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f32_OP1: case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { RC = &AArch64::FPR64RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), 
TII->get(AArch64::FNEGv2f32), NewVR) .add(Root.getOperand(2)); @@ -4496,7 +4662,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv4f32_OP1: case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { RC = &AArch64::FPR128RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) .add(Root.getOperand(2)); @@ -4516,7 +4682,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( case MachineCombinerPattern::FMLSv2f64_OP1: case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { RC = &AArch64::FPR128RegClass; - unsigned NewVR = MRI.createVirtualRegister(RC); + Register NewVR = MRI.createVirtualRegister(RC); MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) .add(Root.getOperand(2)); @@ -4617,15 +4783,15 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { MachineBasicBlock *MBB = MI.getParent(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); - unsigned VReg = MI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) + Register VReg = MI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(VReg)) return false; MachineInstr *DefMI = MRI->getVRegDef(VReg); // Look through COPY instructions to find definition. 
while (DefMI->isCopy()) { - unsigned CopyVReg = DefMI->getOperand(1).getReg(); + Register CopyVReg = DefMI->getOperand(1).getReg(); if (!MRI->hasOneNonDBGUse(CopyVReg)) return false; if (!MRI->hasOneDef(CopyVReg)) @@ -4653,8 +4819,8 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const { return false; MachineOperand &MO = DefMI->getOperand(1); - unsigned NewReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NewReg)) + Register NewReg = MO.getReg(); + if (!Register::isVirtualRegister(NewReg)) return false; assert(!MRI->def_empty(NewReg) && "Register must be defined."); @@ -4737,9 +4903,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { static const std::pair TargetFlags[] = { {MO_COFFSTUB, "aarch64-coffstub"}, - {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, - {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"}, - {MO_DLLIMPORT, "aarch64-dllimport"}}; + {MO_GOT, "aarch64-got"}, + {MO_NC, "aarch64-nc"}, + {MO_S, "aarch64-s"}, + {MO_TLS, "aarch64-tls"}, + {MO_DLLIMPORT, "aarch64-dllimport"}, + {MO_PREL, "aarch64-prel"}, + {MO_TAGGED, "aarch64-tagged"}}; return makeArrayRef(TargetFlags); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 7be4daba7dc..1688045e4fb 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -15,6 +15,7 @@ #include "AArch64.h" #include "AArch64RegisterInfo.h" +#include "AArch64StackOffset.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -55,8 +56,7 @@ public: bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -299,7 +299,7 @@ private: /// if necessary, to be replaced by the scavenger at the end of PEI. 
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, - int Offset, const TargetInstrInfo *TII, + StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false, bool NeedsWinCFI = false, bool *HasWinCFI = nullptr); @@ -308,7 +308,7 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, /// FP. Return false if the offset could not be handled directly in MI, and /// return the left-over portion by reference. bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, + unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII); /// Use to report the frame offset status in isAArch64FrameOffsetLegal. @@ -332,10 +332,10 @@ enum AArch64FrameOffsetStatus { /// If set, @p EmittableOffset contains the amount that can be set in @p MI /// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that /// is a legal offset. 
-int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, +int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp = nullptr, unsigned *OutUnscaledOp = nullptr, - int *EmittableOffset = nullptr); + int64_t *EmittableOffset = nullptr); static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index eed53f36d57..1981bd5d3bf 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -62,6 +62,9 @@ def HasAM : Predicate<"Subtarget->hasAM()">, def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, AssemblerPredicate<"FeatureSEL2", "sel2">; +def HasPMU : Predicate<"Subtarget->hasPMU()">, + AssemblerPredicate<"FeaturePMU", "pmu">; + def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">; @@ -116,7 +119,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">; + AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<"FeatureRCPC", "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, @@ -133,6 +136,12 @@ def HasBTI : Predicate<"Subtarget->hasBTI()">, AssemblerPredicate<"FeatureBranchTargetId", "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, AssemblerPredicate<"FeatureMTE", "mte">; +def HasTME : Predicate<"Subtarget->hasTME()">, + AssemblerPredicate<"FeatureTME", "tme">; +def HasETE : Predicate<"Subtarget->hasETE()">, + AssemblerPredicate<"FeatureETE", "ete">; +def HasTRBE : Predicate<"Subtarget->hasTRBE()">, + AssemblerPredicate<"FeatureTRBE", "trbe">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def 
IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -415,6 +424,14 @@ def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, S def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def SDT_AArch64unpk : SDTypeProfile<1, 1, [ + SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0> +]>; +def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; +def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; +def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; +def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -431,6 +448,13 @@ let RecomputePerFunction = 1 in { def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; + + // Toggles patterns which aren't beneficial in GlobalISel when we aren't + // optimizing. This allows us to selectively use patterns without impacting + // SelectionDAG's behaviour. + // FIXME: One day there will probably be a nicer way to check for this, but + // today is not that day. 
+ def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; } include "AArch64InstrFormats.td" @@ -785,7 +809,11 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>, + [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, + Sched<[]>; +def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< + (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), + [(int_hwasan_check_memaccess_shortgranules X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } @@ -804,6 +832,23 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2", (SYSxt imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; + +let Predicates = [HasTME] in { + +def TSTART : TMSystemI<0b0000, "tstart", + [(set GPR64:$Rt, (int_aarch64_tstart))]>; + +def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; + +def TCANCEL : TMSystemException<0b011, "tcancel", + [(int_aarch64_tcancel i64_imm0_65535:$imm)]>; + +def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { + let mayLoad = 0; + let mayStore = 0; +} +} // HasTME + //===----------------------------------------------------------------------===// // Move immediate instructions. //===----------------------------------------------------------------------===// @@ -815,37 +860,37 @@ let PostEncoderMethod = "fixMOVZ" in defm MOVZ : MoveImmediate<0b10, "movz">; // First group of aliases covers an implicit "lsl #0". 
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; -def : InstAlias<"movz $Rd, $sym", 
(MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; -def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16), 0>; -def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; // Final group of aliases covers true "mov $Rd, $imm" cases. multiclass movw_mov_alias, GISDNodeXFormEquiv; +let Predicates = [OptimizedGISelOrOtherSelector] in { +// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless +// copies. def : Pat<(i64 i64imm_32bit:$src), (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; +} // Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 
def bitcast_fpimm_to_i32 : SDNodeXForm; let AddedComplexity = 1 in { -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; } // Because of the immediate format for add/sub-imm instructions, the @@ -2165,8 +2214,8 @@ def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ if (auto *G = dyn_cast(N)) { const DataLayout &DL = MF->getDataLayout(); - unsigned Align = G->getGlobal()->getPointerAlignment(DL); - return Align >= 4 && G->getOffset() % 4 == 0; + MaybeAlign Align = G->getGlobal()->getPointerAlignment(DL); + return Align && *Align >= 4 && G->getOffset() % 4 == 0; } if (auto *C = dyn_cast(N)) return C->getAlignment() >= 4 && C->getOffset() % 4 == 0; @@ -3281,20 +3330,37 @@ defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", // N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike // the NEON variant. + +// Here we handle first -(a + b*c) for FNMADD: + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), + (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and -// "(-a) + b*(-c)". 
+// Now it's time for "(-a) + (-b)*c" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +// And here "(-a) + b*(-c)" + +let Predicates = [HasNEON, HasFullFP16] in +def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))), + (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; + def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; @@ -6939,5 +7005,124 @@ def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; +// Extracting lane zero is a special case where we can just use a plain +// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the +// rest of the compiler, especially the register allocator and copy propagation, +// to reason about, so is preferred when it's possible to use it. 
+let AddedComplexity = 10 in { + def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; + def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; + def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; +} + +// dot_v4i8 +class mul_v4i8 : + PatFrag<(ops node:$Rn, node:$Rm, node:$offset), + (mul (ldop (add node:$Rn, node:$offset)), + (ldop (add node:$Rm, node:$offset)))>; +class mulz_v4i8 : + PatFrag<(ops node:$Rn, node:$Rm), + (mul (ldop node:$Rn), (ldop node:$Rm))>; + +def load_v4i8 : + OutPatFrag<(ops node:$R), + (INSERT_SUBREG + (v2i32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), + ssub)>; + +class dot_v4i8 : + Pat<(i32 (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), + (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), + (add (mul_v4i8 GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), + (mulz_v4i8 GPR64sp:$Rn, GPR64sp:$Rm))))), + (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), + (load_v4i8 GPR64sp:$Rn), + (load_v4i8 GPR64sp:$Rm))), + sub_32)>, Requires<[HasDotProd]>; + +// dot_v8i8 +class ee_v8i8 : + PatFrag<(ops node:$V, node:$K), + (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; + +class mul_v8i8 : + PatFrag<(ops node:$M, node:$N, node:$K), + (mulop (v4i16 (ee_v8i8 node:$M, node:$K)), + (v4i16 (ee_v8i8 node:$N, node:$K)))>; + +class idot_v8i8 : + PatFrag<(ops node:$M, node:$N), + (i32 (extractelt + (v4i32 (AArch64uaddv + (add (mul_v8i8 node:$M, node:$N, (i64 0)), + (mul_v8i8 node:$M, node:$N, (i64 4))))), + (i64 0)))>; + +// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm +def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; + +class odot_v8i8 : + OutPatFrag<(ops node:$Vm, node:$Vn), + (EXTRACT_SUBREG + (VADDV_32 + (i64 (DOT (DUPv2i32gpr WZR), + (v8i8 node:$Vm), + (v8i8 node:$Vn)))), + sub_32)>; + +class dot_v8i8 : + Pat<(idot_v8i8 V64:$Vm, V64:$Vn), + (odot_v8i8 V64:$Vm, V64:$Vn)>, + 
Requires<[HasDotProd]>; + +// dot_v16i8 +class ee_v16i8 : + PatFrag<(ops node:$V, node:$K1, node:$K2), + (v4i16 (extract_subvector + (v8i16 (extend + (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; + +class mul_v16i8 : + PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), + (v4i32 + (mulop (v4i16 (ee_v16i8 node:$M, node:$K1, node:$K2)), + (v4i16 (ee_v16i8 node:$N, node:$K1, node:$K2))))>; + +class idot_v16i8 : + PatFrag<(ops node:$M, node:$N), + (i32 (extractelt + (v4i32 (AArch64uaddv + (add + (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 0)), + (mul_v16i8 node:$M, node:$N, (i64 8), (i64 0))), + (add (mul_v16i8 node:$M, node:$N, (i64 0), (i64 4)), + (mul_v16i8 node:$M, node:$N, (i64 8), (i64 4)))))), + (i64 0)))>; + +class odot_v16i8 : + OutPatFrag<(ops node:$Vm, node:$Vn), + (i32 (ADDVv4i32v + (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; + +class dot_v16i8 : + Pat<(idot_v16i8 V128:$Vm, V128:$Vn), + (odot_v16i8 V128:$Vm, V128:$Vn)>, + Requires<[HasDotProd]>; + +let AddedComplexity = 10 in { + def : dot_v4i8; + def : dot_v4i8; + def : dot_v8i8; + def : dot_v8i8; + def : dot_v16i8; + def : dot_v16i8; + + // FIXME: add patterns to generate vector by element dot product. + // FIXME: add SVE dot-product patterns. 
+} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 4e13fb8e202..961f38cad1e 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -51,9 +51,19 @@ public: const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } + void setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) override { + InstructionSelector::setupMF(MF, KB, CoverageInfo); + + // hasFnAttribute() is expensive to call on every BRCOND selection, so + // cache it here for each run of the selector. + ProduceNonFlagSettingCondBr = + !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); + } + private: /// tblgen-erated 'select' implementation, used as the initial selector for /// the patterns that don't require complex C++. @@ -68,6 +78,10 @@ private: bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; + /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 
+ void contractCrossBankCopyIntoStore(MachineInstr &I, + MachineRegisterInfo &MRI) const; + bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, @@ -101,8 +115,6 @@ private: bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; - void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl> &Idxs) const; bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; @@ -116,6 +128,7 @@ private: bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, @@ -128,6 +141,8 @@ private: MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitTST(const Register &LHS, const Register &RHS, @@ -155,7 +170,9 @@ private: ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; + ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; + ComplexRendererFns 
selectNegArithImmed(MachineOperand &Root) const; ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, unsigned Size) const; @@ -183,11 +200,48 @@ private: return selectAddrModeIndexed(Root, Width / 8); } + bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, + const MachineRegisterInfo &MRI) const; + ComplexRendererFns + selectAddrModeShiftedExtendXReg(MachineOperand &Root, + unsigned SizeInBytes) const; + ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; + ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, + unsigned SizeInBytes) const; + template + ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { + return selectAddrModeXRO(Root, Width / 8); + } + + ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; + + ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { + return selectShiftedRegister(Root); + } + + ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { + // TODO: selectShiftedRegister should allow for rotates on logical shifts. + // For now, make them the same. The only difference between the two is that + // logical shifts are allowed to fold in rotates. Otherwise, these are + // functionally the same. + return selectShiftedRegister(Root); + } + + /// Instructions that accept extend modifiers like UXTW expect the register + /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a + /// subregister copy if necessary. Return either ExtReg, or the result of the + /// new copy. 
+ Register narrowExtendRegIfNeeded(Register ExtReg, + MachineIRBuilder &MIB) const; + ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; + void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const; + void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const; // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. void materializeLargeCMVal(MachineInstr &I, const Value *V, - unsigned char OpFlags) const; + unsigned OpFlags) const; // Optimization methods. bool tryOptVectorShuffle(MachineInstr &I) const; @@ -197,12 +251,22 @@ private: MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; + /// Return true if \p MI is a load or store of \p NumBytes bytes. + bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; + + /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit + /// register zeroed out. In other words, the result of MI has been explicitly + /// zero extended. + bool isDef32(const MachineInstr &MI) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ProduceNonFlagSettingCondBr = false; + #define GET_GLOBALISEL_PREDICATES_DECL #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_DECL @@ -312,7 +376,7 @@ static bool getSubRegForClass(const TargetRegisterClass *RC, SubReg = AArch64::hsub; break; case 32: - if (RC == &AArch64::GPR32RegClass) + if (RC != &AArch64::FPR32RegClass) SubReg = AArch64::sub_32; else SubReg = AArch64::ssub; @@ -357,7 +421,7 @@ static bool unsupportedBinOp(const MachineInstr &I, // so, this will need to be taught about that, and we'll need to get the // bank out of the minimal class for the register. // Either way, this needs to be documented (and possibly verified). 
- if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (!Register::isVirtualRegister(MO.getReg())) { LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); return true; } @@ -492,8 +556,8 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); @@ -502,7 +566,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, (DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || + (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || // Copies are a mean to copy bits around, as long as we are // on the same register class, that's fine. Otherwise, that // means we need some SUBREG_TO_REG or AND & co. @@ -526,7 +590,7 @@ static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, /// SubRegCopy (To class) = COPY CopyReg:SubReg /// Dst = COPY SubRegCopy static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, - const RegisterBankInfo &RBI, unsigned SrcReg, + const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *From, const TargetRegisterClass *To, unsigned SubReg) { @@ -539,7 +603,7 @@ static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI, // It's possible that the destination register won't be constrained. Make // sure that happens. 
- if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg())) + if (!Register::isPhysicalRegister(I.getOperand(0).getReg())) RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); return true; @@ -553,8 +617,8 @@ static std::pair getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); @@ -579,8 +643,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); @@ -607,11 +671,10 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // result. auto CheckCopy = [&]() { // If we have a bitcast or something, we can't have physical registers. 
- assert( - (I.isCopy() || - (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) && - !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) && - "No phys reg on generic operator!"); + assert((I.isCopy() || + (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && + !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && + "No phys reg on generic operator!"); assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI)); (void)KnownValid; return true; @@ -626,38 +689,38 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return false; } - // Is this a cross-bank copy? - if (DstRegBank.getID() != SrcRegBank.getID()) { - // If we're doing a cross-bank copy on different-sized registers, we need - // to do a bit more work. - unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); - unsigned DstSize = TRI.getRegSizeInBits(*DstRC); + unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); + unsigned DstSize = TRI.getRegSizeInBits(*DstRC); - if (SrcSize > DstSize) { - // We're doing a cross-bank copy into a smaller register. We need a - // subregister copy. First, get a register class that's on the same bank - // as the destination, but the same size as the source. - const TargetRegisterClass *SubregRC = - getMinClassForRegBank(DstRegBank, SrcSize, true); - assert(SubregRC && "Didn't get a register class for subreg?"); + // If we're doing a cross-bank copy on different-sized registers, we need + // to do a bit more work. + if (SrcSize > DstSize) { + // We're doing a cross-bank copy into a smaller register. We need a + // subregister copy. First, get a register class that's on the same bank + // as the destination, but the same size as the source. + const TargetRegisterClass *SubregRC = + getMinClassForRegBank(DstRegBank, SrcSize, true); + assert(SubregRC && "Didn't get a register class for subreg?"); - // Get the appropriate subregister for the destination. 
- unsigned SubReg = 0; - if (!getSubRegForClass(DstRC, TRI, SubReg)) { - LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); - return false; - } - - // Now, insert a subregister copy using the new register class. - selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); - return CheckCopy(); + // Get the appropriate subregister for the destination. + unsigned SubReg = 0; + if (!getSubRegForClass(DstRC, TRI, SubReg)) { + LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); + return false; } - else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && - SrcSize == 16) { + // Now, insert a subregister copy using the new register class. + selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); + return CheckCopy(); + } + + // Is this a cross-bank copy? + if (DstRegBank.getID() != SrcRegBank.getID()) { + if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && + SrcSize == 16) { // Special case for FPR16 to GPR32. // FIXME: This can probably be generalized like the above case. - unsigned PromoteReg = + Register PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG), PromoteReg) @@ -674,7 +737,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, // If the destination is a physical register, then there's nothing to // change, so we're done. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return CheckCopy(); } @@ -955,7 +1018,9 @@ bool AArch64InstructionSelector::selectVectorSHL( return false; unsigned Opc = 0; - if (Ty == LLT::vector(4, 32)) { + if (Ty == LLT::vector(2, 64)) { + Opc = AArch64::USHLv2i64; + } else if (Ty == LLT::vector(4, 32)) { Opc = AArch64::USHLv4i32; } else if (Ty == LLT::vector(2, 32)) { Opc = AArch64::USHLv2i32; @@ -989,7 +1054,11 @@ bool AArch64InstructionSelector::selectVectorASHR( unsigned Opc = 0; unsigned NegOpc = 0; const TargetRegisterClass *RC = nullptr; - if (Ty == LLT::vector(4, 32)) { + if (Ty == LLT::vector(2, 64)) { + Opc = AArch64::SSHLv2i64; + NegOpc = AArch64::NEGv2i64; + RC = &AArch64::FPR128RegClass; + } else if (Ty == LLT::vector(4, 32)) { Opc = AArch64::SSHLv4i32; NegOpc = AArch64::NEGv4i32; RC = &AArch64::FPR128RegClass; @@ -1044,7 +1113,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin( } void AArch64InstructionSelector::materializeLargeCMVal( - MachineInstr &I, const Value *V, unsigned char OpFlags) const { + MachineInstr &I, const Value *V, unsigned OpFlags) const { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1097,8 +1166,8 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const { // some reason we receive input GMIR that has an s64 shift amount that's not // a G_CONSTANT, insert a truncate so that we can still select the s32 // register-register variant. 
- unsigned SrcReg = I.getOperand(1).getReg(); - unsigned ShiftReg = I.getOperand(2).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + Register ShiftReg = I.getOperand(2).getReg(); const LLT ShiftTy = MRI.getType(ShiftReg); const LLT SrcTy = MRI.getType(SrcReg); if (SrcTy.isVector()) @@ -1118,6 +1187,9 @@ void AArch64InstructionSelector::preISelLower(MachineInstr &I) const { } return; } + case TargetOpcode::G_STORE: + contractCrossBankCopyIntoStore(I, MRI); + return; default: return; } @@ -1158,6 +1230,48 @@ bool AArch64InstructionSelector::earlySelectSHL( return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); } +void AArch64InstructionSelector::contractCrossBankCopyIntoStore( + MachineInstr &I, MachineRegisterInfo &MRI) const { + assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); + // If we're storing a scalar, it doesn't matter what register bank that + // scalar is on. All that matters is the size. + // + // So, if we see something like this (with a 32-bit scalar as an example): + // + // %x:gpr(s32) = ... something ... + // %y:fpr(s32) = COPY %x:gpr(s32) + // G_STORE %y:fpr(s32) + // + // We can fix this up into something like this: + // + // G_STORE %x:gpr(s32) + // + // And then continue the selection process normally. + MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI); + if (!Def) + return; + Register DefDstReg = Def->getOperand(0).getReg(); + LLT DefDstTy = MRI.getType(DefDstReg); + Register StoreSrcReg = I.getOperand(0).getReg(); + LLT StoreSrcTy = MRI.getType(StoreSrcReg); + + // If we get something strange like a physical register, then we shouldn't + // go any further. + if (!DefDstTy.isValid()) + return; + + // Are the source and dst types the same size? + if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) + return; + + if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == + RBI.getRegBank(DefDstReg, MRI, TRI)) + return; + + // We have a cross-bank copy, which is entering a store. Let's fold it. 
+ I.getOperand(0).setReg(DefDstReg); +} + bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1169,13 +1283,37 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { switch (I.getOpcode()) { case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); + case TargetOpcode::G_CONSTANT: { + bool IsZero = false; + if (I.getOperand(1).isCImm()) + IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; + else if (I.getOperand(1).isImm()) + IsZero = I.getOperand(1).getImm() == 0; + + if (!IsZero) + return false; + + Register DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32)) + return false; + + if (Ty == LLT::scalar(64)) { + I.getOperand(1).ChangeToRegister(AArch64::XZR, false); + RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); + } else { + I.getOperand(1).ChangeToRegister(AArch64::WZR, false); + RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); + } + I.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } default: return false; } } -bool AArch64InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool AArch64InstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -1244,7 +1382,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, if (earlySelect(I)) return true; - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; LLT Ty = @@ -1439,14 +1577,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return true; } case TargetOpcode::G_EXTRACT: { - LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + Register DstReg = 
I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); (void)DstTy; unsigned SrcSize = SrcTy.getSizeInBits(); - // Larger extracts are vectors, same-size extracts should be something else - // by now (either split up or simplified to a COPY). - if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32) - return false; + + if (SrcTy.getSizeInBits() > 64) { + // This should be an extract of an s128, which is like a vector extract. + if (SrcTy.getSizeInBits() != 128) + return false; + // Only support extracting 64 bits from an s128 at the moment. + if (DstTy.getSizeInBits() != 64) + return false; + + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + // Check we have the right regbank always. + assert(SrcRB.getID() == AArch64::FPRRegBankID && + DstRB.getID() == AArch64::FPRRegBankID && + "Wrong extract regbank!"); + (void)SrcRB; + + // Emit the same code as a vector extract. + // Offset must be a multiple of 64. + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + unsigned LaneIdx = Offset / 64; + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + @@ -1458,7 +1625,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); + DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) .addReg(DstReg, 0, AArch64::sub_32); @@ -1521,11 +1688,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_GLOBAL_VALUE: { auto GV = I.getOperand(1).getGlobal(); - if (GV->isThreadLocal()) { - // FIXME: we don't support TLS yet. - return false; - } - unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM); + if (GV->isThreadLocal()) + return selectTLSGlobalValue(I, MRI); + + unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); @@ -1562,8 +1728,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I, } auto &MemOp = **I.memoperands_begin(); - if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { - LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + if (MemOp.isAtomic()) { + // For now we just support s8 acquire loads to be able to compile stack + // protector code. 
+ if (MemOp.getOrdering() == AtomicOrdering::Acquire && + MemOp.getSize() == 1) { + I.setDesc(TII.get(AArch64::LDARB)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } unsigned MemSizeInBits = MemOp.getSize() * 8; @@ -1598,7 +1771,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, const unsigned Size = MemSizeInBits / 8; const unsigned Scale = Log2_32(Size); if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { - unsigned Ptr2Reg = PtrMI->getOperand(1).getReg(); + Register Ptr2Reg = PtrMI->getOperand(1).getReg(); I.getOperand(1).setReg(Ptr2Reg); PtrMI = MRI.getVRegDef(Ptr2Reg); Offset = Imm / Size; @@ -1688,8 +1861,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return selectVectorSHL(I, MRI); LLVM_FALLTHROUGH; case TargetOpcode::G_OR: - case TargetOpcode::G_LSHR: - case TargetOpcode::G_GEP: { + case TargetOpcode::G_LSHR: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; @@ -1711,6 +1883,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } + case TargetOpcode::G_GEP: { + MachineIRBuilder MIRBuilder(I); + emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), + MIRBuilder); + I.eraseFromParent(); + return true; + } case TargetOpcode::G_UADDO: { // TODO: Support other types. 
unsigned OpSize = Ty.getSizeInBits(); @@ -1816,6 +1995,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I, constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } + + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } } return false; @@ -1868,21 +2057,41 @@ bool AArch64InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXT: { unsigned Opcode = I.getOpcode(); - const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), - SrcTy = MRI.getType(I.getOperand(1).getReg()); - const bool isSigned = Opcode == TargetOpcode::G_SEXT; + const bool IsSigned = Opcode == TargetOpcode::G_SEXT; const Register DefReg = I.getOperand(0).getReg(); const Register SrcReg = I.getOperand(1).getReg(); - const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); + const LLT DstTy = MRI.getType(DefReg); + const LLT SrcTy = MRI.getType(SrcReg); + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = SrcTy.getSizeInBits(); - if (RB.getID() != AArch64::GPRRegBankID) { - LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB - << ", expected: GPR\n"); - return false; + assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == + AArch64::GPRRegBankID && + "Unexpected ext regbank"); + + MachineIRBuilder MIB(I); + MachineInstr *ExtI; + if (DstTy.isVector()) + return false; // Should be handled by imported patterns. + + // First check if we're extending the result of a load which has a dest type + // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest + // GPR register on AArch64 and all loads which are smaller automatically + // zero-extend the upper bits. E.g. 
+ // %v(s8) = G_LOAD %p, :: (load 1) + // %v2(s32) = G_ZEXT %v(s8) + if (!IsSigned) { + auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (LoadMI && + RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) { + const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); + unsigned BytesLoaded = MemOp->getSize(); + if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) + return selectCopy(I, TII, MRI, TRI, RBI); + } } - MachineInstr *ExtI; - if (DstTy == LLT::scalar(64)) { + if (DstSize == 64) { // FIXME: Can we avoid manually doing this? if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) @@ -1890,33 +2099,26 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return false; } - const Register SrcXReg = - MRI.createVirtualRegister(&AArch64::GPR64RegClass); - BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) - .addDef(SrcXReg) - .addImm(0) - .addUse(SrcReg) - .addImm(AArch64::sub_32); + auto SubregToReg = + MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {}) + .addImm(0) + .addUse(SrcReg) + .addImm(AArch64::sub_32); - const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri; - ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) - .addDef(DefReg) - .addUse(SrcXReg) - .addImm(0) - .addImm(SrcTy.getSizeInBits() - 1); - } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) { - const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri; - ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) - .addDef(DefReg) - .addUse(SrcReg) - .addImm(0) - .addImm(SrcTy.getSizeInBits() - 1); + ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, + {DefReg}, {SubregToReg}) + .addImm(0) + .addImm(SrcSize - 1); + } else if (DstSize <= 32) { + ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, + {DefReg}, {SrcReg}) + .addImm(0) + .addImm(SrcSize - 1); } else { return false; } constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); - I.eraseFromParent(); return true; } @@ -2163,6 +2365,37 @@ bool AArch64InstructionSelector::selectJumpTable( return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); } +bool AArch64InstructionSelector::selectTLSGlobalValue( + MachineInstr &I, MachineRegisterInfo &MRI) const { + if (!STI.isTargetMachO()) + return false; + MachineFunction &MF = *I.getParent()->getParent(); + MF.getFrameInfo().setAdjustsStack(true); + + const GlobalValue &GV = *I.getOperand(1).getGlobal(); + MachineIRBuilder MIB(I); + + MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {}) + .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); + + auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, + {Register(AArch64::X0)}) + .addImm(0); + + // TLS calls preserve all registers except those that absolutely must be + // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be + // silly). + MIB.buildInstr(AArch64::BLR, {}, {Load}) + .addDef(AArch64::X0, RegState::Implicit) + .addRegMask(TRI.getTLSCallPreservedMask()); + + MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); + RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, + MRI); + I.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectIntrinsicTrunc( MachineInstr &I, MachineRegisterInfo &MRI) const { const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); @@ -2478,16 +2711,40 @@ bool AArch64InstructionSelector::selectMergeValues( const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); + const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); - // At the moment we only support merging two s32s into an s64. 
if (I.getNumOperands() != 3) return false; - if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) - return false; - const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); + + // Merging 2 s64s into an s128. + if (DstTy == LLT::scalar(128)) { + if (SrcTy.getSizeInBits() != 64) + return false; + MachineIRBuilder MIB(I); + Register DstReg = I.getOperand(0).getReg(); + Register Src1Reg = I.getOperand(1).getReg(); + Register Src2Reg = I.getOperand(2).getReg(); + auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); + MachineInstr *InsMI = + emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); + if (!InsMI) + return false; + MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), + Src2Reg, /* LaneIdx */ 1, RB, MIB); + if (!Ins2MI) + return false; + constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); + constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); + I.eraseFromParent(); + return true; + } + if (RB.getID() != AArch64::GPRRegBankID) return false; + if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) + return false; + auto *DstRC = &AArch64::GPR64RegClass; Register SubToRegDef = MRI.createVirtualRegister(DstRC); MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -2695,7 +2952,8 @@ bool AArch64InstructionSelector::selectUnmergeValues( const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); const LLT WideTy = MRI.getType(SrcReg); (void)WideTy; - assert(WideTy.isVector() && "can only unmerge from vector types!"); + assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && + "can only unmerge from vector or s128 types!"); assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && "source register size too small!"); @@ -2802,29 +3060,6 @@ bool AArch64InstructionSelector::selectConcatVectors( return true; } -void AArch64InstructionSelector::collectShuffleMaskIndices( - MachineInstr &I, MachineRegisterInfo &MRI, - SmallVectorImpl> 
&Idxs) const { - MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg()); - assert( - MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR && - "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR"); - // Find the constant indices. - for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) { - // Look through copies. - MachineInstr *ScalarDef = - getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI); - assert(ScalarDef && "Could not find vreg def of shufflevec index op"); - if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) { - // This be an undef if not a constant. - assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF); - Idxs.push_back(None); - } else { - Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue()); - } - } -} - unsigned AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const { @@ -2905,6 +3140,31 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { return std::make_pair(Opc, SubregIdx); } +MachineInstr * +AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, + {AArch64::ADDWrr, AArch64::ADDWri}}; + bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; + auto ImmFns = selectArithImmed(RHS); + unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; + auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()}); + + // If we matched a valid constant immediate, add those operands. 
+ if (ImmFns) { + for (auto &RenderFn : *ImmFns) + RenderFn(AddMI); + } else { + AddMI.addUse(RHS.getReg()); + } + + constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); + return &*AddMI; +} + MachineInstr * AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { @@ -3151,7 +3411,7 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { // Can't see past copies from physregs. if (Opc == TargetOpcode::COPY && - TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg())) + Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) return false; CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); @@ -3342,16 +3602,9 @@ bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const { return false; // The shuffle's second operand doesn't matter if the mask is all zero. - auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI); - if (!ZeroVec) + const Constant *Mask = I.getOperand(3).getShuffleMask(); + if (!isa(Mask)) return false; - int64_t Zero = 0; - if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero) - return false; - for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) { - if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg()) - return false; // This wasn't an all zeros vector. - } // We're done, now find out what kind of splat we need. 
LLT VecTy = MRI.getType(I.getOperand(0).getReg()); @@ -3399,19 +3652,14 @@ bool AArch64InstructionSelector::selectShuffleVector( const LLT Src1Ty = MRI.getType(Src1Reg); Register Src2Reg = I.getOperand(2).getReg(); const LLT Src2Ty = MRI.getType(Src2Reg); + const Constant *ShuffleMask = I.getOperand(3).getShuffleMask(); MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); LLVMContext &Ctx = MF.getFunction().getContext(); - // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask - // operand, it comes in as a normal vector value which we have to analyze to - // find the mask indices. If the mask element is undef, then - // collectShuffleMaskIndices() will add a None entry for that index into - // the list. - SmallVector, 8> Mask; - collectShuffleMaskIndices(I, MRI, Mask); - assert(!Mask.empty() && "Expected to find mask indices"); + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask); // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if // it's originated from a <1 x T> type. Those should have been lowered into @@ -3424,10 +3672,10 @@ bool AArch64InstructionSelector::selectShuffleVector( unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; SmallVector CstIdxs; - for (auto &MaybeVal : Mask) { + for (int Val : Mask) { // For now, any undef indexes we'll just assume to be 0. This should be // optimized in future, e.g. to select DUP etc. - int Val = MaybeVal.hasValue() ? *MaybeVal : 0; + Val = Val < 0 ? 0 : Val; for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { unsigned Offset = Byte + Val * BytesPerElt; CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); @@ -3684,21 +3932,6 @@ static unsigned findIntrinsicID(MachineInstr &I) { return IntrinOp->getIntrinsicID(); } -/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr -/// intrinsic. 
-static unsigned getStlxrOpcode(unsigned NumBytesToStore) { - switch (NumBytesToStore) { - // TODO: 1, 2, and 4 byte stores. - case 8: - return AArch64::STLXRX; - default: - LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! (" - << NumBytesToStore << ")\n"); - break; - } - return 0; -} - bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( MachineInstr &I, MachineRegisterInfo &MRI) const { // Find the intrinsic ID. @@ -3719,32 +3952,6 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( return false; MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); break; - case Intrinsic::aarch64_stlxr: - Register StatReg = I.getOperand(0).getReg(); - assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 && - "Status register must be 32 bits!"); - Register SrcReg = I.getOperand(2).getReg(); - - if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) { - LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n"); - return false; - } - - Register PtrReg = I.getOperand(3).getReg(); - assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand"); - - // Expect only one memory operand. - if (!I.hasOneMemOperand()) - return false; - - const MachineMemOperand *MemOp = *I.memoperands_begin(); - unsigned NumBytesToStore = MemOp->getSize(); - unsigned Opc = getStlxrOpcode(NumBytesToStore); - if (!Opc) - return false; - - auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg}); - constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI); } I.eraseFromParent(); @@ -3860,22 +4067,15 @@ AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; } -/// SelectArithImmed - Select an immediate value that can be represented as -/// a 12-bit value shifted left by either 0 or 12. If so, return true with -/// Val set to the 12-bit value and Shift set to the shifter operand. 
+/// Helper to select an immediate value that can be represented as a 12-bit +/// value shifted left by either 0 or 12. If it is possible to do so, return +/// the immediate and shift value. If not, return None. +/// +/// Used by selectArithImmed and selectNegArithImmed. InstructionSelector::ComplexRendererFns -AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { - // This function is called from the addsub_shifted_imm ComplexPattern, - // which lists [imm] as the list of opcode it's interested in, however - // we still need to check whether the operand is actually an immediate - // here because the ComplexPattern opcode list is only used in - // root-level opcode matching. - auto MaybeImmed = getImmedFromMO(Root); - if (MaybeImmed == None) - return None; - uint64_t Immed = *MaybeImmed; +AArch64InstructionSelector::select12BitValueWithLeftShift( + uint64_t Immed) const { unsigned ShiftAmt; - if (Immed >> 12 == 0) { ShiftAmt = 0; } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { @@ -3891,6 +4091,244 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { }}; } +/// SelectArithImmed - Select an immediate value that can be represented as +/// a 12-bit value shifted left by either 0 or 12. If so, return true with +/// Val set to the 12-bit value and Shift set to the shifter operand. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { + // This function is called from the addsub_shifted_imm ComplexPattern, + // which lists [imm] as the list of opcode it's interested in, however + // we still need to check whether the operand is actually an immediate + // here because the ComplexPattern opcode list is only used in + // root-level opcode matching. 
+ auto MaybeImmed = getImmedFromMO(Root); + if (MaybeImmed == None) + return None; + return select12BitValueWithLeftShift(*MaybeImmed); +} + +/// SelectNegArithImmed - As above, but negates the value before trying to +/// select it. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { + // We need a register here, because we need to know if we have a 64 or 32 + // bit immediate. + if (!Root.isReg()) + return None; + auto MaybeImmed = getImmedFromMO(Root); + if (MaybeImmed == None) + return None; + uint64_t Immed = *MaybeImmed; + + // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" + // have the opposite effect on the C flag, so this pattern mustn't match under + // those circumstances. + if (Immed == 0) + return None; + + // Check if we're dealing with a 32-bit type on the root or a 64-bit type on + // the root. + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + if (MRI.getType(Root.getReg()).getSizeInBits() == 32) + Immed = ~((uint32_t)Immed) + 1; + else + Immed = ~Immed + 1ULL; + + if (Immed & 0xFFFFFFFFFF000000ULL) + return None; + + Immed &= 0xFFFFFFULL; + return select12BitValueWithLeftShift(Immed); +} + +/// Return true if it is worth folding MI into an extended register. That is, +/// if it's safe to pull it into the addressing mode of a load or store as a +/// shift. +bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( + MachineInstr &MI, const MachineRegisterInfo &MRI) const { + // Always fold if there is one use, or if we're optimizing for size. + Register DefReg = MI.getOperand(0).getReg(); + if (MRI.hasOneUse(DefReg) || + MI.getParent()->getParent()->getFunction().hasMinSize()) + return true; + + // It's better to avoid folding and recomputing shifts when we don't have a + // fastpath. 
+ if (!STI.hasLSLFast()) + return false; + + // We have a fastpath, so folding a shift in and potentially computing it + // many times may be beneficial. Check if this is only used in memory ops. + // If it is, then we should fold. + return all_of(MRI.use_instructions(DefReg), + [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); +} + +/// This is used for computing addresses like this: +/// +/// ldr x1, [x2, x3, lsl #3] +/// +/// Where x2 is the base register, and x3 is an offset register. The shift-left +/// is a constant value specific to this load instruction. That is, we'll never +/// see anything other than a 3 here (which corresponds to the size of the +/// element being loaded.) +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( + MachineOperand &Root, unsigned SizeInBytes) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // Make sure that the memory op is a valid size. + int64_t LegalShiftVal = Log2_32(SizeInBytes); + if (LegalShiftVal == 0) + return None; + + // We want to find something like this: + // + // val = G_CONSTANT LegalShiftVal + // shift = G_SHL off_reg val + // ptr = G_GEP base_reg shift + // x = G_LOAD ptr + // + // And fold it into this addressing mode: + // + // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] + + // Check if we can find the G_GEP. + MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI); + if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI)) + return None; + + // Now, try to match an opcode which will match our specific offset. + // We want a G_SHL or a G_MUL. 
+ MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI); + if (!OffsetInst) + return None; + + unsigned OffsetOpc = OffsetInst->getOpcode(); + if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) + return None; + + if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) + return None; + + // Now, try to find the specific G_CONSTANT. Start by assuming that the + // register we will offset is the LHS, and the register containing the + // constant is the RHS. + Register OffsetReg = OffsetInst->getOperand(1).getReg(); + Register ConstantReg = OffsetInst->getOperand(2).getReg(); + auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); + if (!ValAndVReg) { + // We didn't get a constant on the RHS. If the opcode is a shift, then + // we're done. + if (OffsetOpc == TargetOpcode::G_SHL) + return None; + + // If we have a G_MUL, we can use either register. Try looking at the RHS. + std::swap(OffsetReg, ConstantReg); + ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); + if (!ValAndVReg) + return None; + } + + // The value must fit into 3 bits, and must be positive. Make sure that is + // true. + int64_t ImmVal = ValAndVReg->Value; + + // Since we're going to pull this into a shift, the constant value must be + // a power of 2. If we got a multiply, then we need to check this. + if (OffsetOpc == TargetOpcode::G_MUL) { + if (!isPowerOf2_32(ImmVal)) + return None; + + // Got a power of 2. So, the amount we'll shift is the log base-2 of that. + ImmVal = Log2_32(ImmVal); + } + + if ((ImmVal & 0x7) != ImmVal) + return None; + + // We are only allowed to shift by LegalShiftVal. This shift value is built + // into the instruction, so we can't just use whatever we want. + if (ImmVal != LegalShiftVal) + return None; + + // We can use the LHS of the GEP as the base, and the LHS of the shift as an + // offset. Signify that we are shifting by setting the shift flag to 1. 
+ return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. + MIB.addImm(0); + MIB.addImm(1); + }}}; +} + +/// This is used for computing addresses like this: +/// +/// ldr x1, [x2, x3] +/// +/// Where x2 is the base register, and x3 is an offset register. +/// +/// When possible (or profitable) to fold a G_GEP into the address calculation, +/// this will do so. Otherwise, it will return None. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeRegisterOffset( + MachineOperand &Root) const { + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // We need a GEP. + MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); + if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP) + return None; + + // If this is used more than once, let's not bother folding. + // TODO: Check if they are memory ops. If they are, then we can still fold + // without having to recompute anything. + if (!MRI.hasOneUse(Gep->getOperand(0).getReg())) + return None; + + // Base is the GEP's LHS, offset is its RHS. + return {{[=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(1).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + MIB.addUse(Gep->getOperand(2).getReg()); + }, + [=](MachineInstrBuilder &MIB) { + // Need to add both immediates here to make sure that they are both + // added to the instruction. + MIB.addImm(0); + MIB.addImm(0); + }}}; +} + +/// This is intended to be equivalent to selectAddrModeXRO in +/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. 
+InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, + unsigned SizeInBytes) const { + MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); + + // If we have a constant offset, then we probably don't want to match a + // register offset. + if (isBaseWithConstantOffset(Root, MRI)) + return None; + + // Try to fold shifts into the addressing mode. + auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); + if (AddrModeFns) + return AddrModeFns; + + // If that doesn't work, see if it's possible to fold in registers from + // a GEP. + return selectAddrModeRegisterOffset(Root); +} + /// Select a "register plus unscaled signed 9-bit immediate" address. This /// should only match when there is an offset that is not valid for a scaled /// immediate addressing mode. The "Size" argument is the size in bytes of the @@ -3994,6 +4432,205 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, }}; } +/// Given a shift instruction, return the correct shift type for that +/// instruction. +static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { + // TODO: Handle AArch64_AM::ROR + switch (MI.getOpcode()) { + default: + return AArch64_AM::InvalidShiftExtend; + case TargetOpcode::G_SHL: + return AArch64_AM::LSL; + case TargetOpcode::G_LSHR: + return AArch64_AM::LSR; + case TargetOpcode::G_ASHR: + return AArch64_AM::ASR; + } +} + +/// Select a "shifted register" operand. If the value is not shifted, set the +/// shift operand to a default value of "lsl 0". +/// +/// TODO: Allow shifted register to be rotated in logical instructions. 
+InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + // Check if the operand is defined by an instruction which corresponds to + // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. + // + // TODO: Handle AArch64_AM::ROR for logical instructions. + MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); + if (!ShiftInst) + return None; + AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); + if (ShType == AArch64_AM::InvalidShiftExtend) + return None; + if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) + return None; + + // Need an immediate on the RHS. + MachineOperand &ShiftRHS = ShiftInst->getOperand(2); + auto Immed = getImmedFromMO(ShiftRHS); + if (!Immed) + return None; + + // We have something that we can fold. Fold in the shift's LHS and RHS into + // the instruction. + MachineOperand &ShiftLHS = ShiftInst->getOperand(1); + Register ShiftReg = ShiftLHS.getReg(); + + unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); + unsigned Val = *Immed & (NumBits - 1); + unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); + + return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; +} + +/// Get the correct ShiftExtendType for an extend instruction. +static AArch64_AM::ShiftExtendType +getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) { + unsigned Opc = MI.getOpcode(); + + // Handle explicit extend instructions first. 
+ if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::SXTB; + case 16: + return AArch64_AM::SXTH; + case 32: + return AArch64_AM::SXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::UXTB; + case 16: + return AArch64_AM::UXTH; + case 32: + return AArch64_AM::UXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + // Don't have an explicit extend. Try to handle a G_AND with a constant mask + // on the RHS. + if (Opc != TargetOpcode::G_AND) + return AArch64_AM::InvalidShiftExtend; + + Optional MaybeAndMask = getImmedFromMO(MI.getOperand(2)); + if (!MaybeAndMask) + return AArch64_AM::InvalidShiftExtend; + uint64_t AndMask = *MaybeAndMask; + switch (AndMask) { + default: + return AArch64_AM::InvalidShiftExtend; + case 0xFF: + return AArch64_AM::UXTB; + case 0xFFFF: + return AArch64_AM::UXTH; + case 0xFFFFFFFF: + return AArch64_AM::UXTW; + } +} + +Register AArch64InstructionSelector::narrowExtendRegIfNeeded( + Register ExtReg, MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + if (MRI.getType(ExtReg).getSizeInBits() == 32) + return ExtReg; + + // Insert a copy to move ExtReg to GPR32. + Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg}); + + // Select the copy into a subregister copy. + selectCopy(*Copy, TII, MRI, TRI, RBI); + return Copy.getReg(0); +} + +/// Select an "extended register" operand. This operand folds in an extend +/// followed by an optional left shift. 
+InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectArithExtendedRegister( + MachineOperand &Root) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + uint64_t ShiftVal = 0; + Register ExtReg; + AArch64_AM::ShiftExtendType Ext; + MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); + if (!RootDef) + return None; + + if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) + return None; + + // Check if we can fold a shift and an extend. + if (RootDef->getOpcode() == TargetOpcode::G_SHL) { + // Look for a constant on the RHS of the shift. + MachineOperand &RHS = RootDef->getOperand(2); + Optional MaybeShiftVal = getImmedFromMO(RHS); + if (!MaybeShiftVal) + return None; + ShiftVal = *MaybeShiftVal; + if (ShiftVal > 4) + return None; + // Look for a valid extend instruction on the LHS of the shift. + MachineOperand &LHS = RootDef->getOperand(1); + MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); + if (!ExtDef) + return None; + Ext = getExtendTypeForInst(*ExtDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = ExtDef->getOperand(1).getReg(); + } else { + // Didn't get a shift. Try just folding an extend. + Ext = getExtendTypeForInst(*RootDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = RootDef->getOperand(1).getReg(); + + // If we have a 32 bit instruction which zeroes out the high half of a + // register, we get an implicit zero extend for free. Check if we have one. + // FIXME: We actually emit the extend right now even though we don't have + // to. + if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { + MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); + if (ExtInst && isDef32(*ExtInst)) + return None; + } + } + + // We require a GPR32 here. Narrow the ExtReg if needed using a subregister + // copy. 
+ MachineIRBuilder MIB(*RootDef); + ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB); + + return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, + [=](MachineInstrBuilder &MIB) { + MIB.addImm(getArithExtendImm(Ext, ShiftVal)); + }}}; +} + void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -4003,6 +4640,51 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } +void AArch64InstructionSelector::renderLogicalImm32( + MachineInstrBuilder &MIB, const MachineInstr &I) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); + uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); + MIB.addImm(Enc); +} + +void AArch64InstructionSelector::renderLogicalImm64( + MachineInstrBuilder &MIB, const MachineInstr &I) const { + assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); + uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); + MIB.addImm(Enc); +} + +bool AArch64InstructionSelector::isLoadStoreOfNumBytes( + const MachineInstr &MI, unsigned NumBytes) const { + if (!MI.mayLoadOrStore()) + return false; + assert(MI.hasOneMemOperand() && + "Expected load/store to have only one mem op!"); + return (*MI.memoperands_begin())->getSize() == NumBytes; +} + +bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) + return false; + + // Only return true if we know the operation will zero-out the high half of + // the 64-bit register. Truncates can be subregister copies, which don't + // zero out the high bits. 
Copies and other copy-like instructions can be + // fed by truncates, or could be lowered as subregister copies. + switch (MI.getOpcode()) { + default: + return true; + case TargetOpcode::COPY: + case TargetOpcode::G_BITCAST: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_PHI: + return false; + } +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index a985b330eaf..7a1901bd5b1 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,7 +13,9 @@ #include "AArch64LegalizerInfo.h" #include "AArch64Subtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -50,6 +52,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); + // FIXME: support subtargets which have neon/fp-armv8 disabled. 
+ if (!ST.hasNEON() || !ST.hasFPARMv8()) { + computeTables(); + return; + } + getActionDefinitionsBuilder(G_IMPLICIT_DEF) .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64}) .clampScalar(0, s1, s64) @@ -74,7 +82,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder(G_BSWAP) .legalFor({s32, s64, v4s32, v2s32, v2s64}) - .clampScalar(0, s16, s64) + .clampScalar(0, s32, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) @@ -104,6 +112,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder({G_SDIV, G_UDIV}) .legalFor({s32, s64}) + .libcallFor({s128}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) .scalarize(0); @@ -115,8 +124,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && AmtTy.getSizeInBits() == 32; }) - .legalFor( - {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}}) + .legalFor({{s32, s32}, + {s32, s64}, + {s64, s64}, + {v2s32, v2s32}, + {v4s32, v4s32}, + {v2s64, v2s64}}) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) .minScalarSameAs(1, 0); @@ -191,14 +204,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .legalIf([=](const LegalityQuery &Query) { const LLT &Ty0 = Query.Types[0]; const LLT &Ty1 = Query.Types[1]; - if (Ty1 != s32 && Ty1 != s64) + if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128) return false; if (Ty1 == p0) return true; return isPowerOf2_32(Ty0.getSizeInBits()) && (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); }) - .clampScalar(1, s32, s64) + .clampScalar(1, s32, s128) .widenScalarToNextPow2(1) .maxScalarIf(typeInSet(1, {s32}), 0, s16) .maxScalarIf(typeInSet(1, {s64}), 0, s32) @@ -236,6 +249,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v8s8, 
p0, 64, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, @@ -247,14 +261,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) .clampScalar(0, s8, s64) - .widenScalarToNextPow2(0) - // TODO: We could support sum-of-pow2's but the lowering code doesn't know - // how to do that yet. - .unsupportedIfMemSizeNotPow2() + .lowerIfMemSizeNotPow2() // Lower any any-extending loads left into G_ANYEXT and G_LOAD .lowerIf([=](const LegalityQuery &Query) { return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; }) + .widenScalarToNextPow2(0) .clampMaxNumElements(0, s32, 2) .clampMaxNumElements(0, s64, 1) .customIf(IsPtrVecPred); @@ -262,9 +274,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { getActionDefinitionsBuilder(G_STORE) .legalForTypesWithMemDesc({{s8, p0, 8, 8}, {s16, p0, 16, 8}, + {s32, p0, 8, 8}, + {s32, p0, 16, 8}, {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, @@ -272,10 +287,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {v4s32, p0, 128, 8}, {v2s64, p0, 128, 8}}) .clampScalar(0, s8, s64) - .widenScalarToNextPow2(0) - // TODO: We could support sum-of-pow2's but the lowering code doesn't know - // how to do that yet. 
- .unsupportedIfMemSizeNotPow2() + .lowerIfMemSizeNotPow2() .lowerIf([=](const LegalityQuery &Query) { return Query.Types[0].isScalar() && Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; @@ -305,8 +317,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { {v8s16, v8s16}, {v8s8, v8s8}, {v16s8, v16s8}}) - .clampScalar(0, s32, s32) .clampScalar(1, s32, s64) + .clampScalar(0, s32, s32) .minScalarEltSameAsIf( [=](const LegalityQuery &Query) { const LLT &Ty = Query.Types[0]; @@ -330,33 +342,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { .widenScalarToNextPow2(1); // Extensions + auto ExtLegalFunc = [=](const LegalityQuery &Query) { + unsigned DstSize = Query.Types[0].getSizeInBits(); + + if (DstSize == 128 && !Query.Types[0].isVector()) + return false; // Extending to a scalar s128 needs narrowing. + + // Make sure that we have something that will fit in a register, and + // make sure it's a power of 2. + if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) + return false; + + const LLT &SrcTy = Query.Types[1]; + + // Special case for s1. + if (SrcTy == s1) + return true; + + // Make sure we fit in a register otherwise. Don't bother checking that + // the source type is below 128 bits. We shouldn't be allowing anything + // through which is wider than the destination in the first place. + unsigned SrcSize = SrcTy.getSizeInBits(); + if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) + return false; + + return true; + }; getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) - .legalIf([=](const LegalityQuery &Query) { - unsigned DstSize = Query.Types[0].getSizeInBits(); - - // Make sure that we have something that will fit in a register, and - // make sure it's a power of 2. - if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) - return false; - - const LLT &SrcTy = Query.Types[1]; - - // Special case for s1. 
- if (SrcTy == s1) - return true; - - // Make sure we fit in a register otherwise. Don't bother checking that - // the source type is below 128 bits. We shouldn't be allowing anything - // through which is wider than the destination in the first place. - unsigned SrcSize = SrcTy.getSizeInBits(); - if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) - return false; - - return true; - }); + .legalIf(ExtLegalFunc) + .clampScalar(0, s64, s64); // Just for s128, others are handled above. getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + // FP conversions getActionDefinitionsBuilder(G_FPTRUNC).legalFor( {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}); @@ -591,6 +610,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { return Query.Types[0] == p0 && Query.Types[1] == s64; }); + getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -617,6 +638,24 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI, llvm_unreachable("expected switch to return"); } +bool AArch64LegalizerInfo::legalizeIntrinsic( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + if (createMemLibcall(MIRBuilder, MRI, MI) == + LegalizerHelper::UnableToLegalize) + return false; + MI.eraseFromParent(); + return true; + default: + break; + } + return true; +} + bool AArch64LegalizerInfo::legalizeShlAshrLshr( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const { @@ -655,7 +694,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore( // legalized. In order to allow further legalization of the inst, we create // a new instruction and erase the existing one. 
- unsigned ValReg = MI.getOperand(0).getReg(); + Register ValReg = MI.getOperand(0).getReg(); const LLT ValTy = MRI.getType(ValReg); if (!ValTy.isVector() || !ValTy.getElementType().isPointer() || @@ -672,7 +711,7 @@ bool AArch64LegalizerInfo::legalizeLoadStore( auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg}); MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO); } else { - unsigned NewReg = MRI.createGenericVirtualRegister(NewTy); + Register NewReg = MRI.createGenericVirtualRegister(NewTy); auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO); MIRBuilder.buildBitcast({ValReg}, {NewLoad}); } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h index f3362a18620..15161bab466 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.h +++ b/lib/Target/AArch64/AArch64LegalizerInfo.h @@ -31,6 +31,9 @@ public: MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override; + bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; + private: bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 65b5f906e3f..a0c4a25bb5b 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -201,8 +201,22 @@ static bool isNarrowStore(unsigned Opc) { } } +// These instruction set memory tag and either keep memory contents unchanged or +// set it to zero, ignoring the address part of the source register. +static bool isTagStore(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + return true; + } +} + // Scaling factor for unscaled load or store. 
-static int getMemScale(MachineInstr &MI) { +static int getMemScale(const MachineInstr &MI) { switch (MI.getOpcode()) { default: llvm_unreachable("Opcode has unknown scale!"); @@ -255,6 +269,11 @@ static int getMemScale(MachineInstr &MI) { case AArch64::STURQi: case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + case AArch64::STGPi: return 16; } } @@ -449,6 +468,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::STPWpre; case AArch64::STPXi: return AArch64::STPXpre; + case AArch64::STGOffset: + return AArch64::STGPreIndex; + case AArch64::STZGOffset: + return AArch64::STZGPreIndex; + case AArch64::ST2GOffset: + return AArch64::ST2GPreIndex; + case AArch64::STZ2GOffset: + return AArch64::STZ2GPreIndex; + case AArch64::STGPi: + return AArch64::STGPpre; } } @@ -518,6 +547,16 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::STPWpost; case AArch64::STPXi: return AArch64::STPXpost; + case AArch64::STGOffset: + return AArch64::STGPostIndex; + case AArch64::STZGOffset: + return AArch64::STZGPostIndex; + case AArch64::ST2GOffset: + return AArch64::ST2GPostIndex; + case AArch64::STZ2GOffset: + return AArch64::STZ2GPostIndex; + case AArch64::STGPi: + return AArch64::STGPpost; } } @@ -536,10 +575,30 @@ static bool isPairedLdSt(const MachineInstr &MI) { case AArch64::STPQi: case AArch64::STPWi: case AArch64::STPXi: + case AArch64::STGPi: return true; } } +// Returns the scale and offset range of pre/post indexed variants of MI. +static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, + int &MinOffset, int &MaxOffset) { + bool IsPaired = isPairedLdSt(MI); + bool IsTagStore = isTagStore(MI); + // ST*G and all paired ldst have the same scale in pre/post-indexed variants + // as in the "unsigned offset" variant. + // All other pre/post indexed ldst instructions are unscaled. + Scale = (IsTagStore || IsPaired) ? 
getMemScale(MI) : 1; + + if (IsPaired) { + MinOffset = -64; + MaxOffset = 63; + } else { + MinOffset = -256; + MaxOffset = 255; + } +} + static const MachineOperand &getLdStRegOp(const MachineInstr &MI, unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); @@ -618,6 +677,11 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) { case AArch64::LDRWui: case AArch64::LDRHHui: case AArch64::LDRBBui: + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + case AArch64::STGPi: // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: @@ -808,7 +872,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, // STRWui %w1, ... // USE kill %w1 ; need to clear kill flag when moving STRWui downwards // STRW %w0 - unsigned Reg = getLdStRegOp(*I).getReg(); + Register Reg = getLdStRegOp(*I).getReg(); for (MachineInstr &MI : make_range(std::next(I), Paired)) MI.clearRegisterKills(Reg, TRI); } @@ -837,9 +901,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. - unsigned DstRegX = DstMO.getReg(); + Register DstRegX = DstMO.getReg(); // Get the W variant of that register. - unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); + Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); // Update the result of LDP to use the W instead of the X variant. 
DstMO.setReg(DstRegW); LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs())); @@ -882,9 +946,9 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, int LoadSize = getMemScale(*LoadI); int StoreSize = getMemScale(*StoreI); - unsigned LdRt = getLdStRegOp(*LoadI).getReg(); + Register LdRt = getLdStRegOp(*LoadI).getReg(); const MachineOperand &StMO = getLdStRegOp(*StoreI); - unsigned StRt = getLdStRegOp(*StoreI).getReg(); + Register StRt = getLdStRegOp(*StoreI).getReg(); bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); assert((IsStoreXReg || @@ -933,10 +997,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, ? getLdStOffsetOp(*StoreI).getImm() : getLdStOffsetOp(*StoreI).getImm() * StoreSize; int Width = LoadSize * 8; - unsigned DestReg = IsStoreXReg - ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, - &AArch64::GPR64RegClass) - : LdRt; + unsigned DestReg = + IsStoreXReg ? Register(TRI->getMatchingSuperReg( + LdRt, AArch64::sub_32, &AArch64::GPR64RegClass)) + : LdRt; assert((UnscaledLdOffset >= UnscaledStOffset && (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && @@ -1042,7 +1106,7 @@ bool AArch64LoadStoreOpt::findMatchingStore( MachineBasicBlock::iterator B = I->getParent()->begin(); MachineBasicBlock::iterator MBBI = I; MachineInstr &LoadMI = *I; - unsigned BaseReg = getLdStBaseOp(LoadMI).getReg(); + Register BaseReg = getLdStBaseOp(LoadMI).getReg(); // If the load is the first instruction in the block, there's obviously // not any matching store. 
@@ -1156,8 +1220,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool MayLoad = FirstMI.mayLoad(); bool IsUnscaled = TII->isUnscaledLdSt(FirstMI); - unsigned Reg = getLdStRegOp(FirstMI).getReg(); - unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + Register Reg = getLdStRegOp(FirstMI).getReg(); + Register BaseReg = getLdStBaseOp(FirstMI).getReg(); int Offset = getLdStOffsetOp(FirstMI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); @@ -1188,7 +1252,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // check for +1/-1. Make sure to check the new instruction offset is // actually an immediate and not a symbolic reference destined for // a relocation. - unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); + Register MIBaseReg = getLdStBaseOp(MI).getReg(); int MIOffset = getLdStOffsetOp(MI).getImm(); bool MIIsUnscaled = TII->isUnscaledLdSt(MI); if (IsUnscaled != MIIsUnscaled) { @@ -1328,18 +1392,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) : getPostIndexedOpcode(I->getOpcode()); MachineInstrBuilder MIB; + int Scale, MinOffset, MaxOffset; + getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset); if (!isPairedLdSt(*I)) { // Non-paired instruction. MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I)) .add(getLdStBaseOp(*I)) - .addImm(Value) + .addImm(Value / Scale) .setMemRefs(I->memoperands()) .setMIFlags(I->mergeFlagsWith(*Update)); } else { // Paired instruction. 
- int Scale = getMemScale(*I); MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) .add(getLdStRegOp(*Update)) .add(getLdStRegOp(*I, 0)) @@ -1395,28 +1460,21 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MI.getOperand(1).getReg() != BaseReg) break; - bool IsPairedInsn = isPairedLdSt(MemMI); int UpdateOffset = MI.getOperand(2).getImm(); if (MI.getOpcode() == AArch64::SUBXri) UpdateOffset = -UpdateOffset; - // For non-paired load/store instructions, the immediate must fit in a - // signed 9-bit integer. - if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) + // The immediate must be a multiple of the scaling factor of the pre/post + // indexed instruction. + int Scale, MinOffset, MaxOffset; + getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset); + if (UpdateOffset % Scale != 0) break; - // For paired load/store instructions, the immediate must be a multiple of - // the scaling factor. The scaled offset must also fit into a signed 7-bit - // integer. - if (IsPairedInsn) { - int Scale = getMemScale(MemMI); - if (UpdateOffset % Scale != 0) - break; - - int ScaledOffset = UpdateOffset / Scale; - if (ScaledOffset > 63 || ScaledOffset < -64) - break; - } + // Scaled offset must fit in the instruction immediate. + int ScaledOffset = UpdateOffset / Scale; + if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset) + break; // If we have a non-zero Offset, we check that it matches the amount // we're adding to the register. @@ -1433,7 +1491,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; - unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + Register BaseReg = getLdStBaseOp(MemMI).getReg(); int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); // Scan forward looking for post-index opportunities. 
Updating instructions @@ -1442,13 +1500,19 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( if (MIUnscaledOffset != UnscaledOffset) return E; - // If the base register overlaps a destination register, we can't - // merge the update. - bool IsPairedInsn = isPairedLdSt(MemMI); - for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { - unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + // If the base register overlaps a source/destination register, we can't + // merge the update. This does not apply to tag store instructions which + // ignore the address part of the source register. + // This does not apply to STGPi as well, which does not have unpredictable + // behavior in this case unlike normal stores, and always performs writeback + // after reading the source register value. + if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) { + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + Register DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } } // Track which register units have been modified and used between the first @@ -1487,7 +1551,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineInstr &MemMI = *I; MachineBasicBlock::iterator MBBI = I; - unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + Register BaseReg = getLdStBaseOp(MemMI).getReg(); int Offset = getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously @@ -1496,11 +1560,13 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; // If the base register overlaps a destination register, we can't // merge the update. - bool IsPairedInsn = isPairedLdSt(MemMI); - for (unsigned i = 0, e = IsPairedInsn ? 
2 : 1; i != e; ++i) { - unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + if (!isTagStore(MemMI)) { + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + Register DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } } // Track which register units have been modified and used between the first @@ -1659,7 +1725,7 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate // however, is not, so adjust here. int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); - // Look forward to try to find a post-index instruction. For example, + // Look forward to try to find a pre-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index e7d4a2789a2..afd5ae6bcbf 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -148,6 +148,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_TLSDESC; break; } + } else if (MO.getTargetFlags() & AArch64II::MO_PREL) { + RefFlags |= AArch64MCExpr::VK_PREL; } else { // No modifier means this is a generic reference, classified as absolute for // the cases where it matters (:abs_g0: etc). 
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 0efeeb272ec..0009fb7b552 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCLinkerOptimizationHint.h" #include @@ -95,6 +96,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// returned struct in a register. This field holds the virtual register into /// which the sret argument is passed. unsigned SRetReturnReg = 0; + /// SVE stack size (for predicates and data vectors) are maintained here + /// rather than in FrameInfo, as the placement and Stack IDs are target + /// specific. + uint64_t StackSizeSVE = 0; + + /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid. + bool HasCalculatedStackSizeSVE = false; /// Has a value when it is known whether or not the function uses a /// redzone, and no value otherwise. 
@@ -131,6 +139,15 @@ public: ArgumentStackToRestore = bytes; } + bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; } + + void setStackSizeSVE(uint64_t S) { + HasCalculatedStackSizeSVE = true; + StackSizeSVE = S; + } + + uint64_t getStackSizeSVE() const { return StackSizeSVE; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index aff861aae6b..d503c39b1f9 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -162,11 +162,11 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, LiveIntervals &LIs = G.getMetadata().LIS; - if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) { - LLVM_DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd) - << '\n'); - LLVM_DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra) - << '\n'); + if (Register::isPhysicalRegister(Rd) || Register::isPhysicalRegister(Ra)) { + LLVM_DEBUG(dbgs() << "Rd is a physical reg:" + << Register::isPhysicalRegister(Rd) << '\n'); + LLVM_DEBUG(dbgs() << "Ra is a physical reg:" + << Register::isPhysicalRegister(Ra) << '\n'); return false; } @@ -359,8 +359,8 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) { case AArch64::FMADDDrrr: case AArch64::FNMSUBDrrr: case AArch64::FNMADDDrrr: { - unsigned Rd = MI.getOperand(0).getReg(); - unsigned Ra = MI.getOperand(3).getReg(); + Register Rd = MI.getOperand(0).getReg(); + Register Ra = MI.getOperand(3).getReg(); if (addIntraChainConstraint(G, Rd, Ra)) addInterChainConstraint(G, Rd, Ra); @@ -369,7 +369,7 @@ void A57ChainingConstraint::apply(PBQPRAGraph &G) { case AArch64::FMLAv2f32: case AArch64::FMLSv2f32: { - unsigned Rd = MI.getOperand(0).getReg(); + Register Rd = MI.getOperand(0).getReg(); addInterChainConstraint(G, Rd, Rd); break; } diff --git 
a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp index 5f7245bfbd7..d30ea120bae 100644 --- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp +++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp @@ -15,7 +15,9 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -25,12 +27,31 @@ using namespace llvm; using namespace MIPatternMatch; +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS + namespace { +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H + class AArch64PreLegalizerCombinerInfo : public CombinerInfo { + GISelKnownBits *KB; + MachineDominatorTree *MDT; + public: - AArch64PreLegalizerCombinerInfo() + AArch64GenPreLegalizerCombinerHelper Generated; + + AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, + GISelKnownBits *KB, MachineDominatorTree *MDT) : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr) {} + /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), + KB(KB), MDT(MDT) { + if (!Generated.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const override; }; @@ -38,24 +59,50 @@ public: bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const { 
- CombinerHelper Helper(Observer, B); + CombinerHelper Helper(Observer, B, KB, MDT); switch (MI.getOpcode()) { - default: - return false; - case TargetOpcode::COPY: - return Helper.tryCombineCopy(MI); - case TargetOpcode::G_BR: - return Helper.tryCombineBr(MI); + case TargetOpcode::G_CONCAT_VECTORS: + return Helper.tryCombineConcatVectors(MI); + case TargetOpcode::G_SHUFFLE_VECTOR: + return Helper.tryCombineShuffleVector(MI); case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return Helper.tryCombineExtendingLoads(MI); + case TargetOpcode::G_ZEXTLOAD: { + bool Changed = false; + Changed |= Helper.tryCombineExtendingLoads(MI); + Changed |= Helper.tryCombineIndexedLoadStore(MI); + return Changed; } + case TargetOpcode::G_STORE: + return Helper.tryCombineIndexedLoadStore(MI); + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other + // heuristics decide. + unsigned MaxLen = EnableOpt ? 0 : 32; + // Try to inline memcpy type calls if optimizations are enabled. + return (!EnableMinSize) ? 
Helper.tryCombineMemCpyFamily(MI, MaxLen) + : false; + } + default: + break; + } + } + + if (Generated.tryCombineAll(Observer, MI, B)) + return true; return false; } +#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenGICombiner.inc" +#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP + // Pass boilerplate // ================ @@ -63,24 +110,33 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass { public: static char ID; - AArch64PreLegalizerCombiner(); + AArch64PreLegalizerCombiner(bool IsOptNone = false); StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; } bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; +private: + bool IsOptNone; }; -} +} // end anonymous namespace void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.setPreservesCFG(); getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); + if (!IsOptNone) { + AU.addRequired(); + AU.addPreserved(); + } MachineFunctionPass::getAnalysisUsage(AU); } -AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) { +AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone) + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); } @@ -89,7 +145,14 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; auto *TPC = &getAnalysis(); - AArch64PreLegalizerCombinerInfo PCInfo; + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); + GISelKnownBits *KB = &getAnalysis().get(MF); + MachineDominatorTree *MDT = + IsOptNone ? 
nullptr : &getAnalysis(); + AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), + F.hasMinSize(), KB, MDT); Combiner C(PCInfo, TPC); return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); } @@ -99,13 +162,14 @@ INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE, "Combine AArch64 machine instrs before legalization", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, "Combine AArch64 machine instrs before legalization", false, false) namespace llvm { -FunctionPass *createAArch64PreLegalizeCombiner() { - return new AArch64PreLegalizerCombiner(); +FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) { + return new AArch64PreLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index b52259cc9ac..8ec73aa3c04 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -563,12 +563,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getSameKindOfOperandsMapping(MI); } case TargetOpcode::COPY: { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // Check if one of the register is not a generic register. 
- if ((TargetRegisterInfo::isPhysicalRegister(DstReg) || + if ((Register::isPhysicalRegister(DstReg) || !MRI.getType(DstReg).isValid()) || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (Register::isPhysicalRegister(SrcReg) || !MRI.getType(SrcReg).isValid())) { const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); @@ -635,6 +635,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Some of the floating-point instructions have mixed GPR and FPR operands: // fine-tune the computed mapping. switch (Opc) { + case TargetOpcode::G_TRUNC: { + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -687,7 +693,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_STORE: // Check if that store is fed by fp instructions. if (OpRegBankIdx[0] == PMI_FirstGPR) { - unsigned VReg = MI.getOperand(0).getReg(); + Register VReg = MI.getOperand(0).getReg(); if (!VReg) break; MachineInstr *DefMI = MRI.getVRegDef(VReg); @@ -702,11 +708,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; // If we're taking in vectors, we have no choice but to put everything on - // FPRs. + // FPRs, except for the condition. The condition must always be on a GPR. LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); if (SrcTy.isVector()) { - for (unsigned Idx = 0; Idx < 4; ++Idx) - OpRegBankIdx[Idx] = PMI_FirstFPR; + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; break; } @@ -740,7 +745,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // This doesn't check the condition, since it's just whatever is in NZCV. 
// This isn't passed explicitly in a register to fcsel/csel. for (unsigned Idx = 2; Idx < 4; ++Idx) { - unsigned VReg = MI.getOperand(Idx).getReg(); + Register VReg = MI.getOperand(Idx).getReg(); MachineInstr *DefMI = MRI.getVRegDef(VReg); if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank || onlyDefinesFP(*DefMI, MRI, TRI)) @@ -750,8 +755,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // If we have more FP constraints than not, then move everything over to // FPR. if (NumFP >= 2) - for (unsigned Idx = 0; Idx < 4; ++Idx) - OpRegBankIdx[Idx] = PMI_FirstFPR; + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR}; break; } @@ -764,7 +768,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg()); // UNMERGE into scalars from a vector should always use FPR. // Likewise if any of the uses are FP instructions. - if (SrcTy.isVector() || + if (SrcTy.isVector() || SrcTy == LLT::scalar(128) || any_of(MRI.use_instructions(MI.getOperand(0).getReg()), [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) { // Set the register bank of every operand to FPR. @@ -795,12 +799,21 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Index needs to be a GPR. OpRegBankIdx[3] = PMI_FirstGPR; break; + case TargetOpcode::G_EXTRACT: { + // For s128 sources we have to use fpr. + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (SrcTy.getSizeInBits() == 128) { + OpRegBankIdx[0] = PMI_FirstFPR; + OpRegBankIdx[1] = PMI_FirstFPR; + } + break; + } case TargetOpcode::G_BUILD_VECTOR: // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. 
if (OpRegBankIdx[1] != PMI_FirstGPR) break; - unsigned VReg = MI.getOperand(1).getReg(); + Register VReg = MI.getOperand(1).getReg(); if (!VReg) break; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 6d5a4e3d2f7..de176088595 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64StackOffset.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" @@ -23,10 +24,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -63,8 +64,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_AAPCS_SwiftError_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_SaveList; - else - return CSR_AArch64_AAPCS_SaveList; + if (MF->getSubtarget().isTargetDarwin()) + return CSR_Darwin_AArch64_AAPCS_SaveList; + return CSR_AArch64_AAPCS_SaveList; } const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( @@ -120,6 +122,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, : CSR_AArch64_CXX_TLS_Darwin_RegMask; if (CC == CallingConv::AArch64_VectorCall) return SCS ? 
CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask; + if (CC == CallingConv::AArch64_SVE_VectorCall) + return CSR_AArch64_SVE_AAPCS_RegMask; if (MF.getSubtarget().getTargetLowering() ->supportSwiftError() && MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) @@ -388,7 +392,7 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { assert(Offset <= INT_MAX && "Offset too big to fit in int."); assert(MI && "Unable to get the legal offset for nil instruction."); - int SaveOffset = Offset; + StackOffset SaveOffset(Offset, MVT::i8); return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } @@ -418,7 +422,9 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { - int Off = Offset; // ARM doesn't need the general 64-bit offsets + // ARM doesn't need the general 64-bit offsets + StackOffset Off(Offset, MVT::i8); + unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -441,40 +447,69 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64InstrInfo *TII = MF.getSubtarget().getInstrInfo(); const AArch64FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + bool Tagged = + MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED; unsigned FrameReg; - int Offset; // Special handling of dbg_value, stackmap and patchpoint instructions. 
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, - /*PreferFP=*/true, - /*ForSimm=*/false); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); + StackOffset Offset = + TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, + /*PreferFP=*/true, + /*ForSimm=*/false); + Offset += StackOffset(MI.getOperand(FIOperandNum + 1).getImm(), MVT::i8); MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getBytes()); return; } if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); + int Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); FI.ChangeToImmediate(Offset); return; } + StackOffset Offset; if (MI.getOpcode() == AArch64::TAGPstack) { // TAGPstack must use the virtual frame register in its 3rd operand. - const MachineFrameInfo &MFI = MF.getFrameInfo(); const AArch64FunctionInfo *AFI = MF.getInfo(); FrameReg = MI.getOperand(3).getReg(); - Offset = - MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset(); + Offset = {MFI.getObjectOffset(FrameIndex) + + AFI->getTaggedBasePointerOffset(), + MVT::i8}; + } else if (Tagged) { + StackOffset SPOffset = { + MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8}; + if (MFI.hasVarSizedObjects() || + isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) != + (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) { + // Can't update to SP + offset in place. Precalculate the tagged pointer + // in a scratch register. 
+ Offset = TFI->resolveFrameIndexReference( + MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, + TII); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg) + .addReg(ScratchReg) + .addReg(ScratchReg) + .addImm(0); + MI.getOperand(FIOperandNum) + .ChangeToRegister(ScratchReg, false, false, true); + return; + } + FrameReg = AArch64::SP; + Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), + MVT::i8}; } else { Offset = TFI->resolveFrameIndexReference( MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); @@ -490,7 +525,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above. Handle the rest, providing a register that is // SP+LargeImm. - unsigned ScratchReg = + Register ScratchReg = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp index 854670079e4..28a7e680849 100644 --- a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp +++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp @@ -426,16 +426,16 @@ bool AArch64SIMDInstrOpt::optimizeVectElement(MachineInstr &MI) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Get the operands of the current SIMD arithmetic instruction. 
- unsigned MulDest = MI.getOperand(0).getReg(); - unsigned SrcReg0 = MI.getOperand(1).getReg(); + Register MulDest = MI.getOperand(0).getReg(); + Register SrcReg0 = MI.getOperand(1).getReg(); unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill()); - unsigned SrcReg1 = MI.getOperand(2).getReg(); + Register SrcReg1 = MI.getOperand(2).getReg(); unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill()); unsigned DupDest; // Instructions of interest have either 4 or 5 operands. if (MI.getNumOperands() == 5) { - unsigned SrcReg2 = MI.getOperand(3).getReg(); + Register SrcReg2 = MI.getOperand(3).getReg(); unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill()); unsigned LaneNumber = MI.getOperand(4).getImm(); // Create a new DUP instruction. Note that if an equivalent DUP instruction diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 79ab42f4c08..b573eac7675 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -82,11 +82,11 @@ let Predicates = [HasSVE] in { defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">; defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">; - defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">; - defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">; + defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>; + defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>; - defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">; - defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">; + defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; + defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">; defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">; @@ -94,14 +94,14 @@ let Predicates = [HasSVE] in { defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth">; defm SXTW_ZPmZ : 
sve_int_un_pred_arit_0_d<0b100, "sxtw">; defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw">; - defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs">; - defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg">; + defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>; + defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>; - defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls">; - defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz">; - defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt">; - defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot">; - defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not">; + defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", null_frag>; + defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", null_frag>; + defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", int_aarch64_sve_cnt>; + defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", null_frag>; + defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", null_frag>; defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">; @@ -138,12 +138,12 @@ let Predicates = [HasSVE] in { defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">; defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">; - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd">; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub">; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul">; - defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul">; - defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps">; - defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts">; + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>; + defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>; + defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>; defm FTSSEL_ZZZ : 
sve_int_bin_cons_misc_0_b<"ftssel">; @@ -187,7 +187,7 @@ let Predicates = [HasSVE] in { defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">; // Splat scalar register (unpredicated, GPR or vector + element index) - defm DUP_ZR : sve_int_perm_dup_r<"dup">; + defm DUP_ZR : sve_int_perm_dup_r<"dup", AArch64dup>; defm DUP_ZZI : sve_int_perm_dup_i<"dup">; // Splat scalar register (predicated) @@ -211,13 +211,13 @@ let Predicates = [HasSVE] in { defm REV_PP : sve_int_perm_reverse_p<"rev">; defm REV_ZZ : sve_int_perm_reverse_z<"rev">; - defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo">; - defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi">; - defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo">; - defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi">; + defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; + defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; + defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>; + defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>; - def PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo">; - def PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi">; + defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>; + defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>; defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">; defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">; @@ -1020,6 +1020,56 @@ let Predicates = [HasSVE] in { (FCMGT_PPzZZ_S PPR32:$Zd, PPR3bAny:$Pg, ZPR32:$Zn, ZPR32:$Zm), 0>; def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn", (FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv8f16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : 
Pat<(nxv16i8 (bitconvert (nxv4f32 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv16i8 (bitconvert (nxv2f64 ZPR:$src))), (nxv16i8 ZPR:$src)>; + + def : Pat<(nxv8i16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8i16 ZPR:$src)>; + + def : Pat<(nxv4i32 (bitconvert (nxv16i8 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv4f32 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4i32 ZPR:$src)>; + + def : Pat<(nxv2i64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>; + + def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8f16 ZPR:$src)>; + + def : Pat<(nxv4f32 
(bitconvert (nxv16i8 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8i16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv4i32 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2i64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8f16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv2f64 ZPR:$src))), (nxv4f32 ZPR:$src)>; + + def : Pat<(nxv2f64 (bitconvert (nxv16i8 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>; + } let Predicates = [HasSVE2] in { @@ -1164,6 +1214,13 @@ let Predicates = [HasSVE2] in { defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">; + // SVE2 predicated shifts + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">; @@ -1199,14 +1256,14 @@ let Predicates = [HasSVE2] in { defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; + defm SLI_ZZI : 
sve2_int_bin_shift_imm_left< 0b1, "sli">; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; @@ -1228,41 +1285,47 @@ let Predicates = [HasSVE2] in { defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">; defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">; - // SVE2 bitwise shift right narrow - defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">; - defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">; - defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">; - defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">; - defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">; - defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">; - defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">; - defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">; - defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">; - defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">; - defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">; - defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">; - defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, 
"uqshrnb">; - defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">; - defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">; - defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">; + // SVE2 bitwise shift right narrow (bottom) + defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">; + defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">; + defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">; + defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">; + defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">; + defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">; + defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">; + defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">; - // SVE2 integer add/subtract narrow high part - defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">; - defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">; - defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">; - defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">; - defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">; - defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">; - defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">; - defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">; + // SVE2 bitwise shift right narrow (top) + defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">; + defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">; + defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">; + defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">; + defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, 
"sqshrnt">; + defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">; + defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">; + defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">; - // SVE2 saturating extract narrow - defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">; - defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">; - defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">; - defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">; - defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">; - defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">; + // SVE2 integer add/subtract narrow high part (bottom) + defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">; + defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">; + defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">; + defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">; + + // SVE2 integer add/subtract narrow high part (top) + defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">; + defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">; + defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">; + defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">; + + // SVE2 saturating extract narrow (bottom) + defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">; + defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">; + defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">; + + // SVE2 saturating extract narrow (top) + defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">; + defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">; + defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">; // SVE2 character match defm MATCH_PPzZZ : sve2_char_match<0b0, "match">; @@ -1289,10 +1352,14 @@ let Predicates = 
[HasSVE2] in { // SVE2 histogram generation (vector) defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">; + // SVE2 floating-point base 2 logarithm as integer + defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; + // SVE2 floating-point convert precision defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; + def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; @@ -1321,58 +1388,45 @@ let Predicates = [HasSVE2] in { def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">; def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">; - // sve_int_rotate_imm + // SVE2 bitwise xor and rotate right by immediate defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">; // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; - // SVE floating-point convert precision - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; + // SVE2 non-temporal gather loads + defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; + defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; + defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; + defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; + defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - // SVE floating-point convert to integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; - - // Non-temporal contiguous loads (vector + register) - defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; - defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; - defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; - defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, 
ZPR32>; - defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - - defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; - defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; - defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; - defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; - defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; - defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; - defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; + defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; + defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; + defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; + defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; + defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; + defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; + defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; - // Predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 non-temporal scatter stores + defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; + defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; + defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; - // Non-temporal contiguous stores (vector + register) - defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; - defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, 
"stnt1h", Z_s, ZPR32>; - defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; + defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; + defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; + defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; + defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; - defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; - defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; - defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - - // SVE table lookup (three sources) + // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">; - // SVE integer compare scalar count and limit + // SVE2 integer compare scalar count and limit defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">; defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">; defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">; @@ -1383,7 +1437,7 @@ let Predicates = [HasSVE2] in { defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">; defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">; - // SVE pointer conflict compare + // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">; defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">; } diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 60dbace03ca..ba61ed726e8 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -32,7 +32,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext()); 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp index 3087e6ce441..7307961ddb5 100644 --- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp +++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp @@ -106,6 +106,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" #include @@ -115,9 +116,9 @@ using namespace llvm; #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass" -cl::opt HardenLoads("aarch64-slh-loads", cl::Hidden, - cl::desc("Sanitize loads from memory."), - cl::init(true)); +static cl::opt HardenLoads("aarch64-slh-loads", cl::Hidden, + cl::desc("Sanitize loads from memory."), + cl::init(true)); namespace { @@ -521,7 +522,7 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) { for (auto Use : MI.uses()) { if (!Use.isReg()) continue; - unsigned Reg = Use.getReg(); + Register Reg = Use.getReg(); // Some loads of floating point data have implicit defs/uses on a // super register of that floating point data. Some examples: // $s0 = LDRSui $sp, 22, implicit-def $q0 @@ -561,8 +562,8 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue( // miss-speculation isn't happening because we're already inserting barriers // to guarantee that. if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // Mark this register and all its aliasing registers as needing to be // value speculation hardened before its next use, by using a CSDB // barrier instruction. 
diff --git a/lib/Target/AArch64/AArch64StackOffset.h b/lib/Target/AArch64/AArch64StackOffset.h new file mode 100644 index 00000000000..13f12a6c9c3 --- /dev/null +++ b/lib/Target/AArch64/AArch64StackOffset.h @@ -0,0 +1,138 @@ +//==--AArch64StackOffset.h ---------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the StackOffset class, which is used to +// describe scalable and non-scalable offsets during frame lowering. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64STACKOFFSET_H + +#include "llvm/Support/MachineValueType.h" + +namespace llvm { + +/// StackOffset is a wrapper around scalable and non-scalable offsets and is +/// used in several functions such as 'isAArch64FrameOffsetLegal' and +/// 'emitFrameOffset()'. StackOffsets are described by MVTs, e.g. +// +/// StackOffset(1, MVT::nxv16i8) +// +/// would describe an offset as being the size of a single SVE vector. +/// +/// The class also implements simple arithmetic (addition/subtraction) on these +/// offsets, e.g. +// +/// StackOffset(1, MVT::nxv16i8) + StackOffset(1, MVT::i64) +// +/// describes an offset that spans the combined storage required for an SVE +/// vector and a 64bit GPR. 
+class StackOffset { + int64_t Bytes; + int64_t ScalableBytes; + + explicit operator int() const; + +public: + using Part = std::pair; + + StackOffset() : Bytes(0), ScalableBytes(0) {} + + StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() { + assert(MVT(T).getSizeInBits() % 8 == 0 && + "Offset type is not a multiple of bytes"); + *this += Part(Offset, T); + } + + StackOffset(const StackOffset &Other) + : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {} + + StackOffset &operator=(const StackOffset &) = default; + + StackOffset &operator+=(const StackOffset::Part &Other) { + int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8); + if (Other.second.isScalableVector()) + ScalableBytes += OffsetInBytes; + else + Bytes += OffsetInBytes; + return *this; + } + + StackOffset &operator+=(const StackOffset &Other) { + Bytes += Other.Bytes; + ScalableBytes += Other.ScalableBytes; + return *this; + } + + StackOffset operator+(const StackOffset &Other) const { + StackOffset Res(*this); + Res += Other; + return Res; + } + + StackOffset &operator-=(const StackOffset &Other) { + Bytes -= Other.Bytes; + ScalableBytes -= Other.ScalableBytes; + return *this; + } + + StackOffset operator-(const StackOffset &Other) const { + StackOffset Res(*this); + Res -= Other; + return Res; + } + + StackOffset operator-() const { + StackOffset Res = {}; + const StackOffset Other(*this); + Res -= Other; + return Res; + } + + /// Returns the scalable part of the offset in bytes. + int64_t getScalableBytes() const { return ScalableBytes; } + + /// Returns the non-scalable part of the offset in bytes. + int64_t getBytes() const { return Bytes; } + + /// Returns the offset in parts to which this frame offset can be + /// decomposed for the purpose of describing a frame offset. + /// For non-scalable offsets this is simply its byte size. 
+ void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors, + int64_t &NumDataVectors) const { + assert(isValid() && "Invalid frame offset"); + + NumBytes = Bytes; + NumDataVectors = 0; + NumPredicateVectors = ScalableBytes / 2; + // This method is used to get the offsets to adjust the frame offset. + // If the function requires ADDPL to be used and needs more than two ADDPL + // instructions, part of the offset is folded into NumDataVectors so that it + // uses ADDVL for part of it, reducing the number of ADDPL instructions. + if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || + NumPredicateVectors > 62) { + NumDataVectors = NumPredicateVectors / 8; + NumPredicateVectors -= NumDataVectors * 8; + } + } + + /// Returns true if either the fixed or the scalable part is non-zero. + explicit operator bool() const { return Bytes || ScalableBytes; } + + bool isValid() const { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2.
+ return ScalableBytes % 2 == 0; + } +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp index 6e99c48bf1d..e6dbe01d380 100644 --- a/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/lib/Target/AArch64/AArch64StackTagging.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -55,9 +56,215 @@ using namespace llvm; #define DEBUG_TYPE "stack-tagging" -static constexpr unsigned kTagGranuleSize = 16; +static cl::opt ClMergeInit( + "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("merge stack variable initializers with tagging when possible")); + +static cl::opt ClScanLimit("stack-tagging-merge-init-scan-limit", + cl::init(40), cl::Hidden); + +static const Align kTagGranuleSize = Align(16); namespace { + +class InitializerBuilder { + uint64_t Size; + const DataLayout *DL; + Value *BasePtr; + Function *SetTagFn; + Function *SetTagZeroFn; + Function *StgpFn; + + // List of initializers sorted by start offset. + struct Range { + uint64_t Start, End; + Instruction *Inst; + }; + SmallVector Ranges; + // 8-aligned offset => 8-byte initializer + // Missing keys are zero initialized. + std::map Out; + +public: + InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr, + Function *SetTagFn, Function *SetTagZeroFn, + Function *StgpFn) + : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn), + SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {} + + bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) { + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), Start, + [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; }); + if (I != Ranges.end() && End > I->Start) { + // Overlap - bail. 
+ return false; + } + Ranges.insert(I, {Start, End, Inst}); + return true; + } + + bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) { + int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType()); + if (!addRange(Offset, Offset + StoreSize, SI)) + return false; + IRBuilder<> IRB(SI); + applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0)); + return true; + } + + bool addMemSet(uint64_t Offset, MemSetInst *MSI) { + uint64_t StoreSize = cast(MSI->getLength())->getZExtValue(); + if (!addRange(Offset, Offset + StoreSize, MSI)) + return false; + IRBuilder<> IRB(MSI); + applyMemSet(IRB, Offset, Offset + StoreSize, + cast(MSI->getValue())); + return true; + } + + void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End, + ConstantInt *V) { + // Out[] does not distinguish between zero and undef, and we already know + // that this memset does not overlap with any other initializer. Nothing to + // do for memset(0). + if (V->isZero()) + return; + for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { + uint64_t Cst = 0x0101010101010101UL; + int LowBits = Offset < Start ? (Start - Offset) * 8 : 0; + if (LowBits) + Cst = (Cst >> LowBits) << LowBits; + int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0; + if (HighBits) + Cst = (Cst << HighBits) >> HighBits; + ConstantInt *C = + ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue()); + + Value *&CurrentV = Out[Offset]; + if (!CurrentV) { + CurrentV = C; + } else { + CurrentV = IRB.CreateOr(CurrentV, C); + } + } + } + + // Take a 64-bit slice of the value starting at the given offset (in bytes). + // Offset can be negative. Pad with zeroes on both sides when necessary. 
+ Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) { + if (Offset > 0) { + V = IRB.CreateLShr(V, Offset * 8); + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + } else if (Offset < 0) { + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + V = IRB.CreateShl(V, -Offset * 8); + } else { + V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty()); + } + return V; + } + + void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End, + Value *StoredValue) { + StoredValue = flatten(IRB, StoredValue); + for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { + Value *V = sliceValue(IRB, StoredValue, Offset - Start); + Value *&CurrentV = Out[Offset]; + if (!CurrentV) { + CurrentV = V; + } else { + CurrentV = IRB.CreateOr(CurrentV, V); + } + } + } + + void generate(IRBuilder<> &IRB) { + LLVM_DEBUG(dbgs() << "Combined initializer\n"); + // No initializers => the entire allocation is undef. + if (Ranges.empty()) { + emitUndef(IRB, 0, Size); + return; + } + + // Look through 8-byte initializer list 16 bytes at a time; + // If one of the two 8-byte halves is non-zero non-undef, emit STGP. + // Otherwise, emit zeroes up to next available item. + uint64_t LastOffset = 0; + for (uint64_t Offset = 0; Offset < Size; Offset += 16) { + auto I1 = Out.find(Offset); + auto I2 = Out.find(Offset + 8); + if (I1 == Out.end() && I2 == Out.end()) + continue; + + if (Offset > LastOffset) + emitZeroes(IRB, LastOffset, Offset - LastOffset); + + Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty()) + : I1->second; + Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty()) + : I2->second; + emitPair(IRB, Offset, Store1, Store2); + LastOffset = Offset + 16; + } + + // memset(0) does not update Out[], therefore the tail can be either undef + // or zero.
+ if (LastOffset < Size) + emitZeroes(IRB, LastOffset, Size - LastOffset); + + for (const auto &R : Ranges) { + R.Inst->eraseFromParent(); + } + } + + void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size + << ") zero\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(SetTagZeroFn, + {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + } + + void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size + << ") undef\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + } + + void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) { + LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n"); + LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n"); + Value *Ptr = BasePtr; + if (Offset) + Ptr = IRB.CreateConstGEP1_32(Ptr, Offset); + IRB.CreateCall(StgpFn, {Ptr, A, B}); + } + + Value *flatten(IRBuilder<> &IRB, Value *V) { + if (V->getType()->isIntegerTy()) + return V; + // vector of pointers -> vector of ints + if (VectorType *VecTy = dyn_cast(V->getType())) { + LLVMContext &Ctx = IRB.getContext(); + Type *EltTy = VecTy->getElementType(); + if (EltTy->isPointerTy()) { + uint32_t EltSize = DL->getTypeSizeInBits(EltTy); + Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize), + VecTy->getNumElements()); + V = IRB.CreatePointerCast(V, NewTy); + } + } + return IRB.CreateBitOrPointerCast( + V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8)); + } +}; + class AArch64StackTagging : public FunctionPass { struct AllocaInfo { AllocaInst *AI; @@ -67,10 +274,15 @@ class AArch64StackTagging : public FunctionPass { int Tag; // -1 for non-tagged allocations }; + bool MergeInit; + public: static char ID; // Pass ID, replacement for typeid - 
AArch64StackTagging() : FunctionPass(ID) { + AArch64StackTagging(bool MergeInit = true) + : FunctionPass(ID), + MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit + : MergeInit) { initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry()); } @@ -81,6 +293,9 @@ public: uint64_t Size); void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size); + Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr, + uint64_t Size, InitializerBuilder &IB); + Instruction * insertBaseTaggedPointer(const MapVector &Allocas, const DominatorTree *DT); @@ -92,9 +307,12 @@ private: Function *F; Function *SetTagFunc; const DataLayout *DL; + AAResults *AA; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (MergeInit) + AU.addRequired(); } }; @@ -107,8 +325,68 @@ INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging", INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging", false, false) -FunctionPass *llvm::createAArch64StackTaggingPass() { - return new AArch64StackTagging(); +FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) { + return new AArch64StackTagging(MergeInit); +} + +Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst, + Value *StartPtr, + uint64_t Size, + InitializerBuilder &IB) { + MemoryLocation AllocaLoc{StartPtr, Size}; + Instruction *LastInst = StartInst; + BasicBlock::iterator BI(StartInst); + + unsigned Count = 0; + for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) { + if (!isa(*BI)) + ++Count; + + if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc))) + continue; + + if (!isa(BI) && !isa(BI)) { + // If the instruction is readnone, ignore it, otherwise bail out. We + // don't even allow readonly here because we don't want something like: + // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) + break; + continue; + } + + if (StoreInst *NextStore = dyn_cast(BI)) { + if (!NextStore->isSimple()) + break; + + // Check to see if this store is to a constant offset from the start ptr. + Optional Offset = + isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL); + if (!Offset) + break; + + if (!IB.addStore(*Offset, NextStore, DL)) + break; + LastInst = NextStore; + } else { + MemSetInst *MSI = cast(BI); + + if (MSI->isVolatile() || !isa(MSI->getLength())) + break; + + if (!isa(MSI->getValue())) + break; + + // Check to see if this store is to a constant offset from the start ptr. + Optional Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL); + if (!Offset) + break; + + if (!IB.addMemSet(*Offset, MSI)) + break; + LastInst = MSI; + } + } + return LastInst; } bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) { @@ -127,8 +405,23 @@ bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) { void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr, uint64_t Size) { + auto SetTagZeroFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero); + auto StgpFunc = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp); + + InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc); + bool LittleEndian = + Triple(AI->getModule()->getTargetTriple()).isLittleEndian(); + // Current implementation of initializer merging assumes little endianness. 
+ if (MergeInit && !F->hasOptNone() && LittleEndian) { + LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI + << ", size = " << Size << "\n"); + InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB); + } + IRBuilder<> IRB(InsertBefore); - IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)}); + IB.generate(IRB); } void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore, @@ -166,7 +459,8 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer( } void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { - unsigned NewAlignment = std::max(Info.AI->getAlignment(), kTagGranuleSize); + const Align NewAlignment = + max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize); Info.AI->setAlignment(NewAlignment); uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8; @@ -179,7 +473,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { Info.AI->isArrayAllocation() ? ArrayType::get( Info.AI->getAllocatedType(), - dyn_cast(Info.AI->getArraySize())->getZExtValue()) + cast(Info.AI->getArraySize())->getZExtValue()) : Info.AI->getAllocatedType(); Type *PaddingType = ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size); @@ -187,7 +481,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { auto *NewAI = new AllocaInst( TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI); NewAI->takeName(Info.AI); - NewAI->setAlignment(Info.AI->getAlignment()); + NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment())); NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca()); NewAI->setSwiftError(Info.AI->isSwiftError()); NewAI->copyMetadata(*Info.AI); @@ -198,6 +492,24 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { Info.AI = NewAI; } +// Helper function to check for post-dominance. 
+static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A, + const IntrinsicInst *B) { + const BasicBlock *ABB = A->getParent(); + const BasicBlock *BBB = B->getParent(); + + if (ABB != BBB) + return PDT->dominates(ABB, BBB); + + for (const Instruction &I : *ABB) { + if (&I == B) + return true; + if (&I == A) + return false; + } + llvm_unreachable("Corrupt instruction list"); +} + // FIXME: check for MTE extension bool AArch64StackTagging::runOnFunction(Function &Fn) { if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) @@ -205,6 +517,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { F = &Fn; DL = &Fn.getParent()->getDataLayout(); + if (MergeInit) + AA = &getAnalysis().getAAResults(); MapVector Allocas; // need stable iteration order SmallVector RetVec; @@ -270,23 +584,31 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (NumInterestingAllocas == 0) return true; + std::unique_ptr DeleteDT; + DominatorTree *DT = nullptr; + if (auto *P = getAnalysisIfAvailable()) + DT = &P->getDomTree(); + + if (DT == nullptr && (NumInterestingAllocas > 1 || + !F->hasFnAttribute(Attribute::OptimizeNone))) { + DeleteDT = std::make_unique(*F); + DT = DeleteDT.get(); + } + + std::unique_ptr DeletePDT; + PostDominatorTree *PDT = nullptr; + if (auto *P = getAnalysisIfAvailable()) + PDT = &P->getPostDomTree(); + + if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) { + DeletePDT = std::make_unique(*F); + PDT = DeletePDT.get(); + } + SetTagFunc = Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag); - // Compute DT only if the function has the attribute, there are more than 1 - // interesting allocas, and it is not available for free. 
- Instruction *Base; - if (NumInterestingAllocas > 1) { - auto *DTWP = getAnalysisIfAvailable(); - if (DTWP) { - Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree()); - } else { - DominatorTree DT(*F); - Base = insertBaseTaggedPointer(Allocas, &DT); - } - } else { - Base = insertBaseTaggedPointer(Allocas, nullptr); - } + Instruction *Base = insertBaseTaggedPointer(Allocas, DT); for (auto &I : Allocas) { const AllocaInfo &Info = I.second; @@ -309,11 +631,37 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 && Info.LifetimeEnd.size() == 1) { IntrinsicInst *Start = Info.LifetimeStart[0]; + IntrinsicInst *End = Info.LifetimeEnd[0]; uint64_t Size = dyn_cast(Start->getArgOperand(0))->getZExtValue(); Size = alignTo(Size, kTagGranuleSize); tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size); - untagAlloca(AI, Info.LifetimeEnd[0], Size); + // We need to ensure that if we tag some object, we certainly untag it + // before the function exits. + if (PDT != nullptr && postDominates(PDT, End, Start)) { + untagAlloca(AI, End, Size); + } else { + SmallVector ReachableRetVec; + unsigned NumCoveredExits = 0; + for (auto &RI : RetVec) { + if (!isPotentiallyReachable(Start, RI, nullptr, DT)) + continue; + ReachableRetVec.push_back(RI); + if (DT != nullptr && DT->dominates(End, RI)) + ++NumCoveredExits; + } + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + if (NumCoveredExits == ReachableRetVec.size()) { + untagAlloca(AI, End, Size); + } else { + for (auto &RI : ReachableRetVec) + untagAlloca(AI, RI, Size); + // We may have inserted untag outside of the lifetime interval. + // Remove the lifetime end call for this alloca. 
+ End->eraseFromParent(); + } + } } else { uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8; Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()); diff --git a/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp new file mode 100644 index 00000000000..3cc556f74ae --- /dev/null +++ b/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -0,0 +1,209 @@ +//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "AArch64.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64InstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" + +enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; + +cl::opt ClUncheckedLdSt( + "stack-tagging-unchecked-ld-st", cl::Hidden, + cl::init(UncheckedSafe), + cl::desc( + "Unconditionally apply unchecked-ld-st optimization (even for large " + "stack 
frames, or in the presence of variable sized allocas)."), + cl::values( + clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), + clEnumValN( + UncheckedSafe, "safe", + "apply unchecked-ld-st when the target is definitely within range"), + clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); + +namespace { + +class AArch64StackTaggingPreRA : public MachineFunctionPass { + MachineFunction *MF; + AArch64FunctionInfo *AFI; + MachineFrameInfo *MFI; + MachineRegisterInfo *MRI; + const AArch64RegisterInfo *TRI; + const AArch64InstrInfo *TII; + + SmallVector ReTags; + +public: + static char ID; + AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { + initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); + } + + bool mayUseUncheckedLoadStore(); + void uncheckUsesOf(unsigned TaggedReg, int FI); + void uncheckLoadsAndStores(); + + bool runOnMachineFunction(MachineFunction &Func) override; + StringRef getPassName() const override { + return "AArch64 Stack Tagging PreRA"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // end anonymous namespace + +char AArch64StackTaggingPreRA::ID = 0; + +INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", + "AArch64 Stack Tagging PreRA Pass", false, false) +INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", + "AArch64 Stack Tagging PreRA Pass", false, false) + +FunctionPass *llvm::createAArch64StackTaggingPreRAPass() { + return new AArch64StackTaggingPreRA(); +} + +static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { + switch (Opcode) { + case AArch64::LDRWui: + case AArch64::LDRSHWui: + case AArch64::LDRXui: + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::LDRHui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::STRWui: + case AArch64::STRXui: + case AArch64::STRBui: + case 
AArch64::STRBBui: + case AArch64::STRHui: + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + return true; + default: + return false; + } +} + +bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { + if (ClUncheckedLdSt == UncheckedNever) + return false; + else if (ClUncheckedLdSt == UncheckedAlways) + return true; + + // This estimate can be improved if we had harder guarantees about stack frame + // layout. With LocalStackAllocation we can estimate SP offset to any + // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged + // objects ahead of non-tagged ones, but that's not always desirable. + // + // Underestimating SP offset here may require the use of LDG to materialize + // the tagged address of the stack slot, along with a scratch register + // allocation (post-regalloc!). + // + // For now we do the safe thing here and require that the entire stack frame + // is within range of the shortest of the unchecked instructions. + unsigned FrameSize = 0; + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) + FrameSize += MFI->getObjectSize(i); + bool EntireFrameReachableFromSP = FrameSize < 0xf00; + return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; +} + +void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { + for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end(); + UI != E;) { + MachineInstr *UseI = &*(UI++); + if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) { + // FI operand is always the one before the immediate offset. 
+ unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1; + if (UseI->getOperand(OpIdx).isReg() && + UseI->getOperand(OpIdx).getReg() == TaggedReg) { + UseI->getOperand(OpIdx).ChangeToFrameIndex(FI); + UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); + } + } else if (UseI->isCopy() && + Register::isVirtualRegister(UseI->getOperand(0).getReg())) { + uncheckUsesOf(UseI->getOperand(0).getReg(), FI); + } + } +} + +void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { + for (auto *I : ReTags) { + unsigned TaggedReg = I->getOperand(0).getReg(); + int FI = I->getOperand(1).getIndex(); + uncheckUsesOf(TaggedReg, FI); + } +} + +bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + MRI = &MF->getRegInfo(); + AFI = MF->getInfo(); + TII = static_cast(MF->getSubtarget().getInstrInfo()); + TRI = static_cast( + MF->getSubtarget().getRegisterInfo()); + MFI = &MF->getFrameInfo(); + ReTags.clear(); + + assert(MRI->isSSA()); + + LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" + << "********** Function: " << MF->getName() << '\n'); + + SmallSetVector TaggedSlots; + for (auto &BB : *MF) { + for (auto &I : BB) { + if (I.getOpcode() == AArch64::TAGPstack) { + ReTags.push_back(&I); + int FI = I.getOperand(1).getIndex(); + TaggedSlots.insert(FI); + // There should be no offsets in TAGP yet. 
+ assert(I.getOperand(2).getImm() == 0); + } + } + } + + if (ReTags.empty()) + return false; + + if (mayUseUncheckedLoadStore()) + uncheckLoadsAndStores(); + + return true; +} diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index 0e84a00df00..5deb601822b 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -151,7 +151,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { int64_t Offset; if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) && BaseOp->isReg()) { - unsigned BaseReg = BaseOp->getReg(); + Register BaseReg = BaseOp->getReg(); if (PrevBaseReg == BaseReg) { // If this block can take STPs, skip ahead to the next block. if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 3bc89b91c3f..558bea368ef 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -71,19 +71,22 @@ void AArch64Subtarget::initializeProperties() { case CortexA35: break; case CortexA53: - PrefFunctionAlignment = 3; + PrefFunctionLogAlignment = 3; break; case CortexA55: break; case CortexA57: MaxInterleaveFactor = 4; - PrefFunctionAlignment = 4; + PrefFunctionLogAlignment = 4; + break; + case CortexA65: + PrefFunctionLogAlignment = 3; break; case CortexA72: case CortexA73: case CortexA75: case CortexA76: - PrefFunctionAlignment = 4; + PrefFunctionLogAlignment = 4; break; case Cyclone: CacheLineSize = 64; @@ -94,14 +97,14 @@ void AArch64Subtarget::initializeProperties() { case ExynosM1: MaxInterleaveFactor = 4; MaxJumpTableSize = 8; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 3; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 3; break; case ExynosM3: MaxInterleaveFactor = 4; MaxJumpTableSize = 20; - PrefFunctionAlignment = 5; - PrefLoopAlignment = 4; + 
PrefFunctionLogAlignment = 5; + PrefLoopLogAlignment = 4; break; case Falkor: MaxInterleaveFactor = 4; @@ -122,6 +125,12 @@ void AArch64Subtarget::initializeProperties() { // FIXME: remove this to enable 64-bit SLP if performance looks good. MinVectorRegisterBitWidth = 128; break; + case NeoverseE1: + PrefFunctionLogAlignment = 3; + break; + case NeoverseN1: + PrefFunctionLogAlignment = 4; + break; case Saphira: MaxInterleaveFactor = 4; // FIXME: remove this to enable 64-bit SLP if performance looks good. @@ -129,8 +138,8 @@ void AArch64Subtarget::initializeProperties() { break; case ThunderX2T99: CacheLineSize = 64; - PrefFunctionAlignment = 3; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 3; + PrefLoopLogAlignment = 2; MaxInterleaveFactor = 4; PrefetchDistance = 128; MinPrefetchStride = 1024; @@ -143,15 +152,15 @@ void AArch64Subtarget::initializeProperties() { case ThunderXT81: case ThunderXT83: CacheLineSize = 128; - PrefFunctionAlignment = 3; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 3; + PrefLoopLogAlignment = 2; // FIXME: remove this to enable 64-bit SLP if performance looks good. MinVectorRegisterBitWidth = 128; break; case TSV110: CacheLineSize = 64; - PrefFunctionAlignment = 4; - PrefLoopAlignment = 2; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 2; break; } } @@ -187,7 +196,7 @@ const CallLowering *AArch64Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { +InstructionSelector *AArch64Subtarget::getInstructionSelector() const { return InstSelector.get(); } @@ -201,7 +210,7 @@ const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { /// Find the target operand flags that describe how a global value should be /// referenced for the current subtarget. 
-unsigned char +unsigned AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // MachO large model always goes via a GOT, simply to get a single 8-byte @@ -224,10 +233,17 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, GV->hasExternalWeakLinkage()) return AArch64II::MO_GOT; + // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate + // that their nominal addresses are tagged and outside of the code model. In + // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the + // tag if necessary based on MO_TAGGED. + if (AllowTaggedGlobals && !isa(GV->getValueType())) + return AArch64II::MO_NC | AArch64II::MO_TAGGED; + return AArch64II::MO_NO_FLAG; } -unsigned char AArch64Subtarget::classifyGlobalFunctionReference( +unsigned AArch64Subtarget::classifyGlobalFunctionReference( const GlobalValue *GV, const TargetMachine &TM) const { // MachO large model always goes via a GOT, because we don't have the // relocations available to do anything else.. @@ -275,7 +291,7 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const { std::unique_ptr AArch64Subtarget::getCustomPBQPConstraints() const { - return balanceFPOps() ? llvm::make_unique() : nullptr; + return balanceFPOps() ? 
std::make_unique() : nullptr; } void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 0c84cfb8329..f3212fae8e5 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -42,6 +42,7 @@ public: CortexA53, CortexA55, CortexA57, + CortexA65, CortexA72, CortexA73, CortexA75, @@ -51,6 +52,8 @@ public: ExynosM3, Falkor, Kryo, + NeoverseE1, + NeoverseN1, Saphira, ThunderX2T99, ThunderX, @@ -113,6 +116,7 @@ protected: bool HasTRACEV8_4 = false; bool HasAM = false; bool HasSEL2 = false; + bool HasPMU = false; bool HasTLB_RMI = false; bool HasFMI = false; bool HasRCPC_IMMO = false; @@ -134,6 +138,7 @@ protected: bool HasBTI = false; bool HasRandGen = false; bool HasMTE = false; + bool HasTME = false; // Arm SVE2 extensions bool HasSVE2AES = false; @@ -141,6 +146,10 @@ protected: bool HasSVE2SHA3 = false; bool HasSVE2BitPerm = false; + // Future architecture extensions. + bool HasETE = false; + bool HasTRBE = false; + // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove = false; @@ -183,14 +192,15 @@ protected: bool UseEL1ForTP = false; bool UseEL2ForTP = false; bool UseEL3ForTP = false; + bool AllowTaggedGlobals = false; uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; uint16_t PrefetchDistance = 0; uint16_t MinPrefetchStride = 1; unsigned MaxPrefetchIterationsAhead = UINT_MAX; - unsigned PrefFunctionAlignment = 0; - unsigned PrefLoopAlignment = 0; + unsigned PrefFunctionLogAlignment = 0; + unsigned PrefLoopLogAlignment = 0; unsigned MaxJumpTableSize = 0; unsigned WideningBaseCost = 0; @@ -247,7 +257,7 @@ public: return &getInstrInfo()->getRegisterInfo(); } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } @@ -344,14 +354,16 @@ public: unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; } - unsigned getCacheLineSize() const { return CacheLineSize; } - unsigned getPrefetchDistance() const { return PrefetchDistance; } - unsigned getMinPrefetchStride() const { return MinPrefetchStride; } - unsigned getMaxPrefetchIterationsAhead() const { + unsigned getCacheLineSize() const override { return CacheLineSize; } + unsigned getPrefetchDistance() const override { return PrefetchDistance; } + unsigned getMinPrefetchStride() const override { return MinPrefetchStride; } + unsigned getMaxPrefetchIterationsAhead() const override { return MaxPrefetchIterationsAhead; } - unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } - unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } + unsigned getPrefFunctionLogAlignment() const { + return PrefFunctionLogAlignment; + } + unsigned 
getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } @@ -380,6 +392,7 @@ public: bool hasBTI() const { return HasBTI; } bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } + bool hasTME() const { return HasTME; } // Arm SVE2 extensions bool hasSVE2AES() const { return HasSVE2AES; } bool hasSVE2SM4() const { return HasSVE2SM4; } @@ -399,6 +412,8 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetILP32() const { return TargetTriple.isArch32Bit(); } + bool useAA() const override { return UseAA; } bool hasVH() const { return HasVH; } @@ -421,10 +436,17 @@ public: bool hasTRACEV8_4() const { return HasTRACEV8_4; } bool hasAM() const { return HasAM; } bool hasSEL2() const { return HasSEL2; } + bool hasPMU() const { return HasPMU; } bool hasTLB_RMI() const { return HasTLB_RMI; } bool hasFMI() const { return HasFMI; } bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } + bool addrSinkUsingGEPs() const override { + // Keeping GEPs inbounds is important for exploiting AArch64 + // addressing-modes in ILP32 mode. + return useAA() || isTargetILP32(); + } + bool useSmallAddressing() const { switch (TLInfo.getTargetMachine().getCodeModel()) { case CodeModel::Kernel: @@ -443,11 +465,11 @@ public: /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. 
- unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const; + unsigned ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM) const; - unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, - const TargetMachine &TM) const; + unsigned classifyGlobalFunctionReference(const GlobalValue *GV, + const TargetMachine &TM) const; void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td index 536a6591478..05249a4ea6a 100644 --- a/lib/Target/AArch64/AArch64SystemOperands.td +++ b/lib/Target/AArch64/AArch64SystemOperands.td @@ -612,6 +612,7 @@ def : ROSysReg<"ISR_EL1", 0b11, 0b000, 0b1100, 0b0001, 0b000>; def : ROSysReg<"CNTPCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b001>; def : ROSysReg<"CNTVCT_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b010>; def : ROSysReg<"ID_MMFR4_EL1", 0b11, 0b000, 0b0000, 0b0010, 0b110>; +def : ROSysReg<"ID_MMFR5_EL1", 0b11, 0b000, 0b0000, 0b0011, 0b110>; // Trace registers // Op0 Op1 CRn CRm Op2 @@ -1321,6 +1322,12 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>; def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>; } // FeatureSEL2 +// v8.4a PMU registers +// Op0 Op1 CRn CRm Op2 +let Requires = [{ {AArch64::FeaturePMU} }] in { +def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>; +} // FeaturePMU + // v8.4a RAS registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::FeatureRASv8_4} }] in { @@ -1452,14 +1459,37 @@ let Requires = [{ {AArch64::FeatureMTE} }] in { def : RWSysReg<"TCO", 0b11, 0b011, 0b0100, 0b0010, 0b111>; def : RWSysReg<"GCR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b110>; def : RWSysReg<"RGSR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b101>; -def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0110, 0b0101, 0b000>; -def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>; -def : RWSysReg<"TFSR_EL3", 
0b11, 0b110, 0b0110, 0b0110, 0b000>; -def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>; -def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>; +def : RWSysReg<"TFSR_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0101, 0b0110, 0b000>; +def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0101, 0b0110, 0b001>; def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>; } // HasMTE +// Embedded Trace Extension R/W System registers +let Requires = [{ {AArch64::FeatureETE} }] in { +// Name Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRCRSR", 0b10, 0b001, 0b0000, 0b1010, 0b000>; +// TRCEXTINSELR0 has the same encoding as ETM TRCEXTINSELR +def : RWSysReg<"TRCEXTINSELR0", 0b10, 0b001, 0b0000, 0b1000, 0b100>; +def : RWSysReg<"TRCEXTINSELR1", 0b10, 0b001, 0b0000, 0b1001, 0b100>; +def : RWSysReg<"TRCEXTINSELR2", 0b10, 0b001, 0b0000, 0b1010, 0b100>; +def : RWSysReg<"TRCEXTINSELR3", 0b10, 0b001, 0b0000, 0b1011, 0b100>; +} // FeatureETE + +// Trace Buffer Extension System registers +let Requires = [{ {AArch64::FeatureTRBE} }] in { +// Name Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TRBLIMITR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b000>; +def : RWSysReg<"TRBPTR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b001>; +def : RWSysReg<"TRBBASER_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b010>; +def : RWSysReg<"TRBSR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b011>; +def : RWSysReg<"TRBMAR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b100>; +def : RWSysReg<"TRBTRG_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b110>; +def : ROSysReg<"TRBIDR_EL1", 0b11, 0b000, 0b1001, 0b1011, 0b111>; +} // FeatureTRBE + // Cyclone specific system registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::ProcCyclone} }] in diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 86546148049..b3ed96e815b 100644 --- 
a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -157,6 +157,8 @@ extern "C" void LLVMInitializeAArch64Target() { RegisterTargetMachine X(getTheAArch64leTarget()); RegisterTargetMachine Y(getTheAArch64beTarget()); RegisterTargetMachine Z(getTheARM64Target()); + RegisterTargetMachine W(getTheARM64_32Target()); + RegisterTargetMachine V(getTheAArch64_32Target()); auto PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); @@ -180,6 +182,7 @@ extern "C" void LLVMInitializeAArch64Target() { initializeLDTLSCleanupPass(*PR); initializeAArch64SpeculationHardeningPass(*PR); initializeAArch64StackTaggingPass(*PR); + initializeAArch64StackTaggingPreRAPass(*PR); } //===----------------------------------------------------------------------===// @@ -187,11 +190,11 @@ extern "C" void LLVMInitializeAArch64Target() { //===----------------------------------------------------------------------===// static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatCOFF()) - return llvm::make_unique(); + return std::make_unique(); - return llvm::make_unique(); + return std::make_unique(); } // Helper function to build a DataLayout string @@ -200,8 +203,11 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) { if (Options.getABIName() == "ilp32") return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; - if (TT.isOSBinFormatMachO()) + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::aarch64_32) + return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; return "e-m:o-i64:64-i128:128-n32:64-S128"; + } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) @@ -277,8 +283,11 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, this->Options.TrapUnreachable = true; } - // Enable GlobalISel at or 
below EnableGlobalISelAt0. - if (getOptLevel() <= EnableGlobalISelAtO) { + // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is + // MachO/CodeModel::Large, which GlobalISel does not support. + if (getOptLevel() <= EnableGlobalISelAtO && + TT.getArch() != Triple::aarch64_32 && + !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); } @@ -310,7 +319,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, + I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle); } return I.get(); @@ -448,7 +457,8 @@ void AArch64PassConfig::addIRPasses() { addPass(createLICMPass()); } - addPass(createAArch64StackTaggingPass()); + addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() != + CodeGenOpt::None)); } // Pass Pipeline Configuration @@ -502,7 +512,8 @@ bool AArch64PassConfig::addIRTranslator() { } void AArch64PassConfig::addPreLegalizeMachineIR() { - addPass(createAArch64PreLegalizeCombiner()); + bool IsOptNone = getOptLevel() == CodeGenOpt::None; + addPass(createAArch64PreLegalizeCombiner(IsOptNone)); } bool AArch64PassConfig::addLegalizeMachineIR() { @@ -516,9 +527,7 @@ bool AArch64PassConfig::addRegBankSelect() { } void AArch64PassConfig::addPreGlobalInstructionSelect() { - // Workaround the deficiency of the fast register allocator. 
- if (TM->getOptLevel() == CodeGenOpt::None) - addPass(new Localizer()); + addPass(new Localizer()); } bool AArch64PassConfig::addGlobalInstructionSelect() { @@ -540,6 +549,8 @@ bool AArch64PassConfig::addILPOpts() { if (EnableStPairSuppress) addPass(createAArch64StorePairSuppressPass()); addPass(createAArch64SIMDInstrOptPass()); + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createAArch64StackTaggingPreRAPass()); return true; } diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 1c3d5d0743a..54562094fcf 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -59,8 +59,8 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( } const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { assert((Offset+MV.getConstant() == 0) && "Arch64 does not support GOT PC rel with extra offset"); // On ARM64 Darwin, we can reference symbols with foo@GOT-., which diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 7ead363d42f..1cb4c028c80 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -35,7 +35,8 @@ public: const TargetMachine &TM, MachineModuleInfo *MMI) const override; - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4b78f2a7d6..dc916a7b340 
100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +AArch64TTIImpl::TTI::MemCmpExpansionOptions +AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.AllowOverlappingLoads = !ST->requiresStrictAlign(); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + // TODO: Though vector loads usually perform well on AArch64, in some targets + // they may wake up the FP unit, which raises the power consumption. Perhaps + // they could be used with no holds barred (-O3). + Options.LoadSizes = {8, 4, 2, 1}; + return Options; +} + int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { @@ -879,22 +892,6 @@ bool AArch64TTIImpl::shouldConsiderAddressTypePromotion( return Considerable; } -unsigned AArch64TTIImpl::getCacheLineSize() { - return ST->getCacheLineSize(); -} - -unsigned AArch64TTIImpl::getPrefetchDistance() { - return ST->getPrefetchDistance(); -} - -unsigned AArch64TTIImpl::getMinPrefetchStride() { - return ST->getMinPrefetchStride(); -} - -unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { - return ST->getMaxPrefetchIterationsAhead(); -} - bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { assert(isa(Ty) && "Expected Ty to be a vector type"); diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 10c15a139b4..32c59f41e1c 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -85,7 +85,8 @@ public: bool enableInterleavedAccessVectorization() { return true; } - 
unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 32; @@ -130,6 +131,9 @@ public: int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I = nullptr); @@ -153,14 +157,6 @@ public: shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader); - unsigned getCacheLineSize(); - - unsigned getPrefetchDistance(); - - unsigned getMinPrefetchStride(); - - unsigned getMaxPrefetchIterationsAhead(); - bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f4c55d48d21..4fb409f020d 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -935,48 +935,34 @@ public: return false; } - bool isMovZSymbolG3() const { - return isMovWSymbol(AArch64MCExpr::VK_ABS_G3); + bool isMovWSymbolG3() const { + return isMovWSymbol({AArch64MCExpr::VK_ABS_G3, AArch64MCExpr::VK_PREL_G3}); } - bool isMovZSymbolG2() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, - AArch64MCExpr::VK_TPREL_G2, - AArch64MCExpr::VK_DTPREL_G2}); - } - - bool isMovZSymbolG1() const { - return isMovWSymbol({ - AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, - AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, - AArch64MCExpr::VK_DTPREL_G1, - }); - } - - bool isMovZSymbolG0() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, - AArch64MCExpr::VK_TPREL_G0, - AArch64MCExpr::VK_DTPREL_G0}); - } - - bool isMovKSymbolG3() const { - return 
isMovWSymbol(AArch64MCExpr::VK_ABS_G3); - } - - bool isMovKSymbolG2() const { - return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC); - } - - bool isMovKSymbolG1() const { - return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC, - AArch64MCExpr::VK_TPREL_G1_NC, - AArch64MCExpr::VK_DTPREL_G1_NC}); - } - - bool isMovKSymbolG0() const { + bool isMovWSymbolG2() const { return isMovWSymbol( - {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, - AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC}); + {AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_ABS_G2_NC, AArch64MCExpr::VK_PREL_G2, + AArch64MCExpr::VK_PREL_G2_NC, AArch64MCExpr::VK_TPREL_G2, + AArch64MCExpr::VK_DTPREL_G2}); + } + + bool isMovWSymbolG1() const { + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_PREL_G1, + AArch64MCExpr::VK_PREL_G1_NC, AArch64MCExpr::VK_GOTTPREL_G1, + AArch64MCExpr::VK_TPREL_G1, AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1, AArch64MCExpr::VK_DTPREL_G1_NC}); + } + + bool isMovWSymbolG0() const { + return isMovWSymbol( + {AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_PREL_G0, + AArch64MCExpr::VK_PREL_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_TPREL_G0_NC, + AArch64MCExpr::VK_DTPREL_G0, AArch64MCExpr::VK_DTPREL_G0_NC}); } template @@ -1814,7 +1800,7 @@ public: static std::unique_ptr CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Token, Ctx); + auto Op = std::make_unique(k_Token, Ctx); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->Tok.IsSuffix = IsSuffix; @@ -1829,7 +1815,7 @@ public: AArch64_AM::ShiftExtendType ExtTy = AArch64_AM::LSL, unsigned ShiftAmount = 0, unsigned HasExplicitAmount = false) { - auto Op = make_unique(k_Register, Ctx); + auto Op = std::make_unique(k_Register, Ctx); 
Op->Reg.RegNum = RegNum; Op->Reg.Kind = Kind; Op->Reg.ElementWidth = 0; @@ -1861,7 +1847,7 @@ public: CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, unsigned ElementWidth, RegKind RegisterKind, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_VectorList, Ctx); + auto Op = std::make_unique(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; @@ -1874,7 +1860,7 @@ public: static std::unique_ptr CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_VectorIndex, Ctx); + auto Op = std::make_unique(k_VectorIndex, Ctx); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; @@ -1883,7 +1869,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_Immediate, Ctx); + auto Op = std::make_unique(k_Immediate, Ctx); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1894,7 +1880,7 @@ public: unsigned ShiftAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_ShiftedImm, Ctx); + auto Op = std::make_unique(k_ShiftedImm, Ctx); Op->ShiftedImm .Val = Val; Op->ShiftedImm.ShiftAmount = ShiftAmount; Op->StartLoc = S; @@ -1904,7 +1890,7 @@ public: static std::unique_ptr CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_CondCode, Ctx); + auto Op = std::make_unique(k_CondCode, Ctx); Op->CondCode.Code = Code; Op->StartLoc = S; Op->EndLoc = E; @@ -1913,7 +1899,7 @@ public: static std::unique_ptr CreateFPImm(APFloat Val, bool IsExact, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_FPImm, Ctx); + auto Op = std::make_unique(k_FPImm, Ctx); Op->FPImm.Val = Val.bitcastToAPInt().getSExtValue(); Op->FPImm.IsExact = IsExact; Op->StartLoc = S; @@ -1925,7 +1911,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Barrier, Ctx); + auto Op = std::make_unique(k_Barrier, Ctx); 
Op->Barrier.Val = Val; Op->Barrier.Data = Str.data(); Op->Barrier.Length = Str.size(); @@ -1939,7 +1925,7 @@ public: uint32_t MSRReg, uint32_t PStateField, MCContext &Ctx) { - auto Op = make_unique(k_SysReg, Ctx); + auto Op = std::make_unique(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.MRSReg = MRSReg; @@ -1952,7 +1938,7 @@ public: static std::unique_ptr CreateSysCR(unsigned Val, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_SysCR, Ctx); + auto Op = std::make_unique(k_SysCR, Ctx); Op->SysCRImm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1963,7 +1949,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_Prefetch, Ctx); + auto Op = std::make_unique(k_Prefetch, Ctx); Op->Prefetch.Val = Val; Op->Barrier.Data = Str.data(); Op->Barrier.Length = Str.size(); @@ -1976,7 +1962,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_PSBHint, Ctx); + auto Op = std::make_unique(k_PSBHint, Ctx); Op->PSBHint.Val = Val; Op->PSBHint.Data = Str.data(); Op->PSBHint.Length = Str.size(); @@ -1989,7 +1975,7 @@ public: StringRef Str, SMLoc S, MCContext &Ctx) { - auto Op = make_unique(k_BTIHint, Ctx); + auto Op = std::make_unique(k_BTIHint, Ctx); Op->BTIHint.Val = Val << 1 | 32; Op->BTIHint.Data = Str.data(); Op->BTIHint.Length = Str.size(); @@ -2001,7 +1987,7 @@ public: static std::unique_ptr CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { - auto Op = make_unique(k_ShiftExtend, Ctx); + auto Op = std::make_unique(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; @@ -2840,7 +2826,7 @@ static const struct Extension { {"sve2-aes", {AArch64::FeatureSVE2AES}}, {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, - {"bitperm", {AArch64::FeatureSVE2BitPerm}}, + {"sve2-bitperm", 
{AArch64::FeatureSVE2BitPerm}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, @@ -3260,6 +3246,13 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { .Case("abs_g0", AArch64MCExpr::VK_ABS_G0) .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S) .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC) + .Case("prel_g3", AArch64MCExpr::VK_PREL_G3) + .Case("prel_g2", AArch64MCExpr::VK_PREL_G2) + .Case("prel_g2_nc", AArch64MCExpr::VK_PREL_G2_NC) + .Case("prel_g1", AArch64MCExpr::VK_PREL_G1) + .Case("prel_g1_nc", AArch64MCExpr::VK_PREL_G1_NC) + .Case("prel_g0", AArch64MCExpr::VK_PREL_G0) + .Case("prel_g0_nc", AArch64MCExpr::VK_PREL_G0_NC) .Case("dtprel_g2", AArch64MCExpr::VK_DTPREL_G2) .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1) .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC) @@ -5283,7 +5276,7 @@ bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) { auto parseOp = [&]() -> bool { SMLoc L = getLoc(); - const MCExpr *Expr; + const MCExpr *Expr = nullptr; if (check(getParser().parseExpression(Expr), L, "expected expression")) return true; const MCConstantExpr *Value = dyn_cast_or_null(Expr); @@ -5542,43 +5535,43 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, switch (Kind) { default: return Match_InvalidOperand; - case MCK__35_0: + case MCK__HASH_0: ExpectedVal = 0; break; - case MCK__35_1: + case MCK__HASH_1: ExpectedVal = 1; break; - case MCK__35_12: + case MCK__HASH_12: ExpectedVal = 12; break; - case MCK__35_16: + case MCK__HASH_16: ExpectedVal = 16; break; - case MCK__35_2: + case MCK__HASH_2: ExpectedVal = 2; break; - case MCK__35_24: + case MCK__HASH_24: ExpectedVal = 24; break; - case MCK__35_3: + case MCK__HASH_3: ExpectedVal = 3; break; - case MCK__35_32: + case MCK__HASH_32: ExpectedVal = 32; break; - case MCK__35_4: + case MCK__HASH_4: ExpectedVal = 4; break; - case MCK__35_48: + case MCK__HASH_48: ExpectedVal = 48; break; - case MCK__35_6: + case MCK__HASH_6: ExpectedVal = 6; break; - case 
MCK__35_64: + case MCK__HASH_64: ExpectedVal = 64; break; - case MCK__35_8: + case MCK__HASH_8: ExpectedVal = 8; break; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 6418211a4f5..21ce5785ea5 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -153,9 +153,8 @@ static unsigned AdrImmBits(unsigned Value) { static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t Value, MCContext &Ctx, const Triple &TheTriple, bool IsResolved) { - unsigned Kind = Fixup.getKind(); int64_t SignedValue = static_cast(Value); - switch (Kind) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("Unknown fixup kind!"); case AArch64::fixup_aarch64_pcrel_adr_imm21: @@ -574,7 +573,7 @@ public: case MCCFIInstruction::OpDefCfa: { // Defines a frame pointer. unsigned XReg = - getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)); + getXRegFromWReg(*MRI.getLLVMRegNum(Inst.getRegister(), true)); // Other CFA registers than FP are not supported by compact unwind. // Fallback on DWARF. @@ -593,8 +592,8 @@ public: assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && "Frame pointer not pushed!"); - unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); - unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); + unsigned LRReg = *MRI.getLLVMRegNum(LRPush.getRegister(), true); + unsigned FPReg = *MRI.getLLVMRegNum(FPPush.getRegister(), true); LRReg = getXRegFromWReg(LRReg); FPReg = getXRegFromWReg(FPReg); @@ -615,14 +614,14 @@ public: case MCCFIInstruction::OpOffset: { // Registers are saved in pairs. We expect there to be two consecutive // `.cfi_offset' instructions with the appropriate registers specified. 
- unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); + unsigned Reg1 = *MRI.getLLVMRegNum(Inst.getRegister(), true); if (i + 1 == e) return CU::UNWIND_ARM64_MODE_DWARF; const MCCFIInstruction &Inst2 = Instrs[++i]; if (Inst2.getOperation() != MCCFIInstruction::OpOffset) return CU::UNWIND_ARM64_MODE_DWARF; - unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); + unsigned Reg2 = *MRI.getLLVMRegNum(Inst2.getRegister(), true); // N.B. The encodings must be in register number order, and the X // registers before the D registers. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index c871e2c62ea..0fd1ca187be 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -57,7 +57,7 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) static bool isNonILP32reloc(const MCFixup &Fixup, AArch64MCExpr::VariantKind RefKind, MCContext &Ctx) { - if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw) + if (Fixup.getTargetKind() != AArch64::fixup_aarch64_movw) return false; switch (RefKind) { case AArch64MCExpr::VK_ABS_G3: @@ -120,7 +120,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, "Should only be expression-level modifiers here"); if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_Data_1: Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); return ELF::R_AARCH64_NONE; @@ -184,7 +184,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx)) return ELF::R_AARCH64_NONE; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_NONE: return ELF::R_AARCH64_NONE; case FK_Data_1: @@ -394,6 +394,20 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(MOVW_SABS_G0); if 
(RefKind == AArch64MCExpr::VK_ABS_G0_NC) return R_CLS(MOVW_UABS_G0_NC); + if (RefKind == AArch64MCExpr::VK_PREL_G3) + return ELF::R_AARCH64_MOVW_PREL_G3; + if (RefKind == AArch64MCExpr::VK_PREL_G2) + return ELF::R_AARCH64_MOVW_PREL_G2; + if (RefKind == AArch64MCExpr::VK_PREL_G2_NC) + return ELF::R_AARCH64_MOVW_PREL_G2_NC; + if (RefKind == AArch64MCExpr::VK_PREL_G1) + return R_CLS(MOVW_PREL_G1); + if (RefKind == AArch64MCExpr::VK_PREL_G1_NC) + return ELF::R_AARCH64_MOVW_PREL_G1_NC; + if (RefKind == AArch64MCExpr::VK_PREL_G0) + return R_CLS(MOVW_PREL_G0); + if (RefKind == AArch64MCExpr::VK_PREL_G0_NC) + return R_CLS(MOVW_PREL_G0_NC); if (RefKind == AArch64MCExpr::VK_DTPREL_G2) return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; if (RefKind == AArch64MCExpr::VK_DTPREL_G1) @@ -434,5 +448,5 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) { - return llvm::make_unique(OSABI, IsILP32); + return std::make_unique(OSABI, IsILP32); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index d0a544273b8..1a16468484a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -172,7 +172,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, int ImmS = MI->getOperand(4).getImm(); if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) && - (ImmR == 0 || ImmS < ImmR)) { + (ImmR == 0 || ImmS < ImmR) && + STI.getFeatureBits()[AArch64::HasV8_2aOps]) { // BFC takes precedence over its entire range, sligtly differently to BFI. int BitWidth = Opcode == AArch64::BFMXri ? 
64 : 32; int LSB = (BitWidth - ImmR) % BitWidth; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index ecff1ab0a8b..5926a4f8161 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -30,7 +30,7 @@ static cl::opt AsmWriterVariant( cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"))); -AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) { // We prefer NEON instructions to be printed in the short, Apple-specific // form when targeting Darwin. AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant; @@ -39,7 +39,8 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - CodePointerSize = CalleeSaveStackSlotSize = 8; + CalleeSaveStackSlotSize = 8; + CodePointerSize = IsILP32 ? 
4 : 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 36ae92afc8c..7274ae79f74 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -23,7 +23,7 @@ class Target; class Triple; struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit AArch64MCAsmInfoDarwin(); + explicit AArch64MCAsmInfoDarwin(bool IsILP32); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 0a529321edc..548e399e05a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -42,6 +42,13 @@ StringRef AArch64MCExpr::getVariantKindName() const { case VK_ABS_G0: return ":abs_g0:"; case VK_ABS_G0_S: return ":abs_g0_s:"; case VK_ABS_G0_NC: return ":abs_g0_nc:"; + case VK_PREL_G3: return ":prel_g3:"; + case VK_PREL_G2: return ":prel_g2:"; + case VK_PREL_G2_NC: return ":prel_g2_nc:"; + case VK_PREL_G1: return ":prel_g1:"; + case VK_PREL_G1_NC: return ":prel_g1_nc:"; + case VK_PREL_G0: return ":prel_g0:"; + case VK_PREL_G0_NC: return ":prel_g0_nc:"; case VK_DTPREL_G2: return ":dtprel_g2:"; case VK_DTPREL_G1: return ":dtprel_g1:"; case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index ec9c9591162..a82ff2e9142 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -27,12 +27,13 @@ public: // symbol. E.g. direct, via the GOT, ... 
VK_ABS = 0x001, VK_SABS = 0x002, - VK_GOT = 0x003, - VK_DTPREL = 0x004, - VK_GOTTPREL = 0x005, - VK_TPREL = 0x006, - VK_TLSDESC = 0x007, - VK_SECREL = 0x008, + VK_PREL = 0x003, + VK_GOT = 0x004, + VK_DTPREL = 0x005, + VK_GOTTPREL = 0x006, + VK_TPREL = 0x007, + VK_TLSDESC = 0x008, + VK_SECREL = 0x009, VK_SymLocBits = 0x00f, // Variants specifying which part of the final address calculation is @@ -72,6 +73,13 @@ public: VK_ABS_G0_S = VK_SABS | VK_G0, VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, + VK_PREL_G3 = VK_PREL | VK_G3, + VK_PREL_G2 = VK_PREL | VK_G2, + VK_PREL_G2_NC = VK_PREL | VK_G2 | VK_NC, + VK_PREL_G1 = VK_PREL | VK_G1, + VK_PREL_G1_NC = VK_PREL | VK_G1 | VK_NC, + VK_PREL_G0 = VK_PREL | VK_G0, + VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC, VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, VK_GOT_PAGE = VK_GOT | VK_PAGE, VK_DTPREL_G2 = VK_DTPREL | VK_G2, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index df12274d947..1d583ec0087 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -241,7 +241,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) - MAI = new AArch64MCAsmInfoDarwin(); + MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new AArch64MCAsmInfoMicrosoftCOFF(); else if (TheTriple.isOSBinFormatCOFF()) diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index b3ce5ef22ee..fc04d37eb36 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -54,7 +54,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( RelocType = 
unsigned(MachO::ARM64_RELOC_UNSIGNED); Log2Size = ~0U; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: return false; @@ -406,6 +406,6 @@ void AArch64MachObjectWriter::recordRelocation( std::unique_ptr llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype, bool IsILP32) { - return llvm::make_unique(CPUType, CPUSubtype, + return std::make_unique(CPUType, CPUSubtype, IsILP32); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index a45880a0742..aa50bd05cb7 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -120,7 +120,7 @@ bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { namespace llvm { std::unique_ptr createAArch64WinCOFFObjectWriter() { - return llvm::make_unique(); + return std::make_unique(); } } // end namespace llvm diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 808e5946708..8ccf6aa675b 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -279,6 +279,19 @@ let Predicates = [HasSVE] in { defm PTRUES : sve_int_ptrue<0b001, "ptrues">; } +//===----------------------------------------------------------------------===// +// SVE pattern match helpers. 
+//===----------------------------------------------------------------------===// + +class SVE_1_Op_Pat +: Pat<(vtd (op vt1:$Op1)), + (inst $Op1)>; + +class SVE_3_Op_Pat +: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), + (inst $Op1, $Op2, $Op3)>; //===----------------------------------------------------------------------===// // SVE Predicate Misc Group @@ -403,12 +416,12 @@ multiclass sve_int_count_r_x64 opc, string asm> { } class sve_int_count_v sz8_64, bits<5> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg), - asm, "\t$Zdn, $Pg", + ZPRRegOp zprty, PPRRegOp pprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm), + asm, "\t$Zdn, $Pm", "", []>, Sched<[]> { - bits<4> Pg; + bits<4> Pm; bits<5> Zdn; let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; @@ -416,7 +429,7 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, let Inst{18-16} = opc{4-2}; let Inst{15-11} = 0b10000; let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; + let Inst{8-5} = Pm; let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; @@ -425,9 +438,16 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, } multiclass sve_int_count_v opc, string asm> { - def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; - def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; - def _D : sve_int_count_v<0b11, opc, asm, ZPR64>; + def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>; + def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>; + + def : InstAlias(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, @@ -609,11 +629,12 @@ multiclass sve_int_pred_pattern_b_x64 opc, string asm> { //===----------------------------------------------------------------------===// class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, - RegisterClass 
srcRegType> + ValueType vt, RegisterClass srcRegType, + SDPatternOperator op> : I<(outs zprty:$Zd), (ins srcRegType:$Rn), asm, "\t$Zd, $Rn", "", - []>, Sched<[]> { + [(set (vt zprty:$Zd), (op srcRegType:$Rn))]>, Sched<[]> { bits<5> Rn; bits<5> Zd; let Inst{31-24} = 0b00000101; @@ -623,11 +644,11 @@ class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, let Inst{4-0} = Zd; } -multiclass sve_int_perm_dup_r { - def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>; - def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, GPR32sp>; - def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>; - def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>; +multiclass sve_int_perm_dup_r { + def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, nxv16i8, GPR32sp, op>; + def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, nxv8i16, GPR32sp, op>; + def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, nxv4i32, GPR32sp, op>; + def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, nxv2i64, GPR64sp, op>; def : InstAlias<"mov $Zd, $Rn", (!cast(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; @@ -744,7 +765,7 @@ multiclass sve2_int_perm_tbl { } class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { @@ -758,6 +779,8 @@ class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> let Inst{15-10} = 0b001011; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_int_perm_tbx { @@ -826,10 +849,14 @@ class sve_int_perm_unpk sz16_64, bits<2> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve_int_perm_unpk opc, string asm> { +multiclass sve_int_perm_unpk opc, string asm, SDPatternOperator op> { def _H : sve_int_perm_unpk<0b01, opc, asm, ZPR16, ZPR8>; def _S : sve_int_perm_unpk<0b10, opc, asm, ZPR32, ZPR16>; def _D : sve_int_perm_unpk<0b11, opc, asm, ZPR64, ZPR32>; + + def : SVE_1_Op_Pat(NAME # _H)>; + def : SVE_1_Op_Pat(NAME # _S)>; + 
def : SVE_1_Op_Pat(NAME # _D)>; } class sve_int_perm_insrs sz8_64, string asm, ZPRRegOp zprty, @@ -1197,10 +1224,12 @@ multiclass sve_fp_ftmad { //===----------------------------------------------------------------------===// class sve_fp_3op_u_zd sz, bits<3> opc, string asm, - ZPRRegOp zprty> + ZPRRegOp zprty, + ValueType vt, ValueType vt2, SDPatternOperator op> : I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", - "", []>, Sched<[]> { + "", + [(set (vt zprty:$Zd), (op (vt zprty:$Zn), (vt2 zprty:$Zm)))]>, Sched<[]> { bits<5> Zd; bits<5> Zm; bits<5> Zn; @@ -1214,10 +1243,10 @@ class sve_fp_3op_u_zd sz, bits<3> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve_fp_3op_u_zd opc, string asm> { - def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; - def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; - def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; +multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op> { + def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16, nxv8f16, nxv8f16, op>; + def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32, nxv4f32, nxv4f32, op>; + def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64, nxv2f64, nxv2f64, op>; } //===----------------------------------------------------------------------===// @@ -1489,7 +1518,7 @@ multiclass sve_fp_fcadd { class sve2_fp_convert_precision opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn), +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { @@ -1504,6 +1533,8 @@ class sve2_fp_convert_precision opc, string asm, let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_fp_convert_down_narrow { @@ -1998,12 +2029,14 @@ class sve_intx_dot { +multiclass sve_intx_dot { def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>; def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>; + + def : SVE_3_Op_Pat(NAME # _S)>; + def : 
SVE_3_Op_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -2028,22 +2061,27 @@ class sve_intx_dot_by_indexed_elem { - def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> { +multiclass sve_intx_dot_by_indexed_elem { + def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> { bits<2> iop; bits<3> Zm; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> { + def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> { bits<1> iop; bits<4> Zm; let Inst{20} = iop; let Inst{19-16} = Zm; } + + def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))), + (!cast(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>; + def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))), + (!cast(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>; } //===----------------------------------------------------------------------===// @@ -2399,21 +2437,40 @@ multiclass sve2_misc_bitwise opc, string asm> { def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>; } -multiclass sve2_bitwise_xor_interleaved { - let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in { - def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>; - } -} - multiclass sve2_misc_int_addsub_long_interleaved opc, string asm> { def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; } +class sve2_bitwise_xor_interleaved sz, bits<1> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs 
zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0b10010; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_bitwise_xor_interleaved { + def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>; + def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>; + def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>; + def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>; +} + class sve2_bitwise_shift_left_long tsz8_64, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand immtype> @@ -2451,9 +2508,9 @@ multiclass sve2_bitwise_shift_left_long opc, string asm> { // SVE2 Accumulate Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), +class sve2_int_bin_shift_imm tsz8_64, bit opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<5> Zd; @@ -2468,38 +2525,40 @@ class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, let Inst{10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_cons_shift_imm_left { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { +multiclass sve2_int_bin_shift_imm_left { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : 
sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -multiclass sve2_int_bin_cons_shift_imm_right { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> +class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm), asm, "\t$Zda, $Zn, $imm", "", []>, Sched<[]> { @@ -2521,15 +2580,15 @@ class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_cons_shift_imm_right opc, string asm> { - def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, 
opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_accum_shift_imm_right opc, string asm> { + def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } @@ -2607,9 +2666,9 @@ multiclass sve2_int_addsub_long_carry opc, string asm> { // SVE2 Narrowing Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> +class sve2_int_bin_shift_imm_narrow_bottom tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { @@ -2622,26 +2681,63 @@ class sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, let Inst{20-19} = tsz8_64{1-0}; let Inst{18-16} = imm{2-0}; // imm3 let Inst{15-14} = 0b00; - let Inst{13-10} = opc; + let Inst{13-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_bin_cons_shift_imm_right_narrow opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16, - vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32, - vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right_narrow_bottom opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : 
sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64, - vecshiftR32> { + def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { let Inst{20-19} = imm{4-3}; } } -class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_bin_shift_imm_narrow_top tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> imm; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-14} = 0b00; + let Inst{13-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_bin_shift_imm_right_narrow_top opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } +} + +class sve2_int_addsub_narrow_high_bottom sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zd; @@ -2652,19 +2748,46 @@ class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-13} = 0b011; - let Inst{12-10} = opc; // S, R, T + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b0; // Top let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass 
sve2_int_addsub_narrow_high opc, string asm> { - def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_addsub_narrow_high_bottom opc, string asm> { + def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>; } -class sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_addsub_narrow_high_top sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b1; // Top + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_addsub_narrow_high_top opc, string asm> { + def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_bottom tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn), asm, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<5> Zd; @@ -2674,15 +2797,41 @@ class sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-19} = tsz8_64{1-0}; let Inst{18-13} = 0b000010; - let Inst{12-10} = opc; + let Inst{12-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass 
sve2_int_sat_extract_narrow opc, string asm> { - def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_sat_extract_narrow_bottom opc, string asm> { + def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_top tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-13} = 0b000010; + let Inst{12-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_sat_extract_narrow_top opc, string asm> { + def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>; } //===----------------------------------------------------------------------===// @@ -2713,11 +2862,17 @@ class sve_int_un_pred_arit sz8_64, bits<4> opc, let ElementSize = zprty.ElementSize; } -multiclass sve_int_un_pred_arit_0 opc, string asm> { +multiclass sve_int_un_pred_arit_0 opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>; def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : 
SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_h opc, string asm> { @@ -2735,11 +2890,21 @@ multiclass sve_int_un_pred_arit_0_d opc, string asm> { def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; } -multiclass sve_int_un_pred_arit_1 opc, string asm> { +multiclass sve_int_un_pred_arit_1 opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>; def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; + + def : SVE_3_Op_Pat(NAME # _B)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; + + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_1_fp opc, string asm> { @@ -3886,9 +4051,9 @@ multiclass sve_mem_cstnt_ss msz, string asm, RegisterOperand listty, (!cast(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; } -class sve2_mem_cstnt_vs_base opc, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, +class sve2_mem_sstnt_vs_base opc, string asm, + RegisterOperand listty, ZPRRegOp zprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), asm, "\t$Zt, $Pg, [$Zn, $Rm]", "", []>, Sched<[]> { @@ -3908,17 +4073,14 @@ class sve2_mem_cstnt_vs_base opc, dag iops, string asm, let mayStore = 1; } -multiclass sve2_mem_cstnt_vs opc, string asm, +multiclass sve2_mem_sstnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cstnt_vs_base; + def _REAL : sve2_mem_sstnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 
GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } @@ -4147,6 +4309,14 @@ class sve_int_perm_punpk let Inst{3-0} = Pd; } +multiclass sve_int_perm_punpk { + def NAME : sve_int_perm_punpk; + + def : SVE_1_Op_Pat(NAME)>; + def : SVE_1_Op_Pat(NAME)>; + def : SVE_1_Op_Pat(NAME)>; +} + class sve_int_rdffr_pred : I<(outs PPR8:$Pd), (ins PPRAny:$Pg), asm, "\t$Pd, $Pg/z", @@ -5094,7 +5264,7 @@ multiclass sve_mem_p_fill { (!cast(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; } -class sve2_mem_cldnt_vs_base opc, dag iops, string asm, +class sve2_mem_gldnt_vs_base opc, dag iops, string asm, RegisterOperand VecList> : I<(outs VecList:$Zt), iops, asm, "\t$Zt, $Pg/z, [$Zn, $Rm]", @@ -5119,17 +5289,15 @@ class sve2_mem_cldnt_vs_base opc, dag iops, string asm, let mayLoad = 1; } -multiclass sve2_mem_cldnt_vs opc, string asm, +multiclass sve2_mem_gldnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cldnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 7bb075c36e7..c27fc7a112e 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -125,7 +125,7 @@ namespace llvm { uint32_t AArch64SysReg::parseGenericRegister(StringRef Name) { // Try to parse an S____ register name - Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); + static const Regex GenericRegPattern("^S([0-3])_([0-7])_C([0-9]|1[0-5])_C([0-9]|1[0-5])_([0-7])$"); std::string UpperName = Name.upper(); SmallVector Ops; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h 
b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index e5e2fc2cb0d..7a4fcac09ec 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -313,9 +313,9 @@ struct SysAlias { uint16_t Encoding; FeatureBitset FeaturesRequired; - SysAlias (const char *N, uint16_t E) : Name(N), Encoding(E) {}; - SysAlias (const char *N, uint16_t E, FeatureBitset F) : - Name(N), Encoding(E), FeaturesRequired(F) {}; + constexpr SysAlias(const char *N, uint16_t E) : Name(N), Encoding(E) {} + constexpr SysAlias(const char *N, uint16_t E, FeatureBitset F) + : Name(N), Encoding(E), FeaturesRequired(F) {} bool haveFeatures(FeatureBitset ActiveFeatures) const { return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; @@ -326,9 +326,10 @@ struct SysAlias { struct SysAliasReg : SysAlias { bool NeedsReg; - SysAliasReg(const char *N, uint16_t E, bool R) : SysAlias(N, E), NeedsReg(R) {}; - SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F) : SysAlias(N, E, F), - NeedsReg(R) {}; + constexpr SysAliasReg(const char *N, uint16_t E, bool R) + : SysAlias(N, E), NeedsReg(R) {} + constexpr SysAliasReg(const char *N, uint16_t E, bool R, FeatureBitset F) + : SysAlias(N, E, F), NeedsReg(R) {} }; namespace AArch64AT{ @@ -627,6 +628,18 @@ namespace AArch64II { /// MO_S - Indicates that the bits of the symbol operand represented by /// MO_G0 etc are signed. MO_S = 0x100, + + /// MO_PREL - Indicates that the bits of the symbol operand represented by + /// MO_G0 etc are PC relative. + MO_PREL = 0x200, + + /// MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag + /// in bits 56-63. + /// On a FrameIndex operand, indicates that the underlying memory is tagged + /// with an unknown tag value (MTE); this needs to be lowered either to an + /// SP-relative load or store instruction (which do not check tags), or to + /// an LDG instruction to obtain the tag value. 
+ MO_TAGGED = 0x400, }; } // end namespace AArch64II diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 19a8bd90162..b64422ae542 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -188,6 +188,10 @@ ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); ModulePass *createR600OpenCLImageTypeLoweringPass(); FunctionPass *createAMDGPUAnnotateUniformValues(); +ModulePass *createAMDGPUPrintfRuntimeBinding(); +void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); +extern char &AMDGPUPrintfRuntimeBindingID; + ModulePass* createAMDGPUUnifyMetadataPass(); void initializeAMDGPUUnifyMetadataPass(PassRegistry&); extern char &AMDGPUUnifyMetadataID; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index baeba534012..42b477e07b3 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -10,6 +10,15 @@ include "llvm/TableGen/SearchableTable.td" include "llvm/Target/Target.td" include "AMDGPUFeatures.td" +def p0 : PtrValueType; +def p1 : PtrValueType; +def p2 : PtrValueType; +def p3 : PtrValueType; +def p4 : PtrValueType; +def p5 : PtrValueType; +def p6 : PtrValueType; + + class BoolToList { list ret = !if(Value, [1], []); } @@ -145,6 +154,12 @@ def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug", "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode" >; +def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug", + "HasMFMAInlineLiteralBug", + "true", + "MFMA cannot use inline literal as SrcC" +>; + def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard", "HasVcmpxPermlaneHazard", "true", @@ -802,6 +817,7 @@ def FeatureISAVersion9_0_8 : FeatureSet< FeaturePkFmacF16Inst, FeatureAtomicFaddInsts, FeatureSRAMECC, + FeatureMFMAInlineLiteralBug, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_9 : FeatureSet< diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp 
b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 419ebb2240a..e72b3f4fde6 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -173,6 +173,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID, case Intrinsic::amdgcn_implicitarg_ptr: return "amdgpu-implicitarg-ptr"; case Intrinsic::amdgcn_queue_ptr: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + // TODO: Does not require queue ptr on gfx9+ case Intrinsic::trap: case Intrinsic::debugtrap: IsQueuePtr = true; @@ -194,18 +197,12 @@ static bool handleAttr(Function &Parent, const Function &Callee, static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr) { // X ids unnecessarily propagated to kernels. - static const StringRef AttrNames[] = { - { "amdgpu-work-item-id-x" }, - { "amdgpu-work-item-id-y" }, - { "amdgpu-work-item-id-z" }, - { "amdgpu-work-group-id-x" }, - { "amdgpu-work-group-id-y" }, - { "amdgpu-work-group-id-z" }, - { "amdgpu-dispatch-ptr" }, - { "amdgpu-dispatch-id" }, - { "amdgpu-kernarg-segment-ptr" }, - { "amdgpu-implicitarg-ptr" } - }; + static constexpr StringLiteral AttrNames[] = { + "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", + "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", + "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", + "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", + "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"}; if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) NeedQueuePtr = true; diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index 097730441ed..f0e7ee910f9 100644 --- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -48,8 +48,8 @@ public: return ArgDescriptor(Reg, Mask, false, true); } - static ArgDescriptor createStack(Register Reg, unsigned Mask = ~0u) { - return ArgDescriptor(Reg, Mask, true, true); + static 
ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) { + return ArgDescriptor(Offset, Mask, true, true); } static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) { diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 743ac64b8f1..f2d903c8e7b 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -229,7 +229,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() { // alignment. Streamer.EmitValueToAlignment(64, 0, 1, 0); if (ReadOnlySection.getAlignment() < 64) - ReadOnlySection.setAlignment(64); + ReadOnlySection.setAlignment(Align(64)); const MCSubtargetInfo &STI = MF->getSubtarget(); @@ -273,7 +273,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { AsmPrinter::EmitFunctionEntryLabel(); } -void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) { // Write a line for the basic block label if it is not only fallthrough. DisasmLines.push_back( @@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Print comments that apply to both callable functions and entry points. 
void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, + Optional NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + if (NumAGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR), + false); + } OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), false); @@ -417,7 +424,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // The starting address of all shader programs must be 256 bytes aligned. // Regular functions just need the basic required instruction alignment. - MF.setAlignment(MFI->isEntryFunction() ? 8 : 2); + MF.setAlignment(MFI->isEntryFunction() ? Align(256) : Align(4)); SetupMachineFunction(MF); @@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()]; emitCommonFunctionComments( Info.NumVGPR, + STM.hasMAIInsts() ? Info.NumAGPR : Optional(), + Info.getTotalNumVGPRs(STM), Info.getTotalNumSGPRs(MF.getSubtarget()), Info.PrivateSegmentSize, getFunctionCodeSize(MF), MFI); @@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } OutStreamer->emitRawComment(" Kernel info:", false); - emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() + ? 
CurrentProgramInfo.NumAccVGPR + : Optional(), + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); @@ -506,6 +519,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { " NumVGPRsForWavesPerEU: " + Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); + OutStreamer->emitRawComment( + " Occupancy: " + + Twine(CurrentProgramInfo.Occupancy), false); + OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); @@ -588,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( UsesVCC, UsesFlatScratch); } +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs( + const GCNSubtarget &ST) const { + return std::max(NumVGPR, NumAGPR); +} + AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( const MachineFunction &MF) const { SIFunctionResourceInfo Info; @@ -634,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( HighestVGPRReg = Reg; break; } - MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg); - if (MRI.isPhysRegUsed(AReg)) { - HighestVGPRReg = AReg; - break; + } + + if (ST.hasMAIInsts()) { + MCPhysReg HighestAGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestAGPRReg = Reg; + break; + } } + Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ? 
0 : + TRI.getHWRegIndex(HighestAGPRReg) + 1; } MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; @@ -660,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( } int32_t MaxVGPR = -1; + int32_t MaxAGPR = -1; int32_t MaxSGPR = -1; uint64_t CalleeFrameSize = 0; @@ -669,11 +699,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( for (const MachineOperand &MO : MI.operands()) { unsigned Width = 0; bool IsSGPR = false; + bool IsAGPR = false; if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); switch (Reg) { case AMDGPU::EXEC: case AMDGPU::EXEC_LO: @@ -744,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 1; } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && @@ -755,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 2; } else if (AMDGPU::AReg_64RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 2; } else if (AMDGPU::VReg_96RegClass.contains(Reg)) { IsSGPR = false; @@ -771,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 4; } else if (AMDGPU::AReg_128RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && @@ -790,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 16; } else if (AMDGPU::AReg_512RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 16; } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) { IsSGPR = true; @@ -799,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 32; } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) { 
IsSGPR = false; + IsAGPR = true; Width = 32; } else { llvm_unreachable("Unknown register class"); @@ -807,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( int MaxUsed = HWReg + Width - 1; if (IsSGPR) { MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR; + } else if (IsAGPR) { + MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR; } else { MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR; } @@ -828,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); + MaxAGPR = std::max(MaxAGPR, 23); CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384)); Info.UsesVCC = true; @@ -852,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR); MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR); + MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR); CalleeFrameSize = std::max(I->second.PrivateSegmentSize, CalleeFrameSize); Info.UsesVCC |= I->second.UsesVCC; @@ -868,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.NumExplicitSGPR = MaxSGPR + 1; Info.NumVGPR = MaxVGPR + 1; + Info.NumAGPR = MaxAGPR + 1; Info.PrivateSegmentSize += CalleeFrameSize; return Info; @@ -876,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) { SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + const GCNSubtarget &STM = MF.getSubtarget(); - ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumArchVGPR = Info.NumVGPR; + ProgInfo.NumAccVGPR = Info.NumAGPR; + ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM); ProgInfo.NumSGPR = Info.NumExplicitSGPR; ProgInfo.ScratchSize = Info.PrivateSegmentSize; ProgInfo.VCCUsed = 
Info.UsesVCC; @@ -890,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, MF.getFunction().getContext().diagnose(DiagStackSize); } - const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are @@ -1057,6 +1100,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP. S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) | S_00B84C_EXCP_EN(0); + + ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize, + ProgInfo.NumSGPRsForWavesPerEU, + ProgInfo.NumVGPRsForWavesPerEU); } static unsigned getRsrcReg(CallingConv::ID CallConv) { @@ -1214,17 +1261,16 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (STM.isXNACKEnabled()) Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize; - // These alignment values are specified in powers of two, so alignment = - // 2^n. The minimum alignment is 2^4 = 16. - Out.kernarg_segment_alignment = std::max(4, - countTrailingZeros(MaxKernArgAlign)); + // kernarg_segment_alignment is specified as log of the alignment. + // The minimum alignment is 16. 
+ Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign)); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index cf77034329e..c50c19a4609 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -43,6 +43,7 @@ private: // Track the number of explicitly used VGPRs. Special registers reserved at // the end are tracked separately. int32_t NumVGPR = 0; + int32_t NumAGPR = 0; int32_t NumExplicitSGPR = 0; uint64_t PrivateSegmentSize = 0; bool UsesVCC = false; @@ -51,6 +52,7 @@ private: bool HasRecursion = false; int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const; + int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; SIProgramInfo CurrentProgramInfo; @@ -77,6 +79,8 @@ private: void EmitPALMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, + Optional NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -125,7 +129,7 @@ public: void EmitFunctionEntryLabel() override; - void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; + void EmitBasicBlockStart(const MachineBasicBlock &MBB) override; void EmitGlobalVariable(const GlobalVariable *GV) override; @@ -140,8 +144,8 @@ public: const char *ExtraCode, raw_ostream &O) override; protected: - mutable std::vector DisasmLines, HexLines; - mutable size_t DisasmLineMaxLen; + std::vector DisasmLines, HexLines; + size_t DisasmLineMaxLen; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 8a92e7d923f..ba8343142c6 100644 --- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -15,6 +15,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "SIDefines.h" #include 
"llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" @@ -24,20 +25,10 @@ #define DEBUG_TYPE "amdgpu-atomic-optimizer" using namespace llvm; +using namespace llvm::AMDGPU; namespace { -enum DPP_CTRL { - DPP_ROW_SR1 = 0x111, - DPP_ROW_SR2 = 0x112, - DPP_ROW_SR3 = 0x113, - DPP_ROW_SR4 = 0x114, - DPP_ROW_SR8 = 0x118, - DPP_WF_SR1 = 0x138, - DPP_ROW_BCAST15 = 0x142, - DPP_ROW_BCAST31 = 0x143 -}; - struct ReplacementInfo { Instruction *I; AtomicRMWInst::BinOp Op; @@ -52,9 +43,12 @@ private: const LegacyDivergenceAnalysis *DA; const DataLayout *DL; DominatorTree *DT; - bool HasDPP; + const GCNSubtarget *ST; bool IsPixelShader; + Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V, + Value *const Identity) const; + Value *buildShiftRight(IRBuilder<> &B, Value *V, Value *const Identity) const; void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx, bool ValDivergent) const; @@ -93,8 +87,7 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) { DT = DTW ? &DTW->getDomTree() : nullptr; const TargetPassConfig &TPC = getAnalysis(); const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - HasDPP = ST.hasDPP(); + ST = &TM.getSubtarget(F); IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS; visit(F); @@ -142,17 +135,18 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { // If the pointer operand is divergent, then each lane is doing an atomic // operation on a different address, and we cannot optimize that. - if (DA->isDivergent(I.getOperand(PtrIdx))) { + if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) { return; } - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. 
We can only optimize divergent values if // we have DPP available on our subtarget, and the atomic operation is 32 // bits. - if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) { + if (ValDivergent && + (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) { return; } @@ -219,20 +213,21 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { const unsigned ValIdx = 0; - const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx)); + const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx)); // If the value operand is divergent, each lane is contributing a different // value to the atomic calculation. We can only optimize divergent values if // we have DPP available on our subtarget, and the atomic operation is 32 // bits. - if (ValDivergent && (!HasDPP || (DL->getTypeSizeInBits(I.getType()) != 32))) { + if (ValDivergent && + (!ST->hasDPP() || DL->getTypeSizeInBits(I.getType()) != 32)) { return; } // If any of the other arguments to the intrinsic are divergent, we can't // optimize the operation. for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) { - if (DA->isDivergent(I.getOperand(Idx))) { + if (DA->isDivergentUse(&I.getOperandUse(Idx))) { return; } } @@ -282,6 +277,111 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, return B.CreateSelect(Cond, LHS, RHS); } +// Use the builder to create an inclusive scan of V across the wavefront, with +// all lanes active. 
+Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, + Value *V, Value *const Identity) const { + Type *const Ty = V->getType(); + Module *M = B.GetInsertBlock()->getModule(); + Function *UpdateDPP = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty); + Function *PermLaneX16 = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_permlanex16, {}); + Function *ReadLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {}); + + for (unsigned Idx = 0; Idx < 4; Idx++) { + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx), + B.getInt32(0xf), B.getInt32(0xf), B.getFalse()})); + } + if (ST->hasDPPBroadcasts()) { + // GFX9 has DPP row broadcast operations. + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa), + B.getInt32(0xf), B.getFalse()})); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc), + B.getInt32(0xf), B.getFalse()})); + } else { + // On GFX10 all DPP operations are confined to a single row. To get cross- + // row operations we have to use permlane or readlane. + + // Combine lane 15 into lanes 16..31 (and, for wave 64, lane 47 into lanes + // 48..63). + Value *const PermX = + B.CreateCall(PermLaneX16, {V, V, B.getInt32(-1), B.getInt32(-1), + B.getFalse(), B.getFalse()}); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, PermX, B.getInt32(DPP::QUAD_PERM_ID), + B.getInt32(0xa), B.getInt32(0xf), B.getFalse()})); + if (!ST->isWave32()) { + // Combine lane 31 into lanes 32..63. 
+ Value *const Lane31 = B.CreateCall(ReadLane, {V, B.getInt32(31)}); + V = buildNonAtomicBinOp( + B, Op, V, + B.CreateCall(UpdateDPP, + {Identity, Lane31, B.getInt32(DPP::QUAD_PERM_ID), + B.getInt32(0xc), B.getInt32(0xf), B.getFalse()})); + } + } + return V; +} + +// Use the builder to create a shift right of V across the wavefront, with all +// lanes active, to turn an inclusive scan into an exclusive scan. +Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V, + Value *const Identity) const { + Type *const Ty = V->getType(); + Module *M = B.GetInsertBlock()->getModule(); + Function *UpdateDPP = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_update_dpp, Ty); + Function *ReadLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_readlane, {}); + Function *WriteLane = + Intrinsic::getDeclaration(M, Intrinsic::amdgcn_writelane, {}); + + if (ST->hasDPPWavefrontShifts()) { + // GFX9 has DPP wavefront shift operations. + V = B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::WAVE_SHR1), B.getInt32(0xf), + B.getInt32(0xf), B.getFalse()}); + } else { + // On GFX10 all DPP operations are confined to a single row. To get cross- + // row operations we have to use permlane or readlane. + Value *Old = V; + V = B.CreateCall(UpdateDPP, + {Identity, V, B.getInt32(DPP::ROW_SHR0 + 1), + B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); + + // Copy the old lane 15 to the new lane 16. + V = B.CreateCall(WriteLane, {B.CreateCall(ReadLane, {Old, B.getInt32(15)}), + B.getInt32(16), V}); + + if (!ST->isWave32()) { + // Copy the old lane 31 to the new lane 32. + V = B.CreateCall( + WriteLane, + {B.CreateCall(ReadLane, {Old, B.getInt32(31)}), B.getInt32(32), V}); + + // Copy the old lane 47 to the new lane 48. 
+ V = B.CreateCall( + WriteLane, + {B.CreateCall(ReadLane, {Old, B.getInt32(47)}), B.getInt32(48), V}); + } + } + + return V; +} + static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op, unsigned BitWidth) { switch (Op) { @@ -345,23 +445,29 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // We need to know how many lanes are active within the wavefront, and we do // this by doing a ballot of active lanes. + Type *const WaveTy = B.getIntNTy(ST->getWavefrontSize()); CallInst *const Ballot = B.CreateIntrinsic( - Intrinsic::amdgcn_icmp, {B.getInt64Ty(), B.getInt32Ty()}, + Intrinsic::amdgcn_icmp, {WaveTy, B.getInt32Ty()}, {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)}); // We need to know how many lanes are active within the wavefront that are // below us. If we counted each lane linearly starting from 0, a lane is // below us only if its associated index was less than ours. We do this by // using the mbcnt intrinsic. - Value *const BitCast = B.CreateBitCast(Ballot, VecTy); - Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0)); - Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); - CallInst *const PartialMbcnt = B.CreateIntrinsic( - Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)}); - Value *const Mbcnt = - B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, - {ExtractHi, PartialMbcnt}), - Ty, false); + Value *Mbcnt; + if (ST->isWave32()) { + Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, + {Ballot, B.getInt32(0)}); + } else { + Value *const BitCast = B.CreateBitCast(Ballot, VecTy); + Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0)); + Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); + Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {}, + {ExtractLo, B.getInt32(0)}); + Mbcnt = + B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, {ExtractHi, Mbcnt}); + } + Mbcnt = B.CreateIntCast(Mbcnt, Ty, false); Value 
*const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth)); @@ -373,47 +479,25 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, if (ValDivergent) { // First we need to set all inactive invocations to the identity value, so // that they can correctly contribute to the final result. - CallInst *const SetInactive = - B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); + NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); - CallInst *const FirstDPP = - B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty, - {Identity, SetInactive, B.getInt32(DPP_WF_SR1), - B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); - ExclScan = FirstDPP; - - const unsigned Iters = 7; - const unsigned DPPCtrl[Iters] = { - DPP_ROW_SR1, DPP_ROW_SR2, DPP_ROW_SR3, DPP_ROW_SR4, - DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31}; - const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc}; - const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf}; - - // This loop performs an exclusive scan across the wavefront, with all lanes - // active (by using the WWM intrinsic). - for (unsigned Idx = 0; Idx < Iters; Idx++) { - Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan; - CallInst *const DPP = B.CreateIntrinsic( - Intrinsic::amdgcn_update_dpp, Ty, - {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]), - B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()}); - - ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP); - } - - NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan); + const AtomicRMWInst::BinOp ScanOp = + Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op; + NewV = buildScan(B, ScanOp, NewV, Identity); + ExclScan = buildShiftRight(B, NewV, Identity); // Read the value from the last lane, which has accumlated the values of // each active lane in the wavefront. This will be our new value which we // will provide to the atomic operation. 
+ Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1); if (TyBitWidth == 64) { Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty()); Value *const ExtractHi = - B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty()); + B.CreateTrunc(B.CreateLShr(NewV, 32), B.getInt32Ty()); CallInst *const ReadLaneLo = B.CreateIntrinsic( - Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)}); + Intrinsic::amdgcn_readlane, {}, {ExtractLo, LastLaneIdx}); CallInst *const ReadLaneHi = B.CreateIntrinsic( - Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)}); + Intrinsic::amdgcn_readlane, {}, {ExtractHi, LastLaneIdx}); Value *const PartialInsert = B.CreateInsertElement( UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0)); Value *const Insert = @@ -421,7 +505,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, NewV = B.CreateBitCast(Insert, Ty); } else if (TyBitWidth == 32) { NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, - {NewV, B.getInt32(63)}); + {NewV, LastLaneIdx}); } else { llvm_unreachable("Unhandled atomic bit width"); } @@ -493,77 +577,80 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // original instruction. B.SetInsertPoint(&I); - // Create a PHI node to get our new atomic result into the exit block. - PHINode *const PHI = B.CreatePHI(Ty, 2); - PHI->addIncoming(UndefValue::get(Ty), EntryBB); - PHI->addIncoming(NewI, SingleLaneTerminator->getParent()); - - // We need to broadcast the value who was the lowest active lane (the first - // lane) to all other lanes in the wavefront. We use an intrinsic for this, - // but have to handle 64-bit broadcasts with two calls to this intrinsic. 
- Value *BroadcastI = nullptr; - - if (TyBitWidth == 64) { - Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty()); - Value *const ExtractHi = - B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty()); - CallInst *const ReadFirstLaneLo = - B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo); - CallInst *const ReadFirstLaneHi = - B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi); - Value *const PartialInsert = B.CreateInsertElement( - UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); - Value *const Insert = - B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); - BroadcastI = B.CreateBitCast(Insert, Ty); - } else if (TyBitWidth == 32) { - - BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI); - } else { - llvm_unreachable("Unhandled atomic bit width"); - } - - // Now that we have the result of our single atomic operation, we need to - // get our individual lane's slice into the result. We use the lane offset we - // previously calculated combined with the atomic result value we got from the - // first lane, to get our lane's index into the atomic result. - Value *LaneOffset = nullptr; - if (ValDivergent) { - LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); - } else { - switch (Op) { - default: - llvm_unreachable("Unhandled atomic op"); - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - LaneOffset = B.CreateMul(V, Mbcnt); - break; - case AtomicRMWInst::And: - case AtomicRMWInst::Or: - case AtomicRMWInst::Max: - case AtomicRMWInst::Min: - case AtomicRMWInst::UMax: - case AtomicRMWInst::UMin: - LaneOffset = B.CreateSelect(Cond, Identity, V); - break; - case AtomicRMWInst::Xor: - LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); - break; - } - } - Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); - - if (IsPixelShader) { - // Need a final PHI to reconverge to above the helper lane branch mask. 
- B.SetInsertPoint(PixelExitBB->getFirstNonPHI()); - + const bool NeedResult = !I.use_empty(); + if (NeedResult) { + // Create a PHI node to get our new atomic result into the exit block. PHINode *const PHI = B.CreatePHI(Ty, 2); - PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB); - PHI->addIncoming(Result, I.getParent()); - I.replaceAllUsesWith(PHI); - } else { - // Replace the original atomic instruction with the new one. - I.replaceAllUsesWith(Result); + PHI->addIncoming(UndefValue::get(Ty), EntryBB); + PHI->addIncoming(NewI, SingleLaneTerminator->getParent()); + + // We need to broadcast the value who was the lowest active lane (the first + // lane) to all other lanes in the wavefront. We use an intrinsic for this, + // but have to handle 64-bit broadcasts with two calls to this intrinsic. + Value *BroadcastI = nullptr; + + if (TyBitWidth == 64) { + Value *const ExtractLo = B.CreateTrunc(PHI, B.getInt32Ty()); + Value *const ExtractHi = + B.CreateTrunc(B.CreateLShr(PHI, 32), B.getInt32Ty()); + CallInst *const ReadFirstLaneLo = + B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo); + CallInst *const ReadFirstLaneHi = + B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi); + Value *const PartialInsert = B.CreateInsertElement( + UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0)); + Value *const Insert = + B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1)); + BroadcastI = B.CreateBitCast(Insert, Ty); + } else if (TyBitWidth == 32) { + + BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI); + } else { + llvm_unreachable("Unhandled atomic bit width"); + } + + // Now that we have the result of our single atomic operation, we need to + // get our individual lane's slice into the result. We use the lane offset + // we previously calculated combined with the atomic result value we got + // from the first lane, to get our lane's index into the atomic result. 
+ Value *LaneOffset = nullptr; + if (ValDivergent) { + LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); + } else { + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + LaneOffset = B.CreateMul(V, Mbcnt); + break; + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + LaneOffset = B.CreateSelect(Cond, Identity, V); + break; + case AtomicRMWInst::Xor: + LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); + break; + } + } + Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); + + if (IsPixelShader) { + // Need a final PHI to reconverge to above the helper lane branch mask. + B.SetInsertPoint(PixelExitBB->getFirstNonPHI()); + + PHINode *const PHI = B.CreatePHI(Ty, 2); + PHI->addIncoming(UndefValue::get(Ty), PixelEntryBB); + PHI->addIncoming(Result, I.getParent()); + I.replaceAllUsesWith(PHI); + } else { + // Replace the original atomic instruction with the new one. + I.replaceAllUsesWith(Result); + } } // And delete the original. 
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index b107c357196..58c44acde1a 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -30,13 +30,15 @@ using namespace llvm; namespace { -struct OutgoingArgHandler : public CallLowering::ValueHandler { - OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder MIB, CCAssignFn *AssignFn) - : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} +struct OutgoingValueHandler : public CallLowering::ValueHandler { + OutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB, CCAssignFn *AssignFn) + : ValueHandler(B, MRI, AssignFn), MIB(MIB) {} MachineInstrBuilder MIB; + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { llvm_unreachable("not implemented"); @@ -49,15 +51,96 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign &VA) override { - MIB.addUse(PhysReg); - MIRBuilder.buildCopy(PhysReg, ValVReg); + Register ExtReg; + if (VA.getLocVT().getSizeInBits() < 32) { + // 16-bit types are reported as legal for 32-bit registers. We need to + // extend and do a 32-bit copy to avoid the verifier complaining about it. 
+ ExtReg = MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0); + } else + ExtReg = extendRegister(ValVReg, VA); + + MIRBuilder.buildCopy(PhysReg, ExtReg); + MIB.addUse(PhysReg, RegState::Implicit); } bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, + ISD::ArgFlagsTy Flags, CCState &State) override { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); + } +}; + +struct IncomingArgHandler : public CallLowering::ValueHandler { + uint64_t StackUsed = 0; + + IncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : ValueHandler(B, MRI, AssignFn) {} + + Register getStackAddress(uint64_t Size, int64_t Offset, + MachinePointerInfo &MPO) override { + auto &MFI = MIRBuilder.getMF().getFrameInfo(); + int FI = MFI.CreateFixedObject(Size, Offset, true); + MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); + Register AddrReg = MRI.createGenericVirtualRegister( + LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32)); + MIRBuilder.buildFrameIndex(AddrReg, FI); + StackUsed = std::max(StackUsed, Size + Offset); + return AddrReg; + } + + void assignValueToReg(Register ValVReg, Register PhysReg, + CCValAssign &VA) override { + markPhysRegUsed(PhysReg); + + if (VA.getLocVT().getSizeInBits() < 32) { + // 16-bit types are reported as legal for 32-bit registers. We need to do + // a 32-bit copy, and truncate to avoid the verifier complaining about it. 
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + return; + } + + switch (VA.getLocInfo()) { + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + } + } + + void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + MachinePointerInfo &MPO, CCValAssign &VA) override { + // FIXME: Get alignment + auto MMO = MIRBuilder.getMF().getMachineMemOperand( + MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1); + MIRBuilder.buildLoad(ValVReg, Addr, *MMO); + } + + /// How the physical register gets marked varies between formal + /// parameters (it's a basic-block live-in), and a call instruction + /// (it's an implicit-def of the BL). + virtual void markPhysRegUsed(unsigned PhysReg) = 0; + + // FIXME: What is the point of this being a callback? 
+ bool isIncomingArgumentHandler() const override { return true; } +}; + +struct FormalArgHandler : public IncomingArgHandler { + FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : IncomingArgHandler(B, MRI, AssignFn) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -67,55 +150,198 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) : CallLowering(&TLI) { } -bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, +void AMDGPUCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, + const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv, + SplitArgTy PerformArgSplit) const { + const SITargetLowering &TLI = *getTLI(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + + if (OrigArg.Ty->isVoidTy()) + return; + + SmallVector SplitVTs; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs); + + assert(OrigArg.Regs.size() == SplitVTs.size()); + + int SplitIdx = 0; + for (EVT VT : SplitVTs) { + unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); + Type *Ty = VT.getTypeForEVT(Ctx); + + + + if (NumParts == 1) { + // No splitting to do, but we want to replace the original type (e.g. [1 x + // double] -> double). + SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty, + OrigArg.Flags, OrigArg.IsFixed); + + ++SplitIdx; + continue; + } + + LLT LLTy = getLLTForType(*Ty, DL); + + SmallVector SplitRegs; + + EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); + Type *PartTy = PartVT.getTypeForEVT(Ctx); + LLT PartLLT = getLLTForType(*PartTy, DL); + + // FIXME: Should we be reporting all of the part registers for a single + // argument, and let handleAssignments take care of the repacking? 
+ for (unsigned i = 0; i < NumParts; ++i) { + Register PartReg = MRI.createGenericVirtualRegister(PartLLT); + SplitRegs.push_back(PartReg); + SplitArgs.emplace_back(ArrayRef(PartReg), PartTy, OrigArg.Flags); + } + + PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx); + + ++SplitIdx; + } +} + +// Get the appropriate type to make \p OrigTy \p Factor times bigger. +static LLT getMultipleType(LLT OrigTy, int Factor) { + if (OrigTy.isVector()) { + return LLT::vector(OrigTy.getNumElements() * Factor, + OrigTy.getElementType()); + } + + return LLT::scalar(OrigTy.getSizeInBits() * Factor); +} + +// TODO: Move to generic code +static void unpackRegsToOrigType(MachineIRBuilder &B, + ArrayRef DstRegs, + Register SrcReg, + LLT SrcTy, + LLT PartTy) { + assert(DstRegs.size() > 1 && "Nothing to unpack"); + + MachineFunction &MF = B.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + const unsigned SrcSize = SrcTy.getSizeInBits(); + const unsigned PartSize = PartTy.getSizeInBits(); + + if (SrcTy.isVector() && !PartTy.isVector() && + PartSize > SrcTy.getElementType().getSizeInBits()) { + // Vector was scalarized, and the elements extended. + auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), + SrcReg); + for (int i = 0, e = DstRegs.size(); i != e; ++i) + B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i)); + return; + } + + if (SrcSize % PartSize == 0) { + B.buildUnmerge(DstRegs, SrcReg); + return; + } + + const int NumRoundedParts = (SrcSize + PartSize - 1) / PartSize; + + LLT BigTy = getMultipleType(PartTy, NumRoundedParts); + auto ImpDef = B.buildUndef(BigTy); + + Register BigReg = MRI.createGenericVirtualRegister(BigTy); + B.buildInsert(BigReg, ImpDef.getReg(0), SrcReg, 0).getReg(0); + + int64_t Offset = 0; + for (unsigned i = 0, e = DstRegs.size(); i != e; ++i, Offset += PartSize) + B.buildExtract(DstRegs[i], BigReg, Offset); +} + +/// Lower the return value for the already existing \p Ret. This assumes that +/// \p B's insertion point is correct. 
+bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B, + const Value *Val, ArrayRef VRegs, + MachineInstrBuilder &Ret) const { + if (!Val) + return true; + + auto &MF = B.getMF(); + const auto &F = MF.getFunction(); + const DataLayout &DL = MF.getDataLayout(); + + CallingConv::ID CC = F.getCallingConv(); + const SITargetLowering &TLI = *getTLI(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + ArgInfo OrigRetInfo(VRegs, Val->getType()); + setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F); + SmallVector SplitRetInfos; + + splitToValueTypes( + OrigRetInfo, SplitRetInfos, DL, MRI, CC, + [&](ArrayRef Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { + unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT); + }); + + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg()); + + OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn); + return handleAssignments(B, SplitRetInfos, RetHandler); +} + +bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef VRegs) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MFI->setIfReturnsVoid(!Val); - if (!Val) { - MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0); + assert(!Val == VRegs.empty() && "Return value without a vreg"); + + CallingConv::ID CC = B.getMF().getFunction().getCallingConv(); + const bool IsShader = AMDGPU::isShader(CC); + const bool IsWaveEnd = (IsShader && MFI->returnsVoid()) || + AMDGPU::isKernel(CC); + if (IsWaveEnd) { + B.buildInstr(AMDGPU::S_ENDPGM) + .addImm(0); return true; } - Register VReg = VRegs[0]; + auto const &ST = B.getMF().getSubtarget(); - const Function &F = MF.getFunction(); - auto &DL = F.getParent()->getDataLayout(); - if (!AMDGPU::isShader(F.getCallingConv())) - return false; + unsigned ReturnOpc = + IsShader ? 
AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return; - - const AMDGPUTargetLowering &TLI = *getTLI(); - SmallVector SplitVTs; - SmallVector Offsets; - ArgInfo OrigArg{VReg, Val->getType()}; - setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); - ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - - SmallVector SplitArgs; - CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false); - for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { - Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext()); - SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed}); + auto Ret = B.buildInstrNoInsert(ReturnOpc); + Register ReturnAddrVReg; + if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass); + Ret.addUse(ReturnAddrVReg); } - auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG); - OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn); - if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) - return false; - MIRBuilder.insertInstr(RetInstr); + if (!lowerReturnVal(B, Val, VRegs, Ret)) + return false; + + if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF), + &AMDGPU::SGPR_64RegClass); + B.buildCopy(ReturnAddrVReg, LiveInReturn); + } + + // TODO: Handle CalleeSavedRegsViaCopy. 
+ + B.insertInstr(Ret); return true; } -Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, +Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); @@ -128,79 +354,37 @@ Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr); Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(OffsetReg, Offset); + B.buildConstant(OffsetReg, Offset); - MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg); + B.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg); return DstReg; } -void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, +void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset, unsigned Align, Register DstReg) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); - Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset); + Register PtrReg = lowerParameterPtr(B, ParamTy, Offset); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | - MachineMemOperand::MONonTemporal | + MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant, TypeSize, Align); - MIRBuilder.buildLoad(DstReg, PtrReg, *MMO); -} - -static Register findFirstFreeSGPR(CCState &CCInfo) { - unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - for (unsigned Reg = 0; Reg < NumSGPRs; 
++Reg) { - if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) { - return AMDGPU::SGPR0 + Reg; - } - } - llvm_unreachable("Cannot allocate sgpr"); -} - -static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { - const LLT S32 = LLT::scalar(32); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (Info.hasWorkItemIDX()) { - Register Reg = AMDGPU::VGPR0; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg)); - } - - if (Info.hasWorkItemIDY()) { - Register Reg = AMDGPU::VGPR1; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg)); - } - - if (Info.hasWorkItemIDZ()) { - Register Reg = AMDGPU::VGPR2; - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); - - CCInfo.AllocateReg(Reg); - Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg)); - } + B.buildLoad(DstReg, PtrReg, *MMO); } // Allocate special inputs passed in user SGPRs. static void allocateHSAUserSGPRs(CCState &CCInfo, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) { @@ -229,8 +413,8 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); Register VReg = MRI.createGenericVirtualRegister(P4); MRI.addLiveIn(InputPtrReg, VReg); - MIRBuilder.getMBB().addLiveIn(InputPtrReg); - MIRBuilder.buildCopy(VReg, InputPtrReg); + B.getMBB().addLiveIn(InputPtrReg); + B.buildCopy(VReg, InputPtrReg); CCInfo.AllocateReg(InputPtrReg); } @@ -250,74 +434,22 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, // these from the dispatch pointer. 
} -static void allocateSystemSGPRs(CCState &CCInfo, - MachineFunction &MF, - SIMachineFunctionInfo &Info, - CallingConv::ID CallConv, - bool IsShader) { - const LLT S32 = LLT::scalar(32); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (Info.hasWorkGroupIDX()) { - Register Reg = Info.addWorkGroupIDX(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDY()) { - Register Reg = Info.addWorkGroupIDY(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupIDZ()) { - unsigned Reg = Info.addWorkGroupIDZ(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasWorkGroupInfo()) { - unsigned Reg = Info.addWorkGroupInfo(); - MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32); - CCInfo.AllocateReg(Reg); - } - - if (Info.hasPrivateSegmentWaveByteOffset()) { - // Scratch wave offset passed in system SGPR. - unsigned PrivateSegmentWaveByteOffsetReg; - - if (IsShader) { - PrivateSegmentWaveByteOffsetReg = - Info.getPrivateSegmentWaveByteOffsetSystemSGPR(); - - // This is true if the scratch wave byte offset doesn't have a fixed - // location. 
- if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) { - PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); - Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); - } - } else - PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset(); - - MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); - CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); - } -} - bool AMDGPUCallLowering::lowerFormalArgumentsKernel( - MachineIRBuilder &MIRBuilder, const Function &F, + MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); const GCNSubtarget *Subtarget = &MF.getSubtarget(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const SITargetLowering &TLI = *getTLI(); + const DataLayout &DL = F.getParent()->getDataLayout(); SmallVector ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info); + allocateHSAUserSGPRs(CCInfo, B, MF, *TRI, *Info); unsigned i = 0; const unsigned KernArgBaseAlign = 16; @@ -343,123 +475,242 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel( : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL)); unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset); ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy)); - lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg); + lowerParameter(B, ArgTy, ArgOffset, Align, ArgReg); if (OrigArgRegs.size() > 1) - unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder); + unpackRegs(OrigArgRegs, ArgReg, ArgTy, B); ++i; } - allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info); - allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false); + TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, 
*TRI, *Info); + TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false); return true; } +// TODO: Move this to generic code +static void packSplitRegsToOrigType(MachineIRBuilder &B, + ArrayRef OrigRegs, + ArrayRef Regs, + LLT LLTy, + LLT PartLLT) { + if (!LLTy.isVector() && !PartLLT.isVector()) { + B.buildMerge(OrigRegs[0], Regs); + return; + } + + if (LLTy.isVector() && PartLLT.isVector()) { + assert(LLTy.getElementType() == PartLLT.getElementType()); + + int DstElts = LLTy.getNumElements(); + int PartElts = PartLLT.getNumElements(); + if (DstElts % PartElts == 0) + B.buildConcatVectors(OrigRegs[0], Regs); + else { + // Deal with v3s16 split into v2s16 + assert(PartElts == 2 && DstElts % 2 != 0); + int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts); + + LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType()); + auto RoundedConcat = B.buildConcatVectors(RoundedDestTy, Regs); + B.buildExtract(OrigRegs[0], RoundedConcat, 0); + } + + return; + } + + assert(LLTy.isVector() && !PartLLT.isVector()); + + LLT DstEltTy = LLTy.getElementType(); + if (DstEltTy == PartLLT) { + // Vector was trivially scalarized. + B.buildBuildVector(OrigRegs[0], Regs); + } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) { + // Deal with vector with 64-bit elements decomposed to 32-bit + // registers. Need to create intermediate 64-bit elements. + SmallVector EltMerges; + int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits(); + + assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0); + + for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { + auto Merge = B.buildMerge(DstEltTy, + Regs.take_front(PartsPerElt)); + EltMerges.push_back(Merge.getReg(0)); + Regs = Regs.drop_front(PartsPerElt); + } + + B.buildBuildVector(OrigRegs[0], EltMerges); + } else { + // Vector was split, and elements promoted to a wider type. 
+ LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT); + auto BV = B.buildBuildVector(BVType, Regs); + B.buildTrunc(OrigRegs[0], BV); + } +} + bool AMDGPUCallLowering::lowerFormalArguments( - MachineIRBuilder &MIRBuilder, const Function &F, + MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const { + CallingConv::ID CC = F.getCallingConv(); + // The infrastructure for normal calling convention lowering is essentially // useless for kernels. We want to avoid any kind of legalization or argument // splitting. - if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) - return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs); + if (CC == CallingConv::AMDGPU_KERNEL) + return lowerFormalArgumentsKernel(B, F, VRegs); - // AMDGPU_GS and AMDGP_HS are not supported yet. - if (F.getCallingConv() == CallingConv::AMDGPU_GS || - F.getCallingConv() == CallingConv::AMDGPU_HS) - return false; + const bool IsShader = AMDGPU::isShader(CC); + const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC); - MachineFunction &MF = MIRBuilder.getMF(); + MachineFunction &MF = B.getMF(); + MachineBasicBlock &MBB = B.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *Info = MF.getInfo(); - const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const GCNSubtarget &Subtarget = MF.getSubtarget(); + const SIRegisterInfo *TRI = Subtarget.getRegisterInfo(); const DataLayout &DL = F.getParent()->getDataLayout(); - bool IsShader = AMDGPU::isShader(F.getCallingConv()); SmallVector ArgLocs; - CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext()); + + if (!IsEntryFunc) { + Register ReturnAddrReg = TRI->getReturnAddressReg(MF); + Register LiveInReturn = MF.addLiveIn(ReturnAddrReg, + &AMDGPU::SGPR_64RegClass); + MBB.addLiveIn(ReturnAddrReg); + B.buildCopy(LiveInReturn, ReturnAddrReg); + } if (Info->hasImplicitBufferPtr()) { - unsigned ImplicitBufferPtrReg = 
Info->addImplicitBufferPtr(*TRI); + Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); CCInfo.AllocateReg(ImplicitBufferPtrReg); } - unsigned NumArgs = F.arg_size(); - Function::const_arg_iterator CurOrigArg = F.arg_begin(); - const AMDGPUTargetLowering &TLI = *getTLI(); + + SmallVector SplitArgs; + unsigned Idx = 0; unsigned PSInputNum = 0; - BitVector Skipped(NumArgs); - for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) { - EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType()); - // We can only hanlde simple value types at the moment. - ISD::ArgFlagsTy Flags; - assert(VRegs[i].size() == 1 && "Can't lower into more than one register"); - ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()}; - setArgFlags(OrigArg, i + 1, DL, F); - Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType())); + for (auto &Arg : F.args()) { + if (DL.getTypeStoreSize(Arg.getType()) == 0) + continue; - if (F.getCallingConv() == CallingConv::AMDGPU_PS && - !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() && - PSInputNum <= 15) { - if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) { - Skipped.set(i); - ++PSInputNum; - continue; + const bool InReg = Arg.hasAttribute(Attribute::InReg); + + // SGPR arguments to functions not implemented. 
+ if (!IsShader && InReg) + return false; + + if (Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) + return false; + + if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) { + const bool ArgUsed = !Arg.use_empty(); + bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum); + + if (!SkipArg) { + Info->markPSInputAllocated(PSInputNum); + if (ArgUsed) + Info->markPSInputEnabled(PSInputNum); } - Info->markPSInputAllocated(PSInputNum); - if (!CurOrigArg->use_empty()) - Info->markPSInputEnabled(PSInputNum); - ++PSInputNum; + + if (SkipArg) { + for (int I = 0, E = VRegs[Idx].size(); I != E; ++I) + B.buildUndef(VRegs[Idx][I]); + + ++Idx; + continue; + } } - CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(), - /*IsVarArg=*/false); + ArgInfo OrigArg(VRegs[Idx], Arg.getType()); + setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); - if (ValEVT.isVector()) { - EVT ElemVT = ValEVT.getVectorElementType(); - if (!ValEVT.isSimple()) - return false; - MVT ValVT = ElemVT.getSimpleVT(); - bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, - OrigArg.Flags, CCInfo); - if (!Res) - return false; - } else { - MVT ValVT = ValEVT.getSimpleVT(); - if (!ValEVT.isSimple()) - return false; - bool Res = - AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, CC, + // FIXME: We should probably be passing multiple registers to + // handleAssignments to do this + [&](ArrayRef Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { + packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs, + LLTy, PartLLT); + }); - // Fail if we don't know how to handle this type. - if (Res) - return false; + ++Idx; + } + + // At least one interpolation mode must be enabled or else the GPU will + // hang. + // + // Check PSInputAddr instead of PSInputEnable. 
The idea is that if the user + // set PSInputAddr, the user wants to enable some bits after the compilation + // based on run-time states. Since we can't know what the final PSInputEna + // will look like, so we shouldn't do anything here and the user should take + // responsibility for the correct programming. + // + // Otherwise, the following restrictions apply: + // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. + // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be + // enabled too. + if (CC == CallingConv::AMDGPU_PS) { + if ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11))) { + CCInfo.AllocateReg(AMDGPU::VGPR0); + CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->markPSInputEnabled(0); + } + + if (Subtarget.isAmdPalOS()) { + // For isAmdPalOS, the user does not enable some bits after compilation + // based on run-time states; the register values being generated here are + // the final ones set in hardware. Therefore we need to apply the + // workaround to PSInputAddr and PSInputEnable together. (The case where + // a bit is set in PSInputAddr but not PSInputEnable is where the frontend + // set up an input arg for a particular interpolation mode, but nothing + // uses that input arg. Really we should have an earlier pass that removes + // such an arg.) 
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable(); + if ((PsInputBits & 0x7F) == 0 || + ((PsInputBits & 0xF) == 0 && + (PsInputBits >> 11 & 1))) + Info->markPSInputEnabled( + countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); } } - Function::const_arg_iterator Arg = F.arg_begin(); + const SITargetLowering &TLI = *getTLI(); + CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg()); - if (F.getCallingConv() == CallingConv::AMDGPU_VS || - F.getCallingConv() == CallingConv::AMDGPU_PS) { - for (unsigned i = 0, OrigArgIdx = 0; - OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) { - if (Skipped.test(OrigArgIdx)) - continue; - assert(VRegs[OrigArgIdx].size() == 1 && - "Can't lower into more than 1 reg"); - CCValAssign &VA = ArgLocs[i++]; - MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]); - MIRBuilder.getMBB().addLiveIn(VA.getLocReg()); - MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg()); - } + if (!MBB.empty()) + B.setInstr(*MBB.begin()); - allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader); - return true; + FormalArgHandler Handler(B, MRI, AssignFn); + if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler)) + return false; + + if (!IsEntryFunc) { + // Special inputs come after user arguments. + TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info); } - return false; + // Start adding system SGPRs. + if (IsEntryFunc) { + TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader); + } else { + CCInfo.AllocateReg(Info->getScratchRSrcReg()); + CCInfo.AllocateReg(Info->getScratchWaveOffsetReg()); + CCInfo.AllocateReg(Info->getFrameOffsetReg()); + TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info); + } + + // Move back to the end of the basic block. 
+ B.setMBB(MBB); + + return true; } diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index 3599659cac6..53a562586bc 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -20,26 +20,37 @@ namespace llvm { class AMDGPUTargetLowering; +class MachineInstrBuilder; class AMDGPUCallLowering: public CallLowering { - Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, + Register lowerParameterPtr(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset) const; - void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy, - uint64_t Offset, unsigned Align, - Register DstReg) const; + void lowerParameter(MachineIRBuilder &B, Type *ParamTy, uint64_t Offset, + unsigned Align, Register DstReg) const; - public: + /// A function of this type is used to perform value split action. + using SplitArgTy = std::function, LLT, LLT, int)>; + + void splitToValueTypes(const ArgInfo &OrigArgInfo, + SmallVectorImpl &SplitArgs, + const DataLayout &DL, MachineRegisterInfo &MRI, + CallingConv::ID CallConv, + SplitArgTy SplitArg) const; + + bool lowerReturnVal(MachineIRBuilder &B, const Value *Val, + ArrayRef VRegs, MachineInstrBuilder &Ret) const; + +public: AMDGPUCallLowering(const AMDGPUTargetLowering &TLI); - bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, + bool lowerReturn(MachineIRBuilder &B, const Value *Val, ArrayRef VRegs) const override; - bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder, - const Function &F, + bool lowerFormalArgumentsKernel(MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const; - bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + bool lowerFormalArguments(MachineIRBuilder &B, const Function &F, ArrayRef> VRegs) const override; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); diff --git 
a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td index 3688cd77542..f8a54a61aac 100644 --- a/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -24,22 +24,9 @@ def CC_SI : CallingConv<[ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, - SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47, - SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55, - SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63, - SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71, - SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79, - SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87, - SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95, - SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103, - SGPR104, SGPR105 + SGPR40, SGPR41, SGPR42, SGPR43 ]>>>, - // We have no way of referring to the generated register tuples - // here, so use a custom function. - CCIfInReg>>, - CCIfByVal>>, - // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. CCIfNotInReg>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. 
@@ -138,7 +117,6 @@ def CC_AMDGPU_Func : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>, CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>, CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>, @@ -157,7 +135,6 @@ def RetCC_AMDGPU_Func : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index b750c6b5f6d..1640a4a59ee 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -55,6 +55,12 @@ static cl::opt WidenLoads( cl::ReallyHidden, cl::init(true)); +static cl::opt UseMul24Intrin( + "amdgpu-codegenprepare-mul24", + cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, + cl::init(true)); + class AMDGPUCodeGenPrepare : public FunctionPass, public InstVisitor { const GCNSubtarget *ST = nullptr; @@ -509,7 +515,9 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const { } } - I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals)); + Value *NewVal = insertValues(Builder, Ty, ResultVals); + NewVal->takeName(&I); + I.replaceAllUsesWith(NewVal); I.eraseFromParent(); return true; @@ -879,7 +887,7 @@ bool 
AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) { DA->isUniform(&I) && promoteUniformOpToI32(I)) return true; - if (replaceMulWithMul24(I)) + if (UseMul24Intrin && replaceMulWithMul24(I)) return true; bool Changed = false; diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index e8079773636..61ce83b30e0 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -13,9 +13,9 @@ #include "AMDGPUFrameLowering.h" using namespace llvm; -AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, - int LAO, unsigned TransAl) - : TargetFrameLowering(D, StackAl, LAO, TransAl) { } +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, Align StackAl, + int LAO, Align TransAl) + : TargetFrameLowering(D, StackAl, LAO, TransAl) {} AMDGPUFrameLowering::~AMDGPUFrameLowering() = default; diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 48b64488303..92e256cf282 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -25,8 +25,8 @@ namespace llvm { /// See TargetFrameInfo for more comments. 
class AMDGPUFrameLowering : public TargetFrameLowering { public: - AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1); + AMDGPUFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None()); ~AMDGPUFrameLowering() override; /// \returns The number of 32-bit sub-registers that are used when storing diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index cad4c2ef404..f2be1ca44d3 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -12,10 +12,6 @@ include "AMDGPU.td" -def p0 : PtrValueType; -def p1 : PtrValueType; -def p4 : PtrValueType; - def sd_vsrc0 : ComplexPattern; def gi_vsrc0 : GIComplexOperandMatcher, @@ -38,6 +34,18 @@ def gi_vop3omods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3omods0clamp0omod : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vop3opselmods0 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def gi_vop3opselmods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_smrd_imm : GIComplexOperandMatcher, GIComplexPatternEquiv; @@ -50,12 +58,19 @@ def gi_smrd_sgpr : GIComplexOperandMatcher, GIComplexPatternEquiv; +// FIXME: Why are the atomic versions separated? def gi_flat_offset : GIComplexOperandMatcher, GIComplexPatternEquiv; def gi_flat_offset_signed : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_flat_signed_atomic : + GIComplexOperandMatcher, + GIComplexPatternEquiv; def gi_mubuf_scratch_offset : GIComplexOperandMatcher, @@ -64,6 +79,44 @@ def gi_mubuf_scratch_offen : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_ds_1addr_1offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + +// Separate load nodes are defined to glue m0 initialization in +// SelectionDAG. 
The GISel selector can just insert m0 initialization +// directly before selecting a glue-less load, so hide this +// distinction. + +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 1; +} + +def : GINodeEquiv { + let CheckMMOIsNonAtomic = 1; +} + +def : GINodeEquiv { + bit CheckMMOIsAtomic = 1; +} + + + +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + +def : GINodeEquiv; class GISelSop2Pat < SDPatternOperator node, @@ -143,20 +196,6 @@ multiclass GISelVop2IntrPat < def : GISelSop2Pat ; def : GISelVop2Pat ; -// FIXME: We can't re-use SelectionDAG patterns here because they match -// against a custom SDNode and we would need to create a generic machine -// instruction that is equivalent to the custom SDNode. This would also require -// us to custom legalize the intrinsic to the new generic machine instruction, -// but I can't get custom legalizing of intrinsic to work and I'm not sure if -// this is even supported yet. -def : GISelVop3Pat2ModsPat < - int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e64, v2f16, f32>; - -defm : GISelVop2IntrPat ; -def : GISelVop3Pat2ModsPat ; -defm : GISelVop2IntrPat ; -def : GISelVop3Pat2ModsPat ; - // Since GlobalISel is more flexible then SelectionDAG, I think we can get // away with adding patterns for integer types and not legalizing all // loads and stores to vector types.
This should help simplify the load/store @@ -164,3 +203,6 @@ def : GISelVop3Pat2ModsPat ; foreach Ty = [i64, p0, p1, p4] in { defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>; } + +def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">, + GISDNodeXFormEquiv; diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 0a1f48231b1..85d1ad34915 100644 --- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -22,15 +22,17 @@ enum PartialMappingIdx { PM_SGPR128 = 9, PM_SGPR256 = 10, PM_SGPR512 = 11, - PM_VGPR1 = 12, - PM_VGPR16 = 16, - PM_VGPR32 = 17, - PM_VGPR64 = 18, - PM_VGPR128 = 19, - PM_VGPR256 = 20, - PM_VGPR512 = 21, - PM_SGPR96 = 22, - PM_VGPR96 = 23 + PM_SGPR1024 = 12, + PM_VGPR1 = 13, + PM_VGPR16 = 17, + PM_VGPR32 = 18, + PM_VGPR64 = 19, + PM_VGPR128 = 20, + PM_VGPR256 = 21, + PM_VGPR512 = 22, + PM_VGPR1024 = 23, + PM_SGPR96 = 24, + PM_VGPR96 = 25 }; const RegisterBankInfo::PartialMapping PartMappings[] { @@ -45,6 +47,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, SGPRRegBank}, {0, 256, SGPRRegBank}, {0, 512, SGPRRegBank}, + {0, 1024, SGPRRegBank}, {0, 1, VGPRRegBank}, // VGPR begin {0, 16, VGPRRegBank}, @@ -53,8 +56,9 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, VGPRRegBank}, {0, 256, VGPRRegBank}, {0, 512, VGPRRegBank}, + {0, 1024, VGPRRegBank}, {0, 96, SGPRRegBank}, - {0, 96, VGPRRegBank}, + {0, 96, VGPRRegBank} }; const RegisterBankInfo::ValueMapping ValMappings[] { @@ -65,41 +69,43 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[1], 1}, // SGPRs - {&PartMappings[2], 1}, + {&PartMappings[2], 1}, // 1 {nullptr, 0}, // Illegal power of 2 sizes {nullptr, 0}, {nullptr, 0}, - {&PartMappings[3], 1}, - {&PartMappings[4], 1}, - {&PartMappings[5], 1}, - {&PartMappings[6], 1}, - {&PartMappings[7], 1}, - {&PartMappings[8], 1}, + {&PartMappings[3], 1}, // 16 + {&PartMappings[4], 1}, // 
32 + {&PartMappings[5], 1}, // 64 + {&PartMappings[6], 1}, // 128 + {&PartMappings[7], 1}, // 256 + {&PartMappings[8], 1}, // 512 + {&PartMappings[9], 1}, // 1024 - // VGPRs - {&PartMappings[9], 1}, + // VGPRs + {&PartMappings[10], 1}, // 1 {nullptr, 0}, {nullptr, 0}, {nullptr, 0}, - {&PartMappings[10], 1}, - {&PartMappings[11], 1}, - {&PartMappings[12], 1}, - {&PartMappings[13], 1}, - {&PartMappings[14], 1}, - {&PartMappings[15], 1}, - {&PartMappings[16], 1}, - {&PartMappings[17], 1} + {&PartMappings[11], 1}, // 16 + {&PartMappings[12], 1}, // 32 + {&PartMappings[13], 1}, // 64 + {&PartMappings[14], 1}, // 128 + {&PartMappings[15], 1}, // 256 + {&PartMappings[16], 1}, // 512 + {&PartMappings[17], 1}, // 1024 + {&PartMappings[18], 1}, + {&PartMappings[19], 1} }; const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { - /*32-bit op*/ {0, 32, SGPRRegBank}, - /*2x32-bit op*/ {0, 32, SGPRRegBank}, - {32, 32, SGPRRegBank}, -/*<2x32-bit> op*/ {0, 64, SGPRRegBank}, + {0, 32, SGPRRegBank}, // 32-bit op + {0, 32, SGPRRegBank}, // 2x32-bit op + {32, 32, SGPRRegBank}, + {0, 64, SGPRRegBank}, // <2x32-bit> op - /*32-bit op*/ {0, 32, VGPRRegBank}, - /*2x32-bit op*/ {0, 32, VGPRRegBank}, - {32, 32, VGPRRegBank}, + {0, 32, VGPRRegBank}, // 32-bit op + {0, 32, VGPRRegBank}, // 2x32-bit op + {32, 32, VGPRRegBank}, }; @@ -116,7 +122,7 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, - VGPRStartIdx = 12 + VGPRStartIdx = 13 }; const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index b31de0af501..9f5bcd8ff5f 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -218,12 +218,13 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || 
F.getCallingConv() == CallingConv::SPIR_KERNEL); - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, MaxKernArgAlign); HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; - HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u); + HSACodeProps.mKernargSegmentAlign = + std::max(MaxKernArgAlign, Align(4)).value(); HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; @@ -883,7 +884,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, auto Kern = HSAMetadataDoc->getMapNode(); - unsigned MaxKernArgAlign; + Align MaxKernArgAlign; Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( STM.getKernArgSegmentSize(F, MaxKernArgAlign)); Kern[".group_segment_fixed_size"] = @@ -891,7 +892,7 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); Kern[".kernarg_segment_align"] = - Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign)); + Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); Kern[".wavefront_size"] = Kern.getDocument()->getNode(STM.getWavefrontSize()); Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 2eecddbd7b0..80ac8ca67bc 100644 --- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -52,7 +52,7 @@ public: class MetadataStreamerV3 final : public MetadataStreamer { private: std::unique_ptr HSAMetadataDoc = - llvm::make_unique(); + std::make_unique(); void dump(StringRef HSAMetadataString) const; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ea730539f83..f330bd7ebcd 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -172,8 +172,9 @@ private: MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; - SDNode *glueCopyToM0LDSInit(SDNode *N) const; + SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; + SDNode *glueCopyToM0LDSInit(SDNode *N) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -186,10 +187,11 @@ private: bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, - SDValue &SLC, SDValue &TFE, SDValue &DLC) const; + SDValue &SLC, SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const; bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; @@ -202,21 +204,20 @@ private: bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const; + SDValue &TFE, SDValue &DLC, SDValue &SWZ) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; + template + bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; bool SelectFlatAtomicSigned(SDNode 
*N, SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; - template - bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, - SDValue &Offset, SDValue &SLC) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; SDValue Expand32BitAddress(SDValue Addr) const; @@ -262,6 +263,8 @@ private: SDValue getHi16Elt(SDValue In) const; + SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; + void SelectADD_SUB_I64(SDNode *N); void SelectAddcSubb(SDNode *N); void SelectUADDO_USUBO(SDNode *N); @@ -282,6 +285,7 @@ private: void SelectDSAppendConsume(SDNode *N, unsigned IntrID); void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectINTRINSIC_W_CHAIN(SDNode *N); + void SelectINTRINSIC_WO_CHAIN(SDNode *N); void SelectINTRINSIC_VOID(SDNode *N); protected: @@ -543,7 +547,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, if (!N->isMachineOpcode()) { if (N->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast(N->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); return MRI.getRegClass(Reg); } @@ -582,24 +586,25 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } } +SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain, + SDValue Glue) const { + SmallVector Ops; + Ops.push_back(NewChain); // Replace the chain. 
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + + Ops.push_back(Glue); + return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); +} + SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { const SITargetLowering& Lowering = *static_cast(getTargetLowering()); assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); - SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), - Val); - - SDValue Glue = M0.getValue(1); - - SmallVector Ops; - Ops.push_back(M0); // Replace the chain. - for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); - - Ops.push_back(Glue); - return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); + SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); + return glueCopyToOp(N, M0, M0.getValue(1)); } SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { @@ -635,13 +640,13 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { switch (NumVectorElts) { case 1: - return AMDGPU::SReg_32_XM0RegClassID; + return AMDGPU::SReg_32RegClassID; case 2: return AMDGPU::SReg_64RegClassID; case 3: return AMDGPU::SGPR_96RegClassID; case 4: - return AMDGPU::SReg_128RegClassID; + return AMDGPU::SGPR_128RegClassID; case 5: return AMDGPU::SGPR_160RegClassID; case 8: @@ -713,12 +718,17 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { return; // Already selected. } - if (isa(N) || + // isa almost works but is slightly too permissive for some DS + // intrinsics. 
+ if (Opc == ISD::LOAD || Opc == ISD::STORE || isa(N) || (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || Opc == ISD::ATOMIC_LOAD_FADD || Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || - Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) + Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) { N = glueCopyToM0LDSInit(N); + SelectCode(N); + return; + } switch (Opc) { default: @@ -781,7 +791,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue RC, SubReg0, SubReg1; SDLoc DL(N); if (N->getValueType(0) == MVT::i128) { - RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { @@ -815,14 +825,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); return; } - case ISD::LOAD: - case ISD::STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_STORE: { - N = glueCopyToM0LDSInit(N); - break; - } - case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { // There is a scalar version available, but unlike the vector version which @@ -908,6 +910,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectINTRINSIC_W_CHAIN(N); return; } + case ISD::INTRINSIC_WO_CHAIN: { + SelectINTRINSIC_WO_CHAIN(N); + return; + } case ISD::INTRINSIC_VOID: { SelectINTRINSIC_VOID(N); return; @@ -961,6 +967,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, + const SDLoc &DL) const { + SDNode *Mov = CurDAG->getMachineNode( + AMDGPU::S_MOV_B32, DL, MVT::i32, + CurDAG->getTargetConstant(Val, DL, MVT::i32)); + return SDValue(Mov, 0); +} + // FIXME: Should only handle addcarry/subcarry void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); @@ -1308,7 +1322,8 @@ bool 
AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { // Subtarget prefers to use flat instruction if (Subtarget->useFlatForGlobal()) return false; @@ -1321,6 +1336,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); + SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1); Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -1400,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE, - SDValue &DLC) const { + SDValue &DLC, SDValue &SWZ) const { SDValue Ptr, Offen, Idxen, Addr64; // addr64 bit was removed for volcanic islands. 
@@ -1408,7 +1424,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return false; if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; ConstantSDNode *C = cast(Addr64); @@ -1430,9 +1446,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &Offset, SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, SWZ; - return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ); } static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { @@ -1557,13 +1573,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, - SDValue &TFE, SDValue &DLC) const { + SDValue &TFE, SDValue &DLC, + SDValue &SWZ) const { SDValue Ptr, VAddr, Offen, Idxen, Addr64; const SIInstrInfo *TII = static_cast(Subtarget->getInstrInfo()); if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE, DLC)) + GLC, SLC, TFE, DLC, SWZ)) return false; if (!cast(Offen)->getSExtValue() && @@ -1585,16 +1602,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset ) const { - SDValue GLC, SLC, TFE, DLC; + SDValue GLC, SLC, TFE, DLC, SWZ; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); } bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &SLC) const { - SDValue GLC, TFE, DLC; + SDValue GLC, TFE, DLC, 
SWZ; - return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); + return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ); +} + +// Find a load or store from corresponding pattern root. +// Roots may be build_vector, bitconvert or their combinations. +static MemSDNode* findMemSDNode(SDNode *N) { + N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); + if (MemSDNode *MN = dyn_cast(N)) + return MN; + assert(isa(N)); + for (SDValue V : N->op_values()) + if (MemSDNode *MN = + dyn_cast(AMDGPUTargetLowering::stripBitcast(V))) + return MN; + llvm_unreachable("cannot find MemSDNode in the pattern!"); } template @@ -1603,8 +1634,95 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return static_cast(getTargetLowering())-> - SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + (!Subtarget->hasFlatSegmentOffsetBug() || + findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + uint64_t COffsetVal = cast(N1)->getSExtValue(); + + const SIInstrInfo *TII = Subtarget->getInstrInfo(); + unsigned AS = findMemSDNode(N)->getAddressSpace(); + if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) { + Addr = N0; + OffsetVal = COffsetVal; + } else { + // If the offset doesn't fit, put the low bits into the offset field and + // add the rest. + + SDLoc DL(N); + uint64_t ImmField; + const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned); + if (IsSigned) { + ImmField = SignExtend64(COffsetVal, NumBits); + + // Don't use a negative offset field if the base offset is positive. + // Since the scheduler currently relies on the offset field, doing so + // could result in strange scheduling decisions. 
+ + // TODO: Should we not do this in the opposite direction as well? + if (static_cast(COffsetVal) > 0) { + if (static_cast(ImmField) < 0) { + const uint64_t OffsetMask = maskTrailingOnes(NumBits - 1); + ImmField = COffsetVal & OffsetMask; + } + } + } else { + // TODO: Should we do this for a negative offset? + const uint64_t OffsetMask = maskTrailingOnes(NumBits); + ImmField = COffsetVal & OffsetMask; + } + + uint64_t RemainderOffset = COffsetVal - ImmField; + + assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned)); + assert(RemainderOffset + ImmField == COffsetVal); + + OffsetVal = ImmField; + + // TODO: Should this try to use a scalar add pseudo if the base address is + // uniform and saddr is usable? + SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); + SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); + + SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub0); + SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, N0, Sub1); + + SDValue AddOffsetLo + = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL); + SDValue AddOffsetHi + = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL); + + SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1); + SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + + SDNode *Add = CurDAG->getMachineNode( + AMDGPU::V_ADD_I32_e64, DL, VTs, + {AddOffsetLo, SDValue(N0Lo, 0), Clamp}); + + SDNode *Addc = CurDAG->getMachineNode( + AMDGPU::V_ADDC_U32_e64, DL, VTs, + {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp}); + + SDValue RegSequenceArgs[] = { + CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32), + SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1 + }; + + Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::i64, RegSequenceArgs), 0); + } + } + + VAddr = Addr; + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); + SLC = CurDAG->getTargetConstant(0, SDLoc(), 
MVT::i1); + return true; } bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, @@ -1616,10 +1734,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, } bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { + SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { return SelectFlatOffset(N, Addr, VAddr, Offset, SLC); } @@ -2158,10 +2276,12 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { // offset field) % 64. Some versions of the programming guide omit the m0 // part, or claim it's from offset 0. if (ConstantSDNode *ConstOffset = dyn_cast(BaseOffset)) { - // If we have a constant offset, try to use the default value for m0 as a - // base to possibly avoid setting it up. - glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32)); - ImmOffset = ConstOffset->getZExtValue() + 1; + // If we have a constant offset, try to use the 0 in m0 as the base. + // TODO: Look into changing the default m0 initialization value. If the + // default -1 only set the low 16-bits, we could leave it as-is and add 1 to + // the immediate offset. + glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32)); + ImmOffset = ConstOffset->getZExtValue(); } else { if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { ImmOffset = BaseOffset.getConstantOperandVal(1); @@ -2182,22 +2302,7 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { glueCopyToM0(N, SDValue(M0Base, 0)); } - SDValue V0; SDValue Chain = N->getOperand(0); - SDValue Glue; - if (HasVSrc) { - SDValue VSrc0 = N->getOperand(2); - - // The manual doesn't mention this, but it seems only v0 works. 
- V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); - - SDValue CopyToV0 = CurDAG->getCopyToReg( - N->getOperand(0), SL, V0, VSrc0, - N->getOperand(N->getNumOperands() - 1)); - Chain = CopyToV0; - Glue = CopyToV0.getValue(1); - } - SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); // TODO: Can this just be removed from the instruction? @@ -2206,14 +2311,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { const unsigned Opc = gwsIntrinToOpcode(IntrID); SmallVector Ops; if (HasVSrc) - Ops.push_back(V0); + Ops.push_back(N->getOperand(2)); Ops.push_back(OffsetField); Ops.push_back(GDS); Ops.push_back(Chain); - if (HasVSrc) - Ops.push_back(Glue); - SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); CurDAG->setNodeMemRefs(cast(Selected), {MMO}); } @@ -2233,6 +2335,28 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { SelectCode(N); } +void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { + unsigned IntrID = cast(N->getOperand(0))->getZExtValue(); + unsigned Opcode; + switch (IntrID) { + case Intrinsic::amdgcn_wqm: + Opcode = AMDGPU::WQM; + break; + case Intrinsic::amdgcn_softwqm: + Opcode = AMDGPU::SOFT_WQM; + break; + case Intrinsic::amdgcn_wwm: + Opcode = AMDGPU::WWM; + break; + default: + SelectCode(N); + return; + } + + SDValue Src = N->getOperand(1); + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); +} + void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { unsigned IntrID = cast(N->getOperand(1))->getZExtValue(); switch (IntrID) { diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 39016ed3719..1115d8c2362 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -#define AMDGPU_LOG2E_F 1.44269504088896340735992468100189214f -#define AMDGPU_LN2_F 
0.693147180559945309417232121458176568f -#define AMDGPU_LN10_F 2.30258509299404568401799145468436421f - #include "AMDGPUISelLowering.h" #include "AMDGPU.h" #include "AMDGPUCallLowering.h" @@ -37,82 +33,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; -static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State, - const TargetRegisterClass *RC, - unsigned NumRegs) { - ArrayRef RegList = makeArrayRef(RC->begin(), NumRegs); - unsigned RegResult = State.AllocateReg(RegList); - if (RegResult == AMDGPU::NoRegister) - return false; - - State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo)); - return true; -} - -static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::i64: - case MVT::f64: - case MVT::v2i32: - case MVT::v2f32: - case MVT::v4i16: - case MVT::v4f16: { - // Up to SGPR0-SGPR105 - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::SGPR_64RegClass, 53); - } - default: - return false; - } -} - -// Allocate up to VGPR31. -// -// TODO: Since there are no VGPR alignent requirements would it be better to -// split into individual scalar registers? 
-static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::i64: - case MVT::f64: - case MVT::v2i32: - case MVT::v2f32: - case MVT::v4i16: - case MVT::v4f16: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_64RegClass, 31); - } - case MVT::v4i32: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v2f64: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_128RegClass, 29); - } - case MVT::v8i32: - case MVT::v8f32: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_256RegClass, 25); - - } - case MVT::v16i32: - case MVT::v16f32: { - return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, - &AMDGPU::VReg_512RegClass, 17); - - } - default: - return false; - } -} - #include "AMDGPUGenCallingConv.inc" // Find a larger type to do a load / store of a vector with. 
@@ -208,7 +131,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); } - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); @@ -218,6 +141,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); @@ -225,8 +151,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); @@ -286,8 +215,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); 
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); + setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); + setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -571,6 +503,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FABS); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::AssertSext); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); } //===----------------------------------------------------------------------===// @@ -630,15 +563,26 @@ static bool hasSourceMods(const SDNode *N) { case ISD::FREM: case ISD::INLINEASM: case ISD::INLINEASM_BR: - case AMDGPUISD::INTERP_P1: - case AMDGPUISD::INTERP_P2: case AMDGPUISD::DIV_SCALE: + case ISD::INTRINSIC_W_CHAIN: // TODO: Should really be looking at the users of the bitcast. These are // problematic because bitcasts are used to legalize all stores to integer // types. case ISD::BITCAST: return false; + case ISD::INTRINSIC_WO_CHAIN: { + switch (cast(N->getOperand(0))->getZExtValue()) { + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_interp_mov: + case Intrinsic::amdgcn_interp_p1_f16: + case Intrinsic::amdgcn_interp_p2_f16: + return false; + default: + return true; + } + } default: return true; } @@ -745,8 +689,9 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, return false; bool Fast = false; - return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy, - MMO, &Fast) && Fast; + return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + CastTy, MMO, &Fast) && + Fast; } // SI+ has instructions for cttz / ctlz for 32-bit values. 
This is probably also @@ -782,9 +727,8 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const { break; case ISD::LOAD: { - const LoadSDNode * L = dyn_cast(N); - if (L->getMemOperand()->getAddrSpace() - == AMDGPUAS::CONSTANT_ADDRESS_32BIT) + if (cast(N)->getMemOperand()->getAddrSpace() == + AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; return false; } @@ -1199,9 +1143,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FROUND: return LowerFROUND(Op, DAG); case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); case ISD::FLOG: - return LowerFLOG(Op, DAG, 1 / AMDGPU_LOG2E_F); + return LowerFLOG(Op, DAG, 1.0F / numbers::log2ef); case ISD::FLOG10: - return LowerFLOG(Op, DAG, AMDGPU_LN2_F / AMDGPU_LN10_F); + return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f); case ISD::FEXP: return lowerFEXP(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); @@ -1236,7 +1180,7 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, } } -static bool hasDefinedInitializer(const GlobalValue *GV) { +bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) { const GlobalVariable *GVar = dyn_cast(GV); if (!GVar || !GVar->hasInitializer()) return false; @@ -2349,30 +2293,13 @@ SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand); } -// Return M_LOG2E of appropriate type -static SDValue getLog2EVal(SelectionDAG &DAG, const SDLoc &SL, EVT VT) { - switch (VT.getScalarType().getSimpleVT().SimpleTy) { - case MVT::f32: - return DAG.getConstantFP(1.44269504088896340735992468100189214f, SL, VT); - case MVT::f16: - return DAG.getConstantFP( - APFloat(APFloat::IEEEhalf(), "1.44269504088896340735992468100189214"), - SL, VT); - case MVT::f64: - return DAG.getConstantFP( - APFloat(APFloat::IEEEdouble(), "0x1.71547652b82fep+0"), SL, VT); - default: - llvm_unreachable("unsupported fp type"); - } -} - // exp2(M_LOG2E_F * f); SDValue 
AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); SDValue Src = Op.getOperand(0); - const SDValue K = getLog2EVal(DAG, SL, VT); + const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT); SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags()); return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags()); } @@ -2836,8 +2763,16 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) { static SDValue simplifyI24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - SDValue LHS = Node24->getOperand(0); - SDValue RHS = Node24->getOperand(1); + bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; + + SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); + SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1); + unsigned NewOpcode = Node24->getOpcode(); + if (IsIntrin) { + unsigned IID = cast(Node24->getOperand(0))->getZExtValue(); + NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ? + AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; + } APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); @@ -2847,7 +2782,7 @@ static SDValue simplifyI24(SDNode *Node24, SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded); SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded); if (DemandedLHS || DemandedRHS) - return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(), + return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), DemandedLHS ? DemandedLHS : LHS, DemandedRHS ? DemandedRHS : RHS); @@ -2904,54 +2839,6 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { return true; } -// Find a load or store from corresponding pattern root. -// Roots may be build_vector, bitconvert or their combinations. 
-static MemSDNode* findMemSDNode(SDNode *N) { - N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode(); - if (MemSDNode *MN = dyn_cast(N)) - return MN; - assert(isa(N)); - for (SDValue V : N->op_values()) - if (MemSDNode *MN = - dyn_cast(AMDGPUTargetLowering::stripBitcast(V))) - return MN; - llvm_unreachable("cannot find MemSDNode in the pattern!"); -} - -bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, - SelectionDAG &DAG, - SDNode *N, - SDValue Addr, - SDValue &VAddr, - SDValue &Offset, - SDValue &SLC) const { - const GCNSubtarget &ST = - DAG.getMachineFunction().getSubtarget(); - int64_t OffsetVal = 0; - - if (ST.hasFlatInstOffsets() && - (!ST.hasFlatSegmentOffsetBug() || - findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) && - DAG.isBaseWithConstantOffset(Addr)) { - SDValue N0 = Addr.getOperand(0); - SDValue N1 = Addr.getOperand(1); - int64_t COffsetVal = cast(N1)->getSExtValue(); - - const SIInstrInfo *TII = ST.getInstrInfo(); - if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(), - IsSigned)) { - Addr = N0; - OffsetVal = COffsetVal; - } - } - - VAddr = Addr; - Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16); - SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1); - - return true; -} - // Replace load of an illegal type with a store of a bitcast to a friendlier // type. 
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, @@ -3085,6 +2972,19 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, return SDValue(); } + +SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( + SDNode *N, DAGCombinerInfo &DCI) const { + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + switch (IID) { + case Intrinsic::amdgcn_mul_i24: + case Intrinsic::amdgcn_mul_u24: + return simplifyI24(N, DCI); + default: + return SDValue(); + } +} + /// Split the 64-bit value \p LHS into two 32-bit components, and perform the /// binary operation \p Opc to it with the corresponding constant operands. SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( @@ -4173,6 +4073,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, case ISD::AssertZext: case ISD::AssertSext: return performAssertSZExtCombine(N, DCI); + case ISD::INTRINSIC_WO_CHAIN: + return performIntrinsicWOChainCombine(N, DCI); } return SDValue(); } @@ -4203,14 +4105,28 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); } +// This may be called multiple times, and nothing prevents creating multiple +// objects at the same offset. See if we already defined this object. 
+static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, + int64_t Offset) { + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + if (MFI.getObjectOffset(I) == Offset) { + assert(MFI.getObjectSize(I) == Size); + return I; + } + } + + return MFI.CreateFixedObject(Size, Offset, true); +} + SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset); - int FI = MFI.CreateFixedObject(VT.getStoreSize(), Offset, true); auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); @@ -4260,7 +4176,7 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction()); unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction()); - unsigned Alignment = ST.getAlignmentForImplicitArgPtr(); + const Align Alignment = ST.getAlignmentForImplicitArgPtr(); uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) + ExplicitArgOffset; switch (Param) { @@ -4295,6 +4211,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FRACT) NODE_NAME_CASE(SETCC) NODE_NAME_CASE(SETREG) + NODE_NAME_CASE(DENORM_MODE) NODE_NAME_CASE(FMA_W_CHAIN) NODE_NAME_CASE(FMUL_W_CHAIN) NODE_NAME_CASE(CLAMP) @@ -4377,13 +4294,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; - NODE_NAME_CASE(INIT_EXEC) - NODE_NAME_CASE(INIT_EXEC_FROM_INPUT) - NODE_NAME_CASE(SENDMSG) - NODE_NAME_CASE(SENDMSGHALT) - NODE_NAME_CASE(INTERP_MOV) - NODE_NAME_CASE(INTERP_P1) - NODE_NAME_CASE(INTERP_P2) NODE_NAME_CASE(INTERP_P1LL_F16) NODE_NAME_CASE(INTERP_P1LV_F16) 
NODE_NAME_CASE(INTERP_P2_F16) @@ -4428,6 +4338,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_AND) NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) + NODE_NAME_CASE(BUFFER_ATOMIC_INC) + NODE_NAME_CASE(BUFFER_ATOMIC_DEC) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) NODE_NAME_CASE(BUFFER_ATOMIC_FADD) NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD) @@ -4576,9 +4488,9 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; } else if (SelBits == 0x0c) { - Known.Zero |= 0xff << I; + Known.Zero |= 0xFFull << I; } else if (SelBits > 0x0c) { - Known.One |= 0xff << I; + Known.One |= 0xFFull << I; } Sel >>= 8; } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index fe7ad694943..dea0d1d4343 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -38,6 +38,7 @@ private: public: static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); + static bool hasDefinedInitializer(const GlobalValue *GV); protected: SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -78,6 +79,7 @@ protected: SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, @@ -324,10 +326,6 @@ public: } AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - - bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N, - SDValue Addr, SDValue &VAddr, SDValue &Offset, - 
SDValue &SLC) const; }; namespace AMDGPUISD { @@ -369,6 +367,9 @@ enum NodeType : unsigned { // result bit per item in the wavefront. SETCC, SETREG, + + DENORM_MODE, + // FP ops with input and output chain. FMA_W_CHAIN, FMUL_W_CHAIN, @@ -475,13 +476,6 @@ enum NodeType : unsigned { BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, - INIT_EXEC, - INIT_EXEC_FROM_INPUT, - SENDMSG, - SENDMSGHALT, - INTERP_MOV, - INTERP_P1, - INTERP_P2, INTERP_P1LL_F16, INTERP_P1LV_F16, INTERP_P2_F16, @@ -532,6 +526,8 @@ enum NodeType : unsigned { BUFFER_ATOMIC_AND, BUFFER_ATOMIC_OR, BUFFER_ATOMIC_XOR, + BUFFER_ATOMIC_INC, + BUFFER_ATOMIC_DEC, BUFFER_ATOMIC_CMPSWAP, BUFFER_ATOMIC_FADD, BUFFER_ATOMIC_PK_FADD, diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp index f4df20b8f03..a83ec23ec05 100644 --- a/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -51,7 +51,7 @@ ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), // Inliner constraint to achieve reasonable compilation time static cl::opt -MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300), +MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum BB number allowed in a function after inlining" " (compile time constraint)")); diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 4a844695549..cf0ce565995 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -110,39 +110,38 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // Force dependencies for vector trunc stores def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; -def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; -def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; - +def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; +def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", 
SDTFPUnaryOp>; // out = a - floor(a) -def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; // out = 1.0 / a -def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; +def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) -def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; +def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) -def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>; -def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; +def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; +def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>; def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) result clamped to +/- max_float. -def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; +def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>; -def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; +def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; -def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; -def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; -def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; -def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; +def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>; +def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>; +def AMDGPUpk_i16_i32_impl : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>; +def AMDGPUpk_u16_u32_impl : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>; 
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; -def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; +def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. // This is not commutative because this gives the second operand: @@ -285,7 +284,7 @@ def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; -def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; +def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>; @@ -320,7 +319,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, [] >; -def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, @@ -330,35 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>; -def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT", - SDTypeProfile<0, 2, - [SDTCisInt<0>, SDTCisInt<1>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", - SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPHasChain, SDNPInGlue]>; - -def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", - SDTypeProfile<1, 3, [SDTCisFP<0>]>, - [SDNPInGlue]>; - -def AMDGPUinterp_p1 : 
SDNode<"AMDGPUISD::INTERP_P1", - SDTypeProfile<1, 3, [SDTCisFP<0>]>, - [SDNPInGlue, SDNPOutGlue]>; - -def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2", - SDTypeProfile<1, 4, [SDTCisFP<0>]>, - [SDNPInGlue]>; - def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16", SDTypeProfile<1, 7, [SDTCisFP<0>]>, [SDNPInGlue, SDNPOutGlue]>; @@ -425,3 +395,65 @@ def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; + + +//===----------------------------------------------------------------------===// +// Intrinsic/Custom node compatability PatFrags +//===----------------------------------------------------------------------===// + +def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src), + (AMDGPUrcp_impl node:$src)]>; +def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src), + (AMDGPUrcp_legacy_impl node:$src)]>; + +def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src), + (AMDGPUrsq_legacy_impl node:$src)]>; + +def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src), + (AMDGPUrsq_impl node:$src)]>; + +def AMDGPUrsq_clamp : PatFrags<(ops node:$src), [(int_amdgcn_rsq_clamp node:$src), + (AMDGPUrsq_clamp_impl node:$src)]>; + +def AMDGPUsin : PatFrags<(ops node:$src), [(int_amdgcn_sin node:$src), + (AMDGPUsin_impl node:$src)]>; +def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src), + (AMDGPUcos_impl node:$src)]>; +def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src), + (AMDGPUfract_impl node:$src)]>; + +def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_ldexp node:$src0, node:$src1), + (AMDGPUldexp_impl node:$src0, node:$src1)]>; + +def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_class node:$src0, node:$src1), + (AMDGPUfp_class_impl node:$src0, 
node:$src1)]>; + +def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), + [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), + (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; + +def AMDGPUffbh_i32 : PatFrags<(ops node:$src), + [(int_amdgcn_sffbh node:$src), + (AMDGPUffbh_i32_impl node:$src)]>; + +def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1), + (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1), + (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1), + (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pk_i16 node:$src0, node:$src1), + (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)]>; + +def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_cvt_pk_u16 node:$src0, node:$src1), + (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)]>; diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 901a2eaa882..3cfa9d57ec4 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -19,8 +19,10 @@ #include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -61,8 +63,14 @@ 
AMDGPUInstructionSelector::AMDGPUInstructionSelector( const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } +void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) { + MRI = &MF.getRegInfo(); + InstructionSelector::setupMF(MF, KB, CoverageInfo); +} + static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg == AMDGPU::SCC; auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -71,7 +79,9 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { if (RC) { // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the // context of the register bank has been lost. - if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID) + // Has a hack getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which + // won't ever beconstrained any further. + if (RC != &AMDGPU::SGPR_32RegClass) return false; const LLT Ty = MRI.getType(Reg); return Ty.isValid() && Ty.getSizeInBits() == 1; @@ -83,7 +93,7 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { bool AMDGPUInstructionSelector::isVCC(Register Reg, const MachineRegisterInfo &MRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return Reg == TRI.getVCC(); auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -102,8 +112,6 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg, bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); I.setDesc(TII.get(TargetOpcode::COPY)); const MachineOperand &Src = I.getOperand(1); @@ -111,33 +119,33 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { Register DstReg = Dst.getReg(); Register SrcReg = Src.getReg(); - if 
(isVCC(DstReg, MRI)) { + if (isVCC(DstReg, *MRI)) { if (SrcReg == AMDGPU::SCC) { const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(Dst, MRI); + = TRI.getConstrainedRegClassForOperand(Dst, *MRI); if (!RC) return true; - return RBI.constrainGenericRegister(DstReg, *RC, MRI); + return RBI.constrainGenericRegister(DstReg, *RC, *MRI); } - if (!isVCC(SrcReg, MRI)) { + if (!isVCC(SrcReg, *MRI)) { // TODO: Should probably leave the copy and let copyPhysReg expand it. - if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI)) + if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI)) return false; BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) .addImm(0) .addReg(SrcReg); - if (!MRI.getRegClassOrNull(SrcReg)) - MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI)); + if (!MRI->getRegClassOrNull(SrcReg)) + MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI)); I.eraseFromParent(); return true; } const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI)) + TRI.getConstrainedRegClassForOperand(Dst, *MRI); + if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) return false; // Don't constrain the source register to a class so the def instruction @@ -148,8 +156,8 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 
if (Src.isUndef()) { const TargetRegisterClass *SrcRC = - TRI.getConstrainedRegClassForOperand(Src, MRI); - if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) + TRI.getConstrainedRegClassForOperand(Src, *MRI); + if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) return false; } @@ -157,30 +165,26 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { } for (const MachineOperand &MO : I.operands()) { - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) continue; const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); + TRI.getConstrainedRegClassForOperand(MO, *MRI); if (!RC) continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); + RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); } return true; } bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const Register DefReg = I.getOperand(0).getReg(); - const LLT DefTy = MRI.getType(DefReg); + const LLT DefTy = MRI->getType(DefReg); // TODO: Verify this doesn't have insane operands (i.e. 
VGPR to SGPR copy) const RegClassOrRegBank &RegClassOrBank = - MRI.getRegClassOrRegBank(DefReg); + MRI->getRegClassOrRegBank(DefReg); const TargetRegisterClass *DefRC = RegClassOrBank.dyn_cast(); @@ -196,7 +200,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { return false; } - DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI); + DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI); if (!DefRC) { LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); return false; @@ -204,7 +208,7 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { } I.setDesc(TII.get(TargetOpcode::PHI)); - return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); + return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI); } MachineOperand @@ -214,13 +218,11 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, MachineInstr *MI = MO.getParent(); MachineBasicBlock *BB = MO.getParent()->getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - Register DstReg = MRI.createVirtualRegister(&SubRC); + Register DstReg = MRI->createVirtualRegister(&SubRC); if (MO.isReg()) { unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) .addReg(Reg, 0, ComposedSubIdx); @@ -244,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, } } -static int64_t getConstant(const MachineInstr *MI) { - return MI->getOperand(1).getCImm()->getSExtValue(); -} - static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { switch (Opc) { case AMDGPU::G_AND: @@ -262,16 +260,13 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { } bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); 
MachineOperand &Dst = I.getOperand(0); MachineOperand &Src0 = I.getOperand(1); MachineOperand &Src1 = I.getOperand(2); Register DstReg = Dst.getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); if (DstRB->getID() == AMDGPU::VCCRegBankID) { const TargetRegisterClass *RC = TRI.getBoolRC(); unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), @@ -282,12 +277,12 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { // The selector for G_ICMP relies on seeing the register bank for the result // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will // be ambiguous whether it's a scalar or vector bool. - if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg())) - MRI.setRegClass(Src0.getReg(), RC); - if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg())) - MRI.setRegClass(Src1.getReg(), RC); + if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg())) + MRI->setRegClass(Src0.getReg(), RC); + if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg())) + MRI->setRegClass(Src1.getReg(), RC); - return RBI.constrainGenericRegister(DstReg, *RC, MRI); + return RBI.constrainGenericRegister(DstReg, *RC, *MRI); } // TODO: Should this allow an SCC bank result, and produce a copy from SCC for @@ -295,14 +290,7 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { if (DstRB->getID() == AMDGPU::SGPRRegBankID) { unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); I.setDesc(TII.get(InstOpc)); - - const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (!RC) - return false; - return RBI.constrainGenericRegister(DstReg, *RC, MRI) && - RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && - RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); + return 
constrainSelectedInstRegOperands(I, TII, TRI, RBI); } return false; @@ -311,11 +299,10 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); Register DstReg = I.getOperand(0).getReg(); const DebugLoc &DL = I.getDebugLoc(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; @@ -340,7 +327,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64; - Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass()); + Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass()); MachineInstr *Add = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) .addDef(UnusedCarry, RegState::Dead) @@ -363,8 +350,8 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); - Register DstLo = MRI.createVirtualRegister(&HalfRC); - Register DstHi = MRI.createVirtualRegister(&HalfRC); + Register DstLo = MRI->createVirtualRegister(&HalfRC); + Register DstHi = MRI->createVirtualRegister(&HalfRC); if (IsSALU) { BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) @@ -375,14 +362,14 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { .add(Hi2); } else { const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); - Register CarryReg = 
MRI.createVirtualRegister(CarryRC); + Register CarryReg = MRI->createVirtualRegister(CarryRC); BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo) .addDef(CarryReg) .add(Lo1) .add(Lo2) .addImm(0); MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) - .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead) + .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead) .add(Hi1) .add(Hi2) .addReg(CarryReg, RegState::Kill) @@ -399,7 +386,48 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { .addImm(AMDGPU::sub1); - if (!RBI.constrainGenericRegister(DstReg, RC, MRI)) + if (!RBI.constrainGenericRegister(DstReg, RC, *MRI)) + return false; + + I.eraseFromParent(); + return true; +} + +bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const DebugLoc &DL = I.getDebugLoc(); + Register Dst0Reg = I.getOperand(0).getReg(); + Register Dst1Reg = I.getOperand(1).getReg(); + const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO; + + if (!isSCC(Dst1Reg, MRI)) { + // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned + // carry out despite the _i32 name. These were renamed in VI to _U32. + // FIXME: We should probably rename the opcodes here. + unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; + I.setDesc(TII.get(NewOpc)); + I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + I.addOperand(*MF, MachineOperand::CreateImm(0)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + + Register Src0Reg = I.getOperand(2).getReg(); + Register Src1Reg = I.getOperand(3).getReg(); + unsigned NewOpc = IsAdd ? 
AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; + BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg) + .add(I.getOperand(2)) + .add(I.getOperand(3)); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) + .addReg(AMDGPU::SCC); + + if (!MRI.getRegClassOrNull(Dst1Reg)) + MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); + + if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) || + !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) || + !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI)) return false; I.eraseFromParent(); @@ -408,10 +436,11 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - assert(I.getOperand(2).getImm() % 32 == 0); - unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32); + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 32 != 0) + return false; + + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32); const DebugLoc &DL = I.getDebugLoc(); MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), I.getOperand(0).getReg()) @@ -419,10 +448,10 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { for (const MachineOperand &MO : Copy->operands()) { const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); + TRI.getConstrainedRegClassForOperand(MO, *MRI); if (!RC) continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); + RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); } I.eraseFromParent(); return true; @@ -430,21 +459,19 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = 
MF->getRegInfo(); Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + LLT DstTy = MRI->getType(DstReg); + LLT SrcTy = MRI->getType(MI.getOperand(1).getReg()); const unsigned SrcSize = SrcTy.getSizeInBits(); if (SrcSize < 32) return false; const DebugLoc &DL = MI.getDebugLoc(); - const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); const unsigned DstSize = DstTy.getSizeInBits(); const TargetRegisterClass *DstRC = - TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI); + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); if (!DstRC) return false; @@ -457,12 +484,12 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MIB.addImm(SubRegs[I]); const TargetRegisterClass *SrcRC - = TRI.getConstrainedRegClassForOperand(Src, MRI); - if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI)) + = TRI.getConstrainedRegClassForOperand(Src, *MRI); + if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI)) return false; } - if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) return false; MI.eraseFromParent(); @@ -471,25 +498,23 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const int NumDst = MI.getNumOperands() - 1; MachineOperand &Src = MI.getOperand(NumDst); Register SrcReg = Src.getReg(); Register DstReg0 = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg0); - LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI->getType(DstReg0); + LLT SrcTy = MRI->getType(SrcReg); const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = 
SrcTy.getSizeInBits(); const DebugLoc &DL = MI.getDebugLoc(); - const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); const TargetRegisterClass *SrcRC = - TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI); - if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI)) + TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI); + if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) return false; const unsigned SrcFlags = getUndefRegState(Src.isUndef()); @@ -504,8 +529,8 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { .addReg(SrcReg, SrcFlags, SubRegs[I]); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(Dst, MRI); - if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI)) + TRI.getConstrainedRegClassForOperand(Dst, *MRI); + if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI)) return false; } @@ -518,16 +543,13 @@ bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const MachineOperand &MO = I.getOperand(0); // FIXME: Interface for getConstrainedRegClassForOperand needs work. The // regbank check here is to know why getConstrainedRegClassForOperand failed. 
- const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI); - if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) || - (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) { + const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI); + if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) || + (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) { I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); return true; } @@ -537,44 +559,62 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32); - DebugLoc DL = I.getDebugLoc(); - MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG)) - .addDef(I.getOperand(0).getReg()) - .addReg(I.getOperand(1).getReg()) - .addReg(I.getOperand(2).getReg()) - .addImm(SubReg); - for (const MachineOperand &MO : Ins->operands()) { - if (!MO.isReg()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; + Register DstReg = I.getOperand(0).getReg(); + Register Src0Reg = I.getOperand(1).getReg(); + Register Src1Reg = I.getOperand(2).getReg(); + LLT Src1Ty = MRI->getType(Src1Reg); + + unsigned DstSize = MRI->getType(DstReg).getSizeInBits(); + unsigned InsSize = Src1Ty.getSizeInBits(); + + int64_t Offset = I.getOperand(3).getImm(); + if (Offset % 32 != 0) + return false; + + unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); + if (SubReg == AMDGPU::NoSubRegister) + return false; + + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + if (!DstRC) + return false; + + const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, 
TRI); + const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI); + const TargetRegisterClass *Src0RC = + TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI); + const TargetRegisterClass *Src1RC = + TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI); + + // Deal with weird cases where the class only partially supports the subreg + // index. + Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg); + if (!Src0RC) + return false; + + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) || + !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI)) + return false; + + const DebugLoc &DL = I.getDebugLoc(); + BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg) + .addReg(Src0Reg) + .addReg(Src1Reg) + .addImm(SubReg); - const TargetRegisterClass *RC = - TRI.getConstrainedRegClassForOperand(MO, MRI); - if (!RC) - continue; - RBI.constrainGenericRegister(MO.getReg(), *RC, MRI); - } I.eraseFromParent(); return true; } -bool AMDGPUInstructionSelector::selectG_INTRINSIC( - MachineInstr &I, CodeGenCoverage &CoverageInfo) const { - unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); +bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { + unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { - case Intrinsic::maxnum: - case Intrinsic::minnum: - case Intrinsic::amdgcn_cvt_pkrtz: - return selectImpl(I, CoverageInfo); case Intrinsic::amdgcn_if_break: { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. 
@@ -589,15 +629,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC( I.eraseFromParent(); - for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { - if (!MRI.getRegClassOrNull(Reg)) - MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); - } + for (Register Reg : { DstReg, Src0Reg, Src1Reg }) + MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } default: - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); } } @@ -677,17 +715,15 @@ int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const DebugLoc &DL = I.getDebugLoc(); - unsigned SrcReg = I.getOperand(2).getReg(); - unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI); + Register SrcReg = I.getOperand(2).getReg(); + unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); - unsigned CCReg = I.getOperand(0).getReg(); - if (isSCC(CCReg, MRI)) { + Register CCReg = I.getOperand(0).getReg(); + if (isSCC(CCReg, *MRI)) { int Opcode = getS_CMPOpcode(Pred, Size); if (Opcode == -1) return false; @@ -698,7 +734,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { .addReg(AMDGPU::SCC); bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && - RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI); + RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI); I.eraseFromParent(); return Ret; } @@ -712,7 +748,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const { .add(I.getOperand(2)) .add(I.getOperand(3)); RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), - *TRI.getBoolRC(), MRI); + *TRI.getBoolRC(), *MRI); bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); I.eraseFromParent(); return Ret; @@ -736,19 +772,273 @@ buildEXP(const 
TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, .addImm(Enabled); } -bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( - MachineInstr &I, CodeGenCoverage &CoverageInfo) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + if (mi_match(Reg, MRI, m_ICst(C)) && C == 0) + return true; - unsigned IntrinsicID = I.getOperand(0).getIntrinsicID(); + // FIXME: matcher should ignore copies + return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; +} + +static unsigned extractGLC(unsigned AuxiliaryData) { + return AuxiliaryData & 1; +} + +static unsigned extractSLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 1) & 1; +} + +static unsigned extractDLC(unsigned AuxiliaryData) { + return (AuxiliaryData >> 2) & 1; +} + +static unsigned extractSWZ(unsigned AuxiliaryData) { + return (AuxiliaryData >> 3) & 1; +} + +// Returns Base register, constant offset, and offset def point. 
+static std::tuple +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (!Def) + return std::make_tuple(Reg, 0, nullptr); + + if (Def->getOpcode() == AMDGPU::G_CONSTANT) { + unsigned Offset; + const MachineOperand &Op = Def->getOperand(1); + if (Op.isImm()) + Offset = Op.getImm(); + else + Offset = Op.getCImm()->getZExtValue(); + + return std::make_tuple(Register(), Offset, Def); + } + + int64_t Offset; + if (Def->getOpcode() == AMDGPU::G_ADD) { + // TODO: Handle G_OR used for add case + if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset))) + return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); + + // FIXME: matcher should ignore copies + if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset)))) + return std::make_tuple(Def->getOperand(0).getReg(), Offset, Def); + } + + return std::make_tuple(Reg, 0, Def); +} + +static unsigned getBufferStoreOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + const int Size = Ty.getSizeInBits(); + switch (8 * MemSize) { + case 8: + return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + case 16: + return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + default: + unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + return Opc; + } +} + +static unsigned getBufferStoreFormatOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + bool IsD16Packed = Ty.getScalarSizeInBits() == 16; + bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits(); + int NumElts = Ty.isVector() ? Ty.getNumElements() : 1; + + if (IsD16Packed) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? 
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact; + default: + return -1; + } + } + + if (IsD16Unpacked) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact; + default: + return -1; + } + } + + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact; + default: + return -1; + } + + llvm_unreachable("unhandled buffer store"); +} + +// TODO: Move this to combiner +// Returns base register, imm offset, total constant offset. 
+std::tuple +AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned TotalConstOffset; + MachineInstr *OffsetDef; + + std::tie(BaseReg, TotalConstOffset, OffsetDef) + = getBaseWithConstantOffset(*MRI, OrigOffset); + + unsigned ImmOffset = TotalConstOffset; + + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store.f + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + if (Overflow != 0) { + // In case this is in a waterfall loop, insert offset code at the def point + // of the offset, not inside the loop. 
+ MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); + MachineBasicBlock &OldMBB = B.getMBB(); + B.setInstr(*OffsetDef); + + if (!BaseReg) { + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(BaseReg) + .addImm(Overflow); + } else { + Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(OverflowVal) + .addImm(Overflow); + + Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) + .addReg(BaseReg) + .addReg(OverflowVal, RegState::Kill) + .addImm(0); + BaseReg = NewBaseReg; + } + + B.setInsertPt(OldMBB, OldInsPt); + } + + return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset); +} + +bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, + bool IsFormat) const { + MachineIRBuilder B(MI); + MachineFunction &MF = B.getMF(); + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI->getType(VData); + + int Size = Ty.getSizeInBits(); + if (Size % 32 != 0) + return false; + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + MachineMemOperand *MMO = *MI.memoperands_begin(); + const int MemSize = MMO->getSize(); + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned AuxiliaryData = MI.getOperand(5).getImm(); + unsigned ImmOffset; + unsigned TotalOffset; + + std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); + if (TotalOffset != 0) + MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); + + const bool Offen = !isZero(VOffset, *MRI); + + int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : + getBufferStoreOpcode(Ty, MemSize, Offen); + if (Opc == -1) + return false; + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(AuxiliaryData)) + .addImm(extractSLC(AuxiliaryData)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(AuxiliaryData)) + .addImm(extractSWZ(AuxiliaryData)) + .addMemOperand(MMO); + + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + +bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( + MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + int64_t Done = I.getOperand(7).getImm(); + int64_t VM = I.getOperand(8).getImm(); MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), I.getOperand(4).getReg(), @@ -761,13 +1051,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( } case Intrinsic::amdgcn_exp_compr: { const DebugLoc &DL = I.getDebugLoc(); - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - unsigned Reg0 = I.getOperand(3).getReg(); - unsigned Reg1 = I.getOperand(4).getReg(); - unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); - int64_t VM = 
getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + Register Reg0 = I.getOperand(3).getReg(); + Register Reg1 = I.getOperand(4).getReg(); + Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + int64_t Done = I.getOperand(5).getImm(); + int64_t VM = I.getOperand(6).getImm(); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, @@ -786,27 +1076,29 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( Register Reg = I.getOperand(1).getReg(); I.eraseFromParent(); - if (!MRI.getRegClassOrNull(Reg)) - MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); + if (!MRI->getRegClassOrNull(Reg)) + MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return selectStoreIntrinsic(I, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return selectStoreIntrinsic(I, true); default: - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); } } bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const DebugLoc &DL = I.getDebugLoc(); - unsigned DstReg = I.getOperand(0).getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + Register DstReg = I.getOperand(0).getReg(); + unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); assert(Size <= 32 || Size == 64); const MachineOperand &CCOp = I.getOperand(1); - unsigned CCReg = CCOp.getReg(); - if (isSCC(CCReg, MRI)) { + Register CCReg = CCOp.getReg(); + if (isSCC(CCReg, *MRI)) { unsigned SelectOpcode = Size == 64 ? 
AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) @@ -815,8 +1107,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { // The generic constrainSelectedInstRegOperands doesn't work for the scc register // bank, because it does not cover the register class that we used to represent // for it. So we need to manually set the register class here. - if (!MRI.getRegClassOrNull(CCReg)) - MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI)); + if (!MRI->getRegClassOrNull(CCReg)) + MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) .add(I.getOperand(2)) .add(I.getOperand(3)); @@ -845,52 +1137,8 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - DebugLoc DL = I.getDebugLoc(); - unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI); - if (PtrSize != 64) { - LLVM_DEBUG(dbgs() << "Unhandled address space\n"); - return false; - } - - unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); - unsigned Opcode; - - // FIXME: Remove this when integers > s32 naturally selected. 
- switch (StoreSize) { - default: - return false; - case 32: - Opcode = AMDGPU::FLAT_STORE_DWORD; - break; - case 64: - Opcode = AMDGPU::FLAT_STORE_DWORDX2; - break; - case 96: - Opcode = AMDGPU::FLAT_STORE_DWORDX3; - break; - case 128: - Opcode = AMDGPU::FLAT_STORE_DWORDX4; - break; - } - - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) - .add(I.getOperand(1)) - .add(I.getOperand(0)) - .addImm(0) // offset - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // dlc - - - // Now that we selected an opcode, we need to constrain the register - // operands to use appropriate classes. - bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - - I.eraseFromParent(); - return Ret; + initM0(I); + return selectImpl(I, *CoverageInfo); } static int sizeToSubRegIndex(unsigned Size) { @@ -915,19 +1163,15 @@ static int sizeToSubRegIndex(unsigned Size) { } bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg = I.getOperand(1).getReg(); - const LLT DstTy = MRI.getType(DstReg); - const LLT SrcTy = MRI.getType(SrcReg); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + const LLT DstTy = MRI->getType(DstReg); + const LLT SrcTy = MRI->getType(SrcReg); if (!DstTy.isScalar()) return false; - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); - const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); if (SrcRB != DstRB) return false; @@ -935,9 +1179,9 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { unsigned SrcSize = SrcTy.getSizeInBits(); const TargetRegisterClass *SrcRC - = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI); + = 
TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI); const TargetRegisterClass *DstRC - = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI); + = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI); if (SrcSize > 32) { int SubRegIdx = sizeToSubRegIndex(DstSize); @@ -953,8 +1197,8 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { I.getOperand(1).setSubReg(SubRegIdx); } - if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || - !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) { LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); return false; } @@ -974,20 +1218,18 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { bool Signed = I.getOpcode() == AMDGPU::G_SEXT; const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); - const LLT DstTy = MRI.getType(DstReg); - const LLT SrcTy = MRI.getType(SrcReg); + const LLT DstTy = MRI->getType(DstReg); + const LLT SrcTy = MRI->getType(SrcReg); const LLT S1 = LLT::scalar(1); const unsigned SrcSize = SrcTy.getSizeInBits(); const unsigned DstSize = DstTy.getSizeInBits(); if (!DstTy.isScalar()) return false; - const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); if (SrcBank->getID() == AMDGPU::SCCRegBankID) { if (SrcTy != S1 || DstSize > 64) // Invalid @@ -1000,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { // FIXME: Create an extra copy to avoid incorrectly constraining the result // of the scc producer. 
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg) .addReg(SrcReg); BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) @@ -1010,7 +1252,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { BuildMI(MBB, I, DL, TII.get(Opcode), DstReg) .addImm(0) .addImm(Signed ? -1 : 1); - return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); } if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) { @@ -1024,6 +1267,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addImm(0) // src1_modifiers .addImm(Signed ? -1 : 1) // src1 .addUse(SrcReg); + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } @@ -1040,6 +1284,7 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) .addImm(Mask) .addReg(SrcReg); + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } @@ -1049,11 +1294,12 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addReg(SrcReg) .addImm(0) // Offset .addImm(SrcSize); // Width + I.eraseFromParent(); return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); } if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { - if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI)) + if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI)) return false; if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { @@ -1061,7 +1307,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) .addReg(SrcReg); - return RBI.constrainGenericRegister(DstReg, 
AMDGPU::SReg_32RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); } const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; @@ -1070,10 +1317,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. if (DstSize > 32 && SrcSize <= 32) { // We need a 64-bit register source, but the high bits don't matter. - unsigned ExtReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned UndefReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) .addReg(SrcReg) @@ -1085,7 +1330,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addReg(ExtReg) .addImm(SrcSize << 16); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI); } unsigned Mask; @@ -1099,16 +1345,58 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { .addImm(SrcSize << 16); } - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); } return false; } +static int64_t getFPTrueImmVal(unsigned Size, bool Signed) { + switch (Size) { + case 16: + return Signed ? 0xBC00 : 0x3C00; + case 32: + return Signed ? 0xbf800000 : 0x3f800000; + case 64: + return Signed ? 
0xbff0000000000000 : 0x3ff0000000000000; + default: + llvm_unreachable("Invalid FP type size"); + } +} + +bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const { + MachineBasicBlock *MBB = I.getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + Register Src = I.getOperand(1).getReg(); + if (!isSCC(Src, MRI)) + return selectImpl(I, *CoverageInfo); + + bool Signed = I.getOpcode() == AMDGPU::G_SITOFP; + Register DstReg = I.getOperand(0).getReg(); + const LLT DstTy = MRI.getType(DstReg); + const unsigned DstSize = DstTy.getSizeInBits(); + const DebugLoc &DL = I.getDebugLoc(); + + BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) + .addReg(Src); + + unsigned NewOpc = + DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; + auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg) + .addImm(0) + .addImm(getFPTrueImmVal(DstSize, Signed)); + + if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &ImmOp = I.getOperand(1); // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 
@@ -1119,15 +1407,15 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); } - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); unsigned Size; bool IsSgpr; - const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); + const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg()); if (RB) { IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; - Size = MRI.getType(DstReg).getSizeInBits(); + Size = MRI->getType(DstReg).getSizeInBits(); } else { - const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); + const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg); IsSgpr = TRI.isSGPRClass(RC); Size = TRI.getRegSizeInBits(*RC); } @@ -1142,34 +1430,41 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - DebugLoc DL = I.getDebugLoc(); - const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : - &AMDGPU::VGPR_32RegClass; - unsigned LoReg = MRI.createVirtualRegister(RC); - unsigned HiReg = MRI.createVirtualRegister(RC); - const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); + const DebugLoc &DL = I.getDebugLoc(); - BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) - .addImm(Imm.trunc(32).getZExtValue()); + APInt Imm(Size, I.getOperand(1).getImm()); - BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) - .addImm(Imm.ashr(32).getZExtValue()); + MachineInstr *ResInst; + if (IsSgpr && TII.isInlineConstant(Imm)) { + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) + .addImm(I.getOperand(1).getImm()); + } else { + const TargetRegisterClass *RC = IsSgpr ? 
+ &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass; + Register LoReg = MRI->createVirtualRegister(RC); + Register HiReg = MRI->createVirtualRegister(RC); - const MachineInstr *RS = - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) + .addImm(Imm.trunc(32).getZExtValue()); + + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) + .addImm(Imm.ashr(32).getZExtValue()); + + ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + } // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); const TargetRegisterClass *DstRC = - TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); + TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); if (!DstRC) return true; - return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); } static bool isConstant(const MachineInstr &MI) { @@ -1188,13 +1483,13 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, GEPInfo GEPInfo(*PtrMI); - for (unsigned i = 1, e = 3; i < e; ++i) { + for (unsigned i = 1; i != 3; ++i) { const MachineOperand &GEPOp = PtrMI->getOperand(i); const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); assert(OpDef); - if (isConstant(*OpDef)) { - // FIXME: Is it possible to have multiple Imm parts? Maybe if we - // are lacking other optimizations. + if (i == 2 && isConstant(*OpDef)) { + // TODO: Could handle constant base + variable offset, but a combine + // probably should have commuted it. 
assert(GEPInfo.Imm == 0); GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); continue; @@ -1240,16 +1535,26 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef AddrInfo) const { return false; } -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - // TODO: Can/should we insert m0 initialization here for DS instructions and - // call the normal selector? - return false; +void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { + MachineBasicBlock *BB = I.getParent(); + + const LLT PtrTy = MRI->getType(I.getOperand(1).getReg()); + unsigned AS = PtrTy.getAddressSpace(); + if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && + STI.ldsRequiresM0Init()) { + // If DS instructions require M0 initializtion, insert it before selecting. + BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addImm(-1); + } +} + +bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const { + initM0(I); + return selectImpl(I, *CoverageInfo); } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineOperand &CondOp = I.getOperand(0); Register CondReg = CondOp.getReg(); const DebugLoc &DL = I.getDebugLoc(); @@ -1263,11 +1568,12 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { // GlobalISel, we should push that decision into RegBankSelect. Assume for now // RegBankSelect knows what it's doing if the branch condition is scc, even // though it currently does not. 
- if (isSCC(CondReg, MRI)) { + if (isSCC(CondReg, *MRI)) { CondPhysReg = AMDGPU::SCC; BrOpcode = AMDGPU::S_CBRANCH_SCC1; - ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; - } else if (isVCC(CondReg, MRI)) { + // FIXME: Hack for isSCC tests + ConstrainRC = &AMDGPU::SGPR_32RegClass; + } else if (isVCC(CondReg, *MRI)) { // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? // We sort of know that a VCC producer based on the register bank, that ands // inactive lanes with 0. What if there was a logical operation with vcc @@ -1279,8 +1585,8 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { } else return false; - if (!MRI.getRegClassOrNull(CondReg)) - MRI.setRegClass(CondReg, ConstrainRC); + if (!MRI->getRegClassOrNull(CondReg)) + MRI->setRegClass(CondReg, ConstrainRC); BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) .addReg(CondReg); @@ -1292,27 +1598,83 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - Register DstReg = I.getOperand(0).getReg(); - const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); if (IsVGPR) I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); return RBI.constrainGenericRegister( - DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI); + DstReg, IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI); } -bool AMDGPUInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const { + uint64_t Align = I.getOperand(2).getImm(); + const uint64_t Mask = ~((UINT64_C(1) << Align) - 1); + + MachineBasicBlock *BB = I.getParent(); + + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); + const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; + unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; + unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; + const TargetRegisterClass &RegRC + = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; + + LLT Ty = MRI->getType(DstReg); + + const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB, + *MRI); + const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB, + *MRI); + if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) + return false; + + const DebugLoc &DL = I.getDebugLoc(); + Register ImmReg = MRI->createVirtualRegister(&RegRC); + BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg) + .addImm(Mask); + + if (Ty.getSizeInBits() == 32) { + BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) + .addReg(SrcReg) + .addReg(ImmReg); + I.eraseFromParent(); + return true; + } + + Register HiReg = MRI->createVirtualRegister(&RegRC); + Register LoReg = MRI->createVirtualRegister(&RegRC); + Register MaskLo = MRI->createVirtualRegister(&RegRC); + + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg) + .addReg(SrcReg, 0, AMDGPU::sub0); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg) + .addReg(SrcReg, 0, AMDGPU::sub1); + + BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo) + .addReg(LoReg) + 
.addReg(ImmReg); + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(MaskLo) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + I.eraseFromParent(); + return true; +} + +bool AMDGPUInstructionSelector::select(MachineInstr &I) { if (I.isPHI()) return selectPHI(I); - if (!isPreISelGenericOpcode(I.getOpcode())) { + if (!I.isPreISelOpcode()) { if (I.isCopy()) return selectCOPY(I); return true; @@ -1324,16 +1686,18 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_XOR: if (selectG_AND_OR_XOR(I)) return true; - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: - if (selectG_ADD_SUB(I)) + if (selectImpl(I, *CoverageInfo)) return true; - LLVM_FALLTHROUGH; - default: - return selectImpl(I, CoverageInfo); + return selectG_ADD_SUB(I); + case TargetOpcode::G_UADDO: + case TargetOpcode::G_USUBO: + return selectG_UADDO_USUBO(I); case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_BITCAST: + case TargetOpcode::G_PTRTOINT: return selectCOPY(I); case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: @@ -1353,32 +1717,40 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_INSERT: return selectG_INSERT(I); case TargetOpcode::G_INTRINSIC: - return selectG_INTRINSIC(I, CoverageInfo); + return selectG_INTRINSIC(I); case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: - return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo); + return selectG_INTRINSIC_W_SIDE_EFFECTS(I); case TargetOpcode::G_ICMP: if (selectG_ICMP(I)) return true; - return selectImpl(I, CoverageInfo); + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_LOAD: - return selectImpl(I, CoverageInfo); + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case 
TargetOpcode::G_ATOMICRMW_XOR: + case TargetOpcode::G_ATOMICRMW_MIN: + case TargetOpcode::G_ATOMICRMW_MAX: + case TargetOpcode::G_ATOMICRMW_UMIN: + case TargetOpcode::G_ATOMICRMW_UMAX: + case TargetOpcode::G_ATOMICRMW_FADD: + return selectG_LOAD_ATOMICRMW(I); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: - if (selectImpl(I, CoverageInfo)) - return true; return selectG_STORE(I); case TargetOpcode::G_TRUNC: return selectG_TRUNC(I); case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: - if (selectG_SZA_EXT(I)) { - I.eraseFromParent(); - return true; - } - - return false; + return selectG_SZA_EXT(I); + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return selectG_SITOFP_UITOFP(I); case TargetOpcode::G_BRCOND: return selectG_BRCOND(I); case TargetOpcode::G_FRAME_INDEX: @@ -1388,6 +1760,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, // is checking for G_CONSTANT I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); return true; + case TargetOpcode::G_PTR_MASK: + return selectG_PTR_MASK(I); + default: + return selectImpl(I, *CoverageInfo); } return false; } @@ -1402,14 +1778,14 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { std::pair AMDGPUInstructionSelector::selectVOP3ModsImpl( - Register Src, const MachineRegisterInfo &MRI) const { + Register Src) const { unsigned Mods = 0; - MachineInstr *MI = MRI.getVRegDef(Src); + MachineInstr *MI = MRI->getVRegDef(Src); if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { Src = MI->getOperand(1).getReg(); Mods |= SISrcMods::NEG; - MI = MRI.getVRegDef(Src); + MI = MRI->getVRegDef(Src); } if (MI && MI->getOpcode() == AMDGPU::G_FABS) { @@ -1432,12 +1808,9 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { - MachineRegisterInfo &MRI - = Root.getParent()->getParent()->getParent()->getRegInfo(); - 
Register Src; unsigned Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -1446,6 +1819,21 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const { + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { return {{ @@ -1457,12 +1845,9 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { - MachineRegisterInfo &MRI - = Root.getParent()->getParent()->getParent()->getRegInfo(); - Register Src; unsigned Mods; - std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg()); return {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, @@ -1471,12 +1856,28 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { } InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { - MachineRegisterInfo &MRI = - Root.getParent()->getParent()->getParent()->getRegInfo(); +AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const { + // FIXME: Handle clamp and op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + 
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp + }}; +} +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { + // FIXME: Handle op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { SmallVector AddrInfo; - getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); + getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) return None; @@ -1496,11 +1897,8 @@ AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { - MachineRegisterInfo &MRI = - Root.getParent()->getParent()->getParent()->getRegInfo(); - SmallVector AddrInfo; - getAddrModeInfo(*Root.getParent(), MRI, AddrInfo); + getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) return None; @@ -1521,10 +1919,9 @@ InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); SmallVector AddrInfo; - getAddrModeInfo(*MI, MRI, AddrInfo); + getAddrModeInfo(*MI, *MRI, AddrInfo); // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, // then we can select all ptr + 32-bit offsets not just immediate offsets. @@ -1540,7 +1937,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { // failed trying to select this load into one of the _IMM variants since // the _IMM Patterns are considered before the _SGPR patterns. 
unsigned PtrReg = GEPInfo.SgprParts[0]; - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(GEPInfo.Imm); return {{ @@ -1553,8 +1950,6 @@ template InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); - MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); InstructionSelector::ComplexRendererFns Default = {{ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, @@ -1565,12 +1960,12 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { if (!STI.hasFlatInstOffsets()) return Default; - const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); + const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg()); if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) return Default; Optional Offset = - getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); + getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI); if (!Offset.hasValue()) return Default; @@ -1597,12 +1992,6 @@ AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { return selectFlatOffsetImpl(Root); } -// FIXME: Implement -static bool signBitIsZero(const MachineOperand &Op, - const MachineRegisterInfo &MRI) { - return false; -} - static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { auto PSV = PtrInfo.V.dyn_cast(); return PSV && PSV->isStack(); @@ -1613,12 +2002,11 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); const SIMachineFunctionInfo *Info = MF->getInfo(); int64_t Offset = 0; - if (mi_match(Root.getReg(), 
MRI, m_ICst(Offset))) { - Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) { + Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); // TODO: Should this be inside the render function? The iterator seems to // move. @@ -1652,18 +2040,18 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { // offsets. Optional FI; Register VAddr = Root.getReg(); - if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { - if (isBaseWithConstantOffset(Root, MRI)) { + if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) { + if (isBaseWithConstantOffset(Root, *MRI)) { const MachineOperand &LHS = RootDef->getOperand(1); const MachineOperand &RHS = RootDef->getOperand(2); - const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); - const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg()); if (LHSDef && RHSDef) { int64_t PossibleOffset = RHSDef->getOperand(1).getCImm()->getSExtValue(); if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && (!STI.privateMemoryResourceIsRangeChecked() || - signBitIsZero(LHS, MRI))) { + KnownBits->signBitIsZero(LHS.getReg()))) { if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) FI = LHSDef->getOperand(1).getIndex(); else @@ -1700,15 +2088,30 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }}}; } +bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, + unsigned OffsetBits) const { + if ((OffsetBits == 16 && !isUInt<16>(Offset)) || + (OffsetBits == 8 && !isUInt<8>(Offset))) + return false; + + if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) + return true; + + // On Southern Islands instruction with a negative base value and an offset + // don't seem to work. 
+ return KnownBits->signBitIsZero(Base.getReg()); +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffset( MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); int64_t Offset = 0; - if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || + if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) || !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) return {}; @@ -1728,3 +2131,54 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset( [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { + const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); + if (!RootDef) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; + } + + int64_t ConstAddr = 0; + if (isBaseWithConstantOffset(Root, *MRI)) { + const MachineOperand &LHS = RootDef->getOperand(1); + const MachineOperand &RHS = RootDef->getOperand(2); + const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t PossibleOffset = + RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) { + // (add n0, c0) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } + }}; + } + } + } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { + + + + } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { + + + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; +} + +void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + const 
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + Optional CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); + assert(CstVal && "Expected constant value"); + MIB.addImm(CstVal.getValue()); +} diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 4f489ddfb23..d3c83a6a872 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -35,6 +35,7 @@ class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class GCNSubtarget; class MachineInstr; +class MachineIRBuilder; class MachineOperand; class MachineRegisterInfo; class SIInstrInfo; @@ -42,14 +43,20 @@ class SIMachineFunctionInfo; class SIRegisterInfo; class AMDGPUInstructionSelector : public InstructionSelector { +private: + MachineRegisterInfo *MRI; + public: AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName(); + void setupMF(MachineFunction &MF, GISelKnownBits &KB, + CodeGenCoverage &CoverageInfo) override; + private: struct GEPInfo { const MachineInstr &GEP; @@ -72,32 +79,42 @@ private: bool selectPHI(MachineInstr &I) const; bool selectG_TRUNC(MachineInstr &I) const; bool selectG_SZA_EXT(MachineInstr &I) const; + bool selectG_SITOFP_UITOFP(MachineInstr &I) const; bool selectG_CONSTANT(MachineInstr &I) const; bool selectG_AND_OR_XOR(MachineInstr &I) const; bool selectG_ADD_SUB(MachineInstr &I) const; + bool selectG_UADDO_USUBO(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; bool selectG_GEP(MachineInstr &I) const; bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool 
selectG_INSERT(MachineInstr &I) const; - bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; - bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const; + bool selectG_INTRINSIC(MachineInstr &I) const; + + std::tuple + splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; + + bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; + + bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; bool selectG_ICMP(MachineInstr &I) const; bool hasVgprParts(ArrayRef AddrInfo) const; void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const; bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; - bool selectG_LOAD(MachineInstr &I) const; - bool selectG_SELECT(MachineInstr &I) const; + + void initM0(MachineInstr &I) const; + bool selectG_LOAD_ATOMICRMW(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; + bool selectG_SELECT(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; bool selectG_FRAME_INDEX(MachineInstr &I) const; + bool selectG_PTR_MASK(MachineInstr &I) const; std::pair - selectVOP3ModsImpl(Register Src, const MachineRegisterInfo &MRI) const; + selectVOP3ModsImpl(Register Src) const; InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; @@ -108,10 +125,17 @@ private: InstructionSelector::ComplexRendererFns selectVOP3Mods0(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns + selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods0(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods(MachineOperand &Root) const; 
+ InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns @@ -133,6 +157,16 @@ private: InstructionSelector::ComplexRendererFns selectMUBUFScratchOffset(MachineOperand &Root) const; + bool isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, unsigned OffsetBits) const; + + InstructionSelector::ComplexRendererFns + selectDS1Addr1Offset(MachineOperand &Root) const; + + void renderTruncImm32(MachineInstrBuilder &MIB, + const MachineInstr &MI) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 61bc415c839..846e7f577a2 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -75,7 +75,7 @@ class ILFormat pattern> let isCodeGenOnly = 1; } -def TruePredicate : Predicate<"true">; +def TruePredicate : Predicate<"">; class PredicateControl { Predicate SubtargetPredicate = TruePredicate; @@ -220,80 +220,48 @@ def hi_f16_elt : PatLeaf< // PatLeafs for floating-point comparisons //===----------------------------------------------------------------------===// -def COND_OEQ : PatLeaf < - (cond), - [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}] ->; - -def COND_ONE : PatLeaf < - (cond), - [{return N->get() == ISD::SETONE || N->get() == ISD::SETNE;}] ->; - -def COND_OGT : PatLeaf < - (cond), - [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}] ->; - -def COND_OGE : PatLeaf < - (cond), - [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}] ->; - -def COND_OLT : PatLeaf < - (cond), - [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}] ->; - -def COND_OLE : PatLeaf < - (cond), - [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}] ->; - -def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>; -def COND_UO : PatLeaf <(cond), 
[{return N->get() == ISD::SETUO;}]>; +def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>; +def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>; +def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>; +def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>; +def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>; +def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>; +def COND_O : PatFrags<(ops), [(OtherVT SETO)]>; +def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>; //===----------------------------------------------------------------------===// // PatLeafs for unsigned / unordered comparisons //===----------------------------------------------------------------------===// -def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>; -def COND_UNE : PatLeaf <(cond), [{return N->get() == ISD::SETUNE;}]>; -def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>; -def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>; -def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>; -def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>; +def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>; +def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>; +def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>; +def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>; +def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>; +def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>; // XXX - For some reason R600 version is preferring to use unordered // for setne? 
-def COND_UNE_NE : PatLeaf < - (cond), - [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}] ->; +def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>; //===----------------------------------------------------------------------===// // PatLeafs for signed comparisons //===----------------------------------------------------------------------===// -def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>; -def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>; -def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>; -def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>; +def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>; +def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>; +def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>; +def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>; //===----------------------------------------------------------------------===// // PatLeafs for integer equality //===----------------------------------------------------------------------===// -def COND_EQ : PatLeaf < - (cond), - [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}] ->; - -def COND_NE : PatLeaf < - (cond), - [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}] ->; +def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>; +def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>; +// FIXME: Should not need code predicate +//def COND_NULL : PatLeaf<(OtherVT null_frag)>; def COND_NULL : PatLeaf < (cond), [{(void)N; return false;}] @@ -335,17 +303,17 @@ def TEX_SHADOW_ARRAY : PatLeaf< // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] +>; + class AddressSpaceList AS> { list AddrSpaces = AS; } -class Aligned8Bytes : PatFrag (N)->getAlignment() % 8 == 0; -}]>; - -class 
Aligned16Bytes : PatFrag (N)->getAlignment() >= 16; -}]>; +class Aligned { + int MinAlignment = Bytes; +} class LoadFrag : PatFrag<(ops node:$ptr), (op node:$ptr)>; @@ -502,6 +470,35 @@ defm atomic_store_#as : binary_atomic_op; } // End foreach AddrSpace +multiclass ret_noret_binary_atomic_op { + foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { + let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { + defm "_"#as : binary_atomic_op; + + let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_noret" : binary_atomic_op; + } + + let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_ret" : binary_atomic_op; + } + } + } +} + +defm atomic_swap : ret_noret_binary_atomic_op; +defm atomic_load_add : ret_noret_binary_atomic_op; +defm atomic_load_and : ret_noret_binary_atomic_op; +defm atomic_load_max : ret_noret_binary_atomic_op; +defm atomic_load_min : ret_noret_binary_atomic_op; +defm atomic_load_or : ret_noret_binary_atomic_op; +defm atomic_load_sub : ret_noret_binary_atomic_op; +defm atomic_load_umax : ret_noret_binary_atomic_op; +defm atomic_load_umin : ret_noret_binary_atomic_op; +defm atomic_load_xor : ret_noret_binary_atomic_op; +defm atomic_load_fadd : ret_noret_binary_atomic_op; + + def store_hi16_private : StoreHi16 , PrivateAddress; def truncstorei8_hi16_private : StoreHi16, PrivateAddress; @@ -513,21 +510,31 @@ def store_local_hi16 : StoreHi16 , LocalAddress; def truncstorei8_local_hi16 : StoreHi16, LocalAddress; def atomic_store_local : LocalStore ; -def load_align8_local : Aligned8Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; -def load_align16_local : Aligned16Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; +def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 8; +} -def store_align8_local : Aligned8Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) 
->; +def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 16; +} + +def store_align8_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<8> { + let IsStore = 1; + let IsTruncStore = 0; +} + +def store_align16_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<16> { + let IsStore = 1; + let IsTruncStore = 0; +} -def store_align16_local : Aligned16Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) ->; def atomic_store_flat : FlatStore ; def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; @@ -547,69 +554,26 @@ class region_binary_atomic_op : }]>; -def atomic_swap_local : local_binary_atomic_op; -def atomic_load_add_local : local_binary_atomic_op; -def atomic_load_sub_local : local_binary_atomic_op; -def atomic_load_and_local : local_binary_atomic_op; -def atomic_load_or_local : local_binary_atomic_op; -def atomic_load_xor_local : local_binary_atomic_op; -def atomic_load_nand_local : local_binary_atomic_op; -def atomic_load_min_local : local_binary_atomic_op; -def atomic_load_max_local : local_binary_atomic_op; -def atomic_load_umin_local : local_binary_atomic_op; -def atomic_load_umax_local : local_binary_atomic_op; - def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; -class AtomicCmpSwapLocal : PatFrag< - (ops node:$ptr, node:$cmp, node:$swap), - (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ - AtomicSDNode *AN = cast(N); - return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; +let AddressSpaces = StoreAddress_local.AddrSpaces in { +defm atomic_cmp_swap_local : ternary_atomic_op; +defm atomic_cmp_swap_local_m0 : ternary_atomic_op; +} -class AtomicCmpSwapRegion : PatFrag< - (ops node:$ptr, node:$cmp, node:$swap), - (cmp_swap_node node:$ptr, 
node:$cmp, node:$swap), [{ - AtomicSDNode *AN = cast(N); - return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; - -def atomic_cmp_swap_local : AtomicCmpSwapLocal ; +let AddressSpaces = StoreAddress_region.AddrSpaces in { +defm atomic_cmp_swap_region : ternary_atomic_op; +defm atomic_cmp_swap_region_m0 : ternary_atomic_op; +} class global_binary_atomic_op_frag : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; -multiclass global_binary_atomic_op { - def "" : global_binary_atomic_op_frag; - - def _noret : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; - - def _ret : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; -} - -defm atomic_swap_global : global_binary_atomic_op; -defm atomic_add_global : global_binary_atomic_op; -defm atomic_and_global : global_binary_atomic_op; -defm atomic_max_global : global_binary_atomic_op; -defm atomic_min_global : global_binary_atomic_op; -defm atomic_or_global : global_binary_atomic_op; -defm atomic_sub_global : global_binary_atomic_op; -defm atomic_umax_global : global_binary_atomic_op; -defm atomic_umin_global : global_binary_atomic_op; -defm atomic_xor_global : global_binary_atomic_op; - // Legacy. def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 670f6225fbf..5aba35a19ce 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -11,6 +11,13 @@ /// \todo This should be generated by TableGen. 
//===----------------------------------------------------------------------===// +#if defined(_MSC_VER) || defined(__MINGW32__) +// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI +// from the Visual C++ cmath / math.h headers: +// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019 +#define _USE_MATH_DEFINES +#endif + #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" #include "AMDGPUTargetMachine.h" @@ -20,6 +27,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" @@ -32,7 +40,7 @@ using namespace LegalityPredicates; static LegalityPredicate isMultiple32(unsigned TypeIdx, - unsigned MaxSize = 512) { + unsigned MaxSize = 1024) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; const LLT EltTy = Ty.getScalarType(); @@ -40,12 +48,27 @@ static LegalityPredicate isMultiple32(unsigned TypeIdx, }; } +static LegalityPredicate sizeIs(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getSizeInBits() == Size; + }; +} + static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; return Ty.isVector() && Ty.getNumElements() % 2 != 0 && - Ty.getElementType().getSizeInBits() < 32; + Ty.getElementType().getSizeInBits() < 32 && + Ty.getSizeInBits() % 32 != 0; + }; +} + +static LegalityPredicate isWideVec16(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + const LLT EltTy = Ty.getScalarType(); + return EltTy.getSizeInBits() == 16 && Ty.getNumElements() > 2; }; } @@ -68,6 +91,31 @@ static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) { }; } +// Increase the number of vector elements to reach the next multiple of 32-bit +// type. 
+static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + + const LLT EltTy = Ty.getElementType(); + const int Size = Ty.getSizeInBits(); + const int EltSize = EltTy.getSizeInBits(); + const int NextMul32 = (Size + 31) / 32; + + assert(EltSize < 32); + + const int NewNumElts = (32 * NextMul32 + EltSize - 1) / EltSize; + return std::make_pair(TypeIdx, LLT::vector(NewNumElts, EltTy)); + }; +} + +static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) { + return [=](const LegalityQuery &Query) { + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isVector() && QueryTy.getSizeInBits() < Size; + }; +} + static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; @@ -82,7 +130,7 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) { }; } -// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of +// Any combination of 32 or 64-bit elements up to 1024 bits, and multiples of // v2s16. 
static LegalityPredicate isRegisterType(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { @@ -94,7 +142,21 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) { EltSize == 128 || EltSize == 256; } - return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512; + return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 1024; + }; +} + +static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx].getElementType() == Type; + }; +} + +static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + return !Ty.isVector() && Ty.getSizeInBits() > 32 && + Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits(); }; } @@ -112,9 +174,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); + const LLT S96 = LLT::scalar(96); const LLT S128 = LLT::scalar(128); const LLT S256 = LLT::scalar(256); - const LLT S512 = LLT::scalar(512); + const LLT S1024 = LLT::scalar(1024); const LLT V2S16 = LLT::vector(2, 16); const LLT V4S16 = LLT::vector(4, 16); @@ -134,6 +197,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V14S32 = LLT::vector(14, 32); const LLT V15S32 = LLT::vector(15, 32); const LLT V16S32 = LLT::vector(16, 32); + const LLT V32S32 = LLT::vector(32, 32); const LLT V2S64 = LLT::vector(2, 64); const LLT V3S64 = LLT::vector(3, 64); @@ -142,16 +206,19 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V6S64 = LLT::vector(6, 64); const LLT V7S64 = LLT::vector(7, 64); const LLT V8S64 = LLT::vector(8, 64); + const LLT V16S64 = LLT::vector(16, 64); std::initializer_list AllS32Vectors = {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32, - V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32}; + V9S32, V10S32, V11S32, 
V12S32, V13S32, V14S32, V15S32, V16S32, V32S32}; std::initializer_list AllS64Vectors = - {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64}; + {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64, V16S64}; const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS); const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS); + const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT); const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS); + const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS); const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS); const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS); @@ -162,7 +229,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, }; const std::initializer_list AddrSpaces32 = { - LocalPtr, PrivatePtr + LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr }; const std::initializer_list FPTypesBase = { @@ -216,37 +283,34 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) .clampScalar(0, S32, S64) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0)) + .fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0)) .widenScalarToNextPow2(0) .scalarize(0); - getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO, + getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE}) .legalFor({{S32, S1}}) - .clampScalar(0, S32, S32); + .clampScalar(0, S32, S32) + .scalarize(0); // TODO: Implement. + + getActionDefinitionsBuilder({G_SADDO, G_SSUBO}) + .lower(); getActionDefinitionsBuilder(G_BITCAST) - .legalForCartesianProduct({S32, V2S16}) - .legalForCartesianProduct({S64, V2S32, V4S16}) - .legalForCartesianProduct({V2S64, V4S32}) // Don't worry about the size constraint. 
- .legalIf(all(isPointer(0), isPointer(1))); + .legalIf(all(isRegisterType(0), isRegisterType(1))) + // FIXME: Testing hack + .legalForCartesianProduct({S16, LLT::vector(2, 8), }); - if (ST.has16BitInsts()) { - getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({S32, S64, S16}) - .clampScalar(0, S16, S64); - } else { - getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({S32, S64}) - .clampScalar(0, S32, S64); - } + getActionDefinitionsBuilder(G_FCONSTANT) + .legalFor({S32, S64, S16}) + .clampScalar(0, S16, S64); getActionDefinitionsBuilder(G_IMPLICIT_DEF) - .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr, + .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr, ConstantPtr, LocalPtr, FlatPtr, PrivatePtr}) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .clampScalarOrElt(0, S32, S512) + .clampScalarOrElt(0, S32, S1024) .legalIf(isMultiple32(0)) .widenScalarToNextPow2(0, 32) .clampMaxNumElements(0, S32, 16); @@ -256,23 +320,33 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // values may not be legal. We need to figure out how to distinguish // between these two scenarios. 
getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({S1, S32, S64, GlobalPtr, + .legalFor({S1, S32, S64, S16, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr }) .clampScalar(0, S32, S64) .widenScalarToNextPow2(0) .legalIf(isPointer(0)); setAction({G_FRAME_INDEX, PrivatePtr}, Legal); + getActionDefinitionsBuilder(G_GLOBAL_VALUE) + .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr}); + auto &FPOpActions = getActionDefinitionsBuilder( - { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE}) + { G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE}) .legalFor({S32, S64}); + auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS}) + .customFor({S32, S64}); + auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV) + .customFor({S32, S64}); if (ST.has16BitInsts()) { if (ST.hasVOP3PInsts()) FPOpActions.legalFor({S16, V2S16}); else FPOpActions.legalFor({S16}); + + TrigActions.customFor({S16}); + FDIVActions.customFor({S16}); } auto &MinNumMaxNum = getActionDefinitionsBuilder({ @@ -293,22 +367,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); } - // TODO: Implement - getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); - if (ST.hasVOP3PInsts()) FPOpActions.clampMaxNumElements(0, S16, 2); + FPOpActions .scalarize(0) .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + TrigActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + + FDIVActions + .scalarize(0) + .clampScalar(0, ST.has16BitInsts() ? 
S16 : S32, S64); + + getActionDefinitionsBuilder({G_FNEG, G_FABS}) + .legalFor(FPTypesPK16) + .clampMaxNumElements(0, S16, 2) + .scalarize(0) + .clampScalar(0, S16, S64); + + // TODO: Implement + getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower(); + if (ST.has16BitInsts()) { - getActionDefinitionsBuilder(G_FSQRT) + getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR}) .legalFor({S32, S64, S16}) .scalarize(0) .clampScalar(0, S16, S64); } else { - getActionDefinitionsBuilder(G_FSQRT) + getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR}) .legalFor({S32, S64}) .scalarize(0) .clampScalar(0, S32, S64); @@ -334,23 +423,43 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .clampScalar(0, S32, S64); + // Whether this is legal depends on the floating point mode for the function. + auto &FMad = getActionDefinitionsBuilder(G_FMAD); + if (ST.hasMadF16()) + FMad.customFor({S32, S16}); + else + FMad.customFor({S32}); + FMad.scalarize(0) + .lower(); + getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, + {S96, S32}, // FIXME: Hack {S64, LLT::scalar(33)}, {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}}) .scalarize(0); - getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) - .legalFor({{S32, S32}, {S64, S32}}) + // TODO: Split s1->s64 during regbankselect for VALU. 
+ auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) + .legalFor({{S32, S32}, {S64, S32}, {S16, S32}, {S32, S1}, {S16, S1}, {S64, S1}}) .lowerFor({{S32, S64}}) - .customFor({{S64, S64}}) - .scalarize(0); + .customFor({{S64, S64}}); + if (ST.has16BitInsts()) + IToFP.legalFor({{S16, S16}}); + IToFP.clampScalar(1, S32, S64) + .scalarize(0); - getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) - .legalFor({{S32, S32}, {S32, S64}}) - .scalarize(0); + auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) + .legalFor({{S32, S32}, {S32, S64}, {S32, S16}}); + if (ST.has16BitInsts()) + FPToI.legalFor({{S16, S16}}); + else + FPToI.minScalar(1, S32); + + FPToI.minScalar(0, S32) + .scalarize(0); getActionDefinitionsBuilder(G_INTRINSIC_ROUND) .legalFor({S32, S64}) @@ -374,6 +483,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalForCartesianProduct(AddrSpaces32, {S32}) .scalarize(0); + getActionDefinitionsBuilder(G_PTR_MASK) + .scalarize(0) + .alwaysLegal(); + setAction({G_BLOCK_ADDR, CodePtr}, Legal); auto &CmpBuilder = @@ -415,7 +528,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(1, 32); // TODO: Expand for > s32 - getActionDefinitionsBuilder(G_BSWAP) + getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE}) .legalFor({S32}) .clampScalar(0, S32, S32) .scalarize(0); @@ -491,87 +604,239 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits())); }); - if (ST.hasFlatAddressSpace()) { - getActionDefinitionsBuilder(G_ADDRSPACE_CAST) - .scalarize(0) - .custom(); - } + getActionDefinitionsBuilder(G_ADDRSPACE_CAST) + .scalarize(0) + .custom(); // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we // handle some operations by just promoting the register during // selection. There are also d16 loads on GFX9+ which preserve the high bits. 
- getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .narrowScalarIf([](const LegalityQuery &Query) { - unsigned Size = Query.Types[0].getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - return (Size > 32 && MemSize < Size); - }, - [](const LegalityQuery &Query) { - return std::make_pair(0, LLT::scalar(32)); - }) - .fewerElementsIf([=](const LegalityQuery &Query) { - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - return (MemSize == 96) && - Query.Types[0].isVector() && - !ST.hasDwordx3LoadStores(); - }, - [=](const LegalityQuery &Query) { - return std::make_pair(0, V2S32); - }) - .legalIf([=](const LegalityQuery &Query) { - const LLT &Ty0 = Query.Types[0]; + auto maxSizeForAddrSpace = [this](unsigned AS) -> unsigned { + switch (AS) { + // FIXME: Private element size. + case AMDGPUAS::PRIVATE_ADDRESS: + return 32; + // FIXME: Check subtarget + case AMDGPUAS::LOCAL_ADDRESS: + return ST.useDS128() ? 128 : 64; - unsigned Size = Ty0.getSizeInBits(); - unsigned MemSize = Query.MMODescrs[0].SizeInBits; - if (Size < 32 || (Size > 32 && MemSize < Size)) - return false; + // Treat constant and global as identical. SMRD loads are sometimes usable + // for global loads (ideally constant address space should be eliminated) + // depending on the context. Legality cannot be context dependent, but + // RegBankSelect can split the load as necessary depending on the pointer + // register bank/uniformity and if the memory is invariant or not written in + // a kernel. + case AMDGPUAS::CONSTANT_ADDRESS: + case AMDGPUAS::GLOBAL_ADDRESS: + return 512; + default: + return 128; + } + }; - if (Ty0.isVector() && Size != MemSize) - return false; + const auto needToSplitLoad = [=](const LegalityQuery &Query) -> bool { + const LLT DstTy = Query.Types[0]; - // TODO: Decompose private loads into 4-byte components. 
- // TODO: Illegal flat loads on SI - switch (MemSize) { - case 8: - case 16: - return Size == 32; - case 32: - case 64: - case 128: - return true; + // Split vector extloads. + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize) + return true; - case 96: - return ST.hasDwordx3LoadStores(); + const LLT PtrTy = Query.Types[1]; + unsigned AS = PtrTy.getAddressSpace(); + if (MemSize > maxSizeForAddrSpace(AS)) + return true; - case 256: - case 512: - // TODO: Possibly support loads of i256 and i512 . This will require - // adding i256 and i512 types to MVT in order for to be able to use - // TableGen. - // TODO: Add support for other vector types, this will require - // defining more value mappings for the new types. - return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 || - Ty0.getScalarType().getSizeInBits() == 64); + // Catch weird sized loads that don't evenly divide into the access sizes + // TODO: May be able to widen depending on alignment etc. + unsigned NumRegs = MemSize / 32; + if (NumRegs == 3 && !ST.hasDwordx3LoadStores()) + return true; - default: - return false; - } - }) - .clampScalar(0, S32, S64); + unsigned Align = Query.MMODescrs[0].AlignInBits; + if (Align < MemSize) { + const SITargetLowering *TLI = ST.getTargetLowering(); + return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8); + } + return false; + }; + + unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32; + unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16; + unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8; + + // TODO: Refine based on subtargets which support unaligned access or 128-bit + // LDS + // TODO: Unsupported flat for SI. + + for (unsigned Op : {G_LOAD, G_STORE}) { + const bool IsStore = Op == G_STORE; + + auto &Actions = getActionDefinitionsBuilder(Op); + // Whitelist the common cases. 
+ // TODO: Pointer loads + // TODO: Wide constant loads + // TODO: Only CI+ has 3x loads + // TODO: Loads to s16 on gfx9 + Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32}, + {V2S32, GlobalPtr, 64, GlobalAlign32}, + {V3S32, GlobalPtr, 96, GlobalAlign32}, + {S96, GlobalPtr, 96, GlobalAlign32}, + {V4S32, GlobalPtr, 128, GlobalAlign32}, + {S128, GlobalPtr, 128, GlobalAlign32}, + {S64, GlobalPtr, 64, GlobalAlign32}, + {V2S64, GlobalPtr, 128, GlobalAlign32}, + {V2S16, GlobalPtr, 32, GlobalAlign32}, + {S32, GlobalPtr, 8, GlobalAlign8}, + {S32, GlobalPtr, 16, GlobalAlign16}, + + {S32, LocalPtr, 32, 32}, + {S64, LocalPtr, 64, 32}, + {V2S32, LocalPtr, 64, 32}, + {S32, LocalPtr, 8, 8}, + {S32, LocalPtr, 16, 16}, + {V2S16, LocalPtr, 32, 32}, + + {S32, PrivatePtr, 32, 32}, + {S32, PrivatePtr, 8, 8}, + {S32, PrivatePtr, 16, 16}, + {V2S16, PrivatePtr, 32, 32}, + + {S32, FlatPtr, 32, GlobalAlign32}, + {S32, FlatPtr, 16, GlobalAlign16}, + {S32, FlatPtr, 8, GlobalAlign8}, + {V2S16, FlatPtr, 32, GlobalAlign32}, + + {S32, ConstantPtr, 32, GlobalAlign32}, + {V2S32, ConstantPtr, 64, GlobalAlign32}, + {V3S32, ConstantPtr, 96, GlobalAlign32}, + {V4S32, ConstantPtr, 128, GlobalAlign32}, + {S64, ConstantPtr, 64, GlobalAlign32}, + {S128, ConstantPtr, 128, GlobalAlign32}, + {V2S32, ConstantPtr, 32, GlobalAlign32}}); + Actions + .customIf(typeIs(1, Constant32Ptr)) + .narrowScalarIf( + [=](const LegalityQuery &Query) -> bool { + return !Query.Types[0].isVector() && needToSplitLoad(Query); + }, + [=](const LegalityQuery &Query) -> std::pair { + const LLT DstTy = Query.Types[0]; + const LLT PtrTy = Query.Types[1]; + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + + // Split extloads. + if (DstSize > MemSize) + return std::make_pair(0, LLT::scalar(MemSize)); + + if (DstSize > 32 && (DstSize % 32 != 0)) { + // FIXME: Need a way to specify non-extload of larger size if + // suitably aligned. 
+ return std::make_pair(0, LLT::scalar(32 * (DstSize / 32))); + } + + unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace()); + if (MemSize > MaxSize) + return std::make_pair(0, LLT::scalar(MaxSize)); + + unsigned Align = Query.MMODescrs[0].AlignInBits; + return std::make_pair(0, LLT::scalar(Align)); + }) + .fewerElementsIf( + [=](const LegalityQuery &Query) -> bool { + return Query.Types[0].isVector() && needToSplitLoad(Query); + }, + [=](const LegalityQuery &Query) -> std::pair { + const LLT DstTy = Query.Types[0]; + const LLT PtrTy = Query.Types[1]; + + LLT EltTy = DstTy.getElementType(); + unsigned MaxSize = maxSizeForAddrSpace(PtrTy.getAddressSpace()); + + // Split if it's too large for the address space. + if (Query.MMODescrs[0].SizeInBits > MaxSize) { + unsigned NumElts = DstTy.getNumElements(); + unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize; + + // FIXME: Refine when odd breakdowns handled + // The scalars will need to be re-legalized. + if (NumPieces == 1 || NumPieces >= NumElts || + NumElts % NumPieces != 0) + return std::make_pair(0, EltTy); + + return std::make_pair(0, + LLT::vector(NumElts / NumPieces, EltTy)); + } + + // Need to split because of alignment. + unsigned Align = Query.MMODescrs[0].AlignInBits; + unsigned EltSize = EltTy.getSizeInBits(); + if (EltSize > Align && + (EltSize / Align < DstTy.getNumElements())) { + return std::make_pair(0, LLT::vector(EltSize / Align, EltTy)); + } + + // May need relegalization for the scalars. + return std::make_pair(0, EltTy); + }) + .minScalar(0, S32); + + if (IsStore) + Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32)); + + // TODO: Need a bitcast lower option? + Actions + .legalIf([=](const LegalityQuery &Query) { + const LLT Ty0 = Query.Types[0]; + unsigned Size = Ty0.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned Align = Query.MMODescrs[0].AlignInBits; + + // No extending vector loads. 
+ if (Size > MemSize && Ty0.isVector()) + return false; + + // FIXME: Widening store from alignment not valid. + if (MemSize < Size) + MemSize = std::max(MemSize, Align); + + switch (MemSize) { + case 8: + case 16: + return Size == 32; + case 32: + case 64: + case 128: + return true; + case 96: + return ST.hasDwordx3LoadStores(); + case 256: + case 512: + return true; + default: + return false; + } + }) + .widenScalarToNextPow2(0) + // TODO: v3s32->v4s32 with alignment + .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0)); + } - // FIXME: Handle alignment requirements. auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) - .legalForTypesWithMemDesc({ - {S32, GlobalPtr, 8, 8}, - {S32, GlobalPtr, 16, 8}, - {S32, LocalPtr, 8, 8}, - {S32, LocalPtr, 16, 8}, - {S32, PrivatePtr, 8, 8}, - {S32, PrivatePtr, 16, 8}}); + .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8}, + {S32, GlobalPtr, 16, 2 * 8}, + {S32, LocalPtr, 8, 8}, + {S32, LocalPtr, 16, 16}, + {S32, PrivatePtr, 8, 8}, + {S32, PrivatePtr, 16, 16}, + {S32, ConstantPtr, 8, 8}, + {S32, ConstantPtr, 16, 2 * 8}}); if (ST.hasFlatAddressSpace()) { - ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8}, - {S32, FlatPtr, 16, 8}}); + ExtLoads.legalForTypesWithMemDesc( + {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}}); } ExtLoads.clampScalar(0, S32, S32) @@ -590,6 +855,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}}); } + getActionDefinitionsBuilder(G_ATOMICRMW_FADD) + .legalFor({{S32, LocalPtr}}); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .lower(); + // TODO: Pointer types, any 32-bit or 64-bit vector getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16, @@ -643,7 +914,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return (EltTy.getSizeInBits() == 16 || EltTy.getSizeInBits() % 32 == 0) && VecTy.getSizeInBits() % 32 == 0 && - 
VecTy.getSizeInBits() <= 512 && + VecTy.getSizeInBits() <= 1024 && IdxTy.getSizeInBits() == 32; }) .clampScalar(EltTypeIdx, S32, S64) @@ -663,6 +934,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) + .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + // FIXME: Multiples of 16 should not be legal. .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; const LLT LitTy = Query.Types[LitTyIdx]; @@ -686,18 +959,36 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, } - getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalForCartesianProduct(AllS32Vectors, {S32}) - .legalForCartesianProduct(AllS64Vectors, {S64}) - .clampNumElements(0, V16S32, V16S32) - .clampNumElements(0, V2S64, V8S64) - .minScalarSameAs(1, 0) - .legalIf(isRegisterType(0)) - .minScalarOrElt(0, S32); + auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR) + .legalForCartesianProduct(AllS32Vectors, {S32}) + .legalForCartesianProduct(AllS64Vectors, {S64}) + .clampNumElements(0, V16S32, V32S32) + .clampNumElements(0, V2S64, V16S64) + .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16)); + + if (ST.hasScalarPackInsts()) + BuildVector.legalFor({V2S16, S32}); + + BuildVector + .minScalarSameAs(1, 0) + .legalIf(isRegisterType(0)) + .minScalarOrElt(0, S32); + + if (ST.hasScalarPackInsts()) { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .legalFor({V2S16, S32}) + .lower(); + } else { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .lower(); + } getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalIf(isRegisterType(0)); + // TODO: Don't fully scalarize v2s16 pieces + getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower(); + // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 
0 : 1; @@ -715,14 +1006,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return false; }; - getActionDefinitionsBuilder(Op) + auto &Builder = getActionDefinitionsBuilder(Op) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) // Clamp the little scalar to s8-s256 and make it a power of 2. It's not // worth considering the multiples of 64 since 2*192 and 2*384 are not // valid. .clampScalar(LitTyIdx, S16, S256) .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) - + .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)) + .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32), + elementTypeIs(1, S16)), + changeTo(1, V2S16)) // Break up vectors with weird elements into scalars .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, @@ -730,25 +1024,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, scalarize(1)) - .clampScalar(BigTyIdx, S32, S512) - .widenScalarIf( + .clampScalar(BigTyIdx, S32, S1024) + .lowerFor({{S16, V2S16}}); + + if (Op == G_MERGE_VALUES) { + Builder.widenScalarIf( + // TODO: Use 16-bit shifts if legal for 8-bit values? [=](const LegalityQuery &Query) { - const LLT &Ty = Query.Types[BigTyIdx]; - return !isPowerOf2_32(Ty.getSizeInBits()) && - Ty.getSizeInBits() % 16 != 0; + const LLT Ty = Query.Types[LitTyIdx]; + return Ty.getSizeInBits() < 32; }, - [=](const LegalityQuery &Query) { - // Pick the next power of 2, or a multiple of 64 over 128. - // Whichever is smaller. 
- const LLT &Ty = Query.Types[BigTyIdx]; - unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); - if (NewSizeInBits >= 256) { - unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); - if (RoundedTo < NewSizeInBits) - NewSizeInBits = RoundedTo; - } - return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); - }) + changeTo(LitTyIdx, S32)); + } + + Builder.widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[BigTyIdx]; + return !isPowerOf2_32(Ty.getSizeInBits()) && + Ty.getSizeInBits() % 16 != 0; + }, + [=](const LegalityQuery &Query) { + // Pick the next power of 2, or a multiple of 64 over 128. + // Whichever is smaller. + const LLT &Ty = Query.Types[BigTyIdx]; + unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1); + if (NewSizeInBits >= 256) { + unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); + if (RoundedTo < NewSizeInBits) + NewSizeInBits = RoundedTo; + } + return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); + }) .legalIf([=](const LegalityQuery &Query) { const LLT &BigTy = Query.Types[BigTyIdx]; const LLT &LitTy = Query.Types[LitTyIdx]; @@ -760,43 +1066,56 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return BigTy.getSizeInBits() % 16 == 0 && LitTy.getSizeInBits() % 16 == 0 && - BigTy.getSizeInBits() <= 512; + BigTy.getSizeInBits() <= 1024; }) // Any vectors left are the wrong size. Scalarize them. 
.scalarize(0) .scalarize(1); } + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + computeTables(); verify(*ST.getInstrInfo()); } bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, GISelChangeObserver &Observer) const { switch (MI.getOpcode()) { case TargetOpcode::G_ADDRSPACE_CAST: - return legalizeAddrSpaceCast(MI, MRI, MIRBuilder); + return legalizeAddrSpaceCast(MI, MRI, B); case TargetOpcode::G_FRINT: - return legalizeFrint(MI, MRI, MIRBuilder); + return legalizeFrint(MI, MRI, B); case TargetOpcode::G_FCEIL: - return legalizeFceil(MI, MRI, MIRBuilder); + return legalizeFceil(MI, MRI, B); case TargetOpcode::G_INTRINSIC_TRUNC: - return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder); + return legalizeIntrinsicTrunc(MI, MRI, B); case TargetOpcode::G_SITOFP: - return legalizeITOFP(MI, MRI, MIRBuilder, true); + return legalizeITOFP(MI, MRI, B, true); case TargetOpcode::G_UITOFP: - return legalizeITOFP(MI, MRI, MIRBuilder, false); + return legalizeITOFP(MI, MRI, B, false); case TargetOpcode::G_FMINNUM: case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXNUM_IEEE: - return legalizeMinNumMaxNum(MI, MRI, MIRBuilder); + return legalizeMinNumMaxNum(MI, MRI, B); case TargetOpcode::G_EXTRACT_VECTOR_ELT: - return legalizeExtractVectorElt(MI, MRI, MIRBuilder); + return legalizeExtractVectorElt(MI, MRI, B); case TargetOpcode::G_INSERT_VECTOR_ELT: - return legalizeInsertVectorElt(MI, MRI, MIRBuilder); + return legalizeInsertVectorElt(MI, MRI, B); + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FCOS: + return legalizeSinCos(MI, MRI, B); + case TargetOpcode::G_GLOBAL_VALUE: + return legalizeGlobalValue(MI, MRI, B); + case TargetOpcode::G_LOAD: + return legalizeLoad(MI, MRI, B, Observer); + case TargetOpcode::G_FMAD: + return legalizeFMad(MI, MRI, B); + case TargetOpcode::G_FDIV: + return legalizeFDIV(MI, MRI, B); default: return false; } @@ -807,11 
+1126,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, Register AMDGPULegalizerInfo::getSegmentAperture( unsigned AS, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); const GCNSubtarget &ST = MF.getSubtarget(); const LLT S32 = LLT::scalar(32); + assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS); + if (ST.hasApertureRegs()) { // FIXME: Use inline constants (src_{shared, private}_base) instead of // getreg. @@ -829,13 +1150,13 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register ApertureReg = MRI.createGenericVirtualRegister(S32); Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32) + B.buildInstr(AMDGPU::S_GETREG_B32) .addDef(GetReg) .addImm(Encoding); MRI.setType(GetReg, S32); - auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1); - MIRBuilder.buildInstr(TargetOpcode::G_SHL) + auto ShiftAmt = B.buildConstant(S32, WidthM1 + 1); + B.buildInstr(TargetOpcode::G_SHL) .addDef(ApertureReg) .addUse(GetReg) .addUse(ShiftAmt.getReg(0)); @@ -846,8 +1167,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register QueuePtr = MRI.createGenericVirtualRegister( LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); - // FIXME: Placeholder until we can track the input registers. - MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef); + const SIMachineFunctionInfo *MFI = MF.getInfo(); + if (!loadInputValue(QueuePtr, B, &MFI->getArgInfo().QueuePtr)) + return Register(); // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. 
@@ -870,18 +1192,19 @@ Register AMDGPULegalizerInfo::getSegmentAperture( Register LoadResult = MRI.createGenericVirtualRegister(S32); Register LoadAddr; - MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset); - MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO); + B.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset); + B.buildLoad(LoadResult, LoadAddr, *MMO); return LoadResult; } bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MachineFunction &MF = MIRBuilder.getMF(); + MachineIRBuilder &B) const { + MachineFunction &MF = B.getMF(); - MIRBuilder.setInstr(MI); + B.setInstr(MI); + const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -899,7 +1222,28 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( const GCNSubtarget &ST = MF.getSubtarget(); if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) { - MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST)); + MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST)); + return true; + } + + if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + // Truncate. + B.buildExtract(Dst, Src, 0); + MI.eraseFromParent(); + return true; + } + + if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + const SIMachineFunctionInfo *Info = MF.getInfo(); + uint32_t AddrHiVal = Info->get32BitAddressHighBits(); + + // FIXME: This is a bit ugly due to creating a merge of 2 pointers to + // another. Merge operands are required to be the same type, but creating an + // extra ptrtoint would be kind of pointless. 
+ auto HighAddr = B.buildConstant( + LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal); + B.buildMerge(Dst, {Src, HighAddr.getReg(0)}); + MI.eraseFromParent(); return true; } @@ -908,47 +1252,52 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( DestAS == AMDGPUAS::PRIVATE_ADDRESS); unsigned NullVal = TM.getNullPointerValue(DestAS); - auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal); - auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0); + auto SegmentNull = B.buildConstant(DstTy, NullVal); + auto FlatNull = B.buildConstant(SrcTy, 0); Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy); // Extract low 32-bits of the pointer. - MIRBuilder.buildExtract(PtrLo32, Src, 0); + B.buildExtract(PtrLo32, Src, 0); Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0)); - MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0)); + B.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0)); + B.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0)); MI.eraseFromParent(); return true; } - assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS || - SrcAS == AMDGPUAS::PRIVATE_ADDRESS); + if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS) + return false; + + if (!ST.hasFlatAddressSpace()) + return false; auto SegmentNull = - MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); + B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS)); auto FlatNull = - MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); + B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); - Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder); + Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); + if (!ApertureReg.isValid()) + return false; Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0)); + B.buildICmp(CmpInst::ICMP_NE, CmpRes, 
Src, SegmentNull.getReg(0)); Register BuildPtr = MRI.createGenericVirtualRegister(DstTy); // Coerce the type of the low half of the result so we can use merge_values. - Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32)); - MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT) + Register SrcAsInt = MRI.createGenericVirtualRegister(S32); + B.buildInstr(TargetOpcode::G_PTRTOINT) .addDef(SrcAsInt) .addUse(Src); // TODO: Should we allow mismatched types but matching sizes in merges to // avoid the ptrtoint? - MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg}); - MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0)); + B.buildMerge(BuildPtr, {SrcAsInt, ApertureReg}); + B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0)); MI.eraseFromParent(); return true; @@ -956,8 +1305,8 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( bool AMDGPULegalizerInfo::legalizeFrint( MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - MIRBuilder.setInstr(MI); + MachineIRBuilder &B) const { + B.setInstr(MI); Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); @@ -966,18 +1315,18 @@ bool AMDGPULegalizerInfo::legalizeFrint( APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); - auto C1 = MIRBuilder.buildFConstant(Ty, C1Val); - auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src); + auto C1 = B.buildFConstant(Ty, C1Val); + auto CopySign = B.buildFCopysign(Ty, C1, Src); // TODO: Should this propagate fast-math-flags? 
- auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign); - auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign); + auto Tmp1 = B.buildFAdd(Ty, Src, CopySign); + auto Tmp2 = B.buildFSub(Ty, Tmp1, CopySign); - auto C2 = MIRBuilder.buildFConstant(Ty, C2Val); - auto Fabs = MIRBuilder.buildFAbs(Ty, Src); + auto C2 = B.buildFConstant(Ty, C2Val); + auto Fabs = B.buildFAbs(Ty, Src); - auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2); - MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2); + auto Cond = B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2); + B.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2); return true; } @@ -1124,7 +1473,7 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setMBB(*MI.getParent()); + HelperBuilder.setInstr(MI); return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; } @@ -1187,6 +1536,194 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( return true; } +bool AMDGPULegalizerInfo::legalizeSinCos( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + Register TrigVal; + auto OneOver2Pi = B.buildFConstant(Ty, 0.5 / M_PI); + if (ST.hasTrigReducedRange()) { + auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags); + TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false) + .addUse(MulVal.getReg(0)) + .setMIFlags(Flags).getReg(0); + } else + TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0); + + Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ? 
+ Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos; + B.buildIntrinsic(TrigIntrin, makeArrayRef(DstReg), false) + .addUse(TrigVal) + .setMIFlags(Flags); + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::buildPCRelGlobalAddress( + Register DstReg, LLT PtrTy, + MachineIRBuilder &B, const GlobalValue *GV, + unsigned Offset, unsigned GAFlags) const { + // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered + // to the following code sequence: + // + // For constant address space: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol + // s_addc_u32 s1, s1, 0 + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // a fixup or relocation is emitted to replace $symbol with a literal + // constant, which is a pc-relative offset from the encoding of the $symbol + // operand to the global variable. + // + // For global address space: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo + // s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // fixups or relocations are emitted to replace $symbol@*@lo and + // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant, + // which is a 64-bit pc-relative offset from the encoding of the $symbol + // operand to the global variable. + // + // What we want here is an offset from the value returned by s_getpc + // (which is the address of the s_add_u32 instruction) to the global + // variable, but since the encoding of $symbol starts 4 bytes after the start + // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too + // small. This requires us to add 4 to the global variable offset in order to + // compute the correct address. + + LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + + Register PCReg = PtrTy.getSizeInBits() != 32 ? 
DstReg : + B.getMRI()->createGenericVirtualRegister(ConstPtrTy); + + MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET) + .addDef(PCReg); + + MIB.addGlobalAddress(GV, Offset + 4, GAFlags); + if (GAFlags == SIInstrInfo::MO_NONE) + MIB.addImm(0); + else + MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1); + + B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass); + + if (PtrTy.getSizeInBits() == 32) + B.buildExtract(DstReg, PCReg, 0); + return true; + } + +bool AMDGPULegalizerInfo::legalizeGlobalValue( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned AS = Ty.getAddressSpace(); + + const GlobalValue *GV = MI.getOperand(1).getGlobal(); + MachineFunction &MF = B.getMF(); + SIMachineFunctionInfo *MFI = MF.getInfo(); + B.setInstr(MI); + + if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { + if (!MFI->isEntryFunction()) { + const Function &Fn = MF.getFunction(); + DiagnosticInfoUnsupported BadLDSDecl( + Fn, "local memory global used by non-kernel function", MI.getDebugLoc()); + Fn.getContext().diagnose(BadLDSDecl); + } + + // TODO: We could emit code to handle the initialization somewhere. 
+ if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) { + B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(), *GV)); + MI.eraseFromParent(); + return true; + } + + const Function &Fn = MF.getFunction(); + DiagnosticInfoUnsupported BadInit( + Fn, "unsupported initializer for address space", MI.getDebugLoc()); + Fn.getContext().diagnose(BadInit); + return true; + } + + const SITargetLowering *TLI = ST.getTargetLowering(); + + if (TLI->shouldEmitFixup(GV)) { + buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0); + MI.eraseFromParent(); + return true; + } + + if (TLI->shouldEmitPCReloc(GV)) { + buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32); + MI.eraseFromParent(); + return true; + } + + LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy); + + MachineMemOperand *GOTMMO = MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant, + 8 /*Size*/, 8 /*Align*/); + + buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32); + + if (Ty.getSizeInBits() == 32) { + // Truncate if this is a 32-bit constant adrdess. 
+ auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO); + B.buildExtract(DstReg, Load, 0); + } else + B.buildLoad(DstReg, GOTAddr, *GOTMMO); + + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeLoad( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, GISelChangeObserver &Observer) const { + B.setInstr(MI); + LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); + auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg()); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(Cast.getReg(0)); + Observer.changedInstr(MI); + return true; +} + +bool AMDGPULegalizerInfo::legalizeFMad( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + assert(Ty.isScalar()); + + // TODO: Always legal with future ftz flag. + if (Ty == LLT::scalar(32) && !ST.hasFP32Denormals()) + return true; + if (Ty == LLT::scalar(16) && !ST.hasFP16Denormals()) + return true; + + MachineFunction &MF = B.getMF(); + + MachineIRBuilder HelperBuilder(MI); + GISelObserverWrapper DummyObserver; + LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); + HelperBuilder.setMBB(*MI.getParent()); + return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; +} + // Return the use branch instruction, otherwise null if the usage is invalid. 
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI) { @@ -1212,10 +1749,9 @@ Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI, bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg) const { - if (!Arg->isRegister()) + if (!Arg->isRegister() || !Arg->getRegister().isValid()) return false; // TODO: Handle these - assert(Arg->getRegister() != 0); assert(Arg->getRegister().isPhysical()); MachineRegisterInfo &MRI = *B.getMRI(); @@ -1229,19 +1765,30 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const unsigned Mask = Arg->getMask(); const unsigned Shift = countTrailingZeros(Mask); - auto ShiftAmt = B.buildConstant(S32, Shift); - auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt); - B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift)); + Register AndMaskSrc = LiveIn; + + if (Shift != 0) { + auto ShiftAmt = B.buildConstant(S32, Shift); + AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); + } + + B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); } else B.buildCopy(DstReg, LiveIn); // Insert the argument copy if it doens't already exist. // FIXME: It seems EmitLiveInCopies isn't called anywhere? 
if (!MRI.getVRegDef(LiveIn)) { + // FIXME: Should have scoped insert pt + MachineBasicBlock &OrigInsBB = B.getMBB(); + auto OrigInsPt = B.getInsertPt(); + MachineBasicBlock &EntryMBB = B.getMF().front(); EntryMBB.addLiveIn(Arg->getRegister()); B.setInsertPt(EntryMBB, EntryMBB.begin()); B.buildCopy(LiveIn, Arg->getRegister()); + + B.setInsertPt(OrigInsBB, OrigInsPt); } return true; @@ -1272,6 +1819,113 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( return false; } +bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + + if (legalizeFastUnsafeFDIV(MI, MRI, B)) + return true; + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + + uint16_t Flags = MI.getFlags(); + + LLT ResTy = MRI.getType(Res); + LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); + + const MachineFunction &MF = B.getMF(); + bool Unsafe = + MF.getTarget().Options.UnsafeFPMath || MI.getFlag(MachineInstr::FmArcp); + + if (!MF.getTarget().Options.UnsafeFPMath && ResTy == S64) + return false; + + if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals()) + return false; + + if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { + // 1 / x -> RCP(x) + if (CLHS->isExactlyValue(1.0)) { + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(RHS) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + + // -1 / x -> RCP( FNEG(x) ) + if (CLHS->isExactlyValue(-1.0)) { + auto FNeg = B.buildFNeg(ResTy, RHS, Flags); + B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false) + .addUse(FNeg.getReg(0)) + .setMIFlags(Flags); + + MI.eraseFromParent(); + return true; + } + } + + // x / y -> x * (1.0 / y) + if (Unsafe) { + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false) + 
.addUse(RHS) + .setMIFlags(Flags); + B.buildFMul(Res, LHS, RCP, Flags); + + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + uint16_t Flags = MI.getFlags(); + + LLT S32 = LLT::scalar(32); + LLT S1 = LLT::scalar(1); + + auto Abs = B.buildFAbs(S32, RHS, Flags); + const APFloat C0Val(1.0f); + + auto C0 = B.buildConstant(S32, 0x6f800000); + auto C1 = B.buildConstant(S32, 0x2f800000); + auto C2 = B.buildConstant(S32, FloatToBits(1.0f)); + + auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags); + auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags); + + auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags); + + auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false) + .addUse(Mul0.getReg(0)) + .setMIFlags(Flags); + + auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags); + + B.buildFMul(Res, Sel, Mul1, Flags); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1306,11 +1960,79 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + unsigned AddrSpace) const { + B.setInstr(MI); + Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); + auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32); + B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); + MI.eraseFromParent(); + return true; +} + +/// Handle register layout difference for f16 images for some subtargets. 
+Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!ST.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + LLT StoreVT = MRI.getType(Reg); + assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + bool IsFormat) const { + // TODO: Reject f16 format on targets where unsupported. + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + B.setInstr(MI); + + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // Fixup illegal register types for i8 stores. + if (Ty == LLT::scalar(8) || Ty == S16) { + Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + MI.getOperand(1).setReg(AnyExt); + return true; + } + + if (Ty.isVector()) { + if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (IsFormat) + MI.getOperand(1).setReg(handleD16VData(B, MRI, VData)); + return true; + } + + return Ty.getElementType() == S32 && Ty.getNumElements() <= 4; + } + + return Ty == S32; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { // Replace the use G_BRCOND with the exec manipulate and branch pseudos. 
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_if: { if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { const SIRegisterInfo *TRI @@ -1386,6 +2108,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_dispatch_id: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_ID); + case Intrinsic::amdgcn_fdiv_fast: + return legalizeFDIVFastIntrin(MI, MRI, B); + case Intrinsic::amdgcn_is_shared: + return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS); + case Intrinsic::amdgcn_is_private: + return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS); + case Intrinsic::amdgcn_wavefrontsize: { + B.setInstr(MI); + B.buildConstant(MI.getOperand(0), ST.getWavefrontSize()); + MI.eraseFromParent(); + return true; + } + case Intrinsic::amdgcn_raw_buffer_store: + return legalizeRawBufferStore(MI, MRI, B, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return legalizeRawBufferStore(MI, MRI, B, true); default: return true; } diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 3f1cc1d265d..d0fba23a868 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "AMDGPUArgumentUsageInfo.h" +#include "SIInstrInfo.h" namespace llvm { @@ -32,29 +33,44 @@ public: const GCNTargetMachine &TM); bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, + MachineIRBuilder &B, GISelChangeObserver &Observer) const override; Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool 
legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, bool Signed) const; + MachineIRBuilder &B, bool Signed) const; bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; + MachineIRBuilder &B) const; + bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + + bool buildPCRelGlobalAddress( + Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV, + unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const; + + bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, + GISelChangeObserver &Observer) const; + + bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; @@ -65,10 +81,24 @@ public: MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) 
const; + bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, unsigned AddrSpace) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool IsFormat) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const override; + MachineIRBuilder &B) const override; }; } // End llvm namespace. diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index ce0a9db7c7f..2c94e004665 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -48,18 +49,10 @@ static cl::list UseNative("amdgpu-use-native", cl::CommaSeparated, cl::ValueOptional, cl::Hidden); -#define MATH_PI 3.14159265358979323846264338327950288419716939937511 -#define MATH_E 2.71828182845904523536028747135266249775724709369996 -#define MATH_SQRT2 1.41421356237309504880168872420969807856967187537695 - -#define MATH_LOG2E 1.4426950408889634073599246810018921374266459541529859 -#define MATH_LOG10E 0.4342944819032518276511289189166050822943970058036665 -// Value of log2(10) -#define MATH_LOG2_10 3.3219280948873623478703194294893901758648313930245806 -// Value of 1 / log2(10) -#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085 -// Value of 1 / M_LOG2E_F = 1 / log2(e) -#define MATH_RLOG2_E 
0.6931471805599453094172321214581765680755001343602552 +#define MATH_PI numbers::pi +#define MATH_E numbers::e +#define MATH_SQRT2 numbers::sqrt2 +#define MATH_SQRT1_2 numbers::inv_sqrt2 namespace llvm { @@ -254,8 +247,8 @@ struct TableEntry { /* a list of {result, input} */ static const TableEntry tbl_acos[] = { - {MATH_PI/2.0, 0.0}, - {MATH_PI/2.0, -0.0}, + {MATH_PI / 2.0, 0.0}, + {MATH_PI / 2.0, -0.0}, {0.0, 1.0}, {MATH_PI, -1.0} }; @@ -271,8 +264,8 @@ static const TableEntry tbl_acospi[] = { static const TableEntry tbl_asin[] = { {0.0, 0.0}, {-0.0, -0.0}, - {MATH_PI/2.0, 1.0}, - {-MATH_PI/2.0, -1.0} + {MATH_PI / 2.0, 1.0}, + {-MATH_PI / 2.0, -1.0} }; static const TableEntry tbl_asinh[] = { {0.0, 0.0}, @@ -287,8 +280,8 @@ static const TableEntry tbl_asinpi[] = { static const TableEntry tbl_atan[] = { {0.0, 0.0}, {-0.0, -0.0}, - {MATH_PI/4.0, 1.0}, - {-MATH_PI/4.0, -1.0} + {MATH_PI / 4.0, 1.0}, + {-MATH_PI / 4.0, -1.0} }; static const TableEntry tbl_atanh[] = { {0.0, 0.0}, @@ -359,7 +352,7 @@ static const TableEntry tbl_log10[] = { }; static const TableEntry tbl_rsqrt[] = { {1.0, 1.0}, - {1.0/MATH_SQRT2, 2.0} + {MATH_SQRT1_2, 2.0} }; static const TableEntry tbl_sin[] = { {0.0, 0.0}, @@ -868,7 +861,7 @@ static double log2(double V) { #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L return ::log2(V); #else - return log(V) / 0.693147180559945309417; + return log(V) / numbers::ln2; #endif } } @@ -1430,8 +1423,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, B.SetInsertPoint(&*ItNew); AllocaInst *Alloc = B.CreateAlloca(RetType, 0, std::string(prefix) + UI->getName()); - Alloc->setAlignment(UCallee->getParent()->getDataLayout() - .getTypeAllocSize(RetType)); + Alloc->setAlignment(MaybeAlign( + UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); return Alloc; } diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp index a5bac25701a..e1ae496d9cb 100644 --- 
a/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -55,7 +55,7 @@ enum EManglingParam { }; struct ManglingRule { - StringRef const Name; + const char *Name; unsigned char Lead[2]; unsigned char Param[5]; @@ -69,7 +69,7 @@ struct ManglingRule { // Information about library functions with unmangled names. class UnmangledFuncInfo { - StringRef const Name; + const char *Name; unsigned NumArgs; // Table for all lib functions with unmangled names. @@ -82,7 +82,7 @@ class UnmangledFuncInfo { public: using ID = AMDGPULibFunc::EFuncId; - UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs) + constexpr UnmangledFuncInfo(const char *_Name, unsigned _NumArgs) : Name(_Name), NumArgs(_NumArgs) {} // Get index to Table by function name. static bool lookup(StringRef Name, ID &Id); @@ -133,8 +133,8 @@ unsigned ManglingRule::getNumArgs() const { // E_ANY - use prev lead type, E_CONSTPTR_ANY - make const pointer out of // prev lead type, etc. see ParamIterator::getNextParam() for details. 
-static const ManglingRule manglingRules[] = { -{ StringRef(), {0}, {0} }, +static constexpr ManglingRule manglingRules[] = { +{ "", {0}, {0} }, { "abs" , {1}, {E_ANY}}, { "abs_diff" , {1}, {E_ANY,E_COPY}}, { "acos" , {1}, {E_ANY}}, @@ -682,9 +682,9 @@ bool AMDGPULibFunc::parse(StringRef FuncName, AMDGPULibFunc &F) { } if (eatTerm(FuncName, "_Z")) - F.Impl = make_unique(); + F.Impl = std::make_unique(); else - F.Impl = make_unique(); + F.Impl = std::make_unique(); if (F.Impl->parseFuncName(FuncName)) return true; diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index 5dd5b3691e0..e64542a395f 100644 --- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -72,10 +72,10 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { BasicBlock &EntryBlock = *F.begin(); IRBuilder<> Builder(&*EntryBlock.begin()); - const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary + const Align KernArgBaseAlign(16); // FIXME: Increase if necessary const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F); - unsigned MaxAlign; + Align MaxAlign; // FIXME: Alignment is broken broken with explicit arg offset.; const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign); if (TotalKernArgSize == 0) @@ -94,12 +94,12 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { for (Argument &Arg : F.args()) { Type *ArgTy = Arg.getType(); - unsigned Align = DL.getABITypeAlignment(ArgTy); + unsigned ABITypeAlign = DL.getABITypeAlignment(ArgTy); unsigned Size = DL.getTypeSizeInBits(ArgTy); unsigned AllocSize = DL.getTypeAllocSize(ArgTy); - uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset; - ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize; + uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset; + ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize; if 
(Arg.use_empty()) continue; @@ -128,8 +128,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { int64_t AlignDownOffset = alignDown(EltOffset, 4); int64_t OffsetDiff = EltOffset - AlignDownOffset; - unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset, - KernArgBaseAlign); + Align AdjustedAlign = commonAlignment( + KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset); Value *ArgPtr; Type *AdjustedArgTy; @@ -160,7 +160,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS), ArgPtr->getName() + ".cast"); LoadInst *Load = - Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign); + Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign.value()); Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {})); MDBuilder MDB(Ctx); @@ -220,8 +220,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) { } KernArgSegment->addAttribute( - AttributeList::ReturnIndex, - Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign))); + AttributeList::ReturnIndex, + Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign))); return true; } diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index ae4c32c258a..3760aed87a4 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -211,6 +211,10 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { lowerOperand(MO, MCOp); OutMI.addOperand(MCOp); } + + int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi); + if (FIIdx >= (int)OutMI.getNumOperands()) + OutMI.addOperand(MCOperand::createImm(0)); } bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO, diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 23749095705..ba72f71f432 100644 --- 
a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -694,7 +694,7 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - if (TRI->isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); // If this is a source register to a PHI we are chaining, it @@ -734,7 +734,7 @@ void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - if (TRI->isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); for (auto &UI : MRI->use_operands(Reg)) { @@ -949,7 +949,7 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, (IncludeLoopPHI && IsLoopPHI); if (ShouldReplace) { - if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + if (Register::isPhysicalRegister(NewRegister)) { LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); @@ -1016,13 +1016,15 @@ bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { // before are no longer register kills. void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + (void)TRI; // It's used by LLVM_DEBUG. 
+ for (auto MBBI : MBBs) { MachineBasicBlock *MBB = MBBI; for (auto &II : *MBB) { for (auto &RI : II.uses()) { if (RI.isReg()) { - unsigned Reg = RI.getReg(); - if (TRI->isVirtualRegister(Reg)) { + Register Reg = RI.getReg(); + if (Register::isVirtualRegister(Reg)) { if (hasNoDef(Reg, MRI)) continue; if (!MRI->hasOneDef(Reg)) { @@ -1402,7 +1404,7 @@ void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( MachineInstr &PHI, SmallVector *RegionIndices) { unsigned DestReg = getPHIDestReg(PHI); - unsigned LinearizeDestReg = + Register LinearizeDestReg = MRI->createVirtualRegister(MRI->getRegClass(DestReg)); PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); @@ -1890,7 +1892,7 @@ void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( if (!Cond[0].isReg()) return; - unsigned CondReg = Cond[0].getReg(); + Register CondReg = Cond[0].getReg(); for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { (*UI).setIsKill(false); } @@ -1929,8 +1931,8 @@ void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *Co BBSelectReg, TrueBB->getNumber()); } else { const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); - unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); - unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + Register TrueBBReg = MRI->createVirtualRegister(RegClass); + Register FalseBBReg = MRI->createVirtualRegister(RegClass); TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, TrueBBReg, TrueBB->getNumber()); TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, @@ -1996,7 +1998,7 @@ void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); } const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); - unsigned NextDestReg = 
MRI->createVirtualRegister(RegClass); + Register NextDestReg = MRI->createVirtualRegister(RegClass); bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; LLVM_DEBUG(dbgs() << "Insert Chained PHI\n"); insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, @@ -2056,8 +2058,8 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, // register, unless it is the outgoing BB select register. We have // already creaed phi nodes for these. const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); - unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); - unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); + Register PHIDestReg = MRI->createVirtualRegister(RegClass); + Register IfSourceReg = MRI->createVirtualRegister(RegClass); // Create initializer, this value is never used, but is needed // to satisfy SSA. LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n"); @@ -2172,7 +2174,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); const TargetRegisterClass *RegClass = MRI->getRegClass(CurrentBackedgeReg); - unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); + Register NewBackedgeReg = MRI->createVirtualRegister(RegClass); MachineInstrBuilder BackedgePHI = BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, TII->get(TargetOpcode::PHI), NewBackedgeReg); @@ -2230,7 +2232,7 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, I != E;) { MachineOperand &O = *I; ++I; - if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + if (Register::isPhysicalRegister(NewRegister)) { LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); @@ -2309,7 +2311,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( } else { // Handle internal block. 
const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); - unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); + Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass); rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, @@ -2446,7 +2448,7 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, } const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); - unsigned NewDestReg = MRI->createVirtualRegister(RegClass); + Register NewDestReg = MRI->createVirtualRegister(RegClass); LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); MachineInstrBuilder MIB = BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), @@ -2734,9 +2736,9 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { } const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); unsigned InReg = LRegion->getBBSelectRegIn(); - unsigned InnerSelectReg = + Register InnerSelectReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); - unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); + Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); TII->materializeImmediate(*(LRegion->getEntry()), LRegion->getEntry()->getFirstTerminator(), DL, NewInReg, Region->getEntry()->getNumber()); diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 0d3a1f1a769..89ca702f577 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -17,7 +17,6 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), LocalMemoryObjects(), ExplicitKernArgSize(0), - MaxKernArgAlign(0), LDSSize(0), IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), 
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath), diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 52987e2fa41..9818ab1ef14 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -23,7 +23,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo { protected: uint64_t ExplicitKernArgSize; // Cache for this. - unsigned MaxKernArgAlign; // Cache for this. + Align MaxKernArgAlign; // Cache for this. /// Number of bytes in the LDS that are being used. unsigned LDSSize; @@ -47,9 +47,7 @@ public: return ExplicitKernArgSize; } - unsigned getMaxKernArgAlign() const { - return MaxKernArgAlign; - } + unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } unsigned getLDSSize() const { return LDSSize; diff --git a/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp new file mode 100644 index 00000000000..5250bf455d7 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -0,0 +1,592 @@ +//=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// +// This pass binds printfs to a kernel arg pointer that will be bound to a buffer +// later by the runtime.
+// +// This pass traverses the functions in the module and converts +// each call to printf to a sequence of operations that +// store the following into the printf buffer: +// - format string (passed as a module's metadata unique ID) +// - bitwise copies of printf arguments +// The backend passes will need to store metadata in the kernel +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "printfToRuntime" +#define DWORD_ALIGN 4 + +namespace { +class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final + : public ModulePass { + +public: + static char ID; + + explicit AMDGPUPrintfRuntimeBinding(); + +private: + bool runOnModule(Module &M) override; + void getConversionSpecifiers(SmallVectorImpl &OpConvSpecifiers, + StringRef fmt, size_t num_ops) const; + + bool shouldPrintAsStr(char Specifier, Type *OpType) const; + bool + lowerPrintfForGpu(Module &M, + function_ref GetTLI); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + + Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { + return SimplifyInstruction(I, {*TD, TLI, DT}); + } + + const DataLayout *TD; + const DominatorTree *DT; + SmallVector Printfs; +}; +} // namespace + +char 
AMDGPUPrintfRuntimeBinding::ID = 0; + +INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, + "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", + "AMDGPU Printf lowering", false, false) + +char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; + +namespace llvm { +ModulePass *createAMDGPUPrintfRuntimeBinding() { + return new AMDGPUPrintfRuntimeBinding(); +} +} // namespace llvm + +AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() + : ModulePass(ID), TD(nullptr), DT(nullptr) { + initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); +} + +void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( + SmallVectorImpl &OpConvSpecifiers, StringRef Fmt, + size_t NumOps) const { + // Not all format characters are collected. + // At this time the format characters of interest + // are %p and %s, which are used to determine whether we + // are storing a literal string or a + // pointer to the printf buffer.
+ static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; + size_t CurFmtSpecifierIdx = 0; + size_t PrevFmtSpecifierIdx = 0; + + while ((CurFmtSpecifierIdx = Fmt.find_first_of( + ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { + bool ArgDump = false; + StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, + CurFmtSpecifierIdx - PrevFmtSpecifierIdx); + size_t pTag = CurFmt.find_last_of("%"); + if (pTag != StringRef::npos) { + ArgDump = true; + while (pTag && CurFmt[--pTag] == '%') { + ArgDump = !ArgDump; + } + } + + if (ArgDump) + OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); + + PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; + } +} + +bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, + Type *OpType) const { + if (Specifier != 's') + return false; + const PointerType *PT = dyn_cast(OpType); + if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + return false; + Type *ElemType = PT->getContainedType(0); + if (ElemType->getTypeID() != Type::IntegerTyID) + return false; + IntegerType *ElemIType = cast(ElemType); + return ElemIType->getBitWidth() == 8; +} + +bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( + Module &M, function_ref GetTLI) { + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); + Type *I32Ty = Type::getInt32Ty(Ctx); + unsigned UniqID = 0; + // NB: It is important for this string size to be divisible by 4 + const char NonLiteralStr[4] = "???"; + + for (auto CI : Printfs) { + unsigned NumOps = CI->getNumArgOperands(); + + SmallString<16> OpConvSpecifiers; + Value *Op = CI->getArgOperand(0); + + if (auto LI = dyn_cast(Op)) { + Op = LI->getPointerOperand(); + for (auto Use : Op->users()) { + if (auto SI = dyn_cast(Use)) { + Op = SI->getValueOperand(); + break; + } + } + } + + if (auto I = dyn_cast(Op)) { + Value *Op_simplified = simplify(I, &GetTLI(*I->getFunction())); + if (Op_simplified) + Op = Op_simplified; + } + + ConstantExpr *ConstExpr = dyn_cast(Op); + + if (ConstExpr) { + GlobalVariable *GVar
= dyn_cast(ConstExpr->getOperand(0)); + + StringRef Str("unknown"); + if (GVar && GVar->hasInitializer()) { + auto Init = GVar->getInitializer(); + if (auto CA = dyn_cast(Init)) { + if (CA->isString()) + Str = CA->getAsCString(); + } else if (isa(Init)) { + Str = ""; + } + // + // we need this call to ascertain + // that we are printing a string + // or a pointer. It takes out the + // specifiers and fills up the first + // arg + getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1); + } + // Add metadata for the string + std::string AStreamHolder; + raw_string_ostream Sizes(AStreamHolder); + int Sum = DWORD_ALIGN; + Sizes << CI->getNumArgOperands() - 1; + Sizes << ':'; + for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && + ArgCount <= OpConvSpecifiers.size(); + ArgCount++) { + Value *Arg = CI->getArgOperand(ArgCount); + Type *ArgType = Arg->getType(); + unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType); + ArgSize = ArgSize / 8; + // + // ArgSize by design should be a multiple of DWORD_ALIGN, + // expand the arguments that do not follow this rule. + // + if (ArgSize % DWORD_ALIGN != 0) { + llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx); + VectorType *LLVMVecType = llvm::dyn_cast(ArgType); + int NumElem = LLVMVecType ? 
LLVMVecType->getNumElements() : 1; + if (LLVMVecType && NumElem > 1) + ResType = llvm::VectorType::get(ResType, NumElem); + Builder.SetInsertPoint(CI); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + if (OpConvSpecifiers[ArgCount - 1] == 'x' || + OpConvSpecifiers[ArgCount - 1] == 'X' || + OpConvSpecifiers[ArgCount - 1] == 'u' || + OpConvSpecifiers[ArgCount - 1] == 'o') + Arg = Builder.CreateZExt(Arg, ResType); + else + Arg = Builder.CreateSExt(Arg, ResType); + ArgType = Arg->getType(); + ArgSize = TD->getTypeAllocSizeInBits(ArgType); + ArgSize = ArgSize / 8; + CI->setOperand(ArgCount, Arg); + } + if (OpConvSpecifiers[ArgCount - 1] == 'f') { + ConstantFP *FpCons = dyn_cast(Arg); + if (FpCons) + ArgSize = 4; + else { + FPExtInst *FpExt = dyn_cast(Arg); + if (FpExt && FpExt->getType()->isDoubleTy() && + FpExt->getOperand(0)->getType()->isFloatTy()) + ArgSize = 4; + } + } + if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { + if (ConstantExpr *ConstExpr = dyn_cast(Arg)) { + GlobalVariable *GV = + dyn_cast(ConstExpr->getOperand(0)); + if (GV && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + ConstantDataArray *CA = dyn_cast(Init); + if (Init->isZeroValue() || CA->isString()) { + size_t SizeStr = Init->isZeroValue() + ? 
1 + : (strlen(CA->getAsCString().data()) + 1); + size_t Rem = SizeStr % DWORD_ALIGN; + size_t NSizeStr = 0; + LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr + << '\n'); + if (Rem) { + NSizeStr = SizeStr + (DWORD_ALIGN - Rem); + } else { + NSizeStr = SizeStr; + } + ArgSize = NSizeStr; + } + } else { + ArgSize = sizeof(NonLiteralStr); + } + } else { + ArgSize = sizeof(NonLiteralStr); + } + } + LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize + << " for type: " << *ArgType << '\n'); + Sizes << ArgSize << ':'; + Sum += ArgSize; + } + LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() + << '\n'); + for (size_t I = 0; I < Str.size(); ++I) { + // Rest of the C escape sequences (e.g. \') are handled correctly + // by the MDParser + switch (Str[I]) { + case '\a': + Sizes << "\\a"; + break; + case '\b': + Sizes << "\\b"; + break; + case '\f': + Sizes << "\\f"; + break; + case '\n': + Sizes << "\\n"; + break; + case '\r': + Sizes << "\\r"; + break; + case '\v': + Sizes << "\\v"; + break; + case ':': + // ':' cannot be scanned by Flex, as it is defined as a delimiter + // Replace it with it's octal representation \72 + Sizes << "\\72"; + break; + default: + Sizes << Str[I]; + break; + } + } + + // Insert the printf_alloc call + Builder.SetInsertPoint(CI); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, + Attribute::NoUnwind); + + Type *SizetTy = Type::getInt32Ty(Ctx); + + Type *Tys_alloc[1] = {SizetTy}; + Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1); + FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); + FunctionCallee PrintfAllocFn = + M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); + + LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); + std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str(); + MDString *fmtStrArray = MDString::get(Ctx, fmtstr); + + // Instead of 
creating global variables, the + // printf format strings are extracted + // and passed as metadata. This avoids + // polluting llvm's symbol tables in this module. + // Metadata is going to be extracted + // by the backend passes and inserted + // into the OpenCL binary as appropriate. + StringRef amd("llvm.printf.fmts"); + NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd); + MDNode *myMD = MDNode::get(Ctx, fmtStrArray); + metaD->addOperand(myMD); + Value *sumC = ConstantInt::get(SizetTy, Sum, false); + SmallVector alloc_args; + alloc_args.push_back(sumC); + CallInst *pcall = + CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); + + // + // Insert code to split basicblock with a + // piece of hammock code. + // basicblock splits after buffer overflow check + // + ConstantPointerNull *zeroIntPtr = + ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1)); + ICmpInst *cmp = + dyn_cast(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); + if (!CI->use_empty()) { + Value *result = + Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); + CI->replaceAllUsesWith(result); + } + SplitBlock(CI->getParent(), cmp); + Instruction *Brnch = + SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); + + Builder.SetInsertPoint(Brnch); + + // store unique printf id in the buffer + // + SmallVector ZeroIdxList; + ConstantInt *zeroInt = + ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10)); + ZeroIdxList.push_back(zeroInt); + + GetElementPtrInst *BufferIdx = + dyn_cast(GetElementPtrInst::Create( + nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch)); + + Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); + Value *id_gep_cast = + new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); + + StoreInst *stbuff = + new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast); + stbuff->insertBefore(Brnch); // to Remove unused variable warning + + SmallVector FourthIdxList; + ConstantInt *fourInt = + ConstantInt::get(Ctx, 
APInt(32, StringRef("4"), 10)); + + FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id + // the following GEP is the buffer pointer + BufferIdx = cast(GetElementPtrInst::Create( + nullptr, pcall, FourthIdxList, "PrintBuffGep", Brnch)); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int64Ty = Type::getInt64Ty(Ctx); + for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() && + ArgCount <= OpConvSpecifiers.size(); + ArgCount++) { + Value *Arg = CI->getArgOperand(ArgCount); + Type *ArgType = Arg->getType(); + SmallVector WhatToStore; + if (ArgType->isFPOrFPVectorTy() && + (ArgType->getTypeID() != Type::VectorTyID)) { + Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty; + if (OpConvSpecifiers[ArgCount - 1] == 'f') { + ConstantFP *fpCons = dyn_cast(Arg); + if (fpCons) { + APFloat Val(fpCons->getValueAPF()); + bool Lost = false; + Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &Lost); + Arg = ConstantFP::get(Ctx, Val); + IType = Int32Ty; + } else { + FPExtInst *FpExt = dyn_cast(Arg); + if (FpExt && FpExt->getType()->isDoubleTy() && + FpExt->getOperand(0)->getType()->isFloatTy()) { + Arg = FpExt->getOperand(0); + IType = Int32Ty; + } + } + } + Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch); + WhatToStore.push_back(Arg); + } else if (ArgType->getTypeID() == Type::PointerTyID) { + if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { + const char *S = NonLiteralStr; + if (ConstantExpr *ConstExpr = dyn_cast(Arg)) { + GlobalVariable *GV = + dyn_cast(ConstExpr->getOperand(0)); + if (GV && GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + ConstantDataArray *CA = dyn_cast(Init); + if (Init->isZeroValue() || CA->isString()) { + S = Init->isZeroValue() ? 
"" : CA->getAsCString().data(); + } + } + } + size_t SizeStr = strlen(S) + 1; + size_t Rem = SizeStr % DWORD_ALIGN; + size_t NSizeStr = 0; + if (Rem) { + NSizeStr = SizeStr + (DWORD_ALIGN - Rem); + } else { + NSizeStr = SizeStr; + } + if (S[0]) { + char *MyNewStr = new char[NSizeStr](); + strcpy(MyNewStr, S); + int NumInts = NSizeStr / 4; + int CharC = 0; + while (NumInts) { + int ANum = *(int *)(MyNewStr + CharC); + CharC += 4; + NumInts--; + Value *ANumV = ConstantInt::get(Int32Ty, ANum, false); + WhatToStore.push_back(ANumV); + } + delete[] MyNewStr; + } else { + // Empty string, give a hint to RT it is no NULL + Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); + WhatToStore.push_back(ANumV); + } + } else { + uint64_t Size = TD->getTypeAllocSizeInBits(ArgType); + assert((Size == 32 || Size == 64) && "unsupported size"); + Type *DstType = (Size == 32) ? Int32Ty : Int64Ty; + Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch); + WhatToStore.push_back(Arg); + } + } else if (ArgType->getTypeID() == Type::VectorTyID) { + Type *IType = NULL; + uint32_t EleCount = cast(ArgType)->getNumElements(); + uint32_t EleSize = ArgType->getScalarSizeInBits(); + uint32_t TotalSize = EleCount * EleSize; + if (EleCount == 3) { + IntegerType *Int32Ty = Type::getInt32Ty(ArgType->getContext()); + Constant *Indices[4] = { + ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 2), ConstantInt::get(Int32Ty, 2)}; + Constant *Mask = ConstantVector::get(Indices); + ShuffleVectorInst *Shuffle = new ShuffleVectorInst(Arg, Arg, Mask); + Shuffle->insertBefore(Brnch); + Arg = Shuffle; + ArgType = Arg->getType(); + TotalSize += EleSize; + } + switch (EleSize) { + default: + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + break; + case 8: + if (EleCount >= 8) { + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + } else if (EleCount >= 3) { + EleCount = 1; + IType = 
dyn_cast(Type::getInt32Ty(ArgType->getContext())); + } else { + EleCount = 1; + IType = dyn_cast(Type::getInt16Ty(ArgType->getContext())); + } + break; + case 16: + if (EleCount >= 3) { + EleCount = TotalSize / 64; + IType = dyn_cast(Type::getInt64Ty(ArgType->getContext())); + } else { + EleCount = 1; + IType = dyn_cast(Type::getInt32Ty(ArgType->getContext())); + } + break; + } + if (EleCount > 1) { + IType = dyn_cast(VectorType::get(IType, EleCount)); + } + Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch); + WhatToStore.push_back(Arg); + } else { + WhatToStore.push_back(Arg); + } + for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { + Value *TheBtCast = WhatToStore[I]; + unsigned ArgSize = + TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8; + SmallVector BuffOffset; + BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize)); + + Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1); + Value *CastedGEP = + new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch); + StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch); + LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" + << *StBuff << '\n'); + (void)StBuff; + if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands()) + break; + BufferIdx = dyn_cast(GetElementPtrInst::Create( + nullptr, BufferIdx, BuffOffset, "PrintBuffNextPtr", Brnch)); + LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" + << *BufferIdx << '\n'); + } + } + } + } + + // erase the printf calls + for (auto CI : Printfs) + CI->eraseFromParent(); + + Printfs.clear(); + return true; +} + +bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { + Triple TT(M.getTargetTriple()); + if (TT.getArch() == Triple::r600) + return false; + + auto PrintfFunction = M.getFunction("printf"); + if (!PrintfFunction) + return false; + + for (auto &U : PrintfFunction->uses()) { + if (auto *CI = dyn_cast(U.getUser())) { + if (CI->isCallee(&U)) + Printfs.push_back(CI); + } + } + + if (Printfs.empty()) + 
return false; + + TD = &M.getDataLayout(); + auto DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + + return lowerPrintfForGpu(M, GetTLI); +} diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index e4c9d6685d4..3e9dcca114a 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -801,7 +801,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(I.getAlignment()); + GV->setAlignment(MaybeAlign(I.getAlignment())); Value *TCntY, *TCntZ; diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 815cbc5e26e..4d78188b3dc 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -17,9 +17,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -33,6 +33,7 @@ #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; +using namespace MIPatternMatch; namespace { @@ -84,9 +85,11 @@ public: }; } -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterBankInfo(), - TRI(static_cast(&TRI)) { + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), + TII(Subtarget.getInstrInfo()) { // 
HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; @@ -163,11 +166,10 @@ unsigned AMDGPURegisterBankInfo::getBreakDownCost( const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( const TargetRegisterClass &RC) const { + if (&RC == &AMDGPU::SReg_1RegClass) + return AMDGPU::VCCRegBank; - if (TRI->isSGPRClass(&RC)) - return getRegBank(AMDGPU::SGPRRegBankID); - - return getRegBank(AMDGPU::VGPRRegBankID); + return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank; } template @@ -192,7 +194,8 @@ AMDGPURegisterBankInfo::addMappingFromTable( Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI); } - unsigned MappingID = 0; + // getInstrMapping's default mapping uses ID 1, so start at 2. + unsigned MappingID = 2; for (const auto &Entry : Table) { for (unsigned I = 0; I < NumOps; ++I) { int OpIdx = RegSrcOpIdx[I]; @@ -210,7 +213,7 @@ AMDGPURegisterBankInfo::addMappingFromTable( RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_readlane: { static const OpRegBankEntry<3> Table[2] = { // Perfectly legal. @@ -251,7 +254,7 @@ RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_buffer_load: { static const OpRegBankEntry<3> Table[4] = { // Perfectly legal. @@ -303,6 +306,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { + // FIXME: Should have no register for immediate static const OpRegBankEntry<1> Table[2] = { // Perfectly legal. 
{ { AMDGPU::SGPRRegBankID }, 1 }, @@ -319,12 +323,15 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } -static bool isInstrUniform(const MachineInstr &MI) { +// FIXME: Returns uniform if there's no source value information. This is +// probably wrong. +static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; const MachineMemOperand *MMO = *MI.memoperands_begin(); - return AMDGPUInstrInfo::isUniformMMO(MMO); + return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 && + AMDGPUInstrInfo::isUniformMMO(MMO); } RegisterBankInfo::InstructionMappings @@ -337,6 +344,31 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( InstructionMappings AltMappings; switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + if (Size == 1) { + static const OpRegBankEntry<1> Table[4] = { + { { AMDGPU::VGPRRegBankID }, 1 }, + { { AMDGPU::SGPRRegBankID }, 1 }, + { { AMDGPU::VCCRegBankID }, 1 }, + { { AMDGPU::SCCRegBankID }, 1 } + }; + + return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table); + } + + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_GLOBAL_VALUE: { + static const OpRegBankEntry<1> Table[2] = { + { { AMDGPU::VGPRRegBankID }, 1 }, + { { AMDGPU::SGPRRegBankID }, 1 } + }; + + return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table); + } case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { @@ -408,23 +440,29 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( AltMappings.push_back(&VSMapping); break; } - case TargetOpcode::G_LOAD: { + case TargetOpcode::G_LOAD: + case TargetOpcode::G_ZEXTLOAD: + case TargetOpcode::G_SEXTLOAD: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + LLT PtrTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned PtrSize = PtrTy.getSizeInBits(); + unsigned AS = 
PtrTy.getAddressSpace(); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - // FIXME: Should we be hard coding the size for these mappings? - if (isInstrUniform(MI)) { + if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}), 2); // Num Operands AltMappings.push_back(&SSMapping); } const InstructionMapping &VVMapping = getInstructionMapping( 2, 1, getOperandsMapping( - {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), + {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}), 2); // Num Operands AltMappings.push_back(&VVMapping); @@ -620,57 +658,53 @@ static LLT getHalfSizedType(LLT Ty) { /// /// There is additional complexity to try for compare values to identify the /// unique values used. -void AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, - ArrayRef OpIndices) const { - MachineFunction *MF = MI.getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); - const SIInstrInfo *TII = ST.getInstrInfo(); - MachineBasicBlock::iterator I(MI); - - MachineBasicBlock &MBB = *MI.getParent(); - const DebugLoc &DL = MI.getDebugLoc(); - - // Use a set to avoid extra readfirstlanes in the case where multiple operands - // are the same register. 
- SmallSet SGPROperandRegs; - for (unsigned Op : OpIndices) { - assert(MI.getOperand(Op).isUse()); - Register Reg = MI.getOperand(Op).getReg(); - const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); - if (OpBank->getID() == AMDGPU::VGPRRegBankID) - SGPROperandRegs.insert(Reg); - } - - // No operands need to be replaced, so no need to loop. - if (SGPROperandRegs.empty()) - return; - - MachineIRBuilder B(MI); +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, + MachineRegisterInfo &MRI) const { SmallVector ResultRegs; SmallVector InitResultRegs; SmallVector PhiRegs; - for (MachineOperand &Def : MI.defs()) { - LLT ResTy = MRI.getType(Def.getReg()); - const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); - ResultRegs.push_back(Def.getReg()); - Register InitReg = B.buildUndef(ResTy).getReg(0); - Register PhiReg = MRI.createGenericVirtualRegister(ResTy); - InitResultRegs.push_back(InitReg); - PhiRegs.push_back(PhiReg); - MRI.setRegBank(PhiReg, *DefBank); - MRI.setRegBank(InitReg, *DefBank); + + MachineBasicBlock &MBB = B.getMBB(); + MachineFunction *MF = &B.getMF(); + + const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); + const unsigned WaveAndOpc = Subtarget.isWave32() ? + AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; + const unsigned MovTermOpc = Subtarget.isWave32() ? + AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; + const unsigned XorTermOpc = Subtarget.isWave32() ? + AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term; + const unsigned AndSaveExecOpc = Subtarget.isWave32() ? + AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; + const unsigned ExecReg = Subtarget.isWave32() ? 
+ AMDGPU::EXEC_LO : AMDGPU::EXEC; + + for (MachineInstr &MI : Range) { + for (MachineOperand &Def : MI.defs()) { + LLT ResTy = MRI.getType(Def.getReg()); + const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI); + ResultRegs.push_back(Def.getReg()); + Register InitReg = B.buildUndef(ResTy).getReg(0); + Register PhiReg = MRI.createGenericVirtualRegister(ResTy); + InitResultRegs.push_back(InitReg); + PhiRegs.push_back(PhiReg); + MRI.setRegBank(PhiReg, *DefBank); + MRI.setRegBank(InitReg, *DefBank); + } } - Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); - Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + Register SaveExecReg = MRI.createVirtualRegister(WaveRC); + Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC); // Don't bother using generic instructions/registers for the exec mask. B.buildInstr(TargetOpcode::IMPLICIT_DEF) .addDef(InitSaveExecReg); - Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register PhiExec = MRI.createVirtualRegister(WaveRC); + Register NewExec = MRI.createVirtualRegister(WaveRC); // To insert the loop we need to split the block. Move everything before this // point to a new block, and insert a new empty block before this instruction. @@ -688,7 +722,7 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( // Move the rest of the block into a new block. RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB); - RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); + RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end()); MBB.addSuccessor(LoopBB); RestoreExecBB->addSuccessor(RemainderBB); @@ -711,164 +745,173 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( .addMBB(LoopBB); } - // Move the instruction into the loop. 
- LoopBB->splice(LoopBB->end(), &MBB, I); - I = std::prev(LoopBB->end()); + const DebugLoc &DL = B.getDL(); - B.setInstr(*I); + // Figure out the iterator range after splicing the instructions. + auto NewBegin = std::prev(LoopBB->end()); + + // Move the instruction into the loop. Note we moved everything after + // Range.end() already into a new block, so Range.end() is no longer valid. + LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end()); + + auto NewEnd = LoopBB->end(); + + MachineBasicBlock::iterator I = Range.begin(); + B.setInsertPt(*LoopBB, I); Register CondReg; - for (MachineOperand &Op : MI.uses()) { - if (!Op.isReg()) - continue; + for (MachineInstr &MI : make_range(NewBegin, NewEnd)) { + for (MachineOperand &Op : MI.uses()) { + if (!Op.isReg() || Op.isDef()) + continue; - assert(!Op.isDef()); - if (SGPROperandRegs.count(Op.getReg())) { - LLT OpTy = MRI.getType(Op.getReg()); - unsigned OpSize = OpTy.getSizeInBits(); + if (SGPROperandRegs.count(Op.getReg())) { + LLT OpTy = MRI.getType(Op.getReg()); + unsigned OpSize = OpTy.getSizeInBits(); - // Can only do a readlane of 32-bit pieces. - if (OpSize == 32) { - // Avoid extra copies in the simple case of one 32-bit register. - Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - MRI.setType(CurrentLaneOpReg, OpTy); + // Can only do a readlane of 32-bit pieces. + if (OpSize == 32) { + // Avoid extra copies in the simple case of one 32-bit register. + Register CurrentLaneOpReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + MRI.setType(CurrentLaneOpReg, OpTy); - constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg) - .addReg(Op.getReg()); + constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI); + // Read the next variant <- also loop target. 
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(Op.getReg()); - Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - bool First = CondReg == AMDGPU::NoRegister; - if (First) - CondReg = NewCondReg; - - // Compare the just read M0 value to all possible Idx values. - B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) - .addDef(NewCondReg) - .addReg(CurrentLaneOpReg) - .addReg(Op.getReg()); - Op.setReg(CurrentLaneOpReg); - - if (!First) { - Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); - - // If there are multiple operands to consider, and the conditions. - B.buildInstr(AMDGPU::S_AND_B64) - .addDef(AndReg) - .addReg(NewCondReg) - .addReg(CondReg); - CondReg = AndReg; - } - } else { - LLT S32 = LLT::scalar(32); - SmallVector ReadlanePieces; - - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. - - bool Is64 = OpSize % 64 == 0; - - LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32); - unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64 - : AMDGPU::V_CMP_EQ_U32_e64; - - // The compares can be done as 64-bit, but the extract needs to be done - // in 32-bit pieces. - - // Insert the unmerge before the loop. - - B.setMBB(MBB); - auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg()); - B.setInstr(*I); - - unsigned NumPieces = Unmerge->getNumOperands() - 1; - for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) { - unsigned UnmergePiece = Unmerge.getReg(PieceIdx); - - Register CurrentLaneOpReg; - if (Is64) { - Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); - Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); - - MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); - MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); - MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. 
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegLo) - .addReg(UnmergePiece, 0, AMDGPU::sub0); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpRegHi) - .addReg(UnmergePiece, 0, AMDGPU::sub1); - - CurrentLaneOpReg = - B.buildMerge(LLT::scalar(64), - {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) - .getReg(0); - - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); - - if (OpTy.getScalarSizeInBits() == 64) { - // If we need to produce a 64-bit element vector, so use the - // merged pieces - ReadlanePieces.push_back(CurrentLaneOpReg); - } else { - // 32-bit element type. - ReadlanePieces.push_back(CurrentLaneOpRegLo); - ReadlanePieces.push_back(CurrentLaneOpRegHi); - } - } else { - CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); - MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); - - // Read the next variant <- also loop target. - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - CurrentLaneOpReg) - .addReg(UnmergePiece); - ReadlanePieces.push_back(CurrentLaneOpReg); - } - - Register NewCondReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + Register NewCondReg = MRI.createVirtualRegister(WaveRC); bool First = CondReg == AMDGPU::NoRegister; if (First) CondReg = NewCondReg; - B.buildInstr(CmpOp) + // Compare the just read M0 value to all possible Idx values. + B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64) .addDef(NewCondReg) .addReg(CurrentLaneOpReg) - .addReg(UnmergePiece); + .addReg(Op.getReg()); + Op.setReg(CurrentLaneOpReg); if (!First) { - Register AndReg - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + Register AndReg = MRI.createVirtualRegister(WaveRC); // If there are multiple operands to consider, and the conditions. 
- B.buildInstr(AMDGPU::S_AND_B64) + B.buildInstr(WaveAndOpc) .addDef(AndReg) .addReg(NewCondReg) .addReg(CondReg); CondReg = AndReg; } - } - - // FIXME: Build merge seems to switch to CONCAT_VECTORS but not - // BUILD_VECTOR - if (OpTy.isVector()) { - auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); } else { - auto Merge = B.buildMerge(OpTy, ReadlanePieces); - Op.setReg(Merge.getReg(0)); - } + LLT S32 = LLT::scalar(32); + SmallVector ReadlanePieces; - MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID)); + // The compares can be done as 64-bit, but the extract needs to be done + // in 32-bit pieces. + + bool Is64 = OpSize % 64 == 0; + + LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32); + unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64 + : AMDGPU::V_CMP_EQ_U32_e64; + + // The compares can be done as 64-bit, but the extract needs to be done + // in 32-bit pieces. + + // Insert the unmerge before the loop. + + B.setMBB(MBB); + auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg()); + B.setInstr(*I); + + unsigned NumPieces = Unmerge->getNumOperands() - 1; + for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) { + Register UnmergePiece = Unmerge.getReg(PieceIdx); + + Register CurrentLaneOpReg; + if (Is64) { + Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32); + Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32); + + MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass); + MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass); + MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegLo) + .addReg(UnmergePiece, 0, AMDGPU::sub0); + + // Read the next variant <- also loop target. 
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpRegHi) + .addReg(UnmergePiece, 0, AMDGPU::sub1); + + CurrentLaneOpReg = + B.buildMerge(LLT::scalar(64), + {CurrentLaneOpRegLo, CurrentLaneOpRegHi}) + .getReg(0); + + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass); + + if (OpTy.getScalarSizeInBits() == 64) { + // If we need to produce a 64-bit element vector, so use the + // merged pieces + ReadlanePieces.push_back(CurrentLaneOpReg); + } else { + // 32-bit element type. + ReadlanePieces.push_back(CurrentLaneOpRegLo); + ReadlanePieces.push_back(CurrentLaneOpRegHi); + } + } else { + CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass); + MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass); + + // Read the next variant <- also loop target. + BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + CurrentLaneOpReg) + .addReg(UnmergePiece); + ReadlanePieces.push_back(CurrentLaneOpReg); + } + + Register NewCondReg = MRI.createVirtualRegister(WaveRC); + bool First = CondReg == AMDGPU::NoRegister; + if (First) + CondReg = NewCondReg; + + B.buildInstr(CmpOp) + .addDef(NewCondReg) + .addReg(CurrentLaneOpReg) + .addReg(UnmergePiece); + + if (!First) { + Register AndReg = MRI.createVirtualRegister(WaveRC); + + // If there are multiple operands to consider, and the conditions. 
+ B.buildInstr(WaveAndOpc) + .addDef(AndReg) + .addReg(NewCondReg) + .addReg(CondReg); + CondReg = AndReg; + } + } + + // FIXME: Build merge seems to switch to CONCAT_VECTORS but not + // BUILD_VECTOR + if (OpTy.isVector()) { + auto Merge = B.buildBuildVector(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); + } else { + auto Merge = B.buildMerge(OpTy, ReadlanePieces); + Op.setReg(Merge.getReg(0)); + } + + MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID)); + } } } } @@ -876,16 +919,16 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( B.setInsertPt(*LoopBB, LoopBB->end()); // Update EXEC, save the original EXEC value to VCC. - B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64) + B.buildInstr(AndSaveExecOpc) .addDef(NewExec) .addReg(CondReg, RegState::Kill); MRI.setSimpleHint(NewExec, CondReg); // Update EXEC, switch all done bits to 0 and all todo bits to 1. - B.buildInstr(AMDGPU::S_XOR_B64_term) - .addDef(AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) + B.buildInstr(XorTermOpc) + .addDef(ExecReg) + .addReg(ExecReg) .addReg(NewExec); // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use @@ -896,14 +939,60 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( .addMBB(LoopBB); // Save the EXEC mask before the loop. - BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg) - .addReg(AMDGPU::EXEC); + BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg) + .addReg(ExecReg); // Restore the EXEC mask after the loop. B.setMBB(*RestoreExecBB); - B.buildInstr(AMDGPU::S_MOV_B64_term) - .addDef(AMDGPU::EXEC) + B.buildInstr(MovTermOpc) + .addDef(ExecReg) .addReg(SaveExecReg); + + // Restore the insert point before the original instruction. + B.setInsertPt(MBB, MBB.end()); + + return true; +} + +// Return any unique registers used by \p MI at \p OpIndices that need to be +// handled in a waterfall loop. Returns these registers in \p +// SGPROperandRegs. 
Returns true if there are any operands to handle and a +// waterfall loop is necessary. +bool AMDGPURegisterBankInfo::collectWaterfallOperands( + SmallSet &SGPROperandRegs, MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef OpIndices) const { + for (unsigned Op : OpIndices) { + assert(MI.getOperand(Op).isUse()); + Register Reg = MI.getOperand(Op).getReg(); + const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI); + if (OpBank->getID() == AMDGPU::VGPRRegBankID) + SGPROperandRegs.insert(Reg); + } + + // No operands need to be replaced, so no need to loop. + return !SGPROperandRegs.empty(); +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef OpIndices) const { + // Use a set to avoid extra readfirstlanes in the case where multiple operands + // are the same register. + SmallSet SGPROperandRegs; + + if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices)) + return false; + + MachineBasicBlock::iterator I = MI.getIterator(); + return executeInWaterfallLoop(B, make_range(I, std::next(I)), + SGPROperandRegs, MRI); +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef OpIndices) const { + MachineIRBuilder B(MI); + return executeInWaterfallLoop(B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -960,8 +1049,13 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, SmallVector SrcRegs(OpdMapper.getVRegs(1)); // If the pointer is an SGPR, we have nothing to do.
- if (SrcRegs.empty()) - return false; + if (SrcRegs.empty()) { + Register PtrReg = MI.getOperand(1).getReg(); + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + if (PtrBank == &AMDGPU::SGPRRegBank) + return false; + SrcRegs.push_back(PtrReg); + } assert(LoadSize % MaxNonSmrdLoadSize == 0); @@ -1013,6 +1107,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, return true; } +bool AMDGPURegisterBankInfo::applyMappingImage( + MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RsrcIdx) const { + const int NumDefs = MI.getNumExplicitDefs(); + + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += NumDefs + 1; + + // Insert copies to VGPR arguments. + applyDefaultMapping(OpdMapper); + + // Fixup any SGPR arguments. + SmallVector SGPRIndexes; + for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + // If this intrinsic has a sampler, it immediately follows rsrc. + if (I == RsrcIdx || I == RsrcIdx + 1) + SGPRIndexes.push_back(I); + } + + executeInWaterfallLoop(MI, MRI, SGPRIndexes); + return true; +} + // For cases where only a single copy is inserted for matching register banks. // Replace the register in the instruction operand static void substituteSimpleCopyRegs( @@ -1024,6 +1145,184 @@ static void substituteSimpleCopyRegs( } } +/// Handle register layout difference for f16 images for some subtargets. 
+Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!Subtarget.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + LLT StoreVT = MRI.getType(Reg); + if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + return Reg; + + auto Unmerge = B.buildUnmerge(S16, Reg); + + + SmallVector WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(Unmerge.getReg(I)); + + const LLT S32 = LLT::scalar(32); + int NumElts = StoreVT.getNumElements(); + + return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +static std::pair +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + int64_t Const; + if (mi_match(Reg, MRI, m_ICst(Const))) + return std::make_pair(Register(), Const); + + Register Base; + if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const)))) + return std::make_pair(Base, Const); + + // TODO: Handle G_OR used for add case + return std::make_pair(Reg, 0); +} + +std::pair +AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned ImmOffset; + const LLT S32 = LLT::scalar(32); + + std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(), + OrigOffset); + + unsigned C1 = 0; + if (ImmOffset != 0) { + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. 
+ unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + C1 = ImmOffset; + if (Overflow != 0) { + if (!BaseReg) + BaseReg = B.buildConstant(S32, Overflow).getReg(0); + else { + auto OverflowVal = B.buildConstant(S32, Overflow); + BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + } + } + } + + if (!BaseReg) + BaseReg = B.buildConstant(S32, 0).getReg(0); + + return {BaseReg, C1}; +} + +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + return mi_match(Reg, MRI, m_ICst(C)) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +MachineInstr * +AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const { + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(B, MI, MRI, {2, 4}); + + // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. + + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int EltSize = Ty.getScalarSizeInBits(); + int Size = Ty.getSizeInBits(); + + // FIXME: Broken integer truncstore. + if (EltSize != 32) + report_fatal_error("unhandled intrinsic store"); + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + const int MemSize = (*MI.memoperands_begin())->getSize(); + + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + + unsigned ImmOffset; + std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); + + const bool Offen = !isZero(VOffset, MRI); + + unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact; + switch (8 * MemSize) { + case 8: + Opc = Offen ? 
AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + break; + case 16: + Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + break; + default: + Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + break; + } + + + // Set the insertion point back to the instruction in case it was moved into a + // loop. + B.setInstr(MI); + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .cloneMemRefs(MI); + + // FIXME: We need a way to report failure from applyMappingImpl. + // Insert constrain copies before inserting the loop. + if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) + report_fatal_error("failed to constrain selected store intrinsic"); + + return MIB; +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1289,12 +1588,202 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } - case AMDGPU::G_EXTRACT_VECTOR_ELT: - applyDefaultMapping(OpdMapper); - executeInWaterfallLoop(MI, MRI, { 2 }); + case AMDGPU::G_BUILD_VECTOR: + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy != LLT::vector(2, 16)) + break; + + assert(MI.getNumOperands() == 3 && OpdMapper.getVRegs(0).empty()); + substituteSimpleCopyRegs(OpdMapper, 1); + substituteSimpleCopyRegs(OpdMapper, 2); + + const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI); + if (DstBank == &AMDGPU::SGPRRegBank) + break; // Can use S_PACK_* instructions. 
+ + MachineIRBuilder B(MI); + + Register Lo = MI.getOperand(1).getReg(); + Register Hi = MI.getOperand(2).getReg(); + const LLT S32 = LLT::scalar(32); + + const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI); + const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI); + + Register ZextLo; + Register ShiftHi; + + if (Opc == AMDGPU::G_BUILD_VECTOR) { + ZextLo = B.buildZExt(S32, Lo).getReg(0); + MRI.setRegBank(ZextLo, *BankLo); + + Register ZextHi = B.buildZExt(S32, Hi).getReg(0); + MRI.setRegBank(ZextHi, *BankHi); + + auto ShiftAmt = B.buildConstant(S32, 16); + MRI.setRegBank(ShiftAmt.getReg(0), *BankHi); + + ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0); + MRI.setRegBank(ShiftHi, *BankHi); + } else { + Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0); + MRI.setRegBank(MaskLo, *BankLo); + + auto ShiftAmt = B.buildConstant(S32, 16); + MRI.setRegBank(ShiftAmt.getReg(0), *BankHi); + + ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0); + MRI.setRegBank(ShiftHi, *BankHi); + + ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0); + MRI.setRegBank(ZextLo, *BankLo); + } + + auto Or = B.buildOr(S32, ZextLo, ShiftHi); + MRI.setRegBank(Or.getReg(0), *DstBank); + + B.buildBitcast(DstReg, Or); + MI.eraseFromParent(); return; + } + case AMDGPU::G_EXTRACT_VECTOR_ELT: { + SmallVector DstRegs(OpdMapper.getVRegs(0)); + + assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty()); + + if (DstRegs.empty()) { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, { 2 }); + return; + } + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register IdxReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + (void)DstTy; + + assert(DstTy.getSizeInBits() == 64); + + LLT SrcTy = MRI.getType(SrcReg); + const LLT S32 = LLT::scalar(32); + LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); + + MachineIRBuilder B(MI); + auto CastSrc = B.buildBitcast(Vec32, SrcReg); + auto One = B.buildConstant(S32, 
1); + + // Split the vector index into 32-bit pieces. Prepare to move all of the + // new instructions into a waterfall loop if necessary. + // + // Don't put the bitcast or constant in the loop. + MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB()); + + // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). + auto IdxLo = B.buildShl(S32, IdxReg, One); + auto IdxHi = B.buildAdd(S32, IdxLo, One); + B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo); + B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi); + + const ValueMapping &DstMapping + = OpdMapper.getInstrMapping().getOperandMapping(0); + + // FIXME: Should be getting from mapping or not? + const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); + MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank); + MRI.setRegBank(CastSrc.getReg(0), *SrcBank); + MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank); + + SmallSet OpsToWaterfall; + if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 2 })) { + MI.eraseFromParent(); + return; + } + + // Remove the original instruction to avoid potentially confusing the + // waterfall loop logic. 
+ B.setInstr(*Span.begin()); + MI.eraseFromParent(); + executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), + OpsToWaterfall, MRI); + return; + } + case AMDGPU::G_INSERT_VECTOR_ELT: { + SmallVector InsRegs(OpdMapper.getVRegs(2)); + + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(1).empty()); + assert(OpdMapper.getVRegs(3).empty()); + + if (InsRegs.empty()) { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, { 3 }); + return; + } + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register InsReg = MI.getOperand(2).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT InsTy = MRI.getType(InsReg); + (void)InsTy; + + assert(InsTy.getSizeInBits() == 64); + + const LLT S32 = LLT::scalar(32); + LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32); + + MachineIRBuilder B(MI); + auto CastSrc = B.buildBitcast(Vec32, SrcReg); + auto One = B.buildConstant(S32, 1); + + // Split the vector index into 32-bit pieces. Prepare to move all of the + // new instructions into a waterfall loop if necessary. + // + // Don't put the bitcast or constant in the loop. + MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB()); + + // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1). 
+ auto IdxLo = B.buildShl(S32, IdxReg, One); + auto IdxHi = B.buildAdd(S32, IdxLo, One); + + auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo); + auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi); + B.buildBitcast(DstReg, InsHi); + + const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI); + const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI); + const RegisterBank *InsSrcBank = getRegBank(InsReg, MRI, *TRI); + + MRI.setRegBank(InsReg, *InsSrcBank); + MRI.setRegBank(CastSrc.getReg(0), *SrcBank); + MRI.setRegBank(InsLo.getReg(0), *DstBank); + MRI.setRegBank(InsHi.getReg(0), *DstBank); + MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank); + MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank); + + + SmallSet OpsToWaterfall; + if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 3 })) { + MI.eraseFromParent(); + return; + } + + B.setInstr(*Span.begin()); + MI.eraseFromParent(); + + executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()), + OpsToWaterfall, MRI); + return; + } case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_s_buffer_load: { // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS executeInWaterfallLoop(MI, MRI, { 2, 3 }); @@ -1303,8 +1792,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_readlane: { substituteSimpleCopyRegs(OpdMapper, 2); - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(3).empty()); // Make sure the index is an SGPR. It doesn't make sense to run this in a // waterfall loop, so assume it's a uniform value. 
@@ -1312,9 +1801,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case Intrinsic::amdgcn_writelane: { - assert(empty(OpdMapper.getVRegs(0))); - assert(empty(OpdMapper.getVRegs(2))); - assert(empty(OpdMapper.getVRegs(3))); + assert(OpdMapper.getVRegs(0).empty()); + assert(OpdMapper.getVRegs(2).empty()); + assert(OpdMapper.getVRegs(3).empty()); substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val constrainOpWithReadfirstlane(MI, MRI, 2); // Source value @@ -1327,7 +1816,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_buffer_load: { executeInWaterfallLoop(MI, MRI, { 2 }); return; @@ -1335,23 +1825,70 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: { // This is only allowed to execute with 1 lane, so readfirstlane is safe. - assert(empty(OpdMapper.getVRegs(0))); + assert(OpdMapper.getVRegs(0).empty()); substituteSimpleCopyRegs(OpdMapper, 3); constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + // Only the first lane is executes, so readfirstlane is safe. + substituteSimpleCopyRegs(OpdMapper, 1); + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + return; + } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // Only the first lane is executes, so readfirstlane is safe. + constrainOpWithReadfirstlane(MI, MRI, 1); // M0 + return; + } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // FIXME: Should this use a waterfall loop? 
constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } - default: + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 4}); + return; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_struct_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 5}); + return; + } + default: { + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. + if (RSrcIntrin->IsImage) { + applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + return; + } + } + break; } + } break; } - case AMDGPU::G_LOAD: { + case AMDGPU::G_LOAD: + case AMDGPU::G_ZEXTLOAD: + case AMDGPU::G_SEXTLOAD: { if (applyMappingWideLoad(MI, OpdMapper, MRI)) return; break; @@ -1451,26 +1988,72 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { MI.getNumOperands()); } +const RegisterBankInfo::InstructionMapping & +AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const { + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += MI.getNumExplicitDefs() + 1; + + const int NumOps = MI.getNumOperands(); + SmallVector OpdsMapping(NumOps); + + // TODO: Should packed/unpacked D16 difference be reported here as part of + // the value mapping? 
+ for (int I = 0; I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + Register OpReg = MI.getOperand(I).getReg(); + unsigned Size = getSizeInBits(OpReg, MRI, *TRI); + + // FIXME: Probably need a new intrinsic register bank searchable table to + // handle arbitrary intrinsics easily. + // + // If this has a sampler, it immediately follows rsrc. + const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1; + + if (MustBeSGPR) { + // If this must be an SGPR, so we must report whatever it is as legal. + unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size); + } else { + // Some operands must be VGPR, and these are easy to copy to. + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } + } + + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps); +} + const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - SmallVector OpdsMapping(MI.getNumOperands()); + SmallVector OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + Register PtrReg = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(PtrReg); + unsigned AS = PtrTy.getAddressSpace(); + unsigned PtrSize = PtrTy.getSizeInBits(); const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if (isInstrUniform(MI)) { + const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); + + if (PtrBank == &AMDGPU::SGPRRegBank && + (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = 
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); } else { ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy); - // FIXME: What would happen if we used SGPRRegBankID here? PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); } @@ -1494,6 +2077,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? Bank->getID() : Default; } + +static unsigned regBankUnion(unsigned RB0, unsigned RB1) { + return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? + AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + // Lie and claim anything is legal, even though this needs to be an SGPR + // applyMapping will have to deal with it as a waterfall loop. + unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID); + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(Bank, Size); +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1536,7 +2144,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { int ResultBank = -1; for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { - unsigned Reg = MI.getOperand(I).getReg(); + Register Reg = MI.getOperand(I).getReg(); const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI); // FIXME: Assuming VGPR for any undetermined inputs. 
@@ -1660,7 +2268,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } - case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: @@ -1669,15 +2276,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_LSHR: case AMDGPU::G_ASHR: case AMDGPU::G_UADDO: - case AMDGPU::G_SADDO: case AMDGPU::G_USUBO: - case AMDGPU::G_SSUBO: case AMDGPU::G_UADDE: case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: - case AMDGPU::G_UMULH: - case AMDGPU::G_SMULH: case AMDGPU::G_SMIN: case AMDGPU::G_SMAX: case AMDGPU::G_UMIN: @@ -1692,17 +2295,32 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FPTOUI: case AMDGPU::G_FMUL: case AMDGPU::G_FMA: + case AMDGPU::G_FMAD: case AMDGPU::G_FSQRT: + case AMDGPU::G_FFLOOR: + case AMDGPU::G_FCEIL: + case AMDGPU::G_FRINT: case AMDGPU::G_SITOFP: case AMDGPU::G_UITOFP: case AMDGPU::G_FPTRUNC: case AMDGPU::G_FPEXT: case AMDGPU::G_FEXP2: case AMDGPU::G_FLOG2: + case AMDGPU::G_FMINNUM: + case AMDGPU::G_FMAXNUM: + case AMDGPU::G_FMINNUM_IEEE: + case AMDGPU::G_FMAXNUM_IEEE: case AMDGPU::G_FCANONICALIZE: case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: + case AMDGPU::G_AMDGPU_FFBH_U32: return getDefaultMappingVOP(MI); + case AMDGPU::G_UMULH: + case AMDGPU::G_SMULH: { + if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI)) + return getDefaultMappingSOP(MI); + return getDefaultMappingVOP(MI); + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -1710,12 +2328,19 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_FCONSTANT: case AMDGPU::G_CONSTANT: - case AMDGPU::G_FRAME_INDEX: + case AMDGPU::G_GLOBAL_VALUE: case AMDGPU::G_BLOCK_ADDR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = 
AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case AMDGPU::G_FRAME_INDEX: { + // TODO: This should be the same as other constants, but eliminateFrameIndex + // currently assumes VALU uses. + unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + break; + } case AMDGPU::G_INSERT: { unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; @@ -1737,8 +2362,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = nullptr; break; } - case AMDGPU::G_MERGE_VALUES: case AMDGPU::G_BUILD_VECTOR: + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (DstTy == LLT::vector(2, 16)) { + unsigned DstSize = DstTy.getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID); + + OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize); + OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize); + break; + } + + LLVM_FALLTHROUGH; + } + case AMDGPU::G_MERGE_VALUES: case AMDGPU::G_CONCAT_VECTORS: { unsigned Bank = isSALUMapping(MI) ? 
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; @@ -1760,6 +2402,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_CTTZ_ZERO_UNDEF: case AMDGPU::G_CTPOP: case AMDGPU::G_BSWAP: + case AMDGPU::G_BITREVERSE: case AMDGPU::G_FABS: case AMDGPU::G_FNEG: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); @@ -1848,7 +2491,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { Op3Bank == AMDGPU::SGPRRegBankID && (Size == 32 || (Size == 64 && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) && - MF.getSubtarget().hasScalarCompareEq64())); + Subtarget.hasScalarCompareEq64())); unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID; @@ -1859,14 +2502,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_EXTRACT_VECTOR_ELT: { - unsigned OutputBankID = isSALUMapping(MI) ? - AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; + // VGPR index can be used for waterfall when indexing a SGPR vector. + unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank); - OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize); - OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize); + OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize); // The index can be either if the source vector is VGPR. 
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize); @@ -1879,15 +2524,18 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); - unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); - unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); + unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), + MRI, *TRI); + unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize); - OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize); - OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize); + OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize); + OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID, + InsertSize); // The index can be either if the source vector is VGPR. 
- OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize); break; } case AMDGPU::G_UNMERGE_VALUES: { @@ -1903,11 +2551,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { default: return getInvalidInstructionMapping(); - case Intrinsic::maxnum: - case Intrinsic::minnum: case Intrinsic::amdgcn_div_fmas: case Intrinsic::amdgcn_trig_preop: case Intrinsic::amdgcn_sin: @@ -1938,6 +2584,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mbcnt_hi: case Intrinsic::amdgcn_ubfe: case Intrinsic::amdgcn_sbfe: + case Intrinsic::amdgcn_mul_u24: + case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_lerp: case Intrinsic::amdgcn_sad_u8: case Intrinsic::amdgcn_msad_u8: @@ -1956,10 +2604,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_udot4: case Intrinsic::amdgcn_sdot8: case Intrinsic::amdgcn_udot8: - case Intrinsic::amdgcn_fdiv_fast: case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_swizzle: case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_update_dpp: @@ -2040,7 +2688,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case Intrinsic::amdgcn_readlane: { // This must be an SGPR, but accept a VGPR. 
- unsigned IdxReg = MI.getOperand(3).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); @@ -2055,10 +2703,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case Intrinsic::amdgcn_writelane: { unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits(); unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID); - unsigned IdxReg = MI.getOperand(3).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); @@ -2081,9 +2729,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { - default: - return getInvalidInstructionMapping(); + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: @@ -2123,18 +2770,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: - OpdsMapping[0] = nullptr; // IntrinsicID - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? 
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_buffer_load: { Register RSrc = MI.getOperand(2).getReg(); // SGPR @@ -2169,11 +2809,97 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } - case Intrinsic::amdgcn_end_cf: { + case Intrinsic::amdgcn_end_cf: + case Intrinsic::amdgcn_init_exec: { unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case Intrinsic::amdgcn_else: { + unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize); + break; + } + case Intrinsic::amdgcn_kill: { + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + break; + } + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_tbuffer_load: { + // FIXME: Should make intrinsic ID the last operand of the instruction, + // then this would be the same as store + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case 
Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_tbuffer_load: { + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_init_exec_from_input: { + unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + break; + } + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_br: { + OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + + // This must be an SGPR, but accept a VGPR. 
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); + break; + } + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { + // This must be an SGPR, but accept a VGPR. + unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); + break; + } + default: + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. + if (RSrcIntrin->IsImage) + return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); + } + + return getInvalidInstructionMapping(); } break; } @@ -2216,6 +2942,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_LOAD: + case AMDGPU::G_ZEXTLOAD: + case AMDGPU::G_SEXTLOAD: return getInstrMappingForLoad(MI); case AMDGPU::G_ATOMICRMW_XCHG: @@ -2228,6 +2956,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_ATOMICRMW_MIN: case AMDGPU::G_ATOMICRMW_UMAX: case AMDGPU::G_ATOMICRMW_UMIN: + case AMDGPU::G_ATOMICRMW_FADD: case AMDGPU::G_ATOMIC_CMPXCHG: { return getDefaultMappingAllVGPR(MI); } @@ -2247,4 +2976,3 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getOperandsMapping(OpdsMapping), MI.getNumOperands()); } - diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f3a96e2a612..a14b7496111 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -13,6 +13,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H 
+#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -23,7 +25,9 @@ namespace llvm { class LLT; +class GCNSubtarget; class MachineIRBuilder; +class SIInstrInfo; class SIRegisterInfo; class TargetRegisterInfo; @@ -36,9 +40,27 @@ protected: #include "AMDGPUGenRegisterBank.inc" }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { + const GCNSubtarget &Subtarget; const SIRegisterInfo *TRI; + const SIInstrInfo *TII; - void executeInWaterfallLoop(MachineInstr &MI, + bool collectWaterfallOperands( + SmallSet &SGPROperandRegs, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef OpIndices) const; + + bool executeInWaterfallLoop( + MachineIRBuilder &B, + iterator_range Range, + SmallSet &SGPROperandRegs, + MachineRegisterInfo &MRI) const; + + bool executeInWaterfallLoop(MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef OpIndices) const; + bool executeInWaterfallLoop(MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef OpIndices) const; @@ -47,6 +69,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { bool applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; + bool + applyMappingImage(MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + + std::pair + splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; + + MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. 
void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -58,6 +93,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + // Return a value mapping for an operand that is required to be an SGPR. + const ValueMapping *getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + // Return a value mapping for an operand that is required to be a VGPR. + const ValueMapping *getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. void split64BitValueForMapping(MachineIRBuilder &B, @@ -90,8 +135,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( const MachineInstr &MI) const; + + const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const; + public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); + AMDGPURegisterBankInfo(const GCNSubtarget &STI); unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td index 9555694fb10..00f53b15757 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// def SGPRRegBank : RegisterBank<"SGPR", - [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512] + [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] >; def VGPRRegBank : RegisterBank<"VGPR", - [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, 
VReg_512] + [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512, VReg_1024] >; def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>; // It is helpful to distinguish conditions from ordinary SGPRs. -def VCCRegBank : RegisterBank <"VCC", [SReg_64]>; +def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 7cffdf1a4dc..9806e6b0714 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. //===----------------------------------------------------------------------===// -unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) { - static const unsigned SubRegs[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, - AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, - AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, - AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24, - AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29, - AMDGPU::sub30, AMDGPU::sub31 - }; +// Table of NumRegs sized pieces at every 32-bit offset. 
+static const uint16_t SubRegFromChannelTable[][32] = { + { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, + AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, + AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 + }, + { + AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4, + AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, + AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, + AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16, + AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, + AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, + AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28, + AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, + AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, + AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, + AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, + AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, + AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, + AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, + 
AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + }, + { + AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, + AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, + AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, + AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, + AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, + AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, + AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, + AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister + } +}; - assert(Channel < array_lengthof(SubRegs)); - return SubRegs[Channel]; +// FIXME: TableGen should generate something to make this manageable for all +// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. 
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) { + const unsigned NumRegIndex = NumRegs - 1; + + assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && + "Not implemented"); + assert(Channel < array_lengthof(SubRegFromChannelTable[0])); + return SubRegFromChannelTable[NumRegIndex][Channel]; } void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 3453a8c1b0b..9e713ca804a 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) - static unsigned getSubRegFromChannel(unsigned Channel); + static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); void reserveRegisterTuples(BitVector &, unsigned Reg) const; }; diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td index f8703c36127..26b8b784027 100644 --- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -81,6 +81,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; @@ -92,6 +94,8 @@ def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; +def : SourceOfDivergence; +def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; def : SourceOfDivergence; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 1eb9b83456c..3bb6dd4571c 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ 
b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -175,6 +175,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : HasFminFmaxLegacy(true), EnablePromoteAlloca(false), HasTrigReducedRange(false), + MaxWavesPerEU(10), LocalMemorySize(0), WavefrontSize(0) { } @@ -261,6 +262,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AddNoCarryInsts(false), HasUnpackedD16VMem(false), LDSMisalignedBug(false), + HasMFMAInlineLiteralBug(false), ScalarizeGlobal(false), @@ -278,9 +280,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)), TLInfo(TM, *this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { + MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast(RegBankInfo.get()), TM)); } @@ -489,28 +492,28 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { } uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F, - unsigned &MaxAlign) const { + Align &MaxAlign) const { assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL); const DataLayout &DL = F.getParent()->getDataLayout(); uint64_t ExplicitArgBytes = 0; - MaxAlign = 1; + MaxAlign = Align::None(); for (const Argument &Arg : F.args()) { Type *ArgTy = Arg.getType(); - unsigned Align = DL.getABITypeAlignment(ArgTy); + const Align Alignment(DL.getABITypeAlignment(ArgTy)); uint64_t AllocSize = DL.getTypeAllocSize(ArgTy); - ExplicitArgBytes = alignTo(ExplicitArgBytes, Align) + AllocSize; - MaxAlign = std::max(MaxAlign, Align); + ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize; + 
MaxAlign = std::max(MaxAlign, Alignment); } return ExplicitArgBytes; } unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, - unsigned &MaxAlign) const { + Align &MaxAlign) const { uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign); unsigned ExplicitOffset = getExplicitKernelArgOffset(F); @@ -518,7 +521,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F, uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes; unsigned ImplicitBytes = getImplicitArgNumBytes(F); if (ImplicitBytes != 0) { - unsigned Alignment = getAlignmentForImplicitArgPtr(); + const Align Alignment = getAlignmentForImplicitArgPtr(); TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes; } @@ -566,7 +569,7 @@ bool GCNSubtarget::hasMadF16() const { unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { if (getGeneration() >= AMDGPUSubtarget::GFX10) - return 10; + return getMaxWavesPerEU(); if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { if (SGPRs <= 80) @@ -591,25 +594,12 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { } unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { - if (VGPRs <= 24) - return 10; - if (VGPRs <= 28) - return 9; - if (VGPRs <= 32) - return 8; - if (VGPRs <= 36) - return 7; - if (VGPRs <= 40) - return 6; - if (VGPRs <= 48) - return 5; - if (VGPRs <= 64) - return 4; - if (VGPRs <= 84) - return 3; - if (VGPRs <= 128) - return 2; - return 1; + unsigned MaxWaves = getMaxWavesPerEU(); + unsigned Granule = getVGPRAllocGranule(); + if (VGPRs < Granule) + return MaxWaves; + unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule; + return std::min(std::max(getTotalNumVGPRs() / RoundedRegs, 1u), MaxWaves); } unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { @@ -629,6 +619,20 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { return 2; // VCC. 
} +unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF, + unsigned LDSSize, + unsigned NumSGPRs, + unsigned NumVGPRs) const { + unsigned Occupancy = + std::min(getMaxWavesPerEU(), + getOccupancyWithLocalMemSize(LDSSize, MF.getFunction())); + if (NumSGPRs) + Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs)); + if (NumVGPRs) + Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs)); + return Occupancy; +} + unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); @@ -878,8 +882,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { void GCNSubtarget::getPostRAMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique(&InstrInfo)); - Mutations.push_back(llvm::make_unique(&InstrInfo)); + Mutations.push_back(std::make_unique(&InstrInfo)); + Mutations.push_back(std::make_unique(&InstrInfo)); } const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 78c3b823946..936feb00c62 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -75,6 +75,7 @@ protected: bool HasFminFmaxLegacy; bool EnablePromoteAlloca; bool HasTrigReducedRange; + unsigned MaxWavesPerEU; int LocalMemorySize; unsigned WavefrontSize; @@ -195,8 +196,8 @@ public: return LocalMemorySize; } - unsigned getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? 8 : 4; + Align getAlignmentForImplicitArgPtr() const { + return isAmdHsaOS() ? Align(8) : Align(4); } /// Returns the offset in bytes from the start of the input buffer @@ -223,7 +224,9 @@ public: /// subtarget. virtual unsigned getMinWavesPerEU() const = 0; - unsigned getMaxWavesPerEU() const { return 10; } + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget without any kind of limitation. 
+ unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } /// Creates value range metadata on an workitemid.* inrinsic call or load. bool makeLIDRangeMetadata(Instruction *I) const; @@ -235,16 +238,17 @@ public: return 16; return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0); } - uint64_t getExplicitKernArgSize(const Function &F, - unsigned &MaxAlign) const; - unsigned getKernArgSegmentSize(const Function &F, - unsigned &MaxAlign) const; + uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; + unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; virtual ~AMDGPUSubtarget() {} }; class GCNSubtarget : public AMDGPUGenSubtargetInfo, public AMDGPUSubtarget { + + using AMDGPUSubtarget::getMaxWavesPerEU; + public: enum TrapHandlerAbi { TrapHandlerAbiNone = 0, @@ -362,6 +366,7 @@ protected: bool CaymanISA; bool CFALUBug; bool LDSMisalignedBug; + bool HasMFMAInlineLiteralBug; bool HasVertexCache; short TexVTXClauseSize; bool ScalarizeGlobal; @@ -416,7 +421,7 @@ public: return CallLoweringInfo.get(); } - const InstructionSelector *getInstructionSelector() const override { + InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } @@ -544,6 +549,14 @@ public: return GFX9Insts; } + bool hasScalarPackInsts() const { + return GFX9Insts; + } + + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } @@ -611,6 +624,11 @@ public: return getGeneration() >= AMDGPUSubtarget::GFX9; } + /// \returns If target supports S_DENORM_MODE. + bool hasDenormModeInst() const { + return getGeneration() >= AMDGPUSubtarget::GFX10; + } + bool useFlatForGlobal() const { return FlatForGlobal; } @@ -848,9 +866,7 @@ public: // on the pointer value itself may rely on the alignment / known low bits of // the pointer. 
Set this to something above the minimum to avoid needing // dynamic realignment in common cases. - unsigned getStackAlignment() const { - return 16; - } + Align getStackAlignment() const { return Align(16); } bool enableMachineScheduler() const override { return true; @@ -881,12 +897,6 @@ public: return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(); - } - /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { @@ -944,6 +954,14 @@ public: return HasDPP; } + bool hasDPPBroadcasts() const { + return HasDPP && getGeneration() < GFX10; + } + + bool hasDPPWavefrontShifts() const { + return HasDPP && getGeneration() < GFX10; + } + bool hasDPP8() const { return HasDPP8; } @@ -974,6 +992,10 @@ public: return SGPRInitBug; } + bool hasMFMAInlineLiteralBug() const { + return HasMFMAInlineLiteralBug; + } + bool has12DWordStoreHazard() const { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } @@ -1036,6 +1058,13 @@ public: /// VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; + /// Return occupancy for the given function. Used LDS and a number of + /// registers if provided. + /// Note, occupancy can be affected by the scratch allocation as well, but + /// we do not have enough information to compute it. + unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0, + unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; + /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. 
bool flatScratchIsPointer() const { @@ -1226,9 +1255,7 @@ public: return Gen; } - unsigned getStackAlignment() const { - return 4; - } + Align getStackAlignment() const { return Align(4); } R600Subtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS); diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0ea8db04c29..e8cf77161a1 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -238,16 +238,17 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeAMDGPUUseNativeCallsPass(*PR); initializeAMDGPUSimplifyLibCallsPass(*PR); initializeAMDGPUInlinerPass(*PR); + initializeAMDGPUPrintfRuntimeBindingPass(*PR); initializeGCNRegBankReassignPass(*PR); initializeGCNNSAReassignPass(*PR); } static std::unique_ptr createTLOF(const Triple &TT) { - return llvm::make_unique(); + return std::make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, llvm::make_unique()); + return new ScheduleDAGMILive(C, std::make_unique()); } static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { @@ -257,7 +258,7 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new GCNScheduleDAGMILive(C, make_unique(C)); + new GCNScheduleDAGMILive(C, std::make_unique(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); @@ -412,6 +413,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { PM.add(createAMDGPUExternalAAWrapperPass()); } PM.add(createAMDGPUUnifyMetadataPass()); + PM.add(createAMDGPUPrintfRuntimeBinding()); 
PM.add(createAMDGPUPropagateAttributesLatePass(this)); if (Internalize) { PM.add(createInternalizePass(mustPreserveGV)); @@ -482,7 +484,7 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl( // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, GPU, FS, *this); + I = std::make_unique(TargetTriple, GPU, FS, *this); } return I.get(); @@ -518,7 +520,7 @@ const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, GPU, FS, *this); + I = std::make_unique(TargetTriple, GPU, FS, *this); } I->setScalarizeGlobalBehavior(ScalarizeGlobal); @@ -659,6 +661,8 @@ void AMDGPUPassConfig::addIRPasses() { disablePass(&FuncletLayoutID); disablePass(&PatchableFunctionID); + addPass(createAMDGPUPrintfRuntimeBinding()); + // This must occur before inlining, as the inliner will not look through // bitcast calls. addPass(createAMDGPUFixFunctionBitcastsPass()); @@ -681,12 +685,6 @@ void AMDGPUPassConfig::addIRPasses() { // without ever running any passes on the second. addPass(createBarrierNoopPass()); - if (TM.getTargetTriple().getArch() == Triple::amdgcn) { - // TODO: May want to move later or split into an early and late one. - - addPass(createAMDGPUCodeGenPreparePass()); - } - // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. if (TM.getTargetTriple().getArch() == Triple::r600) addPass(createR600OpenCLImageTypeLoweringPass()); @@ -714,6 +712,11 @@ void AMDGPUPassConfig::addIRPasses() { } } + if (TM.getTargetTriple().getArch() == Triple::amdgcn) { + // TODO: May want to move later or split into an early and late one. 
+ addPass(createAMDGPUCodeGenPreparePass()); + } + TargetPassConfig::addIRPasses(); // EarlyCSE is not always strong enough to clean up what LSR produces. For @@ -1046,7 +1049,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( return true; if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG && - !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) { + !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) { return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg); } @@ -1095,7 +1098,7 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, - AMDGPU::SReg_128RegClass, + AMDGPU::SGPR_128RegClass, MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index aaed280a127..616196ad5ba 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -57,7 +57,7 @@ using namespace llvm; static cl::opt UnrollThresholdPrivate( "amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), - cl::init(2500), cl::Hidden); + cl::init(2000), cl::Hidden); static cl::opt UnrollThresholdLocal( "amdgpu-unroll-threshold-local", @@ -590,6 +590,61 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { return false; } +bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const { + switch (IID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + OpIndexes.push_back(0); + return true; + default: + return false; + } +} + +bool 
GCNTTIImpl::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + auto IntrID = II->getIntrinsicID(); + switch (IntrID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: { + const ConstantInt *IsVolatile = cast(II->getArgOperand(4)); + if (!IsVolatile->isZero()) + return false; + Module *M = II->getParent()->getParent()->getParent(); + Type *DestTy = II->getType(); + Type *SrcTy = NewV->getType(); + Function *NewDecl = + Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); + II->setArgOperand(0, NewV); + II->setCalledFunction(NewDecl); + return true; + } + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: { + unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ? + AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = NewV->getType()->getContext(); + ConstantInt *NewVal = (TrueAS == NewAS) ? 
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + II->replaceAllUsesWith(NewVal); + II->eraseFromParent(); + return true; + } + default: + return false; + } +} + unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { if (ST->hasVOP3PInsts()) { @@ -638,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getUnrollingPreferences(L, SE, UP); } +unsigned GCNTTIImpl::getUserCost(const User *U, + ArrayRef Operands) { + // Estimate extractelement elimination + if (const ExtractElementInst *EE = dyn_cast(U)) { + ConstantInt *CI = dyn_cast(EE->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(), + Idx); + } + + // Estimate insertelement elimination + if (const InsertElementInst *IE = dyn_cast(U)) { + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx); + } + + // Estimate different intrinsics, e.g. llvm.fabs + if (const IntrinsicInst *II = dyn_cast(U)) { + SmallVector Args(II->arg_operands()); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast(II)) + FMF = FPMO->getFastMathFlags(); + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, + FMF); + } + return BaseT::getUserCost(U, Operands); +} + unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 
} diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 6f1bf5a26f0..67f7f9074f1 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -46,10 +46,18 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { Triple TargetTriple; + const TargetSubtargetInfo *ST; + const TargetLoweringBase *TLI; + + const TargetSubtargetInfo *getST() const { return ST; } + const TargetLoweringBase *getTLI() const { return TLI; } + public: explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), - TargetTriple(TM->getTargetTriple()) {} + : BaseT(TM, F.getParent()->getDataLayout()), + TargetTriple(TM->getTargetTriple()), + ST(static_cast(TM->getSubtargetImpl(F))), + TLI(ST->getTargetLowering()) {} void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); @@ -183,6 +191,11 @@ public: return AMDGPUAS::FLAT_ADDRESS; } + bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, + Intrinsic::ID IID) const; + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const; + unsigned getVectorSplitCost() { return 0; } unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, @@ -191,7 +204,7 @@ public: bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - unsigned getInliningThresholdMultiplier() { return 7; } + unsigned getInliningThresholdMultiplier() { return 9; } int getInlinerVectorBonusPercent() { return 0; } @@ -201,6 +214,7 @@ public: int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned); + unsigned getUserCost(const User *U, ArrayRef Operands); }; class R600TTIImpl final : public BasicTTIImplBase { diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 12f2e9519c9..101ecfc0c87 100644 --- 
a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -1307,8 +1307,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, if (LandBlkHasOtherPred) { report_fatal_error("Extra register needed to handle CFG"); - unsigned CmpResReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + Register CmpResReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); report_fatal_error("Extra compare instruction needed to handle CFG"); insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, CmpResReg, DebugLoc()); @@ -1316,8 +1316,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // XXX: We are running this after RA, so creating virtual registers will // cause an assertion failure in the PostRA scheduling pass. - unsigned InitReg = - HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); + Register InitReg = + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, DebugLoc()); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 6d678966c98..9dd511fab57 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -143,6 +143,7 @@ public: ImmTyDLC, ImmTyGLC, ImmTySLC, + ImmTySWZ, ImmTyTFE, ImmTyD16, ImmTyClampSI, @@ -216,14 +217,15 @@ public: if (Kind == Token) return true; - if (Kind != Expression || !Expr) - return false; - // When parsing operands, we can't always tell if something was meant to be // a token, like 'gds', or an expression that references a global variable. // In this case, we assume the string is an expression, and if we need to // interpret is a token, then we treat the symbol name as the token. 
- return isa(Expr); + return isSymbolRefExpr(); + } + + bool isSymbolRefExpr() const { + return isExpr() && Expr && isa(Expr); } bool isImm() const override { @@ -274,8 +276,10 @@ public: isRegClass(AMDGPU::VReg_64RegClassID) || isRegClass(AMDGPU::VReg_96RegClassID) || isRegClass(AMDGPU::VReg_128RegClassID) || + isRegClass(AMDGPU::VReg_160RegClassID) || isRegClass(AMDGPU::VReg_256RegClassID) || - isRegClass(AMDGPU::VReg_512RegClassID); + isRegClass(AMDGPU::VReg_512RegClassID) || + isRegClass(AMDGPU::VReg_1024RegClassID); } bool isVReg32() const { @@ -286,6 +290,10 @@ public: return isOff() || isVReg32(); } + bool isNull() const { + return isRegKind() && getReg() == AMDGPU::SGPR_NULL; + } + bool isSDWAOperand(MVT type) const; bool isSDWAFP16Operand() const; bool isSDWAFP32Operand() const; @@ -325,6 +333,7 @@ public: bool isDLC() const { return isImmTy(ImmTyDLC); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } + bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } @@ -817,6 +826,7 @@ public: case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; + case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; case ImmTyFORMAT: OS << "FORMAT"; break; @@ -886,7 +896,7 @@ public: int64_t Val, SMLoc Loc, ImmTy Type = ImmTyNone, bool IsFPImm = false) { - auto Op = llvm::make_unique(Immediate, AsmParser); + auto Op = std::make_unique(Immediate, AsmParser); Op->Imm.Val = Val; Op->Imm.IsFPImm = IsFPImm; Op->Imm.Type = Type; @@ -899,7 +909,7 @@ public: static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, StringRef Str, SMLoc Loc, bool HasExplicitEncodingSize = true) { - auto Res = llvm::make_unique(Token, AsmParser); + auto Res = 
std::make_unique(Token, AsmParser); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); Res->StartLoc = Loc; @@ -910,7 +920,7 @@ public: static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, unsigned RegNo, SMLoc S, SMLoc E) { - auto Op = llvm::make_unique(Register, AsmParser); + auto Op = std::make_unique(Register, AsmParser); Op->Reg.RegNo = RegNo; Op->Reg.Mods = Modifiers(); Op->StartLoc = S; @@ -920,7 +930,7 @@ public: static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, const class MCExpr *Expr, SMLoc S) { - auto Op = llvm::make_unique(Expression, AsmParser); + auto Op = std::make_unique(Expression, AsmParser); Op->Expr = Expr; Op->StartLoc = S; Op->EndLoc = S; @@ -1051,11 +1061,23 @@ private: std::string &CollectString); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, - RegisterKind RegKind, unsigned Reg1, - unsigned RegNum); + RegisterKind RegKind, unsigned Reg1); bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, - unsigned& RegNum, unsigned& RegWidth, - unsigned *DwordRegIndex); + unsigned& RegNum, unsigned& RegWidth); + unsigned ParseRegularReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + unsigned ParseSpecialReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + unsigned ParseRegList(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth); + bool ParseRegRange(unsigned& Num, unsigned& Width); + unsigned getRegularReg(RegisterKind RegKind, + unsigned RegNum, + unsigned RegWidth); + bool isRegister(); bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; Optional getGprCountSymbolName(RegisterKind RegKind); @@ -1306,6 +1328,7 @@ private: bool validateOpSel(const MCInst &Inst); bool validateVccOperand(unsigned Reg) const; bool validateVOP3Literal(const MCInst &Inst) const; + unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, 
unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -1321,6 +1344,7 @@ private: void peekTokens(MutableArrayRef Tokens); AsmToken::TokenKind getTokenKind() const; bool parseExpr(int64_t &Imm); + bool parseExpr(OperandVector &Operands); StringRef getTokenStr() const; AsmToken peekToken(); AsmToken getToken() const; @@ -1399,9 +1423,12 @@ public: void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType, bool skipVcc = false); + uint64_t BasicInstType, + bool SkipDstVcc = false, + bool SkipSrcVcc = false); AMDGPUOperand::Ptr defaultBLGP() const; AMDGPUOperand::Ptr defaultCBSZ() const; @@ -1636,8 +1663,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const { } bool AMDGPUOperand::isBoolReg() const { - return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ? 
- isSCSrcB64() : isSCSrcB32(); + return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || + (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); } uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const @@ -1849,6 +1876,8 @@ static bool isInlineValue(unsigned Reg) { case AMDGPU::SRC_EXECZ: case AMDGPU::SRC_SCC: return true; + case AMDGPU::SGPR_NULL: + return true; default: return false; } @@ -1870,8 +1899,10 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 2: return AMDGPU::VReg_64RegClassID; case 3: return AMDGPU::VReg_96RegClassID; case 4: return AMDGPU::VReg_128RegClassID; + case 5: return AMDGPU::VReg_160RegClassID; case 8: return AMDGPU::VReg_256RegClassID; case 16: return AMDGPU::VReg_512RegClassID; + case 32: return AMDGPU::VReg_1024RegClassID; } } else if (Is == IS_TTMP) { switch (RegWidth) { @@ -1944,7 +1975,7 @@ static unsigned getSpecialRegForName(StringRef RegName) { .Case("tba_lo", AMDGPU::TBA_LO) .Case("tba_hi", AMDGPU::TBA_HI) .Case("null", AMDGPU::SGPR_NULL) - .Default(0); + .Default(AMDGPU::NoRegister); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, @@ -1959,8 +1990,7 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, } bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, - RegisterKind RegKind, unsigned Reg1, - unsigned RegNum) { + RegisterKind RegKind, unsigned Reg1) { switch (RegKind) { case IS_SPECIAL: if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { @@ -2008,14 +2038,37 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, } } -static const StringRef Registers[] = { - { "v" }, - { "s" }, - { "ttmp" }, - { "acc" }, - { "a" }, +struct RegInfo { + StringLiteral Name; + RegisterKind Kind; }; +static constexpr RegInfo RegularRegisters[] = { + {{"v"}, IS_VGPR}, + {{"s"}, IS_SGPR}, + {{"ttmp"}, IS_TTMP}, + {{"acc"}, IS_AGPR}, + {{"a"}, 
IS_AGPR}, +}; + +static bool isRegularReg(RegisterKind Kind) { + return Kind == IS_VGPR || + Kind == IS_SGPR || + Kind == IS_TTMP || + Kind == IS_AGPR; +} + +static const RegInfo* getRegularRegInfo(StringRef Str) { + for (const RegInfo &Reg : RegularRegisters) + if (Str.startswith(Reg.Name)) + return &Reg; + return nullptr; +} + +static bool getRegNum(StringRef Str, unsigned& Num) { + return !Str.getAsInteger(10, Num); +} + bool AMDGPUAsmParser::isRegister(const AsmToken &Token, const AsmToken &NextToken) const { @@ -2029,24 +2082,24 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token, // A single register like s0 or a range of registers like s[0:1] - StringRef RegName = Token.getString(); - - for (StringRef Reg : Registers) { - if (RegName.startswith(Reg)) { - if (Reg.size() < RegName.size()) { - unsigned RegNum; - // A single register with an index: rXX - if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) - return true; - } else { - // A range of registers: r[XX:YY]. - if (NextToken.is(AsmToken::LBrac)) - return true; - } + StringRef Str = Token.getString(); + const RegInfo *Reg = getRegularRegInfo(Str); + if (Reg) { + StringRef RegName = Reg->Name; + StringRef RegSuffix = Str.substr(RegName.size()); + if (!RegSuffix.empty()) { + unsigned Num; + // A single register with an index: rXX + if (getRegNum(RegSuffix, Num)) + return true; + } else { + // A range of registers: r[XX:YY]. 
+ if (NextToken.is(AsmToken::LBrac)) + return true; } } - return getSpecialRegForName(RegName); + return getSpecialRegForName(Str) != AMDGPU::NoRegister; } bool @@ -2055,137 +2108,161 @@ AMDGPUAsmParser::isRegister() return isRegister(getToken(), peekToken()); } -bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, - unsigned &RegNum, unsigned &RegWidth, - unsigned *DwordRegIndex) { - if (DwordRegIndex) { *DwordRegIndex = 0; } - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (getLexer().is(AsmToken::Identifier)) { - StringRef RegName = Parser.getTok().getString(); - if ((Reg = getSpecialRegForName(RegName))) { - Parser.Lex(); - RegKind = IS_SPECIAL; - } else { - unsigned RegNumIndex = 0; - if (RegName[0] == 'v') { - RegNumIndex = 1; - RegKind = IS_VGPR; - } else if (RegName[0] == 's') { - RegNumIndex = 1; - RegKind = IS_SGPR; - } else if (RegName[0] == 'a') { - RegNumIndex = RegName.startswith("acc") ? 3 : 1; - RegKind = IS_AGPR; - } else if (RegName.startswith("ttmp")) { - RegNumIndex = strlen("ttmp"); - RegKind = IS_TTMP; - } else { - return false; - } - if (RegName.size() > RegNumIndex) { - // Single 32-bit register: vXX. - if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) - return false; - Parser.Lex(); - RegWidth = 1; - } else { - // Range of registers: v[XX:YY]. ":YY" is optional. 
- Parser.Lex(); - int64_t RegLo, RegHi; - if (getLexer().isNot(AsmToken::LBrac)) - return false; - Parser.Lex(); +unsigned +AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, + unsigned RegNum, + unsigned RegWidth) { - if (getParser().parseAbsoluteExpression(RegLo)) - return false; + assert(isRegularReg(RegKind)); - const bool isRBrace = getLexer().is(AsmToken::RBrac); - if (!isRBrace && getLexer().isNot(AsmToken::Colon)) - return false; - Parser.Lex(); - - if (isRBrace) { - RegHi = RegLo; - } else { - if (getParser().parseAbsoluteExpression(RegHi)) - return false; - - if (getLexer().isNot(AsmToken::RBrac)) - return false; - Parser.Lex(); - } - RegNum = (unsigned) RegLo; - RegWidth = (RegHi - RegLo) + 1; - } - } - } else if (getLexer().is(AsmToken::LBrac)) { - // List of consecutive registers: [s0,s1,s2,s3] - Parser.Lex(); - if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) - return false; - if (RegWidth != 1) - return false; - RegisterKind RegKind1; - unsigned Reg1, RegNum1, RegWidth1; - do { - if (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); - } else if (getLexer().is(AsmToken::RBrac)) { - Parser.Lex(); - break; - } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { - if (RegWidth1 != 1) { - return false; - } - if (RegKind1 != RegKind) { - return false; - } - if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { - return false; - } - } else { - return false; - } - } while (true); - } else { - return false; + unsigned AlignSize = 1; + if (RegKind == IS_SGPR || RegKind == IS_TTMP) { + // SGPR and TTMP registers must be aligned. + // Max required alignment is 4 dwords. 
+ AlignSize = std::min(RegWidth, 4u); } - switch (RegKind) { - case IS_SPECIAL: + + if (RegNum % AlignSize != 0) + return AMDGPU::NoRegister; + + unsigned RegIdx = RegNum / AlignSize; + int RCID = getRegClass(RegKind, RegWidth); + if (RCID == -1) + return AMDGPU::NoRegister; + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegIdx >= RC.getNumRegs()) + return AMDGPU::NoRegister; + + return RC.getRegister(RegIdx); +} + +bool +AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { + int64_t RegLo, RegHi; + if (!trySkipToken(AsmToken::LBrac)) + return false; + + if (!parseExpr(RegLo)) + return false; + + if (trySkipToken(AsmToken::Colon)) { + if (!parseExpr(RegHi)) + return false; + } else { + RegHi = RegLo; + } + + if (!trySkipToken(AsmToken::RBrac)) + return false; + + if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) + return false; + + Num = static_cast(RegLo); + Width = (RegHi - RegLo) + 1; + return true; +} + +unsigned +AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + assert(isToken(AsmToken::Identifier)); + unsigned Reg = getSpecialRegForName(getTokenStr()); + if (Reg) { RegNum = 0; RegWidth = 1; - break; - case IS_VGPR: - case IS_SGPR: - case IS_AGPR: - case IS_TTMP: - { - unsigned Size = 1; - if (RegKind == IS_SGPR || RegKind == IS_TTMP) { - // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
- Size = std::min(RegWidth, 4u); - } - if (RegNum % Size != 0) - return false; - if (DwordRegIndex) { *DwordRegIndex = RegNum; } - RegNum = RegNum / Size; - int RCID = getRegClass(RegKind, RegWidth); - if (RCID == -1) - return false; - const MCRegisterClass RC = TRI->getRegClass(RCID); - if (RegNum >= RC.getNumRegs()) - return false; - Reg = RC.getRegister(RegNum); - break; + RegKind = IS_SPECIAL; + lex(); // skip register name + } + return Reg; +} + +unsigned +AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + assert(isToken(AsmToken::Identifier)); + StringRef RegName = getTokenStr(); + + const RegInfo *RI = getRegularRegInfo(RegName); + if (!RI) + return AMDGPU::NoRegister; + lex(); // skip register name + + RegKind = RI->Kind; + StringRef RegSuffix = RegName.substr(RI->Name.size()); + if (!RegSuffix.empty()) { + // Single 32-bit register: vXX. + if (!getRegNum(RegSuffix, RegNum)) + return AMDGPU::NoRegister; + RegWidth = 1; + } else { + // Range of registers: v[XX:YY]. ":YY" is optional. 
+ if (!ParseRegRange(RegNum, RegWidth)) + return AMDGPU::NoRegister; } - default: - llvm_unreachable("unexpected register kind"); + return getRegularReg(RegKind, RegNum, RegWidth); +} + +unsigned +AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, + unsigned &RegNum, + unsigned &RegWidth) { + unsigned Reg = AMDGPU::NoRegister; + + if (!trySkipToken(AsmToken::LBrac)) + return AMDGPU::NoRegister; + + // List of consecutive registers, e.g.: [s0,s1,s2,s3] + + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) + return AMDGPU::NoRegister; + if (RegWidth != 1) + return AMDGPU::NoRegister; + + for (; trySkipToken(AsmToken::Comma); ) { + RegisterKind NextRegKind; + unsigned NextReg, NextRegNum, NextRegWidth; + + if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) + return AMDGPU::NoRegister; + if (NextRegWidth != 1) + return AMDGPU::NoRegister; + if (NextRegKind != RegKind) + return AMDGPU::NoRegister; + if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) + return AMDGPU::NoRegister; } - if (!subtargetHasRegister(*TRI, Reg)) - return false; - return true; + if (!trySkipToken(AsmToken::RBrac)) + return AMDGPU::NoRegister; + + if (isRegularReg(RegKind)) + Reg = getRegularReg(RegKind, RegNum, RegWidth); + + return Reg; +} + +bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, + unsigned &Reg, + unsigned &RegNum, + unsigned &RegWidth) { + Reg = AMDGPU::NoRegister; + + if (isToken(AsmToken::Identifier)) { + Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); + if (Reg == AMDGPU::NoRegister) + Reg = ParseRegularReg(RegKind, RegNum, RegWidth); + } else { + Reg = ParseRegList(RegKind, RegNum, RegWidth); + } + + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); } Optional @@ -2241,18 +2318,18 @@ std::unique_ptr AMDGPUAsmParser::parseRegister() { SMLoc StartLoc = Tok.getLoc(); SMLoc EndLoc = Tok.getEndLoc(); RegisterKind RegKind; - unsigned Reg, RegNum, 
RegWidth, DwordRegIndex; + unsigned Reg, RegNum, RegWidth; - if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { + if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { //FIXME: improve error messages (bug 41303). Error(StartLoc, "not a valid operand."); return nullptr; } if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { - if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) + if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) return nullptr; } else - KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); + KernelScope.usesRegister(RegKind, RegNum, RegWidth); return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); } @@ -2648,7 +2725,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { case AMDGPU::VCC_LO: case AMDGPU::VCC_HI: case AMDGPU::M0: - case AMDGPU::SGPR_NULL: return Reg; default: break; @@ -2697,13 +2773,38 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, } } +unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { + if (!isGFX10()) + return 1; + + switch (Opcode) { + // 64-bit shift instructions can use only one scalar value input + case AMDGPU::V_LSHLREV_B64: + case AMDGPU::V_LSHLREV_B64_gfx10: + case AMDGPU::V_LSHL_B64: + case AMDGPU::V_LSHRREV_B64: + case AMDGPU::V_LSHRREV_B64_gfx10: + case AMDGPU::V_LSHR_B64: + case AMDGPU::V_ASHRREV_I64: + case AMDGPU::V_ASHRREV_I64_gfx10: + case AMDGPU::V_ASHR_I64: + return 1; + default: + return 2; + } +} + bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { const MCOperand &MO = Inst.getOperand(OpIdx); if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); + } else if (MO.isReg()) { + auto Reg = MO.getReg(); + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; + } else { + return true; } - return !MO.isReg() || - isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } bool 
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { @@ -2782,10 +2883,7 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { } ConstantBusUseCount += NumLiterals; - if (isGFX10()) - return ConstantBusUseCount <= 2; - - return ConstantBusUseCount <= 1; + return ConstantBusUseCount <= getConstantBusLimit(Opcode); } bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { @@ -3212,6 +3310,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { const int OpIndices[] = { Src0Idx, Src1Idx }; + unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; @@ -3219,19 +3318,21 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { if (OpIdx == -1) break; const MCOperand &MO = Inst.getOperand(OpIdx); - if (MO.isImm() && - // Exclude special imm operands (like that used by s_set_gpr_idx_on) - AMDGPU::isSISrcOperand(Desc, OpIdx) && - !isInlineConstant(Inst, OpIdx)) { - uint32_t Value = static_cast(MO.getImm()); - if (NumLiterals == 0 || LiteralValue != Value) { - LiteralValue = Value; - ++NumLiterals; + // Exclude special imm operands (like that used by s_set_gpr_idx_on) + if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { + if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } else if (MO.isExpr()) { + ++NumExprs; } } } - return NumLiterals <= 1; + return NumLiterals + NumExprs <= 1; } bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { @@ -3267,6 +3368,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; @@ -3274,17 +3376,26 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { if (OpIdx == -1) break; const MCOperand &MO = Inst.getOperand(OpIdx); - if (!MO.isImm() 
|| !AMDGPU::isSISrcOperand(Desc, OpIdx)) + if (!MO.isImm() && !MO.isExpr()) + continue; + if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) continue; - if (!isInlineConstant(Inst, OpIdx)) { + if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && + getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) + return false; + + if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { uint32_t Value = static_cast(MO.getImm()); if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; } + } else if (MO.isExpr()) { + ++NumExprs; } } + NumLiterals += NumExprs; return !NumLiterals || (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); @@ -3607,37 +3718,44 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, Val, ValRange); - UserSGPRCount += 4; + if (Val) + UserSGPRCount += 4; } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, ValRange); - UserSGPRCount += 2; + if (Val) + UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, ValRange); - UserSGPRCount += 2; + if (Val) + 
UserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, Val, ValRange); - UserSGPRCount += 1; + if (Val) + UserSGPRCount += 1; } else if (ID == ".amdhsa_wavefront_size32") { if (IVersion.Major < 10) return getParser().Error(IDRange.Start, "directive requires gfx10+", @@ -5224,6 +5342,23 @@ AMDGPUAsmParser::parseExpr(int64_t &Imm) { return !getParser().parseAbsoluteExpression(Imm); } +bool +AMDGPUAsmParser::parseExpr(OperandVector &Operands) { + SMLoc S = getLoc(); + + const MCExpr *Expr; + if (Parser.parseExpression(Expr)) + return false; + + int64_t IntVal; + if (Expr->evaluateAsAbsolute(IntVal)) { + Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); + } else { + Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); + } + return true; +} + bool AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { if (isToken(AsmToken::String)) { @@ -5605,25 +5740,29 @@ bool AMDGPUOperand::isGPRIdxMode() const { OperandMatchResultTy AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - switch (getLexer().getKind()) { - default: return MatchOperand_ParseFail; - case AsmToken::Integer: { - int64_t Imm; - if (getParser().parseAbsoluteExpression(Imm)) - return MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); - return MatchOperand_Success; + // Make sure we are not parsing something + // that looks like a label or an expression but is not. + // This will improve error messages. + if (isRegister() || isModifier()) + return MatchOperand_NoMatch; + + if (parseExpr(Operands)) { + + AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); + assert(Opr.isImm() || Opr.isExpr()); + SMLoc Loc = Opr.getStartLoc(); + + // Currently we do not support arbitrary expressions as branch targets. 
+ // Only labels and absolute expressions are accepted. + if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { + Error(Loc, "expected an absolute expression or a label"); + } else if (Opr.isImm() && !Opr.isS16Imm()) { + Error(Loc, "expected a 16-bit signed jump offset"); } - - case AsmToken::Identifier: - Operands.push_back(AMDGPUOperand::CreateExpr(this, - MCSymbolRefExpr::create(getContext().getOrCreateSymbol( - Parser.getTok().getString()), getContext()), S)); - Parser.Lex(); - return MatchOperand_Success; } + + return MatchOperand_Success; // avoid excessive error messages } //===----------------------------------------------------------------------===// @@ -5908,6 +6047,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, @@ -5941,8 +6081,6 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { }; OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { - unsigned size = Operands.size(); - assert(size > 0); OperandMatchResultTy res = parseOptionalOpr(Operands); @@ -5957,17 +6095,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan // to make sure autogenerated parser of custom operands never hit hardcoded // mandatory operands. - if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { + for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { + if (res != MatchOperand_Success || + isToken(AsmToken::EndOfStatement)) + break; - // We have parsed the first optional operand. - // Parse as many operands as necessary to skip all mandatory operands. 
- - for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { - if (res != MatchOperand_Success || - getLexer().is(AsmToken::EndOfStatement)) break; - if (getLexer().is(AsmToken::Comma)) Parser.Lex(); - res = parseOptionalOpr(Operands); - } + trySkipToken(AsmToken::Comma); + res = parseOptionalOpr(Operands); } return res; @@ -6682,7 +6816,11 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); +} + +void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); } void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { @@ -6690,11 +6828,14 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType, bool skipVcc) { + uint64_t BasicInstType, + bool SkipDstVcc, + bool SkipSrcVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; - bool skippedVcc = false; + bool SkipVcc = SkipDstVcc || SkipSrcVcc; + bool SkippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -6704,19 +6845,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (skipVcc && !skippedVcc && Op.isReg() && + if (SkipVcc && !SkippedVcc && Op.isReg() && (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. // Skip VCC only if we didn't skip it on previous iteration. 
+ // Note that src0 and src1 occupy 2 slots each because of modifiers. if (BasicInstType == SIInstrFlags::VOP2 && - (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { - skippedVcc = true; + ((SkipDstVcc && Inst.getNumOperands() == 1) || + (SkipSrcVcc && Inst.getNumOperands() == 5))) { + SkippedVcc = true; continue; } else if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { - skippedVcc = true; + SkippedVcc = true; continue; } } @@ -6728,7 +6871,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } - skippedVcc = false; + SkippedVcc = false; } if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && @@ -6849,6 +6992,14 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; case MCK_AttrChan: return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; + case MCK_SReg_64: + case MCK_SReg_64_XEXEC: + // Null is defined as a 32-bit register but + // it should also be enabled with 64-bit operands. + // The following code enables it for SReg_64 operands + // used as source and destination. Remaining source + // operands are handled in isInlinableImm. + return Operand.isNull() ? 
Match_Success : Match_InvalidOperand; default: return Match_InvalidOperand; } diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 62a19d848af..1b12550aed8 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// def MUBUFAddr32 : ComplexPattern; -def MUBUFAddr64 : ComplexPattern; +def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; def MUBUFScratchOffen : ComplexPattern; def MUBUFScratchOffset : ComplexPattern; -def MUBUFOffset : ComplexPattern; +def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; @@ -54,6 +54,17 @@ class MTBUFAddr64Table { // MTBUF classes //===----------------------------------------------------------------------===// +class MTBUFGetBaseOpcode { + string ret = !subst("FORMAT_XY", "FORMAT_X", + !subst("FORMAT_XYZ", "FORMAT_X", + !subst("FORMAT_XYZW", "FORMAT_X", Op))); +} + +class getMTBUFElements { + int ret = 1; +} + + class MTBUF_Pseudo pattern=[]> : InstSI, @@ -67,6 +78,9 @@ class MTBUF_Pseudo (NAME); + Instruction BaseOpcode = !cast(MTBUFGetBaseOpcode.ret); + let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; @@ -90,6 +104,7 @@ class MTBUF_Pseudo has_offset = 1; bits<1> has_slc = 1; bits<1> has_tfe = 1; + bits<4> elements = 0; } class MTBUF_Real : @@ -126,17 +141,17 @@ class getMTBUFInsDA vdataList, RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc), + offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc) + offset:$offset, FORMAT:$format, GLC:$glc, 
SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc), + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc) + SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -181,51 +196,54 @@ class MTBUF_SetupAddr { class MTBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MTBUF_Pseudo.ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc", + " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 1; let mayStore = 0; + let elements = elems; } multiclass MTBUF_Pseudo_Loads { - def _OFFSET : MTBUF_Load_Pseudo , + i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Load_Pseudo , + i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Load_Pseudo ; - def _IDXEN : MTBUF_Load_Pseudo ; - def _BOTHEN : MTBUF_Load_Pseudo ; + def _OFFEN : MTBUF_Load_Pseudo ; + def _IDXEN : MTBUF_Load_Pseudo ; + def _BOTHEN : MTBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Load_Pseudo ; - def _OFFEN_exact : MTBUF_Load_Pseudo ; - def _IDXEN_exact : MTBUF_Load_Pseudo ; - def _BOTHEN_exact : MTBUF_Load_Pseudo ; + def _OFFSET_exact : MTBUF_Load_Pseudo ; + def _OFFEN_exact : MTBUF_Load_Pseudo ; + def _IDXEN_exact : MTBUF_Load_Pseudo ; + def _BOTHEN_exact : MTBUF_Load_Pseudo ; } } class MTBUF_Store_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, @@ -233,39 +251,40 @@ class MTBUF_Store_Pseudo .ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc", + " $vdata, " # 
getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 0; let mayStore = 1; + let elements = elems; } multiclass MTBUF_Pseudo_Stores { - def _OFFSET : MTBUF_Store_Pseudo , + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<0, NAME>; - def _ADDR64 : MTBUF_Store_Pseudo , + i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MTBUFAddr64Table<1, NAME>; - def _OFFEN : MTBUF_Store_Pseudo ; - def _IDXEN : MTBUF_Store_Pseudo ; - def _BOTHEN : MTBUF_Store_Pseudo ; + def _OFFEN : MTBUF_Store_Pseudo ; + def _IDXEN : MTBUF_Store_Pseudo ; + def _BOTHEN : MTBUF_Store_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MTBUF_Store_Pseudo ; - def _OFFEN_exact : MTBUF_Store_Pseudo ; - def _IDXEN_exact : MTBUF_Store_Pseudo ; - def _BOTHEN_exact : MTBUF_Store_Pseudo ; + def _OFFSET_exact : MTBUF_Store_Pseudo ; + def _OFFEN_exact : MTBUF_Store_Pseudo ; + def _IDXEN_exact : MTBUF_Store_Pseudo ; + def _BOTHEN_exact : MTBUF_Store_Pseudo ; } } @@ -320,7 +339,7 @@ class MUBUF_Pseudo has_offset = 1; bits<1> has_slc = 1; bits<1> has_tfe = 1; - bits<4> dwords = 0; + bits<4> elements = 0; } class MUBUF_Real : @@ -393,18 +412,30 @@ class getMUBUFInsDA vdataList, ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc)) + !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz)) ); } -class getMUBUFDwords { - string regClassAsInt = !cast(regClass); +class getMUBUFElements { + // eq does not support ValueType for some reason. 
+ string vtAsStr = !cast(vt); + int ret = - !if(!eq(regClassAsInt, !cast(VGPR_32)), 1, - !if(!eq(regClassAsInt, !cast(VReg_64)), 2, - !if(!eq(regClassAsInt, !cast(VReg_96)), 3, - !if(!eq(regClassAsInt, !cast(VReg_128)), 4, - 0)))); + !if(!eq(vtAsStr, "f16"), 1, + !if(!eq(vtAsStr, "v2f16"), 2, + !if(!eq(vtAsStr, "v3f16"), 3, + !if(!eq(vtAsStr, "v4f16"), 4, + !if(!eq(vt.Size, 32), 1, + !if(!eq(vt.Size, 64), 2, + !if(!eq(vt.Size, 96), 3, + !if(!eq(vt.Size, 128), 4, 0) + ) + ) + ) + ) + ) + ) + ); } class getMUBUFIns vdataList=[], bit isLds = 0> { @@ -442,18 +473,18 @@ class MUBUF_SetupAddr { class MUBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MUBUF_Pseudo.ret:$vdata), !con(getMUBUFIns.ret, - !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))), + !if(HasTiedDest, (ins getVregSrcForVT.ret:$vdata_in), (ins))), " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc", + !if(isLds, " lds", "$tfe") # "$dlc" # "$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -467,19 +498,19 @@ class MUBUF_Load_Pseudo .ret; + let elements = getMUBUFElements.ret; } class MUBUF_Offset_Load_Pat : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; class MUBUF_Addr64_Load_Pat : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, 
i1:$dlc, i1:$swz))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) >; multiclass MUBUF_Pseudo_Load_Pats { @@ -490,89 +521,87 @@ multiclass MUBUF_Pseudo_Load_Pats { - def _OFFSET : MUBUF_Load_Pseudo , + def _OFFSET : MUBUF_Load_Pseudo , MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; - def _ADDR64 : MUBUF_Load_Pseudo , + def _ADDR64 : MUBUF_Load_Pseudo , MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>; - def _OFFEN : MUBUF_Load_Pseudo ; - def _IDXEN : MUBUF_Load_Pseudo ; - def _BOTHEN : MUBUF_Load_Pseudo ; + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Load_Pseudo ; - def _OFFEN_exact : MUBUF_Load_Pseudo ; - def _IDXEN_exact : MUBUF_Load_Pseudo ; - def _BOTHEN_exact : MUBUF_Load_Pseudo ; + def _OFFSET_exact : MUBUF_Load_Pseudo ; + def _OFFEN_exact : MUBUF_Load_Pseudo ; + def _IDXEN_exact : MUBUF_Load_Pseudo ; + def _BOTHEN_exact : MUBUF_Load_Pseudo ; } } -multiclass MUBUF_Pseudo_Loads_Lds { - defm NAME : MUBUF_Pseudo_Loads; - defm _LDS : MUBUF_Pseudo_Loads; + defm NAME : MUBUF_Pseudo_Loads; + defm _LDS : MUBUF_Pseudo_Loads; } class MUBUF_Store_Pseudo pattern=[], // Workaround bug bz30254 - int addrKindCopy = addrKind, - RegisterClass vdataClassCopy = vdataClass> + int addrKindCopy = addrKind> : MUBUF_Pseudo.ret, - " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe$dlc", + getMUBUFIns.ret]>.ret, + " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; - let dwords = getMUBUFDwords.ret; + let elements = getMUBUFElements.ret; } -multiclass MUBUF_Pseudo_Stores { - def _OFFSET : MUBUF_Store_Pseudo , + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<0, NAME>; - def _ADDR64 : MUBUF_Store_Pseudo , + i16:$offset, i1:$glc, 
i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, MUBUFAddr64Table<1, NAME>; - def _OFFEN : MUBUF_Store_Pseudo ; - def _IDXEN : MUBUF_Store_Pseudo ; - def _BOTHEN : MUBUF_Store_Pseudo ; + def _OFFEN : MUBUF_Store_Pseudo ; + def _IDXEN : MUBUF_Store_Pseudo ; + def _BOTHEN : MUBUF_Store_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Store_Pseudo ; - def _OFFEN_exact : MUBUF_Store_Pseudo ; - def _IDXEN_exact : MUBUF_Store_Pseudo ; - def _BOTHEN_exact : MUBUF_Store_Pseudo ; + def _OFFSET_exact : MUBUF_Store_Pseudo ; + def _OFFEN_exact : MUBUF_Store_Pseudo ; + def _IDXEN_exact : MUBUF_Store_Pseudo ; + def _BOTHEN_exact : MUBUF_Store_Pseudo ; } } class MUBUF_Pseudo_Store_Lds : MUBUF_Pseudo { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz), + " $srsrc, $soffset$offset lds$glc$slc$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -686,7 +715,7 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN .ret> { + bit isFP = isFloatType.ret> { let FPAtomic = isFP in def _OFFSET : MUBUF_AtomicNoRet_Pseudo , MUBUFAddr64Table <0, NAME>; @@ -710,7 +739,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN .ret> { + bit isFP = isFloatType.ret> { let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo ; defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads < - "buffer_load_format_xy", VReg_64 + "buffer_load_format_xy", v2f32 >; defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Pseudo_Loads < - "buffer_load_format_xyz", VReg_96 + "buffer_load_format_xyz", v3f32 >; defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Pseudo_Loads < - "buffer_load_format_xyzw", VReg_128 + "buffer_load_format_xyzw", v4f32 >; defm BUFFER_STORE_FORMAT_X : MUBUF_Pseudo_Stores < - "buffer_store_format_x", VGPR_32 + "buffer_store_format_x", f32 >; defm BUFFER_STORE_FORMAT_XY : MUBUF_Pseudo_Stores < - "buffer_store_format_xy", VReg_64 + "buffer_store_format_xy", v2f32 >; defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores < - "buffer_store_format_xyz", VReg_96 + "buffer_store_format_xyz", v3f32 >; defm 
BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores < - "buffer_store_format_xyzw", VReg_128 + "buffer_store_format_xyzw", v4f32 >; let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_x", VGPR_32 + "buffer_load_format_d16_x", i32 >; defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xy", VReg_64 + "buffer_load_format_d16_xy", v2i32 >; defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyz", VReg_96 + "buffer_load_format_d16_xyz", v3i32 >; defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyzw", VReg_128 + "buffer_load_format_d16_xyzw", v4i32 >; defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_x", VGPR_32 + "buffer_store_format_d16_x", i32 >; defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xy", VReg_64 + "buffer_store_format_d16_xy", v2i32 >; defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyz", VReg_96 + "buffer_store_format_d16_xyz", v3i32 >; defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyzw", VReg_128 + "buffer_store_format_d16_xyzw", v4i32 >; } // End HasUnpackedD16VMem. 
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_x", VGPR_32 + "buffer_load_format_d16_x", f16 >; defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xy", VGPR_32 + "buffer_load_format_d16_xy", v2f16 >; defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyz", VReg_64 + "buffer_load_format_d16_xyz", v3f16 >; defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_xyzw", VReg_64 + "buffer_load_format_d16_xyzw", v4f16 >; defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_x", VGPR_32 + "buffer_store_format_d16_x", f16 >; defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xy", VGPR_32 + "buffer_store_format_d16_xy", v2f16 >; defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyz", VReg_64 + "buffer_store_format_d16_xyz", v3f16 >; defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_xyzw", VReg_64 + "buffer_store_format_d16_xyzw", v4f16 >; } // End HasPackedD16VMem. 
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ubyte", VGPR_32, i32 + "buffer_load_ubyte", i32 >; defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sbyte", VGPR_32, i32 + "buffer_load_sbyte", i32 >; defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ushort", VGPR_32, i32 + "buffer_load_ushort", i32 >; defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sshort", VGPR_32, i32 + "buffer_load_sshort", i32 >; defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds < - "buffer_load_dword", VGPR_32, i32 + "buffer_load_dword", i32 >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", VReg_64, v2i32 + "buffer_load_dwordx2", v2i32 >; defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", VReg_96, v3i32 + "buffer_load_dwordx3", v3i32 >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", VReg_128, v4i32 + "buffer_load_dwordx4", v4i32 >; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>; @@ -867,111 +896,111 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; // in at least GFX8+ chips. See Bug 37653. 
let SubtargetPredicate = isGFX8GFX9 in { defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", VReg_64, v2i32, null_frag, 0, 1 + "buffer_load_dwordx2", v2i32, null_frag, 0, 1 >; defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", VReg_96, untyped, null_frag, 0, 1 + "buffer_load_dwordx3", v3i32, null_frag, 0, 1 >; defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", VReg_128, v4i32, null_frag, 0, 1 + "buffer_load_dwordx4", v4i32, null_frag, 0, 1 >; } defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < - "buffer_store_byte", VGPR_32, i32, truncstorei8_global + "buffer_store_byte", i32, truncstorei8_global >; defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores < - "buffer_store_short", VGPR_32, i32, truncstorei16_global + "buffer_store_short", i32, truncstorei16_global >; defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores < - "buffer_store_dword", VGPR_32, i32, store_global + "buffer_store_dword", i32, store_global >; defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx2", VReg_64, v2i32, store_global + "buffer_store_dwordx2", v2i32, store_global >; defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx3", VReg_96, v3i32, store_global + "buffer_store_dwordx3", v3i32, store_global >; defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < - "buffer_store_dwordx4", VReg_128, v4i32, store_global + "buffer_store_dwordx4", v4i32, store_global >; defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global + "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32 >; defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag >; defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < - "buffer_atomic_add", VGPR_32, i32, atomic_add_global + "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32 >; defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global + 
"buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32 >; defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin", VGPR_32, i32, atomic_min_global + "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32 >; defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global + "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32 >; defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax", VGPR_32, i32, atomic_max_global + "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32 >; defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global + "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32 >; defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < - "buffer_atomic_and", VGPR_32, i32, atomic_and_global + "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32 >; defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < - "buffer_atomic_or", VGPR_32, i32, atomic_or_global + "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32 >; defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global + "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32 >; defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global + "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32 >; defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global + "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32 >; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global + "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64 >; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag >; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global + 
"buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64 >; defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global + "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64 >; defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global + "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64 >; defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global + "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64 >; defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global + "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64 >; defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global + "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64 >; defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global + "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64 >; defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global + "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64 >; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global + "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64 >; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global + "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64 >; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global + "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64 >; let SubtargetPredicate = isGFX8GFX9 in { @@ -981,58 +1010,75 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; let 
SubtargetPredicate = isGFX6 in { // isn't on CI & VI /* defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">; -defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">; -defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">; -defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">; defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">; -defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">; -defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">; -defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">; */ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; } +let SubtargetPredicate = isGFX6GFX7GFX10 in { + +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag +>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin", VGPR_32, f32, null_frag +>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax", VGPR_32, f32, null_frag +>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag +>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin_x2", VReg_64, f64, null_frag +>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax_x2", VReg_64, f64, null_frag +>; + +} + let SubtargetPredicate = HasD16LoadStore in { defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_ubyte_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_ubyte_d16", i32, null_frag, 1 >; defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_ubyte_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_ubyte_d16_hi", i32, null_frag, 1 >; defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads < - "buffer_load_sbyte_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_sbyte_d16", i32, null_frag, 1 >; defm 
BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_sbyte_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_sbyte_d16_hi", i32, null_frag, 1 >; defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads < - "buffer_load_short_d16", VGPR_32, i32, null_frag, 1 + "buffer_load_short_d16", i32, null_frag, 1 >; defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads < - "buffer_load_short_d16_hi", VGPR_32, i32, null_frag, 1 + "buffer_load_short_d16_hi", i32, null_frag, 1 >; defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores < - "buffer_store_byte_d16_hi", VGPR_32, i32 + "buffer_store_byte_d16_hi", i32 >; defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Pseudo_Stores < - "buffer_store_short_d16_hi", VGPR_32, i32 + "buffer_store_short_d16_hi", i32 >; defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Pseudo_Loads < - "buffer_load_format_d16_hi_x", VGPR_32 + "buffer_load_format_d16_hi_x", i32 >; defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < - "buffer_store_format_d16_hi_x", VGPR_32 + "buffer_store_format_d16_hi_x", i32 >; } // End HasD16LoadStore @@ -1043,10 +1089,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", let SubtargetPredicate = HasAtomicFaddInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global + "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts @@ -1055,35 +1101,35 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < // MTBUF Instructions //===----------------------------------------------------------------------===// -defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>; -defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>; -defm 
TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>; -defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>; -defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>; -defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>; -defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>; -defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64, 2>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>; let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>; - defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>; - defm 
TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>; - defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>; + defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>; + defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>; } // End HasUnpackedD16VMem. 
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { - defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>; - defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>; - defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>; - defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>; - defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>; + defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>; + defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>; + defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>; + defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>; + defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>; + defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>; } // End HasPackedD16VMem. 
let SubtargetPredicate = isGFX7Plus in { @@ -1118,6 +1164,10 @@ def extract_dlc : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); }]>; +def extract_swz : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// @@ -1125,33 +1175,37 @@ def extract_dlc : SDNodeXForm { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, 
i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1182,8 +1236,12 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -1196,36 +1254,40 @@ defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : 
GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, - (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), - $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), - (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + $rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary), + (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1256,8 +1318,12 @@ let SubtargetPredicate = HasPackedD16VMem in { defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; @@ -1273,32 +1339,32 @@ multiclass BufferAtomicPatterns { def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, 
imm:$offset, - imm:$cachepolicy, imm)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast(opcode # _BOTHEN_RTN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1316,6 +1382,8 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; @@ -1326,37 +1394,39 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; multiclass BufferAtomicPatterns_NO_RTN { def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFSET) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, 
imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast(opcode # _BOTHEN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1370,8 +1440,8 @@ defm : BufferAtomicPatterns_NO_RTN : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc) + i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) >; multiclass MUBUFLoad_Atomic_Pattern ; def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } @@ -1454,8 +1524,8 @@ multiclass MUBUFLoad_Pattern ; } @@ -1478,12 +1548,12 @@ multiclass MUBUFScratchLoadPat ; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1493,12 +1563,12 @@ multiclass MUBUFScratchLoadPat_D16 { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in) + (InstrOffset 
$srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) >; } @@ -1512,7 +1582,10 @@ defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; + +foreach vt = Reg32Types.types in { defm : MUBUFScratchLoadPat ; +} defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; @@ -1535,16 +1608,16 @@ defm : MUBUFScratchLoadPat_D16 { - // Store follows atomic op convention so address is forst + // Store follows atomic op convention so address is first def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0) + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1558,8 +1631,8 @@ multiclass MUBUFStore_Pattern ; } @@ -1573,13 +1646,13 @@ multiclass MUBUFScratchStorePat ; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) >; } @@ -1587,7 +1660,11 @@ defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; + +foreach vt = Reg32Types.types in { +defm : MUBUFScratchStorePat ; +} + defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; @@ -1613,37 +1690,41 @@ defm : MUBUFScratchStorePat { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) $rsrc, 
$soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1671,37 +1752,41 @@ let SubtargetPredicate = HasPackedD16VMem in { multiclass 
MTBUF_StoreIntrinsicPat { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, - imm:$offset, imm:$format, imm:$cachepolicy, imm), + timm:$offset, timm:$format, timm:$auxiliary, timm), (!cast(opcode # _BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), - 
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) + (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), + (extract_swz $auxiliary)) >; } @@ -1957,10 +2042,9 @@ defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>; defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>; defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>; defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>; -// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; -//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; -//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>; @@ -1975,10 +2059,9 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>; // FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7. 
-// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; -//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; -//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>; defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; @@ -2353,7 +2436,7 @@ let SubtargetPredicate = HasPackedD16VMem in { def MUBUFInfoTable : GenericTable { let FilterClass = "MUBUF_Pseudo"; let CppTypeName = "MUBUFInfo"; - let Fields = ["Opcode", "BaseOpcode", "dwords", "has_vaddr", "has_srsrc", "has_soffset"]; + let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getMUBUFOpcodeHelper"; @@ -2364,7 +2447,26 @@ def getMUBUFInfoFromOpcode : SearchIndex { let Key = ["Opcode"]; } -def getMUBUFInfoFromBaseOpcodeAndDwords : SearchIndex { +def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex { let Table = MUBUFInfoTable; - let Key = ["BaseOpcode", "dwords"]; + let Key = ["BaseOpcode", "elements"]; +} + +def MTBUFInfoTable : GenericTable { + let FilterClass = "MTBUF_Pseudo"; + let CppTypeName = "MTBUFInfo"; + let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getMTBUFOpcodeHelper"; +} + +def getMTBUFInfoFromOpcode : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["Opcode"]; +} + +def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex { + let Table = MTBUFInfoTable; + let Key = ["BaseOpcode", "elements"]; } diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index c52eaaa3fdc..816ec14a0e9 100644 
--- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -81,6 +81,17 @@ class DS_Real : // DS Pseudo instructions +class DS_0A1D_NORET +: DS_Pseudo { + + let has_addr = 0; + let has_data1 = 0; + let has_vdst = 0; +} + class DS_1A1D_NORET : DS_Pseudo class DS_GWS_0D : DS_GWS; + (ins offset:$offset, gds:$gds), "$offset gds"> { + let hasSideEffects = 1; +} class DS_GWS_1D : DS_GWS { let has_gws_data0 = 1; + let hasSideEffects = 1; } class DS_VOID : DS_Pseudo; def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">; } +} // End has_m0_read = 0 + let SubtargetPredicate = HasDSAddTid in { -def DS_WRITE_ADDTID_B32 : DS_1A1D_NORET<"ds_write_addtid_b32">; +def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">; } -} // End has_m0_read = 0 } // End mayLoad = 0 defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">; @@ -540,13 +555,14 @@ def DS_READ_I8_D16_HI : DS_1A_RET_Tied<"ds_read_i8_d16_hi">; def DS_READ_U16_D16 : DS_1A_RET_Tied<"ds_read_u16_d16">; def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">; } +} // End has_m0_read = 0 let SubtargetPredicate = HasDSAddTid in { -def DS_READ_ADDTID_B32 : DS_1A_RET<"ds_read_addtid_b32">; -} -} // End has_m0_read = 0 +def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">; } +} // End mayStore = 0 + def DS_CONSUME : DS_0A_RET<"ds_consume">; def DS_APPEND : DS_0A_RET<"ds_append">; def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">; @@ -600,13 +616,13 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; //===----------------------------------------------------------------------===// def : GCNPat < - (int_amdgcn_ds_swizzle i32:$src, imm:$offset16), + (int_amdgcn_ds_swizzle i32:$src, timm:$offset16), (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) >; class DSReadPat : GCNPat < - (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), - (inst $ptr, (as_i16imm $offset), (i1 gds)) + (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset))), + (inst $ptr, offset:$offset, (i1 gds)) >; 
multiclass DSReadPat_mc { @@ -621,8 +637,8 @@ multiclass DSReadPat_mc { } class DSReadPat_D16 : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in), - (inst $ptr, (as_i16imm $offset), (i1 0), $in) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$in), + (inst $ptr, offset:$offset, (i1 0), $in) >; defm : DSReadPat_mc ; @@ -636,13 +652,20 @@ defm : DSReadPat_mc ; defm : DSReadPat_mc ; defm : DSReadPat_mc ; defm : DSReadPat_mc ; -defm : DSReadPat_mc ; + +foreach vt = Reg32Types.types in { +defm : DSReadPat_mc ; +} + defm : DSReadPat_mc ; defm : DSReadPat_mc ; let AddedComplexity = 100 in { -defm : DSReadPat_mc ; +foreach vt = VReg_64.RegTypes in { +defm : DSReadPat_mc ; +} + defm : DSReadPat_mc ; } // End AddedComplexity = 100 @@ -664,8 +687,8 @@ def : DSReadPat_D16; } class DSWritePat : GCNPat < - (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), - (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) + (frag vt:$value, (DS1Addr1Offset i32:$ptr, i16:$offset)), + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) >; multiclass DSWritePat_mc { @@ -681,8 +704,8 @@ multiclass DSWritePat_mc { // Irritatingly, atomic_store reverses the order of operands from a // normal store. 
class DSAtomicWritePat : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst $ptr, $value, (as_i16imm $offset), (i1 0)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), + (inst $ptr, $value, offset:$offset, (i1 0)) >; multiclass DSAtomicWritePat_mc { @@ -699,9 +722,13 @@ defm : DSWritePat_mc ; defm : DSWritePat_mc ; defm : DSWritePat_mc ; defm : DSWritePat_mc ; -defm : DSWritePat_mc ; -defm : DSAtomicWritePat_mc ; -defm : DSAtomicWritePat_mc ; + +foreach vt = VGPR_32.RegTypes in { +defm : DSWritePat_mc ; +} + +defm : DSAtomicWritePat_mc ; +defm : DSAtomicWritePat_mc ; let OtherPredicates = [D16PreservesUnusedBits] in { def : DSWritePat ; @@ -736,46 +763,49 @@ def : DS64Bit4ByteAlignedWritePat; let AddedComplexity = 100 in { -defm : DSWritePat_mc ; +foreach vt = VReg_64.RegTypes in { +defm : DSWritePat_mc ; +} + defm : DSWritePat_mc ; } // End AddedComplexity = 100 class DSAtomicRetPat : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst $ptr, $value, (as_i16imm $offset), (i1 gds)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value), + (inst $ptr, getVregSrcForVT.ret:$value, offset:$offset, (i1 gds)) >; multiclass DSAtomicRetPat_mc { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicRetPat(frag#"_local_m0")>; + def : DSAtomicRetPat(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local")>; + !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat(frag#"_region_m0"), 1>; + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), 1>; } class DSAtomicCmpXChg : GCNPat < - (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), - (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds)) + (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap), + (inst $ptr, getVregSrcForVT.ret:$cmp, getVregSrcForVT.ret:$swap, offset:$offset, (i1 gds)) >; multiclass DSAtomicCmpXChg_mc { let 
OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicCmpXChg(frag#"_local_m0")>; + def : DSAtomicCmpXChg(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicCmpXChg(!cast(inst)#"_gfx9"), vt, - !cast(frag#"_local")>; + !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicCmpXChg(frag#"_region_m0"), 1>; + def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), 1>; } diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4ec4be9bc48..ec2e2c4e8b7 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1095,6 +1095,7 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { case 106: return createRegOperand(VCC); case 108: return createRegOperand(TBA); case 110: return createRegOperand(TMA); + case 125: return createRegOperand(SGPR_NULL); case 126: return createRegOperand(EXEC); case 235: return createRegOperand(SRC_SHARED_BASE); case 236: return createRegOperand(SRC_SHARED_LIMIT); @@ -1172,7 +1173,8 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { int TTmpIdx = getTTmpIdx(Val); if (TTmpIdx >= 0) { - return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx); + auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32); + return createSRegOperand(TTmpClsId, TTmpIdx); } else if (Val > SGPR_MAX) { return IsWave64 ? 
decodeSpecialReg64(Val) : decodeSpecialReg32(Val); diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 0550092ce1d..792e26d21f9 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -322,46 +322,46 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$ defm AtomicSwapPat : AtomicPat ; + atomic_swap_global_ret_32, + atomic_swap_global_noret_32>; defm AtomicAddPat : AtomicPat ; + atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>; defm AtomicSubPat : AtomicPat ; + atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>; defm AtomicMinPat : AtomicPat ; + atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>; defm AtomicUMinPat : AtomicPat ; + atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>; defm AtomicMaxPat : AtomicPat ; + atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>; defm AtomicUMaxPat : AtomicPat ; + atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>; defm AtomicAndPat : AtomicPat ; + atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>; defm AtomicOrPat : AtomicPat ; + atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>; defm AtomicXorPat : AtomicPat ; + atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>; defm AtomicIncAddPat : AtomicIncDecPat ; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, 1>; defm AtomicIncSubPat : AtomicIncDecPat ; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, -1>; defm AtomicDecAddPat : AtomicIncDecPat ; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, -1>; defm AtomicDecSubPat : AtomicIncDecPat ; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, 1>; // Should be predicated on FeatureFP64 // def FMA_64 : R600_3OP < @@ -628,37 +628,37 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", 
[(truncstorei16_local i32:$src1, i32:$src0)] >; def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", - [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_add_local_32 i32:$src0, i32:$src1))] >; def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", - [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_sub_local_32 i32:$src0, i32:$src1))] >; def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND", - [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_and_local_32 i32:$src0, i32:$src1))] >; def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR", - [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_or_local_32 i32:$src0, i32:$src1))] >; def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR", - [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_xor_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT", - [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_min_local_32 i32:$src0, i32:$src1))] >; def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT", - [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_max_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT", - [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umin_local_32 i32:$src0, i32:$src1))] >; def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT", - [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umax_local_32 i32:$src0, i32:$src1))] >; def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG", - [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_swap_local_32 i32:$src0, i32:$src1))] >; def LDS_CMPST_RET 
: R600_LDS_1A2D_RET <0x30, "LDS_CMPST", - [(set i32:$dst, (atomic_cmp_swap_local i32:$src0, i32:$src1, i32:$src2))] + [(set i32:$dst, (atomic_cmp_swap_local_32 i32:$src0, i32:$src1, i32:$src2))] >; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))] diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 889f60dae92..80ee17eba14 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -270,7 +270,7 @@ multiclass FLAT_Atomic_Pseudo< SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, - bit isFP = getIsFP.ret> { + bit isFP = isFloatType.ret> { def "" : FLAT_AtomicNoRet_Pseudo .ret> { + bit isFP = isFloatType.ret> { def "" : FLAT_AtomicNoRet_Pseudo .ret> { + bit isFP = isFloatType.ret> { def _RTN : FLAT_AtomicRet_Pseudo ; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", - VGPR_32, i32, atomic_swap_global>; + VGPR_32, i32, atomic_swap_global_32>; defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", - VReg_64, i64, atomic_swap_global>; + VReg_64, i64, atomic_swap_global_64>; defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", - VGPR_32, i32, atomic_add_global>; + VGPR_32, i32, atomic_load_add_global_32>; defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", - VGPR_32, i32, atomic_sub_global>; + VGPR_32, i32, atomic_load_sub_global_32>; defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", - VGPR_32, i32, atomic_min_global>; + VGPR_32, i32, atomic_load_min_global_32>; defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", - VGPR_32, i32, atomic_umin_global>; + VGPR_32, i32, atomic_load_umin_global_32>; defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", - VGPR_32, i32, atomic_max_global>; + VGPR_32, i32, atomic_load_max_global_32>; 
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", - VGPR_32, i32, atomic_umax_global>; + VGPR_32, i32, atomic_load_umax_global_32>; defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", - VGPR_32, i32, atomic_and_global>; + VGPR_32, i32, atomic_load_and_global_32>; defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", - VGPR_32, i32, atomic_or_global>; + VGPR_32, i32, atomic_load_or_global_32>; defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", - VGPR_32, i32, atomic_xor_global>; + VGPR_32, i32, atomic_load_xor_global_32>; defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", - VGPR_32, i32, atomic_inc_global>; + VGPR_32, i32, atomic_inc_global_32>; defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", - VGPR_32, i32, atomic_dec_global>; + VGPR_32, i32, atomic_dec_global_32>; defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", - VReg_64, i64, atomic_add_global>; + VReg_64, i64, atomic_load_add_global_64>; defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", - VReg_64, i64, atomic_sub_global>; + VReg_64, i64, atomic_load_sub_global_64>; defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", - VReg_64, i64, atomic_min_global>; + VReg_64, i64, atomic_load_min_global_64>; defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", - VReg_64, i64, atomic_umin_global>; + VReg_64, i64, atomic_load_umin_global_64>; defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", - VReg_64, i64, atomic_max_global>; + VReg_64, i64, atomic_load_max_global_64>; defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", - VReg_64, i64, atomic_umax_global>; + VReg_64, i64, atomic_load_umax_global_64>; defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", - VReg_64, i64, atomic_and_global>; + VReg_64, 
i64, atomic_load_and_global_64>; defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", - VReg_64, i64, atomic_or_global>; + VReg_64, i64, atomic_load_or_global_64>; defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", - VReg_64, i64, atomic_xor_global>; + VReg_64, i64, atomic_load_xor_global_64>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", - VReg_64, i64, atomic_inc_global>; + VReg_64, i64, atomic_inc_global_64>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", - VReg_64, i64, atomic_dec_global>; + VReg_64, i64, atomic_dec_global_64>; } // End is_flat_global = 1 } // End SubtargetPredicate = HasFlatGlobalInsts @@ -686,10 +686,10 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in { defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_add_f32", VGPR_32, f32, atomic_add_global + "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts @@ -777,8 +777,6 @@ def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -787,41 +785,50 @@ def : FlatLoadAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; + +foreach vt = Reg32Types.types in { +def : FlatLoadPat ; +def : FlatStorePat ; +} + +foreach vt = VReg_64.RegTypes in { +def : FlatStorePat ; +def : FlatLoadPat ; +} + def : FlatStorePat ; def : FlatStorePat ; def : FlatStoreAtomicPat ; def : FlatStoreAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : 
FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatAtomicPat ; -def : FlatAtomicPat ; +def : FlatAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; @@ -847,9 +854,6 @@ def : FlatLoadPat_D16 ; } // End OtherPredicates = [HasFlatAddressSpace] -def atomic_fadd_global : global_binary_atomic_op_frag; -def atomic_pk_fadd_global : global_binary_atomic_op_frag; - let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { def : FlatLoadSignedPat ; @@ -863,8 +867,16 @@ def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; +foreach vt = Reg32Types.types in { +def : FlatLoadSignedPat ; +def : FlatStoreSignedPat ; +} + +foreach vt = VReg_64.RegTypes in { +def : FlatLoadSignedPat ; +def : FlatStoreSignedPat ; +} + def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; @@ -875,8 +887,6 @@ def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat 
; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; @@ -902,36 +912,36 @@ def : FlatSignedLoadPat_D16 ; def : FlatStoreSignedAtomicPat ; def : FlatStoreSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; -def : FlatAtomicPatNoRtn ; -def : FlatAtomicPatNoRtn ; +def : FlatAtomicPatNoRtn ; +def : FlatAtomicPatNoRtn ; } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 @@ -1174,7 +1184,7 @@ class FLAT_Real_gfx10 op, FLAT_Pseudo ps> : let AssemblerPredicate = isGFX10Plus; let DecoderNamespace = "GFX10"; - let Inst{11-0} = {offset{12}, offset{10-0}}; + let 
Inst{11-0} = offset{11-0}; let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); let Inst{55} = 0; diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp index e1845e2e8e8..98678873e37 100644 --- a/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -41,6 +41,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -155,8 +156,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, RegSubRegPair CombOldVGPR, bool CombBCZ) const { assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp); - assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() == - TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg()); auto OrigOp = OrigMI.getOpcode(); auto DPPOp = getDPPOp(OrigOp); @@ -178,7 +177,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, if (OldIdx != -1) { assert(OldIdx == NumOperands); assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)); - DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg); + auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI); + DPPInst.addReg(CombOldVGPR.Reg, Def ? 
0 : RegState::Undef, + CombOldVGPR.SubReg); ++NumOperands; } else { // TODO: this discards MAC/FMA instructions for now, let's add it later @@ -195,6 +196,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src0_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(Src0); @@ -214,6 +219,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; + } else if (AMDGPU::getNamedOperandIdx(DPPOp, + AMDGPU::OpName::src1_modifiers) != -1) { + DPPInst.addImm(0); + ++NumOperands; } if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { @@ -344,6 +353,10 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); assert(DstOpnd && DstOpnd->isReg()); auto DPPMovReg = DstOpnd->getReg(); + if (DPPMovReg.isPhysical()) { + LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n"); + return false; + } if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) { LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" " for all uses\n"); @@ -362,7 +375,13 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { bool BoundCtrlZero = BCZOpnd->getImm(); auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old); + auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(OldOpnd && OldOpnd->isReg()); + assert(SrcOpnd && SrcOpnd->isReg()); + if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) { + LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n"); + return false; + } 
auto * const OldOpndValue = getOldOpndValue(*OldOpnd); // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else @@ -408,6 +427,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { dbgs() << ", bound_ctrl=" << CombBCZ << '\n'); SmallVector OrigMIs, DPPMIs; + DenseMap> RegSeqWithOpNos; auto CombOldVGPR = getRegSubRegPair(*OldOpnd); // try to reuse previous old reg if its undefined (IMPLICIT_DEF) if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef @@ -420,13 +440,49 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { OrigMIs.push_back(&MovMI); bool Rollback = true; + SmallVector Uses; + for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) { + Uses.push_back(&Use); + } + + while (!Uses.empty()) { + MachineOperand *Use = Uses.pop_back_val(); Rollback = true; - auto &OrigMI = *Use.getParent(); + auto &OrigMI = *Use->getParent(); LLVM_DEBUG(dbgs() << " try: " << OrigMI); auto OrigOp = OrigMI.getOpcode(); + if (OrigOp == AMDGPU::REG_SEQUENCE) { + Register FwdReg = OrigMI.getOperand(0).getReg(); + unsigned FwdSubReg = 0; + + if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) { + LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" + " for all uses\n"); + break; + } + + unsigned OpNo, E = OrigMI.getNumOperands(); + for (OpNo = 1; OpNo < E; OpNo += 2) { + if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) { + FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm(); + break; + } + } + + if (!FwdSubReg) + break; + + for (auto &Op : MRI->use_nodbg_operands(FwdReg)) { + if (Op.getSubReg() == FwdSubReg) + Uses.push_back(&Op); + } + RegSeqWithOpNos[&OrigMI].push_back(OpNo); + continue; + } + if (TII->isVOP3(OrigOp)) { if (!TII->hasVALU32BitEncoding(OrigOp)) { LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n"); @@ -447,14 +503,14 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { } LLVM_DEBUG(dbgs() << " combining: " << OrigMI); - if (&Use == TII->getNamedOperand(OrigMI, 
AMDGPU::OpName::src0)) { + if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) { if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ)) { DPPMIs.push_back(DPPInst); Rollback = false; } } else if (OrigMI.isCommutable() && - &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { + Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { auto *BB = OrigMI.getParent(); auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI); BB->insert(OrigMI, NewMI); @@ -475,9 +531,22 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { OrigMIs.push_back(&OrigMI); } + Rollback |= !Uses.empty(); + for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs)) MI->eraseFromParent(); + if (!Rollback) { + for (auto &S : RegSeqWithOpNos) { + if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) { + S.first->eraseFromParent(); + continue; + } + while (!S.second.empty()) + S.first->getOperand(S.second.pop_back_val()).setIsUndef(true); + } + } + return !Rollback; } @@ -498,6 +567,13 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; + } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) { + auto Split = TII->expandMovDPP64(MI); + for (auto M : { Split.first, Split.second }) { + if (combineDPPMov(*M)) + ++NumDPPMovsCombined; + } + Changed = true; } } } diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 885239e2fae..9528aee4c50 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -726,7 +726,7 @@ int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, if (!TRI->isVGPR(MRI, Def.getReg())) return WaitStatesNeeded; - unsigned Reg = Def.getReg(); + Register Reg = Def.getReg(); auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { int DataIdx = createsVALUHazard(*MI); return DataIdx >= 0 && 
@@ -792,7 +792,7 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) return 0; - unsigned LaneSelectReg = LaneSelectOp->getReg(); + Register LaneSelectReg = LaneSelectOp->getReg(); auto IsHazardFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; @@ -891,7 +891,7 @@ bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* // which is always a VGPR and available. auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); - unsigned Reg = Src0->getReg(); + Register Reg = Src0->getReg(); bool IsUndef = Src0->isUndef(); BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32)) @@ -952,6 +952,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { unsigned SDSTName; switch (MI->getOpcode()) { case AMDGPU::V_READLANE_B32: + case AMDGPU::V_READLANE_B32_gfx10: case AMDGPU::V_READFIRSTLANE_B32: SDSTName = AMDGPU::OpName::vdst; break; @@ -976,7 +977,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { if (!SDST) return false; - const unsigned SDSTReg = SDST->getReg(); + const Register SDSTReg = SDST->getReg(); auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) { return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI); }; @@ -1251,14 +1252,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7; const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15; const int MaxWaitStates = 18; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); unsigned HazardDefLatency = 0; auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this] (MachineInstr *MI) { if (!IsMFMAFn(MI)) return false; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); if (DstReg == Reg) return false; HazardDefLatency = 
std::max(HazardDefLatency, @@ -1304,7 +1305,7 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) { if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32) return false; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); return TRI.regsOverlap(Reg, DstReg); }; @@ -1330,14 +1331,14 @@ int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) { const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5; const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13; const int MaxWaitStates = 13; - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); unsigned HazardDefLatency = 0; auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this] (MachineInstr *MI) { if (!IsMFMAFn(MI)) return false; - unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); + Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg(); HazardDefLatency = std::max(HazardDefLatency, TSchedModel.computeInstrLatency(MI)); return TRI.regsOverlap(Reg, DstReg); @@ -1376,7 +1377,7 @@ int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) { if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg())) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); const int AccVgprReadLdStWaitStates = 2; const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1; diff --git a/lib/Target/AMDGPU/GCNILPSched.cpp b/lib/Target/AMDGPU/GCNILPSched.cpp index 1eb617640c3..39072af7d87 100644 --- a/lib/Target/AMDGPU/GCNILPSched.cpp +++ b/lib/Target/AMDGPU/GCNILPSched.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 3525174223b..90ab6a14ce2 100644 --- 
a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -237,7 +237,7 @@ public: GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) - : BaseClass(C, llvm::make_unique()) + : BaseClass(C, std::make_unique()) , Context(C) , Strategy(S) , UPTracker(*LIS) { diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp index 51c4c99cfb1..36a8f74150f 100644 --- a/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -173,11 +173,11 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { bool NSA = false; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI.getOperand(VAddr0Idx + I); - unsigned Reg = Op.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) + Register Reg = Op.getReg(); + if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) return NSA_Status::FIXED; - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); if (!Fast) { if (!PhysReg) @@ -276,7 +276,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { SlotIndex MinInd, MaxInd; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); LiveInterval *LI = &LIS->getInterval(Reg); if (llvm::find(Intervals, LI) != Intervals.end()) { // Same register used, unable to make sequential diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp index f0d47eaa4ed..2927d4eb745 100644 --- a/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -230,7 +230,7 @@ private: public: Printable printReg(unsigned Reg, unsigned SubReg = 0) const { return Printable([Reg, SubReg, this](raw_ostream &OS) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if 
(Register::isPhysicalRegister(Reg)) { OS << llvm::printReg(Reg, TRI); return; } @@ -275,7 +275,7 @@ char GCNRegBankReassign::ID = 0; char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID; unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { - assert (TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); unsigned Size = TRI->getRegSizeInBits(*RC); @@ -293,7 +293,7 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg, int Bank) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (!VRM->isAssignedReg(Reg)) return 0; @@ -364,7 +364,7 @@ unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI, if (!Op.isReg() || Op.isUndef()) continue; - unsigned R = Op.getReg(); + Register R = Op.getReg(); if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R))) continue; @@ -420,12 +420,12 @@ unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI, } bool GCNRegBankReassign::isReassignable(unsigned Reg) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) + if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) return false; const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); - unsigned PhysReg = VRM->getPhys(Reg); + Register PhysReg = VRM->getPhys(Reg); if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg) return false; @@ -654,7 +654,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { } std::sort(BankStalls.begin(), BankStalls.end()); - unsigned OrigReg = VRM->getPhys(C.Reg); + Register OrigReg = VRM->getPhys(C.Reg); LRM->unassign(LI); while (!BankStalls.empty()) { BankStall BS = BankStalls.pop_back_val(); diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 39460fbd8a8..d593204cba0 100644 --- 
a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -40,7 +40,7 @@ void llvm::printLivesAt(SlotIndex SI, << *LIS.getInstructionFromIndex(SI); unsigned Num = 0; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - const unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + const unsigned Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; const auto &LI = LIS.getInterval(Reg); @@ -84,7 +84,7 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1, unsigned GCNRegPressure::getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const auto RC = MRI.getRegClass(Reg); auto STI = static_cast(MRI.getTargetRegisterInfo()); return STI->isSGPRClass(RC) ? @@ -183,7 +183,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const { - OS << "VGPRs: " << getVGPRNum(); + OS << "VGPRs: " << Value[VGPR32] << ' '; + OS << "AGPRs: " << Value[AGPR32]; if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')'; OS << ", SGPRs: " << getSGPRNum(); if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGPRNum()) << ')'; @@ -196,8 +197,7 @@ void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const { static LaneBitmask getDefRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI) { - assert(MO.isDef() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); + assert(MO.isDef() && MO.isReg() && Register::isVirtualRegister(MO.getReg())); // We don't rely on read-undef flag because in case of tentative schedule // tracking it isn't set correctly yet. 
This works correctly however since @@ -210,8 +210,7 @@ static LaneBitmask getDefRegMask(const MachineOperand &MO, static LaneBitmask getUsedRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI, const LiveIntervals &LIS) { - assert(MO.isUse() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); + assert(MO.isUse() && MO.isReg() && Register::isVirtualRegister(MO.getReg())); if (auto SubReg = MO.getSubReg()) return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); @@ -232,7 +231,7 @@ collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI) { SmallVector Res; for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; if (!MO.isUse() || !MO.readsReg()) continue; @@ -278,7 +277,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const MachineRegisterInfo &MRI) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - auto Reg = TargetRegisterInfo::index2VirtReg(I); + auto Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -329,8 +328,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { MaxPressure = max(AtMIPressure, MaxPressure); for (const auto &MO : MI.defs()) { - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) || - MO.isDead()) + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()) || MO.isDead()) continue; auto Reg = MO.getReg(); @@ -408,8 +406,8 @@ void GCNDownwardRPTracker::advanceToNext() { for (const auto &MO : LastTrackedMI->defs()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; auto &LiveMask = LiveRegs[Reg]; auto PrevMask = LiveMask; @@ 
-500,7 +498,7 @@ void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, const MachineRegisterInfo &MRI) { const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); auto It = LiveRegs.find(Reg); if (It != LiveRegs.end() && It->second.any()) OS << ' ' << printVRegOrUnit(Reg, TRI) << ':' diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h index e4894418b94..5862cdb0416 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.h +++ b/lib/Target/AMDGPU/GCNRegPressure.h @@ -214,7 +214,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { DenseMap LiveRegMap; SmallVector LiveIdxs, SRLiveIdxs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - auto Reg = TargetRegisterInfo::index2VirtReg(I); + auto Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; auto &LI = LIS.getInterval(Reg); diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 4ea990ae490..973491a70d3 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -71,8 +71,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // the tracker, so we need to pass those function a non-const copy. RegPressureTracker &TempTracker = const_cast(RPTracker); - std::vector Pressure; - std::vector MaxPressure; + Pressure.clear(); + MaxPressure.clear(); if (AtTop) TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); @@ -103,10 +103,10 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU // the analysis to look through dependencies to find the path with the least // register pressure. - // We only need to update the RPDelata for instructions that increase - // register pressure. 
Instructions that decrease or keep reg pressure - // the same will be marked as RegExcess in tryCandidate() when they - // are compared with instructions that increase the register pressure. + // We only need to update the RPDelta for instructions that increase register + // pressure. Instructions that decrease or keep reg pressure the same will be + // marked as RegExcess in tryCandidate() when they are compared with + // instructions that increase the register pressure. if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); @@ -160,6 +160,7 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(Zone.DAG, SchedModel); Cand.setBest(TryCand); + LLVM_DEBUG(traceCandidate(Cand)); } } } @@ -195,6 +196,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); + assert(TCand.SU == BotCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif } // Check if the top Q has a better candidate. 
@@ -206,6 +216,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); +#ifndef NDEBUG + if (VerifyScheduling) { + SchedCandidate TCand; + TCand.reset(CandPolicy()); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); + assert(TCand.SU == TopCand.SU && + "Last pick result should correspond to re-picking right now"); + } +#endif } // Pick best from BotCand and TopCand. diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h index eaf3dee9ba5..dd687a930c7 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -40,6 +40,9 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler { const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure); + std::vector Pressure; + std::vector MaxPressure; + unsigned SGPRExcessLimit; unsigned VGPRExcessLimit; unsigned SGPRCriticalLimit; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 57c0ba26cc3..1f94ab79912 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -109,7 +109,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, MCContext *Ctx) { int64_t SignedValue = static_cast(Value); - switch (static_cast(Fixup.getKind())) { + switch (Fixup.getTargetKind()) { case AMDGPU::fixup_si_sopp_br: { int64_t BrImm = (SignedValue - 4) / 4; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 6549a8d7d59..d352219a7a9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -87,7 +87,7 @@ std::unique_ptr llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t 
OSABI, bool HasRelocationAddend, uint8_t ABIVersion) { - return llvm::make_unique(Is64Bit, OSABI, + return std::make_unique(Is64Bit, OSABI, HasRelocationAddend, ABIVersion); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 01b53432cbb..a9888e6ed92 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -196,6 +196,10 @@ void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo, printNamedBit(MI, OpNo, O, "slc"); } +void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { +} + void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "tfe"); @@ -292,35 +296,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } #endif - unsigned AltName = AMDGPU::Reg32; - - if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg64; - else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg128; - else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg96; - else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg160; - else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg256; - else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) 
|| - MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg512; - else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) || - MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo)) - AltName = AMDGPU::Reg1024; - - O << getRegisterName(RegNo, AltName); + O << getRegisterName(RegNo); } void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo, @@ -623,9 +599,11 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: @@ -689,6 +667,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI, switch (MI->getOpcode()) { default: break; + case AMDGPU::V_CNDMASK_B32_sdwa_gfx10: case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index b544d1ef360..66b70831ff9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -12,7 +12,6 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H -#include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -26,8 +25,7 @@ public: //Autogenerated by tblgen void printInstruction(const MCInst *MI, const MCSubtargetInfo 
&STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = AMDGPU::NoRegAltName); + static const char *getRegisterName(unsigned RegNo); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; @@ -74,6 +72,8 @@ private: raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8f11433476f..c15da8075a3 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -250,7 +250,7 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( bool AMDGPUTargetAsmStreamer::EmitCodeEnd() { const uint32_t Encoded_s_code_end = 0xbf9f0000; OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n'; - OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n'; + OS << "\t.fill 48, 4, " << Encoded_s_code_end << '\n'; return true; } @@ -602,7 +602,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd() { MCStreamer &OS = getStreamer(); OS.PushSection(); OS.EmitValueToAlignment(64, Encoded_s_code_end, 4); - for (unsigned I = 0; I < 32; ++I) + for (unsigned I = 0; I < 48; ++I) OS.EmitIntValue(Encoded_s_code_end, 4); OS.PopSection(); return true; diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 4735e6cb244..f33ad950d5d 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -26,7 +26,7 @@ def MIMGEncoding : GenericEnum { // Represent an ISA-level opcode, independent of the encoding and the // vdata/vaddr size. 
-class MIMGBaseOpcode { +class MIMGBaseOpcode : PredicateControl { MIMGBaseOpcode BaseOpcode = !cast(NAME); bit Store = 0; bit Atomic = 0; @@ -291,7 +291,7 @@ multiclass MIMG_NoSampler_Src_Helper op, string asm, multiclass MIMG_NoSampler op, string asm, bit has_d16, bit mip = 0, bit isResInfo = 0> { - def "" : MIMGBaseOpcode, PredicateControl { + def "" : MIMGBaseOpcode { let Coordinates = !if(isResInfo, 0, 1); let LodOrClampOrMip = mip; let HasD16 = has_d16; diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp index 3fb18862fca..b29cd75f75c 100644 --- a/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -104,7 +104,7 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Functions needs to be cacheline (256B) aligned. - MF.ensureAlignment(8); + MF.ensureAlignment(Align(256)); SetupMachineFunction(MF); diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 8098b81d1ea..e4160ac11c8 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -303,7 +303,7 @@ private: if (!MO.isReg()) continue; if (MO.isDef()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (R600::R600_Reg128RegClass.contains(Reg)) DstMI = Reg; else @@ -312,7 +312,7 @@ private: &R600::R600_Reg128RegClass); } if (MO.isUse()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (R600::R600_Reg128RegClass.contains(Reg)) SrcMI = Reg; else diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index c6e8a060d8a..fd75c41040e 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -135,7 +135,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); + 
Register DstReg = MI.getOperand(0).getReg(); unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; for (unsigned Chan = 0; Chan < 4; ++Chan) { @@ -155,12 +155,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { unsigned Opcode = BMI->getOpcode(); // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. - unsigned Src0 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600::OpName::src0)) - .getReg(); - unsigned Src1 = BMI->getOperand( - TII->getOperandIdx(Opcode, R600::OpName::src1)) - .getReg(); + Register Src0 = + BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src0)) + .getReg(); + Register Src1 = + BMI->getOperand(TII->getOperandIdx(Opcode, R600::OpName::src1)) + .getReg(); (void) Src0; (void) Src1; if ((TRI.getEncodingValue(Src0) & 0xff) < 127 && @@ -205,10 +205,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { // T0_Z = CUBE T1_X, T1_Z // T0_W = CUBE T1_Y, T1_Z for (unsigned Chan = 0; Chan < 4; Chan++) { - unsigned DstReg = MI.getOperand( - TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); - unsigned Src0 = MI.getOperand( - TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); + Register DstReg = + MI.getOperand(TII->getOperandIdx(MI, R600::OpName::dst)).getReg(); + Register Src0 = + MI.getOperand(TII->getOperandIdx(MI, R600::OpName::src0)).getReg(); unsigned Src1 = 0; // Determine the correct source registers diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h index 950e238f497..283e4d1935e 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.h +++ b/lib/Target/AMDGPU/R600FrameLowering.h @@ -15,9 +15,9 @@ namespace llvm { class R600FrameLowering : public AMDGPUFrameLowering { public: - R600FrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1) : - AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + R600FrameLowering(StackDirection D, Align 
StackAl, int LAO, + Align TransAl = Align::None()) + : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~R600FrameLowering() override; void emitPrologue(MachineFunction &MF, diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index f80a53ba1dc..659458b0b75 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -334,8 +335,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } case R600::MASK_WRITE: { - unsigned maskedRegister = MI.getOperand(0).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); + Register maskedRegister = MI.getOperand(0).getReg(); + assert(Register::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); TII->addFlag(*defInstr, 0, MO_FLAG_MASK); break; @@ -782,7 +783,7 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { return TrigVal; // On R600 hw, COS/SIN input must be between -Pi and Pi. 
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, - DAG.getConstantFP(3.14159265359, DL, MVT::f32)); + DAG.getConstantFP(numbers::pif, DL, MVT::f32)); } SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index d9e839fe203..04a5e93f621 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -97,8 +97,8 @@ bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), E = MBBI->operands_end(); I != E; ++I) { - if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) && - I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg())) + if (I->isReg() && !Register::isVirtualRegister(I->getReg()) && I->isUse() && + RI.isPhysRegLiveAcrossClauses(I->getReg())) return false; } return true; @@ -242,8 +242,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { for (MachineInstr::const_mop_iterator I = MI.operands_begin(), E = MI.operands_end(); I != E; ++I) { - if (!I->isReg() || !I->isUse() || - TargetRegisterInfo::isVirtualRegister(I->getReg())) + if (!I->isReg() || !I->isUse() || Register::isVirtualRegister(I->getReg())) continue; if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg())) @@ -294,7 +293,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { for (unsigned j = 0; j < 8; j++) { MachineOperand &MO = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == R600::ALU_CONST) { MachineOperand &Sel = MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); @@ -317,7 +316,7 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const { if (SrcIdx < 0) break; MachineOperand &MO = MI.getOperand(SrcIdx); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == R600::ALU_CONST) { MachineOperand &Sel = 
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); @@ -348,7 +347,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI, unsigned i = 0; for (const auto &Src : getSrcs(MI)) { ++i; - unsigned Reg = Src.first->getReg(); + Register Reg = Src.first->getReg(); int Index = RI.getEncodingValue(Reg) & 0xff; if (Reg == R600::OQAP) { Result.push_back(std::make_pair(Index, 0U)); @@ -865,7 +864,7 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const { if (idx < 0) return false; - unsigned Reg = MI.getOperand(idx).getReg(); + Register Reg = MI.getOperand(idx).getReg(); switch (Reg) { default: return false; case R600::PRED_SEL_ONE: @@ -1038,7 +1037,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); @@ -1052,7 +1051,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); + Register OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if (OffsetReg == R600::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), MI.getOperand(ValOpIdx).getReg()); @@ -1193,8 +1192,7 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); for (std::pair LI : MRI.liveins()) { unsigned Reg = LI.first; - if (TargetRegisterInfo::isVirtualRegister(Reg) || - 
!IndirectRC->contains(Reg)) + if (Register::isVirtualRegister(Reg) || !IndirectRC->contains(Reg)) continue; unsigned RegIndex; diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index 34267a909b5..7569a262953 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -183,7 +183,7 @@ isPhysicalRegCopy(MachineInstr *MI) { if (MI->getOpcode() != R600::COPY) return false; - return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); + return !Register::isVirtualRegister(MI->getOperand(1).getReg()); } void R600SchedStrategy::releaseTopNode(SUnit *SU) { @@ -209,7 +209,7 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) { bool R600SchedStrategy::regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { return RC->contains(Reg); } else { return MRI->getRegClass(Reg) == RC; @@ -270,7 +270,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { } // Is the result already member of a X/Y/Z/W class ? 
- unsigned DestReg = MI->getOperand(0).getReg(); + Register DestReg = MI->getOperand(0).getReg(); if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || regBelongsToClass(DestReg, &R600::R600_AddrRegClass)) return AluT_X; @@ -357,7 +357,7 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { if (DstIndex == -1) { return; } - unsigned DestReg = MI->getOperand(DstIndex).getReg(); + Register DestReg = MI->getOperand(DstIndex).getReg(); // PressureRegister crashes if an operand is def and used in the same inst // and we try to constraint its regclass for (MachineInstr::mop_iterator It = MI->operands_begin(), diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 9f1cb6582b5..cec7f563f48 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -58,7 +58,7 @@ using namespace llvm; static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { assert(MRI.isSSA()); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); return MI && MI->isImplicitDef(); @@ -197,17 +197,17 @@ unsigned getReassignedChan( MachineInstr *R600VectorRegMerger::RebuildVector( RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, const std::vector> &RemapChan) const { - unsigned Reg = RSI->Instr->getOperand(0).getReg(); + Register Reg = RSI->Instr->getOperand(0).getReg(); MachineBasicBlock::iterator Pos = RSI->Instr; MachineBasicBlock &MBB = *Pos->getParent(); DebugLoc DL = Pos->getDebugLoc(); - unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); + Register SrcVec = BaseRSI->Instr->getOperand(0).getReg(); DenseMap UpdatedRegToChan = BaseRSI->RegToChan; std::vector UpdatedUndef = BaseRSI->UndefReg; for (DenseMap::iterator It = RSI->RegToChan.begin(), E = RSI->RegToChan.end(); It != E; ++It) { - unsigned DstReg = 
MRI->createVirtualRegister(&R600::R600_Reg128RegClass); + Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); unsigned SubReg = (*It).first; unsigned Swizzle = (*It).second; unsigned Chan = getReassignedChan(RemapChan, Swizzle); @@ -350,7 +350,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { MachineInstr &MI = *MII; if (MI.getOpcode() != R600::REG_SEQUENCE) { if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { - unsigned Reg = MI.getOperand(1).getReg(); + Register Reg = MI.getOperand(1).getReg(); for (MachineRegisterInfo::def_instr_iterator It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); It != E; ++It) { @@ -363,7 +363,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { RegSeqInfo RSI(*MRI, &MI); // All uses of MI are swizzeable ? - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); if (!areAllUsesSwizzeable(Reg)) continue; diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index df200baf11c..176269f9b68 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -90,7 +90,7 @@ private: if (DstIdx == -1) { continue; } - unsigned Dst = BI->getOperand(DstIdx).getReg(); + Register Dst = BI->getOperand(DstIdx).getReg(); if (isTrans || TII->isTransOnly(*BI)) { Result[Dst] = R600::PS; continue; @@ -136,7 +136,7 @@ private: int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); if (OperandIdx < 0) continue; - unsigned Src = MI.getOperand(OperandIdx).getReg(); + Register Src = MI.getOperand(OperandIdx).getReg(); const DenseMap::const_iterator It = PVs.find(Src); if (It != PVs.end()) MI.getOperand(OperandIdx).setReg(It->second); diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index 685df74490f..ef12c1d2459 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -93,7 +93,7 @@ 
const RegClassWeight &R600RegisterInfo::getRegClassWeight( } bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const { - assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + assert(!Register::isVirtualRegister(Reg)); switch (Reg) { case R600::OQAP: diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp index f8094e35816..ee011286b8f 100644 --- a/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -129,7 +129,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { continue; // Create a register for the intialization value. - unsigned PrevDst = + Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); unsigned NewDst = 0; // Final initialized value will be in here @@ -150,7 +150,7 @@ bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { NewDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); // Initialize dword - unsigned SubReg = + Register SubReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) .addImm(0); diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index a0e1ec6ac23..23ef56afc39 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -99,7 +99,10 @@ enum : uint64_t { FPAtomic = UINT64_C(1) << 53, // Is a MFMA instruction. - IsMAI = UINT64_C(1) << 54 + IsMAI = UINT64_C(1) << 54, + + // Is a DOT instruction. + IsDOT = UINT64_C(1) << 55 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. 
@@ -444,6 +447,7 @@ namespace DPP { enum DppCtrl : unsigned { QUAD_PERM_FIRST = 0, + QUAD_PERM_ID = 0xE4, // identity permutation QUAD_PERM_LAST = 0xFF, DPP_UNUSED1 = 0x100, ROW_SHL0 = 0x100, diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 624953963cf..65286751c12 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -113,10 +113,16 @@ class SIFixSGPRCopies : public MachineFunctionPass { public: static char ID; + MachineRegisterInfo *MRI; + const SIRegisterInfo *TRI; + const SIInstrInfo *TII; + SIFixSGPRCopies() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; + void processPHINode(MachineInstr &MI); + StringRef getPassName() const override { return "SI Fix SGPR copies"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -148,7 +154,7 @@ static bool hasVectorOperands(const MachineInstr &MI, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + !Register::isVirtualRegister(MI.getOperand(i).getReg())) continue; if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg()))) @@ -161,21 +167,19 @@ static std::pair getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { - unsigned DstReg = Copy.getOperand(0).getReg(); - unsigned SrcReg = Copy.getOperand(1).getReg(); + Register DstReg = Copy.getOperand(0).getReg(); + Register SrcReg = Copy.getOperand(1).getReg(); - const TargetRegisterClass *SrcRC = - TargetRegisterInfo::isVirtualRegister(SrcReg) ? - MRI.getRegClass(SrcReg) : - TRI.getPhysRegClass(SrcReg); + const TargetRegisterClass *SrcRC = Register::isVirtualRegister(SrcReg) + ? MRI.getRegClass(SrcReg) + : TRI.getPhysRegClass(SrcReg); // We don't really care about the subregister here. 
// SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); - const TargetRegisterClass *DstRC = - TargetRegisterInfo::isVirtualRegister(DstReg) ? - MRI.getRegClass(DstReg) : - TRI.getPhysRegClass(DstReg); + const TargetRegisterClass *DstRC = Register::isVirtualRegister(DstReg) + ? MRI.getRegClass(DstReg) + : TRI.getPhysRegClass(DstReg); return std::make_pair(SrcRC, DstRC); } @@ -199,10 +203,10 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIInstrInfo *TII) { MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); auto &Src = MI.getOperand(1); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = Src.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - !TargetRegisterInfo::isVirtualRegister(DstReg)) + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = Src.getReg(); + if (!Register::isVirtualRegister(SrcReg) || + !Register::isVirtualRegister(DstReg)) return false; for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) { @@ -238,7 +242,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, MachineRegisterInfo &MRI) { assert(MI.isRegSequence()); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) return false; @@ -250,7 +254,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return false; // It is illegal to have vreg inputs to a physreg defining reg_sequence. 
- if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg())) + if (Register::isPhysicalRegister(CopyUse.getOperand(0).getReg())) return false; const TargetRegisterClass *SrcRC, *DstRC; @@ -281,7 +285,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, bool IsAGPR = TRI->hasAGPRs(DstRC); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { - unsigned SrcReg = MI.getOperand(I).getReg(); + Register SrcReg = MI.getOperand(I).getReg(); unsigned SrcSubReg = MI.getOperand(I).getSubReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); @@ -291,7 +295,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); - unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC); + Register TmpReg = MRI.createVirtualRegister(NewSrcRC); BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg) @@ -299,7 +303,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, if (IsAGPR) { const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC); - unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC); + Register TmpAReg = MRI.createVirtualRegister(NewSrcRC); unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ? 
AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY; BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc), @@ -315,52 +319,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, return true; } -static bool phiHasVGPROperands(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - const SIRegisterInfo *TRI, - const SIInstrInfo *TII) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (TRI->hasVGPRs(MRI.getRegClass(Reg))) - return true; - } - return false; -} - -static bool phiHasBreakDef(const MachineInstr &PHI, - const MachineRegisterInfo &MRI, - SmallSet &Visited) { - for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) { - unsigned Reg = PHI.getOperand(i).getReg(); - if (Visited.count(Reg)) - continue; - - Visited.insert(Reg); - - MachineInstr *DefInstr = MRI.getVRegDef(Reg); - switch (DefInstr->getOpcode()) { - default: - break; - case AMDGPU::SI_IF_BREAK: - return true; - case AMDGPU::PHI: - if (phiHasBreakDef(*DefInstr, MRI, Visited)) - return true; - } - } - return false; -} - -static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI) { - for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(), - E = MBB.end(); I != E; ++I) { - if (I->modifiesRegister(AMDGPU::EXEC, &TRI)) - return true; - } - return false; -} - static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, @@ -422,12 +380,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB, return false; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { - return hasTerminatorThatModifiesExec(*MBB, *TRI); }); -} - // Checks if there is potential path From instruction To instruction. 
// If CutOff is specified and it sits in between of that path we ignore // a higher portion of the path and report it is not reachable. @@ -468,6 +420,7 @@ getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { // executioon. static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII) { // List of inits by immediate value. @@ -482,7 +435,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, for (auto &MI : MRI.def_instructions(Reg)) { MachineOperand *Imm = nullptr; - for (auto &MO: MI.operands()) { + for (auto &MO : MI.operands()) { if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { Imm = nullptr; @@ -587,8 +540,44 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } - for (auto MI : MergedInstrs) - MI->removeFromParent(); + // Remove initializations that were merged into another. + for (auto &Init : Inits) { + auto &Defs = Init.second; + auto I = Defs.begin(); + while (I != Defs.end()) { + if (MergedInstrs.count(*I)) { + (*I)->eraseFromParent(); + I = Defs.erase(I); + } else + ++I; + } + } + + // Try to schedule SGPR initializations as early as possible in the MBB. + for (auto &Init : Inits) { + auto &Defs = Init.second; + for (auto MI : Defs) { + auto MBB = MI->getParent(); + MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII); + MachineBasicBlock::reverse_iterator B(BoundaryMI); + // Check if B should actually be a boundary. If not set the previous + // instruction as the boundary instead. + if (!TII->isBasicBlockPrologue(*B)) + B++; + + auto R = std::next(MI->getReverseIterator()); + const unsigned Threshold = 50; + // Search until B or Threshold for a place to insert the initialization. 
+ for (unsigned I = 0; R != B && I < Threshold; ++R, ++I) + if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) || + TII->isSchedulingBoundary(*R, MBB, *MBB->getParent())) + break; + + // Move to directly after R. + if (&*--R != MI) + MBB->splice(*R, MBB, MI); + } + } if (Changed) MRI.clearKillFlags(Reg); @@ -598,9 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); MDT = &getAnalysis(); SmallVector Worklist; @@ -617,22 +606,39 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { - // If the destination register is a physical register there isn't really - // much we can do to fix this. - if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) - continue; + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *SrcRC, *DstRC; - std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); + + if (!Register::isVirtualRegister(DstReg)) { + // If the destination register is a physical register there isn't + // really much we can do to fix this. + // Some special instructions use M0 as an input. Some even only use + // the first lane. Insert a readfirstlane and hope for the best. 
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) { + Register TmpReg + = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + BuildMI(MBB, MI, MI.getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg) + .add(MI.getOperand(1)); + MI.getOperand(1).setReg(TmpReg); + } + + continue; + } + if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) { TII->moveToVALU(MI, MDT); break; } - MachineInstr *DefMI = MRI.getVRegDef(SrcReg); + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); unsigned SMovOp; int64_t Imm; // If we are just copying an immediate, we can replace the copy with @@ -651,70 +657,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; } case AMDGPU::PHI: { - unsigned Reg = MI.getOperand(0).getReg(); - if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) - break; - - // We don't need to fix the PHI if the common dominator of the - // two incoming blocks terminates with a uniform branch. - bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII); - if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) { - MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); - MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); - - if (!predsHasDivergentTerminator(MBB0, TRI) && - !predsHasDivergentTerminator(MBB1, TRI)) { - LLVM_DEBUG(dbgs() - << "Not fixing PHI for uniform branch: " << MI << '\n'); - break; - } - } - - // If a PHI node defines an SGPR and any of its operands are VGPRs, - // then we need to move it to the VALU. - // - // Also, if a PHI node defines an SGPR and has all SGPR operands - // we must move it to the VALU, because the SGPR operands will - // all end up being assigned the same register, which means - // there is a potential for a conflict if different threads take - // different control flow paths. 
- // - // For Example: - // - // sgpr0 = def; - // ... - // sgpr1 = def; - // ... - // sgpr2 = PHI sgpr0, sgpr1 - // use sgpr2; - // - // Will Become: - // - // sgpr2 = def; - // ... - // sgpr2 = def; - // ... - // use sgpr2 - // - // The one exception to this rule is when one of the operands - // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK - // instruction. In this case, there we know the program will - // never enter the second block (the loop) without entering - // the first block (where the condition is computed), so there - // is no chance for values to be over-written. - - SmallSet Visited; - if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) { - LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); - TII->moveToVALU(MI, MDT); - } - + processPHINode(MI); break; } case AMDGPU::REG_SEQUENCE: if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) || !hasVectorOperands(MI, TRI)) { - foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); + foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI); continue; } @@ -724,9 +673,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { break; case AMDGPU::INSERT_SUBREG: { const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; - DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); - Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); - Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); + DstRC = MRI->getRegClass(MI.getOperand(0).getReg()); + Src0RC = MRI->getRegClass(MI.getOperand(1).getReg()); + Src1RC = MRI->getRegClass(MI.getOperand(2).getReg()); if (TRI->isSGPRClass(DstRC) && (TRI->hasVectorRegisters(Src0RC) || TRI->hasVectorRegisters(Src1RC))) { @@ -735,12 +684,159 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } break; } + case AMDGPU::V_WRITELANE_B32: { + // Some architectures allow more than one constant bus access without + // SGPR restriction + if (ST.getConstantBusLimit(MI.getOpcode()) != 1) + break; + + // Writelane is special in that it can use SGPR and M0 (which would + // normally 
count as using the constant bus twice - but in this case it + // is allowed since the lane selector doesn't count as a use of the + // constant bus). However, it is still required to abide by the 1 SGPR + // rule. Apply a fix here as we might have multiple SGPRs after + // legalizing VGPRs to SGPRs + int Src0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); + int Src1Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); + MachineOperand &Src0 = MI.getOperand(Src0Idx); + MachineOperand &Src1 = MI.getOperand(Src1Idx); + + // Check to see if the instruction violates the 1 SGPR rule + if ((Src0.isReg() && TRI->isSGPRReg(*MRI, Src0.getReg()) && + Src0.getReg() != AMDGPU::M0) && + (Src1.isReg() && TRI->isSGPRReg(*MRI, Src1.getReg()) && + Src1.getReg() != AMDGPU::M0)) { + + // Check for trivially easy constant prop into one of the operands + // If this is the case then perform the operation now to resolve SGPR + // issue. If we don't do that here we will always insert a mov to m0 + // that can't be resolved in later operand folding pass + bool Resolved = false; + for (MachineOperand *MO : {&Src0, &Src1}) { + if (Register::isVirtualRegister(MO->getReg())) { + MachineInstr *DefMI = MRI->getVRegDef(MO->getReg()); + if (DefMI && TII->isFoldableCopy(*DefMI)) { + const MachineOperand &Def = DefMI->getOperand(0); + if (Def.isReg() && + MO->getReg() == Def.getReg() && + MO->getSubReg() == Def.getSubReg()) { + const MachineOperand &Copied = DefMI->getOperand(1); + if (Copied.isImm() && + TII->isInlineConstant(APInt(64, Copied.getImm(), true))) { + MO->ChangeToImmediate(Copied.getImm()); + Resolved = true; + break; + } + } + } + } + } + + if (!Resolved) { + // Haven't managed to resolve by replacing an SGPR with an immediate + // Move src1 to be in M0 + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + TII->get(AMDGPU::COPY), AMDGPU::M0) + .add(Src1); + Src1.ChangeToRegister(AMDGPU::M0, false); + } + } + break; + } } } } if 
(MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) - hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII); + hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII); return true; } + +void SIFixSGPRCopies::processPHINode(MachineInstr &MI) { + unsigned numVGPRUses = 0; + bool AllAGPRUses = true; + SetVector worklist; + SmallSet Visited; + worklist.insert(&MI); + Visited.insert(&MI); + while (!worklist.empty()) { + const MachineInstr *Instr = worklist.pop_back_val(); + unsigned Reg = Instr->getOperand(0).getReg(); + for (const auto &Use : MRI->use_operands(Reg)) { + const MachineInstr *UseMI = Use.getParent(); + AllAGPRUses &= (UseMI->isCopy() && + TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) || + TRI->isAGPR(*MRI, Use.getReg()); + if (UseMI->isCopy() || UseMI->isRegSequence()) { + if (UseMI->isCopy() && + UseMI->getOperand(0).getReg().isPhysical() && + !TRI->isSGPRReg(*MRI, UseMI->getOperand(0).getReg())) { + numVGPRUses++; + } + if (Visited.insert(UseMI).second) + worklist.insert(UseMI); + + continue; + } + + if (UseMI->isPHI()) { + const TargetRegisterClass *UseRC = MRI->getRegClass(Use.getReg()); + if (!TRI->isSGPRReg(*MRI, Use.getReg()) && + UseRC != &AMDGPU::VReg_1RegClass) + numVGPRUses++; + continue; + } + + const TargetRegisterClass *OpRC = + TII->getOpRegClass(*UseMI, UseMI->getOperandNo(&Use)); + if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass && + OpRC != &AMDGPU::VS_64RegClass) { + numVGPRUses++; + } + } + } + + Register PHIRes = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes); + if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) { + LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI); + MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0)); + } + + bool hasVGPRInput = false; + for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { + unsigned InputReg = MI.getOperand(i).getReg(); + MachineInstr *Def = MRI->getVRegDef(InputReg); + if (TRI->isVectorRegister(*MRI, InputReg)) { 
+ if (Def->isCopy()) { + unsigned SrcReg = Def->getOperand(1).getReg(); + const TargetRegisterClass *RC = + TRI->getRegClassForReg(*MRI, SrcReg); + if (TRI->isSGPRClass(RC)) + continue; + } + hasVGPRInput = true; + break; + } + else if (Def->isCopy() && + TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) { + hasVGPRInput = true; + break; + } + } + + if ((!TRI->isVectorRegister(*MRI, PHIRes) && + RC0 != &AMDGPU::VReg_1RegClass) && + (hasVGPRInput || numVGPRUses > 1)) { + LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); + TII->moveToVALU(MI); + } + else { + LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI); + TII->legalizeOperands(MI, MDT); + } + +} diff --git a/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/lib/Target/AMDGPU/SIFixupVectorISel.cpp index 5b834c8de13..a0119297b11 100644 --- a/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ b/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -91,8 +91,7 @@ static bool findSRegBaseAndIndex(MachineOperand *Op, Worklist.push_back(Op); while (!Worklist.empty()) { MachineOperand *WOp = Worklist.pop_back_val(); - if (!WOp->isReg() || - !TargetRegisterInfo::isVirtualRegister(WOp->getReg())) + if (!WOp->isReg() || !Register::isVirtualRegister(WOp->getReg())) continue; MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg()); switch (DefInst->getOpcode()) { diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 74d77d32801..4eac0316876 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -142,16 +142,20 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII, switch (Opc) { case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_F16_e64: - case AMDGPU::V_FMAC_F32_e64: { + case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMAC_F16_e64: { // Special case for mac. Since this is replaced with mad when folded into // src2, we need to check the legality for the final instruction. 
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (static_cast(OpNo) == Src2Idx) { - bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; - bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; + bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F16_e64; + bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64; unsigned Opc = IsFMA ? - AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); + (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) : + (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); const MCInstrDesc &MadDesc = TII->get(Opc); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } @@ -235,9 +239,11 @@ static bool updateOperand(FoldCandidate &Fold, if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) { MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI, 16); + if (Liveness != MachineBasicBlock::LQR_Dead) { + LLVM_DEBUG(dbgs() << "Not shrinking " << MI << " due to vcc liveness\n"); return false; + } MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); int Op32 = Fold.getShrinkOpcode(); @@ -248,7 +254,7 @@ static bool updateOperand(FoldCandidate &Fold, bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); + Register NewReg0 = MRI.createVirtualRegister(Dst0RC); MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); @@ -314,12 +320,15 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2 unsigned Opc = MI->getOpcode(); if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64) && + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == 
AMDGPU::V_FMAC_F16_e64) && (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { - bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; - bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; + bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F16_e64; + bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64; unsigned NewOpc = IsFMA ? - AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); + (IsF32 ? AMDGPU::V_FMA_F32 : AMDGPU::V_FMA_F16_gfx9) : + (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); // Check if changing this to a v_mad_{f16, f32} instruction will allow us // to fold the operand. @@ -435,7 +444,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) return false; - if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) { + if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) && + TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) { UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm()); return true; } @@ -443,8 +453,8 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, if (!OpToFold.isReg()) return false; - unsigned UseReg = OpToFold.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(UseReg)) + Register UseReg = OpToFold.getReg(); + if (!Register::isVirtualRegister(UseReg)) return false; if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) { @@ -481,6 +491,9 @@ static bool tryToFoldACImm(const SIInstrInfo *TII, return false; // Can only fold splat constants } + if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op)) + return false; + FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op)); return true; } @@ -518,7 +531,7 @@ void SIFoldOperands::foldOperand( // REG_SEQUENCE instructions, so we have to fold them into the // uses of REG_SEQUENCE. 
if (UseMI->isRegSequence()) { - unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); + Register RegSeqDstReg = UseMI->getOperand(0).getReg(); unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); MachineRegisterInfo::use_iterator Next; @@ -569,15 +582,18 @@ void SIFoldOperands::foldOperand( OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); if (FoldingImmLike && UseMI->isCopy()) { - unsigned DestReg = UseMI->getOperand(0).getReg(); - const TargetRegisterClass *DestRC - = TargetRegisterInfo::isVirtualRegister(DestReg) ? - MRI->getRegClass(DestReg) : - TRI->getPhysRegClass(DestReg); + Register DestReg = UseMI->getOperand(0).getReg(); - unsigned SrcReg = UseMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Don't fold into a copy to a physical register. Doing so would interfere + // with the register coalescer's logic which would avoid redundant + // initalizations. + if (DestReg.isPhysical()) + return; + + const TargetRegisterClass *DestRC = MRI->getRegClass(DestReg); + + Register SrcReg = UseMI->getOperand(1).getReg(); + if (SrcReg.isVirtual()) { // XXX - This can be an assert? 
const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg); if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) { MachineRegisterInfo::use_iterator NextUse; @@ -613,10 +629,17 @@ void SIFoldOperands::foldOperand( return; UseMI->setDesc(TII->get(MovOp)); + MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin(); + MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end(); + while (ImpOpI != ImpOpE) { + MachineInstr::mop_iterator Tmp = ImpOpI; + ImpOpI++; + UseMI->RemoveOperand(UseMI->getOperandNo(Tmp)); + } CopiesToReplace.push_back(UseMI); } else { if (UseMI->isCopy() && OpToFold.isReg() && - TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) && + Register::isVirtualRegister(UseMI->getOperand(0).getReg()) && TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) && TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) && !UseMI->getOperand(1).getSubReg()) { @@ -677,6 +700,9 @@ void SIFoldOperands::foldOperand( // => // %sgpr1 = COPY %sgpr0 UseMI->setDesc(TII->get(AMDGPU::COPY)); + UseMI->getOperand(1).setReg(OpToFold.getReg()); + UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); + UseMI->getOperand(1).setIsKill(false); UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane) return; } @@ -708,7 +734,7 @@ void SIFoldOperands::foldOperand( // Split 64-bit constants into 32-bits for folding. if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) { - unsigned UseReg = UseOp.getReg(); + Register UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg); if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64) @@ -810,7 +836,7 @@ static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI, if (Op.isReg()) { // If this has a subregister, it obviously is a register source. 
if (Op.getSubReg() != AMDGPU::NoSubRegister || - !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + !Register::isVirtualRegister(Op.getReg())) return &Op; MachineInstr *Def = MRI.getVRegDef(Op.getReg()); @@ -1073,6 +1099,13 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, Copy->addImplicitDefUseOperands(*MF); for (FoldCandidate &Fold : FoldList) { + if (Fold.isReg() && Register::isVirtualRegister(Fold.OpToFold->getReg())) { + Register Reg = Fold.OpToFold->getReg(); + MachineInstr *DefMI = Fold.OpToFold->getParent(); + if (DefMI->readsRegister(AMDGPU::EXEC, TRI) && + execMayBeModifiedBeforeUse(*MRI, Reg, *DefMI, *Fold.UseMI)) + continue; + } if (updateOperand(Fold, *TII, *TRI, *ST)) { // Clear kill flags. if (Fold.isReg()) { @@ -1316,6 +1349,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; + + MachineOperand *CurrentKnownM0Val = nullptr; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -1328,6 +1363,25 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); + + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; + continue; + } + + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. + if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + continue; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? 
+ nullptr : &NewM0Val; continue; } @@ -1339,8 +1393,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (!FoldingImm && !OpToFold.isReg()) continue; - if (OpToFold.isReg() && - !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) + if (OpToFold.isReg() && !Register::isVirtualRegister(OpToFold.getReg())) continue; // Prevent folding operands backwards in the function. For example, @@ -1350,8 +1403,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // ... // %vgpr0 = V_MOV_B32_e32 1, implicit %exec MachineOperand &Dst = MI.getOperand(0); - if (Dst.isReg() && - !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) + if (Dst.isReg() && !Register::isVirtualRegister(Dst.getReg())) continue; foldInstOperand(MI, OpToFold); diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index f3c9ad63a80..26bae5734df 100644 --- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -120,7 +120,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) { return false; // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it. 
for (const MachineOperand &ResMO : MI.defs()) { - unsigned ResReg = ResMO.getReg(); + Register ResReg = ResMO.getReg(); for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || MO.isDef()) continue; @@ -144,7 +144,7 @@ static unsigned getMopState(const MachineOperand &MO) { S |= RegState::Kill; if (MO.isEarlyClobber()) S |= RegState::EarlyClobber; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && MO.isRenamable()) + if (Register::isPhysicalRegister(MO.getReg()) && MO.isRenamable()) S |= RegState::Renamable; return S; } @@ -152,7 +152,7 @@ static unsigned getMopState(const MachineOperand &MO) { template void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask, Callable Func) const { - if (LaneMask.all() || TargetRegisterInfo::isPhysicalRegister(Reg) || + if (LaneMask.all() || Register::isPhysicalRegister(Reg) || LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) { Func(0); return; @@ -216,7 +216,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If it is tied we will need to write same register as we read. if (MO.isTied()) @@ -227,7 +227,7 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, if (Conflict == Map.end()) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); @@ -265,13 +265,13 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; - LaneBitmask Mask = TargetRegisterInfo::isVirtualRegister(Reg) ? - TRI->getSubRegIndexLaneMask(MO.getSubReg()) : - LaneBitmask::getAll(); + LaneBitmask Mask = Register::isVirtualRegister(Reg) + ? TRI->getSubRegIndexLaneMask(MO.getSubReg()) + : LaneBitmask::getAll(); RegUse &Map = MO.isDef() ? 
Defs : Uses; auto Loc = Map.find(Reg); @@ -389,7 +389,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { for (auto &&R : Defs) { unsigned Reg = R.first; Uses.erase(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); @@ -397,7 +397,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { for (auto &&R : Uses) { unsigned Reg = R.first; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; LIS->removeInterval(Reg); LIS->createAndComputeVirtRegInterval(Reg); diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index feab6bed260..ed07ed100a1 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -112,6 +112,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -132,6 +133,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } @@ -157,6 +159,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); return; } @@ -177,6 +180,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(MMO); } @@ -202,15 +206,15 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST, DebugLoc DL; MachineBasicBlock::iterator I = MBB.begin(); - unsigned FlatScratchInitReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); + Register FlatScratchInitReg = + 
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); MachineRegisterInfo &MRI = MF.getRegInfo(); MRI.addLiveIn(FlatScratchInitReg); MBB.addLiveIn(FlatScratchInitReg); - unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); - unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); + Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); + Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); @@ -424,8 +428,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. - unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( - AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( + AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; if (ST.isAmdHsaOrMesa(F)) { @@ -539,9 +543,9 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, if (ST.isAmdPalOS()) { // The pointer to the GIT is formed from the offset passed in and either // the amdgpu-git-ptr-high function attribute or the top part of the PC - unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); @@ -601,14 +605,14 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, assert(!ST.isAmdHsaOrMesa(Fn)); const MCInstrDesc &SMovB32 = 
TII->get(AMDGPU::S_MOV_B32); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); // Use relocations to get the pointer, and setup the other bits manually. uint64_t Rsrc23 = TII->getScratchRsrcWords23(); if (MFI->hasImplicitBufferPtr()) { - unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); @@ -640,8 +644,8 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); } } else { - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); BuildMI(MBB, I, DL, SMovB32, Rsrc0) .addExternalSymbol("SCRATCH_RSRC_DWORD0") @@ -669,6 +673,8 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { case TargetStackID::NoAlloc: case TargetStackID::SGPRSpill: return true; + case TargetStackID::SVEVector: + return false; } llvm_unreachable("Invalid TargetStackID::Value"); } diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index c644f4726e2..d9970fd6b4b 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -20,9 +20,9 @@ class GCNSubtarget; class SIFrameLowering final : public AMDGPUFrameLowering { public: - SIFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1) : - AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + SIFrameLowering(StackDirection D, Align StackAl, int LAO, + Align 
TransAl = Align::None()) + : AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~SIFrameLowering() override = default; void emitEntryFunctionPrologue(MachineFunction &MF, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index db0782e2bf3..56ebf9c0674 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -20,11 +20,11 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -35,6 +35,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/DAGCombine.h" @@ -44,6 +45,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -115,7 +117,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); @@ -125,10 +127,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass); addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass); - 
addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); - addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass); + addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass); @@ -141,12 +143,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass); if (Subtarget->has16BitInsts()) { - addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass); - addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); // Unless there are also VOP3P operations, not operations are really legal. - addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass); - addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass); + addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); } @@ -178,6 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::v32i32, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); + setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); @@ -215,31 +218,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + 
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); - - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); @@ -653,6 +635,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FADD, MVT::v4f16, Custom); setOperationAction(ISD::FMUL, MVT::v4f16, Custom); + setOperationAction(ISD::FMA, MVT::v4f16, Custom); setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom); setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom); @@ -687,6 +670,33 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, 
setOperationAction(ISD::SELECT, VT, Custom); } + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); + + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v4i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); + setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::ADDCARRY); setTargetDAGCombine(ISD::SUB); @@ -768,19 +778,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // TODO: Consider splitting all arguments into 32-bit pieces. 
- if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + if (CC == CallingConv::AMDGPU_KERNEL) + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + + if (VT.isVector()) { EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); if (Size == 32) return ScalarVT.getSimpleVT(); - if (Size == 64) + if (Size > 32) return MVT::i32; if (Size == 16 && Subtarget->has16BitInsts()) return VT.isInteger() ? MVT::v2i16 : MVT::v2f16; - } + } else if (VT.getSizeInBits() > 32) + return MVT::i32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } @@ -788,7 +801,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { + if (CC == CallingConv::AMDGPU_KERNEL) + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); + + if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); @@ -796,12 +812,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, if (Size == 32) return NumElts; - if (Size == 64) - return 2 * NumElts; + if (Size > 32) + return NumElts * ((Size + 31) / 32); if (Size == 16 && Subtarget->has16BitInsts()) - return (VT.getVectorNumElements() + 1) / 2; - } + return (NumElts + 1) / 2; + } else if (VT.getSizeInBits() > 32) + return (VT.getSizeInBits() + 31) / 32; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } @@ -821,10 +838,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv( return NumIntermediates; } - if (Size == 64) { + if (Size > 32) { RegisterVT = MVT::i32; IntermediateVT = RegisterVT; - NumIntermediates = 2 * NumElts; + NumIntermediates = NumElts * ((Size + 31) / 32); return NumIntermediates; } @@ -901,7 +918,7 @@ bool 
SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getImagePSV( *MF.getSubtarget().getInstrInfo(), CI.getArgOperand(RsrcIntr->RsrcArg)); - Info.align = 0; + Info.align.reset(); } else { Info.ptrVal = MFI->getBufferPSV( *MF.getSubtarget().getInstrInfo(), @@ -947,7 +964,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(4)); @@ -964,7 +981,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getBufferPSV( *MF.getSubtarget().getInstrInfo(), CI.getArgOperand(1)); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); @@ -978,7 +995,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getOperand(0)->getType() ->getPointerElementType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; return true; @@ -988,7 +1005,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); - Info.align = 0; + Info.align.reset(); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast(CI.getOperand(1)); @@ -1012,7 +1029,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // This is an abstract access, but we need to specify a type and size. 
Info.memVT = MVT::i32; Info.size = 4; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOStore; if (IntrID == Intrinsic::amdgcn_ds_gws_barrier) @@ -1215,21 +1232,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, return true; } -bool SITargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( + unsigned Size, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *IsFast) const { if (IsFast) *IsFast = false; - // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, - // which isn't a simple VT. - // Until MVT is extended to handle this, simply check for the size and - // rely on the condition below: allow accesses if the size is a multiple of 4. - if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && - VT.getStoreSize() > 16)) { - return false; - } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte @@ -1268,7 +1276,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( } // Smaller than dword value must be aligned. - if (VT.bitsLT(MVT::i32)) + if (Size < 32) return false; // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the @@ -1277,7 +1285,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( if (IsFast) *IsFast = true; - return VT.bitsGT(MVT::i32) && Align % 4 == 0; + return Size >= 32 && Align >= 4; +} + +bool SITargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { + if (IsFast) + *IsFast = false; + + // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, + // which isn't a simple VT. 
+ // Until MVT is extended to handle this, simply check for the size and + // rely on the condition below: allow accesses if the size is a multiple of 4. + if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && + VT.getStoreSize() > 16)) { + return false; + } + + return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, + Align, Flags, IsFast); } EVT SITargetLowering::getOptimalMemOpType( @@ -1336,9 +1363,9 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const { TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(MVT VT) const { - if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) - return TypeSplitVector; - + int NumElts = VT.getVectorNumElements(); + if (NumElts != 1 && VT.getScalarType().bitsLE(MVT::i16)) + return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } @@ -1562,7 +1589,8 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, // entire split argument. if (Arg->Flags.isSplit()) { while (!Arg->Flags.isSplitEnd()) { - assert(!Arg->VT.isVector() && + assert((!Arg->VT.isVector() || + Arg->VT.getScalarSizeInBits() == 16) && "unexpected vector split in ps argument type"); if (!SkipArg) Splits.push_back(*Arg); @@ -1589,29 +1617,32 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, } // Allocate special inputs passed in VGPRs. 
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { + const LLT S32 = LLT::scalar(32); + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (Info.hasWorkItemIDX()) { - unsigned Reg = AMDGPU::VGPR0; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR0; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDY()) { - unsigned Reg = AMDGPU::VGPR1; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR1; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg)); } if (Info.hasWorkItemIDZ()) { - unsigned Reg = AMDGPU::VGPR2; - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register Reg = AMDGPU::VGPR2; + MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32); CCInfo.AllocateReg(Reg); Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg)); @@ -1642,7 +1673,8 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u, assert(Reg != AMDGPU::NoRegister); MachineFunction &MF = CCInfo.getMachineFunction(); - MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + Register LiveInVReg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + MF.getRegInfo().setType(LiveInVReg, LLT::scalar(32)); return ArgDescriptor::createRegister(Reg, Mask); } @@ -1671,10 +1703,10 @@ static ArgDescriptor allocateSGPR64Input(CCState &CCInfo) { return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16); } -static void allocateSpecialInputVGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void 
SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { const unsigned Mask = 0x3ff; ArgDescriptor Arg; @@ -1692,10 +1724,11 @@ static void allocateSpecialInputVGPRs(CCState &CCInfo, Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg)); } -static void allocateSpecialInputSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { auto &ArgInfo = Info.getArgInfo(); // TODO: Unify handling with private memory pointers. @@ -1728,10 +1761,10 @@ static void allocateSpecialInputSGPRs(CCState &CCInfo, } // Allocate special inputs passed in user SGPRs. -static void allocateHSAUserSGPRs(CCState &CCInfo, - MachineFunction &MF, - const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { +void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const { if (Info.hasImplicitBufferPtr()) { unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); @@ -1758,9 +1791,12 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, } if (Info.hasKernargSegmentPtr()) { - unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI); - MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register InputPtrReg = Info.addKernargSegmentPtr(TRI); CCInfo.AllocateReg(InputPtrReg); + + Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass); + MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); } if (Info.hasDispatchID()) { @@ -1780,32 +1816,32 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, } // Allocate special input registers that are initialized per-wave. 
-static void allocateSystemSGPRs(CCState &CCInfo, - MachineFunction &MF, - SIMachineFunctionInfo &Info, - CallingConv::ID CallConv, - bool IsShader) { +void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const { if (Info.hasWorkGroupIDX()) { unsigned Reg = Info.addWorkGroupIDX(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupIDY()) { unsigned Reg = Info.addWorkGroupIDY(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupIDZ()) { unsigned Reg = Info.addWorkGroupIDZ(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } if (Info.hasWorkGroupInfo()) { unsigned Reg = Info.addWorkGroupInfo(); - MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(Reg); } @@ -1860,7 +1896,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, // resource. For the Code Object V2 ABI, this will be the first 4 user // SGPR inputs. We can reserve those and use them directly. - unsigned PrivateSegmentBufferReg = + Register PrivateSegmentBufferReg = Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); Info.setScratchRSrcReg(PrivateSegmentBufferReg); } else { @@ -1921,7 +1957,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, // // FIXME: Should not do this if inline asm is reading/writing these // registers. 
- unsigned PreloadedSP = Info.getPreloadedReg( + Register PreloadedSP = Info.getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); Info.setStackPtrOffsetReg(PreloadedSP); @@ -1971,7 +2007,7 @@ void SITargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) @@ -2134,7 +2170,7 @@ SDValue SITargetLowering::LowerFormalArguments( assert(VA.isRegLoc() && "Parameter must be in a register!"); - unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); EVT ValVT = VA.getValVT(); @@ -2652,6 +2688,15 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, bool IsThisReturn = false; MachineFunction &MF = DAG.getMachineFunction(); + if (Callee.isUndef() || isNullConstant(Callee)) { + if (!CLI.IsTailCall) { + for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I) + InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT)); + } + + return Chain; + } + if (IsVarArg) { return lowerUnhandledCall(CLI, InVals, "unsupported call to variadic function "); @@ -2782,7 +2827,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, int32_t Offset = LocMemOffset; SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT); - unsigned Align = 0; + MaybeAlign Alignment; if (IsTailCall) { ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; @@ -2790,8 +2835,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Flags.getByValSize() : VA.getValVT().getStoreSize(); // FIXME: We can have better than the minimum byval required alignment. - Align = Flags.isByVal() ? Flags.getByValAlign() : - MinAlign(Subtarget->getStackAlignment(), Offset); + Alignment = + Flags.isByVal() + ? 
MaybeAlign(Flags.getByValAlign()) + : commonAlignment(Subtarget->getStackAlignment(), Offset); Offset = Offset + FPDiff; int FI = MFI.CreateFixedObject(OpSize, Offset, true); @@ -2810,7 +2857,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } else { DstAddr = PtrOff; DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset); - Align = MinAlign(Subtarget->getStackAlignment(), LocMemOffset); + Alignment = + commonAlignment(Subtarget->getStackAlignment(), LocMemOffset); } if (Outs[i].Flags.isByVal()) { @@ -2825,7 +2873,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, MemOpChains.push_back(Cpy); } else { - SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Align); + SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, + Alignment ? Alignment->value() : 0); MemOpChains.push_back(Store); } } @@ -2937,9 +2986,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, IsThisReturn ? OutVals[0] : SDValue()); } -unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register SITargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("m0", AMDGPU::M0) .Case("exec", AMDGPU::EXEC) .Case("exec_lo", AMDGPU::EXEC_LO) @@ -2947,7 +2996,7 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, .Case("flat_scratch", AMDGPU::FLAT_SCR) .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) - .Default(AMDGPU::NoRegister); + .Default(Register()); if (Reg == AMDGPU::NoRegister) { report_fatal_error(Twine("invalid register name \"" @@ -3055,6 +3104,20 @@ splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) { return std::make_pair(LoopBB, RemainderBB); } +/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it. 
+void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + auto I = MI.getIterator(); + auto E = std::next(I); + + BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + MIBundleBuilder Bundler(*MBB, I, E); + finalizeBundle(*MBB, Bundler.begin()); +} + MachineBasicBlock * SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -3066,12 +3129,13 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *RemainderBB; const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - MachineBasicBlock::iterator Prev = std::prev(MI.getIterator()); + // Apparently kill flags are only valid if the def is in the same block? + if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0)) + Src->setIsKill(false); std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true); MachineBasicBlock::iterator I = LoopBB->end(); - MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0); const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg( AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1); @@ -3081,23 +3145,9 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, .addImm(0) .addImm(EncodedReg); - // This is a pain, but we're not allowed to have physical register live-ins - // yet. Insert a pair of copies if the VGPR0 hack is necessary. 
- if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) { - unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0) - .add(*Src); + bundleInstWithWaitcnt(MI); - BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::COPY), Src->getReg()) - .addReg(Data0); - - MRI.setSimpleHint(Data0, Src->getReg()); - } - - BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT)) - .addImm(0); - - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); // Load and check TRAP_STS.MEM_VIOL BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg) @@ -3138,10 +3188,10 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop( MachineBasicBlock::iterator I = LoopBB.begin(); const TargetRegisterClass *BoolRC = TRI->getBoolRC(); - unsigned PhiExec = MRI.createVirtualRegister(BoolRC); - unsigned NewExec = MRI.createVirtualRegister(BoolRC); - unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned CondReg = MRI.createVirtualRegister(BoolRC); + Register PhiExec = MRI.createVirtualRegister(BoolRC); + Register NewExec = MRI.createVirtualRegister(BoolRC); + Register CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register CondReg = MRI.createVirtualRegister(BoolRC); BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg) .addReg(InitReg) @@ -3240,9 +3290,9 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock::iterator I(&MI); const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); - unsigned TmpExec = MRI.createVirtualRegister(BoolXExecRC); + Register DstReg = MI.getOperand(0).getReg(); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register TmpExec = 
MRI.createVirtualRegister(BoolXExecRC); unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; @@ -3315,7 +3365,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII, SetOn->getOperand(3).setIsUndef(); } else { - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp) .add(*Idx) .addImm(Offset); @@ -3351,8 +3401,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg(); int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg); @@ -3390,8 +3440,8 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); - unsigned PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg); @@ -3442,7 +3492,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, MachineFunction *MF = MBB.getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Dst = MI.getOperand(0).getReg(); + Register Dst = MI.getOperand(0).getReg(); const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src); const MachineOperand *Idx = TII->getNamedOperand(MI, 
AMDGPU::OpName::idx); const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); @@ -3505,7 +3555,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, const DebugLoc &DL = MI.getDebugLoc(); - unsigned PhiReg = MRI.createVirtualRegister(VecRC); + Register PhiReg = MRI.createVirtualRegister(VecRC); auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset, UseGPRIdxMode, false); @@ -3564,22 +3614,22 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineOperand &Src0 = MI.getOperand(1); MachineOperand &Src1 = MI.getOperand(2); - unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, Src0, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, Src0, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI, Src1, BoolRC, AMDGPU::sub0, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI, Src1, BoolRC, AMDGPU::sub1, - &AMDGPU::SReg_32_XM0RegClass); + &AMDGPU::SReg_32RegClass); bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO); @@ -3632,8 +3682,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( // S_CMOV_B64 exec, -1 MachineInstr *FirstMI = &*BB->begin(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned InputReg = MI.getOperand(0).getReg(); - unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register InputReg = MI.getOperand(0).getReg(); + Register CountReg = 
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); bool Found = false; // Move the COPY of the input reg to the beginning, so that we can use it. @@ -3707,16 +3757,16 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( const GCNSubtarget &ST = MF->getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src0 = MI.getOperand(1).getReg(); - unsigned Src1 = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); const DebugLoc &DL = MI.getDebugLoc(); - unsigned SrcCond = MI.getOperand(3).getReg(); + Register SrcCond = MI.getOperand(3).getReg(); - unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned SrcCondCopy = MRI.createVirtualRegister(CondRC); + Register SrcCondCopy = MRI.createVirtualRegister(CondRC); BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) .addReg(SrcCond); @@ -3814,8 +3864,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_SEMA_P: case AMDGPU::DS_GWS_SEMA_RELEASE_ALL: case AMDGPU::DS_GWS_BARRIER: - if (getSubtarget()->hasGWSAutoReplay()) + // A s_waitcnt 0 is required to be the instruction immediately following. 
+ if (getSubtarget()->hasGWSAutoReplay()) { + bundleInstWithWaitcnt(MI); return BB; + } + return emitGWSMemViolTestLoop(MI, BB); default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); @@ -3939,6 +3993,30 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi); } +SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opc = Op.getOpcode(); + EVT VT = Op.getValueType(); + assert(VT == MVT::v4i16 || VT == MVT::v4f16); + + SDValue Lo0, Hi0; + std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0); + SDValue Lo1, Hi1; + std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1); + SDValue Lo2, Hi2; + std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2); + + SDLoc SL(Op); + + SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2, + Op->getFlags()); + SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2, + Op->getFlags()); + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi); +} + + SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); @@ -3991,6 +4069,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FMINNUM: case ISD::FMAXNUM: return lowerFMINNUM_FMAXNUM(Op, DAG); + case ISD::FMA: + return splitTernaryVectorOp(Op, DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -4070,6 +4150,41 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode, return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL); } +SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, + SelectionDAG &DAG, + ArrayRef Ops) const { + SDLoc DL(M); + EVT LoadVT = M->getValueType(0); + EVT EltType = LoadVT.getScalarType(); + EVT IntVT = LoadVT.changeTypeToInteger(); + + bool IsD16 = IsFormat && (EltType.getSizeInBits() 
== 16); + + unsigned Opc = + IsFormat ? AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD; + + if (IsD16) { + return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops); + } + + // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics + if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32) + return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); + + if (isTypeLegal(LoadVT)) { + return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT, + M->getMemOperand(), DAG); + } + + EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT); + SDVTList VTList = DAG.getVTList(CastVT, MVT::Other); + SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT, + M->getMemOperand(), DAG); + return DAG.getMergeValues( + {DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)}, + DL); +} + static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -4196,8 +4311,14 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_W_CHAIN: { if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) { - Results.push_back(Res); - Results.push_back(Res.getValue(1)); + if (Res.getOpcode() == ISD::MERGE_VALUES) { + // FIXME: Hacky + Results.push_back(Res.getOperand(0)); + Results.push_back(Res.getOperand(1)); + } else { + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } return; } @@ -4935,11 +5056,8 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too // small. This requires us to add 4 to the global variable offset in order to // compute the correct address. 
- unsigned LoFlags = GAFlags; - if (LoFlags == SIInstrInfo::MO_NONE) - LoFlags = SIInstrInfo::MO_REL32; SDValue PtrLo = - DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags); + DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags); SDValue PtrHi; if (GAFlags == SIInstrInfo::MO_NONE) { PtrHi = DAG.getTargetConstant(0, DL, MVT::i32); @@ -5563,14 +5681,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue}); unsigned CachePolicy = cast(GLC)->getZExtValue(); SDValue Ops[] = { - DAG.getEntryNode(), // Chain - Rsrc, // rsrc - DAG.getConstant(0, DL, MVT::i32), // vindex - {}, // voffset - {}, // soffset - {}, // offset - DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getEntryNode(), // Chain + Rsrc, // rsrc + DAG.getConstant(0, DL, MVT::i32), // vindex + {}, // voffset + {}, // soffset + {}, // offset + DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; // Use the alignment to ensure that the required offsets will fit into the @@ -5579,7 +5697,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, uint64_t InstOffset = cast(Ops[5])->getZExtValue(); for (unsigned i = 0; i < NumLoads; ++i) { - Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32); + Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32); Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops, LoadVT, MMO)); } @@ -5758,45 +5876,31 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::amdgcn_fdiv_fast: return lowerFDIV_FAST(Op, DAG); - case Intrinsic::amdgcn_interp_mov: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = M0.getValue(1); - return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); 
- } - case Intrinsic::amdgcn_interp_p1: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); - SDValue Glue = M0.getValue(1); - return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Glue); - } - case Intrinsic::amdgcn_interp_p2: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = SDValue(M0.getNode(), 1); - return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), - Glue); - } case Intrinsic::amdgcn_interp_p1_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); - SDValue Glue = M0.getValue(1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(5), SDValue()); if (getSubtarget()->getLDSBankCount() == 16) { // 16 bank LDS - SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, - DAG.getConstant(2, DL, MVT::i32), // P0 - Op.getOperand(2), // Attrchan - Op.getOperand(3), // Attr - Glue); + + // FIXME: This implicitly will insert a second CopyToReg to M0. 
+ SDValue S = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32, + DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32), + DAG.getConstant(2, DL, MVT::i32), // P0 + Op.getOperand(2), // Attrchan + Op.getOperand(3), // Attr + Op.getOperand(5)); // m0 + SDValue Ops[] = { Op.getOperand(1), // Src0 Op.getOperand(2), // Attrchan Op.getOperand(3), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers S, // Src2 - holds two f16 values selected by high - DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers Op.getOperand(4), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - DAG.getConstant(0, DL, MVT::i32) // $omod + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + DAG.getTargetConstant(0, DL, MVT::i32) // $omod }; return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops); } else { @@ -5805,28 +5909,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), // Src0 Op.getOperand(2), // Attrchan Op.getOperand(3), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers Op.getOperand(4), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - DAG.getConstant(0, DL, MVT::i32), // $omod - Glue + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + DAG.getTargetConstant(0, DL, MVT::i32), // $omod + ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops); } } case Intrinsic::amdgcn_interp_p2_f16: { - SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6)); - SDValue Glue = SDValue(M0.getNode(), 1); + SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0, + Op.getOperand(6), SDValue()); SDValue Ops[] = { Op.getOperand(2), // Src0 Op.getOperand(3), // Attrchan Op.getOperand(4), // Attr - DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers + DAG.getTargetConstant(0, DL, 
MVT::i32), // $src0_modifiers Op.getOperand(1), // Src2 - DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers + DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers Op.getOperand(5), // high - DAG.getConstant(0, DL, MVT::i1), // $clamp - Glue + DAG.getTargetConstant(0, DL, MVT::i1), // $clamp + ToM0.getValue(1) }; return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops); } @@ -5947,16 +6051,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); return DAG.getNode(ISD::BITCAST, DL, VT, Node); } - case Intrinsic::amdgcn_wqm: { - SDValue Src = Op.getOperand(1); - return SDValue(DAG.getMachineNode(AMDGPU::WQM, DL, Src.getValueType(), Src), - 0); - } - case Intrinsic::amdgcn_wwm: { - SDValue Src = Op.getOperand(1); - return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src), - 0); - } case Intrinsic::amdgcn_fmad_ftz: return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -5977,6 +6071,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SIInstrInfo::MO_ABS32_LO); return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0}; } + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: { + SDLoc SL(Op); + unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ? 
+ AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; + SDValue Aperture = getSegmentAperture(AS, SL, DAG); + SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, + Op.getOperand(1)); + + SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec, + DAG.getConstant(1, SL, MVT::i32)); + return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ); + } default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) @@ -5986,6 +6093,30 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } } +// This function computes an appropriate offset to pass to +// MachineMemOperand::setOffset() based on the offset inputs to +// an intrinsic. If any of the offsets are non-contstant or +// if VIndex is non-zero then this function returns 0. Otherwise, +// it returns the sum of VOffset, SOffset, and Offset. +static unsigned getBufferOffsetForMMO(SDValue VOffset, + SDValue SOffset, + SDValue Offset, + SDValue VIndex = SDValue()) { + + if (!isa(VOffset) || !isa(SOffset) || + !isa(Offset)) + return 0; + + if (VIndex) { + if (!isa(VIndex) || !cast(VIndex)->isNullValue()) + return 0; + } + + return cast(VOffset)->getSExtValue() + + cast(SOffset)->getSExtValue() + + cast(Offset)->getSExtValue(); +} + SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast(Op.getOperand(1))->getZExtValue(); @@ -6128,17 +6259,22 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + unsigned Offset = 
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; + unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); EVT LoadVT = Op.getValueType(); if (LoadVT.getScalarType() == MVT::f16) @@ -6155,6 +6291,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6163,32 +6301,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(4), // soffset Offsets.second, // offset - Op.getOperand(5), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(5), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_raw_buffer_load) ? 
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5])); + return lowerIntrinsicLoad(M, IsFormat, DAG, Ops); } case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: { + const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format; + auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6197,29 +6321,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; - unsigned Opc = (IntrID == Intrinsic::amdgcn_struct_buffer_load) ? 
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; - - EVT VT = Op.getValueType(); - EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); - EVT LoadVT = Op.getValueType(); - - if (LoadVT.getScalarType() == MVT::f16) - return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, - M, DAG, Ops); - - // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics - if (LoadVT.getScalarType() == MVT::i8 || - LoadVT.getScalarType() == MVT::i16) - return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M); - - return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, - M->getMemOperand(), DAG); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5], + Ops[2])); + return lowerIntrinsicLoad(cast(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_tbuffer_load: { MemSDNode *M = cast(Op); @@ -6239,9 +6348,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(4), // voffset Op.getOperand(5), // soffset Op.getOperand(6), // offset - DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6264,8 +6373,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(4), // soffset Offsets.second, // offset Op.getOperand(5), // format - Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6288,8 +6397,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // 
cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; if (LoadVT.getScalarType() == MVT::f16) @@ -6321,13 +6430,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = 0; switch (IntrID) { @@ -6377,7 +6490,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_umax: case Intrinsic::amdgcn_raw_buffer_atomic_and: case Intrinsic::amdgcn_raw_buffer_atomic_or: - case Intrinsic::amdgcn_raw_buffer_atomic_xor: { + case Intrinsic::amdgcn_raw_buffer_atomic_xor: + case Intrinsic::amdgcn_raw_buffer_atomic_inc: + case Intrinsic::amdgcn_raw_buffer_atomic_dec: { auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6388,11 +6503,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); unsigned Opcode = 0; switch (IntrID) { @@ -6426,6 
+6542,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_raw_buffer_atomic_xor: Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; break; + case Intrinsic::amdgcn_raw_buffer_atomic_inc: + Opcode = AMDGPUISD::BUFFER_ATOMIC_INC; + break; + case Intrinsic::amdgcn_raw_buffer_atomic_dec: + Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC; + break; default: llvm_unreachable("unhandled atomic opcode"); } @@ -6442,7 +6564,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_struct_buffer_atomic_umax: case Intrinsic::amdgcn_struct_buffer_atomic_and: case Intrinsic::amdgcn_struct_buffer_atomic_or: - case Intrinsic::amdgcn_struct_buffer_atomic_xor: { + case Intrinsic::amdgcn_struct_buffer_atomic_xor: + case Intrinsic::amdgcn_struct_buffer_atomic_inc: + case Intrinsic::amdgcn_struct_buffer_atomic_dec: { auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG); SDValue Ops[] = { Op.getOperand(0), // Chain @@ -6453,11 +6577,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); unsigned Opcode = 0; switch (IntrID) { @@ -6491,6 +6617,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_struct_buffer_atomic_xor: Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; break; + case Intrinsic::amdgcn_struct_buffer_atomic_inc: + Opcode = AMDGPUISD::BUFFER_ATOMIC_INC; + break; + case Intrinsic::amdgcn_struct_buffer_atomic_dec: + Opcode = AMDGPUISD::BUFFER_ATOMIC_DEC; + break; default: llvm_unreachable("unhandled atomic opcode"); } @@ -6512,12 +6644,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // 
soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + unsigned Offset = setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6534,10 +6670,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6554,10 +6691,12 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(7), // soffset Offsets.second, // offset Op.getOperand(8), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; EVT VT = Op.getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7], + Ops[4])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6686,23 +6825,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; return DAG.getNode(Opc, DL, Op->getVTList(), Ops); } - case Intrinsic::amdgcn_s_sendmsg: - 
case Intrinsic::amdgcn_s_sendmsghalt: { - unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ? - AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT; - Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); - SDValue Glue = Chain.getValue(1); - return DAG.getNode(NodeOp, DL, MVT::Other, Chain, - Op.getOperand(2), Glue); - } - case Intrinsic::amdgcn_init_exec: { - return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain, - Op.getOperand(2)); - } - case Intrinsic::amdgcn_init_exec_from_input: { - return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, - Op.getOperand(2), Op.getOperand(3)); - } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { const GCNSubtarget &ST = MF.getSubtarget(); @@ -6733,9 +6855,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(5), // voffset Op.getOperand(6), // soffset Op.getOperand(7), // offset - DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idexen + DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6759,8 +6881,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(6), // soffset Offsets.second, // offset Op.getOperand(7), // format - Op.getOperand(8), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idexen + Op.getOperand(8), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? 
AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6784,8 +6906,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(5), // soffset Offsets.second, // offset Op.getOperand(6), // format - Op.getOperand(7), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idexen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idexen }; unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : AMDGPUISD::TBUFFER_STORE_FORMAT; @@ -6813,14 +6935,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? 
AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(Offset); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -6833,10 +6959,22 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_raw_buffer_store: case Intrinsic::amdgcn_raw_buffer_store_format: { + const bool IsFormat = + IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format; + SDValue VData = Op.getOperand(2); - bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + EVT VDataVT = VData.getValueType(); + EVT EltType = VDataVT.getScalarType(); + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); if (IsD16) VData = handleD16VData(VData, DAG); + + if (!isTypeLegal(VDataVT)) { + VData = + DAG.getNode(ISD::BITCAST, DL, + getEquivalentMemType(*DAG.getContext(), VDataVT), VData); + } + auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG); SDValue Ops[] = { Chain, @@ -6846,18 +6984,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(5), // soffset Offsets.second, // offset - Op.getOperand(6), // cachepolicy - DAG.getConstant(0, DL, MVT::i1), // idxen + Op.getOperand(6), // cachepolicy, swizzled buffer + DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; - unsigned Opc = IntrinsicID == Intrinsic::amdgcn_raw_buffer_store ? - AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; + unsigned Opc = + IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE; Opc = IsD16 ? 
AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics - EVT VDataType = VData.getValueType().getScalarType(); - if (VDataType == MVT::i8 || VDataType == MVT::i16) - return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M); + if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) + return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M); return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, M->getMemoryVT(), M->getMemOperand()); @@ -6865,10 +7003,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_struct_buffer_store: case Intrinsic::amdgcn_struct_buffer_store_format: { + const bool IsFormat = + IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format; + SDValue VData = Op.getOperand(2); - bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + EVT VDataVT = VData.getValueType(); + EVT EltType = VDataVT.getScalarType(); + bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16); + if (IsD16) VData = handleD16VData(VData, DAG); + + if (!isTypeLegal(VDataVT)) { + VData = + DAG.getNode(ISD::BITCAST, DL, + getEquivalentMemType(*DAG.getContext(), VDataVT), VData); + } + auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG); SDValue Ops[] = { Chain, @@ -6878,17 +7029,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Offsets.first, // voffset Op.getOperand(6), // soffset Offsets.second, // offset - Op.getOperand(7), // cachepolicy - DAG.getConstant(1, DL, MVT::i1), // idxen + Op.getOperand(7), // cachepolicy, swizzled buffer + DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? 
AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); - if (VDataType == MVT::i8 || VDataType == MVT::i16) + if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M); return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, @@ -6908,13 +7061,17 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue(), // voffset -- will be set by setBufferOffsets SDValue(), // soffset -- will be set by setBufferOffsets SDValue(), // offset -- will be set by setBufferOffsets - DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn, DL, MVT::i1), // idxen + DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy + DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getOperand(2).getValueType(); auto *M = cast(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = VT.isVector() ? 
AMDGPUISD::BUFFER_ATOMIC_PK_FADD : AMDGPUISD::BUFFER_ATOMIC_FADD; @@ -6987,7 +7144,7 @@ std::pair SITargetLowering::splitBufferOffsets( Overflow += ImmOffset; ImmOffset = 0; } - C1 = cast(DAG.getConstant(ImmOffset, DL, MVT::i32)); + C1 = cast(DAG.getTargetConstant(ImmOffset, DL, MVT::i32)); if (Overflow) { auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32); if (!N0) @@ -7001,14 +7158,14 @@ std::pair SITargetLowering::splitBufferOffsets( if (!N0) N0 = DAG.getConstant(0, DL, MVT::i32); if (!C1) - C1 = cast(DAG.getConstant(0, DL, MVT::i32)); + C1 = cast(DAG.getTargetConstant(0, DL, MVT::i32)); return {N0, SDValue(C1, 0)}; } // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. -void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, +unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, SDValue *Offsets, unsigned Align) const { SDLoc DL(CombinedOffset); @@ -7018,8 +7175,8 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) { Offsets[0] = DAG.getConstant(0, DL, MVT::i32); Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); - Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32); - return; + Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); + return SOffset + ImmOffset; } } if (DAG.isBaseWithConstantOffset(CombinedOffset)) { @@ -7031,13 +7188,14 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, Subtarget, Align)) { Offsets[0] = N0; Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); - Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32); - return; + Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); + return 0; } } Offsets[0] = CombinedOffset; Offsets[1] = DAG.getConstant(0, DL, MVT::i32); - Offsets[2] = DAG.getConstant(0, DL, MVT::i32); + Offsets[2] = 
DAG.getTargetConstant(0, DL, MVT::i32); + return 0; } // Handle 8 bit and 16 bit buffer loads @@ -7053,9 +7211,10 @@ SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG, SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList, Ops, IntVT, M->getMemOperand()); - SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL, - LoadVT.getScalarType(), BufferLoad); - return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL); + SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad); + LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal); + + return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL); } // Handle 8 bit and 16 bit buffer stores @@ -7063,6 +7222,9 @@ SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType, SDLoc DL, SDValue Ops[], MemSDNode *M) const { + if (VDataType == MVT::f16) + Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]); + SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]); Ops[1] = BufferStoreExt; unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE : @@ -7215,8 +7377,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType().getVectorElementType() == MVT::i32 && "Custom lowering for non-i32 vectors hasn't been implemented."); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - *Load->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, DL); @@ -7505,6 +7667,19 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); } +// Returns immediate value for setting the F32 denorm mode when using the +// S_DENORM_MODE instruction. 
+static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG, + const SDLoc &SL, const GCNSubtarget *ST) { + assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE"); + int DPDenormModeDefault = ST->hasFP64Denormals() + ? FP_DENORM_FLUSH_NONE + : FP_DENORM_FLUSH_IN_FLUSH_OUT; + + int Mode = SPDenormMode | (DPDenormModeDefault << 2); + return DAG.getTargetConstant(Mode, SL, MVT::i32); +} + SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG)) return FastLowered; @@ -7531,16 +7706,26 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) | (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); - const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16); if (!Subtarget->hasFP32Denormals()) { SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE, - SL, MVT::i32); - SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs, - DAG.getEntryNode(), - EnableDenormValue, BitField); + + SDValue EnableDenorm; + if (Subtarget->hasDenormModeInst()) { + const SDValue EnableDenormValue = + getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget); + + EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, + DAG.getEntryNode(), EnableDenormValue); + } else { + const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE, + SL, MVT::i32); + EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs, + DAG.getEntryNode(), EnableDenormValue, + BitField); + } + SDValue Ops[3] = { NegDivScale0, EnableDenorm.getValue(0), @@ -7562,19 +7747,29 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled, Mul); - SDValue Fma3 = getFPTernOp(DAG, 
ISD::FMA,SL, MVT::f32, Fma2, Fma1, Mul, Fma2); + SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2); SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled, Fma3); if (!Subtarget->hasFP32Denormals()) { - const SDValue DisableDenormValue = - DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32); - SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, - Fma4.getValue(1), - DisableDenormValue, - BitField, - Fma4.getValue(2)); + + SDValue DisableDenorm; + if (Subtarget->hasDenormModeInst()) { + const SDValue DisableDenormValue = + getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget); + + DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other, + Fma4.getValue(1), DisableDenormValue, + Fma4.getValue(2)); + } else { + const SDValue DisableDenormValue = + DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32); + + DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other, + Fma4.getValue(1), DisableDenormValue, + BitField, Fma4.getValue(2)); + } SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, DisableDenorm, DAG.getRoot()); @@ -7684,8 +7879,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(VT.isVector() && Store->getValue().getValueType().getScalarType() == MVT::i32); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *Store->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) { return expandUnalignedStore(Store, DAG); } @@ -10065,7 +10260,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have // to try understanding copies to physical registers. 
if (SrcVal.getValueType() == MVT::i1 && - TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) { + Register::isPhysicalRegister(DestReg->getReg())) { SDLoc SL(Node); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); SDValue VReg = DAG.getRegister( @@ -10218,7 +10413,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, MachineOperand &Op = MI.getOperand(I); if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID && OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) || - !TargetRegisterInfo::isVirtualRegister(Op.getReg()) || + !Register::isVirtualRegister(Op.getReg()) || !TRI->isAGPR(MRI, Op.getReg())) continue; auto *Src = MRI.getUniqueVRegDef(Op.getReg()); @@ -10256,7 +10451,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, Node->use_begin()->isMachineOpcode() && Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG && !Node->use_begin()->hasAnyUseOfValue(0))) { - unsigned Def = MI.getOperand(0).getReg(); + Register Def = MI.getOperand(0).getReg(); // Change this into a noret atomic. MI.setDesc(TII->get(NoRetAtomicOp)); @@ -10300,7 +10495,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, // Combine the constants and the pointer. 
const SDValue Ops1[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), Ptr, DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), SubRegHi, @@ -10330,7 +10525,7 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32); const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32), PtrLo, DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), PtrHi, @@ -10364,7 +10559,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, nullptr); case 32: case 16: - RC = &AMDGPU::SReg_32_XM0RegClass; + RC = &AMDGPU::SReg_32RegClass; break; case 64: RC = &AMDGPU::SGPR_64RegClass; @@ -10373,7 +10568,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, RC = &AMDGPU::SReg_96RegClass; break; case 128: - RC = &AMDGPU::SReg_128RegClass; + RC = &AMDGPU::SGPR_128RegClass; break; case 160: RC = &AMDGPU::SReg_160RegClass; @@ -10415,6 +10610,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; case 'a': + if (!Subtarget->hasMAIInsts()) + break; switch (VT.getSizeInBits()) { default: return std::make_pair(0U, nullptr); @@ -10548,9 +10745,9 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex()); } -unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { - const unsigned PrefAlign = TargetLowering::getPrefLoopAlignment(ML); - const unsigned CacheLineAlign = 6; // log2(64) +Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { + const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML); + const Align CacheLineAlign = Align(64); // Pre-GFX10 target did not benefit from loop 
alignment if (!ML || DisableLoopAlignment || @@ -10578,7 +10775,7 @@ unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // If inner loop block is aligned assume in average half of the alignment // size to be added as nops. if (MBB != Header) - LoopSize += (1 << MBB->getAlignment()) / 2; + LoopSize += MBB->getAlignment().value() / 2; for (const MachineInstr &MI : *MBB) { LoopSize += TII->getInstSizeInBytes(MI); @@ -10644,7 +10841,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, const MachineRegisterInfo &MRI = MF->getRegInfo(); const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); unsigned Reg = R->getReg(); - if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return !TRI.isSGPRReg(MRI, Reg); if (MRI.isLiveIn(Reg)) { @@ -10683,12 +10880,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, case ISD::INTRINSIC_W_CHAIN: return AMDGPU::isIntrinsicSourceOfDivergence( cast(N->getOperand(1))->getZExtValue()); - // In some cases intrinsics that are a source of divergence have been - // lowered to AMDGPUISD so we also need to check those too. - case AMDGPUISD::INTERP_MOV: - case AMDGPUISD::INTERP_P1: - case AMDGPUISD::INTERP_P2: - return true; } return false; } @@ -10748,3 +10939,110 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW); } + +const TargetRegisterClass * +SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { + const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (RC == &AMDGPU::VReg_1RegClass && !isDivergent) + return Subtarget->getWavefrontSize() == 64 ? 
&AMDGPU::SReg_64RegClass + : &AMDGPU::SReg_32RegClass; + if (!TRI->isSGPRClass(RC) && !isDivergent) + return TRI->getEquivalentSGPRClass(RC); + else if (TRI->isSGPRClass(RC) && isDivergent) + return TRI->getEquivalentVGPRClass(RC); + + return RC; +} + +static bool hasCFUser(const Value *V, SmallPtrSet &Visited) { + if (!Visited.insert(V).second) + return false; + bool Result = false; + for (auto U : V->users()) { + if (const IntrinsicInst *Intrinsic = dyn_cast(U)) { + if (V == U->getOperand(1)) { + switch (Intrinsic->getIntrinsicID()) { + default: + Result = false; + break; + case Intrinsic::amdgcn_if_break: + case Intrinsic::amdgcn_if: + case Intrinsic::amdgcn_else: + Result = true; + break; + } + } + if (V == U->getOperand(0)) { + switch (Intrinsic->getIntrinsicID()) { + default: + Result = false; + break; + case Intrinsic::amdgcn_end_cf: + case Intrinsic::amdgcn_loop: + Result = true; + break; + } + } + } else { + Result = hasCFUser(U, Visited); + } + if (Result) + break; + } + return Result; +} + +bool SITargetLowering::requiresUniformRegister(MachineFunction &MF, + const Value *V) const { + if (const IntrinsicInst *Intrinsic = dyn_cast(V)) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_if_break: + return true; + } + } + if (const ExtractValueInst *ExtValue = dyn_cast(V)) { + if (const IntrinsicInst *Intrinsic = + dyn_cast(ExtValue->getOperand(0))) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_if: + case Intrinsic::amdgcn_else: { + ArrayRef Indices = ExtValue->getIndices(); + if (Indices.size() == 1 && Indices[0] == 1) { + return true; + } + } + } + } + } + if (const CallInst *CI = dyn_cast(V)) { + if (isa(CI->getCalledValue())) { + const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo(); + ImmutableCallSite CS(CI); + TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints( + MF.getDataLayout(), Subtarget->getRegisterInfo(), CS); + for (auto &TC 
: TargetConstraints) { + if (TC.Type == InlineAsm::isOutput) { + ComputeConstraintToUse(TC, SDValue()); + unsigned AssignedReg; + const TargetRegisterClass *RC; + std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint( + SIRI, TC.ConstraintCode, TC.ConstraintVT); + if (RC) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg)) + return true; + else if (SIRI->isSGPRClass(RC)) + return true; + } + } + } + } + } + SmallPtrSet Visited; + return hasCFUser(V, Visited); +} diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 21a215e16ce..f0102feb65c 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -94,6 +94,9 @@ private: SelectionDAG &DAG, ArrayRef Ops, bool IsIntrinsic = false) const; + SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG, + ArrayRef Ops) const; + // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to // dwordx4 if on SI. SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, @@ -183,6 +186,7 @@ private: unsigned isCFIntrinsic(const SDNode *Intr) const; +public: /// \returns True if fixup needs to be emitted for given global value \p GV, /// false otherwise. bool shouldEmitFixup(const GlobalValue *GV) const; @@ -195,11 +199,14 @@ private: /// global value \p GV, false otherwise. bool shouldEmitPCReloc(const GlobalValue *GV) const; +private: // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. - void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, - SDValue *Offsets, unsigned Align = 4) const; + /// \returns 0 If there is a non-constant offset or if the offset is 0. + /// Otherwise returns the constant offset. 
+ unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, + SDValue *Offsets, unsigned Align = 4) const; // Handle 8 bit and 16 bit buffer loads SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, @@ -235,6 +242,11 @@ public: bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override; + bool allowsMisalignedMemoryAccessesImpl( + unsigned Size, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *IsFast = nullptr) const; + bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, @@ -309,12 +321,13 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; MachineBasicBlock *splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const; + void bundleInstWithWaitcnt(MachineInstr &MI) const; MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const; @@ -330,6 +343,7 @@ public: bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; + SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, @@ -374,7 +388,37 @@ public: unsigned Depth = 0) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; - unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + virtual const TargetRegisterClass * + getRegClassFor(MVT VT, bool isDivergent) const override; + virtual bool requiresUniformRegister(MachineFunction &MF, + const 
Value *V) const override; + Align getPrefLoopAlignment(MachineLoop *ML) const override; + + void allocateHSAUserSGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSystemSGPRs(CCState &CCInfo, + MachineFunction &MF, + SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, + bool IsShader) const; + + void allocateSpecialEntryInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + void allocateSpecialInputSGPRs( + CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; + + void allocateSpecialInputVGPRs(CCState &CCInfo, + MachineFunction &MF, + const SIRegisterInfo &TRI, + SIMachineFunctionInfo &Info) const; }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c89d5b71ec5..dcb04e42658 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1483,12 +1483,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (BI.Incoming) { if (!Brackets) - Brackets = llvm::make_unique(*BI.Incoming); + Brackets = std::make_unique(*BI.Incoming); else *Brackets = *BI.Incoming; } else { if (!Brackets) - Brackets = llvm::make_unique(ST); + Brackets = std::make_unique(ST); else Brackets->clear(); } @@ -1508,7 +1508,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if (!MoveBracketsToSucc) { MoveBracketsToSucc = &SuccBI; } else { - SuccBI.Incoming = llvm::make_unique(*Brackets); + SuccBI.Incoming = std::make_unique(*Brackets); } } else if (SuccBI.Incoming->merge(*Brackets)) { SuccBI.Dirty = true; diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 561a16c3e35..4dcbe92861f 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -124,6 +124,9 @@ class InstSI 
DisableSIDecoder = 0; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index ba8ed6993a5..d97e6a62971 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt, if (isMUBUF(LdSt) || isMTBUF(LdSt)) { const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) - return false; + if (SOffset && SOffset->isReg()) { + // We can only handle this if it's a stack access, as any other resource + // would require reporting multiple base registers. + const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (AddrReg && !AddrReg->isFI()) + return false; + + const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); + const SIMachineFunctionInfo *MFI + = LdSt.getParent()->getParent()->getInfo(); + if (RSrc->getReg() != MFI->getScratchRSrcReg()) + return false; + + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); + BaseOp = SOffset; + Offset = OffsetImm->getImm(); + return true; + } const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) @@ -458,9 +475,9 @@ bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1, const MachineRegisterInfo &MRI = FirstLdSt.getParent()->getParent()->getRegInfo(); - const unsigned Reg = FirstDst->getReg(); + const Register Reg = FirstDst->getReg(); - const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg) + const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg) ? 
MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); @@ -807,7 +824,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, "Not a VGPR32 reg"); if (Cond.size() == 1) { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -820,7 +837,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) @@ -834,7 +851,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::SCC_FALSE: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64), SReg) .addImm(0) @@ -850,7 +867,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -864,7 +881,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) @@ -876,8 +893,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECNZ: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); - unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -894,8 +911,8 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECZ: { - unsigned SReg = MRI.createVirtualRegister(BoolXExecRC); - unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); + Register SReg = MRI.createVirtualRegister(BoolXExecRC); + Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(MBB, I, DL, get(ST.isWave32() ? 
AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -925,7 +942,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC()); + Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); @@ -938,7 +955,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, const DebugLoc &DL, unsigned SrcReg, int Value) const { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC()); + Register Reg = MRI.createVirtualRegister(RI.getBoolRC()); BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) .addImm(Value) .addReg(SrcReg); @@ -1052,12 +1069,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { + if (Register::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } - MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc) + BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) @@ -1068,11 +1085,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // correctly handled. if (RI.spillSGPRToVGPR()) FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); - if (ST.hasScalarStores()) { - // m0 is used for offset to scalar stores if used to spill. 
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); - } - return; } @@ -1083,7 +1095,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, auto MIB = BuildMI(MBB, MI, DL, get(Opcode)); if (RI.hasAGPRs(RC)) { MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MIB.addReg(Tmp, RegState::Define); } MIB.addReg(SrcReg, getKillRegState(isKill)) // data @@ -1182,24 +1194,18 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { + if (Register::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } if (RI.spillSGPRToVGPR()) FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill); - MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg) + BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit); - - if (ST.hasScalarStores()) { - // m0 is used for offset to scalar stores if used to spill. 
- Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); - } - return; } @@ -1208,7 +1214,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg); if (RI.hasAGPRs(RC)) { MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MIB.addReg(Tmp, RegState::Define); } MIB.addFrameIndex(FrameIndex) // vaddr @@ -1242,13 +1248,13 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && WorkGroupSize > WavefrontSize) { - unsigned TIDIGXReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); - unsigned TIDIGYReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); - unsigned TIDIGZReg - = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); - unsigned InputPtrReg = + Register TIDIGXReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + Register TIDIGYReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + Register TIDIGZReg = + MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + Register InputPtrReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) @@ -1410,9 +1416,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { break; case AMDGPU::V_MOV_B64_PSEUDO: { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); - unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); + Register Dst = MI.getOperand(0).getReg(); + Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); + Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? 
@@ -1437,6 +1443,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } + case AMDGPU::V_MOV_B64_DPP_PSEUDO: { + expandMovDPP64(MI); + break; + } case AMDGPU::V_SET_INACTIVE_B32: { unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; @@ -1469,7 +1479,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case AMDGPU::V_MOVRELD_B32_V8: case AMDGPU::V_MOVRELD_B32_V16: { const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32); - unsigned VecReg = MI.getOperand(0).getReg(); + Register VecReg = MI.getOperand(0).getReg(); bool IsUndef = MI.getOperand(1).isUndef(); unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm(); assert(VecReg == MI.getOperand(1).getReg()); @@ -1492,9 +1502,9 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); - unsigned Reg = MI.getOperand(0).getReg(); - unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); - unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); + Register Reg = MI.getOperand(0).getReg(); + Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0); + Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1); // Create a bundle so these instructions won't be re-ordered by the // post-RA scheduler. 
@@ -1531,7 +1541,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { break; } case TargetOpcode::BUNDLE: { - if (!MI.mayLoad()) + if (!MI.mayLoad() || MI.hasUnmodeledSideEffects()) return false; // If it is a load it must be a memory clause @@ -1550,6 +1560,64 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } +std::pair +SIInstrInfo::expandMovDPP64(MachineInstr &MI) const { + assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MBB.findDebugLoc(MI); + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + Register Dst = MI.getOperand(0).getReg(); + unsigned Part = 0; + MachineInstr *Split[2]; + + + for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) { + auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp)); + if (Dst.isPhysical()) { + MovDPP.addDef(RI.getSubReg(Dst, Sub)); + } else { + assert(MRI.isSSA()); + auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MovDPP.addDef(Tmp); + } + + for (unsigned I = 1; I <= 2; ++I) { // old and src operands. + const MachineOperand &SrcOp = MI.getOperand(I); + assert(!SrcOp.isFPImm()); + if (SrcOp.isImm()) { + APInt Imm(64, SrcOp.getImm()); + Imm.ashrInPlace(Part * 32); + MovDPP.addImm(Imm.getLoBits(32).getZExtValue()); + } else { + assert(SrcOp.isReg()); + Register Src = SrcOp.getReg(); + if (Src.isPhysical()) + MovDPP.addReg(RI.getSubReg(Src, Sub)); + else + MovDPP.addReg(Src, SrcOp.isUndef() ? 
RegState::Undef : 0, Sub); + } + } + + for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I) + MovDPP.addImm(MI.getOperand(I).getImm()); + + Split[Part] = MovDPP; + ++Part; + } + + if (Dst.isVirtual()) + BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst) + .addReg(Split[0]->getOperand(0).getReg()) + .addImm(AMDGPU::sub0) + .addReg(Split[1]->getOperand(0).getReg()) + .addImm(AMDGPU::sub1); + + MI.eraseFromParent(); + return std::make_pair(Split[0], Split[1]); +} + bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, @@ -1574,7 +1642,7 @@ bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp) { - unsigned Reg = RegOp.getReg(); + Register Reg = RegOp.getReg(); unsigned SubReg = RegOp.getSubReg(); bool IsKill = RegOp.isKill(); bool IsDead = RegOp.isDead(); @@ -1646,7 +1714,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, // This needs to be implemented because the source modifiers may be inserted // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. -bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, +bool SIInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); } @@ -1710,7 +1779,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // FIXME: Virtual register workaround for RegScavenger not working with empty // blocks. 
- unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); auto I = MBB.end(); @@ -2163,7 +2232,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, SmallVector Regs; for (int Idx = 0; Idx != NElts; ++Idx) { - unsigned DstElt = MRI.createVirtualRegister(EltRC); + Register DstElt = MRI.createVirtualRegister(EltRC); Regs.push_back(DstElt); unsigned SubIdx = SubIndices[Idx]; @@ -2327,7 +2396,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.RemoveOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); - unsigned Src1Reg = Src1->getReg(); + Register Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); Src0->setReg(Src1Reg); Src0->setSubReg(Src1SubReg); @@ -2367,12 +2436,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MRI->hasOneUse(Src0->getReg())) { Src0->ChangeToImmediate(Def->getOperand(1).getImm()); Src0Inlined = true; - } else if ((RI.isPhysicalRegister(Src0->getReg()) && - (ST.getConstantBusLimit(Opc) <= 1 && - RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || - (RI.isVirtualRegister(Src0->getReg()) && - (ST.getConstantBusLimit(Opc) <= 1 && - RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) + } else if ((Register::isPhysicalRegister(Src0->getReg()) && + (ST.getConstantBusLimit(Opc) <= 1 && + RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) || + (Register::isVirtualRegister(Src0->getReg()) && + (ST.getConstantBusLimit(Opc) <= 1 && + RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))) return false; // VGPR is okay as Src0 - fallthrough } @@ -2385,10 +2454,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MRI->hasOneUse(Src1->getReg()) && commuteInstruction(UseMI)) { Src0->ChangeToImmediate(Def->getOperand(1).getImm()); - } else if ((RI.isPhysicalRegister(Src1->getReg()) && - RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || - 
(RI.isVirtualRegister(Src1->getReg()) && - RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) + } else if ((Register::isPhysicalRegister(Src1->getReg()) && + RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) || + (Register::isVirtualRegister(Src1->getReg()) && + RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))) return false; // VGPR is okay as Src1 - fallthrough } @@ -2472,8 +2541,7 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, } bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIb) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && @@ -2664,6 +2732,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, MI.modifiesRegister(AMDGPU::EXEC, &RI) || MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || MI.getOpcode() == AMDGPU::S_SETREG_B32 || + MI.getOpcode() == AMDGPU::S_DENORM_MODE || changesVGPRIndexingMode(MI); } @@ -2865,8 +2934,16 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, if (OpInfo.RegClass < 0) return false; - if (MO.isImm() && isInlineConstant(MO, OpInfo)) + const MachineFunction *MF = MI.getParent()->getParent(); + const GCNSubtarget &ST = MF->getSubtarget(); + + if (MO.isImm() && isInlineConstant(MO, OpInfo)) { + if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() && + OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::src2)) + return false; return RI.opCanUseInlineConstant(OpInfo.OperandType); + } if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) return false; @@ -2874,8 +2951,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) return true; - const MachineFunction *MF = MI.getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); return ST.hasVOP3Literal(); } @@ 
-3036,7 +3111,7 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, if (!MO.isUse()) return false; - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (Register::isVirtualRegister(MO.getReg())) return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); // Null is free @@ -3093,7 +3168,8 @@ static bool shouldReadExec(const MachineInstr &MI) { return true; } - if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || + if (MI.isPreISelOpcode() || + SIInstrInfo::isGenericOpcode(MI.getOpcode()) || SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI)) return false; @@ -3104,7 +3180,7 @@ static bool shouldReadExec(const MachineInstr &MI) { static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg) { - if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg())) + if (Register::isPhysicalRegister(SubReg.getReg())) return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg()); return SubReg.getSubReg() != AMDGPU::NoSubRegister && @@ -3144,8 +3220,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, if (!Op.isReg()) continue; - unsigned Reg = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) { + Register Reg = Op.getReg(); + if (!Register::isVirtualRegister(Reg) && !RC->contains(Reg)) { ErrInfo = "inlineasm operand has incorrect register class."; return false; } @@ -3209,9 +3285,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, continue; if (RegClass != -1) { - unsigned Reg = MI.getOperand(i).getReg(); - if (Reg == AMDGPU::NoRegister || - TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MI.getOperand(i).getReg(); + if (Reg == AMDGPU::NoRegister || Register::isVirtualRegister(Reg)) continue; const TargetRegisterClass *RC = RI.getRegClass(RegClass); @@ -3304,7 +3379,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Dst register should be tied to implicit use of preserved register"; return false; - } else if 
(TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) && + } else if (Register::isPhysicalRegister(TiedMO.getReg()) && Dst.getReg() != TiedMO.getReg()) { ErrInfo = "Dst register should use same physical register as preserved"; return false; @@ -3409,6 +3484,32 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + // Special case for writelane - this can break the multiple constant bus rule, + // but still can't use more than one SGPR register + if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) { + unsigned SGPRCount = 0; + Register SGPRUsed = AMDGPU::NoRegister; + + for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) { + if (OpIdx == -1) + break; + + const MachineOperand &MO = MI.getOperand(OpIdx); + + if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) { + if (MO.isReg() && MO.getReg() != AMDGPU::M0) { + if (MO.getReg() != SGPRUsed) + ++SGPRCount; + SGPRUsed = MO.getReg(); + } + } + if (SGPRCount > ST.getConstantBusLimit(Opcode)) { + ErrInfo = "WRITELANE instruction violates constant bus restriction"; + return false; + } + } + } + // Verify misc. restrictions on specific instructions. 
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { @@ -3609,7 +3710,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 && ST.getGeneration() >= AMDGPUSubtarget::GFX10) { ErrInfo = "Invalid dpp_ctrl value: " - "broadcats are not supported on GFX10+"; + "broadcasts are not supported on GFX10+"; return false; } if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST && @@ -3631,6 +3732,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::PHI: return AMDGPU::PHI; case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::WQM: return AMDGPU::WQM; + case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM; case AMDGPU::WWM: return AMDGPU::WWM; case AMDGPU::S_MOV_B32: { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -3708,9 +3810,9 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, const MCInstrDesc &Desc = get(MI.getOpcode()); if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || Desc.OpInfo[OpNo].RegClass == -1) { - unsigned Reg = MI.getOperand(OpNo).getReg(); + Register Reg = MI.getOperand(OpNo).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return RI.getPhysRegClass(Reg); } @@ -3741,7 +3843,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { else VRC = &AMDGPU::VGPR_32RegClass; - unsigned Reg = MRI.createVirtualRegister(VRC); + Register Reg = MRI.createVirtualRegister(VRC); DebugLoc DL = MBB->findDebugLoc(I); BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO); MO.ChangeToRegister(Reg, false); @@ -3756,7 +3858,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, const { MachineBasicBlock *MBB = MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned SubReg = MRI.createVirtualRegister(SubRC); + Register 
SubReg = MRI.createVirtualRegister(SubRC); if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) @@ -3768,7 +3870,7 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. - unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); + Register NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); @@ -3814,11 +3916,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(Reg) ? - MRI.getRegClass(Reg) : - RI.getPhysRegClass(Reg); + Register Reg = MO.getReg(); + const TargetRegisterClass *RC = Register::isVirtualRegister(Reg) + ? 
MRI.getRegClass(Reg) + : RI.getPhysRegClass(Reg); const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); @@ -3935,13 +4036,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, if (Opc == AMDGPU::V_WRITELANE_B32) { const DebugLoc &DL = MI.getDebugLoc(); if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src0); Src0.ChangeToRegister(Reg, false); } if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); @@ -3967,7 +4068,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, // select is uniform. 
if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); @@ -4003,7 +4104,7 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, MI.setDesc(get(CommutedOpc)); - unsigned Src0Reg = Src0.getReg(); + Register Src0Reg = Src0.getReg(); unsigned Src0SubReg = Src0.getSubReg(); bool Src0Kill = Src0.isKill(); @@ -4039,13 +4140,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); const DebugLoc &DL = MI.getDebugLoc(); if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src1); Src1.ChangeToRegister(Reg, false); } if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) .add(Src2); Src2.ChangeToRegister(Reg, false); @@ -4113,12 +4214,12 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); - unsigned DstReg = MRI.createVirtualRegister(SRC); + Register DstReg = MRI.createVirtualRegister(SRC); unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; if (RI.hasAGPRs(VRC)) { VRC = RI.getEquivalentVGPRClass(VRC); - unsigned NewSrcReg = 
MRI.createVirtualRegister(VRC); + Register NewSrcReg = MRI.createVirtualRegister(VRC); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(TargetOpcode::COPY), NewSrcReg) .addReg(SrcReg); @@ -4134,7 +4235,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, SmallVector SRegs; for (unsigned i = 0; i < SubRegs; ++i) { - unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::V_READFIRSTLANE_B32), SGPR) .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); @@ -4176,7 +4277,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const { - unsigned OpReg = Op.getReg(); + Register OpReg = Op.getReg(); unsigned OpSubReg = Op.getSubReg(); const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg( @@ -4186,7 +4287,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, if (DstRC == OpRC) return; - unsigned DstReg = MRI.createVirtualRegister(DstRC); + Register DstReg = MRI.createVirtualRegister(DstRC); MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); @@ -4198,8 +4299,19 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; // Try to eliminate the copy if it is copying an immediate value. 
- if (Def->isMoveImmediate()) + if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass) FoldImmediate(*Copy, *Def, OpReg, &MRI); + + bool ImpDef = Def->isImplicitDef(); + while (!ImpDef && Def && Def->isCopy()) { + if (Def->getOperand(1).getReg().isPhysical()) + break; + Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg()); + ImpDef = Def && Def->isImplicitDef(); + } + if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && + !ImpDef) + Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); } // Emit the actual waterfall loop, executing the wrapped instruction for each @@ -4223,18 +4335,18 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock::iterator I = LoopBB.begin(); - unsigned VRsrc = Rsrc.getReg(); + Register VRsrc = Rsrc.getReg(); unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef()); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC); - unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC); - unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); + Register AndCond = MRI.createVirtualRegister(BoolXExecRC); + Register SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + 
Register SRsrcSub3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); // Beginning of the loop, read the next Rsrc variant. BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0) @@ -4302,7 +4414,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC); + Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); // Save the EXEC mask BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec); @@ -4370,10 +4482,10 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor - unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat(); // Zero64 = 0 @@ -4430,7 +4542,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { if (!MI.getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) + !Register::isVirtualRegister(MI.getOperand(i).getReg())) continue; const 
TargetRegisterClass *OpRC = MRI.getRegClass(MI.getOperand(i).getReg()); @@ -4447,8 +4559,16 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); - VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC) - : RI.getEquivalentVGPRClass(SRC); + if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { + VRC = &AMDGPU::VReg_1RegClass; + } else + VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) + ? RI.getEquivalentAGPRClass(SRC) + : RI.getEquivalentVGPRClass(SRC); + } else { + VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) + ? RI.getEquivalentAGPRClass(VRC) + : RI.getEquivalentVGPRClass(VRC); } RC = VRC; } else { @@ -4458,7 +4578,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // Update all the operands so they have the same type. for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) continue; // MI is a PHI instruction. @@ -4483,7 +4603,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // subregister index types e.g. 
sub0_sub1 + sub2 + sub3 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) continue; const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); @@ -4502,8 +4622,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src0 = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); if (DstRC != Src0RC) { @@ -4577,13 +4697,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, if (VAddr && AMDGPU::getIfAddr64Inst(MI.getOpcode()) != -1) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. 
- unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC); - unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC); + Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC); unsigned RsrcPtr, NewSRsrc; std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); @@ -4623,7 +4743,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, unsigned RsrcPtr, NewSRsrc; std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc); - unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); @@ -4661,6 +4781,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI, MIB.addImm(TFE->getImm()); } + MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz)); + MIB.cloneMemRefs(MI); Addr64 = MIB; } else { @@ -4933,8 +5055,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); unsigned NewDstReg = AMDGPU::NoRegister; if (HasDst) { - unsigned DstReg = Inst.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = Inst.getOperand(0).getReg(); + if 
(Register::isPhysicalRegister(DstReg)) continue; // Update the destination register class. @@ -4943,7 +5065,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; if (Inst.isCopy() && - TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) && + Register::isVirtualRegister(Inst.getOperand(1).getReg()) && NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register // class, we just replace all uses of dst with src. These kinds of @@ -4988,8 +5110,8 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - unsigned OldDstReg = Inst.getOperand(0).getReg(); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register OldDstReg = Inst.getOperand(0).getReg(); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned Opc = Inst.getOpcode(); assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); @@ -5022,8 +5144,8 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src = Inst.getOperand(1); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned SubOp = ST.hasAddNoCarry() ? 
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; @@ -5052,7 +5174,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, MachineOperand &Src1 = Inst.getOperand(2); if (ST.hasDLInsts()) { - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL); @@ -5072,8 +5194,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, bool Src1IsSGPR = Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())); MachineInstr *Xor; - unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); // Build a pair of scalar instructions and add them to the work list. 
// The next iteration over the work list will lower these to the vector @@ -5117,8 +5239,8 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist, MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm) .add(Src0) @@ -5146,8 +5268,8 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist, MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); - unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm) .add(Src1); @@ -5189,16 +5311,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp( const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 
+ Register FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -5226,12 +5348,12 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); - unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned CarryReg = MRI.createVirtualRegister(CarryRC); - unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC); + Register CarryReg = MRI.createVirtualRegister(CarryRC); + Register DeadCarryReg = MRI.createVirtualRegister(CarryRC); MachineOperand &Dest = Inst.getOperand(0); MachineOperand &Src0 = Inst.getOperand(1); @@ -5327,17 +5449,17 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist, const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) .add(SrcReg0Sub0) .add(SrcReg1Sub0); - unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) .add(SrcReg0Sub1) .add(SrcReg1Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); + Register FullDestReg = 
MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -5368,7 +5490,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist, const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - unsigned Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineOperand* Op0; MachineOperand* Op1; @@ -5384,7 +5506,7 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist, BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm) .add(*Op0); - unsigned NewDest = MRI.createVirtualRegister(DestRC); + Register NewDest = MRI.createVirtualRegister(DestRC); MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest) .addReg(Interm) @@ -5411,8 +5533,8 @@ void SIInstrInfo::splitScalar64BitBCNT( MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; - unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); @@ -5451,9 +5573,9 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, Offset == 0 && "Not implemented"); if (BitWidth < 32) { - unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) 
.addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) @@ -5476,8 +5598,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, } MachineOperand &Src = Inst.getOperand(1); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) .addImm(31) @@ -5506,6 +5628,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( switch (UseMI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: case AMDGPU::REG_SEQUENCE: case AMDGPU::PHI: @@ -5531,7 +5654,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { - unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineBasicBlock *MBB = Inst.getParent(); MachineOperand &Src0 = Inst.getOperand(1); MachineOperand &Src1 = Inst.getOperand(2); @@ -5539,8 +5662,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, switch (Inst.getOpcode()) { case AMDGPU::S_PACK_LL_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // FIXME: Can do a lot better if we know the high bits of src0 or src1 are // 0. 
@@ -5558,7 +5681,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, break; } case AMDGPU::S_PACK_LH_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) .addImm(0xffff); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg) @@ -5568,8 +5691,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, break; } case AMDGPU::S_PACK_HH_B32_B16: { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg) .addImm(16) .add(Src0); @@ -5623,17 +5746,27 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( case AMDGPU::REG_SEQUENCE: case AMDGPU::INSERT_SUBREG: case AMDGPU::WQM: + case AMDGPU::SOFT_WQM: case AMDGPU::WWM: { const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); if (RI.hasAGPRs(SrcRC)) { if (RI.hasAGPRs(NewDstRC)) return nullptr; - NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); + switch (Inst.getOpcode()) { + case AMDGPU::PHI: + case AMDGPU::REG_SEQUENCE: + case AMDGPU::INSERT_SUBREG: + NewDstRC = RI.getEquivalentAGPRClass(NewDstRC); + break; + default: + NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); + } + if (!NewDstRC) return nullptr; } else { - if (RI.hasVGPRs(NewDstRC)) + if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); @@ -5686,7 +5819,7 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, return MO.getReg(); // If this could be a VGPR or an SGPR, Check the dynamic register class. 
- unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); if (RI.isSGPRClass(RegRC)) UsedSGPRs[i] = Reg; @@ -5941,7 +6074,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); MachineInstr *SIIF = BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) .add(Branch->getOperand(0)) @@ -5968,8 +6101,8 @@ void SIInstrInfo::convertNonUniformLoopRegion( if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { - unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC()); - unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register DstReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); MachineInstrBuilder HeaderPHIBuilder = BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), @@ -5979,7 +6112,7 @@ void SIInstrInfo::convertNonUniformLoopRegion( HeaderPHIBuilder.addReg(BackEdgeReg); } else { MachineBasicBlock *PMBB = *PI; - unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); + Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); HeaderPHIBuilder.addReg(ZeroReg); @@ -6063,13 +6196,30 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); + Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC()); MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC()); return 
BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } +MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + Register DestReg, + RegScavenger &RS) const { + if (ST.hasAddNoCarry()) + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); + + Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // TODO: Users need to deal with this. + if (!UnusedCarry.isValid()) + return MachineInstrBuilder(); + + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) + .addReg(UnusedCarry, RegState::Define | RegState::Dead); +} + bool SIInstrInfo::isKillTerminator(unsigned Opcode) { switch (Opcode) { case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: @@ -6115,7 +6265,21 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return false; const auto RCID = MI.getDesc().OpInfo[Idx].RegClass; - return RCID == AMDGPU::SReg_128RegClassID; + return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass); +} + +unsigned SIInstrInfo::getNumFlatOffsetBits(unsigned AddrSpace, + bool Signed) const { + if (!ST.hasFlatInstOffsets()) + return 0; + + if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) + return 0; + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) + return Signed ? 12 : 11; + + return Signed ? 
13 : 12; } bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, @@ -6254,7 +6418,7 @@ static bool followSubRegDef(MachineInstr &MI, MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI) { assert(MRI.isSSA()); - if (!TargetRegisterInfo::isVirtualRegister(P.Reg)) + if (!Register::isVirtualRegister(P.Reg)) return nullptr; auto RSR = P; @@ -6265,8 +6429,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, case AMDGPU::COPY: case AMDGPU::V_MOV_B32_e32: { auto &Op1 = MI->getOperand(1); - if (Op1.isReg() && - TargetRegisterInfo::isVirtualRegister(Op1.getReg())) { + if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg())) { if (Op1.isUndef()) return nullptr; RSR = getRegSubRegPair(Op1); @@ -6360,3 +6523,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, return true; } } + +MachineInstr *SIInstrInfo::createPHIDestinationCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, + const DebugLoc &DL, Register Src, Register Dst) const { + auto Cur = MBB.begin(); + if (Cur != MBB.end()) + do { + if (!Cur->isPHI() && Cur->readsRegister(Dst)) + return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); + ++Cur; + } while (Cur != MBB.end() && Cur != LastPHIIt); + + return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, + Dst); +} + +MachineInstr *SIInstrInfo::createPHISourceCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const { + if (InsPt != MBB.end() && + (InsPt->getOpcode() == AMDGPU::SI_IF || + InsPt->getOpcode() == AMDGPU::SI_ELSE || + InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && + InsPt->definesRegister(Src)) { + InsPt++; + return BuildMI(MBB, InsPt, DL, + get(ST.isWave32() ?
AMDGPU::S_MOV_B32_term + : AMDGPU::S_MOV_B64_term), + Dst) + .addReg(Src, 0, SrcSubReg) + .addReg(AMDGPU::EXEC, RegState::Implicit); + } + return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, + Dst); +} + +bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 3ff35da0b96..be463442c88 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -173,7 +173,7 @@ public: } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, @@ -229,6 +229,14 @@ public: bool expandPostRAPseudo(MachineInstr &MI) const override; + // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp + // instructions. Returns a pair of generated instructions. + // Can split either post-RA with physical registers or pre-RA with + // virtual registers. In latter case IR needs to be in SSA form and + // a REG_SEQUENCE is produced to define original register. + std::pair + expandMovDPP64(MachineInstr &MI) const; + // Returns an opcode that can be used to move a value to a \p DstRC // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned.
@@ -242,7 +250,7 @@ public: return commuteOpcode(MI.getOpcode()); } - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0, @@ -303,8 +311,7 @@ public: bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; bool isFoldableCopy(const MachineInstr &MI) const; @@ -578,6 +585,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::IsMAI; } + static bool isDOT(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; + } + + bool isDOT(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsDOT; + } + static bool isScalarUnit(const MachineInstr &MI) { return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); } @@ -954,6 +969,19 @@ public: bool isBasicBlockPrologue(const MachineInstr &MI) const override; + MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register Dst) const override; + + MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register SrcSubReg, + Register Dst) const override; + + bool isWave32() const; + /// Return a partially built integer add instruction without carry. /// Caller must add source operands. /// For pre-GFX9 it will generate unused carry destination operand. 
@@ -963,6 +991,12 @@ public: const DebugLoc &DL, unsigned DestReg) const; + MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + Register DestReg, + RegScavenger &RS) const; + static bool isKillTerminator(unsigned Opcode); const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; @@ -970,6 +1004,8 @@ public: return isUInt<12>(Imm); } + unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const; + /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT /// encoded instruction. If \p Signed, this is for an instruction that /// interprets the offset as signed. diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c382c816e0b..1eecbf55561 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9, SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) SDTCisVT<6, i32>, // format(imm) - SDTCisVT<7, i32>, // cachecontrol(imm) + SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<8, i1> // idxen(imm) ]>; @@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, @@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8, SDTCisVT<3, i32>, // voffset(VGPR) SDTCisVT<4, i32>, // soffset(SGPR) SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, 
i32>, // cachepolicy(imm) + SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) SDTCisVT<7, i1>]>; // idxen(imm) def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, @@ -198,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; +def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; +def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>; def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>; @@ -264,6 +266,11 @@ def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] >; +def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", + SDTypeProfile<0 ,1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue] +>; + //===----------------------------------------------------------------------===// // ValueType helpers //===----------------------------------------------------------------------===// @@ -277,7 +284,9 @@ class isFloatType { !if(!eq(SrcVT.Value, f64.Value), 1, !if(!eq(SrcVT.Value, v2f16.Value), 1, !if(!eq(SrcVT.Value, v4f16.Value), 1, - 0))))); + !if(!eq(SrcVT.Value, v2f32.Value), 1, + !if(!eq(SrcVT.Value, v2f64.Value), 1, + 0))))))); } class isIntType { @@ -300,14 +309,36 @@ class isPackedType { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -defm atomic_inc_global : global_binary_atomic_op; -defm atomic_dec_global : global_binary_atomic_op; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { 
-def atomic_inc_local : local_binary_atomic_op; -def atomic_dec_local : local_binary_atomic_op; -def atomic_load_fadd_local : local_binary_atomic_op; -def atomic_load_fmin_local : local_binary_atomic_op; -def atomic_load_fmax_local : local_binary_atomic_op; + +defm atomic_inc_#as : binary_atomic_op; +defm atomic_dec_#as : binary_atomic_op; +defm atomic_load_fmin_#as : binary_atomic_op; +defm atomic_load_fmax_#as : binary_atomic_op; + + +} // End let AddressSpaces = ... +} // End foreach AddrSpace + +def atomic_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = f32; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} + +def atomic_pk_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = v2f16; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. 
@@ -328,10 +359,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, >; def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { + let IsLoad = 1; let IsUnindexed = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; let IsNonExtLoad = 1; } @@ -347,14 +380,15 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr), let MemoryVT = i64; } -def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { +def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; let IsAnyExtLoad = 1; } -def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{ - return cast(N)->getExtensionType() == ISD::SEXTLOAD; -}]>; +def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { + let IsLoad = 1; + let IsSignExtLoad = 1; +} def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; @@ -391,25 +425,50 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { let MemoryVT = i16; } -def load_glue_align8 : Aligned8Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; -def load_glue_align16 : Aligned16Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; +let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { + let IsNonExtLoad = 1; +} -def load_local_m0 : LoadFrag, LocalAddress; -def sextloadi8_local_m0 : LoadFrag, LocalAddress; -def sextloadi16_local_m0 : LoadFrag, LocalAddress; -def extloadi8_local_m0 : LoadFrag, LocalAddress; -def zextloadi8_local_m0 : LoadFrag, LocalAddress; -def extloadi16_local_m0 : LoadFrag, LocalAddress; -def zextloadi16_local_m0 : LoadFrag, LocalAddress; -def load_align8_local_m0 : LoadFrag , LocalAddress; -def load_align16_local_m0 : LoadFrag , LocalAddress; -def atomic_load_32_local_m0 : LoadFrag, LocalAddress; -def atomic_load_64_local_m0 : LoadFrag, 
LocalAddress; +let MemoryVT = i8 in { +def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; +def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; +def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; +} + +let MemoryVT = i16 in { +def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; +def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; +def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; +} + +def load_align8_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 8; +} +def load_align16_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; + let MinAlignment = 16; +} + +} // End IsLoad = 1 + +let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_32_glue node:$ptr)> { + let MemoryVT = i32; +} +def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), + (atomic_load_64_glue node:$ptr)> { + let MemoryVT = i64; +} + +} // End let AddressSpaces = LoadAddress_local.AddrSpaces def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore, @@ -420,50 +479,88 @@ def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] >; -def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val), - (AMDGPUatomic_st_glue node:$ptr, node:$val)> { +def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), + (AMDGPUst_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsUnindexed = 1; } -def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), - (AMDGPUst_glue node:$val, node:$ptr), [{ - return cast(N)->getAddressingMode() == ISD::UNINDEXED; -}]>; - def store_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, 
node:$ptr), [{ - return !cast(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} def truncstore_glue : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr), [{ - return cast(N)->isTruncatingStore(); -}]>; + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), - (truncstore_glue node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; + (truncstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} -def store_glue_align8 : Aligned8Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { +def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (store_glue node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} -def store_glue_align16 : Aligned16Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} -def store_local_m0 : StoreFrag, LocalAddress; -def truncstorei8_local_m0 : StoreFrag, LocalAddress; -def truncstorei16_local_m0 : StoreFrag, LocalAddress; -def atomic_store_local_m0 : StoreFrag, LocalAddress; +def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore_glue node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} +} + +def store_align16_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let 
IsTruncStore = 0; + let MinAlignment = 16; +} + +def store_align8_local_m0 : PatFrag < + (ops node:$value, node:$ptr), + (store_local_m0 node:$value, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; + let MinAlignment = 8; +} + +let AddressSpaces = StoreAddress_local.AddrSpaces in { + +def atomic_store_local_32_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i32; +} +def atomic_store_local_64_m0 : PatFrag < + (ops node:$value, node:$ptr), + (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i64; +} +} // End let AddressSpaces = StoreAddress_local.AddrSpaces -def store_align8_local_m0 : StoreFrag, LocalAddress; -def store_align16_local_m0 : StoreFrag, LocalAddress; def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), @@ -539,16 +636,27 @@ def lshl_rev : PatFrag < (shl $src0, $src1) >; +def add_ctpop : PatFrag < + (ops node:$src0, node:$src1), + (add (ctpop $src0), $src1) +>; + multiclass SIAtomicM0Glue2 { + SDTypeProfile tc = SDTAtomic2, + bit IsInt = 1> { def _glue : SDNode < !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; - def _local_m0 : local_binary_atomic_op (NAME#"_glue")>; - def _region_m0 : region_binary_atomic_op (NAME#"_glue")>; + let AddressSpaces = StoreAddress_local.AddrSpaces in { + defm _local_m0 : binary_atomic_op (NAME#"_glue"), IsInt>; + } + + let AddressSpaces = StoreAddress_region.AddrSpaces in { + defm _region_m0 : binary_atomic_op (NAME#"_glue"), IsInt>; + } } defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; @@ -563,17 +671,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; -defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>; 
-defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>; -defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>; - -def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] ->; - -def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal; -def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion; - +defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def as_i1imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); @@ -591,6 +691,10 @@ def as_i32imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; +def as_i32timm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + def as_i64imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; @@ -627,9 +731,13 @@ def SIMM16bit : ImmLeaf ; def UIMM16bit : ImmLeaf (Imm); }] + [{return isUInt<16>(Imm);}] >; +def i64imm_32bit : ImmLeaf(Imm); +}]>; + class InlineImm : PatLeaf <(vt imm), [{ return isInlineImmediate(N); }]>; @@ -763,6 +871,18 @@ def ExpTgtMatchClass : AsmOperandClass { let RenderMethod = "printExpTgt"; } +def SWaitMatchClass : AsmOperandClass { + let Name = "SWaitCnt"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSWaitCntOps"; +} + +def VReg32OrOffClass : AsmOperandClass { + let Name = "VReg32OrOff"; + let ParserMethod = "parseVReg32OrOff"; +} + +let OperandType = "OPERAND_IMMEDIATE" in { def SendMsgImm : Operand { let PrintMethod = "printSendMsg"; let ParserMatchClass = SendMsgMatchClass; @@ -778,22 +898,11 @@ def EndpgmImm : Operand { let ParserMatchClass = EndpgmMatchClass; } -def SWaitMatchClass : AsmOperandClass { - let Name = "SWaitCnt"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseSWaitCntOps"; 
-} - -def VReg32OrOffClass : AsmOperandClass { - let Name = "VReg32OrOff"; - let ParserMethod = "parseVReg32OrOff"; -} - def WAIT_FLAG : Operand { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; - let OperandType = "OPERAND_IMMEDIATE"; } +} // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" include "VIInstrFormats.td" @@ -929,6 +1038,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; +def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>; def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>; @@ -1317,18 +1427,6 @@ class getVALUDstForVT { VOPDstS64orS32)))); // else VT == i1 } -// Returns true if VT is floating point. -class getIsFP { - bit ret = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, v4f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, v2f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - !if(!eq(VT.Value, v2f64.Value), 1, - 0))))))); -} - // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT { @@ -1340,7 +1438,7 @@ class getSDWADstForVT { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. 
class getVOPSrc0ForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand ret = !if(isFP, @@ -1373,11 +1471,14 @@ class getVOPSrc0ForVT { // Returns the vreg register class to use for source operand given VT class getVregSrcForVT { RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, - !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); + !if(!eq(VT.Size, 96), VReg_96, + !if(!eq(VT.Size, 64), VReg_64, + !if(!eq(VT.Size, 48), VReg_64, + VGPR_32)))); } class getSDWASrcForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1386,7 +1487,7 @@ class getSDWASrcForVT { // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; RegisterOperand ret = !if(!eq(VT.Size, 128), VSrc_128, @@ -1433,7 +1534,7 @@ class isModifierType { // Return type of input modifiers operand for specified input operand class getSrcMod { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; bit isPacked = isPackedType.ret; Operand ret = !if(!eq(VT.Size, 64), !if(isFP, FP64InputMods, Int64InputMods), @@ -1452,7 +1553,7 @@ class getOpSelMod { // Return type of input modifiers operand specified input operand for DPP class getSrcModExt { - bit isFP = getIsFP.ret; + bit isFP = isFloatType.ret; Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } @@ -2038,6 +2139,7 @@ class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0, field int NeedPatGen = PatGenMode.NoPattern; field bit IsMAI = 0; + field bit IsDOT = 0; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); diff --git a/lib/Target/AMDGPU/SIInstructions.td 
b/lib/Target/AMDGPU/SIInstructions.td index 70f20bb6937..21984c6ad91 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m < (outs VINTRPDst:$vdst), (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan), - (i32 imm:$attr)))] + [(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc, + (i32 timm:$attrchan), (i32 timm:$attr), M0))] >; let OtherPredicates = [has32BankLDS] in { @@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m < (outs VINTRPDst:$vdst), (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan), - (i32 imm:$attr)))]>; + [(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc, + (i32 timm:$attrchan), (i32 timm:$attr), M0))]>; } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst" @@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m < (outs VINTRPDst:$vdst), (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan), "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan", - [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan), - (i32 imm:$attr)))]>; + [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc), + (i32 timm:$attrchan), (i32 timm:$attr), M0))]>; } // End Uses = [M0, EXEC] @@ -92,6 +92,11 @@ def ATOMIC_FENCE : SPseudoInstSI< let maybeAtomic = 1; } +def VOP_I64_I64_DPP : VOPProfile <[i64, i64, untyped, untyped]> { + let HasExt = 1; + let HasExtDPP = 1; +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { // For use in patterns @@ -107,10 +112,19 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst), def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)>; +// 64-bit vector move with dpp. Expanded post-RA. 
+def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64_DPP> { + let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete. +} + // Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the // WQM pass processes it. def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; +// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm it is +// turned into a copy by WQM pass, but does not seed WQM requirements. +def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; + // Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so // that the @earlyclobber is respected. The @earlyclobber is to make sure that // the instruction that defines $src0 (which is run in WWM) doesn't @@ -345,13 +359,15 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> { } def SI_INIT_EXEC : SPseudoInstSI < - (outs), (ins i64imm:$src), []> { + (outs), (ins i64imm:$src), + [(int_amdgcn_init_exec (i64 timm:$src))]> { let Defs = [EXEC]; let usesCustomInserter = 1; let isAsCheapAsAMove = 1; let WaveSizePredicate = isWave64; } +// FIXME: Intrinsic should be mangled for wave size. def SI_INIT_EXEC_LO : SPseudoInstSI < (outs), (ins i32imm:$src), []> { let Defs = [EXEC_LO]; @@ -360,12 +376,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI < let WaveSizePredicate = isWave32; } +// FIXME: Wave32 version def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI < - (outs), (ins SSrc_b32:$input, i32imm:$shift), []> { + (outs), (ins SSrc_b32:$input, i32imm:$shift), + [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> { let Defs = [EXEC]; let usesCustomInserter = 1; } +def : GCNPat < + (int_amdgcn_init_exec timm:$src), + (SI_INIT_EXEC_LO (as_i32imm imm:$src))> { + let WaveSizePredicate = isWave32; +} + // Return for returning shaders to a shader variant epilog. 
def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { @@ -604,25 +628,6 @@ def : GCNPat < (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0)) >; -def : GCNPat < - (AMDGPUinit_exec i64:$src), - (SI_INIT_EXEC (as_i64imm $src)) -> { - let WaveSizePredicate = isWave64; -} - -def : GCNPat < - (AMDGPUinit_exec i64:$src), - (SI_INIT_EXEC_LO (as_i32imm $src)) -> { - let WaveSizePredicate = isWave32; -} - -def : GCNPat < - (AMDGPUinit_exec_from_input i32:$input, i32:$shift), - (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) ->; - def : GCNPat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) @@ -740,22 +745,22 @@ def : GCNPat < def : GCNPat < (i32 (fp_to_sint f16:$src)), - (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src)) + (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) >; def : GCNPat < (i32 (fp_to_uint f16:$src)), - (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src)) + (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src)) >; def : GCNPat < (f16 (sint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src)) + (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src)) >; def : GCNPat < (f16 (uint_to_fp i32:$src)), - (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src)) + (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src)) >; //===----------------------------------------------------------------------===// @@ -808,8 +813,14 @@ def : GCNPat < (V_BCNT_U32_B32_e64 $popcnt, $val) >; } + def : GCNPat < - (i16 (add (i16 (trunc (getDivergentFrag.ret i32:$popcnt))), i16:$val)), + (i32 (ctpop i32:$popcnt)), + (V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0)) +>; + +def : GCNPat < + (i16 (add (i16 (trunc (i32 (getDivergentFrag.ret i32:$popcnt)))), i16:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; @@ -1076,53 +1087,158 @@ def : GCNPat < /********** ================================ **********/ // Prevent expanding both fneg and fabs. +// TODO: Add IgnoredBySelectionDAG bit? 
+let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG def : GCNPat < - (fneg (fabs f32:$src)), - (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit + (fneg (fabs (f32 SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit >; -// FIXME: Should use S_OR_B32 def : GCNPat < - (fneg (fabs f64:$src)), + (fabs (f32 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff))) +>; + +def : GCNPat < + (fneg (f32 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) +>; + +def : GCNPat < + (fneg (f16 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) +>; + +def : GCNPat < + (fneg (f16 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) +>; + +def : GCNPat < + (fabs (f16 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff))) +>; + +def : GCNPat < + (fneg (fabs (f16 SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit +>; + +def : GCNPat < + (fneg (fabs (f16 VGPR_32:$src))), + (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit +>; + +def : GCNPat < + (fneg (v2f16 SReg_32:$src)), + (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) +>; + +def : GCNPat < + (fabs (v2f16 SReg_32:$src)), + (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff))) +>; + +// This is really (fneg (fabs v2f16:$src)) +// +// fabs is not reported as free because there is modifier for it in +// VOP3P instructions, so it is turned into the bit op. 
+def : GCNPat < + (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit +>; + +def : GCNPat < + (fneg (v2f16 (fabs SReg_32:$src))), + (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit +>; + +// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled + // def : GCNPat < +// (fneg (f64 SReg_64:$src)), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (i32 (S_MOV_B32 (i32 0x80000000)))), +// sub1) +// >; + +// def : GCNPat < +// (fneg (fabs (f64 SReg_64:$src))), +// (REG_SEQUENCE SReg_64, +// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +// sub0, +// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +// (S_MOV_B32 (i32 0x80000000))), // Set sign bit. +// sub1) +// >; + +} // End let AddedComplexity = 1 + +def : GCNPat < + (fabs (f32 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (f32 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) +>; + +def : GCNPat < + (fabs (f16 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (v2f16 VGPR_32:$src)), + (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) +>; + +def : GCNPat < + (fabs (v2f16 VGPR_32:$src)), + (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (v2f16 (fabs VGPR_32:$src))), + (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit +>; + +def : GCNPat < + (fabs (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. 
- sub1) ->; - -def : GCNPat < - (fabs f32:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fffffff))) ->; - -def : GCNPat < - (fneg f32:$src), - (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000))) ->; - -def : GCNPat < - (fabs f64:$src), - (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), - sub0, - (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)), + (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit. sub1) >; +// TODO: Use SGPR for constant def : GCNPat < - (fneg f64:$src), + (fneg (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, - (i32 (EXTRACT_SUBREG f64:$src, sub0)), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), + (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), (i32 (V_MOV_B32_e32 (i32 0x80000000)))), sub1) >; +// TODO: Use SGPR for constant +def : GCNPat < + (fneg (fabs (f64 VReg_64:$src))), + (REG_SEQUENCE VReg_64, + (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), + sub0, + (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), + (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. 
+ sub1) +>; + def : GCNPat < (fcopysign f16:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) @@ -1154,45 +1270,6 @@ def : GCNPat < (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) >; -def : GCNPat < - (fneg f16:$src), - (S_XOR_B32 $src, (S_MOV_B32 (i32 0x00008000))) ->; - -def : GCNPat < - (fabs f16:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x00007fff))) ->; - -def : GCNPat < - (fneg (fabs f16:$src)), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit ->; - -def : GCNPat < - (fneg v2f16:$src), - (S_XOR_B32 $src, (S_MOV_B32 (i32 0x80008000))) ->; - -def : GCNPat < - (fabs v2f16:$src), - (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fff7fff))) ->; - -// This is really (fneg (fabs v2f16:$src)) -// -// fabs is not reported as free because there is modifier for it in -// VOP3P instructions, so it is turned into the bit op. -def : GCNPat < - (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit ->; - -def : GCNPat < - (fneg (v2f16 (fabs v2f16:$src))), - (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit ->; - /********** ================== **********/ /********** Immediate Patterns **********/ /********** ================== **********/ @@ -1544,7 +1621,7 @@ def : GCNPat < (V_CVT_F16_F32_e32 ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), - $src)) + SSrc_i1:$src)) >; def : GCNPat < @@ -1552,35 +1629,35 @@ def : GCNPat < (V_CVT_F16_F32_e32 ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE), - $src)) + SSrc_i1:$src)) >; def : GCNPat < (f32 (sint_to_fp i1:$src)), (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE), - $src) + SSrc_i1:$src) >; def : GCNPat < (f32 (uint_to_fp i1:$src)), (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 
CONST.FP32_ONE), - $src) + SSrc_i1:$src) >; def : GCNPat < (f64 (sint_to_fp i1:$src)), (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 -1), - $src)) + SSrc_i1:$src)) >; def : GCNPat < (f64 (uint_to_fp i1:$src)), (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), /*src1mod*/(i32 0), /*src1*/(i32 1), - $src)) + SSrc_i1:$src)) >; //===----------------------------------------------------------------------===// @@ -1788,6 +1865,22 @@ def : GCNPat < (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0) >; +def : GCNPat < + (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), + (V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl), + (as_i32imm $row_mask), (as_i32imm $bank_mask), + (as_i1imm $bound_ctrl)) +>; + +def : GCNPat < + (i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask, + timm:$bank_mask, timm:$bound_ctrl)), + (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl), + (as_i32imm $row_mask), (as_i32imm $bank_mask), + (as_i1imm $bound_ctrl)) +>; + //===----------------------------------------------------------------------===// // Fract Patterns //===----------------------------------------------------------------------===// @@ -1915,3 +2008,13 @@ def : FP16Med3Pat; defm : Int16Med3Pat; defm : Int16Med3Pat; } // End Predicates = [isGFX9Plus] + +class AMDGPUGenericInstruction : GenericInstruction { + let Namespace = "AMDGPU"; +} + +def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = 0; +} diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index ae8b967893a..20db1c37f35 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -42,10 +42,7 @@ // // Future improvements: // -// - This currently relies on the 
scheduler to place loads and stores next to -// each other, and then only merges adjacent pairs of instructions. It would -// be good to be more flexible with interleaved instructions, and possibly run -// before scheduling. It currently missing stores of constants because loading +// - This is currently missing stores of constants because loading // the constant into the data register is placed between the stores, although // this is arguably a scheduling problem. // @@ -98,14 +95,9 @@ enum InstClassEnum { DS_READ, DS_WRITE, S_BUFFER_LOAD_IMM, - BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN, - BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET, - BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN, - BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET, - BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact, - BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact, - BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact, - BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact, + BUFFER_LOAD, + BUFFER_STORE, + MIMG, }; enum RegisterEnum { @@ -114,6 +106,7 @@ enum RegisterEnum { SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10, + SSAMP = 0x20, }; class SILoadStoreOptimizer : public MachineFunctionPass { @@ -126,6 +119,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass { unsigned Width0; unsigned Width1; unsigned BaseOff; + unsigned DMask0; + unsigned DMask1; InstClassEnum InstClass; bool GLC0; bool GLC1; @@ -135,6 +130,60 @@ class SILoadStoreOptimizer : public MachineFunctionPass { bool DLC1; bool UseST64; SmallVector InstsToMove; + int AddrIdx[5]; + const MachineOperand *AddrReg[5]; + unsigned NumAddresses; + + bool hasSameBaseAddress(const MachineInstr &MI) { + for (unsigned i = 0; i < NumAddresses; i++) { + const MachineOperand &AddrRegNext = MI.getOperand(AddrIdx[i]); + + if (AddrReg[i]->isImm() || AddrRegNext.isImm()) { + if (AddrReg[i]->isImm() != AddrRegNext.isImm() || + AddrReg[i]->getImm() != 
AddrRegNext.getImm()) { + return false; + } + continue; + } + + // Check same base pointer. Be careful of subregisters, which can occur + // with vectors of pointers. + if (AddrReg[i]->getReg() != AddrRegNext.getReg() || + AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) { + return false; + } + } + return true; + } + + bool hasMergeableAddress(const MachineRegisterInfo &MRI) { + for (unsigned i = 0; i < NumAddresses; ++i) { + const MachineOperand *AddrOp = AddrReg[i]; + // Immediates are always OK. + if (AddrOp->isImm()) + continue; + + // Don't try to merge addresses that aren't either immediates or registers. + // TODO: Should be possible to merge FrameIndexes and maybe some other + // non-register + if (!AddrOp->isReg()) + return false; + + // TODO: We should be able to merge physical reg addreses. + if (Register::isPhysicalRegister(AddrOp->getReg())) + return false; + + // If an address has only one use then there will be on other + // instructions with the same address, so we can't merge this one. 
+ if (MRI.hasOneNonDBGUse(AddrOp->getReg())) + return false; + } + return true; + } + + void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII, + const GCNSubtarget &STM); + void setPaired(MachineBasicBlock::iterator MI, const SIInstrInfo &TII); }; struct BaseRegisters { @@ -160,14 +209,12 @@ private: AliasAnalysis *AA = nullptr; bool OptimizeAgain; + static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII); static bool offsetsCanBeCombined(CombineInfo &CI); static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI); static unsigned getNewOpcode(const CombineInfo &CI); static std::pair getSubRegIdxs(const CombineInfo &CI); const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI); - unsigned getOpcodeWidth(const MachineInstr &MI); - InstClassEnum getInstClass(unsigned Opc); - unsigned getRegs(unsigned Opc); bool findMatchingInst(CombineInfo &CI); @@ -178,22 +225,27 @@ private: unsigned write2Opcode(unsigned EltSize) const; unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); + MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI); void updateBaseAndOffset(MachineInstr &I, unsigned NewBase, - int32_t NewOffset); - unsigned computeBase(MachineInstr &MI, const MemAddress &Addr); - MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI); - Optional extractConstOffset(const MachineOperand &Op); - void processBaseWithConstOffset(const MachineOperand &Base, MemAddress &Addr); + int32_t NewOffset) const; + unsigned computeBase(MachineInstr &MI, const MemAddress &Addr) const; + MachineOperand createRegOrImm(int32_t Val, MachineInstr &MI) const; + Optional extractConstOffset(const MachineOperand &Op) const; + void processBaseWithConstOffset(const 
MachineOperand &Base, MemAddress &Addr) const; /// Promotes constant offset to the immediate by adjusting the base. It /// tries to use a base from the nearby instructions that allows it to have /// a 13bit constant offset which gets promoted to the immediate. bool promoteConstantOffsetToImm(MachineInstr &CI, MemInfoMap &Visited, - SmallPtrSet &Promoted); + SmallPtrSet &Promoted) const; + void addInstToMergeableList(const CombineInfo &CI, + std::list > &MergeableInsts) const; + bool collectMergeableInsts(MachineBasicBlock &MBB, + std::list > &MergeableInsts) const; public: static char ID; @@ -202,7 +254,11 @@ public: initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); } - bool optimizeBlock(MachineBasicBlock &MBB); + void removeCombinedInst(std::list &MergeList, + const MachineInstr &MI); + bool optimizeInstsWithSameBaseAddr(std::list &MergeList, + bool &OptimizeListAgain); + bool optimizeBlock(std::list > &MergeableInsts); bool runOnMachineFunction(MachineFunction &MF) override; @@ -216,6 +272,264 @@ public: } }; +static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { + const unsigned Opc = MI.getOpcode(); + + if (TII.isMUBUF(Opc)) { + // FIXME: Handle d16 correctly + return AMDGPU::getMUBUFElements(Opc); + } + if (TII.isMIMG(MI)) { + uint64_t DMaskImm = + TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm(); + return countPopulation(DMaskImm); + } + + switch (Opc) { + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + return 1; + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + return 2; + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return 4; + default: + return 0; + } +} + +/// Maps instruction opcode to enum InstClassEnum. 
+static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { + switch (Opc) { + default: + if (TII.isMUBUF(Opc)) { + switch (AMDGPU::getMUBUFBaseOpcode(Opc)) { + default: + return UNKNOWN; + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: + return BUFFER_LOAD; + case AMDGPU::BUFFER_STORE_DWORD_OFFEN: + case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: + return BUFFER_STORE; + } + } + if (TII.isMIMG(Opc)) { + // Ignore instructions encoded without vaddr. + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1) + return UNKNOWN; + // TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD. + if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc)) + return UNKNOWN; + return MIMG; + } + return UNKNOWN; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return S_BUFFER_LOAD_IMM; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B64_gfx9: + return DS_READ; + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B64_gfx9: + return DS_WRITE; + } +} + +/// Determines instruction subclass from opcode. Only instructions +/// of the same subclass can be merged together. 
+static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { + switch (Opc) { + default: + if (TII.isMUBUF(Opc)) + return AMDGPU::getMUBUFBaseOpcode(Opc); + if (TII.isMIMG(Opc)) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + assert(Info); + return Info->BaseOpcode; + } + return -1; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B64_gfx9: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B64_gfx9: + return Opc; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return AMDGPU::S_BUFFER_LOAD_DWORD_IMM; + } +} + +static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) { + if (TII.isMUBUF(Opc)) { + unsigned result = 0; + + if (AMDGPU::getMUBUFHasVAddr(Opc)) { + result |= VADDR; + } + + if (AMDGPU::getMUBUFHasSrsrc(Opc)) { + result |= SRSRC; + } + + if (AMDGPU::getMUBUFHasSoffset(Opc)) { + result |= SOFFSET; + } + + return result; + } + + if (TII.isMIMG(Opc)) { + unsigned result = VADDR | SRSRC; + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler) + result |= SSAMP; + return result; + } + + switch (Opc) { + default: + return 0; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return SBASE; + case AMDGPU::DS_READ_B32: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64_gfx9: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64_gfx9: + return ADDR; + } +} + + +void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, + const SIInstrInfo &TII, + const GCNSubtarget &STM) { + I = MI; + unsigned Opc = MI->getOpcode(); + InstClass = getInstClass(Opc, TII); + + if 
(InstClass == UNKNOWN) + return; + + switch (InstClass) { + case DS_READ: + EltSize = + (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 + : 4; + break; + case DS_WRITE: + EltSize = + (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 + : 4; + break; + case S_BUFFER_LOAD_IMM: + EltSize = AMDGPU::getSMRDEncodedOffset(STM, 4); + break; + default: + EltSize = 4; + break; + } + + if (InstClass == MIMG) { + DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset); + Offset0 = I->getOperand(OffsetIdx).getImm(); + } + + Width0 = getOpcodeWidth(*I, TII); + + if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { + Offset0 &= 0xffff; + } else if (InstClass != MIMG) { + GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm(); + if (InstClass != S_BUFFER_LOAD_IMM) { + SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm(); + } + DLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm(); + } + + unsigned AddrOpName[5] = {0}; + NumAddresses = 0; + const unsigned Regs = getRegs(I->getOpcode(), TII); + + if (Regs & ADDR) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::addr; + } + + if (Regs & SBASE) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase; + } + + if (Regs & SRSRC) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; + } + + if (Regs & SOFFSET) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; + } + + if (Regs & VADDR) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; + } + + if (Regs & SSAMP) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp; + } + + for (unsigned i = 0; i < NumAddresses; i++) { + AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]); + AddrReg[i] = &I->getOperand(AddrIdx[i]); + } + + InstsToMove.clear(); +} + +void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI, + const SIInstrInfo &TII) { + Paired = MI; + assert(InstClass == 
getInstClass(Paired->getOpcode(), TII)); + + if (InstClass == MIMG) { + DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = + AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset); + Offset1 = Paired->getOperand(OffsetIdx).getImm(); + } + + Width1 = getOpcodeWidth(*Paired, TII); + if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { + Offset1 &= 0xffff; + } else if (InstClass != MIMG) { + GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm(); + if (InstClass != S_BUFFER_LOAD_IMM) { + SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm(); + } + DLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dlc)->getImm(); + } +} + + } // end anonymous namespace. INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE, @@ -249,8 +563,7 @@ static void addDefsUsesToList(const MachineInstr &MI, if (Op.isReg()) { if (Op.isDef()) RegDefs.insert(Op.getReg()); - else if (Op.readsReg() && - TargetRegisterInfo::isPhysicalRegister(Op.getReg())) + else if (Op.readsReg() && Register::isPhysicalRegister(Op.getReg())) PhysRegUses.insert(Op.getReg()); } } @@ -282,7 +595,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet &RegDefs, if (Use.isReg() && ((Use.readsReg() && RegDefs.count(Use.getReg())) || (Use.isDef() && RegDefs.count(Use.getReg())) || - (Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) && + (Use.isDef() && Register::isPhysicalRegister(Use.getReg()) && PhysRegUses.count(Use.getReg())))) { Insts.push_back(&MI); addDefsUsesToList(MI, RegDefs, PhysRegUses); @@ -307,7 +620,59 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp, return true; } +// This function assumes that \p A and \p B are identical except for +// size and offset, and they reference adjacent memory. 
+static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF, + const MachineMemOperand *A, + const MachineMemOperand *B) { + unsigned MinOffset = std::min(A->getOffset(), B->getOffset()); + unsigned Size = A->getSize() + B->getSize(); + // This function adds the offset parameter to the existing offset for A, + // so we pass 0 here as the offset and then manually set it to the correct + // value after the call. + MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size); + MMO->setOffset(MinOffset); + return MMO; +} + +bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) { + assert(CI.InstClass == MIMG); + + // Ignore instructions with tfe/lwe set. + const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe); + const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe); + + if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm())) + return false; + + // Check other optional immediate operands for equality. + unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, + AMDGPU::OpName::d16, AMDGPU::OpName::unorm, + AMDGPU::OpName::da, AMDGPU::OpName::r128}; + + for (auto op : OperandsToMatch) { + int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); + if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx) + return false; + if (Idx != -1 && + CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm()) + return false; + } + + // Check DMask for overlaps. + unsigned MaxMask = std::max(CI.DMask0, CI.DMask1); + unsigned MinMask = std::min(CI.DMask0, CI.DMask1); + + unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask); + if ((1u << AllowedBitsForMin) <= MinMask) + return false; + + return true; +} + bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { + assert(CI.InstClass != MIMG); + // XXX - Would the same offset be OK? Is there any reason this would happen or // be useful? 
if (CI.Offset0 == CI.Offset1) @@ -384,164 +749,24 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM, } } -unsigned SILoadStoreOptimizer::getOpcodeWidth(const MachineInstr &MI) { - const unsigned Opc = MI.getOpcode(); - - if (TII->isMUBUF(MI)) { - return AMDGPU::getMUBUFDwords(Opc); - } - - switch (Opc) { - default: - return 0; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - return 1; - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - return 2; - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return 4; - } -} - -InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) { - if (TII->isMUBUF(Opc)) { - const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc); - - // If we couldn't identify the opcode, bail out. - if (baseOpcode == -1) { - return UNKNOWN; - } - - switch (baseOpcode) { - default: - return UNKNOWN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: - return BUFFER_LOAD_OFFEN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: - return BUFFER_LOAD_OFFSET; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN: - return BUFFER_STORE_OFFEN; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET: - return BUFFER_STORE_OFFSET; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: - return BUFFER_LOAD_OFFEN_exact; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: - return BUFFER_LOAD_OFFSET_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: - return BUFFER_STORE_OFFEN_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: - return BUFFER_STORE_OFFSET_exact; - } - } - - switch (Opc) { - default: - return UNKNOWN; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return S_BUFFER_LOAD_IMM; - case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_B64: - case AMDGPU::DS_READ_B32_gfx9: - case AMDGPU::DS_READ_B64_gfx9: - return DS_READ; - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B64: - case AMDGPU::DS_WRITE_B32_gfx9: - case AMDGPU::DS_WRITE_B64_gfx9: - return DS_WRITE; - } -} - -unsigned SILoadStoreOptimizer::getRegs(unsigned Opc) { - if 
(TII->isMUBUF(Opc)) { - unsigned result = 0; - - if (AMDGPU::getMUBUFHasVAddr(Opc)) { - result |= VADDR; - } - - if (AMDGPU::getMUBUFHasSrsrc(Opc)) { - result |= SRSRC; - } - - if (AMDGPU::getMUBUFHasSoffset(Opc)) { - result |= SOFFSET; - } - - return result; - } - - switch (Opc) { - default: - return 0; - case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: - case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: - return SBASE; - case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_B64: - case AMDGPU::DS_READ_B32_gfx9: - case AMDGPU::DS_READ_B64_gfx9: - case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B64: - case AMDGPU::DS_WRITE_B32_gfx9: - case AMDGPU::DS_WRITE_B64_gfx9: - return ADDR; - } -} - bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); MachineBasicBlock::iterator E = MBB->end(); MachineBasicBlock::iterator MBBI = CI.I; const unsigned Opc = CI.I->getOpcode(); - const InstClassEnum InstClass = getInstClass(Opc); + const InstClassEnum InstClass = getInstClass(Opc, *TII); if (InstClass == UNKNOWN) { return false; } + const unsigned InstSubclass = getInstSubclass(Opc, *TII); - const unsigned Regs = getRegs(Opc); - - unsigned AddrOpName[5] = {0}; - int AddrIdx[5]; - const MachineOperand *AddrReg[5]; - unsigned NumAddresses = 0; - - if (Regs & ADDR) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::addr; - } - - if (Regs & SBASE) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::sbase; - } - - if (Regs & SRSRC) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; - } - - if (Regs & SOFFSET) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; - } - - if (Regs & VADDR) { - AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; - } - - for (unsigned i = 0; i < NumAddresses; i++) { - AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]); - AddrReg[i] = &CI.I->getOperand(AddrIdx[i]); - - // We only ever merge operations with the same base address register, so - // don't 
bother scanning forward if there are no other uses. - if (AddrReg[i]->isReg() && - (TargetRegisterInfo::isPhysicalRegister(AddrReg[i]->getReg()) || - MRI->hasOneNonDBGUse(AddrReg[i]->getReg()))) - return false; - } + // Do not merge VMEM buffer instructions with "swizzled" bit set. + int Swizzled = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz); + if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm()) + return false; ++MBBI; @@ -550,11 +775,10 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); for (; MBBI != E; ++MBBI) { - const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE); - if ((getInstClass(MBBI->getOpcode()) != InstClass) || - (IsDS && (MBBI->getOpcode() != Opc))) { - // This is not a matching DS instruction, but we can keep looking as + if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) || + (getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) { + // This is not a matching instruction, but we can keep looking as // long as one of these conditions are met: // 1. It is safe to move I down past MBBI. // 2. It is safe to move MBBI down past the instruction that I will @@ -599,58 +823,23 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { CI.InstsToMove)) continue; - bool Match = true; - for (unsigned i = 0; i < NumAddresses; i++) { - const MachineOperand &AddrRegNext = MBBI->getOperand(AddrIdx[i]); - - if (AddrReg[i]->isImm() || AddrRegNext.isImm()) { - if (AddrReg[i]->isImm() != AddrRegNext.isImm() || - AddrReg[i]->getImm() != AddrRegNext.getImm()) { - Match = false; - break; - } - continue; - } - - // Check same base pointer. Be careful of subregisters, which can occur - // with vectors of pointers. 
- if (AddrReg[i]->getReg() != AddrRegNext.getReg() || - AddrReg[i]->getSubReg() != AddrRegNext.getSubReg()) { - Match = false; - break; - } - } + bool Match = CI.hasSameBaseAddress(*MBBI); if (Match) { - int OffsetIdx = - AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset); - CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm(); - CI.Width0 = getOpcodeWidth(*CI.I); - CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm(); - CI.Width1 = getOpcodeWidth(*MBBI); - CI.Paired = MBBI; + CI.setPaired(MBBI, *TII); - if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) { - CI.Offset0 &= 0xffff; - CI.Offset1 &= 0xffff; - } else { - CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm(); - CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm(); - if (CI.InstClass != S_BUFFER_LOAD_IMM) { - CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); - CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); - } - CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm(); - CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm(); - } + // Check both offsets (or masks for MIMG) can be combined and fit in the + // reduced range. + bool canBeCombined = + CI.InstClass == MIMG + ? dmasksCanBeCombined(CI, *TII) + : widthsFit(*STM, CI) && offsetsCanBeCombined(CI); - // Check both offsets fit in the reduced range. // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. - if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI)) - if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) - return true; + if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) + return true; } // We've found a load/store that we couldn't merge for some reason. @@ -711,15 +900,15 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) { const TargetRegisterClass *SuperRC = (CI.EltSize == 4) ? 
&AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass; - unsigned DestReg = MRI->createVirtualRegister(SuperRC); + Register DestReg = MRI->createVirtualRegister(SuperRC); DebugLoc DL = CI.I->getDebugLoc(); - unsigned BaseReg = AddrReg->getReg(); + Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -755,12 +944,11 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) { moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); - return Next; + return Read2; } unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const { @@ -809,11 +997,11 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) { const MCInstrDesc &Write2Desc = TII->get(Opc); DebugLoc DL = CI.I->getDebugLoc(); - unsigned BaseReg = AddrReg->getReg(); + Register BaseReg = AddrReg->getReg(); unsigned BaseSubReg = AddrReg->getSubReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { - unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + Register ImmReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) .addImm(CI.BaseOff); @@ -839,84 +1027,43 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) { moveInstsAfter(Write2, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); - return Next; + return Write2; } MachineBasicBlock::iterator -SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { 
+SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); const unsigned Opcode = getNewOpcode(CI); const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); - unsigned DestReg = MRI->createVirtualRegister(SuperRC); - unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); - - BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) - .addImm(MergedOffset) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); - - std::pair SubRegIdx = getSubRegIdxs(CI); - const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); - const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); - - // Copy to the old destination registers. - const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); - const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst); - const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::sdst); - - BuildMI(*MBB, CI.Paired, DL, CopyDesc) - .add(*Dest0) // Copy to same destination including flags and sub reg. - .addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) - .add(*Dest1) - .addReg(DestReg, RegState::Kill, SubRegIdx1); - - moveInstsAfter(Copy1, CI.InstsToMove); - - MachineBasicBlock::iterator Next = std::next(CI.I); - CI.I->eraseFromParent(); - CI.Paired->eraseFromParent(); - return Next; -} - -MachineBasicBlock::iterator -SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { - MachineBasicBlock *MBB = CI.I->getParent(); - DebugLoc DL = CI.I->getDebugLoc(); - - const unsigned Opcode = getNewOpcode(CI); - - const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); - - // Copy to the new source register. 
- unsigned DestReg = MRI->createVirtualRegister(SuperRC); - unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedDMask = CI.DMask0 | CI.DMask1; + unsigned DMaskIdx = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask); auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) { + if (I == DMaskIdx) + MIB.addImm(MergedDMask); + else + MIB.add((*CI.I).getOperand(I)); + } - const unsigned Regs = getRegs(Opcode); + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); - if (Regs & VADDR) - MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); - MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) - .addImm(MergedOffset) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair SubRegIdx = getSubRegIdxs(CI); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -936,10 +1083,121 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - return Next; + return New; +} + +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + const 
unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) + .addImm(MergedOffset) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.DLC0) // dlc + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::sdst); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::sdst); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. 
+ .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; +} + +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + // Copy to the new source register. + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + + const unsigned Regs = getRegs(Opcode, *TII); + + if (Regs & VADDR) + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(MergedOffset) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. 
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. + .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; } unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { @@ -947,7 +1205,10 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { switch (CI.InstClass) { default: - return AMDGPU::getMUBUFOpcode(CI.InstClass, Width); + assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE); + // FIXME: Handle d16 correctly + return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()), + Width); case UNKNOWN: llvm_unreachable("Unknown instruction class"); case S_BUFFER_LOAD_IMM: @@ -959,76 +1220,47 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { case 4: return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; } + case MIMG: + assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width)); + return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width); } } std::pair SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) { - if (CI.Offset0 > CI.Offset1) { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1); - case 3: - return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return 
std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0); - } - } + + if (CI.Width0 == 0 || CI.Width1 == 0 || CI.Width0 + CI.Width1 > 4) + return std::make_pair(0, 0); + + bool ReverseOrder; + if (CI.InstClass == MIMG) { + assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) && + "No overlaps"); + ReverseOrder = CI.DMask0 > CI.DMask1; + } else + ReverseOrder = CI.Offset0 > CI.Offset1; + + static const unsigned Idxs[4][4] = { + {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3}, + {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0}, + {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0}, + {AMDGPU::sub3, 0, 0, 0}, + }; + unsigned Idx0; + unsigned Idx1; + + assert(CI.Width0 >= 1 && CI.Width0 <= 3); + assert(CI.Width1 >= 1 && CI.Width1 <= 3); + + if (ReverseOrder) { + Idx1 = Idxs[0][CI.Width1 - 1]; + Idx0 = Idxs[CI.Width1][CI.Width0 - 1]; } else { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1); - case 2: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2); - case 3: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2); - case 2: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3); - } - } + Idx0 = Idxs[0][CI.Width0 - 1]; + Idx1 = Idxs[CI.Width0][CI.Width1 - 1]; } + + return std::make_pair(Idx0, Idx1); } const TargetRegisterClass * @@ 
-1040,7 +1272,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI) { case 2: return &AMDGPU::SReg_64_XEXECRegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 8: return &AMDGPU::SReg_256RegClass; case 16: @@ -1073,7 +1305,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { // Copy to the new source register. const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); - unsigned SrcReg = MRI->createVirtualRegister(SuperRC); + Register SrcReg = MRI->createVirtualRegister(SuperRC); const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); @@ -1087,35 +1319,45 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) { auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode)) .addReg(SrcReg, RegState::Kill); - const unsigned Regs = getRegs(Opcode); + const unsigned Regs = getRegs(Opcode, *TII); if (Regs & VADDR) MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); - MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) - .addImm(std::min(CI.Offset0, CI.Offset1)) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe - .addImm(CI.DLC0) // dlc - .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. 
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(std::min(CI.Offset0, CI.Offset1)) // offset + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); moveInstsAfter(MIB, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(CI.I); CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - return Next; + return New; } MachineOperand -SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) { +SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) const { APInt V(32, Val, true); if (TII->isInlineConstant(V)) return MachineOperand::CreateImm(Val); - unsigned Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + Register Reg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); MachineInstr *Mov = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), Reg) @@ -1127,7 +1369,7 @@ SILoadStoreOptimizer::createRegOrImm(int32_t Val, MachineInstr &MI) { // Compute base address using Addr and return the final register. 
unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, - const MemAddress &Addr) { + const MemAddress &Addr) const { MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::iterator MBBI = MI.getIterator(); DebugLoc DL = MI.getDebugLoc(); @@ -1146,11 +1388,11 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, createRegOrImm(static_cast(Addr.Offset >> 32), MI); const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID); - unsigned CarryReg = MRI->createVirtualRegister(CarryRC); - unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC); + Register CarryReg = MRI->createVirtualRegister(CarryRC); + Register DeadCarryReg = MRI->createVirtualRegister(CarryRC); - unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); MachineInstr *LoHalf = BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0) .addReg(CarryReg, RegState::Define) @@ -1170,7 +1412,7 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, (void)HiHalf; LLVM_DEBUG(dbgs() << " "; HiHalf->dump();); - unsigned FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register FullDestReg = MRI->createVirtualRegister(&AMDGPU::VReg_64RegClass); MachineInstr *FullBase = BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) @@ -1186,13 +1428,13 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI, // Update base and offset with the NewBase and NewOffset in MI. 
void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI, unsigned NewBase, - int32_t NewOffset) { + int32_t NewOffset) const { TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase); TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset); } Optional -SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) { +SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) const { if (Op.isImm()) return Op.getImm(); @@ -1218,7 +1460,7 @@ SILoadStoreOptimizer::extractConstOffset(const MachineOperand &Op) { // %Base:vreg_64 = // REG_SEQUENCE %LO:vgpr_32, %subreg.sub0, %HI:vgpr_32, %subreg.sub1 void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base, - MemAddress &Addr) { + MemAddress &Addr) const { if (!Base.isReg()) return; @@ -1273,15 +1515,16 @@ void SILoadStoreOptimizer::processBaseWithConstOffset(const MachineOperand &Base bool SILoadStoreOptimizer::promoteConstantOffsetToImm( MachineInstr &MI, MemInfoMap &Visited, - SmallPtrSet &AnchorList) { + SmallPtrSet &AnchorList) const { - // TODO: Support flat and scratch. - if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0 || - TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL) + if (!(MI.mayLoad() ^ MI.mayStore())) return false; - // TODO: Support Store. - if (!MI.mayLoad()) + // TODO: Support flat and scratch. + if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0) + return false; + + if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL) return false; if (AnchorList.count(&MI)) @@ -1418,100 +1661,166 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( return false; } -// Scan through looking for adjacent LDS operations with constant offsets from -// the same base register. We rely on the scheduler to do the hard work of -// clustering nearby loads, and assume these are all adjacent. 
-bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { - bool Modified = false; +void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI, + std::list > &MergeableInsts) const { + for (std::list &AddrList : MergeableInsts) { + if (AddrList.front().hasSameBaseAddress(*CI.I) && + AddrList.front().InstClass == CI.InstClass) { + AddrList.emplace_back(CI); + return; + } + } + // Base address not found, so add a new list. + MergeableInsts.emplace_back(1, CI); +} + +bool SILoadStoreOptimizer::collectMergeableInsts(MachineBasicBlock &MBB, + std::list > &MergeableInsts) const { + bool Modified = false; // Contain the list MemInfoMap Visited; // Contains the list of instructions for which constant offsets are being // promoted to the IMM. SmallPtrSet AnchorList; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - MachineInstr &MI = *I; - + // Sort potential mergeable instructions into lists. One list per base address. + for (MachineInstr &MI : MBB.instrs()) { + // We run this before checking if an address is mergeable, because it can produce + // better code even if the instructions aren't mergeable. if (promoteConstantOffsetToImm(MI, Visited, AnchorList)) Modified = true; + const InstClassEnum InstClass = getInstClass(MI.getOpcode(), *TII); + if (InstClass == UNKNOWN) + continue; + // Don't combine if volatile. - if (MI.hasOrderedMemoryRef()) { - ++I; + if (MI.hasOrderedMemoryRef()) + continue; + + CombineInfo CI; + CI.setMI(MI, *TII, *STM); + + if (!CI.hasMergeableAddress(*MRI)) + continue; + + addInstToMergeableList(CI, MergeableInsts); + } + return Modified; +} + +// Scan through looking for adjacent LDS operations with constant offsets from +// the same base register. We rely on the scheduler to do the hard work of +// clustering nearby loads, and assume these are all adjacent. 
+bool SILoadStoreOptimizer::optimizeBlock( + std::list > &MergeableInsts) { + bool Modified = false; + + for (std::list &MergeList : MergeableInsts) { + if (MergeList.size() < 2) + continue; + + bool OptimizeListAgain = false; + if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) { + // We weren't able to make any changes, so clear the list so we don't + // process the same instructions the next time we try to optimize this + // block. + MergeList.clear(); continue; } - const unsigned Opc = MI.getOpcode(); + // We made changes, but also determined that there were no more optimization + // opportunities, so we don't need to reprocess the list + if (!OptimizeListAgain) + MergeList.clear(); - CombineInfo CI; - CI.I = I; - CI.InstClass = getInstClass(Opc); + OptimizeAgain |= OptimizeListAgain; + Modified = true; + } + return Modified; +} + +void +SILoadStoreOptimizer::removeCombinedInst(std::list &MergeList, + const MachineInstr &MI) { + + for (auto CI = MergeList.begin(), E = MergeList.end(); CI != E; ++CI) { + if (&*CI->I == &MI) { + MergeList.erase(CI); + return; + } + } +} + +bool +SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( + std::list &MergeList, + bool &OptimizeListAgain) { + bool Modified = false; + for (auto I = MergeList.begin(); I != MergeList.end(); ++I) { + CombineInfo &CI = *I; switch (CI.InstClass) { default: break; case DS_READ: - CI.EltSize = - (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 - : 4; if (findMatchingInst(CI)) { Modified = true; - I = mergeRead2Pair(CI); - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeRead2Pair(CI); + CI.setMI(NewMI, *TII, *STM); } - continue; + break; case DS_WRITE: - CI.EltSize = - (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 
8 - : 4; if (findMatchingInst(CI)) { Modified = true; - I = mergeWrite2Pair(CI); - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeWrite2Pair(CI); + CI.setMI(NewMI, *TII, *STM); } - continue; + break; case S_BUFFER_LOAD_IMM: - CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4); if (findMatchingInst(CI)) { Modified = true; - I = mergeSBufferLoadImmPair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 16; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeSBufferLoadImmPair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 16; } - continue; - case BUFFER_LOAD_OFFEN: - case BUFFER_LOAD_OFFSET: - case BUFFER_LOAD_OFFEN_exact: - case BUFFER_LOAD_OFFSET_exact: - CI.EltSize = 4; + break; + case BUFFER_LOAD: if (findMatchingInst(CI)) { Modified = true; - I = mergeBufferLoadPair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 4; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeBufferLoadPair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } - continue; - case BUFFER_STORE_OFFEN: - case BUFFER_STORE_OFFSET: - case BUFFER_STORE_OFFEN_exact: - case BUFFER_STORE_OFFSET_exact: - CI.EltSize = 4; + break; + case BUFFER_STORE: if (findMatchingInst(CI)) { Modified = true; - I = mergeBufferStorePair(CI); - OptimizeAgain |= (CI.Width0 + CI.Width1) < 4; - } else { - ++I; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeBufferStorePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } - continue; + break; + case MIMG: + if (findMatchingInst(CI)) { + Modified = true; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeImagePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; + } + break; } - - 
++I; + // Clear the InstsToMove after we have finished searching so we don't have + // stale values left over if we search for this CI again in another pass + // over the block. + CI.InstsToMove.clear(); } return Modified; @@ -1537,10 +1846,14 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Modified = false; + for (MachineBasicBlock &MBB : MF) { + std::list > MergeableInsts; + // First pass: Collect list of all instructions we know how to merge. + Modified |= collectMergeableInsts(MBB, MergeableInsts); do { OptimizeAgain = false; - Modified |= optimizeBlock(MBB); + Modified |= optimizeBlock(MergeableInsts); } while (OptimizeAgain); } diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 78f409cd955..6f9abd3a8d9 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -98,6 +98,8 @@ private: void emitLoop(MachineInstr &MI); void emitEndCf(MachineInstr &MI); + Register getSaveExec(MachineInstr* MI); + void findMaskOperands(MachineInstr &MI, unsigned OpNo, SmallVectorImpl &Src) const; @@ -144,7 +146,7 @@ char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, const SIInstrInfo *TII) { - unsigned SaveExecReg = MI.getOperand(0).getReg(); + Register SaveExecReg = MI.getOperand(0).getReg(); auto U = MRI->use_instr_nodbg_begin(SaveExecReg); if (U == MRI->use_instr_nodbg_end() || @@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, return true; } +Register SILowerControlFlow::getSaveExec(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + MachineOperand &SaveExec = MI->getOperand(0); + assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister); + + Register SaveExecReg = SaveExec.getReg(); + unsigned FalseTermOpc = + TII->isWave32() ? 
AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; + MachineBasicBlock::iterator I = (MI); + MachineBasicBlock::iterator J = std::next(I); + if (J != MBB->end() && J->getOpcode() == FalseTermOpc && + J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) { + SaveExecReg = J->getOperand(0).getReg(); + J->eraseFromParent(); + } + return SaveExecReg; +} + void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); - - MachineOperand &SaveExec = MI.getOperand(0); - MachineOperand &Cond = MI.getOperand(1); - assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister && - Cond.getSubReg() == AMDGPU::NoSubRegister); - - Register SaveExecReg = SaveExec.getReg(); + Register SaveExecReg = getSaveExec(&MI); + MachineOperand& Cond = MI.getOperand(1); + assert(Cond.getSubReg() == AMDGPU::NoSubRegister); MachineOperand &ImpDefSCC = MI.getOperand(4); assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef()); @@ -204,7 +220,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { .addReg(Exec) .addReg(Exec, RegState::ImplicitDefine); - unsigned Tmp = MRI->createVirtualRegister(BoolRC); + Register Tmp = MRI->createVirtualRegister(BoolRC); MachineInstr *And = BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp) @@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - Register DstReg = MI.getOperand(0).getReg(); - assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister); + Register DstReg = getSaveExec(&MI); bool ExecModified = MI.getOperand(3).getImm() != 0; MachineBasicBlock::iterator Start = MBB.begin(); @@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); - auto Dst = MI.getOperand(0).getReg(); + auto Dst = 
getSaveExec(&MI); // Skip ANDing with exec if the break condition is already masked by exec // because it is a V_CMP in the same basic block. (We know the break @@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) { void SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + unsigned CFMask = MI.getOperand(0).getReg(); + MachineInstr *Def = MRI.getUniqueVRegDef(CFMask); const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock::iterator InsPt = MBB.begin(); - MachineInstr *NewMI = - BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) - .addReg(Exec) - .add(MI.getOperand(0)); + MachineBasicBlock::iterator InsPt = + Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def)) + : MBB.begin(); + MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) + .addReg(Exec) + .add(MI.getOperand(0)); if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *NewMI); @@ -422,7 +441,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) { void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo, SmallVectorImpl &Src) const { MachineOperand &Op = MI.getOperand(OpNo); - if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) { + if (!Op.isReg() || !Register::isVirtualRegister(Op.getReg())) { Src.push_back(Op); return; } @@ -442,8 +461,7 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo, for (const auto &SrcOp : Def->explicit_operands()) if (SrcOp.isReg() && SrcOp.isUse() && - (TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) || - SrcOp.getReg() == Exec)) + (Register::isVirtualRegister(SrcOp.getReg()) || SrcOp.getReg() == Exec)) Src.push_back(SrcOp); } @@ -466,7 +484,7 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) { else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1; else return; - unsigned Reg = MI.getOperand(OpToReplace).getReg(); + Register Reg = 
MI.getOperand(OpToReplace).getReg(); MI.RemoveOperand(OpToReplace); MI.addOperand(Ops[UniqueOpndIdx]); if (MRI->use_empty(Reg)) diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 1c0f836f07e..b4541253635 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -96,7 +96,7 @@ private: getSaluInsertionAtEnd(MachineBasicBlock &MBB) const; bool isVreg1(unsigned Reg) const { - return TargetRegisterInfo::isVirtualRegister(Reg) && + return Register::isVirtualRegister(Reg) && MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass; } @@ -489,6 +489,15 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) { return true; } +#ifndef NDEBUG +static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + Register Reg) { + unsigned Size = TRI.getRegSizeInBits(Reg, MRI); + return Size == 1 || Size == 32; +} +#endif + void SILowerI1Copies::lowerCopiesFromI1() { SmallVector DeadCopies; @@ -497,8 +506,8 @@ void SILowerI1Copies::lowerCopiesFromI1() { if (MI.getOpcode() != AMDGPU::COPY) continue; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); if (!isVreg1(SrcReg)) continue; @@ -509,7 +518,7 @@ void SILowerI1Copies::lowerCopiesFromI1() { LLVM_DEBUG(dbgs() << "Lower copy from i1: " << MI); DebugLoc DL = MI.getDebugLoc(); - assert(TII->getRegisterInfo().getRegSizeInBits(DstReg, *MRI) == 32); + assert(isVRegCompatibleReg(TII->getRegisterInfo(), *MRI, DstReg)); assert(!MI.getOperand(0).getSubReg()); ConstrainRegs.insert(SrcReg); @@ -544,7 +553,7 @@ void SILowerI1Copies::lowerPhis() { LF.initialize(MBB); for (MachineInstr &MI : MBB.phis()) { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!isVreg1(DstReg)) continue; @@ -556,7 +565,7 @@ void SILowerI1Copies::lowerPhis() { // 
Collect incoming values. for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { assert(i + 1 < MI.getNumOperands()); - unsigned IncomingReg = MI.getOperand(i).getReg(); + Register IncomingReg = MI.getOperand(i).getReg(); MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB(); MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg); @@ -580,12 +589,12 @@ void SILowerI1Copies::lowerPhis() { // Phis in a loop that are observed outside the loop receive a simple but // conservatively correct treatment. - MachineBasicBlock *PostDomBound = &MBB; - for (MachineInstr &Use : MRI->use_instructions(DstReg)) { - PostDomBound = - PDT->findNearestCommonDominator(PostDomBound, Use.getParent()); - } + std::vector DomBlocks = {&MBB}; + for (MachineInstr &Use : MRI->use_instructions(DstReg)) + DomBlocks.push_back(Use.getParent()); + MachineBasicBlock *PostDomBound = + PDT->findNearestCommonDominator(DomBlocks); unsigned FoundLoopLevel = LF.findLoop(PostDomBound); SSAUpdater.Initialize(DstReg); @@ -669,7 +678,7 @@ void SILowerI1Copies::lowerCopiesToI1() { MI.getOpcode() != AMDGPU::COPY) continue; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (!isVreg1(DstReg)) continue; @@ -686,10 +695,10 @@ void SILowerI1Copies::lowerCopiesToI1() { continue; DebugLoc DL = MI.getDebugLoc(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); assert(!MI.getOperand(1).getSubReg()); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + if (!Register::isVirtualRegister(SrcReg) || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) { assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32); unsigned TmpReg = createLaneMaskReg(*MF); @@ -702,12 +711,12 @@ void SILowerI1Copies::lowerCopiesToI1() { // Defs in a loop that are observed outside the loop must be transformed // into appropriate bit manipulation. 
- MachineBasicBlock *PostDomBound = &MBB; - for (MachineInstr &Use : MRI->use_instructions(DstReg)) { - PostDomBound = - PDT->findNearestCommonDominator(PostDomBound, Use.getParent()); - } + std::vector DomBlocks = {&MBB}; + for (MachineInstr &Use : MRI->use_instructions(DstReg)) + DomBlocks.push_back(Use.getParent()); + MachineBasicBlock *PostDomBound = + PDT->findNearestCommonDominator(DomBlocks); unsigned FoundLoopLevel = LF.findLoop(PostDomBound); if (FoundLoopLevel) { SSAUpdater.Initialize(DstReg); @@ -734,7 +743,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const { break; Reg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (!isLaneMaskReg(Reg)) return false; diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index a8204747337..714d403a3e8 100644 --- a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -278,8 +278,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr); int FI = MI.getOperand(FIOp).getIndex(); - unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata) - ->getReg(); + Register VReg = + TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg))) { TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 46da974a2f4..7dd0f11c95d 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -53,8 +53,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); - Occupancy = getMaxWavesPerEU(); - limitOccupancy(MF); + Occupancy = 
ST.computeOccupancy(MF, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { @@ -190,7 +189,7 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( const SIRegisterInfo &TRI) { ArgInfo.PrivateSegmentBuffer = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass)); + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); NumUserSGPRs += 4; return ArgInfo.PrivateSegmentBuffer.getRegister(); } @@ -487,6 +486,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), + HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)), FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), @@ -501,8 +501,9 @@ void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { bool SIMachineFunctionInfo::initializeBaseYamlFields( const yaml::SIMachineFunctionInfo &YamlMFI) { ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; - MaxKernArgAlign = YamlMFI.MaxKernArgAlign; + MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); LDSSize = YamlMFI.LDSSize; + HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; IsEntryFunction = YamlMFI.IsEntryFunction; NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; MemoryBound = YamlMFI.MemoryBound; diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index f19b20ceb5d..7d70c786b59 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -265,6 +265,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { bool NoSignedZerosFPMath = false; bool MemoryBound = false; bool WaveLimiter = false; + uint32_t HighBitsOf32BitAddress = 0; 
StringValue ScratchRSrcReg = "$private_rsrc_reg"; StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg"; @@ -302,6 +303,8 @@ template <> struct MappingTraits { StringValue("$sp_reg")); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); YamlIO.mapOptional("mode", MFI.Mode, SIMode()); + YamlIO.mapOptional("highBitsOf32BitAddress", + MFI.HighBitsOf32BitAddress, 0u); } }; @@ -670,7 +673,7 @@ public: return GITPtrHigh; } - unsigned get32BitAddressHighBits() const { + uint32_t get32BitAddressHighBits() const { return HighBitsOf32BitAddress; } @@ -873,7 +876,7 @@ public: assert(BufferRsrc); auto PSV = BufferPSVs.try_emplace( BufferRsrc, - llvm::make_unique(TII)); + std::make_unique(TII)); return PSV.first->second.get(); } @@ -882,14 +885,14 @@ public: assert(ImgRsrc); auto PSV = ImagePSVs.try_emplace( ImgRsrc, - llvm::make_unique(TII)); + std::make_unique(TII)); return PSV.first->second.get(); } const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) { if (!GWSResourcePSV) { GWSResourcePSV = - llvm::make_unique(TII); + std::make_unique(TII); } return GWSResourcePSV.get(); diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index ebbdf80f956..c072ba6b2d1 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -348,7 +348,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // Do not Track Physical Registers, because it messes up. for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { - if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit)) + if (Register::isVirtualRegister(RegMaskPair.RegUnit)) LiveInRegs.insert(RegMaskPair.RegUnit); } LiveOutRegs.clear(); @@ -376,7 +376,7 @@ void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, // The use of findDefBetween removes the case 4. 
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { unsigned Reg = RegMaskPair.RegUnit; - if (TargetRegisterInfo::isVirtualRegister(Reg) && + if (Register::isVirtualRegister(Reg) && isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(), LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI, LIS)) { @@ -1228,7 +1228,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria unsigned Color = CurrentColoring[SU->NodeNum]; if (RealID.find(Color) == RealID.end()) { int ID = CurrentBlocks.size(); - BlockPtrs.push_back(llvm::make_unique(DAG, this, ID)); + BlockPtrs.push_back(std::make_unique(DAG, this, ID)); CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); RealID[Color] = ID; } @@ -1690,7 +1690,7 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() { void SIScheduleBlockScheduler::addLiveRegs(std::set &Regs) { for (unsigned Reg : Regs) { // For now only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; // If not already in the live set, then add it. (void) LiveRegs.insert(Reg); @@ -1750,7 +1750,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set &InRegs, for (unsigned Reg : InRegs) { // For now only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; if (LiveRegsConsumers[Reg] > 1) continue; @@ -1762,7 +1762,7 @@ SIScheduleBlockScheduler::checkRegUsageImpact(std::set &InRegs, for (unsigned Reg : OutRegs) { // For now only track virtual registers. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); for (; PSetI.isValid(); ++PSetI) { @@ -1801,7 +1801,7 @@ SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, // SIScheduleDAGMI // SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) : - ScheduleDAGMILive(C, llvm::make_unique(C)) { + ScheduleDAGMILive(C, std::make_unique(C)) { SITII = static_cast(TII); SITRI = static_cast(TRI); @@ -1913,7 +1913,7 @@ SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End, for (_Iterator RegI = First; RegI != End; ++RegI) { unsigned Reg = *RegI; // For now only track virtual registers - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) continue; PSetIterator PSetI = MRI.getPressureSets(Reg); for (; PSetI.isValid(); ++PSetI) { diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 4320e6c957a..e914573306a 100644 --- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -656,10 +656,10 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) { std::unique_ptr SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS) - return make_unique(ST); + return std::make_unique(ST); if (Generation < AMDGPUSubtarget::GFX10) - return make_unique(ST); - return make_unique(ST, ST.isCuModeEnabled()); + return std::make_unique(ST); + return std::make_unique(ST, ST.isCuModeEnabled()); } bool SIGfx6CacheControl::enableLoadCacheBypass( diff --git a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp index a5edd7b3554..52989a280e8 100644 --- a/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/lib/Target/AMDGPU/SIModeRegister.cpp @@ -226,7 +226,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, 
// - on exit we have set the Require, Change, and initial Exit modes. void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII) { - auto NewInfo = llvm::make_unique(); + auto NewInfo = std::make_unique(); MachineInstr *InsertionPoint = nullptr; // RequirePending is used to indicate whether we are collecting the initial // requirements for the block, and need to defer the first InsertionPoint to diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 3227bff2051..cc9b46a7558 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -322,7 +322,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { continue; } - unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); + Register CopyFromExec = CopyFromExecInst->getOperand(0).getReg(); MachineInstr *SaveExecInst = nullptr; SmallVector OtherUseInsts; diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 7e10316eab9..fdd30db6a7c 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -211,7 +211,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, return AMDGPU::NoRegister; MachineOperand *AndCC = &And->getOperand(1); - unsigned CmpReg = AndCC->getReg(); + Register CmpReg = AndCC->getReg(); unsigned CmpSubReg = AndCC->getSubReg(); if (CmpReg == ExecReg) { AndCC = &And->getOperand(2); @@ -234,7 +234,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1) return AMDGPU::NoRegister; - unsigned SelReg = Op1->getReg(); + Register SelReg = Op1->getReg(); auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, MRI, LIS); if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64) return AMDGPU::NoRegister; @@ -250,15 +250,16 @@ static unsigned 
optimizeVcndVcmpPair(MachineBasicBlock &MBB, Op1->getImm() != 0 || Op2->getImm() != 1) return AMDGPU::NoRegister; - LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' - << *Cmp << '\t' << *And); + LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' + << *And); - unsigned CCReg = CC->getReg(); + Register CCReg = CC->getReg(); LIS->RemoveMachineInstrFromMaps(*And); - MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(), - TII->get(Andn2Opc), And->getOperand(0).getReg()) - .addReg(ExecReg) - .addReg(CCReg, 0, CC->getSubReg()); + MachineInstr *Andn2 = + BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), + And->getOperand(0).getReg()) + .addReg(ExecReg) + .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); And->eraseFromParent(); LIS->InsertMachineInstrInMaps(*Andn2); @@ -266,20 +267,19 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, // Try to remove compare. Cmp value should not used in between of cmp // and s_and_b64 if VCC or just unused if any other register. - if ((TargetRegisterInfo::isVirtualRegister(CmpReg) && - MRI.use_nodbg_empty(CmpReg)) || + if ((Register::isVirtualRegister(CmpReg) && MRI.use_nodbg_empty(CmpReg)) || (CmpReg == CondReg && std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), [&](const MachineInstr &MI) { - return MI.readsRegister(CondReg, TRI); }))) { + return MI.readsRegister(CondReg, TRI); + }))) { LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n'); LIS->RemoveMachineInstrFromMaps(*Cmp); Cmp->eraseFromParent(); // Try to remove v_cndmask_b32. 
- if (TargetRegisterInfo::isVirtualRegister(SelReg) && - MRI.use_nodbg_empty(SelReg)) { + if (Register::isVirtualRegister(SelReg) && MRI.use_nodbg_empty(SelReg)) { LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); LIS->RemoveMachineInstrFromMaps(*Sel); @@ -413,7 +413,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (!SaveExec || !SaveExec->isFullCopy()) continue; - unsigned SavedExec = SaveExec->getOperand(0).getReg(); + Register SavedExec = SaveExec->getOperand(0).getReg(); bool SafeToReplace = true; for (auto& U : MRI.use_nodbg_instructions(SavedExec)) { if (U.getParent() != SaveExec->getParent()) { @@ -434,7 +434,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (Changed) { for (auto Reg : RecalcRegs) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { LIS->removeInterval(Reg); if (!MRI.reg_empty(Reg)) LIS->createAndComputeVirtRegInterval(Reg); diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 2d71abc0612..9b3b2436475 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -574,16 +574,16 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src1->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || Opcode == AMDGPU::V_LSHLREV_B32_e64) { - return make_unique( + return std::make_unique( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); } else { - return make_unique( + return std::make_unique( Src1, Dst, *Imm == 16 ? 
WORD_1 : BYTE_3, false, false, Opcode != AMDGPU::V_LSHRREV_B32_e32 && Opcode != AMDGPU::V_LSHRREV_B32_e64); @@ -613,15 +613,15 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src1->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || Opcode == AMDGPU::V_LSHLREV_B16_e64) { - return make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); + return std::make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); } else { - return make_unique( + return std::make_unique( Src1, Dst, BYTE_1, false, false, Opcode != AMDGPU::V_LSHRREV_B16_e32 && Opcode != AMDGPU::V_LSHRREV_B16_e64); @@ -677,11 +677,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src0->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(Src0->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; - return make_unique( + return std::make_unique( Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32); } @@ -706,11 +706,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(ValSrc->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) + if (Register::isPhysicalRegister(ValSrc->getReg()) || + Register::isPhysicalRegister(Dst->getReg())) break; - return make_unique( + return std::make_unique( ValSrc, Dst, *Imm == 0x0000ffff ? 
WORD_0 : BYTE_0); } @@ -840,7 +840,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); assert(OrDst && OrDst->isReg()); - return make_unique( + return std::make_unique( OrDst, OrSDWADef, OrOtherDef, DstSel); } @@ -1189,7 +1189,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, continue; } - unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), VGPR); if (Op.isImm()) diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index f9bfe96f65c..6cdd12d0e7b 100644 --- a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -90,12 +90,12 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!TRI->isVGPR(*MRI, Reg)) return false; - if (TRI->isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return false; if (VRM->hasPhys(Reg)) @@ -124,14 +124,14 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { if (!MO.isReg()) continue; - const unsigned VirtReg = MO.getReg(); - if (TRI->isPhysicalRegister(VirtReg)) + const Register VirtReg = MO.getReg(); + if (Register::isPhysicalRegister(VirtReg)) continue; if (!VRM->hasPhys(VirtReg)) continue; - unsigned PhysReg = VRM->getPhys(VirtReg); + Register PhysReg = VRM->getPhys(VirtReg); const unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { PhysReg = TRI->getSubReg(PhysReg, SubReg); @@ -149,7 +149,7 @@ void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { for (unsigned Reg : RegsToRewrite) { LIS->removeInterval(Reg); - const unsigned PhysReg = VRM->getPhys(Reg); + const Register PhysReg = VRM->getPhys(Reg); assert(PhysReg != 0); 
MFI->ReserveWWMRegister(PhysReg); } diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h index 168f05f8fdd..7c039a54b57 100644 --- a/lib/Target/AMDGPU/SIProgramInfo.h +++ b/lib/Target/AMDGPU/SIProgramInfo.h @@ -41,6 +41,8 @@ struct SIProgramInfo { uint64_t ComputePGMRSrc2 = 0; uint32_t NumVGPR = 0; + uint32_t NumArchVGPR = 0; + uint32_t NumAccVGPR = 0; uint32_t NumSGPR = 0; uint32_t LDSSize = 0; bool FlatUsed = false; @@ -51,6 +53,9 @@ struct SIProgramInfo { // Number of VGPRs that meets number of waves per execution unit request. uint32_t NumVGPRsForWavesPerEU = 0; + // Final occupancy. + uint32_t Occupancy = 0; + // Whether there is recursion, dynamic allocas, indirect calls or some other // reason there may be statically unknown stack usage. bool DynamicCallStack = false; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index f152deb2800..f58bc3060c4 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg, } } -static cl::opt EnableSpillSGPRToSMEM( - "amdgpu-spill-sgpr-to-smem", - cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), - cl::init(false)); - static cl::opt EnableSpillSGPRToVGPR( "amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), @@ -61,17 +56,12 @@ static cl::opt EnableSpillSGPRToVGPR( SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPURegisterInfo(), + ST(ST), SGPRPressureSets(getNumRegPressureSets()), VGPRPressureSets(getNumRegPressureSets()), AGPRPressureSets(getNumRegPressureSets()), - SpillSGPRToVGPR(false), - SpillSGPRToSMEM(false), + SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { - if (EnableSpillSGPRToSMEM && ST.hasScalarStores()) - SpillSGPRToSMEM = true; - else if (EnableSpillSGPRToVGPR) - SpillSGPRToVGPR = true; - unsigned NumRegPressureSets = getNumRegPressureSets(); 
SGPRSetID = NumRegPressureSets; @@ -118,11 +108,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); - return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); } static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { @@ -144,7 +132,6 @@ static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - const GCNSubtarget &ST = MF.getSubtarget(); unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } @@ -202,8 +189,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AMDGPU::VCC_HI); } - const GCNSubtarget &ST = MF.getSubtarget(); - unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { @@ -220,6 +205,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, Reg); } + // Reserve all the rest AGPRs if there are no instructions to use it. 
+ if (!ST.hasMAIInsts()) { + for (unsigned i = 0; i < MaxNumVGPRs; ++i) { + unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); + } + } + const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); @@ -293,32 +286,17 @@ bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const bool SIRegisterInfo::requiresFrameIndexScavenging( const MachineFunction &MF) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.hasStackObjects()) - return true; - - // May need to deal with callee saved registers. - const SIMachineFunctionInfo *Info = MF.getInfo(); - return !Info->isEntryFunction(); + // Do not use frame virtual registers. They used to be used for SGPRs, but + // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the + // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a + // spill. + return false; } bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (!MFI.hasStackObjects()) - return false; - - // The scavenger is used for large frames which may require finding a free - // register for large offsets. - if (!isUInt<12>(MFI.getStackSize())) - return true; - - // If using scalar stores, for spills, m0 is needed for the scalar store - // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual - // register for it during frame index elimination, so the scavenger is - // directly needed. 
- return MF.getSubtarget().hasScalarStores() && - MF.getInfo()->hasSpilledSGPRs(); + return MFI.hasStackObjects(); } bool SIRegisterInfo::requiresVirtualBaseRegisters( @@ -372,8 +350,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, DL = Ins->getDebugLoc(); MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); if (Offset == 0) { BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg) @@ -382,9 +359,9 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, } MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) .addImm(Offset); @@ -399,11 +376,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { - - MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - const GCNSubtarget &Subtarget = MF->getSubtarget(); - const SIInstrInfo *TII = Subtarget.getInstrInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); #ifndef NDEBUG // FIXME: Is it possible to be storing a frame index to itself? 
@@ -419,12 +392,15 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, #endif MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); +#ifndef NDEBUG + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); +#endif assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == - MF->getInfo()->getFrameOffsetReg() && - "should only be seeing frame offset relative FrameIndex"); - + MF->getInfo()->getStackPtrOffsetReg() && + "should only be seeing stack pointer offset relative FrameIndex"); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; @@ -564,7 +540,8 @@ static int getOffsetMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, +static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, + MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, @@ -572,7 +549,6 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); @@ -595,11 +571,12 @@ static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI, // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not // need to handle the case where an SGPR may need to be spilled while spilling. 
-static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, +static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset) { + const SIInstrInfo *TII = ST.getInstrInfo(); MachineBasicBlock *MBB = MI->getParent(); const DebugLoc &DL = MI->getDebugLoc(); bool IsStore = MI->mayStore(); @@ -611,7 +588,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, return false; const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); - if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr()) + if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr()) return true; MachineInstrBuilder NewMI = @@ -624,6 +601,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .cloneMemRefs(*MI); const MachineOperand *VDataIn = TII->getNamedOperand(*MI, @@ -645,7 +623,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, RegScavenger *RS) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MI->getParent()->getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -707,8 +684,9 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) { - unsigned SubReg = NumSubRegs == 1 ? - ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i)); + Register SubReg = NumSubRegs == 1 + ? 
Register(ValueReg) + : getSubReg(ValueReg, getSubRegFromChannel(i)); unsigned SOffsetRegState = 0; unsigned SrcDstRegState = getDefRegState(!IsStore); @@ -718,7 +696,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } - auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill); + auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill); if (!MIB.getInstr()) { unsigned FinalReg = SubReg; @@ -743,6 +721,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // slc .addImm(0) // tfe .addImm(0) // dlc + .addImm(0) // swz .addMemOperand(NewMMO); if (!IsStore && TmpReg != AMDGPU::NoRegister) @@ -763,22 +742,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } } -static std::pair getSpillEltSize(unsigned SuperRegSize, - bool Store) { - if (SuperRegSize % 16 == 0) { - return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR }; - } - - if (SuperRegSize % 8 == 0) { - return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR }; - } - - return { 4, Store ? 
AMDGPU::S_BUFFER_STORE_DWORD_SGPR : - AMDGPU::S_BUFFER_LOAD_DWORD_SGPR}; -} - bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, @@ -794,98 +757,37 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (OnlyToVGPR && !SpillToVGPR) return false; - MachineRegisterInfo &MRI = MF->getRegInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); - unsigned SuperReg = MI->getOperand(0).getReg(); + Register SuperReg = MI->getOperand(0).getReg(); bool IsKill = MI->getOperand(0).isKill(); const DebugLoc &DL = MI->getDebugLoc(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - bool SpillToSMEM = spillSGPRToSMEM(); - if (SpillToSMEM && OnlyToVGPR) - return false; - - Register FrameReg = getFrameRegister(*MF); - assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && SuperReg != MFI->getFrameOffsetReg() && SuperReg != MFI->getScratchWaveOffsetReg())); assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - unsigned OffsetReg = AMDGPU::M0; unsigned M0CopyReg = AMDGPU::NoRegister; - if (SpillToSMEM) { - if (RS->isRegUsed(AMDGPU::M0)) { - M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) - .addReg(AMDGPU::M0); - } - } - - unsigned ScalarStoreOp; unsigned EltSize = 4; const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - if (SpillToSMEM && isSGPRClass(RC)) { - // XXX - if private_element_size is larger than 4 it might be useful to be - // able to spill wider vmem spills. - std::tie(EltSize, ScalarStoreOp) = - getSpillEltSize(getRegSizeInBits(*RC) / 8, true); - } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + // Scavenged temporary VGPR to use. It must be scavenged once for any number + // of spilled subregs. + Register TmpVGPR; + // SubReg carries the "Kill" flag when SubReg == SuperReg. 
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - unsigned SubReg = NumSubRegs == 1 ? - SuperReg : getSubReg(SuperReg, SplitParts[i]); - - if (SpillToSMEM) { - int64_t FrOffset = FrameInfo.getObjectOffset(Index); - - // The allocated memory size is really the wavefront size * the frame - // index size. The widest register class is 64 bytes, so a 4-byte scratch - // allocation is enough to spill this in a single stack object. - // - // FIXME: Frame size/offsets are computed earlier than this, so the extra - // space is still unnecessarily allocated. - - unsigned Align = FrameInfo.getObjectAlignment(Index); - MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); - MachineMemOperand *MMO - = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - EltSize, MinAlign(Align, EltSize * i)); - - // SMEM instructions only support a single offset, so increment the wave - // offset. - - int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); - if (Offset != 0) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(FrameReg) - .addImm(Offset); - } else { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(FrameReg); - } - - BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) - .addReg(SubReg, getKillRegState(IsKill)) // sdata - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc - .addImm(0) // dlc - .addMemOperand(MMO); - - continue; - } + Register SubReg = + NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); if (SpillToVGPR) { SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; @@ -915,15 +817,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, return false; // Spill SGPR to a frame index. - // TODO: Should VI try to spill to VGPR and then spill to SMEM? 
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // TODO: Should VI try to spill to VGPR and then spill to SMEM? + if (!TmpVGPR.isValid()) + TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); MachineInstrBuilder Mov - = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(SubReg, SubKillState); - // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? if (NumSubRegs > 1) { @@ -941,7 +841,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, EltSize, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) - .addReg(TmpReg, RegState::Kill) // src + .addReg(TmpVGPR, RegState::Kill) // src .addFrameIndex(Index) // vaddr .addReg(MFI->getScratchRSrcReg()) // srrsrc .addReg(MFI->getStackPtrOffsetReg()) // soffset @@ -965,7 +865,6 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, RegScavenger *RS, bool OnlyToVGPR) const { MachineFunction *MF = MI->getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); @@ -976,84 +875,27 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, return false; MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const DebugLoc &DL = MI->getDebugLoc(); - unsigned SuperReg = MI->getOperand(0).getReg(); - bool SpillToSMEM = spillSGPRToSMEM(); - if (SpillToSMEM && OnlyToVGPR) - return false; + Register SuperReg = MI->getOperand(0).getReg(); assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - unsigned OffsetReg = AMDGPU::M0; unsigned M0CopyReg = AMDGPU::NoRegister; - if (SpillToSMEM) { - if (RS->isRegUsed(AMDGPU::M0)) { - M0CopyReg = 
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) - .addReg(AMDGPU::M0); - } - } - unsigned EltSize = 4; - unsigned ScalarLoadOp; - - Register FrameReg = getFrameRegister(*MF); const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - if (SpillToSMEM && isSGPRClass(RC)) { - // XXX - if private_element_size is larger than 4 it might be useful to be - // able to spill wider vmem spills. - std::tie(EltSize, ScalarLoadOp) = - getSpillEltSize(getRegSizeInBits(*RC) / 8, false); - } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - // SubReg carries the "Kill" flag when SubReg == SuperReg. - int64_t FrOffset = FrameInfo.getObjectOffset(Index); + Register TmpVGPR; for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - unsigned SubReg = NumSubRegs == 1 ? - SuperReg : getSubReg(SuperReg, SplitParts[i]); - - if (SpillToSMEM) { - // FIXME: Size may be > 4 but extra bytes wasted. - unsigned Align = FrameInfo.getObjectAlignment(Index); - MachinePointerInfo PtrInfo - = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); - MachineMemOperand *MMO - = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - EltSize, MinAlign(Align, EltSize * i)); - - // Add i * 4 offset - int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); - if (Offset != 0) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(FrameReg) - .addImm(Offset); - } else { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(FrameReg); - } - - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg) - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc - .addImm(0) // dlc - .addMemOperand(MMO); - - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); - - continue; - } + Register SubReg = + NumSubRegs == 1 ? 
SuperReg : getSubReg(SuperReg, SplitParts[i]); if (SpillToVGPR) { SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; @@ -1071,7 +913,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, // Restore SGPR from a stack slot. // FIXME: We should use S_LOAD_DWORD here for VI. - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (!TmpVGPR.isValid()) + TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); unsigned Align = FrameInfo.getObjectAlignment(Index); MachinePointerInfo PtrInfo @@ -1081,7 +924,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, MachineMemOperand::MOLoad, EltSize, MinAlign(Align, EltSize * i)); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR) .addFrameIndex(Index) // vaddr .addReg(MFI->getScratchRSrcReg()) // srsrc .addReg(MFI->getStackPtrOffsetReg()) // soffset @@ -1090,7 +933,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) - .addReg(TmpReg, RegState::Kill); + .addReg(TmpVGPR, RegState::Kill); if (NumSubRegs > 1) MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); @@ -1141,11 +984,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineFunction *MF = MI->getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - const GCNSubtarget &ST = MF->getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -1255,13 +1096,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // In an entry function/kernel the offset is already the absolute // address relative to the frame 
register. - unsigned DiffReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register TmpDiffReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false); + + // If there's no free SGPR, in-place modify the FP + Register DiffReg = TmpDiffReg.isValid() ? TmpDiffReg : FrameReg; bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32; Register ResultReg = IsCopy ? MI->getOperand(0).getReg() : - MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg) .addReg(FrameReg) @@ -1271,35 +1115,80 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (Offset == 0) { // XXX - This never happens because of emergency scavenging slot at 0? BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg) - .addImm(Log2_32(ST.getWavefrontSize())) + .addImm(ST.getWavefrontSizeLog2()) .addReg(DiffReg); } else { - unsigned ScaledReg - = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) { + Register ScaledReg = + RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg) - .addImm(Log2_32(ST.getWavefrontSize())) - .addReg(DiffReg, RegState::Kill); + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), + ScaledReg) + .addImm(ST.getWavefrontSizeLog2()) + .addReg(DiffReg, RegState::Kill); - // TODO: Fold if use instruction is another add of a constant. - if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - TII->getAddNoCarry(*MBB, MI, DL, ResultReg) - .addImm(Offset) - .addReg(ScaledReg, RegState::Kill) - .addImm(0); // clamp bit + const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; + + // TODO: Fold if use instruction is another add of a constant. 
+ if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + // FIXME: This can fail + MIB.addImm(Offset); + MIB.addReg(ScaledReg, RegState::Kill); + if (!IsVOP2) + MIB.addImm(0); // clamp bit + } else { + Register ConstOffsetReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false); + + // This should always be able to use the unused carry out. + assert(ConstOffsetReg && "this scavenge should not be able to fail"); + + BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) + .addImm(Offset); + MIB.addReg(ConstOffsetReg, RegState::Kill); + MIB.addReg(ScaledReg, RegState::Kill); + MIB.addImm(0); // clamp bit + } } else { - unsigned ConstOffsetReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + // We have to produce a carry out, and we there isn't a free SGPR + // pair for it. We can keep the whole computation on the SALU to + // avoid clobbering an additional register at the cost of an extra + // mov. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) - .addImm(Offset); - TII->getAddNoCarry(*MBB, MI, DL, ResultReg) - .addReg(ConstOffsetReg, RegState::Kill) + // We may have 1 free scratch SGPR even though a carry out is + // unavailable. Only one additional mov is needed. + Register TmpScaledReg = + RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false); + Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg; + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg) .addReg(ScaledReg, RegState::Kill) - .addImm(0); // clamp bit + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) + .addReg(ScaledReg, RegState::Kill); + + // If there were truly no free SGPRs, we need to undo everything. 
+ if (!TmpScaledReg.isValid()) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg) + .addReg(ScaledReg, RegState::Kill) + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); + } } } + if (!TmpDiffReg.isValid()) { + // Restore the FP. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), FrameReg) + .addReg(FrameReg) + .addReg(MFI->getScratchWaveOffsetReg()); + } + // Don't introduce an extra copy if we're just materializing in a mov. if (IsCopy) MI->eraseFromParent(); @@ -1325,7 +1214,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t NewOffset = OldImm + Offset; if (isUInt<12>(NewOffset) && - buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) { + buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { MI->eraseFromParent(); return; } @@ -1337,7 +1226,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int64_t Offset = FrameInfo.getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); @@ -1347,27 +1236,13 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { - const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg); - unsigned Size = getRegSizeInBits(*RC); - unsigned AltName = AMDGPU::NoRegAltName; - - switch (Size) { - case 32: AltName = AMDGPU::Reg32; break; - case 64: AltName = AMDGPU::Reg64; break; - case 96: AltName = AMDGPU::Reg96; break; - case 128: AltName = AMDGPU::Reg128; break; - case 160: AltName = AMDGPU::Reg160; break; - case 256: AltName 
= AMDGPU::Reg256; break; - case 512: AltName = AMDGPU::Reg512; break; - case 1024: AltName = AMDGPU::Reg1024; break; - } - return AMDGPUInstPrinter::getRegisterName(Reg, AltName); + return AMDGPUInstPrinter::getRegisterName(Reg); } // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { - assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + assert(!Register::isVirtualRegister(Reg)); static const TargetRegisterClass *const BaseClasses[] = { &AMDGPU::VGPR_32RegClass, @@ -1408,8 +1283,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { // TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); - if (Size < 32) - return false; switch (Size) { case 32: return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; @@ -1427,8 +1300,11 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; case 1024: return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr; + case 1: + return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr; default: - llvm_unreachable("Invalid register class size"); + assert(Size < 32 && "Invalid register class size"); + return false; } } @@ -1476,6 +1352,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( return &AMDGPU::VReg_512RegClass; case 1024: return &AMDGPU::VReg_1024RegClass; + case 1: + return &AMDGPU::VReg_1RegClass; default: llvm_unreachable("Invalid register class size"); } @@ -1509,7 +1387,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( case 96: return &AMDGPU::SReg_96RegClass; case 128: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 160: return &AMDGPU::SReg_160RegClass; case 256: 
@@ -1539,7 +1417,7 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( case 3: return &AMDGPU::SReg_96RegClass; case 4: - return &AMDGPU::SReg_128RegClass; + return &AMDGPU::SGPR_128RegClass; case 5: return &AMDGPU::SReg_160RegClass; case 8: @@ -1587,6 +1465,15 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( } } +bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { + if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && + OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST) + return !ST.hasMFMAInlineLiteralBug(); + + return OpType >= AMDGPU::OPERAND_SRC_FIRST && + OpType <= AMDGPU::OPERAND_SRC_LAST; +} + bool SIRegisterInfo::shouldRewriteCopySrc( const TargetRegisterClass *DefRC, unsigned DefSubReg, @@ -1802,7 +1689,7 @@ ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC const TargetRegisterClass* SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI.getRegClass(Reg); return getPhysRegClass(Reg); @@ -1845,8 +1732,6 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - - const GCNSubtarget &ST = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), @@ -1900,18 +1785,22 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass; case AMDGPU::SGPRRegBankID: - return &AMDGPU::SReg_32_XM0RegClass; + return &AMDGPU::SReg_32RegClass; case AMDGPU::SCCRegBankID: // This needs to return an allocatable class, so don't bother returning // the dummy SCC class. - return &AMDGPU::SReg_32_XM0RegClass; + // + // FIXME: This is a grotesque hack. 
We use SGPR_32 as an indication this + // was not an VCC bank value since we use the larger class SReg_32 for + // other values. These should all use SReg_32. + return &AMDGPU::SGPR_32RegClass; default: llvm_unreachable("unknown register bank"); } } case 32: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32_XM0RegClass; + &AMDGPU::SReg_32RegClass; case 64: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass : &AMDGPU::SReg_64_XEXECRegClass; @@ -1920,7 +1809,7 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, &AMDGPU::SReg_96RegClass; case 128: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : - &AMDGPU::SReg_128RegClass; + &AMDGPU::SGPR_128RegClass; case 160: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass : &AMDGPU::SReg_160RegClass; @@ -1930,10 +1819,13 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, case 512: return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass : &AMDGPU::SReg_512RegClass; + case 1024: + return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass : + &AMDGPU::SReg_1024RegClass; default: if (Size < 32) return RB.getID() == AMDGPU::VGPRRegBankID ? 
&AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32_XM0RegClass; + &AMDGPU::SReg_32RegClass; return nullptr; } } @@ -1941,9 +1833,12 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, const TargetRegisterClass * SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const { - if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg())) + const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg()); + if (const RegisterBank *RB = RCOrRB.dyn_cast()) return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI); - return nullptr; + + const TargetRegisterClass *RC = RCOrRB.get(); + return getAllocatableClass(RC); } unsigned SIRegisterInfo::getVCC() const { @@ -1974,7 +1869,7 @@ MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg, SlotIndex UseIdx = LIS->getInstructionIndex(Use); SlotIndex DefIdx; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { if (!LIS->hasInterval(Reg)) return nullptr; LiveInterval &LI = LIS->getInterval(Reg); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 34487c96e72..ac3dea1a1a2 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -27,6 +27,7 @@ class SIMachineFunctionInfo; class SIRegisterInfo final : public AMDGPURegisterInfo { private: + const GCNSubtarget &ST; unsigned SGPRSetID; unsigned VGPRSetID; unsigned AGPRSetID; @@ -34,7 +35,6 @@ private: BitVector VGPRPressureSets; BitVector AGPRPressureSets; bool SpillSGPRToVGPR; - bool SpillSGPRToSMEM; bool isWave32; void classifyPressureSet(unsigned PSetID, unsigned Reg, @@ -46,10 +46,6 @@ public: return SpillSGPRToVGPR; } - bool spillSGPRToSMEM() const { - return SpillSGPRToSMEM; - } - /// Return the end register initially reserved for the scratch buffer in case /// spilling is needed. 
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; @@ -141,7 +137,7 @@ public: bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const { const TargetRegisterClass *RC; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) RC = MRI.getRegClass(Reg); else RC = getPhysRegClass(Reg); @@ -193,10 +189,7 @@ public: /// \returns True if operands defined with this operand type can accept /// an inline constant. i.e. An integer value in the range (-16, 64) or /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. - bool opCanUseInlineConstant(unsigned OpType) const { - return OpType >= AMDGPU::OPERAND_SRC_FIRST && - OpType <= AMDGPU::OPERAND_SRC_LAST; - } + bool opCanUseInlineConstant(unsigned OpType) const; unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, @@ -270,7 +263,7 @@ public: const MachineRegisterInfo &MRI) const override; const TargetRegisterClass *getBoolRC() const { - return isWave32 ? &AMDGPU::SReg_32_XM0RegClass + return isWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index d5948a7862c..82219cbdf3b 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -37,50 +37,52 @@ class getSubRegs { !if(!eq(size, 16), ret16, ret32)))))); } -let Namespace = "AMDGPU" in { -defset list AllRegAltNameIndices = { - def Reg32 : RegAltNameIndex; - def Reg64 : RegAltNameIndex; - def Reg96 : RegAltNameIndex; - def Reg128 : RegAltNameIndex; - def Reg160 : RegAltNameIndex; - def Reg256 : RegAltNameIndex; - def Reg512 : RegAltNameIndex; - def Reg1024 : RegAltNameIndex; +// Generates list of sequential register tuple names. +// E.g. 
RegSeq<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ] +class RegSeqNames { + int next = !add(start, stride); + int end_reg = !add(!add(start, size), -1); + list ret = + !if(!le(end_reg, last_reg), + !listconcat([prefix # "[" # start # ":" # end_reg # "]"], + RegSeqNames.ret), + []); } + +// Generates list of dags for register tupless. +class RegSeqDags { + dag trunc_rc = (trunc RC, + !if(!and(!eq(stride, 1), !eq(start, 0)), + !add(!add(last_reg, 2), !mul(size, -1)), + !add(last_reg, 1))); + list ret = + !if(!lt(start, size), + !listconcat([(add (decimate (shl trunc_rc, start), stride))], + RegSeqDags.ret), + []); } +class SIRegisterTuples Indices, RegisterClass RC, + int last_reg, int stride, int size, string prefix> : + RegisterTuples.ret, + RegSeqNames.ret>; + //===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// -class SIReg regIdx = 0, string prefix = "", - int regNo = !cast(regIdx)> : - Register, +class SIReg regIdx = 0> : + Register, DwarfRegNum<[!cast(HWEncoding)]> { let Namespace = "AMDGPU"; - let RegAltNameIndices = AllRegAltNameIndices; // This is the not yet the complete register encoding. An additional // bit is set for VGPRs. 
let HWEncoding = regIdx; } -class SIRegisterWithSubRegs subregs> : - RegisterWithSubRegs { - let RegAltNameIndices = AllRegAltNameIndices; - let AltNames = [ n, n, n, n, n, n, n, n ]; -} - // Special Registers def VCC_LO : SIReg<"vcc_lo", 106>; def VCC_HI : SIReg<"vcc_hi", 107>; @@ -93,7 +95,7 @@ def SP_REG : SIReg<"sp", 0>; def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>; // VCC for 64-bit instructions -def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, +def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -103,7 +105,7 @@ def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, def EXEC_LO : SIReg<"exec_lo", 126>; def EXEC_HI : SIReg<"exec_hi", 127>; -def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, +def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -134,7 +136,7 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254>; def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; -def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, +def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -145,7 +147,7 @@ def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_ def TBA_LO : SIReg<"tba_lo", 108>; def TBA_HI : SIReg<"tba_hi", 109>; -def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, +def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -155,7 +157,7 @@ def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, def TMA_LO : SIReg<"tma_lo", 110>; def TMA_HI : SIReg<"tma_hi", 111>; -def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, +def TMA : RegisterWithSubRegs<"tma", [TMA_LO, 
TMA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -175,7 +177,7 @@ multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { } class FlatReg encoding> : - SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>, + RegisterWithSubRegs<"flat_scratch", [lo, hi]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -191,19 +193,19 @@ def FLAT_SCR : FlatReg; // SGPR registers foreach Index = 0-105 in { - def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">; + def SGPR#Index : SIReg <"s"#Index, Index>; } // VGPR registers foreach Index = 0-255 in { - def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> { + def VGPR#Index : SIReg <"v"#Index, Index> { let HWEncoding{8} = 1; } } // AccVGPR registers foreach Index = 0-255 in { - def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> { + def AGPR#Index : SIReg <"a"#Index, Index> { let HWEncoding{8} = 1; } } @@ -226,102 +228,32 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "SGPR%u", 0, 105)), Reg32> { + (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. let AllocationPriority = 9; } // SGPR 64-bit registers -def SGPR_64Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 2)), - (add (decimate (shl SGPR_32, 1), 2))]>; +def SGPR_64Regs : SIRegisterTuples.ret, SGPR_32, 105, 2, 2, "s">; // SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs. 
-def SGPR_96Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 3)), - (add (decimate (shl SGPR_32, 1), 3)), - (add (decimate (shl SGPR_32, 2), 3))]>; +def SGPR_96Regs : SIRegisterTuples.ret, SGPR_32, 105, 3, 3, "s">; // SGPR 128-bit registers -def SGPR_128Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4))]>; +def SGPR_128Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 4, "s">; // SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs. -def SGPR_160Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4))]>; +def SGPR_160Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 5, "s">; // SGPR 256-bit registers -def SGPR_256Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4))]>; +def SGPR_256Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 8, "s">; // SGPR 512-bit registers -def SGPR_512Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - (add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 
14), 4)), - (add (decimate (shl SGPR_32, 15), 4))]>; +def SGPR_512Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 16, "s">; // SGPR 1024-bit registers -def SGPR_1024Regs : RegisterTuples.ret, - [(add (decimate SGPR_32, 4)), - (add (decimate (shl SGPR_32, 1), 4)), - (add (decimate (shl SGPR_32, 2), 4)), - (add (decimate (shl SGPR_32, 3), 4)), - (add (decimate (shl SGPR_32, 4), 4)), - (add (decimate (shl SGPR_32, 5), 4)), - (add (decimate (shl SGPR_32, 6), 4)), - (add (decimate (shl SGPR_32, 7), 4)), - (add (decimate (shl SGPR_32, 8), 4)), - (add (decimate (shl SGPR_32, 9), 4)), - (add (decimate (shl SGPR_32, 10), 4)), - (add (decimate (shl SGPR_32, 11), 4)), - (add (decimate (shl SGPR_32, 12), 4)), - (add (decimate (shl SGPR_32, 13), 4)), - (add (decimate (shl SGPR_32, 14), 4)), - (add (decimate (shl SGPR_32, 15), 4)), - (add (decimate (shl SGPR_32, 16), 4)), - (add (decimate (shl SGPR_32, 17), 4)), - (add (decimate (shl SGPR_32, 18), 4)), - (add (decimate (shl SGPR_32, 19), 4)), - (add (decimate (shl SGPR_32, 20), 4)), - (add (decimate (shl SGPR_32, 21), 4)), - (add (decimate (shl SGPR_32, 22), 4)), - (add (decimate (shl SGPR_32, 23), 4)), - (add (decimate (shl SGPR_32, 24), 4)), - (add (decimate (shl SGPR_32, 25), 4)), - (add (decimate (shl SGPR_32, 26), 4)), - (add (decimate (shl SGPR_32, 27), 4)), - (add (decimate (shl SGPR_32, 28), 4)), - (add (decimate (shl SGPR_32, 29), 4)), - (add (decimate (shl SGPR_32, 30), 4)), - (add (decimate (shl SGPR_32, 31), 4))]>; +def SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, @@ -330,51 +262,21 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, } // Trap handler TMP 64-bit registers -def TTMP_64Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 2)), - (add (decimate (shl TTMP_32, 1), 2))]>; +def TTMP_64Regs : SIRegisterTuples.ret, TTMP_32, 15, 2, 2, "ttmp">; // Trap handler TMP 128-bit 
registers -def TTMP_128Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4))]>; +def TTMP_128Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 4, "ttmp">; -def TTMP_256Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add (decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4))]>; +def TTMP_256Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 8, "ttmp">; -def TTMP_512Regs : RegisterTuples.ret, - [(add (decimate TTMP_32, 4)), - (add (decimate (shl TTMP_32, 1), 4)), - (add (decimate (shl TTMP_32, 2), 4)), - (add (decimate (shl TTMP_32, 3), 4)), - (add (decimate (shl TTMP_32, 4), 4)), - (add (decimate (shl TTMP_32, 5), 4)), - (add (decimate (shl TTMP_32, 6), 4)), - (add (decimate (shl TTMP_32, 7), 4)), - (add (decimate (shl TTMP_32, 8), 4)), - (add (decimate (shl TTMP_32, 9), 4)), - (add (decimate (shl TTMP_32, 10), 4)), - (add (decimate (shl TTMP_32, 11), 4)), - (add (decimate (shl TTMP_32, 12), 4)), - (add (decimate (shl TTMP_32, 13), 4)), - (add (decimate (shl TTMP_32, 14), 4)), - (add (decimate (shl TTMP_32, 15), 4))]>; +def TTMP_512Regs : SIRegisterTuples.ret, TTMP_32, 15, 4, 16, "ttmp">; class TmpRegTuplesBase subRegs, list indices = getSubRegs.ret, int index1 = !add(index, !add(size, -1)), string name = "ttmp["#index#":"#index1#"]"> : - SIRegisterWithSubRegs { + RegisterWithSubRegs { let HWEncoding = subRegs[0].HWEncoding; let SubRegIndices = indices; } @@ -448,196 +350,80 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10, TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>; +class RegisterTypes 
reg_types> { + list types = reg_types; +} + +def Reg16Types : RegisterTypes<[i16, f16]>; +def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>; + + // VGPR 32-bit registers // i16/f16 only on VI+ -def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "VGPR%u", 0, 255)), Reg32> { +def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, + (add (sequence "VGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; } // VGPR 64-bit registers -def VGPR_64 : RegisterTuples.ret, - [(add (trunc VGPR_32, 255)), - (add (shl VGPR_32, 1))]>; +def VGPR_64 : SIRegisterTuples.ret, VGPR_32, 255, 1, 2, "v">; // VGPR 96-bit registers -def VGPR_96 : RegisterTuples.ret, - [(add (trunc VGPR_32, 254)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2))]>; +def VGPR_96 : SIRegisterTuples.ret, VGPR_32, 255, 1, 3, "v">; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples.ret, - [(add (trunc VGPR_32, 253)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3))]>; +def VGPR_128 : SIRegisterTuples.ret, VGPR_32, 255, 1, 4, "v">; // VGPR 160-bit registers -def VGPR_160 : RegisterTuples.ret, - [(add (trunc VGPR_32, 252)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4))]>; +def VGPR_160 : SIRegisterTuples.ret, VGPR_32, 255, 1, 5, "v">; // VGPR 256-bit registers -def VGPR_256 : RegisterTuples.ret, - [(add (trunc VGPR_32, 249)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7))]>; +def VGPR_256 : SIRegisterTuples.ret, VGPR_32, 255, 1, 8, "v">; // VGPR 512-bit registers -def VGPR_512 : RegisterTuples.ret, - [(add (trunc VGPR_32, 241)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - 
(add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15))]>; +def VGPR_512 : SIRegisterTuples.ret, VGPR_32, 255, 1, 16, "v">; // VGPR 1024-bit registers -def VGPR_1024 : RegisterTuples.ret, - [(add (trunc VGPR_32, 225)), - (add (shl VGPR_32, 1)), - (add (shl VGPR_32, 2)), - (add (shl VGPR_32, 3)), - (add (shl VGPR_32, 4)), - (add (shl VGPR_32, 5)), - (add (shl VGPR_32, 6)), - (add (shl VGPR_32, 7)), - (add (shl VGPR_32, 8)), - (add (shl VGPR_32, 9)), - (add (shl VGPR_32, 10)), - (add (shl VGPR_32, 11)), - (add (shl VGPR_32, 12)), - (add (shl VGPR_32, 13)), - (add (shl VGPR_32, 14)), - (add (shl VGPR_32, 15)), - (add (shl VGPR_32, 16)), - (add (shl VGPR_32, 17)), - (add (shl VGPR_32, 18)), - (add (shl VGPR_32, 19)), - (add (shl VGPR_32, 20)), - (add (shl VGPR_32, 21)), - (add (shl VGPR_32, 22)), - (add (shl VGPR_32, 23)), - (add (shl VGPR_32, 24)), - (add (shl VGPR_32, 25)), - (add (shl VGPR_32, 26)), - (add (shl VGPR_32, 27)), - (add (shl VGPR_32, 28)), - (add (shl VGPR_32, 29)), - (add (shl VGPR_32, 30)), - (add (shl VGPR_32, 31))]>; +def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "AGPR%u", 0, 255)), Reg32> { + (add (sequence "AGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; } // AGPR 64-bit registers -def AGPR_64 : RegisterTuples.ret, - [(add (trunc AGPR_32, 255)), - (add (shl AGPR_32, 1))]>; +def AGPR_64 : SIRegisterTuples.ret, AGPR_32, 255, 1, 2, "a">; // AGPR 128-bit registers -def AGPR_128 : RegisterTuples.ret, - [(add (trunc AGPR_32, 253)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3))]>; +def AGPR_128 : SIRegisterTuples.ret, AGPR_32, 255, 1, 4, "a">; // AGPR 512-bit registers -def AGPR_512 : 
RegisterTuples.ret, - [(add (trunc AGPR_32, 241)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl AGPR_32, 15))]>; +def AGPR_512 : SIRegisterTuples.ret, AGPR_32, 255, 1, 16, "a">; // AGPR 1024-bit registers -def AGPR_1024 : RegisterTuples.ret, - [(add (trunc AGPR_32, 225)), - (add (shl AGPR_32, 1)), - (add (shl AGPR_32, 2)), - (add (shl AGPR_32, 3)), - (add (shl AGPR_32, 4)), - (add (shl AGPR_32, 5)), - (add (shl AGPR_32, 6)), - (add (shl AGPR_32, 7)), - (add (shl AGPR_32, 8)), - (add (shl AGPR_32, 9)), - (add (shl AGPR_32, 10)), - (add (shl AGPR_32, 11)), - (add (shl AGPR_32, 12)), - (add (shl AGPR_32, 13)), - (add (shl AGPR_32, 14)), - (add (shl AGPR_32, 15)), - (add (shl AGPR_32, 16)), - (add (shl AGPR_32, 17)), - (add (shl AGPR_32, 18)), - (add (shl AGPR_32, 19)), - (add (shl AGPR_32, 20)), - (add (shl AGPR_32, 21)), - (add (shl AGPR_32, 22)), - (add (shl AGPR_32, 23)), - (add (shl AGPR_32, 24)), - (add (shl AGPR_32, 25)), - (add (shl AGPR_32, 26)), - (add (shl AGPR_32, 27)), - (add (shl AGPR_32, 28)), - (add (shl AGPR_32, 29)), - (add (shl AGPR_32, 30)), - (add (shl AGPR_32, 31))]>; +def AGPR_1024 : SIRegisterTuples.ret, AGPR_32, 255, 1, 32, "a">; //===----------------------------------------------------------------------===// // Register classes used as source and destination //===----------------------------------------------------------------------===// def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> { + (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> { let isAllocatable = 0; let CopyCost = -1; } def Pseudo_SReg_128 : 
RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, - (add PRIVATE_RSRC_REG), Reg128> { + (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; } def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add LDS_DIRECT), Reg32> { + (add LDS_DIRECT)> { let isAllocatable = 0; let CopyCost = -1; } @@ -648,41 +434,40 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, - SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> { + SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { let AllocationPriority = 10; } def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> { + (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 10; } def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> { + (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 10; } // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 10; } def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS), - Reg32> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, - (add SGPR_64Regs), Reg64> { + (add SGPR_64Regs)> { let 
CopyCost = 1; let AllocationPriority = 11; } // CCR (call clobbered registers) SGPR 64-bit registers def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, - (add (trunc SGPR_64, 16)), Reg64> { + (add (trunc SGPR_64, 16))> { let CopyCost = SGPR_64.CopyCost; let AllocationPriority = SGPR_64.AllocationPriority; } @@ -693,13 +478,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, } def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 13; } def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SReg_64_XEXEC, EXEC), Reg64> { + (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 13; } @@ -722,17 +507,17 @@ let CopyCost = 2 in { // There are no 3-component scalar instructions, but this is needed // for symmetry with VGPRs. def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96Regs), Reg96> { + (add SGPR_96Regs)> { let AllocationPriority = 14; } def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96), Reg96> { + (add SGPR_96)> { let AllocationPriority = 14; } def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, - (add SGPR_128Regs), Reg128> { + (add SGPR_128Regs)> { let AllocationPriority = 15; } @@ -742,8 +527,9 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, } def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add SGPR_128, TTMP_128), Reg128> { + (add SGPR_128, TTMP_128)> { let AllocationPriority = 15; + let isAllocatable = 0; } } // End CopyCost = 2 @@ -751,17 +537,16 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, // There are no 5-component scalar instructions, but this is needed // for symmetry with VGPRs. 
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160Regs), Reg160> { + (add SGPR_160Regs)> { let AllocationPriority = 16; } def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160), Reg160> { + (add SGPR_160)> { let AllocationPriority = 16; } -def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs), - Reg256> { +def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { let AllocationPriority = 17; } @@ -770,14 +555,14 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { } def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, - (add SGPR_256, TTMP_256), Reg256> { + (add SGPR_256, TTMP_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; let AllocationPriority = 17; } def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add SGPR_512Regs), Reg512> { + (add SGPR_512Regs)> { let AllocationPriority = 18; } @@ -787,31 +572,31 @@ def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, } def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add SGPR_512, TTMP_512), Reg512> { + (add SGPR_512, TTMP_512)> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; let AllocationPriority = 18; } def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, LDS_DIRECT_CLASS), Reg32> { + (add VGPR_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add SGPR_1024Regs), Reg1024> { + (add SGPR_1024Regs)> { let AllocationPriority = 19; } def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add SGPR_1024), Reg1024> { + (add SGPR_1024)> { let CopyCost = 16; let AllocationPriority = 19; } // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, - (add VGPR_64), Reg64> { +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, 
v4f16, v4i16, p0, p1, p4], 32, + (add VGPR_64)> { let Size = 64; // Requires 2 v_mov_b32 to copy @@ -819,7 +604,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32 let AllocationPriority = 2; } -def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> { +def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> { let Size = 96; // Requires 3 v_mov_b32 to copy @@ -828,7 +613,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> } def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add VGPR_128), Reg128> { + (add VGPR_128)> { let Size = 128; // Requires 4 v_mov_b32 to copy @@ -837,7 +622,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, } def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add VGPR_160), Reg160> { + (add VGPR_160)> { let Size = 160; // Requires 5 v_mov_b32 to copy @@ -846,28 +631,28 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, } def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, - (add VGPR_256), Reg256> { + (add VGPR_256)> { let Size = 256; let CopyCost = 8; let AllocationPriority = 6; } def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add VGPR_512), Reg512> { + (add VGPR_512)> { let Size = 512; let CopyCost = 16; let AllocationPriority = 7; } def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add VGPR_1024), Reg1024> { + (add VGPR_1024)> { let Size = 1024; let CopyCost = 32; let AllocationPriority = 8; } def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, - (add AGPR_64), Reg64> { + (add AGPR_64)> { let Size = 64; let CopyCost = 5; @@ -875,7 +660,7 @@ def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32 } def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add AGPR_128), Reg128> { + (add AGPR_128)> { let Size = 128; // Requires 4 v_accvgpr_write and 4 
v_accvgpr_read to copy + burn 1 vgpr @@ -884,40 +669,39 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, } def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add AGPR_512), Reg512> { + (add AGPR_512)> { let Size = 512; let CopyCost = 33; let AllocationPriority = 7; } def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add AGPR_1024), Reg1024> { + (add AGPR_1024)> { let Size = 1024; let CopyCost = 65; let AllocationPriority = 8; } -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> { - let Size = 32; +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { + let Size = 1; } def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> { + (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } -def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64), - Reg64> { +def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; } def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add AGPR_32, VGPR_32), Reg32> { + (add AGPR_32, VGPR_32)> { let isAllocatable = 0; } def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, - (add AReg_64, VReg_64), Reg64> { + (add AReg_64, VReg_64)> { let isAllocatable = 0; } diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 7ee178149c7..8afca2cdc32 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -77,8 +77,8 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, // Try to fold Src0 MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg()) { - unsigned Reg = Src0.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { + Register Reg = Src0.getReg(); + if (Register::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { MachineInstr *Def = 
MRI.getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { MachineOperand &MovSrc = Def->getOperand(1); @@ -360,8 +360,7 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST, } if (NewImm != 0) { - if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) && - SrcReg->isReg()) { + if (Register::isVirtualRegister(Dest->getReg()) && SrcReg->isReg()) { MRI.setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg()); MRI.setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg()); return true; @@ -394,12 +393,11 @@ static bool instAccessReg(iterator_range &&R, if (!MO.isReg()) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(Reg) && + Register::isPhysicalRegister(MO.getReg())) { if (TRI.regsOverlap(Reg, MO.getReg())) return true; - } else if (MO.getReg() == Reg && - TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (MO.getReg() == Reg && Register::isVirtualRegister(Reg)) { LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) & TRI.getSubRegIndexLaneMask(MO.getSubReg()); if (Overlap.any()) @@ -425,7 +423,7 @@ static TargetInstrInfo::RegSubRegPair getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { if (TRI.getRegSizeInBits(Reg, MRI) != 32) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I)); } else { LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub); @@ -459,13 +457,13 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 || MovT.getOpcode() == AMDGPU::COPY); - unsigned T = MovT.getOperand(0).getReg(); + Register T = MovT.getOperand(0).getReg(); unsigned Tsub = MovT.getOperand(0).getSubReg(); MachineOperand &Xop = MovT.getOperand(1); if (!Xop.isReg()) return nullptr; - unsigned X = Xop.getReg(); + Register X = 
Xop.getReg(); unsigned Xsub = Xop.getSubReg(); unsigned Size = TII->getOpSize(MovT, 0) / 4; @@ -484,7 +482,7 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, MovY.getOperand(1).getSubReg() != Tsub) continue; - unsigned Y = MovY.getOperand(0).getReg(); + Register Y = MovY.getOperand(0).getReg(); unsigned Ysub = MovY.getOperand(0).getSubReg(); if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent()) @@ -579,7 +577,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // XXX - not exactly a check for post-regalloc run. MachineOperand &Src = MI.getOperand(1); if (Src.isImm() && - TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) { + Register::isPhysicalRegister(MI.getOperand(0).getReg())) { int32_t ReverseImm; if (isReverseInlineImm(TII, Src, ReverseImm)) { MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32)); @@ -643,8 +641,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // FIXME: This could work better if hints worked with subregisters. If // we have a vector add of a constant, we usually don't get the correct // allocation due to the subregister usage. 
- if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) && - Src0->isReg()) { + if (Register::isVirtualRegister(Dest->getReg()) && Src0->isReg()) { MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg()); MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg()); continue; @@ -672,8 +669,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - if (Src.isImm() && - TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) { + if (Src.isImm() && Register::isPhysicalRegister(Dst.getReg())) { int32_t ReverseImm; if (isKImmOperand(TII, Src)) MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); @@ -721,8 +717,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); if (TII->isVOPC(Op32)) { - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(DstReg)) { // VOPC instructions can only write to the VCC register. 
We can't // force them to use VCC here, because this is only one register and // cannot deal with sequences which would require multiple copies of @@ -745,8 +741,8 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { TII->getNamedOperand(MI, AMDGPU::OpName::src2); if (!Src2->isReg()) continue; - unsigned SReg = Src2->getReg(); - if (TargetRegisterInfo::isVirtualRegister(SReg)) { + Register SReg = Src2->getReg(); + if (Register::isVirtualRegister(SReg)) { MRI.setRegAllocationHint(SReg, 0, VCCReg); continue; } @@ -766,7 +762,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { bool Next = false; if (SDst->getReg() != VCCReg) { - if (TargetRegisterInfo::isVirtualRegister(SDst->getReg())) + if (Register::isVirtualRegister(SDst->getReg())) MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg); Next = true; } @@ -774,7 +770,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { // All of the instructions with carry outs also have an SGPR input in // src2. if (Src2 && Src2->getReg() != VCCReg) { - if (TargetRegisterInfo::isVirtualRegister(Src2->getReg())) + if (Register::isVirtualRegister(Src2->getReg())) MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg); Next = true; } diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 4e07efff55d..cb4cf68d709 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -273,12 +273,12 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag, if (!Use.isReg() || !Use.isUse()) continue; - unsigned Reg = Use.getReg(); + Register Reg = Use.getReg(); // Handle physical registers that we need to track; this is mostly relevant // for VCC, which can appear as the (implicit) input of a uniform branch, // e.g. when a loop counter is stored in a VGPR. 
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO) continue; @@ -312,6 +312,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, char GlobalFlags = 0; bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs"); SmallVector SetInactiveInstrs; + SmallVector SoftWQMInstrs; // We need to visit the basic blocks in reverse post-order so that we visit // defs before uses, in particular so that we don't accidentally mark an @@ -340,6 +341,10 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // correct, so we need it to be in WQM. Flags = StateWQM; LowerToCopyInstrs.push_back(&MI); + } else if (Opcode == AMDGPU::SOFT_WQM) { + LowerToCopyInstrs.push_back(&MI); + SoftWQMInstrs.push_back(&MI); + continue; } else if (Opcode == AMDGPU::WWM) { // The WWM intrinsic doesn't make the same guarantee, and plus it needs // to be executed in WQM or Exact so that its copy doesn't clobber @@ -356,8 +361,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (Inactive.isUndef()) { LowerToCopyInstrs.push_back(&MI); } else { - unsigned Reg = Inactive.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = Inactive.getReg(); + if (Register::isVirtualRegister(Reg)) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) markInstruction(DefMI, StateWWM, Worklist); } @@ -385,9 +390,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); - if (!TRI->isVirtualRegister(Reg) && + if (!Register::isVirtualRegister(Reg) && TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) { Flags = StateWQM; break; @@ -407,9 +412,12 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // Mark sure that any SET_INACTIVE instructions are computed in WQM if WQM is // ever used anywhere in the function. 
This implements the corresponding // semantics of @llvm.amdgcn.set.inactive. + // Similarly for SOFT_WQM instructions, implementing @llvm.amdgcn.softwqm. if (GlobalFlags & StateWQM) { for (MachineInstr *MI : SetInactiveInstrs) markInstruction(*MI, StateWQM, Worklist); + for (MachineInstr *MI : SoftWQMInstrs) + markInstruction(*MI, StateWQM, Worklist); } return GlobalFlags; @@ -548,7 +556,7 @@ bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const { MachineBasicBlock::iterator SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before) { - unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + Register SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); MachineInstr *Save = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg) @@ -832,7 +840,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { for (MachineInstr *MI : LiveMaskQueries) { const DebugLoc &DL = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + Register Dest = MI->getOperand(0).getReg(); MachineInstr *Copy = BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest) .addReg(LiveMaskReg); @@ -847,13 +855,12 @@ void SIWholeQuadMode::lowerCopyInstrs() { for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--) MI->RemoveOperand(i); - const unsigned Reg = MI->getOperand(0).getReg(); + const Register Reg = MI->getOperand(0).getReg(); if (TRI->isVGPR(*MRI, Reg)) { - const TargetRegisterClass *regClass = - TargetRegisterInfo::isVirtualRegister(Reg) - ? MRI->getRegClass(Reg) - : TRI->getPhysRegClass(Reg); + const TargetRegisterClass *regClass = Register::isVirtualRegister(Reg) + ? 
MRI->getRegClass(Reg) + : TRI->getPhysRegClass(Reg); const unsigned MovOp = TII->getMovOpcode(regClass); MI->setDesc(TII->get(MovOp)); @@ -885,7 +892,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; if (!(GlobalFlags & StateWQM)) { lowerLiveMaskQueries(Exec); - if (!(GlobalFlags & StateWWM)) + if (!(GlobalFlags & StateWWM) && LowerToCopyInstrs.empty()) return !LiveMaskQueries.empty(); } else { // Store a copy of the original live mask when required diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 1b410b6b591..1a74ebbf816 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -793,9 +793,18 @@ multiclass SMLoad_Pattern { // selector to prefer those. let AddedComplexity = 100 in { -defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; -defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; +foreach vt = Reg32Types.types in { +defm : SMRD_Pattern <"S_LOAD_DWORD", vt>; +} + +foreach vt = SReg_64.RegTypes in { +defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>; +} + +foreach vt = SReg_128.RegTypes in { +defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>; +} + defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index dfafdccc05a..d31a49f428e 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -181,7 +181,9 @@ def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">; def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32", [(set i32:$sdst, (ctpop i32:$src0))] >; -def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64">; +def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64", + [(set i32:$sdst, (ctpop i64:$src0))] +>; } // End Defs = [SCC] def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">; @@ -417,16 +419,16 @@ def S_SUBB_U32 : SOP2_32 
<"s_subb_u32", let isCommutable = 1 in { def S_MIN_I32 : SOP2_32 <"s_min_i32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (smin i32:$src0, i32:$src1))] >; def S_MIN_U32 : SOP2_32 <"s_min_u32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (umin i32:$src0, i32:$src1))] >; def S_MAX_I32 : SOP2_32 <"s_max_i32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (smax i32:$src0, i32:$src1))] >; def S_MAX_U32 : SOP2_32 <"s_max_u32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set i32:$sdst, (umax i32:$src0, i32:$src1))] >; } // End isCommutable = 1 } // End Defs = [SCC] @@ -853,13 +855,13 @@ class SOPC_Base op, RegisterOperand rc0, RegisterOperand rc1, let Defs = [SCC]; } class SOPC_Helper op, RegisterOperand rc, ValueType vt, - string opName, PatLeaf cond> : SOPC_Base < + string opName, SDPatternOperator cond> : SOPC_Base < op, rc, rc, opName, [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > { } class SOPC_CMP_32 op, string opName, - PatLeaf cond = COND_NULL, string revOp = opName> + SDPatternOperator cond = COND_NULL, string revOp = opName> : SOPC_Helper, Commutable_REV, SOPKInstTable<0, opName> { @@ -868,7 +870,7 @@ class SOPC_CMP_32 op, string opName, } class SOPC_CMP_64 op, string opName, - PatLeaf cond = COND_NULL, string revOp = opName> + SDPatternOperator cond = COND_NULL, string revOp = opName> : SOPC_Helper, Commutable_REV { let isCompare = 1; @@ -1076,8 +1078,6 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", [(int_amdgcn_s_barrier)]> { let SchedRW = [WriteBarrier]; let simm16 = 0; - let mayLoad = 1; - let mayStore = 1; let isConvergent = 1; } @@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> { let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16", - [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>; + [(int_amdgcn_s_waitcnt 
timm:$simm16)]>; def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; @@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the // maximum really 15 on VI? def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16), - "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> { + "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; @@ -1110,12 +1110,11 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">; let Uses = [EXEC, M0] in { // FIXME: Should this be mayLoad+mayStore? def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", - [(AMDGPUsendmsg (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>; def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", - [(AMDGPUsendmsghalt (i32 imm:$simm16))] ->; + [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>; + } // End Uses = [EXEC, M0] def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> { @@ -1126,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; } def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16", - [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> { + [(int_amdgcn_s_incperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; } def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16", - [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> { + [(int_amdgcn_s_decperflevel timm:$simm16)]> { let hasSideEffects = 1; let mayLoad = 1; let mayStore = 1; @@ -1169,7 +1168,10 @@ let SubtargetPredicate = isGFX10Plus in { def S_ROUND_MODE : SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; def 
S_DENORM_MODE : - SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">; + SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", + [(SIdenorm_mode (i32 timm:$simm16))]> { + let hasSideEffects = 1; + } def S_TTRACEDATA_IMM : SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">; } // End SubtargetPredicate = isGFX10Plus @@ -1178,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in { // S_GETREG_B32 Intrinsic Pattern. //===----------------------------------------------------------------------===// def : GCNPat < - (int_amdgcn_s_getreg imm:$simm16), + (int_amdgcn_s_getreg timm:$simm16), (S_GETREG_B32 (as_i16imm $simm16)) >; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e90f40e6abe..afb2fd987af 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -131,29 +131,70 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { struct MUBUFInfo { uint16_t Opcode; uint16_t BaseOpcode; - uint8_t dwords; + uint8_t elements; bool has_vaddr; bool has_srsrc; bool has_soffset; }; +struct MTBUFInfo { + uint16_t Opcode; + uint16_t BaseOpcode; + uint8_t elements; + bool has_vaddr; + bool has_srsrc; + bool has_soffset; +}; + +#define GET_MTBUFInfoTable_DECL +#define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" +int getMTBUFBaseOpcode(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); + return Info ? Info->BaseOpcode : -1; +} + +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { + const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); + return Info ? Info->Opcode : -1; +} + +int getMTBUFElements(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->elements : 0; +} + +bool getMTBUFHasVAddr(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? 
Info->has_vaddr : false; +} + +bool getMTBUFHasSrsrc(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_srsrc : false; +} + +bool getMTBUFHasSoffset(unsigned Opc) { + const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); + return Info ? Info->has_soffset : false; +} + int getMUBUFBaseOpcode(unsigned Opc) { const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); return Info ? Info->BaseOpcode : -1; } -int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) { - const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords); +int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { + const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); return Info ? Info->Opcode : -1; } -int getMUBUFDwords(unsigned Opc) { +int getMUBUFElements(unsigned Opc) { const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); - return Info ? Info->dwords : 0; + return Info ? Info->elements : 0; } bool getMUBUFHasVAddr(unsigned Opc) { @@ -241,7 +282,7 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { - return getMaxWavesPerEU() * getEUsPerCU(STI); + return getMaxWavesPerEU(STI) * getEUsPerCU(STI); } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, @@ -253,9 +294,11 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxWavesPerEU() { +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { // FIXME: Need to take scratch memory into account. 
- return 10; + if (!isGFX10(*STI)) + return 10; + return 20; } unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, @@ -317,7 +360,7 @@ unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { if (Version.Major >= 10) return 0; - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); @@ -394,17 +437,19 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, } unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { - return 256; + if (!isGFX10(*STI)) + return 256; + return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512; } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - return getTotalNumVGPRs(STI); + return 256; } unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumVGPRs = alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), @@ -510,7 +555,7 @@ bool isReadOnlySegment(const GlobalValue *GV) { } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() != Triple::AMDHSA; + return TT.getOS() == Triple::AMDPAL; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 209ef7eef74..f78dadd447f 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -94,7 +94,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI without any kind of limitation. -unsigned getMaxWavesPerEU(); +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI and limited by given \p FlatWorkGroupSize. 
@@ -263,14 +263,32 @@ struct MIMGInfo { LLVM_READONLY const MIMGInfo *getMIMGInfo(unsigned Opc); +LLVM_READONLY +int getMTBUFBaseOpcode(unsigned Opc); + +LLVM_READONLY +int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements); + +LLVM_READONLY +int getMTBUFElements(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasVAddr(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSrsrc(unsigned Opc); + +LLVM_READONLY +bool getMTBUFHasSoffset(unsigned Opc); + LLVM_READONLY int getMUBUFBaseOpcode(unsigned Opc); LLVM_READONLY -int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords); +int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements); LLVM_READONLY -int getMUBUFDwords(unsigned Opc); +int getMUBUFElements(unsigned Opc); LLVM_READONLY bool getMUBUFHasVAddr(unsigned Opc); diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index db20d5ccf5f..207e4232e82 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -21,6 +21,8 @@ #include "SIDefines.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/EndianStream.h" diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 6bc416ed7d4..f1cdc3097dc 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -104,9 +104,21 @@ multiclass VOP1Inst { def _e32 : VOP1_Pseudo ; def _e64 : VOP3_Pseudo .ret>; - def _sdwa : VOP1_SDWA_Pseudo ; + + foreach _ = BoolToList.ret in + def _sdwa : VOP1_SDWA_Pseudo ; + foreach _ = BoolToList.ret in def _dpp : VOP1_DPP_Pseudo ; + + def : MnemonicAlias, LetDummies; + def : MnemonicAlias, LetDummies; + + foreach _ = BoolToList.ret in + def : MnemonicAlias, LetDummies; + + foreach _ = BoolToList.ret in + def : MnemonicAlias, LetDummies; } // Special profile for instructions which have 
clamp @@ -227,10 +239,10 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; } // End SchedRW = [WriteQuarterRate32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>; -defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; +defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; +defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; -defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; +defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; let SchedRW = [WriteDoubleAdd] in { defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; @@ -434,7 +446,7 @@ let SubtargetPredicate = isGFX10Plus in { // Target-specific instruction encodings. //===----------------------------------------------------------------------===// -class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : +class VOP1_DPP op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : VOP_DPP { let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; @@ -448,8 +460,9 @@ class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = let Inst{31-25} = 0x3f; } -class VOP1_DPP16 op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : - VOP1_DPP { +class VOP1_DPP16 op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> : + VOP1_DPP, + SIMCInstr { let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); let SubtargetPredicate = HasDPP16; } @@ -492,6 +505,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP1_Real_sdwa_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOP1_SDWA9Ae(NAME#"_sdwa").Pfl> { @@ -499,11 +513,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } 
multiclass VOP1_Real_dpp_gfx10 op> { - def _dpp_gfx10 : VOP1_DPP16(NAME#"_e32")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP1_DPP16(NAME#"_dpp")> { let DecoderNamespace = "SDWA10"; } } multiclass VOP1_Real_dpp8_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP1_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } @@ -704,10 +720,12 @@ multiclass VOP1_Real_e32e64_vi op> { multiclass VOP1_Real_vi op> { defm NAME : VOP1_Real_e32e64_vi ; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOP1_SDWAe (NAME#"_sdwa").Pfl>; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -831,25 +849,25 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo; -let OtherPredicates = [isGFX8GFX9] in { +let OtherPredicates = [isGFX8Plus] in { def : GCNPat < - (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl)), + (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) >; def : GCNPat < - (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, - imm:$bank_mask, imm:$bound_ctrl)), + (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask, + timm:$bank_mask, timm:$bound_ctrl)), (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) >; -} // End OtherPredicates = [isGFX8GFX9] +} // End OtherPredicates = [isGFX8Plus] let OtherPredicates = [isGFX8Plus] in { def : GCNPat< @@ -885,6 +903,7 @@ multiclass VOP1_Real_gfx9 op> { defm NAME : VOP1_Real_e32e64_vi ; } 
+ foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP1_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -904,23 +923,7 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let OtherPredicates = [isGFX10Plus] in { def : GCNPat < - (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)), + (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0)) >; - -def : GCNPat < - (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl)), - (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl), (i32 0)) ->; - -def : GCNPat < - (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, - imm:$bank_mask, imm:$bound_ctrl)), - (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl), - (as_i32imm $row_mask), (as_i32imm $bank_mask), - (as_i1imm $bound_ctrl), (i32 0)) ->; } // End OtherPredicates = [isGFX10Plus] diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 1b30cd2ed51..1ab0fc1ab58 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -147,7 +147,8 @@ multiclass VOP2Inst_sdwa { let renamedInGFX9 = GFX9Renamed in { - def _sdwa : VOP2_SDWA_Pseudo ; + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo ; } // End renamedInGFX9 = GFX9Renamed } @@ -179,9 +180,10 @@ multiclass VOP2bInst { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; } @@ -220,9 +222,10 @@ multiclass VOP2eInst , Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo { - let AsmMatchConverter = "cvtSdwaVOP2b"; - } + foreach _ = BoolToList.ret in + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2e"; + } 
foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; @@ -251,7 +254,9 @@ multiclass VOP2eInstAliases { class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm); + field dag Ins32 = !if(!eq(vt.Size, 32), + (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), + (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); field bit HasExt = 0; // Hack to stop printing _e64 @@ -519,7 +524,7 @@ def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, } // End isConvergent = 1 defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT, add_ctpop>; defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_lo>; defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; @@ -539,9 +544,9 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma let SubtargetPredicate = isGFX6GFX7GFX10 in { let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; -defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>; -defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>; -defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; +defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>; +defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>; +defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>; } // End isCommutable = 1 } // End SubtargetPredicate = isGFX6GFX7GFX10 @@ -606,9 +611,9 @@ def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; } // End FPDPRounding = 1 -defm V_LSHLREV_B16 : VOP2Inst 
<"v_lshlrev_b16", VOP_I16_I16_I16>; -defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>; -defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>; +defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>; +defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>; +defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>; let isCommutable = 1 in { let FPDPRounding = 1 in { @@ -618,16 +623,16 @@ defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } // End FPDPRounding = 1 -defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>; -defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>; +defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>; +defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>; defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; -defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>; +defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; -defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>; -defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>; -defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>; -defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>; +defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; +defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; +defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; +defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { @@ -653,16 +658,17 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", 
VOP_MAC_F32>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1, - isCommutable = 1 in { + isCommutable = 1, + IsDOT = 1 in { let SubtargetPredicate = HasDot5Insts in - defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; + defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; let SubtargetPredicate = HasDot6Insts in - defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; + defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; let SubtargetPredicate = HasDot4Insts in - defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; + defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; let SubtargetPredicate = HasDot3Insts in - defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; + defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; } let AddedComplexity = 30 in { @@ -719,50 +725,17 @@ defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; // Note: 16-bit instructions produce a 0 result in the high 16-bits // on GFX8 and GFX9 and preserve high 16 bits on GFX10+ -def ClearHI16 : OutPatFrag<(ops node:$op), - (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>; - -multiclass Arithmetic_i16_Pats { - -def : GCNPat< - (op i16:$src0, i16:$src1), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)) ->; +multiclass Arithmetic_i16_0Hi_Pats { def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)) + (inst $src0, $src1) >; def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)), - sub0, - (V_MOV_B32_e32 (i32 0)), sub1) ->; -} - -multiclass Bits_OpsRev_i16_Pats { - -def : GCNPat< - (op i16:$src0, i16:$src1), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst 
$src1, $src0)), (inst $src1, $src0)) ->; - -def : GCNPat< - (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)) ->; - - -def : GCNPat< - (i64 (zext (op i16:$src0, i16:$src1))), - (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)), - sub0, + (inst $src0, $src1), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } @@ -774,53 +747,36 @@ class ZExt_i16_i1_Pat : GCNPat < $src) >; +foreach vt = [i16, v2i16] in { +def : GCNPat < + (and vt:$src0, vt:$src1), + (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; + +def : GCNPat < + (or vt:$src0, vt:$src1), + (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; + +def : GCNPat < + (xor vt:$src0, vt:$src1), + (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) +>; +} + let Predicates = [Has16BitInsts] in { let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -} - -let Predicates = [Has16BitInsts, isGFX10Plus] in { -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -defm : Arithmetic_i16_Pats; -} - -def : GCNPat < - (and i16:$src0, i16:$src1), - (V_AND_B32_e64 $src0, $src1) ->; - -def : GCNPat < - (or i16:$src0, i16:$src1), - (V_OR_B32_e64 $src0, $src1) ->; - -def : GCNPat < - (xor i16:$src0, i16:$src1), - (V_XOR_B32_e64 $src0, $src1) ->; - -let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; -} - -let Predicates = [Has16BitInsts, isGFX10Plus] in { -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; -defm : Bits_OpsRev_i16_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : 
Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; +defm : Arithmetic_i16_0Hi_Pats; } def : ZExt_i16_i1_Pat; @@ -847,7 +803,7 @@ def : GCNPat< // Target-specific instruction encodings. //===----------------------------------------------------------------------===// -class VOP2_DPP op, VOP2_Pseudo ps, +class VOP2_DPP op, VOP2_DPP_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl, bit IsDPP16 = 0> : VOP_DPP { @@ -865,13 +821,18 @@ class VOP2_DPP op, VOP2_Pseudo ps, let Inst{31} = 0x0; } -class VOP2_DPP16 op, VOP2_Pseudo ps, +class Base_VOP2_DPP16 op, VOP2_DPP_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP { let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst); let SubtargetPredicate = HasDPP16; } +class VOP2_DPP16 op, VOP2_DPP_Pseudo ps, + string opName = ps.OpName, VOPProfile p = ps.Pfl> : + Base_VOP2_DPP16, + SIMCInstr ; + class VOP2_DPP8 op, VOP2_Pseudo ps, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP_DPP8 { @@ -924,6 +885,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP2_Real_sdwa_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOP2_SDWA9Ae(NAME#"_sdwa").Pfl> { @@ -931,11 +893,13 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP2_Real_dpp_gfx10 op> { - def _dpp_gfx10 : VOP2_DPP16(NAME#"_e32")> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(NAME#"_dpp")> { let DecoderNamespace = "SDWA10"; } } multiclass VOP2_Real_dpp8_gfx10 op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } @@ -964,6 +928,7 
@@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let DecoderNamespace = "SDWA10" in { multiclass VOP2_Real_sdwa_gfx10_with_name op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { @@ -973,13 +938,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } multiclass VOP2_Real_dpp_gfx10_with_name op, string opName, string asmName> { - def _dpp_gfx10 : VOP2_DPP16(opName#"_e32")> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp")> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP16; } } multiclass VOP2_Real_dpp8_gfx10_with_name op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32")> { VOP2_Pseudo ps = !cast(opName#"_e32"); let AsmString = asmName # ps.Pfl.AsmDPP8; @@ -989,13 +956,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } // End DecoderNamespace = "SDWA10" //===------------------------------ VOP2be ------------------------------===// - multiclass VOP2be_Real_gfx10 op, string opName, string asmName> { + multiclass VOP2be_Real_e32_gfx10 op, string opName, string asmName> { def _e32_gfx10 : VOP2_Real(opName#"_e32"), SIEncodingFamily.GFX10>, VOP2e(opName#"_e32").Pfl> { VOP2_Pseudo Ps = !cast(opName#"_e32"); let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); } + } + multiclass VOP2be_Real_e64_gfx10 op, string opName, string asmName> { def _e64_gfx10 : VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, @@ -1003,6 +972,9 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { VOP3_Pseudo Ps = !cast(opName#"_e64"); let AsmString = asmName # Ps.AsmOperands; } + } + multiclass VOP2be_Real_sdwa_gfx10 op, string opName, string 
asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { @@ -1010,64 +982,76 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w32_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w64_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # Ps.AsmOperands; + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { + VOP2_DPP16(opName#"_dpp"), asmName> { string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; let AsmString = asmName # !subst(", vcc", "", AsmDPP); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # 
AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32"), asmName> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - - let WaveSizePredicate = isWave32 in { - def _sdwa_w32_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w32_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); - let isAsmParserOnly = 1; - } - def _dpp8_w32_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave32 - - let WaveSizePredicate = isWave64 in { - def _sdwa_w64_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # Ps.AsmOperands; - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w64_gfx10 : - VOP2_DPP16(opName#"_e32"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # AsmDPP; - let isAsmParserOnly = 1; - } - def _dpp8_w64_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # AsmDPP8; - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave64 + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx10 : + 
VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } } //===----------------------------- VOP3Only -----------------------------===// @@ -1088,8 +1072,19 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass Base_VOP2_Real_gfx10 op> : - VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10; +multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : + VOP2be_Real_e32_gfx10, + VOP2be_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; + +multiclass VOP2e_Real_gfx10 op, string opName, string asmName> : + VOP2_Real_e32_gfx10, + VOP2_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, @@ -1103,7 +1098,6 @@ multiclass VOP2_Real_gfx10_with_name op, string opName, VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; @@ -1136,6 +1130,9 @@ defm V_SUB_CO_CI_U32 : defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; +defm V_CNDMASK_B32 : + VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; + // VOP3 only. 
defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; @@ -1322,12 +1319,14 @@ multiclass Base_VOP2_Real_e32e64_vi op> : } // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" multiclass VOP2_SDWA_Real op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOP2_SDWAe (NAME#"_sdwa").Pfl>; } multiclass VOP2_SDWA9_Real op> { + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>; @@ -1350,12 +1349,13 @@ multiclass VOP2be_Real_e32e64_vi_only op, string OpName, string AsmName let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX8"; } - def _sdwa_vi : - VOP_SDWA_Real (OpName#"_sdwa")>, - VOP2_SDWAe (OpName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); - let AsmString = AsmName # ps.AsmOperands; - } + foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtSDWA>.ret in + def _sdwa_vi : + VOP_SDWA_Real (OpName#"_sdwa")>, + VOP2_SDWAe (OpName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); + let AsmString = AsmName # ps.AsmOperands; + } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_vi : VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.VI>, @@ -1383,12 +1383,13 @@ multiclass VOP2be_Real_e32e64_gfx9 op, string OpName, string AsmName> { let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX9"; } - def _sdwa_gfx9 : - VOP_SDWA9_Real (OpName#"_sdwa")>, - VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); - let AsmString = AsmName # ps.AsmOperands; - } + foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real (OpName#"_sdwa")>, + VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); + let AsmString = AsmName # ps.AsmOperands; + } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : 
VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.GFX9>, @@ -1410,10 +1411,11 @@ multiclass VOP2_Real_e32e64_gfx9 op> { VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl> { let DecoderNamespace = "GFX9"; } - def _sdwa_gfx9 : - VOP_SDWA9_Real (NAME#"_sdwa")>, - VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> { - } + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa")>, + VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> { + } foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.GFX9>, @@ -1554,7 +1556,7 @@ defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts multiclass VOP2_Real_DOT_ACC_gfx9 op> : VOP2_Real_e32_vi { - def _dpp : VOP2_DPP(NAME#"_e32")>; + def _dpp_vi : VOP2_DPP(NAME#"_dpp")>; } multiclass VOP2_Real_DOT_ACC_gfx10 op> : diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 21dbef9240e..605425972b1 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -112,7 +112,7 @@ class getVOP3ClampPat { class getVOP3MAIPat { list ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, - imm:$cbsz, imm:$abid, imm:$blgp))]; + timm:$cbsz, timm:$abid, timm:$blgp))]; } class VOP3Inst : @@ -385,12 +385,12 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile>, shl>; -def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile>, srl>; -def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile>, sra>; +let SubtargetPredicate = isGFX6GFX7GFX10 in { +def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile, shl>; +def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile, srl>; +def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, sra>; def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; -} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] +} // End SubtargetPredicate = isGFX6GFX7GFX10 let 
SubtargetPredicate = isGFX8Plus in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, lshl_rev>; @@ -399,21 +399,6 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile, as } // End SubtargetPredicate = isGFX8Plus } // End SchedRW = [Write64Bit] -let Predicates = [isGFX8Plus] in { -def : GCNPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_LSHLREV_B64 $y, $x) ->; -def : AMDGPUPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_LSHRREV_B64 $y, $x) ->; -def : AMDGPUPat < - (getDivergentFrag.ret i64:$x, i32:$y), - (V_ASHRREV_I64 $y, $x) ->; -} - let SchedRW = [Write32Bit] in { let SubtargetPredicate = isGFX8Plus in { @@ -468,13 +453,13 @@ let FPDPRounding = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; let Uses = [M0, EXEC] in { def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, - [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), + [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), (f32 VRegSrc_32:$src2), - (i32 imm:$src2_modifiers), - (i1 imm:$high), - (i1 imm:$clamp)))]>; + (i32 timm:$src2_modifiers), + (i1 timm:$high), + (i1 timm:$clamp)))]>; } // End Uses = [M0, EXEC] } // End FPDPRounding = 1 } // End renamedInGFX9 = 1 @@ -493,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1 let Uses = [M0, EXEC], FPDPRounding = 1 in { def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>, - [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), - (i1 imm:$high), - (i1 imm:$clamp), - (i32 imm:$omod)))]>; + [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), + (i1 timm:$high), + (i1 timm:$clamp), + (i32 timm:$omod)))]>; def V_INTERP_P1LV_F16 : 
VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>, - [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan), - (i32 imm:$attr), - (i32 imm:$src0_modifiers), + [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan), + (i32 timm:$attr), + (i32 timm:$src0_modifiers), (f32 VRegSrc_32:$src2), - (i32 imm:$src2_modifiers), - (i1 imm:$high), - (i1 imm:$clamp), - (i32 imm:$omod)))]>; + (i32 timm:$src2_modifiers), + (i1 timm:$high), + (i1 timm:$clamp), + (i32 timm:$omod)))]>; } // End Uses = [M0, EXEC], FPDPRounding = 1 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 @@ -657,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in { } // End $vdst = $vdst_in, DisableEncoding $vdst_in def : GCNPat< - (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc), + (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) >; def : GCNPat< - (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc), + (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) >; } // End SubtargetPredicate = isGFX10Plus diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 55ee5f6577c..0c13f39fec0 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -261,6 +261,7 @@ class SDot2Pat : GCNPat < let SubtargetPredicate = !cast(Inst).SubtargetPredicate; } +let IsDOT = 1 in { let SubtargetPredicate = HasDot2Insts in { def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile>; @@ -277,6 +278,7 @@ def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile>; } // End SubtargetPredicate = HasDot1Insts +} // End let IsDOT = 1 multiclass DotPats { diff --git 
a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index b3513e383d1..8ef0ec7b71f 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -183,7 +183,7 @@ multiclass VOPCXInstAliases { } -class getVOPCPat64 : LetDummies { +class getVOPCPat64 : LetDummies { list ret = !if(P.HasModifiers, [(set i1:$sdst, (setcc (P.Src0VT @@ -202,7 +202,7 @@ class VCMPXNoSDstTable { multiclass VOPC_Pseudos { @@ -225,6 +225,7 @@ multiclass VOPC_Pseudos .ret in def _sdwa : VOPC_SDWA_Pseudo { let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = P.Schedule; @@ -236,7 +237,7 @@ multiclass VOPC_Pseudos : VOPC_Pseudos { @@ -261,6 +262,7 @@ multiclass VOPCX_Pseudos .ret in def _nosdst_sdwa : VOPC_SDWA_Pseudo { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; @@ -285,22 +287,23 @@ def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>; def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>; def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>; -multiclass VOPC_F16 : +multiclass VOPC_F16 : VOPC_Pseudos ; -multiclass VOPC_F32 : +multiclass VOPC_F32 : VOPC_Pseudos ; -multiclass VOPC_F64 : +multiclass VOPC_F64 : VOPC_Pseudos ; -multiclass VOPC_I16 : +multiclass VOPC_I16 : VOPC_Pseudos ; -multiclass VOPC_I32 : +multiclass VOPC_I32 : VOPC_Pseudos ; -multiclass VOPC_I64 : +multiclass VOPC_I64 : VOPC_Pseudos ; multiclass VOPCX_F16 : @@ -669,6 +672,7 @@ multiclass VOPC_Class_Pseudos .ret in def _sdwa : VOPC_SDWA_Pseudo { let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]), !if(DefVcc, [VCC], [])); @@ -698,6 +702,7 @@ multiclass VOPCX_Class_Pseudos .ret in def _nosdst_sdwa : VOPC_SDWA_Pseudo { let Defs = [EXEC]; let SchedRW = P_NoSDst.Schedule; @@ -737,8 +742,11 @@ defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">; defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; + +let 
SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; +} //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern. @@ -878,6 +886,7 @@ let AssemblerPredicate = isGFX10Plus in { } } // End DecoderNamespace = "GFX10" + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_sdwa")>, VOPC_SDWA9e(NAME#"_sdwa").Pfl>; @@ -903,6 +912,7 @@ let AssemblerPredicate = isGFX10Plus in { } } // End DecoderNamespace = "GFX10" + foreach _ = BoolToList(NAME#"_nosdst_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(NAME#"_nosdst_sdwa")>, VOPC_SDWA9e(NAME#"_nosdst_sdwa").Pfl> { @@ -1223,10 +1233,12 @@ multiclass VOPC_Real_vi op> { } } + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA>.ret in def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOPC_SDWAe (NAME#"_sdwa").Pfl>; + foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOPC_SDWA9e (NAME#"_sdwa").Pfl>; diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index 677095a354b..f208a1134a5 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -14,6 +14,7 @@ class LetDummies { bit isReMaterializable; bit isAsCheapAsAMove; bit VOPAsmPrefer32Bit; + bit FPDPRounding; Predicate SubtargetPredicate; string Constraints; string DisableEncoding; @@ -41,9 +42,7 @@ class VOP_Pseudo pattern> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { - + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; let UseNamedOperandTable = 1; @@ -148,6 +147,7 @@ class VOP3_Real : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; 
@@ -473,8 +473,7 @@ class VOP_SDWA9Be : VOP_SDWA9e

{ class VOP_SDWA_Pseudo pattern=[]> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; @@ -595,8 +594,7 @@ class VOP_DPPe : Enc64 { class VOP_DPP_Pseudo pattern=[]> : InstSI , VOP , - SIMCInstr , - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h index 41b559d1676..9242400fb28 100644 --- a/lib/Target/ARC/ARCFrameLowering.h +++ b/lib/Target/ARC/ARCFrameLowering.h @@ -27,8 +27,8 @@ class ARCInstrInfo; class ARCFrameLowering : public TargetFrameLowering { public: ARCFrameLowering(const ARCSubtarget &st) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), ST(st) { - } + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0), + ST(st) {} /// Insert Prologue into the function. void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/ARC/ARCISelLowering.cpp b/lib/Target/ARC/ARCISelLowering.cpp index 847d23f0abd..751fd567bae 100644 --- a/lib/Target/ARC/ARCISelLowering.cpp +++ b/lib/Target/ARC/ARCISelLowering.cpp @@ -716,7 +716,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); assert(cast(Op.getOperand(0))->getZExtValue() == 0 && "Only support lowering frame addr of current frame."); - unsigned FrameReg = ARI.getFrameRegister(MF); + Register FrameReg = ARI.getFrameRegister(MF); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); } diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.h b/lib/Target/ARC/ARCMachineFunctionInfo.h index 31aa5b93246..d4dcf9bf285 100644 --- a/lib/Target/ARC/ARCMachineFunctionInfo.h +++ b/lib/Target/ARC/ARCMachineFunctionInfo.h @@ -34,8 +34,8 @@ public: explicit ARCFunctionInfo(MachineFunction &MF) : ReturnStackOffsetSet(false), VarArgsFrameIndex(0), ReturnStackOffset(-1U), MaxCallStackReq(0) { - // Functions are 4-byte (2**2) aligned. 
- MF.setAlignment(2); + // Functions are 4-byte aligned. + MF.setAlignment(Align(4)); } ~ARCFunctionInfo() {} diff --git a/lib/Target/ARC/ARCOptAddrMode.cpp b/lib/Target/ARC/ARCOptAddrMode.cpp index c922b99c57b..22a3b9111c8 100644 --- a/lib/Target/ARC/ARCOptAddrMode.cpp +++ b/lib/Target/ARC/ARCOptAddrMode.cpp @@ -139,8 +139,7 @@ static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg, MachineDominatorTree *MDT, MachineRegisterInfo *MRI) { - assert(TargetRegisterInfo::isVirtualRegister(VReg) && - "Expected virtual register!"); + assert(Register::isVirtualRegister(VReg) && "Expected virtual register!"); for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end(); it != end; ++it) { @@ -181,7 +180,7 @@ static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII, bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add, const MachineInstr *Ldst) { - unsigned R = Add->getOperand(0).getReg(); + Register R = Add->getOperand(0).getReg(); return dominatesAllUsesOf(Ldst, R, MDT, MRI); } @@ -205,9 +204,8 @@ MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) { return nullptr; } - unsigned B = Base.getReg(); - if (TargetRegisterInfo::isStackSlot(B) || - !TargetRegisterInfo::isVirtualRegister(B)) { + Register B = Base.getReg(); + if (Register::isStackSlot(B) || !Register::isVirtualRegister(B)) { LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n"); return nullptr; } @@ -285,7 +283,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, return nullptr; } - unsigned BaseReg = Ldst->getOperand(BasePos).getReg(); + Register BaseReg = Ldst->getOperand(BasePos).getReg(); // prohibit this: // v1 = add v0, c @@ -294,7 +292,7 @@ ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add, // st v0, [v0, 0] // v1 = add v0, c if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) { - unsigned StReg = Ldst->getOperand(0).getReg(); + Register StReg = Ldst->getOperand(0).getReg(); if 
(Add->getOperand(0).getReg() == StReg || BaseReg == StReg) { LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n"); return nullptr; @@ -447,7 +445,7 @@ void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode, MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF); AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos); - unsigned BaseReg = Ldst.getOperand(BasePos).getReg(); + Register BaseReg = Ldst.getOperand(BasePos).getReg(); Ldst.RemoveOperand(OffPos); Ldst.RemoveOperand(BasePos); diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp index 9c8340ac8f8..a7f89b385ff 100644 --- a/lib/Target/ARC/ARCRegisterInfo.cpp +++ b/lib/Target/ARC/ARCRegisterInfo.cpp @@ -206,7 +206,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, LLVM_DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(ARC::GPR32RegClass.contains(Reg) && "Unexpected register operand"); if (!TFI->hasFP(MF)) { diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp index 9fb45d686c2..34700dc22c5 100644 --- a/lib/Target/ARC/ARCTargetMachine.cpp +++ b/lib/Target/ARC/ARCTargetMachine.cpp @@ -38,7 +38,7 @@ ARCTargetMachine::ARCTargetMachine(const Target &T, const Triple &TT, "f32:32:32-i64:32-f64:32-a:0:32-n32", TT, CPU, FS, Options, getRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index fb238bfc9cb..30b9c8071ba 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -133,9 +133,9 @@ bool A15SDOptimizer::usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC) { if (!MO.isReg()) return false; - unsigned Reg = MO.getReg(); + Register Reg 
= MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); else return TRC->contains(Reg); @@ -151,7 +151,7 @@ unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { // Get the subreg type that is most likely to be coalesced // for an SPR register that will be used in VDUP32d pseudo. unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { - if (!TRI->isVirtualRegister(SReg)) + if (!Register::isVirtualRegister(SReg)) return getDPRLaneFromSPR(SReg); MachineInstr *MI = MRI->getVRegDef(SReg); @@ -166,7 +166,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { SReg = MI->getOperand(1).getReg(); } - if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (Register::isVirtualRegister(SReg)) { if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; return ARM::ssub_0; } @@ -191,8 +191,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { for (MachineOperand &MO : MI->operands()) { if ((!MO.isReg()) || (!MO.isUse())) continue; - unsigned Reg = MO.getReg(); - if (!TRI->isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; MachineOperand *Op = MI->findRegisterDefOperand(Reg); @@ -213,8 +213,8 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { for (MachineOperand &MODef : Def->operands()) { if ((!MODef.isReg()) || (!MODef.isDef())) continue; - unsigned DefReg = MODef.getReg(); - if (!TRI->isVirtualRegister(DefReg)) { + Register DefReg = MODef.getReg(); + if (!Register::isVirtualRegister(DefReg)) { IsDead = false; break; } @@ -245,10 +245,10 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { } if (MI->isInsertSubreg()) { - unsigned DPRReg = MI->getOperand(1).getReg(); - unsigned SPRReg = MI->getOperand(2).getReg(); + Register DPRReg = MI->getOperand(1).getReg(); + Register SPRReg = MI->getOperand(2).getReg(); - if (TRI->isVirtualRegister(DPRReg) && 
TRI->isVirtualRegister(SPRReg)) { + if (Register::isVirtualRegister(DPRReg) && Register::isVirtualRegister(SPRReg)) { MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); @@ -267,7 +267,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { // Find the thing we're subreg copying out of - is it of the same // regclass as DPRMI? (i.e. a DPR or QPR). - unsigned FullReg = SPRMI->getOperand(1).getReg(); + Register FullReg = SPRMI->getOperand(1).getReg(); const TargetRegisterClass *TRC = MRI->getRegClass(MI->getOperand(1).getReg()); if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { @@ -296,9 +296,9 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (!MI->getOperand(I).isReg()) continue; ++NumTotal; - unsigned OpReg = MI->getOperand(I).getReg(); + Register OpReg = MI->getOperand(I).getReg(); - if (!TRI->isVirtualRegister(OpReg)) + if (!Register::isVirtualRegister(OpReg)) break; MachineInstr *Def = MRI->getVRegDef(OpReg); @@ -342,7 +342,7 @@ bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { if (!MI->isFullCopy()) return MI; - if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) return nullptr; MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!Def) @@ -369,8 +369,8 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, Reached.insert(MI); if (MI->isPHI()) { for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { - unsigned Reg = MI->getOperand(I).getReg(); - if (!TRI->isVirtualRegister(Reg)) { + Register Reg = MI->getOperand(I).getReg(); + if (!Register::isVirtualRegister(Reg)) { continue; } MachineInstr *NewMI = MRI->getVRegDef(Reg); @@ -379,7 +379,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, Front.push_back(NewMI); } } else if (MI->isFullCopy()) { - if 
(!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) continue; MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!NewMI) @@ -418,8 +418,8 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg, unsigned Lane, bool QPR) { - unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : - &ARM::DPRRegClass); + Register Out = + MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out) .addReg(Reg) @@ -434,7 +434,7 @@ unsigned A15SDOptimizer::createExtractSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned DReg, unsigned Lane, const TargetRegisterClass *TRC) { - unsigned Out = MRI->createVirtualRegister(TRC); + Register Out = MRI->createVirtualRegister(TRC); BuildMI(MBB, InsertBefore, DL, @@ -448,7 +448,7 @@ unsigned A15SDOptimizer::createExtractSubreg( unsigned A15SDOptimizer::createRegSequence( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Reg1, unsigned Reg2) { - unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::QPRRegClass); BuildMI(MBB, InsertBefore, DL, @@ -466,7 +466,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out) .addReg(Ssub0) .addReg(Ssub1) @@ -478,7 +478,7 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, unsigned A15SDOptimizer::createInsertSubreg( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const 
DebugLoc &DL, unsigned DReg, unsigned Lane, unsigned ToInsert) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); BuildMI(MBB, InsertBefore, DL, @@ -494,7 +494,7 @@ unsigned A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const DebugLoc &DL) { - unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + Register Out = MRI->createVirtualRegister(&ARM::DPRRegClass); BuildMI(MBB, InsertBefore, DL, @@ -602,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // we can end up with multiple defs of this DPR. SmallVector DefSrcs; - if (!TRI->isVirtualRegister(*I)) + if (!Register::isVirtualRegister(*I)) continue; MachineInstr *Def = MRI->getVRegDef(*I); if (!Def) @@ -622,7 +622,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // Collect all the uses of this MI's DPR def for updating later. SmallVector Uses; - unsigned DPRDefReg = MI->getOperand(0).getReg(); + Register DPRDefReg = MI->getOperand(0).getReg(); for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), E = MRI->use_end(); I != E; ++I) Uses.push_back(&*I); diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index bf8ed6562fe..2e6f756d522 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ class MachineInstr; class MCInst; class PassRegistry; +Pass *createMVETailPredicationPass(); FunctionPass *createARMLowOverheadLoopsPass(); Pass *createARMParallelDSPPass(); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, @@ -67,6 +68,7 @@ void initializeThumb2SizeReducePass(PassRegistry &); void initializeThumb2ITBlockPass(PassRegistry &); void initializeMVEVPTBlockPass(PassRegistry &); void initializeARMLowOverheadLoopsPass(PassRegistry &); +void initializeMVETailPredicationPass(PassRegistry &); } // end namespace llvm diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 
b687db12eaf..fed4cb2b931 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -57,12 +57,15 @@ def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; multiclass VFPver prev = [], - list otherimplies = []> { + list prev, + list otherimplies, + list vfp2prev = []> { def _D16_SP: SubtargetFeature< name#"d16sp", query#"D16SP", "true", description#" with only 16 d-registers and no double precision", - !foreach(v, prev, !cast(v # "_D16_SP")) # otherimplies>; + !foreach(v, prev, !cast(v # "_D16_SP")) # + !foreach(v, vfp2prev, !cast(v # "_SP")) # + otherimplies>; def _SP: SubtargetFeature< name#"sp", query#"SP", "true", description#" with no double precision", @@ -72,6 +75,7 @@ multiclass VFPver(v # "_D16")) # + vfp2prev # otherimplies # [FeatureFP64, !cast(NAME # "_D16_SP")]>; def "": SubtargetFeature< name, query, "true", description, @@ -80,11 +84,17 @@ multiclass VFPver(NAME # "_SP")]>; } -defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions", - [], [FeatureFPRegs]>; +def FeatureVFP2_SP : SubtargetFeature<"vfp2sp", "HasVFPv2SP", "true", + "Enable VFP2 instructions with " + "no double precision", + [FeatureFPRegs]>; + +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", + "Enable VFP2 instructions", + [FeatureFP64, FeatureVFP2_SP]>; defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions", - [FeatureVFP2]>; + [], [], [FeatureVFP2]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", @@ -98,7 +108,7 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", [FeatureVFP3], [FeatureFP16]>; defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", - [FeatureVFP4]>; + [FeatureVFP4], []>; def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " @@ -302,9 +312,18 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeaturePref32BitThumb : 
SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; -def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopAlignment","2", +def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2", "Prefer 32-bit alignment for loops">; +def FeatureMVEVectorCostFactor1 : SubtargetFeature<"mve1beat", "MVEVectorCostFactor", "1", + "Model MVE instructions as a 1 beat per tick architecture">; + +def FeatureMVEVectorCostFactor2 : SubtargetFeature<"mve2beat", "MVEVectorCostFactor", "2", + "Model MVE instructions as a 2 beats per tick architecture">; + +def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFactor", "4", + "Model MVE instructions as a 4 beats per tick architecture">; + /// Some instructions update CPSR partially, which can add false dependency for /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is /// mapped to a separate physical register. Avoid partial CPSR update for these @@ -1156,6 +1175,13 @@ def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76, FeatureFullFP16, FeatureDotProd]>; +def : ProcNoItin<"neoverse-n1", [ARMv82a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureDotProd]>; + def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index e29077266fc..c8c91e53c44 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -168,7 +168,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // relatively easy to exceed the thumb branch range within a TU. if (! 
ThumbIndirectPads.empty()) { OutStreamer->EmitAssemblerFlag(MCAF_Code16); - EmitAlignment(1); + EmitAlignment(Align(2)); for (std::pair &TIP : ThumbIndirectPads) { OutStreamer->EmitLabel(TIP.second); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX) @@ -203,8 +203,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, switch (MO.getType()) { default: llvm_unreachable(""); case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg)); assert(!MO.getSubReg() && "Subregs should be eliminated!"); if(ARM::GPRPairRegClass.contains(Reg)) { const MachineFunction &MF = *MI->getParent()->getParent(); @@ -275,7 +275,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { - unsigned Reg = MI->getOperand(OpNum).getReg(); + Register Reg = MI->getOperand(OpNum).getReg(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. @@ -302,14 +302,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MI->getOperand(OpNum).isReg()) return true; const MachineOperand &MO = MI->getOperand(OpNum); - unsigned RegBegin = MO.getReg(); + Register RegBegin = MO.getReg(); // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. 
O << "{"; if (ARM::GPRPairRegClass.contains(RegBegin)) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); + Register Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); } @@ -378,8 +378,8 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MO.isReg()) return true; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned Reg = TRI->getSubReg(MO.getReg(), FirstHalf ? - ARM::gsub_0 : ARM::gsub_1); + Register Reg = + TRI->getSubReg(MO.getReg(), FirstHalf ? ARM::gsub_0 : ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -391,7 +391,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -400,12 +400,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'f': { // The high doubleword register of a NEON quad register. if (!MI->getOperand(OpNum).isReg()) return true; - unsigned Reg = MI->getOperand(OpNum).getReg(); + Register Reg = MI->getOperand(OpNum).getReg(); if (!ARM::QPRRegClass.contains(Reg)) return true; const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? - ARM::dsub_0 : ARM::dsub_1); + Register SubReg = + TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? 
ARM::dsub_0 : ARM::dsub_1); O << ARMInstPrinter::getRegisterName(SubReg); return false; } @@ -419,7 +419,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return true; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if(!ARM::GPRPairRegClass.contains(Reg)) return false; Reg = TRI->getSubReg(Reg, ARM::gsub_1); @@ -526,7 +526,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(2); + EmitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); @@ -539,7 +539,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getThreadLocalPointerSection()); - EmitAlignment(2); + EmitAlignment(Align(4)); for (auto &Stub : Stubs) emitNonLazySymbolPointer(*OutStreamer, Stub.first, Stub.second); @@ -940,7 +940,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for // ARM mode tables. - EmitAlignment(2); + EmitAlignment(Align(4)); // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -986,7 +986,7 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for // ARM mode tables. - EmitAlignment(2); + EmitAlignment(Align(4)); // Emit a label for the jump table. 
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); @@ -1015,7 +1015,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, unsigned JTI = MO1.getIndex(); if (Subtarget->isThumb1Only()) - EmitAlignment(2); + EmitAlignment(Align(4)); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -1058,7 +1058,7 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI, OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); // Make sure the next instruction is 2-byte aligned. - EmitAlignment(1); + EmitAlignment(Align(2)); } void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { @@ -1072,7 +1072,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MF.getSubtarget().getRegisterInfo(); const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo(); - unsigned FramePtr = TargetRegInfo->getFrameRegister(MF); + Register FramePtr = TargetRegInfo->getFrameRegister(MF); unsigned Opc = MI->getOpcode(); unsigned SrcReg, DstReg; @@ -1136,7 +1136,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } // Check for registers that are remapped (for a Thumb1 prologue that // saves high registers). - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg)) Reg = RemappedReg; RegList.push_back(Reg); @@ -1326,7 +1326,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // So here we generate a bl to a small jump pad that does bx rN. // The jump pads are emitted after the function body. 
- unsigned TReg = MI->getOperand(0).getReg(); + Register TReg = MI->getOperand(0).getReg(); MCSymbol *TRegSym = nullptr; for (std::pair &TIP : ThumbIndirectPads) { if (TIP.first == TReg) { @@ -1663,8 +1663,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::tTBH_JT: { bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT; - unsigned Base = MI->getOperand(0).getReg(); - unsigned Idx = MI->getOperand(1).getReg(); + Register Base = MI->getOperand(0).getReg(); + Register Idx = MI->getOperand(1).getReg(); assert(MI->getOperand(1).isKill() && "We need the index register as scratch!"); // Multiply up idx if necessary. @@ -1844,8 +1844,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // b LSJLJEH // movs r0, #1 // LSJLJEH: - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ValReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ValReg = MI->getOperand(1).getReg(); MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) @@ -1910,8 +1910,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // mov r0, #0 // add pc, pc, #0 // mov r0, #1 - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ValReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ValReg = MI->getOperand(1).getReg(); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDri) @@ -1967,8 +1967,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr $scratch, [$src, #4] // ldr r7, [$src] // bx $scratch - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ScratchReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ScratchReg = MI->getOperand(1).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) .addReg(ARM::SP) .addReg(SrcReg) @@ -2027,8 +2027,8 @@ 
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr $scratch, [$src, #4] // ldr r7, [$src] // bx $scratch - unsigned SrcReg = MI->getOperand(0).getReg(); - unsigned ScratchReg = MI->getOperand(1).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); + Register ScratchReg = MI->getOperand(1).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ScratchReg) @@ -2095,7 +2095,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // ldr.w sp, [$src, #8] // ldr.w pc, [$src, #4] - unsigned SrcReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(0).getReg(); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi12) .addReg(ARM::R11) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 222aa85856a..684cd1def97 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -172,9 +172,9 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( const MachineOperand &WB = isLoad ? 
MI.getOperand(1) : MI.getOperand(0); const MachineOperand &Base = MI.getOperand(2); const MachineOperand &Offset = MI.getOperand(NumOps - 3); - unsigned WBReg = WB.getReg(); - unsigned BaseReg = Base.getReg(); - unsigned OffReg = Offset.getReg(); + Register WBReg = WB.getReg(); + Register BaseReg = Base.getReg(); + Register OffReg = Offset.getReg(); unsigned OffImm = MI.getOperand(NumOps - 2).getImm(); ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm(); switch (AddrMode) { @@ -276,8 +276,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( if (LV) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned Reg = MO.getReg(); + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { + Register Reg = MO.getReg(); LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); if (MO.isDef()) { @@ -966,8 +966,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SmallSet DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); + Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); @@ -1019,7 +1019,7 @@ ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, if (!SubIdx) return MIB.addReg(Reg, State); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); return MIB.addReg(Reg, State, SubIdx); } @@ -1133,7 +1133,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be 
realigned. - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo)) .addFrameIndex(FI) .addImm(16) @@ -1155,7 +1156,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo)) @@ -1337,7 +1339,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); } - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); @@ -1368,7 +1370,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) .addFrameIndex(FI) .addImm(16) @@ -1382,7 +1385,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } } else @@ -1390,7 +1393,8 
@@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF) && + Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI) .addImm(16) @@ -1405,7 +1409,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } } else @@ -1425,7 +1429,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); @@ -1583,8 +1587,8 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // Look for a copy between even S-registers. That is where we keep floats // when using NEON v2f32 instructions for f32 arithmetic. 
- unsigned DstRegS = MI.getOperand(0).getReg(); - unsigned SrcRegS = MI.getOperand(1).getReg(); + Register DstRegS = MI.getOperand(0).getReg(); + Register SrcRegS = MI.getOperand(1).getReg(); if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) return false; @@ -1794,12 +1798,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, if (MI0.getNumOperands() != MI1.getNumOperands()) return false; - unsigned Addr0 = MI0.getOperand(1).getReg(); - unsigned Addr1 = MI1.getOperand(1).getReg(); + Register Addr0 = MI0.getOperand(1).getReg(); + Register Addr1 = MI1.getOperand(1).getReg(); if (Addr0 != Addr1) { - if (!MRI || - !TargetRegisterInfo::isVirtualRegister(Addr0) || - !TargetRegisterInfo::isVirtualRegister(Addr1)) + if (!MRI || !Register::isVirtualRegister(Addr0) || + !Register::isVirtualRegister(Addr1)) return false; // This assumes SSA form. @@ -2076,6 +2079,38 @@ isProfitableToIfCvt(MachineBasicBlock &TBB, return PredCost <= UnpredCost; } +unsigned +ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. + // ARM has a condition code field in every predicable instruction, using it + // doesn't change code size. + return Subtarget.isThumb2() ? divideCeil(NumInsts, 4) * 2 : 0; +} + +unsigned +ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { + // If this branch is likely to be folded into the comparison to form a + // CB(N)Z, then removing it won't reduce code size at all, because that will + // just replace the CB(N)Z with a CMP. + if (MI.getOpcode() == ARM::t2Bcc && + findCMPToFoldIntoCBZ(&MI, &getRegisterInfo())) + return 0; + + unsigned Size = getInstSizeInBytes(MI); + + // For Thumb2, all branches are 32-bit instructions during the if conversion + // pass, but may be replaced with 16-bit instructions during size reduction. 
+ // Since the branches considered by if conversion tend to be forward branches + // over small basic blocks, they are very likely to be in range for the + // narrow instructions, so we assume the final code size will be half what it + // currently is. + if (Subtarget.isThumb2()) + Size /= 2; + + return Size; +} + bool ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const { @@ -2141,7 +2176,7 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, MachineInstr * ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) return nullptr; @@ -2163,7 +2198,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) return nullptr; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return nullptr; if (MO.isDef() && !MO.isDead()) return nullptr; @@ -2211,7 +2246,7 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, // Find new register class to use. MachineOperand FalseReg = MI.getOperand(Invert ? 
2 : 1); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; @@ -2298,6 +2333,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { {ARM::tSUBSrr, ARM::tSUBrr}, {ARM::tSBCS, ARM::tSBC}, {ARM::tRSBS, ARM::tRSB}, + {ARM::tLSLSri, ARM::tLSLri}, {ARM::t2ADDSri, ARM::t2ADDri}, {ARM::t2ADDSrr, ARM::t2ADDrr}, @@ -2420,7 +2456,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineOperand &MO = MI->getOperand(i); RegList.push_back(MO); - if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) + if (MO.isReg() && !MO.isImplicit() && + TRI->getEncodingValue(MO.getReg()) < FirstRegEnc) FirstRegEnc = TRI->getEncodingValue(MO.getReg()); } @@ -2430,7 +2467,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; --CurRegEnc) { unsigned CurReg = RegClass->getRegister(CurRegEnc); - if (IsT1PushPop && CurReg > ARM::R7) + if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7)) continue; if (!IsPop) { // Pushing any register is completely harmless, mark the register involved @@ -3039,18 +3076,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( break; case ARM::VSELEQD: case ARM::VSELEQS: + case ARM::VSELEQH: CC = ARMCC::EQ; break; case ARM::VSELGTD: case ARM::VSELGTS: + case ARM::VSELGTH: CC = ARMCC::GT; break; case ARM::VSELGED: case ARM::VSELGES: + case ARM::VSELGEH: CC = ARMCC::GE; break; - case ARM::VSELVSS: case ARM::VSELVSD: + case ARM::VSELVSS: + case ARM::VSELVSH: CC = ARMCC::VS; break; } @@ -3271,9 +3312,9 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } unsigned OpIdx = Commute ? 
2 : 1; - unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); + Register Reg1 = UseMI.getOperand(OpIdx).getReg(); bool isKill = UseMI.getOperand(OpIdx).isKill(); - unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + Register NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) @@ -3335,15 +3376,15 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB_POST: case ARM::LDRSH_POST: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 4 : 3; } case ARM::LDR_PRE_REG: case ARM::LDRB_PRE_REG: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rt == Rm) return 3; unsigned ShOpVal = MI.getOperand(4).getImm(); @@ -3372,8 +3413,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRH_PRE: case ARM::STRH_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (!Rm) return 2; if (Rt == Rm) @@ -3384,8 +3425,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDR_POST_REG: case ARM::LDRB_POST_REG: case ARM::LDRH_POST: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rm = MI.getOperand(3).getReg(); return (Rt == Rm) ? 
3 : 2; } @@ -3404,10 +3445,10 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, case ARM::LDRSB_PRE: case ARM::LDRSH_PRE: { - unsigned Rm = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm == 0) return 3; - unsigned Rt = MI.getOperand(0).getReg(); + Register Rt = MI.getOperand(0).getReg(); if (Rt == Rm) return 4; unsigned ShOpVal = MI.getOperand(4).getImm(); @@ -3422,9 +3463,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::LDRD: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(2).getReg(); - unsigned Rm = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(2).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3; @@ -3432,7 +3473,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::STRD: { - unsigned Rm = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(3).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 4 : 3; @@ -3448,9 +3489,9 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 4; case ARM::LDRD_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(3).getReg(); - unsigned Rm = MI.getOperand(4).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(3).getReg(); + Register Rm = MI.getOperand(4).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4; @@ -3458,13 +3499,13 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, } case ARM::t2LDRD_PRE: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(3).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(3).getReg(); return (Rt == Rn) ? 
4 : 3; } case ARM::STRD_PRE: { - unsigned Rm = MI.getOperand(4).getReg(); + Register Rm = MI.getOperand(4).getReg(); if (Rm) return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 5 : 4; @@ -3495,8 +3536,8 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, return 2; case ARM::t2LDRDi8: { - unsigned Rt = MI.getOperand(0).getReg(); - unsigned Rn = MI.getOperand(2).getReg(); + Register Rt = MI.getOperand(0).getReg(); + Register Rn = MI.getOperand(2).getReg(); return (Rt == Rn) ? 3 : 2; } @@ -3745,7 +3786,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, } bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { - unsigned BaseReg = MI.getOperand(0).getReg(); + Register BaseReg = MI.getOperand(0).getReg(); for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { const auto &Op = MI.getOperand(i); if (Op.isReg() && Op.getReg() == BaseReg) @@ -4219,7 +4260,7 @@ int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return -1; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); + Register Reg = DefMO.getReg(); const MachineInstr *ResolvedDefMI = &DefMI; unsigned DefAdj = 0; @@ -4328,10 +4369,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); - const MachineSDNode *DefMN = dyn_cast(DefNode); + auto *DefMN = cast(DefNode); unsigned DefAlign = !DefMN->memoperands_empty() ? (*DefMN->memoperands_begin())->getAlignment() : 0; - const MachineSDNode *UseMN = dyn_cast(UseNode); + auto *UseMN = cast(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() ? 
(*UseMN->memoperands_begin())->getAlignment() : 0; int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, @@ -4708,7 +4749,7 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, if (MI.getOperand(i).isImplicit() || !MI.getOperand(i).isReg()) continue; - unsigned Reg = MI.getOperand(i).getReg(); + Register Reg = MI.getOperand(i).getReg(); if (Reg < ARM::R0 || Reg > ARM::R7) { if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { @@ -4731,7 +4772,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); const GlobalValue *GV = cast((*MI->memoperands_begin())->getValue()); MachineInstrBuilder MIB; @@ -5104,7 +5145,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( const MachineOperand &MO = MI.getOperand(OpNum); if (MO.readsReg()) return 0; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); int UseOp = -1; switch (MI.getOpcode()) { @@ -5134,7 +5175,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( return 0; // We must be able to clobber the whole D-reg. - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { // Virtual register must be a def undef foo:ssub_0 operand. 
if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) return 0; @@ -5159,8 +5200,8 @@ void ARMBaseInstrInfo::breakPartialRegDependency( assert(TRI && "Need TRI instance"); const MachineOperand &MO = MI.getOperand(OpNum); - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + Register Reg = MO.getReg(); + assert(Register::isPhysicalRegister(Reg) && "Can't break virtual register dependencies."); unsigned DReg = Reg; @@ -5337,7 +5378,7 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, // is not redefined between the cmp and the br. if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) return nullptr; - unsigned Reg = CmpMI->getOperand(0).getReg(); + Register Reg = CmpMI->getOperand(0).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg); if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0) @@ -5349,3 +5390,50 @@ MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, return &*CmpMI; } + +unsigned llvm::ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + if (Subtarget->isThumb()) { + if (Val <= 255) // MOV + return ForCodesize ? 2 : 1; + if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV + ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW + ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN + return ForCodesize ? 4 : 1; + if (Val <= 510) // MOV + ADDi8 + return ForCodesize ? 4 : 2; + if (~Val <= 255) // MOV + MVN + return ForCodesize ? 4 : 2; + if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL + return ForCodesize ? 4 : 2; + } else { + if (ARM_AM::getSOImmVal(Val) != -1) // MOV + return ForCodesize ? 4 : 1; + if (ARM_AM::getSOImmVal(~Val) != -1) // MVN + return ForCodesize ? 4 : 1; + if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW + return ForCodesize ? 4 : 1; + if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs + return ForCodesize ? 8 : 2; + } + if (Subtarget->useMovt()) // MOVW + MOVT + return ForCodesize ? 
8 : 2; + return ForCodesize ? 8 : 3; // Literal pool load +} + +bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize) { + // Check with ForCodesize + unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize); + unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize); + if (Cost1 < Cost2) + return true; + if (Cost1 > Cost2) + return false; + + // If they are equal, try with !ForCodesize + return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < + ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index c28983fcc15..c232b6f0b45 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -276,6 +276,10 @@ public: return NumCycles == 1; } + unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const override; + unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const override; + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB) const override; @@ -601,7 +605,8 @@ bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, + const TargetRegisterInfo *TRI); /// Return true if Reg is defd between From and To bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, @@ -620,6 +625,20 @@ void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond); void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond, unsigned Inactive); +/// Returns the number of instructions required to materialize the given +/// constant in a register, or 3 if a literal pool load is needed. +/// If ForCodesize is specified, an approximate cost in bytes is returned. 
+unsigned ConstantMaterializationCost(unsigned Val, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + +/// Returns true if Val1 has a lower Constant Materialization Cost than Val2. +/// Uses the cost from ConstantMaterializationCost, first with ForCodesize as +/// specified. If the scores are equal, return the comparison for !ForCodesize. +bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, + const ARMSubtarget *Subtarget, + bool ForCodesize = false); + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index dc99b37742d..1eaf871867e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -174,6 +174,12 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, : CSR_AAPCS_ThisReturn_RegMask; } +ArrayRef ARMBaseRegisterInfo::getIntraCallClobberedRegs( + const MachineFunction *MF) const { + static const MCPhysReg IntraCallClobberedRegs[] = {ARM::R12}; + return ArrayRef(IntraCallClobberedRegs); +} + BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget(); @@ -185,7 +191,7 @@ getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, ARM::PC); markSuperRegs(Reserved, ARM::FPSCR); markSuperRegs(Reserved, ARM::APSR_NZCV); - if (TFI->hasFP(MF)) + if (TFI->hasFP(MF) || STI.isTargetDarwin()) markSuperRegs(Reserved, getFramePointerReg(STI)); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); @@ -217,7 +223,7 @@ isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const { const TargetRegisterClass * ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, - const MachineFunction &) const { + const MachineFunction &MF) const { const TargetRegisterClass *Super = RC; TargetRegisterClass::sc_iterator I = RC->getSuperClasses(); do { @@ -225,11 +231,13 
@@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case ARM::GPRRegClassID: case ARM::SPRRegClassID: case ARM::DPRRegClassID: + case ARM::GPRPairRegClassID: + return Super; case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: - case ARM::GPRPairRegClassID: - return Super; + if (MF.getSubtarget().hasNEON()) + return Super; } Super = *I++; } while (Super); @@ -317,7 +325,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, return false; unsigned PairedPhys = 0; - if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + if (Register::isPhysicalRegister(Paired)) { PairedPhys = Paired; } else if (VRM && VRM->hasPhys(Paired)) { PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); @@ -347,7 +355,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, std::pair Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == (unsigned)ARMRI::RegPairOdd || Hint.first == (unsigned)ARMRI::RegPairEven) && - TargetRegisterInfo::isVirtualRegister(Hint.second)) { + Register::isVirtualRegister(Hint.second)) { // If 'Reg' is one of the even / odd register pair and it's now changed // (e.g. coalesced) into a different register. The other register of the // pair allocation hint must be updated to reflect the relationship @@ -357,7 +365,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, // Make sure the pair has not already divorced. if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); - if (TargetRegisterInfo::isVirtualRegister(NewReg)) + if (Register::isVirtualRegister(NewReg)) MRI->setRegAllocationHint(NewReg, Hint.first == (unsigned)ARMRI::RegPairOdd ? 
ARMRI::RegPairEven : ARMRI::RegPairOdd, OtherReg); @@ -663,7 +671,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); + Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII, this); } assert(Done && "Unable to resolve frame index!"); (void)Done; @@ -775,7 +783,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); + Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII, this); } if (Done) return; @@ -783,21 +791,32 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. - assert((Offset || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) && - "This code isn't needed if offset already handled!"); + assert( + (Offset || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrModeT2_i7s2 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == + ARMII::AddrModeT2_i7s4) && + "This code isn't needed if offset already handled!"); unsigned ScratchReg = 0; int PIdx = MI.findFirstPredOperandIdx(); ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); Register PredReg = (PIdx == -1) ? 
Register() : MI.getOperand(PIdx+1).getReg(); - if (Offset == 0) + + const MCInstrDesc &MCID = MI.getDesc(); + const TargetRegisterClass *RegClass = + TII.getRegClass(MCID, FIOperandNum, this, *MI.getParent()->getParent()); + + if (Offset == 0 && + (Register::isVirtualRegister(FrameReg) || RegClass->contains(FrameReg))) // Must be addrmode4/6. MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false); else { - ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); + ScratchReg = MF.getRegInfo().createVirtualRegister(RegClass); if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 7e2c72b4d71..477f3ad0a9a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -129,6 +129,9 @@ public: const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const; + ArrayRef + getIntraCallClobberedRegs(const MachineFunction *MF) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; bool isAsmClobberable(const MachineFunction &MF, unsigned PhysReg) const override; @@ -176,8 +179,6 @@ public: Register getFrameRegister(const MachineFunction &MF) const override; unsigned getBaseRegister() const { return BasePtr; } - bool isLowRegister(unsigned Reg) const; - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.cpp b/lib/Target/ARM/ARMBasicBlockInfo.cpp index 2de90e816b3..00a2231f59e 100644 --- a/lib/Target/ARM/ARMBasicBlockInfo.cpp +++ b/lib/Target/ARM/ARMBasicBlockInfo.cpp @@ -6,14 +6,16 @@ // //===----------------------------------------------------------------------===// +#include "ARMBasicBlockInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBasicBlockInfo.h" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/Support/Debug.h" #include #define DEBUG_TYPE "arm-bb-utils" @@ -47,7 +49,7 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; - BBI.PostAlign = 0; + BBI.PostAlign = Align::None(); for (MachineInstr &I : *MBB) { BBI.Size += TII->getInstSizeInBytes(I); @@ -62,8 +64,8 @@ void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) { // tBR_JTr contains a .align 2 directive. if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { - BBI.PostAlign = 2; - MBB->getParent()->ensureAlignment(2); + BBI.PostAlign = Align(4); + MBB->getParent()->ensureAlignment(Align(4)); } } @@ -126,9 +128,9 @@ void ARMBasicBlockUtils::adjustBBOffsetsAfter(MachineBasicBlock *BB) { for(unsigned i = BBNum + 1, e = MF.getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. 
- unsigned LogAlign = MF.getBlockNumbered(i)->getAlignment(); - unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); - unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + const Align Align = MF.getBlockNumbered(i)->getAlignment(); + const unsigned Offset = BBInfo[i - 1].postOffset(Align); + const unsigned KnownBits = BBInfo[i - 1].postKnownBits(Align); // This is where block i begins. Stop if the offset is already correct, // and we have updated 2 blocks. This is the maximum number of blocks diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h index 400bba351ce..13df399ed99 100644 --- a/lib/Target/ARM/ARMBasicBlockInfo.h +++ b/lib/Target/ARM/ARMBasicBlockInfo.h @@ -21,17 +21,18 @@ namespace llvm { +struct BasicBlockInfo; using BBInfoVector = SmallVectorImpl; /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. /// -/// @param LogAlign log2(alignment) +/// @param Alignment alignment /// @param KnownBits Number of known low offset bits. -inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { - if (KnownBits < LogAlign) - return (1u << LogAlign) - (1u << KnownBits); +inline unsigned UnknownPadding(Align Alignment, unsigned KnownBits) { + if (KnownBits < Log2(Alignment)) + return Alignment.value() - (1ull << KnownBits); return 0; } @@ -65,10 +66,9 @@ struct BasicBlockInfo { /// multiple of 1 << Unalign. uint8_t Unalign = 0; - /// PostAlign - When non-zero, the block terminator contains a .align - /// directive, so the end of the block is aligned to 1 << PostAlign - /// bytes. - uint8_t PostAlign = 0; + /// PostAlign - When > 1, the block terminator contains a .align + /// directive, so the end of the block is aligned to PostAlign bytes. 
+ Align PostAlign; BasicBlockInfo() = default; @@ -84,16 +84,16 @@ struct BasicBlockInfo { return Bits; } - /// Compute the offset immediately following this block. If LogAlign is + /// Compute the offset immediately following this block. If Align is /// specified, return the offset the successor block will get if it has /// this alignment. - unsigned postOffset(unsigned LogAlign = 0) const { + unsigned postOffset(Align Alignment = Align::None()) const { unsigned PO = Offset + Size; - unsigned LA = std::max(unsigned(PostAlign), LogAlign); - if (!LA) + const Align PA = std::max(PostAlign, Alignment); + if (PA == Align::None()) return PO; // Add alignment padding from the terminator. - return PO + UnknownPadding(LA, internalKnownBits()); + return PO + UnknownPadding(PA, internalKnownBits()); } /// Compute the number of known low bits of postOffset. If this block @@ -101,9 +101,8 @@ struct BasicBlockInfo { /// instruction alignment. An aligned terminator may increase the number /// of know bits. /// If LogAlign is given, also consider the alignment of the next block. 
- unsigned postKnownBits(unsigned LogAlign = 0) const { - return std::max(std::max(unsigned(PostAlign), LogAlign), - internalKnownBits()); + unsigned postKnownBits(Align Align = Align::None()) const { + return std::max(Log2(std::max(PostAlign, Align)), internalKnownBits()); } }; diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 0cbe6e1871e..d3b595ce832 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -90,6 +90,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { MachineInstrBuilder &MIB, CCAssignFn *AssignFn) : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && @@ -169,8 +171,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State)) + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State)) return true; StackSize = @@ -199,9 +202,8 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). 
- auto Flags = OrigArg.Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); - Flags.setOrigAlign(OriginalAlignment); + auto Flags = OrigArg.Flags[0]; + Flags.setOrigAlign(Align(DL.getABITypeAlignment(OrigArg.Ty))); SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx), Flags, OrigArg.IsFixed); return; @@ -211,10 +213,9 @@ void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); - auto Flags = OrigArg.Flags; + auto Flags = OrigArg.Flags[0]; - unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(SplitTy))); bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( @@ -286,7 +287,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { CCAssignFn AssignFn) : ValueHandler(MIRBuilder, MRI, AssignFn) {} - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -298,7 +299,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { int FI = MFI.CreateFixedObject(Size, Offset, true); MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); - unsigned AddrReg = + Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32)); MIRBuilder.buildFrameIndex(AddrReg, FI); @@ -405,6 +406,7 @@ struct FormalArgHandler : public IncomingValueHandler { : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -498,11 +500,7 @@ unsigned getCallOpcode(const ARMSubtarget &STI, bool isDirect) { } } // end anonymous namespace -bool 
ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { +bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); @@ -520,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. - bool IsDirect = !Callee.isReg(); + bool IsDirect = !Info.Callee.isReg(); auto CallOpcode = getCallOpcode(STI, IsDirect); auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode); @@ -528,35 +526,35 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (IsThumb) MIB.add(predOps(ARMCC::AL)); - MIB.add(Callee); + MIB.add(Info.Callee); if (!IsDirect) { - auto CalleeReg = Callee.getReg(); - if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) { + auto CalleeReg = Info.Callee.getReg(); + if (CalleeReg && !Register::isPhysicalRegister(CalleeReg)) { unsigned CalleeIdx = IsThumb ? 
2 : 0; MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass( MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), - *MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx)); + *MIB.getInstr(), MIB->getDesc(), Info.Callee, CalleeIdx)); } } - MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv)); + MIB.addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); bool IsVarArg = false; SmallVector ArgInfos; - for (auto Arg : OrigArgs) { + for (auto Arg : Info.OrigArgs) { if (!isSupportedType(DL, TLI, Arg.Ty)) return false; if (!Arg.IsFixed) IsVarArg = true; - if (Arg.Flags.isByVal()) + if (Arg.Flags[0].isByVal()) return false; splitToValueTypes(Arg, ArgInfos, MF); } - auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg); + auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, IsVarArg); OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) return false; @@ -564,13 +562,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Now we can add the actual call instruction to the correct basic block. 
MIRBuilder.insertInstr(MIB); - if (!OrigRet.Ty->isVoidTy()) { - if (!isSupportedType(DL, TLI, OrigRet.Ty)) + if (!Info.OrigRet.Ty->isVoidTy()) { + if (!isSupportedType(DL, TLI, Info.OrigRet.Ty)) return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, MF); - auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg); + splitToValueTypes(Info.OrigRet, ArgInfos, MF); + auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, IsVarArg); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h index 794127b5ebc..ddbc9feb90e 100644 --- a/lib/Target/ARM/ARMCallLowering.h +++ b/lib/Target/ARM/ARMCallLowering.h @@ -38,9 +38,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, diff --git a/lib/Target/ARM/ARMCallingConv.cpp b/lib/Target/ARM/ARMCallingConv.cpp index 5ede7c67f7c..92ebc542b42 100644 --- a/lib/Target/ARM/ARMCallingConv.cpp +++ b/lib/Target/ARM/ARMCallingConv.cpp @@ -193,7 +193,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. 
auto &DL = State.getMachineFunction().getDataLayout(); - unsigned StackAlign = DL.getStackAlignment(); + unsigned StackAlign = DL.getStackAlignment().value(); unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); ArrayRef RegList; diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp index 2fc5f4aaab5..1c2c8aef55b 100644 --- a/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -179,16 +179,12 @@ public: } static bool GenerateSignBits(Value *V) { - if (auto *Arg = dyn_cast(V)) - return Arg->hasSExtAttr(); - if (!isa(V)) return false; unsigned Opc = cast(V)->getOpcode(); return Opc == Instruction::AShr || Opc == Instruction::SDiv || - Opc == Instruction::SRem || Opc == Instruction::SExt || - Opc == Instruction::SIToFP; + Opc == Instruction::SRem || Opc == Instruction::SExt; } static bool EqualTypeSize(Value *V) { @@ -806,54 +802,48 @@ void IRPromoter::Mutate(Type *OrigTy, /// return value is zeroext. We don't allow opcodes that can introduce sign /// bits. bool ARMCodeGenPrepare::isSupportedValue(Value *V) { - if (auto *I = dyn_cast(V)) { - // Now that we allow small types than TypeSize, only allow icmp of - // TypeSize because they will require a trunc to be legalised. - // TODO: Allow icmp of smaller types, and calculate at the end - // whether the transform would be beneficial. - if (isa(I->getOperand(0)->getType())) + if (auto *I = dyn_cast(V)) { + switch (I->getOpcode()) { + default: + return isa(I) && isSupportedType(I) && + !GenerateSignBits(I); + case Instruction::GetElementPtr: + case Instruction::Store: + case Instruction::Br: + case Instruction::Switch: return true; - return EqualTypeSize(I->getOperand(0)); - } - - if (GenerateSignBits(V)) { - LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n"); - return false; - } - - // Memory instructions - if (isa(V) || isa(V)) - return true; - - // Branches and targets. 
- if( isa(V) || isa(V) || isa(V)) - return true; - - // Non-instruction values that we can handle. - if ((isa(V) && !isa(V)) || isa(V)) + case Instruction::PHI: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Trunc: + case Instruction::BitCast: + return isSupportedType(I); + case Instruction::ZExt: + return isSupportedType(I->getOperand(0)); + case Instruction::ICmp: + // Now that we allow small types than TypeSize, only allow icmp of + // TypeSize because they will require a trunc to be legalised. + // TODO: Allow icmp of smaller types, and calculate at the end + // whether the transform would be beneficial. + if (isa(I->getOperand(0)->getType())) + return true; + return EqualTypeSize(I->getOperand(0)); + case Instruction::Call: { + // Special cases for calls as we need to check for zeroext + // TODO We should accept calls even if they don't have zeroext, as they + // can still be sinks. + auto *Call = cast(I); + return isSupportedType(Call) && + Call->hasRetAttr(Attribute::AttrKind::ZExt); + } + } + } else if (isa(V) && !isa(V)) { + return isSupportedType(V); + } else if (isa(V)) return isSupportedType(V); - if (isa(V) || isa(V) || isa(V) || - isa(V)) - return isSupportedType(V); - - if (auto *Cast = dyn_cast(V)) - return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0)); - - // Special cases for calls as we need to check for zeroext - // TODO We should accept calls even if they don't have zeroext, as they can - // still be sinks. 
- if (auto *Call = dyn_cast(V)) - return isSupportedType(Call) && - Call->hasRetAttr(Attribute::AttrKind::ZExt); - - if (!isa(V)) - return false; - - if (!isSupportedType(V)) - return false; - - return true; + return isa(V); } /// Check that the type of V would be promoted and that the original type is diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 60e5d7bf609..24ca25f73e9 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,8 +26,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -69,6 +71,7 @@ STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); +STATISTIC(NumLEInserted, "Number of LE backwards branches inserted"); static cl::opt AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), @@ -212,6 +215,7 @@ namespace { const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + MachineDominatorTree *DT = nullptr; bool isThumb; bool isThumb1; bool isThumb2; @@ -224,6 +228,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -238,7 +247,7 @@ namespace 
{ void doInitialJumpTablePlacement(std::vector &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr *CPEMI); + Align getCPEAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); void initializeFunctionInfo(const std::vector &CPEMIs); MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); @@ -327,8 +336,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) - << " ua=" << unsigned(BBI.Unalign) - << " pa=" << unsigned(BBI.PostAlign) + << " ua=" << unsigned(BBI.Unalign) << " pa=" << Log2(BBI.PostAlign) << format(" size=%#x\n", BBInfo[J].Size); } }); @@ -349,6 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isPositionIndependentOrROPI = STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); AFI = MF->getInfo(); + DT = &getAnalysis(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -357,9 +366,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); - // This pass invalidates liveness information when it splits basic blocks. - MF->getRegInfo().invalidateLiveness(); - // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -398,7 +404,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Functions with jump tables need an alignment of 4 because they use the ADR // instruction, which aligns the PC to 4 bytes before adding an offset. if (!T2JumpTables.empty()) - MF->ensureAlignment(2); + MF->ensureAlignment(Align(4)); /// Remove dead constant pool entries. 
MadeChange |= removeUnusedCPEntries(); @@ -487,8 +493,9 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); - // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + // MachineConstantPool measures alignment in bytes. + const Align MaxAlign(MCP->getConstantPoolAlignment()); + const unsigned MaxLogAlign = Log2(MaxAlign); // Mark the basic block as required by the const-pool. BB->setAlignment(MaxAlign); @@ -501,7 +508,8 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // alignment of all entries as long as BB is sufficiently aligned. Keep // track of the insertion point for each alignment. We are going to bucket // sort the entries as they are created. - SmallVector InsPoint(MaxAlign + 1, BB->end()); + SmallVector InsPoint(MaxLogAlign + 1, + BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. @@ -526,7 +534,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= MaxLogAlign; ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; @@ -640,29 +648,27 @@ ARMConstantIslands::findConstPoolEntry(unsigned CPI, return nullptr; } -/// getCPELogAlign - Returns the required alignment of the constant pool entry -/// represented by CPEMI. Alignment is measured in log2(bytes) units. -unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { +/// getCPEAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. 
+Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) { switch (CPEMI->getOpcode()) { case ARM::CONSTPOOL_ENTRY: break; case ARM::JUMPTABLE_TBB: - return isThumb1 ? 2 : 0; + return isThumb1 ? Align(4) : Align(1); case ARM::JUMPTABLE_TBH: - return isThumb1 ? 2 : 1; + return isThumb1 ? Align(4) : Align(2); case ARM::JUMPTABLE_INSTS: - return 1; + return Align(2); case ARM::JUMPTABLE_ADDRS: - return 2; + return Align(4); default: llvm_unreachable("unknown constpool entry kind"); } unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); + return Align(MCP->getConstants()[CPI].getAlignment()); } /// scanFunctionJumpTables - Do a scan of the function, building up @@ -687,7 +693,7 @@ initializeFunctionInfo(const std::vector &CPEMIs) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); // The known bits of the entry block offset are determined by the function // alignment. - BBInfo.front().KnownBits = MF->getAlignment(); + BBInfo.front().KnownBits = Log2(MF->getAlignment()); // Compute block offsets and known bits. BBUtils->adjustBBOffsetsAfter(&MF->front()); @@ -824,11 +830,6 @@ initializeFunctionInfo(const std::vector &CPEMIs) { Scale = 2; // +-(offset_8*2) NegOk = true; break; - - case ARM::tLDRHi: - Bits = 5; - Scale = 2; // +(offset_5*2) - break; } // Remember that this is a user of a CP entry. @@ -885,6 +886,13 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); + // Collect liveness information at MI. 
+ LivePhysRegs LRs(*MF->getSubtarget().getRegisterInfo()); + LRs.addLiveOuts(*OrigBB); + auto LivenessEnd = ++MachineBasicBlock::iterator(MI).getReverse(); + for (MachineInstr &LiveMI : make_range(OrigBB->rbegin(), LivenessEnd)) + LRs.stepBackward(LiveMI); + // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); @@ -913,6 +921,12 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); + // Update live-in information in the new block. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (MCPhysReg L : LRs) + if (!MRI.isReserved(L)) + NewBB->addLiveIn(L); + // Update internal data structures to account for the newly inserted MBB. // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. @@ -1007,13 +1021,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned CPELogAlign = getCPELogAlign(U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; + const Align CPEAlign = getCPEAlign(U.CPEMI); + const unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPEAlign); + unsigned NextBlockOffset; + Align NextBlockAlignment; MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; } else { NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; NextBlockAlignment = NextBlock->getAlignment(); @@ -1028,13 +1042,13 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. 
- Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); + Growth += offsetToAlignment(CPEEnd, NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. if (CPEOffset < UserOffset) - UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + UserOffset += Growth + UnknownPadding(MF->getAlignment(), Log2(CPEAlign)); } else // CPE fits in existing padding. Growth = 0; @@ -1200,8 +1214,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // inserting islands between BB0 and BB1 makes other accesses out of range. MachineBasicBlock *UserBB = U.MI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); - unsigned MinNoSplitDisp = - BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI)); + const Align CPEAlign = getCPEAlign(U.CPEMI); + unsigned MinNoSplitDisp = BBInfo[UserBB->getNumber()].postOffset(CPEAlign); if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2) return false; for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; @@ -1254,7 +1268,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(CPEMI); + const Align CPEAlign = getCPEAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); BBInfoVector &BBInfo = BBUtils->getBBInfo(); const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1267,7 +1281,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; // Compute the offset where the CPE will begin. 
- unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; + unsigned CPEOffset = UserBBI.postOffset(CPEAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1308,11 +1322,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to - // LogAlign which is the largest possible alignment in the function. - unsigned LogAlign = MF->getAlignment(); - assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + // Align which is the largest possible alignment in the function. + const Align Align = MF->getAlignment(); + assert(Align >= CPEAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); - unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned UPad = UnknownPadding(Align, KnownBits); unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad; LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1323,7 +1337,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset -= 4; LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) - << " la=" << LogAlign << " kb=" << KnownBits + << " la=" << Log2(Align) << " kb=" << KnownBits << " up=" << UPad << '\n'); // This could point off the end of the block if we've already got constant @@ -1337,6 +1351,28 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset = std::max(UserBBI.postOffset() - UPad - 8, UserOffset + TII->getInstSizeInBytes(*UserMI) + 1); + // If the CP is referenced(ie, UserOffset) is in first four instructions + // after IT, this recalculated BaseInsertOffset could be in the middle of + // an IT block. If it is, change the BaseInsertOffset to just after the + // IT block. 
This still make the CP Entry is in range becuase of the + // following reasons. + // 1. The initial BaseseInsertOffset calculated is (UserOffset + + // U.getMaxDisp() - UPad). + // 2. An IT block is only at most 4 instructions plus the "it" itself (18 + // bytes). + // 3. All the relevant instructions support much larger Maximum + // displacement. + MachineBasicBlock::iterator I = UserMI; + ++I; + for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI), + PredReg = 0; + I->getOpcode() != ARM::t2IT && + getITInstrPredicate(*I, PredReg) != ARMCC::AL; + Offset += TII->getInstSizeInBytes(*I), I = std::next(I)) { + BaseInsertOffset = + std::max(BaseInsertOffset, Offset + TII->getInstSizeInBytes(*I) + 1); + assert(I != UserMBB->end() && "Fell off end of block"); + } LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1354,8 +1390,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. - BaseInsertOffset -= 1u << LogAlign; - EndInsertOffset -= 1u << LogAlign; + BaseInsertOffset -= Align.value(); + EndInsertOffset -= Align.value(); } // This is overly conservative, as we don't account for CPEMIs being // reused within the block, but it doesn't matter much. Also assume CPEs @@ -1397,9 +1433,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, } // We really must not split an IT block. 
- LLVM_DEBUG(unsigned PredReg; assert( - !isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL)); - +#ifndef NDEBUG + unsigned PredReg; + assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL); +#endif NewMBB = splitBlockBeforeInstr(&*MI); } @@ -1464,9 +1501,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // Always align the new block because CP entries can be smaller than 4 // bytes. Be careful not to decrease the existing alignment, e.g. NewMBB may // be an already aligned constant pool block. - const unsigned Align = isThumb ? 1 : 2; - if (NewMBB->getAlignment() < Align) - NewMBB->setAlignment(Align); + const Align Alignment = isThumb ? Align(2) : Align(4); + if (NewMBB->getAlignment() < Alignment) + NewMBB->setAlignment(Alignment); // Remove the original WaterList entry; we want subsequent insertions in // this vicinity to go after the one we're about to insert. This @@ -1495,7 +1532,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, decrementCPEReferenceCount(CPI, CPEMI); // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + NewIsland->setAlignment(getCPEAlign(U.CPEMI)); // Increase the size of the island block to account for the new entry. BBUtils->adjustBBSize(NewIsland, Size); @@ -1529,10 +1566,11 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { BBInfo[CPEBB->getNumber()].Size = 0; // This block no longer needs to be aligned. - CPEBB->setAlignment(0); - } else + CPEBB->setAlignment(Align::None()); + } else { // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin())); + CPEBB->setAlignment(getCPEAlign(&*CPEBB->begin())); + } BBUtils->adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. 
Check if @@ -1620,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { // L2: ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); CC = ARMCC::getOppositeCondition(CC); - unsigned CCReg = MI->getOperand(2).getReg(); + Register CCReg = MI->getOperand(2).getReg(); // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through block. Otherwise, @@ -1778,16 +1816,10 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { return MadeChange; } -bool ARMConstantIslands::optimizeThumb2Branches() { - bool MadeChange = false; - // The order in which branches appear in ImmBranches is approximately their - // order within the function body. By visiting later branches first, we reduce - // the distance between earlier forward branches and their targets, making it - // more likely that the cbn?z optimization, which can only apply to forward - // branches, will succeed. - for (unsigned i = ImmBranches.size(); i != 0; --i) { - ImmBranch &Br = ImmBranches[i-1]; +bool ARMConstantIslands::optimizeThumb2Branches() { + + auto TryShrinkBranch = [this](ImmBranch &Br) { unsigned Opcode = Br.MI->getOpcode(); unsigned NewOpc = 0; unsigned Scale = 1; @@ -1815,47 +1847,115 @@ bool ARMConstantIslands::optimizeThumb2Branches() { BBUtils->adjustBBSize(MBB, -2); BBUtils->adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; - MadeChange = true; + return true; } } + return false; + }; - Opcode = Br.MI->getOpcode(); - if (Opcode != ARM::tBcc) - continue; + struct ImmCompare { + MachineInstr* MI = nullptr; + unsigned NewOpc = 0; + }; + + auto FindCmpForCBZ = [this](ImmBranch &Br, ImmCompare &ImmCmp, + MachineBasicBlock *DestBB) { + ImmCmp.MI = nullptr; + ImmCmp.NewOpc = 0; // If the conditional branch doesn't kill CPSR, then CPSR can be liveout // so this transformation is not safe. 
if (!Br.MI->killsRegister(ARM::CPSR)) - continue; + return false; - NewOpc = 0; unsigned PredReg = 0; + unsigned NewOpc = 0; ARMCC::CondCodes Pred = getInstrPredicate(*Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) NewOpc = ARM::tCBNZ; - if (!NewOpc) - continue; - MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + else + return false; + // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2; BBInfoVector &BBInfo = BBUtils->getBBInfo(); unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126) - continue; + return false; // Search backwards to find a tCMPi8 auto *TRI = STI->getRegisterInfo(); MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI); if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8) + return false; + + ImmCmp.MI = CmpMI; + ImmCmp.NewOpc = NewOpc; + return true; + }; + + auto TryConvertToLE = [this](ImmBranch &Br, ImmCompare &Cmp) { + if (Br.MI->getOpcode() != ARM::t2Bcc || !STI->hasLOB() || + STI->hasMinSize()) + return false; + + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBUtils->getOffsetOf(MBB) < BBUtils->getOffsetOf(DestBB) || + !BBUtils->isBBInRange(Br.MI, DestBB, 4094)) + return false; + + if (!DT->dominates(DestBB, MBB)) + return false; + + // We queried for the CBN?Z opcode based upon the 'ExitBB', the opposite + // target of Br. So now we need to reverse the condition. + Cmp.NewOpc = Cmp.NewOpc == ARM::tCBZ ? 
ARM::tCBNZ : ARM::tCBZ; + + MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), + TII->get(ARM::t2LE)); + MIB.add(Br.MI->getOperand(0)); + Br.MI->eraseFromParent(); + Br.MI = MIB; + ++NumLEInserted; + return true; + }; + + bool MadeChange = false; + + // The order in which branches appear in ImmBranches is approximately their + // order within the function body. By visiting later branches first, we reduce + // the distance between earlier forward branches and their targets, making it + // more likely that the cbn?z optimization, which can only apply to forward + // branches, will succeed. + for (ImmBranch &Br : reverse(ImmBranches)) { + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineBasicBlock *ExitBB = &MBB->back() == Br.MI ? + MBB->getFallThrough() : + MBB->back().getOperand(0).getMBB(); + + ImmCompare Cmp; + if (FindCmpForCBZ(Br, Cmp, ExitBB) && TryConvertToLE(Br, Cmp)) { + DestBB = ExitBB; + MadeChange = true; + } else { + FindCmpForCBZ(Br, Cmp, DestBB); + MadeChange |= TryShrinkBranch(Br); + } + + unsigned Opcode = Br.MI->getOpcode(); + if ((Opcode != ARM::tBcc && Opcode != ARM::t2LE) || !Cmp.NewOpc) continue; - unsigned Reg = CmpMI->getOperand(0).getReg(); + Register Reg = Cmp.MI->getOperand(0).getReg(); // Check for Kill flags on Reg. If they are present remove them and set kill // on the new CBZ. 
+ auto *TRI = STI->getRegisterInfo(); MachineBasicBlock::iterator KillMI = Br.MI; bool RegKilled = false; do { @@ -1865,19 +1965,32 @@ bool ARMConstantIslands::optimizeThumb2Branches() { RegKilled = true; break; } - } while (KillMI != CmpMI); + } while (KillMI != Cmp.MI); // Create the new CBZ/CBNZ - MachineBasicBlock *MBB = Br.MI->getParent(); - LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); + LLVM_DEBUG(dbgs() << "Fold: " << *Cmp.MI << " and: " << *Br.MI); MachineInstr *NewBR = - BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(Cmp.NewOpc)) .addReg(Reg, getKillRegState(RegKilled)) .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; + + Cmp.MI->eraseFromParent(); + BBInfoVector &BBInfo = BBUtils->getBBInfo(); BBInfo[MBB->getNumber()].Size -= 2; + + if (Br.MI->getOpcode() == ARM::tBcc) { + Br.MI->eraseFromParent(); + Br.MI = NewBR; + } else if (&MBB->back() != Br.MI) { + // We've generated an LE and already erased the original conditional + // branch. The CBN?Z is now used to branch to the other successor, so an + // unconditional branch terminator is now redundant. + MachineInstr *LastMI = &MBB->back(); + if (LastMI != Br.MI) { + BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); + LastMI->eraseFromParent(); + } + } BBUtils->adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; @@ -1931,8 +2044,8 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, // of BaseReg, but only if the t2ADDrs can be removed. // + Some instruction other than t2ADDrs computing the entry. Not seen in // the wild, but we should be careful. 
- unsigned EntryReg = JumpMI->getOperand(0).getReg(); - unsigned BaseReg = LEAMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); + Register BaseReg = LEAMI->getOperand(0).getReg(); CanDeleteLEA = true; BaseRegKill = false; @@ -2009,7 +2122,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg // and is not clobbered / used. MachineInstr *RemovableAdd = nullptr; - unsigned EntryReg = JumpMI->getOperand(0).getReg(); + Register EntryReg = JumpMI->getOperand(0).getReg(); // Find the last ADD to set EntryReg MachineBasicBlock::iterator I(LEAMI); @@ -2106,7 +2219,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // %idx = tLSLri %idx, 2 // %base = tLEApcrelJT // %t = tLDRr %base, %idx - unsigned BaseReg = User.MI->getOperand(0).getReg(); + Register BaseReg = User.MI->getOperand(0).getReg(); if (User.MI->getIterator() == User.MI->getParent()->begin()) continue; @@ -2116,7 +2229,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { !Shift->getOperand(2).isKill()) continue; IdxReg = Shift->getOperand(2).getReg(); - unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); + Register ShiftedIdxReg = Shift->getOperand(0).getReg(); // It's important that IdxReg is live until the actual TBB/TBH. Most of // the range is checked later, but the LEA might still clobber it and not @@ -2313,6 +2426,10 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction::iterator MBBI = ++JTBB->getIterator(); MF->insert(MBBI, NewBB); + // Copy live-in information to new block. + for (const MachineBasicBlock::RegisterMaskPair &RegMaskPair : BB->liveins()) + NewBB->addLiveIn(RegMaskPair); + // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond directly to anything in the source. 
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 3bdb0e1ef62..72c95f44126 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index b32ba3eeea1..563fdda5610 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -481,7 +481,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { unsigned OpIdx = 0; bool DstIsDead = MI.getOperand(OpIdx).isDead(); - unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + Register DstReg = MI.getOperand(OpIdx++).getReg(); if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 || TableEntry->RealOpc == ARM::VLD2DUPd16x2 || TableEntry->RealOpc == ARM::VLD2DUPd32x2) { @@ -492,7 +492,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { assert(RegSpc == OddDblSpc && "Unexpected spacing!"); SubRegIndex = ARM::dsub_1; } - unsigned SubReg = TRI->getSubReg(DstReg, SubRegIndex); + Register SubReg = TRI->getSubReg(DstReg, SubRegIndex); unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, &ARM::DPairSpcRegClass); MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead)); @@ -624,7 +624,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { bool SrcIsKill = MI.getOperand(OpIdx).isKill(); bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, getUndefRegState(SrcIsUndef)); @@ -760,7 +760,7 @@ void 
ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, } bool SrcIsKill = MI.getOperand(OpIdx).isKill(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0); @@ -789,6 +789,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { case MachineOperand::MO_Immediate: case MachineOperand::MO_CImmediate: case MachineOperand::MO_FPImmediate: + case MachineOperand::MO_ShuffleMask: return false; case MachineOperand::MO_MachineBasicBlock: return true; @@ -828,7 +829,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); @@ -932,13 +933,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - unsigned TempReg = MI.getOperand(1).getReg(); + Register TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. 
assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); - unsigned NewReg = MI.getOperand(4).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); + Register NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -1035,8 +1036,8 @@ static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg, unsigned Flags, bool IsThumb, const TargetRegisterInfo *TRI) { if (IsThumb) { - unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); - unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); + Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0); + Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1); MIB.addReg(RegLo, Flags); MIB.addReg(RegHi, Flags); } else @@ -1051,19 +1052,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - unsigned TempReg = MI.getOperand(1).getReg(); + Register TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. 
assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned DesiredReg = MI.getOperand(3).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register DesiredReg = MI.getOperand(3).getReg(); MachineOperand New = MI.getOperand(4); New.setIsKill(false); - unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); - unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); - unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); - unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); + Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); + Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); + Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); + Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -1204,8 +1205,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); - // Delete the pseudo instruction TCRETURN. + + // Update call site info and delete the pseudo instruction TCRETURN. + MBB.getParent()->moveCallSiteInfo(&MI, &*NewMI); MBB.erase(MBBI); + MBBI = NewMI; return true; } @@ -1336,7 +1340,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // for us. Otherwise, expand to nothing. 
if (RI.hasBasePointer(MF)) { int32_t NumBytes = AFI->getFramePtrSpillOffset(); - unsigned FramePtr = RI.getFrameRegister(MF); + Register FramePtr = RI.getFrameRegister(MF); assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); @@ -1412,7 +1416,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), "__aeabi_read_tp", PCLabelID, 0); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); @@ -1435,6 +1439,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.cloneMemRefs(MI); TransferImpOps(MI, MIB, MIB); + MI.getMF()->moveCallSiteInfo(&MI, &*MIB); MI.eraseFromParent(); return true; } @@ -1442,7 +1447,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) @@ -1464,7 +1469,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::LDRLIT_ga_pcrel_ldr: case ARM::tLDRLIT_ga_abs: case ARM::tLDRLIT_ga_pcrel: { - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); auto Flags = MO1.getTargetFlags(); @@ -1522,7 +1527,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::t2MOV_ga_pcrel: { // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 
unsigned LabelId = AFI->createPICLabelUId(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); @@ -1586,7 +1591,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Grab the Q register destination. bool DstIsDead = MI.getOperand(OpIdx).isDead(); - unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + Register DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the source register. MIB.add(MI.getOperand(OpIdx++)); @@ -1596,8 +1601,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.add(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). - unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); @@ -1617,7 +1622,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Grab the Q register source. bool SrcIsKill = MI.getOperand(OpIdx).isKill(); - unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + Register SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. MachineOperand Dst(MI.getOperand(OpIdx++)); @@ -1628,8 +1633,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.add(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). - unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) .addReg(D1, SrcIsKill ? 
RegState::Kill : 0); @@ -1915,6 +1920,37 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::CMP_SWAP_64: return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); + + case ARM::tBL_PUSHLR: + case ARM::BL_PUSHLR: { + const bool Thumb = Opcode == ARM::tBL_PUSHLR; + Register Reg = MI.getOperand(0).getReg(); + assert(Reg == ARM::LR && "expect LR register!"); + MachineInstrBuilder MIB; + if (Thumb) { + // push {lr} + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(Reg); + + // bl __gnu_mcount_nc + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); + } else { + // stmdb sp!, {lr} + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(Reg); + + // bl __gnu_mcount_nc + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); + } + MIB.cloneMemRefs(MI); + for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); + MI.eraseFromParent(); + return true; + } } } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6e274d269bf..1fc5ff6921c 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -191,8 +191,8 @@ class ARMFastISel final : public FastISel { bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality); - bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + bool isZExt); + bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, @@ -219,15 +219,15 @@ class ARMFastISel final : public FastISel { bool Return, bool isVarArg); bool ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, 
- SmallVectorImpl &RegArgs, + SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg); unsigned getLibcallReg(const Twine &Name); - bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, + bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg); bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); @@ -301,7 +301,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); // Make sure the input operand is sufficiently constrained to be legal @@ -913,7 +913,7 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, +bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr, unsigned Alignment, bool isZExt, bool allocReg) { unsigned Opc; bool useAM3 = false; @@ -1045,7 +1045,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { Address Addr; if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; - unsigned ResultReg; + Register ResultReg; if (!ARMEmitLoad(VT, ResultReg, Addr, cast(I)->getAlignment())) return false; updateValueMap(I, ResultReg); @@ -1259,8 +1259,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; unsigned BrOpc = isThumb2 ? 
ARM::t2Bcc : ARM::Bcc; @@ -1349,7 +1348,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt, bool isEquality) { + bool isZExt) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; @@ -1397,19 +1396,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; - else - CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; + CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; break; case MVT::f64: isICmp = false; - // Equality comparisons shouldn't raise Invalid on uordered inputs. - if (isEquality) - CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; - else - CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; + CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; break; case MVT::i1: case MVT::i8: @@ -1485,8 +1476,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), - CI->isEquality())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Now set a register based on the comparison. 
Explicitly set the predicates @@ -1893,10 +1883,10 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, } bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, + SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg) { @@ -1960,7 +1950,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; const Value *ArgVal = Args[VA.getValNo()]; - unsigned Arg = ArgRegs[VA.getValNo()]; + Register Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && @@ -2039,7 +2029,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, return true; } -bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, +bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes, bool isVarArg) { // Issue CALLSEQ_END @@ -2060,7 +2050,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, // double fp reg we want. 
MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); - unsigned ResultReg = createResultReg(DstRC); + Register ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVDRR), ResultReg) .addReg(RVLocs[0].getLocReg()) @@ -2081,7 +2071,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - unsigned ResultReg = createResultReg(DstRC); + Register ResultReg = createResultReg(DstRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(RVLocs[0].getLocReg()); @@ -2162,7 +2152,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { } // Make the copy. - unsigned DstReg = VA.getLocReg(); + Register DstReg = VA.getLocReg(); const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) @@ -2231,7 +2221,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Set up the argument vectors. SmallVector Args; - SmallVector ArgRegs; + SmallVector ArgRegs; SmallVector ArgVTs; SmallVector ArgFlags; Args.reserve(I->getNumOperands()); @@ -2247,8 +2237,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!isTypeLegal(ArgTy, ArgVT)) return false; ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(Op); ArgRegs.push_back(Arg); @@ -2257,13 +2246,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { } // Handle the arguments now that we've gotten them. 
- SmallVector RegArgs; + SmallVector RegArgs; unsigned NumBytes; if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes, false)) return false; - unsigned CalleeReg = 0; + Register CalleeReg; if (Subtarget->genLongCalls()) { CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); if (CalleeReg == 0) return false; @@ -2282,7 +2271,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. - for (unsigned R : RegArgs) + for (Register R : RegArgs) MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. @@ -2290,7 +2279,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. - SmallVector UsedRegs; + SmallVector UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false; // Set all unused physreg defs as dead. @@ -2340,7 +2329,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Set up the argument vectors. SmallVector Args; - SmallVector ArgRegs; + SmallVector ArgRegs; SmallVector ArgVTs; SmallVector ArgFlags; unsigned arg_size = CS.arg_size(); @@ -2377,12 +2366,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, ArgVT != MVT::i1) return false; - unsigned Arg = getRegForValue(*i); - if (Arg == 0) + Register Arg = getRegForValue(*i); + if (!Arg.isValid()) return false; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(ArgTy))); Args.push_back(*i); ArgRegs.push_back(Arg); @@ -2391,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, } // Handle the arguments now that we've gotten them. 
- SmallVector RegArgs; + SmallVector RegArgs; unsigned NumBytes; if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes, isVarArg)) @@ -2401,7 +2389,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, const GlobalValue *GV = dyn_cast(Callee); if (!GV || Subtarget->genLongCalls()) UseReg = true; - unsigned CalleeReg = 0; + Register CalleeReg; if (UseReg) { if (IntrMemName) CalleeReg = getLibcallReg(IntrMemName); @@ -2427,7 +2415,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. - for (unsigned R : RegArgs) + for (Register R : RegArgs) MIB.addReg(R, RegState::Implicit); // Add a register mask with the call-preserved registers. @@ -2435,7 +2423,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Finish off the call including any return values. - SmallVector UsedRegs; + SmallVector UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg)) return false; @@ -2476,7 +2464,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, } bool RV; - unsigned ResultReg; + Register ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); assert(RV && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); @@ -2506,7 +2494,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { const ARMBaseRegisterInfo *RegInfo = static_cast(Subtarget->getRegisterInfo()); - unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); + Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; // Recursively load frame address @@ -2947,7 +2935,7 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - unsigned ResultReg = MI->getOperand(0).getReg(); + Register ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, 
Addr, LI->getAlignment(), isZExt, false)) return false; MachineBasicBlock::iterator I(MI); @@ -2974,7 +2962,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); + Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index bedb779bcba..01ae93086dc 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -76,7 +76,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)), STI(sti) {} bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const { @@ -376,7 +376,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc dl; - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -780,7 +780,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI.getStackSize(); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. 
@@ -1503,11 +1503,17 @@ static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, /// instructions will require a scratch register during their expansion later. // FIXME: Move to TII? static unsigned estimateRSStackSizeLimit(MachineFunction &MF, - const TargetFrameLowering *TFI) { + const TargetFrameLowering *TFI, + bool &HasNonSPFrameIndex) { const ARMFunctionInfo *AFI = MF.getInfo(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned Limit = (1 << 12) - 1; for (auto &MBB : MF) { for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (!MI.getOperand(i).isFI()) continue; @@ -1518,13 +1524,29 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, Limit = std::min(Limit, (1U << 8) - 1); break; } + // t2ADDri will not require an extra register, it can reuse the + // destination. + if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12) + break; + + const MCInstrDesc &MCID = MI.getDesc(); + const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF); + if (RegClass && !RegClass->contains(ARM::SP)) + HasNonSPFrameIndex = true; // Otherwise check the addressing mode. switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode_i12: + case ARMII::AddrMode2: + // Default 12 bit limit. + break; case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); break; + case ARMII::AddrMode5FP16: + Limit = std::min(Limit, ((1U << 8) - 1) * 2); + break; case ARMII::AddrMode5: case ARMII::AddrModeT2_i8s4: case ARMII::AddrModeT2_ldrex: @@ -1541,8 +1563,17 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // Addressing modes 4 & 6 (load/store) instructions can't encode an // immediate offset for stack references. 
return 0; - default: + case ARMII::AddrModeT2_i7: + Limit = std::min(Limit, ((1U << 7) - 1) * 1); break; + case ARMII::AddrModeT2_i7s2: + Limit = std::min(Limit, ((1U << 7) - 1) * 2); + break; + case ARMII::AddrModeT2_i7s4: + Limit = std::min(Limit, ((1U << 7) - 1) * 4); + break; + default: + llvm_unreachable("Unhandled addressing mode in stack size limit calculation"); } break; // At most one FI per instruction } @@ -1623,7 +1654,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); (void)TRI; // Silence unused warning in non-assert builds. - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. Also spill R4 if Thumb2 function has varsized objects, @@ -1784,6 +1815,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedStackSize += 16; // For possible paddings. unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit; + bool HasNonSPFrameIndex = false; if (AFI->isThumb1OnlyFunction()) { // For Thumb1, don't bother to iterate over the function. 
The only // instruction that requires an emergency spill slot is a store to a @@ -1804,7 +1836,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedRSStackSizeLimit = (1U << 8) * 4; EstimatedRSFixedSizeLimit = (1U << 5) * 4; } else { - EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + EstimatedRSStackSizeLimit = + estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex); EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit; } // Final estimate of whether sp or bp-relative accesses might require @@ -1830,12 +1863,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit; bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP || - HasLargeArgumentList; + HasLargeArgumentList || HasNonSPFrameIndex; LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit - << "; EstimatedStack" << EstimatedStackSize - << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset - << "; BigFrameOffsets: " << BigFrameOffsets - << "\n"); + << "; EstimatedStack: " << EstimatedStackSize + << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset + << "; BigFrameOffsets: " << BigFrameOffsets << "\n"); if (BigFrameOffsets || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); @@ -2080,9 +2112,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, ExtraCSSpill = true; } } - if (!ExtraCSSpill) { + if (!ExtraCSSpill && RS) { // Reserve a slot closest to SP or frame pointer. 
- assert(RS && "Register scavenging not provided"); LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n"); const TargetRegisterClass &RC = ARM::GPRRegClass; unsigned Size = TRI->getSpillSize(RC); @@ -2097,6 +2128,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } AFI->setLRIsSpilled(SavedRegs.test(ARM::LR)); + + // If we have the "returned" parameter attribute which guarantees that we + // return the value which was passed in r0 unmodified (e.g. C++ 'structors), + // record that fact for IPRA. + if (AFI->getPreservesR0()) + SavedRegs.set(ARM::R0); } MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 7544ca3c38d..6d8aee59794 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -63,6 +63,11 @@ public: bool enableShrinkWrapping(const MachineFunction &MF) const override { return true; } + bool isProfitableForNoCSROpt(const Function &F) const override { + // The no-CSR optimisation is bad for code size on ARM, because we can save + // many registers with a single PUSH/POP pair. 
+ return false; + } private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index b349627b67b..8f6515c423e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -139,6 +139,8 @@ public: bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); + template + bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); @@ -146,9 +148,12 @@ public: SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - template - bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, - SDValue &OffImm); + template + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, + unsigned Shift); + template + bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); @@ -179,6 +184,7 @@ private: bool tryARMIndexedLoad(SDNode *N); bool tryT1IndexedLoad(SDNode *N); bool tryT2IndexedLoad(SDNode *N); + bool tryMVEIndexedLoad(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -246,10 +252,6 @@ private: SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, bool is64BitVector); - /// Returns the number of instructions required to materialize the given - /// constant in a register, or 3 if a literal pool load is needed. 
- unsigned ConstantMaterializationCost(unsigned Val) const; - /// Checks if N is a multiplication by a constant where we can extract out a /// power of two from the constant so that it can be used in a shift, but only /// if it simplifies the materialization of the constant. Returns true if it @@ -450,27 +452,6 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } -unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const { - if (Subtarget->isThumb()) { - if (Val <= 255) return 1; // MOV - if (Subtarget->hasV6T2Ops() && - (Val <= 0xffff || // MOV - ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW - ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN - return 1; - if (Val <= 510) return 2; // MOV + ADDi8 - if (~Val <= 255) return 2; // MOV + MVN - if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL - } else { - if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV - if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN - if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW - if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs - } - if (Subtarget->useMovt()) return 2; // MOVW + MOVT - return 3; // Literal pool load -} - bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, unsigned &PowerOfTwo, @@ -500,8 +481,8 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, // Only optimise if the new cost is better unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); - unsigned OldCost = ConstantMaterializationCost(MulConstVal); - unsigned NewCost = ConstantMaterializationCost(NewMulConstVal); + unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); + unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); return NewCost < OldCost; } @@ -1172,6 +1153,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, return false; } 
+template +bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, + RHSC)) { + Base = N.getOperand(0); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + OffImm = + CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + return true; +} + //===----------------------------------------------------------------------===// // Thumb 2 Addressing Modes @@ -1278,35 +1281,59 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, return false; } -template -bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, - SDValue &Base, SDValue &OffImm) { - if (N.getOpcode() == ISD::SUB || - CurDAG->isBaseWithConstantOffset(N)) { - if (auto RHS = dyn_cast(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, + RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); + } + if (N.getOpcode() == ISD::SUB) RHSC = -RHSC; - - if (isShiftedInt<7, Shift>(RHSC)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } - OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); - return true; - } + OffImm = + CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; } } // Base only. 
Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm) { + return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm, + unsigned Shift) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + int RHSC; + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + OffImm = + ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) + ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) + : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), + MVT::i32); + return true; + } + return false; +} + bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { @@ -1565,6 +1592,68 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { return false; } +bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + EVT LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + unsigned Align = LD->getAlignment(); + bool IsLE = Subtarget->isLittle(); + + if (Align >= 2 && LoadedVT == MVT::v4i16 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; + else + Opcode = isPre ? 
ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; + } else if (LoadedVT == MVT::v8i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; + } else if (LoadedVT == MVT::v4i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; + } else if (Align >= 4 && + (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; + else if (Align >= 2 && + (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; + else if ((IsLE || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; + else + return false; + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = {Base, Offset, + CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), + CurDAG->getRegister(0, MVT::i32), Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + MVT::i32, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), SDValue(New, 1)); + ReplaceUses(SDValue(N, 1), SDValue(New, 0)); + ReplaceUses(SDValue(N, 2), SDValue(New, 2)); + CurDAG->RemoveDeadNode(N); + return true; +} + /// Form a GPRPair pseudo register from a pair of GPR regs. 
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); @@ -2701,7 +2790,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: { unsigned Val = cast(N)->getZExtValue(); // If we can't materialize the constant we need to use a literal pool - if (ConstantMaterializationCost(Val) > 2) { + if (ConstantMaterializationCost(Val, Subtarget) > 2) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), TLI->getPointerTy(CurDAG->getDataLayout())); @@ -2842,8 +2931,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { bool PreferImmediateEncoding = Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); if (!PreferImmediateEncoding && - ConstantMaterializationCost(Imm) > - ConstantMaterializationCost(~Imm)) { + ConstantMaterializationCost(Imm, Subtarget) > + ConstantMaterializationCost(~Imm, Subtarget)) { // The current immediate costs more to materialize than a negated // immediate, so negate the immediate and use a BIC. SDValue NewImm = @@ -2987,6 +3076,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } case ISD::LOAD: { + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; if (Subtarget->isThumb() && Subtarget->hasThumb2()) { if (tryT2IndexedLoad(N)) return; @@ -2998,13 +3089,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } - case ARMISD::WLS: { - SDValue Ops[] = { N->getOperand(1), // Loop count - N->getOperand(2), // Exit target + case ARMISD::WLS: + case ARMISD::LE: { + SDValue Ops[] = { N->getOperand(1), + N->getOperand(2), N->getOperand(0) }; - SDNode *LoopStart = - CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops); - ReplaceUses(N, LoopStart); + unsigned Opc = N->getOpcode() == ARMISD::WLS ? 
+ ARM::t2WhileLoopStart : ARM::t2LoopEnd; + SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); + ReplaceUses(N, New); + CurDAG->RemoveDeadNode(N); + return; + } + case ARMISD::LOOP_DEC: { + SDValue Ops[] = { N->getOperand(1), + N->getOperand(2), + N->getOperand(0) }; + SDNode *Dec = + CurDAG->getMachineNode(ARM::t2LoopDec, dl, + CurDAG->getVTList(MVT::i32, MVT::Other), Ops); + ReplaceUses(N, Dec); CurDAG->RemoveDeadNode(N); return; } @@ -4365,7 +4469,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to // the original GPRs. - unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); SDValue Chain = SDValue(N,0); @@ -4401,7 +4505,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two // i32 VRs of inline asm with it. 
- unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 18bb9bf3ecc..db26feb5701 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -245,7 +245,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 }; for (auto VT : IntTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -258,12 +258,31 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::BITREVERSE, VT, Legal); + setOperationAction(ISD::BSWAP, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); // No native support for these. 
setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + + // Vector reductions + setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal); if (!HasMVEFP) { setOperationAction(ISD::SINT_TO_FP, VT, Expand); @@ -271,11 +290,18 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } } const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; for (auto VT : FloatTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); if (!HasMVEFP) setAllExpand(VT); @@ -287,6 +313,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Legal); + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } if (HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); @@ -314,7 +350,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { // vector types is 
inhibited at integer-only level. const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 }; for (auto VT : LongTypes) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::MQPRRegClass); setAllExpand(VT); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -334,6 +370,33 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + + // Pre and Post inc on these are legal, given the correct extends + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::v8i8, Legal); + setIndexedStoreAction(im, MVT::v8i8, Legal); + setIndexedLoadAction(im, MVT::v4i8, Legal); + setIndexedStoreAction(im, MVT::v4i8, Legal); + setIndexedLoadAction(im, MVT::v4i16, Legal); + setIndexedStoreAction(im, MVT::v4i16, Legal); + } + + // Predicate types + const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1}; + for (auto VT : pTypes) { + addRegisterClass(VT, &ARM::VCCRRegClass); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + } } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, @@ -645,8 +708,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); } - for (MVT VT : MVT::vector_valuetypes()) { - for (MVT InnerVT : MVT::vector_valuetypes()) { + 
for (MVT VT : MVT::fixedlen_vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); addAllExtLoads(VT, InnerVT, Expand); } @@ -669,8 +732,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, addMVEVectorTypes(Subtarget->hasMVEFloatOps()); // Combine low-overhead loop intrinsics so that we can lower i1 types. - if (Subtarget->hasLOB()) + if (Subtarget->hasLOB()) { setTargetDAGCombine(ISD::BRCOND); + setTargetDAGCombine(ISD::BR_CC); + } if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -837,10 +902,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); @@ -849,7 +910,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // It is legal to extload from v4i8 to v4i16 or v4i32. 
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}) { - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); @@ -861,6 +922,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); } if (!Subtarget->hasFP64()) { @@ -901,9 +966,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); } - if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){ + if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); + if (Subtarget->hasFullFP16()) + setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); } if (!Subtarget->hasFP16()) @@ -955,6 +1021,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + if (Subtarget->hasDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i8, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); + setOperationAction(ISD::SADDSAT, MVT::i16, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); + } + if (Subtarget->hasBaseDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i32, Legal); + setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); + } // i64 operation support. 
setOperationAction(ISD::MUL, MVT::i64, Expand); @@ -972,6 +1048,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); // MVE lowers 64 bit shifts to lsll and lsrl @@ -991,7 +1068,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } @@ -1365,14 +1442,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. - setMinStackArgumentAlignment(4); + setMinStackArgumentAlignment(Align(4)); // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); - setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); + setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); - setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); + setMinFunctionAlignment(Subtarget->isThumb() ? 
Align(2) : Align(4)); if (Subtarget->isThumb() || Subtarget->isThumb2()) setTargetDAGCombine(ISD::ABS); @@ -1472,6 +1549,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::ADDE: return "ARMISD::ADDE"; case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; + case ARMISD::LSLS: return "ARMISD::LSLS"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; @@ -1496,16 +1574,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; - case ARMISD::VCEQ: return "ARMISD::VCEQ"; - case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; - case ARMISD::VCGE: return "ARMISD::VCGE"; - case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; - case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; - case ARMISD::VCGEU: return "ARMISD::VCGEU"; - case ARMISD::VCGT: return "ARMISD::VCGT"; - case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; - case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; - case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST"; + case ARMISD::VCMP: return "ARMISD::VCMP"; + case ARMISD::VCMPZ: return "ARMISD::VCMPZ"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHLs: return "ARMISD::VSHLs"; @@ -1543,6 +1614,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTRN: return "ARMISD::VTRN"; case ARMISD::VTBL1: return "ARMISD::VTBL1"; case ARMISD::VTBL2: return "ARMISD::VTBL2"; + case ARMISD::VMOVN: return "ARMISD::VMOVN"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; @@ -1560,6 +1632,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; case ARMISD::SMMLSR: return 
"ARMISD::SMMLSR"; + case ARMISD::QADD16b: return "ARMISD::QADD16b"; + case ARMISD::QSUB16b: return "ARMISD::QSUB16b"; + case ARMISD::QADD8b: return "ARMISD::QADD8b"; + case ARMISD::QSUB8b: return "ARMISD::QSUB8b"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1589,6 +1665,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; case ARMISD::WLS: return "ARMISD::WLS"; + case ARMISD::LE: return "ARMISD::LE"; + case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC"; + case ARMISD::CSINV: return "ARMISD::CSINV"; + case ARMISD::CSNEG: return "ARMISD::CSNEG"; + case ARMISD::CSINC: return "ARMISD::CSINC"; } return nullptr; } @@ -1597,6 +1678,11 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); + + // MVE has a predicate register. + if (Subtarget->hasMVEIntegerOps() && + (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) + return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); return VT.changeVectorElementTypeToInteger(); } @@ -1726,34 +1812,22 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, - ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { + ARMCC::CondCodes &CondCode2) { CondCode2 = ARMCC::AL; - InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: - case ISD::SETOEQ: - CondCode = ARMCC::EQ; - InvalidOnQNaN = false; - break; + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; - case ISD::SETONE: - CondCode = ARMCC::MI; - CondCode2 = ARMCC::GT; - InvalidOnQNaN = false; - break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; - case ISD::SETUEQ: - CondCode = ARMCC::EQ; - CondCode2 = ARMCC::VS; - InvalidOnQNaN = false; - break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: @@ -1761,10 +1835,7 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: - case ISD::SETUNE: - CondCode = ARMCC::NE; - InvalidOnQNaN = false; - break; + case ISD::SETUNE: CondCode = ARMCC::NE; break; } } @@ -1988,6 +2059,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction::CallSiteInfo CSInfo; bool isStructRet = (Outs.empty()) ? 
false : Outs[0].Flags.isSRet(); bool isThisReturn = false; auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); @@ -2112,6 +2184,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, "unexpected use of 'returned'"); isThisReturn = true; } + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableDebugEntryValues) + CSInfo.emplace_back(VA.getLocReg(), i); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -2347,12 +2422,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); + return Ret; } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); + DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); @@ -2431,7 +2509,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, int FI = std::numeric_limits::max(); if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast(Arg.getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VR)) + if (!Register::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) @@ -3047,12 +3125,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, // Load the current TEB (thread environment block) SDValue Ops[] = {Chain, - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(2, DL, 
MVT::i32)}; + DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getTargetConstant(15, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(13, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(2, DL, MVT::i32)}; SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); @@ -3498,6 +3576,48 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, Op.getOperand(0)); } +SDValue ARMTargetLowering::LowerINTRINSIC_VOID( + SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { + unsigned IntNo = + cast( + Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other)) + ->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_gnu_eabi_mcount: { + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + // call "\01__gnu_mcount_nc" + const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + // Mark LR an implicit live-in. 
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); + SDValue ReturnAddress = + DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT); + std::vector ResultTys = {MVT::Other, MVT::Glue}; + SDValue Callee = + DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0); + SDValue RegisterMask = DAG.getRegisterMask(Mask); + if (Subtarget->isThumb()) + return SDValue( + DAG.getMachineNode( + ARM::tBL_PUSHLR, dl, ResultTys, + {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT), + DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}), + 0); + return SDValue( + DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys, + {ReturnAddress, Callee, RegisterMask, Chain}), + 0); + } + } +} + SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -3898,6 +4018,12 @@ SDValue ARMTargetLowering::LowerFormalArguments( // Transform the arguments in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this value is passed in r0 and has the returned attribute (e.g. + // C++ 'structors), record this fact for later use. + if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) { + AFI->setPreservesR0(); + } } // If this is an 8 or 16-bit value, it is really passed promoted @@ -4049,6 +4175,67 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, std::swap(LHS, RHS); } + // Thumb1 has very limited immediate modes, so turning an "and" into a + // shift can save multiple instructions. + // + // If we have (x & C1), and C1 is an appropriate mask, we can transform it + // into "((x << n) >> n)". But that isn't necessarily profitable on its + // own. If it's the operand to an unsigned comparison with an immediate, + // we can eliminate one of the shifts: we transform + // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)". 
+ // + // We avoid transforming cases which aren't profitable due to encoding + // details: + // + // 1. C2 fits into the immediate field of a cmp, and the transformed version + // would not; in that case, we're essentially trading one immediate load for + // another. + // 2. C1 is 255 or 65535, so we can use uxtb or uxth. + // 3. C2 is zero; we have other code for this special case. + // + // FIXME: Figure out profitability for Thumb2; we usually can't save an + // instruction, since the AND is always one instruction anyway, but we could + // use narrow instructions in some cases. + if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND && + LHS->hasOneUse() && isa(LHS.getOperand(1)) && + LHS.getValueType() == MVT::i32 && isa(RHS) && + !isSignedIntSetCC(CC)) { + unsigned Mask = cast(LHS.getOperand(1))->getZExtValue(); + auto *RHSC = cast(RHS.getNode()); + uint64_t RHSV = RHSC->getZExtValue(); + if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { + unsigned ShiftBits = countLeadingZeros(Mask); + if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) { + SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32); + LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt); + RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32); + } + } + } + + // The specific comparison "(x< 0x80000000U" can be optimized to a + // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same + // way a cmp would. + // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and + // some tweaks to the heuristics for the previous and->shift transform. + // FIXME: Optimize cases where the LHS isn't a shift. 
+ if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && + isa(RHS) && + cast(RHS)->getZExtValue() == 0x80000000U && + CC == ISD::SETUGT && isa(LHS.getOperand(1)) && + cast(LHS.getOperand(1))->getZExtValue() < 31) { + unsigned ShiftAmt = + cast(LHS.getOperand(1))->getZExtValue() + 1; + SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, + DAG.getVTList(MVT::i32, MVT::i32), + LHS.getOperand(0), + DAG.getConstant(ShiftAmt, dl, MVT::i32)); + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, + Shift.getValue(1), SDValue()); + ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); + return Chain.getValue(1); + } + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); // If the RHS is a constant zero then the V (overflow) flag will never be @@ -4083,15 +4270,13 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl, - bool InvalidOnQNaN) const { + SelectionDAG &DAG, const SDLoc &dl) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); SDValue Cmp; - SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -4108,12 +4293,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1), Cmp.getOperand(2)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of 
FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), - Cmp.getOperand(1)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } @@ -4276,6 +4459,35 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + EVT VT = Op.getValueType(); + if (!Subtarget->hasDSP()) + return SDValue(); + if (!VT.isSimple()) + return SDValue(); + + unsigned NewOpcode; + bool IsAdd = Op->getOpcode() == ISD::SADDSAT; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::i8: + NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b; + break; + case MVT::i16: + NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b; + break; + } + + SDLoc dl(Op); + SDValue Add = + DAG.getNode(NewOpcode, dl, MVT::i32, + DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), + DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4656,10 +4868,62 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); + ConstantSDNode *CFVal = dyn_cast(FalseVal); + ConstantSDNode *CTVal = dyn_cast(TrueVal); + + if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && + LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) { + unsigned TVal = CTVal->getZExtValue(); + unsigned FVal = CFVal->getZExtValue(); + unsigned Opcode = 0; + + if (TVal == ~FVal) { + Opcode = ARMISD::CSINV; + } else if (TVal == ~FVal + 1) { + Opcode = ARMISD::CSNEG; + } else if (TVal + 1 == FVal) { + Opcode = ARMISD::CSINC; 
+ } else if (TVal == FVal + 1) { + Opcode = ARMISD::CSINC; + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + + if (Opcode) { + // If one of the constants is cheaper than another, materialise the + // cheaper one and let the csel generate the other. + if (Opcode != ARMISD::CSINC && + HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + + // Attempt to use ZR checking TVal is 0, possibly inverting the condition + // to get there. CSINC not is invertable like the other two (~(~a) == a, + // -(-a) == a, but (a+1)+1 != a). + if (FVal == 0 && Opcode != ARMISD::CSINC) { + std::swap(TrueVal, FalseVal); + std::swap(TVal, FVal); + CC = ISD::getSetCCInverse(CC, true); + } + if (TVal == 0) + TrueVal = DAG.getRegister(ARM::ZR, MVT::i32); + + // Drops F's value because we can get it by inverting/negating TVal. + FalseVal = TrueVal; + + SDValue ARMcc; + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + EVT VT = TrueVal.getValueType(); + return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp); + } + } if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. @@ -4701,8 +4965,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); // Normalize the fp compare. 
If RHS is zero we prefer to keep it there so we // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we @@ -4727,13 +4990,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; @@ -4903,7 +5166,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. 
@@ -4960,11 +5223,10 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - bool InvalidOnQNaN; - FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); + FPCCToARMCC(CC, CondCode, CondCode2); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; @@ -5056,8 +5318,9 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), - /*isSigned*/ false, SDLoc(Op)).first; + CallOptions, SDLoc(Op)).first; } return Op; @@ -5120,8 +5383,9 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), - /*isSigned*/ false, SDLoc(Op)).first; + CallOptions, SDLoc(Op)).first; } return Op; @@ -5140,7 +5404,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { if (UseNEON) { // Use VBSL to copy the sign bit. - unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); + unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80); SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); EVT OpVT = (VT == MVT::f32) ? 
MVT::v2i32 : MVT::v1i64; @@ -5163,7 +5427,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); - SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), + SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32); AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, @@ -5243,7 +5507,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = ARI.getFrameRegister(MF); + Register FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -5253,9 +5517,9 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("sp", ARM::SP) .Default(0); if (Reg) @@ -5576,8 +5840,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); EVT VT = N->getValueType(0); - if (VT.isVector()) { - assert(ST->hasNEON()); + if (VT.isVector() && ST->hasNEON()) { // Compute the least significant set bit: LSB = X & -X SDValue X = N->getOperand(0); @@ -5777,14 +6040,15 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, unsigned ShPartsOpc = ARMISD::LSLL; ConstantSDNode *Con = dyn_cast(ShAmt); - // If the shift amount is greater than 32 then do the default optimisation - if (Con && Con->getZExtValue() > 32) + // If the shift amount is greater than 32 or has a greater bitwidth than 64 + // then do the default optimisation + if (ShAmt->getValueType(0).getSizeInBits() > 64 || + (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32))) return SDValue(); - // Extract the lower 32 bits of the shift amount if it's an i64 - if (ShAmt->getValueType(0) == MVT::i64) - ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt, - DAG.getConstant(0, dl, MVT::i32)); + // Extract the lower 32 bits of the shift amount if it's not an i32 + if (ShAmt->getValueType(0) != MVT::i32) + ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32); if (ShOpc == ISD::SRL) { if (!Con) @@ -5839,20 +6103,37 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } -static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { - SDValue TmpOp0, TmpOp1; +static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { bool Invert = false; bool Swap = false; - unsigned Opc = 0; + unsigned Opc = 
ARMCC::AL; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); + EVT CmpVT; + if (ST->hasNEON()) + CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); + else { + assert(ST->hasMVEIntegerOps() && + "No hardware support for integer vector comparison!"); + + if (Op.getValueType().getVectorElementType() != MVT::i1) + return SDValue(); + + // Make sure we expand floating point setcc to scalar if we do not have + // mve.fp, so that we can handle them from there. + if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps()) + return SDValue(); + + CmpVT = VT; + } + if (Op0.getValueType().getVectorElementType() == MVT::i64 && (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { // Special-case integer 64-bit equality comparisons. They aren't legal, @@ -5880,60 +6161,74 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: - case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; + case ISD::SETNE: + if (ST->hasMVEFloatOps()) { + Opc = ARMCC::NE; break; + } else { + Invert = true; LLVM_FALLTHROUGH; + } case ISD::SETOEQ: - case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETEQ: Opc = ARMCC::EQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: - case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETGT: Opc = ARMCC::GT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: - case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETGE: Opc = ARMCC::GE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; + case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break; case ISD::SETUGT: Swap = true; 
LLVM_FALLTHROUGH; - case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; + case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETONE: + case ISD::SETONE: { // Expand this to (OLT | OGT). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); - break; - case ISD::SETUO: - Invert = true; - LLVM_FALLTHROUGH; - case ISD::SETO: + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } + case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; + case ISD::SETO: { // Expand this to (OLT | OGE). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); - break; + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GE, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } } } else { // Integer comparisons. 
switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETNE: + if (ST->hasMVEIntegerOps()) { + Opc = ARMCC::NE; break; + } else { + Invert = true; LLVM_FALLTHROUGH; + } + case ISD::SETEQ: Opc = ARMCC::EQ; break; case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETGT: Opc = ARMCC::GT; break; case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETGE: Opc = ARMCC::GE; break; case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGT: Opc = ARMISD::VCGTU; break; + case ISD::SETUGT: Opc = ARMCC::HI; break; case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGE: Opc = ARMISD::VCGEU; break; + case ISD::SETUGE: Opc = ARMCC::HS; break; } // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). - if (Opc == ARMISD::VCEQ) { + if (ST->hasNEON() && Opc == ARMCC::EQ) { SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; @@ -5945,10 +6240,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { - Opc = ARMISD::VTST; Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); - Invert = !Invert; + SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1); + if (!Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; } } } @@ -5962,31 +6259,20 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (ISD::isBuildVectorAllZeros(Op1.getNode())) SingleOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { - if (Opc == ARMISD::VCGE) - Opc = ARMISD::VCLEZ; - else if (Opc == ARMISD::VCGT) - Opc = ARMISD::VCLTZ; + if (Opc == ARMCC::GE) + Opc = ARMCC::LE; + else if (Opc == ARMCC::GT) + Opc = 
ARMCC::LT; SingleOp = Op1; } SDValue Result; if (SingleOp.getNode()) { - switch (Opc) { - case ARMISD::VCEQ: - Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCGE: - Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCLEZ: - Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCGT: - Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; - case ARMISD::VCLTZ: - Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; - default: - Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); - } + Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp, + DAG.getConstant(Opc, dl, MVT::i32)); } else { - Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); + Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(Opc, dl, MVT::i32)); } Result = DAG.getSExtOrTrunc(Result, dl, VT); @@ -6027,13 +6313,13 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) { CCR, Chain.getValue(1)); } -/// isNEONModifiedImm - Check if the specified splat value corresponds to a -/// valid vector constant for a NEON or MVE instruction with a "modified immediate" -/// operand (e.g., VMOV). If so, return the encoded value. -static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, +/// isVMOVModifiedImm - Check if the specified splat value corresponds to a +/// valid vector constant for a NEON or MVE instruction with a "modified +/// immediate" operand (e.g., VMOV). If so, return the encoded value. 
+static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, - NEONModImmType type) { + VMOVModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a @@ -6163,10 +6449,10 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, } default: - llvm_unreachable("unexpected size for isNEONModifiedImm"); + llvm_unreachable("unexpected size for isVMOVModifiedImm"); } - unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); } @@ -6246,7 +6532,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). - SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), + SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); @@ -6263,7 +6549,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, + NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); @@ -6649,6 +6935,29 @@ static bool isReverseMask(ArrayRef M, EVT VT) { return true; } +static bool isVMOVNMask(ArrayRef M, EVT VT, bool Top) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8)) + return false; + + // If Top + // Look for <0, N, 2, N+2, 4, N+4, ..>. 
+ // This inserts Input2 into Input1 + // else if not Top + // Look for <0, N+1, 2, N+3, 4, N+5, ..> + // This inserts Input1 into Input2 + unsigned Offset = Top ? 0 : 1; + for (unsigned i = 0; i < NumElts; i+=2) { + if (M[i] >= 0 && M[i] != (int)i) + return false; + if (M[i+1] >= 0 && M[i+1] != (int)(NumElts + i + Offset)) + return false; + } + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. @@ -6669,6 +6978,66 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, return SDValue(); } +static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned BoolMask; + unsigned BitsPerBool; + if (NumElts == 4) { + BitsPerBool = 4; + BoolMask = 0xf; + } else if (NumElts == 8) { + BitsPerBool = 2; + BoolMask = 0x3; + } else if (NumElts == 16) { + BitsPerBool = 1; + BoolMask = 0x1; + } else + return SDValue(); + + // If this is a single value copied into all lanes (a splat), we can just sign + // extend that single value + SDValue FirstOp = Op.getOperand(0); + if (!isa(FirstOp) && + std::all_of(std::next(Op->op_begin()), Op->op_end(), + [&FirstOp](SDUse &U) { + return U.get().isUndef() || U.get() == FirstOp; + })) { + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp, + DAG.getValueType(MVT::i1)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext); + } + + // First create base with bits set where known + unsigned Bits32 = 0; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (!isa(V) && !V.isUndef()) + continue; + bool BitSet = V.isUndef() ? 
false : cast(V)->getZExtValue(); + if (BitSet) + Bits32 |= BoolMask << (i * BitsPerBool); + } + + // Add in unknown nodes + SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, + DAG.getConstant(Bits32, dl, MVT::i32)); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (isa(V) || V.isUndef()) + continue; + Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V, + DAG.getConstant(i, dl, MVT::i32)); + } + + return Base; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, @@ -6677,6 +7046,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); EVT VT = Op.getValueType(); + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerBUILD_VECTOR_i1(Op, DAG, ST); + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -6688,7 +7060,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) { // Check if an immediate VMOV works. EVT VmovVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); @@ -6700,7 +7072,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Try an immediate VMVN. uint64_t NegatedImm = (~SplatBits).getZExtValue(); - Val = isNEONModifiedImm( + Val = isVMOVModifiedImm( NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm); @@ -7088,9 +7460,6 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, LaneMask[j] = ExtractBase + j; } - // Final check before we try to produce nonsense... 
- if (!isShuffleMaskLegal(Mask, ShuffleVT)) - return SDValue(); // We can't handle more than two sources. This should have already // been checked before this point. @@ -7100,8 +7469,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; - SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], - ShuffleOps[1], Mask); + SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0], + ShuffleOps[1], Mask, DAG); + if (!Shuffle) + return SDValue(); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } @@ -7168,6 +7539,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + ShuffleVectorInst::isIdentityMask(M) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16)) @@ -7180,6 +7552,9 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)) return true; + else if (Subtarget->hasMVEIntegerOps() && + (isVMOVNMask(M, VT, 0) || isVMOVNMask(M, VT, 1))) + return true; else return false; } @@ -7282,6 +7657,94 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, DAG.getConstant(ExtractNum, DL, MVT::i32)); } +static EVT getVectorTyFromPredicateVector(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::v4i1: + return MVT::v4i32; + case MVT::v8i1: + return MVT::v8i16; + case MVT::v16i1: + return MVT::v16i8; + default: + llvm_unreachable("Unexpected vector predicate type"); + } +} + +static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, + SelectionDAG &DAG) { + // Converting from boolean predicates to integers involves creating a vector + // of all ones or all zeroes and selecting the lanes based upon the real + // predicate. 
+ SDValue AllOnes = + DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32); + AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes); + + SDValue AllZeroes = + DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32); + AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes); + + // Get full vector type from predicate type + EVT NewVT = getVectorTyFromPredicateVector(VT); + + SDValue RecastV1; + // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast + // this to a v16i1. This cannot be done with an ordinary bitcast because the + // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node, + // since we know in hardware the sizes are really the same. + if (VT != MVT::v16i1) + RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred); + else + RecastV1 = Pred; + + // Select either all ones or zeroes depending upon the real predicate bits. + SDValue PredAsVector = + DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes); + + // Recast our new predicate-as-integer v16i8 vector into something + // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate. 
+ return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector); +} + +static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + ArrayRef ShuffleMask = SVN->getMask(); + + assert(ST->hasMVEIntegerOps() && + "No support for vector shuffle of boolean predicates"); + + SDValue V1 = Op.getOperand(0); + SDLoc dl(Op); + if (isReverseMask(ShuffleMask, VT)) { + SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1); + SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast); + SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit, + DAG.getConstant(16, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl); + } + + // Until we can come up with optimised cases for every single vector + // shuffle in existence we have chosen the least painful strategy. This is + // to essentially promote the boolean predicate to a 8-bit integer, where + // each predicate represents a byte. Then we fall back on a normal integer + // vector shuffle and convert the result back into a predicate vector. In + // many cases the generated code might be even better than scalar code + // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit + // fields in a register into 8 other arbitrary 2-bit fields! + SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG); + EVT NewVT = PredAsVector.getValueType(); + + // Do the shuffle! + SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector, + DAG.getUNDEF(NewVT), ShuffleMask); + + // Now return the result of comparing the shuffled vector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. 
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) { SDValue V1 = Op.getOperand(0); @@ -7289,6 +7752,10 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); + unsigned EltSize = VT.getScalarSizeInBits(); + + if (ST->hasMVEIntegerOps() && EltSize == 1) + return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again @@ -7298,7 +7765,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // of the same time so that they get CSEd properly. ArrayRef ShuffleMask = SVN->getMask(); - unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); @@ -7364,6 +7830,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, .getValue(WhichResult); } } + if (ST->hasMVEIntegerOps()) { + if (isVMOVNMask(ShuffleMask, VT, 0)) + return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1, + DAG.getConstant(0, dl, MVT::i32)); + if (isVMOVNMask(ShuffleMask, VT, 1)) + return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2, + DAG.getConstant(1, dl, MVT::i32)); + } // Also check for these shuffles through CONCAT_VECTORS: we canonicalize // shuffles that produce a result larger than their operands with: @@ -7468,8 +7942,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return SDValue(); } -SDValue ARMTargetLowering:: -LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VecVT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + + assert(ST->hasMVEIntegerOps() && + "LowerINSERT_VECTOR_ELT_i1 called without MVE!"); 
+ + SDValue Conv = + DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); + unsigned Lane = cast(Op.getOperand(2))->getZExtValue(); + unsigned LaneWidth = + getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; + unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth; + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, + Op.getOperand(1), DAG.getValueType(MVT::i1)); + SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext, + DAG.getConstant(~Mask, dl, MVT::i32)); + return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI); +} + +SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { // INSERT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(2); if (!isa(Lane)) @@ -7477,6 +7972,11 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Elt = Op.getOperand(1); EVT EltVT = Elt.getValueType(); + + if (Subtarget->hasMVEIntegerOps() && + Op.getValueType().getScalarSizeInBits() == 1) + return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget); + if (getTypeAction(*DAG.getContext(), EltVT) == TargetLowering::TypePromoteFloat) { // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32, @@ -7505,13 +8005,37 @@ LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Op; } -static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VecVT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + + assert(ST->hasMVEIntegerOps() && + "LowerINSERT_VECTOR_ELT_i1 called without MVE!"); + + SDValue Conv = + DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0)); + unsigned Lane = cast(Op.getOperand(1))->getZExtValue(); + unsigned LaneWidth = + getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8; + SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv, + DAG.getConstant(Lane * 
LaneWidth, dl, MVT::i32)); + return Shift; +} + +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(1); if (!isa(Lane)) return SDValue(); SDValue Vec = Op.getOperand(0); + EVT VT = Vec.getValueType(); + + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST); + if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) { SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); @@ -7520,7 +8044,64 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT Op1VT = V1.getValueType(); + EVT Op2VT = V2.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + assert(Op1VT == Op2VT && "Operand types don't match!"); + assert(VT.getScalarSizeInBits() == 1 && + "Unexpected custom CONCAT_VECTORS lowering"); + assert(ST->hasMVEIntegerOps() && + "CONCAT_VECTORS lowering only supported for MVE"); + + SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG); + SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG); + + // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets + // promoted to v8i16, etc. + + MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT(); + + // Extract the vector elements from Op1 and Op2 one by one and truncate them + // to be the right size for the destination. For example, if Op1 is v4i1 then + // the promoted vector is v4i32. The result of concatentation gives a v8i1, + // which when promoted is v8i16. 
That means each i32 element from Op1 needs + // truncating to i16 and inserting in the result. + EVT ConcatVT = MVT::getVectorVT(ElType, NumElts); + SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT); + auto ExractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) { + EVT NewVT = NewV.getValueType(); + EVT ConcatVT = ConVec.getValueType(); + for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV, + DAG.getIntPtrConstant(i, dl)); + ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt, + DAG.getConstant(j, dl, MVT::i32)); + } + return ConVec; + }; + unsigned j = 0; + ConVec = ExractInto(NewV1, ConVec, j); + ConVec = ExractInto(NewV2, ConVec, j); + + // Now return the result of comparing the subvector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + +static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = Op->getValueType(0); + if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1) + return LowerCONCAT_VECTORS_i1(Op, DAG, ST); + // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. 
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && @@ -7540,6 +8121,43 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); } +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT Op1VT = V1.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned Index = cast(V2)->getZExtValue(); + + assert(VT.getScalarSizeInBits() == 1 && + "Unexpected custom EXTRACT_SUBVECTOR lowering"); + assert(ST->hasMVEIntegerOps() && + "EXTRACT_SUBVECTOR lowering only supported for MVE"); + + SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG); + + // We now have Op1 promoted to a vector of integers, where v8i1 gets + // promoted to v8i16, etc. + + MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT(); + + EVT SubVT = MVT::getVectorVT(ElType, NumElts); + SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT); + for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1, + DAG.getIntPtrConstant(i, dl)); + SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt, + DAG.getConstant(j, dl, MVT::i32)); + } + + // Now return the result of comparing the subvector with zero, + // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. + return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); +} + /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// element has been zero/sign-extended, depending on the isSigned parameter, /// from an integer type half its size. 
@@ -7897,7 +8515,8 @@ static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, return N0; } -static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); @@ -7924,7 +8543,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); - N0 = LowerCONCAT_VECTORS(N0, DAG); + N0 = LowerCONCAT_VECTORS(N0, DAG, ST); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; @@ -7932,7 +8551,8 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { return LowerSDIV_v4i16(N0, N1, dl, DAG); } -static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { // TODO: Should this propagate fast-math-flags? 
EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && @@ -7960,7 +8580,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); - N0 = LowerCONCAT_VECTORS(N0, DAG); + N0 = LowerCONCAT_VECTORS(N0, DAG, ST); N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, @@ -8255,6 +8875,96 @@ void ARMTargetLowering::ExpandDIV_Windows( Results.push_back(Upper); } +static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { + LoadSDNode *LD = cast(Op.getNode()); + EVT MemVT = LD->getMemoryVT(); + assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + "Expected a predicate type!"); + assert(MemVT == Op.getValueType()); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Expected a non-extending load"); + assert(LD->isUnindexed() && "Expected a unindexed load"); + + // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit + // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We + // need to make sure that 8/4 bits are actually loaded into the correct + // place, which means loading the value and then shuffling the values into + // the bottom bits of the predicate. + // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect + // for BE). 
+ + SDLoc dl(Op); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), + LD->getMemOperand()); + SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load); + if (MemVT != MVT::v16i1) + Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred, + DAG.getConstant(0, dl, MVT::i32)); + return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); +} + +static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { + StoreSDNode *ST = cast(Op.getNode()); + EVT MemVT = ST->getMemoryVT(); + assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) && + "Expected a predicate type!"); + assert(MemVT == ST->getValue().getValueType()); + assert(!ST->isTruncatingStore() && "Expected a non-extending store"); + assert(ST->isUnindexed() && "Expected a unindexed store"); + + // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits + // unset and a scalar store. 
+ SDLoc dl(Op); + SDValue Build = ST->getValue(); + if (MemVT != MVT::v16i1) { + SmallVector Ops; + for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build, + DAG.getConstant(I, dl, MVT::i32))); + for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++) + Ops.push_back(DAG.getUNDEF(MVT::i32)); + Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops); + } + SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build); + return DAG.getTruncStore( + ST->getChain(), dl, GRP, ST->getBasePtr(), + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), + ST->getMemOperand()); +} + +static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { + MaskedLoadSDNode *N = cast(Op.getNode()); + MVT VT = Op.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDValue PassThru = N->getPassThru(); + SDLoc dl(Op); + + auto IsZero = [](SDValue PassThru) { + return (ISD::isBuildVectorAllZeros(PassThru.getNode()) || + (PassThru->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(PassThru->getOperand(0)))); + }; + + if (IsZero(PassThru)) + return Op; + + // MVE Masked loads use zero as the passthru value. Here we convert undef to + // zero too, and other values are lowered to a select. 
+ SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(0, dl, MVT::i32)); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(), + N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad()); + SDValue Combo = NewLoad; + if (!PassThru.isUndef() && + (PassThru.getOpcode() != ISD::BITCAST || + !IsZero(PassThru->getOperand(0)))) + Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); + return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or @@ -8273,12 +8983,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, // Under Power Management extensions, the cycle-count is: // mrc p15, #0, , c9, c13, #0 SDValue Ops[] = { N->getOperand(0), // Chain - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(9, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32) + DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getTargetConstant(15, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32), + DAG.getTargetConstant(9, DL, MVT::i32), + DAG.getTargetConstant(13, DL, MVT::i32), + DAG.getTargetConstant(0, DL, MVT::i32) }; SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, @@ -8412,6 +9122,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case 
ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget); @@ -8426,24 +9137,25 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); - case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget); case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ true); - return LowerSDIV(Op, DAG); + return LowerSDIV(Op, DAG, Subtarget); case ISD::UDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ false); - return LowerUDIV(Op, DAG); + return LowerUDIV(Op, DAG, Subtarget); case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: @@ -8452,6 +9164,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO: case ISD::USUBO: return LowerUnsignedALUO(Op, DAG); + case ISD::SADDSAT: + case ISD::SSUBSAT: + return LowerSADDSUBSAT(Op, DAG, 
Subtarget); + case ISD::LOAD: + return LowerPredicateLoad(Op, DAG); + case ISD::STORE: + return LowerPredicateStore(Op, DAG); + case ISD::MLOAD: + return LowerMLOAD(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -8530,6 +9251,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; + case ISD::SADDSAT: + case ISD::SSUBSAT: + Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget); + break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -8600,19 +9325,19 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // orr r5, r5, #1 // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); // Set the low bit because of thumb mode. 
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(0x01) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); @@ -8630,28 +9355,28 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // orrs r1, r2 // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. 
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) .addReg(ARM::CPSR, RegState::Define) .addImm(1) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill) .add(predOps(ARMCC::AL)); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc @@ -8666,13 +9391,13 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) .addConstantPoolIndex(CPI) .addImm(0) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId) @@ -8794,7 +9519,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) @@ -8807,7 +9532,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(LPadList.size()) .add(predOps(ARMCC::AL)); } else { - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register 
VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); @@ -8832,12 +9557,12 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) @@ -8850,7 +9575,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(NewVReg1) .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) .addFrameIndex(FI) .addImm(1) @@ -8873,7 +9598,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) @@ -8889,19 +9614,19 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + Register NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg1) .addImm(2) .add(predOps(ARMCC::AL)); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); 
BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) @@ -8911,7 +9636,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) .addReg(NewVReg4, RegState::Kill) .addImm(0) @@ -8932,7 +9657,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(NewVReg6, RegState::Kill) .addJumpTableIndex(MJTI); } else { - unsigned NewVReg1 = MRI->createVirtualRegister(TRC); + Register NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) @@ -8945,7 +9670,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(NumLPads) .add(predOps(ARMCC::AL)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); @@ -8974,7 +9699,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - unsigned VReg1 = MRI->createVirtualRegister(TRC); + Register VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) @@ -8991,20 +9716,20 @@ void 
ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + Register NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + Register NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); - unsigned NewVReg5 = MRI->createVirtualRegister(TRC); + Register NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg4) @@ -9239,8 +9964,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned src = MI.getOperand(1).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register src = MI.getOperand(1).getReg(); unsigned SizeVal = MI.getOperand(2).getImm(); unsigned Align = MI.getOperand(3).getImm(); DebugLoc dl = MI.getDebugLoc(); @@ -9291,9 +10016,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(IsNeon ? 
VecTRC : TRC); emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, @@ -9306,9 +10031,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) for (unsigned i = 0; i < BytesLeft; i++) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, @@ -9351,7 +10076,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. - unsigned varEnd = MRI.createVirtualRegister(TRC); + Register varEnd = MRI.createVirtualRegister(TRC); if (Subtarget->useMovt()) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) @@ -9401,12 +10126,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // destPhi = PHI(destLoop, dst) MachineBasicBlock *entryBB = BB; BB = loopMBB; - unsigned varLoop = MRI.createVirtualRegister(TRC); - unsigned varPhi = MRI.createVirtualRegister(TRC); - unsigned srcLoop = MRI.createVirtualRegister(TRC); - unsigned srcPhi = MRI.createVirtualRegister(TRC); - unsigned destLoop = MRI.createVirtualRegister(TRC); - unsigned destPhi = MRI.createVirtualRegister(TRC); + Register varLoop = MRI.createVirtualRegister(TRC); + Register varPhi = MRI.createVirtualRegister(TRC); + Register srcLoop = MRI.createVirtualRegister(TRC); + Register srcPhi = MRI.createVirtualRegister(TRC); + Register destLoop = MRI.createVirtualRegister(TRC); + Register destPhi = MRI.createVirtualRegister(TRC); BuildMI(*BB, BB->begin(), dl, 
TII->get(ARM::PHI), varPhi) .addReg(varLoop).addMBB(loopMBB) @@ -9420,7 +10145,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, IsThumb1, IsThumb2); emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, @@ -9461,9 +10186,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned srcOut = MRI.createVirtualRegister(TRC); - unsigned destOut = MRI.createVirtualRegister(TRC); - unsigned scratch = MRI.createVirtualRegister(TRC); + Register srcOut = MRI.createVirtualRegister(TRC); + Register destOut = MRI.createVirtualRegister(TRC); + Register scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, @@ -9523,7 +10248,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, break; case CodeModel::Large: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); @@ -9771,8 +10496,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // equality. bool RHSisZero = MI.getOpcode() == ARM::BCCZi64; - unsigned LHS1 = MI.getOperand(1).getReg(); - unsigned LHS2 = MI.getOperand(2).getReg(); + Register LHS1 = MI.getOperand(1).getReg(); + Register LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPri : ARM::CMPri)) .addReg(LHS1) @@ -9782,8 +10507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { - unsigned RHS1 = MI.getOperand(3).getReg(); - unsigned RHS2 = MI.getOperand(4).getReg(); + Register RHS1 = MI.getOperand(3).getReg(); + Register RHS2 = MI.getOperand(4).getReg(); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS1) .addReg(RHS1) @@ -9844,15 +10569,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, Fn->insert(BBI, RSBBB); Fn->insert(BBI, SinkBB); - unsigned int ABSSrcReg = MI.getOperand(1).getReg(); - unsigned int ABSDstReg = MI.getOperand(0).getReg(); + Register ABSSrcReg = MI.getOperand(1).getReg(); + Register ABSDstReg = MI.getOperand(0).getReg(); bool ABSSrcKIll = MI.getOperand(1).isKill(); bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewRsbDstReg = - MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); + Register NewRsbDstReg = MRI.createVirtualRegister( + isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, @@ -9931,7 +10656,7 @@ static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, // The MEMCPY both defines and kills the scratch registers. for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { - unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass + Register TmpReg = MRI.createVirtualRegister(isThumb1 ? 
&ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } @@ -10369,10 +11094,7 @@ static SDValue findMUL_LOHI(SDValue V) { static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb()) { - if (!Subtarget->hasDSP()) - return SDValue(); - } else if (!Subtarget->hasV5TEOps()) + if (!Subtarget->hasBaseDSP()) return SDValue(); // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and @@ -11253,7 +11975,7 @@ static SDValue PerformANDCombine(SDNode *N, BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; - SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), + SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VbicVT, VT.is128BitVector(), OtherModImm); @@ -11469,6 +12191,77 @@ static SDValue PerformORCombineToBFI(SDNode *N, return SDValue(); } +static bool isValidMVECond(unsigned CC, bool IsFloat) { + switch (CC) { + case ARMCC::EQ: + case ARMCC::NE: + case ARMCC::LE: + case ARMCC::GT: + case ARMCC::GE: + case ARMCC::LT: + return true; + case ARMCC::HS: + case ARMCC::HI: + return !IsFloat; + default: + return false; + }; +} + +static SDValue PerformORCombine_i1(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain + // together with predicates + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + ARMCC::CondCodes CondCode0 = ARMCC::AL; + ARMCC::CondCodes CondCode1 = ARMCC::AL; + if (N0->getOpcode() == ARMISD::VCMP) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(2)) + ->getZExtValue(); + else if (N0->getOpcode() == ARMISD::VCMPZ) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(1)) + ->getZExtValue(); + if (N1->getOpcode() == 
ARMISD::VCMP) + CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(2)) + ->getZExtValue(); + else if (N1->getOpcode() == ARMISD::VCMPZ) + CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(1)) + ->getZExtValue(); + + if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL) + return SDValue(); + + unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0); + unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1); + + if (!isValidMVECond(Opposite0, + N0->getOperand(0)->getValueType(0).isFloatingPoint()) || + !isValidMVECond(Opposite1, + N1->getOperand(0)->getValueType(0).isFloatingPoint())) + return SDValue(); + + SmallVector Ops0; + Ops0.push_back(N0->getOperand(0)); + if (N0->getOpcode() == ARMISD::VCMP) + Ops0.push_back(N0->getOperand(1)); + Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32)); + SmallVector Ops1; + Ops1.push_back(N1->getOperand(0)); + if (N1->getOpcode() == ARMISD::VCMP) + Ops1.push_back(N1->getOperand(1)); + Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32)); + + SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0); + SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1); + SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1); + return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And, + DCI.DAG.getAllOnesConstant(SDLoc(N), VT)); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -11489,7 +12282,7 @@ static SDValue PerformORCombine(SDNode *N, BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VorrVT, VT.is128BitVector(), OtherModImm); @@ -11553,6 +12346,10 @@ static SDValue PerformORCombine(SDNode *N, } } + if 
(Subtarget->hasMVEIntegerOps() && + (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)) + return PerformORCombine_i1(N, DCI, Subtarget); + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -11921,6 +12718,24 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return Vec; } +static SDValue +PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x) + if (Op->getOpcode() == ARMISD::PREDICATE_CAST) { + // If the valuetypes are the same, we can remove the cast entirely. + if (Op->getOperand(0).getValueType() == VT) + return Op->getOperand(0); + return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, + Op->getOperand(0).getValueType(), Op->getOperand(0)); + } + + return SDValue(); +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -12332,7 +13147,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, // The canonical VMOV for a zero vector uses a 32-bit element size. unsigned Imm = cast(Op.getOperand(0))->getZExtValue(); unsigned EltBits; - if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) + if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0) EltSize = 8; EVT VT = N->getValueType(0); if (EltSize > VT.getScalarSizeInBits()) @@ -12382,95 +13197,163 @@ static SDValue PerformLOADCombine(SDNode *N, return SDValue(); } +// Optimize trunc store (of multiple scalars) to shuffle and store. First, +// pack all of the elements in one place. Next, store to memory in fewer +// chunks. 
+static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, + SelectionDAG &DAG) { + SDValue StVal = St->getValue(); + EVT VT = StVal.getValueType(); + if (!St->isTruncatingStore() || !VT.isVector()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getScalarSizeInBits(); + unsigned ToEltSz = StVT.getScalarSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) + return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) + return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems * SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDLoc DL(St); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1 + : i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) + return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle( + WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. 
+ + // Find the largest store unit + MVT StoreType = MVT::i8; + for (MVT Tp : MVT::integer_valuetypes()) { + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = + EVT::getVectorVT(*DAG.getContext(), StoreType, + VT.getSizeInBits() / EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType, + ShuffWide, DAG.getIntPtrConstant(I, DL)); + SDValue Ch = + DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), + St->getAlignment(), St->getMemOperand()->getFlags()); + BasePtr = + DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); +} + +// Try taking a single vector store from an truncate (which would otherwise turn +// into an expensive buildvector) and splitting it into a series of narrowing +// stores. 
+static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, + SelectionDAG &DAG) { + if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed()) + return SDValue(); + SDValue Trunc = St->getValue(); + if (Trunc->getOpcode() != ISD::TRUNCATE) + return SDValue(); + EVT FromVT = Trunc->getOperand(0).getValueType(); + EVT ToVT = Trunc.getValueType(); + if (!ToVT.isVector()) + return SDValue(); + assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements()); + EVT ToEltVT = ToVT.getVectorElementType(); + EVT FromEltVT = FromVT.getVectorElementType(); + + unsigned NumElements = 0; + if (FromEltVT == MVT::i32 && (ToEltVT == MVT::i16 || ToEltVT == MVT::i8)) + NumElements = 4; + if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8) + NumElements = 8; + if (NumElements == 0 || FromVT.getVectorNumElements() == NumElements || + FromVT.getVectorNumElements() % NumElements != 0) + return SDValue(); + + SDLoc DL(St); + // Details about the old store + SDValue Ch = St->getChain(); + SDValue BasePtr = St->getBasePtr(); + unsigned Alignment = St->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags(); + AAMDNodes AAInfo = St->getAAInfo(); + + EVT NewFromVT = EVT::getVectorVT(*DAG.getContext(), FromEltVT, NumElements); + EVT NewToVT = EVT::getVectorVT(*DAG.getContext(), ToEltVT, NumElements); + + SmallVector Stores; + for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) { + unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8; + SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset); + + SDValue Extract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0), + DAG.getConstant(i * NumElements, DL, MVT::i32)); + SDValue Store = DAG.getTruncStore( + Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset), + NewToVT, Alignment, MMOFlags, AAInfo); + Stores.push_back(Store); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); +} + /// 
PerformSTORECombine - Target-specific dag combine xforms for /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { StoreSDNode *St = cast(N); if (St->isVolatile()) return SDValue(); - - // Optimize trunc store (of multiple scalars) to shuffle and store. First, - // pack all of the elements in one place. Next, store to memory in fewer - // chunks. SDValue StVal = St->getValue(); EVT VT = StVal.getValueType(); - if (St->isTruncatingStore() && VT.isVector()) { - SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT StVT = St->getMemoryVT(); - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromEltSz = VT.getScalarSizeInBits(); - unsigned ToEltSz = StVT.getScalarSizeInBits(); - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + if (Subtarget->hasNEON()) + if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG)) + return Store; - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); - - unsigned SizeRatio = FromEltSz / ToEltSz; - assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), - NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDLoc DL(St); - SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = DAG.getDataLayout().isBigEndian() - ? 
(i + 1) * SizeRatio - 1 - : i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, - DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec); - // At this point all of the data is stored at the bottom of the - // register. We now need to save it to mem. - - // Find the largest store unit - MVT StoreType = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) - StoreType = Tp; - } - // Didn't find a legal store type. - if (!TLI.isTypeLegal(StoreType)) - return SDValue(); - - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); - SmallVector Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue BasePtr = St->getBasePtr(); - - // Perform one or more big stores into memory. 
- unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); - for (unsigned I = 0; I < E; I++) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - StoreType, ShuffWide, - DAG.getIntPtrConstant(I, DL)); - SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - Increment); - Chains.push_back(Ch); - } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); - } + if (Subtarget->hasMVEIntegerOps()) + if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG)) + return NewToken; if (!ISD::isNormalStore(St)) return SDValue(); @@ -12522,7 +13405,7 @@ static SDValue PerformSTORECombine(SDNode *N, } // If this is a legal vector store, try to combine it into a VST1_UPD. - if (ISD::isNormalStore(N) && VT.isVector() && + if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); @@ -12890,6 +13773,71 @@ static SDValue PerformShiftCombine(SDNode *N, return SDValue(); } +// Look for a sign/zero extend of a larger than legal load. This can be split +// into two extending loads, which are simpler to deal with than an arbitrary +// sign extend. 
+static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::LOAD) + return SDValue(); + LoadSDNode *LD = cast(N0.getNode()); + if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() || + LD->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + EVT FromVT = LD->getValueType(0); + EVT ToVT = N->getValueType(0); + if (!ToVT.isVector()) + return SDValue(); + assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements()); + EVT ToEltVT = ToVT.getVectorElementType(); + EVT FromEltVT = FromVT.getVectorElementType(); + + unsigned NumElements = 0; + if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8)) + NumElements = 4; + if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8) + NumElements = 8; + if (NumElements == 0 || + FromVT.getVectorNumElements() == NumElements || + FromVT.getVectorNumElements() % NumElements != 0 || + !isPowerOf2_32(NumElements)) + return SDValue(); + + SDLoc DL(LD); + // Details about the old load + SDValue Ch = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Alignment = LD->getOriginalAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + ISD::LoadExtType NewExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + SDValue Offset = DAG.getUNDEF(BasePtr.getValueType()); + EVT NewFromVT = FromVT.getHalfNumVectorElementsVT(*DAG.getContext()); + EVT NewToVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NewOffset = NewFromVT.getSizeInBits() / 8; + SDValue NewPtr = DAG.getObjectPtrOffset(DL, BasePtr, NewOffset); + + // Split the load in half, each side of which is extended separately. This + // is good enough, as legalisation will take it from there. They are either + // already legal or they will be split further into something that is + // legal. 
+ SDValue NewLoad1 = + DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, BasePtr, Offset, + LD->getPointerInfo(), NewFromVT, Alignment, MMOFlags, AAInfo); + SDValue NewLoad2 = + DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset, + LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT, + Alignment, MMOFlags, AAInfo); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(NewLoad1.getNode(), 1), + SDValue(NewLoad2.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, NewLoad1, NewLoad2); +} + /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, @@ -12927,6 +13875,10 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, } } + if (ST->hasMVEIntegerOps()) + if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG)) + return NewLoad; + return SDValue(); } @@ -13028,43 +13980,169 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D return V; } +// Given N, the value controlling the conditional branch, search for the loop +// intrinsic, returning it, along with how the value is used. 
We need to handle +// patterns such as the following: +// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit) +// (brcond (setcc (loop.decrement), 0, eq), exit) +// (brcond (setcc (loop.decrement), 0, ne), header) +static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, + bool &Negate) { + switch (N->getOpcode()) { + default: + break; + case ISD::XOR: { + if (!isa(N.getOperand(1))) + return SDValue(); + if (!cast(N.getOperand(1))->isOne()) + return SDValue(); + Negate = !Negate; + return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate); + } + case ISD::SETCC: { + auto *Const = dyn_cast(N.getOperand(1)); + if (!Const) + return SDValue(); + if (Const->isNullValue()) + Imm = 0; + else if (Const->isOne()) + Imm = 1; + else + return SDValue(); + CC = cast(N.getOperand(2))->get(); + return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate); + } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntOp = cast(N.getOperand(1))->getZExtValue(); + if (IntOp != Intrinsic::test_set_loop_iterations && + IntOp != Intrinsic::loop_decrement_reg) + return SDValue(); + return N; + } + } + return SDValue(); +} + static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST) { - // Look for (brcond (xor test.set.loop.iterations, -1) - SDValue CC = N->getOperand(1); - unsigned Opc = CC->getOpcode(); - SDValue Int; - if ((Opc == ISD::XOR || Opc == ISD::SETCC) && - (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) { + // The hwloop intrinsics that we're interested are used for control-flow, + // either for entering or exiting the loop: + // - test.set.loop.iterations will test whether its operand is zero. If it + // is zero, the proceeding branch should not enter the loop. + // - loop.decrement.reg also tests whether its operand is zero. If it is + // zero, the proceeding branch should not branch back to the beginning of + // the loop. 
+ // So here, we need to check that how the brcond is using the result of each + // of the intrinsics to ensure that we're branching to the right place at the + // right time. - assert((isa(CC->getOperand(1)) && - cast(CC->getOperand(1))->isOne()) && - "Expected to compare against 1"); + ISD::CondCode CC; + SDValue Cond; + int Imm = 1; + bool Negate = false; + SDValue Chain = N->getOperand(0); + SDValue Dest; - Int = CC->getOperand(0); - } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN) - Int = CC; - else + if (N->getOpcode() == ISD::BRCOND) { + CC = ISD::SETEQ; + Cond = N->getOperand(1); + Dest = N->getOperand(2); + } else { + assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!"); + CC = cast(N->getOperand(1))->get(); + Cond = N->getOperand(2); + Dest = N->getOperand(4); + if (auto *Const = dyn_cast(N->getOperand(3))) { + if (!Const->isOne() && !Const->isNullValue()) + return SDValue(); + Imm = Const->getZExtValue(); + } else + return SDValue(); + } + + SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate); + if (!Int) return SDValue(); - unsigned IntOp = cast(Int.getOperand(1))->getZExtValue(); - if (IntOp != Intrinsic::test_set_loop_iterations) - return SDValue(); + if (Negate) + CC = ISD::getSetCCInverse(CC, true); + + auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) { + return (CC == ISD::SETEQ && Imm == 0) || + (CC == ISD::SETNE && Imm == 1) || + (CC == ISD::SETLT && Imm == 1) || + (CC == ISD::SETULT && Imm == 1); + }; + + auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) { + return (CC == ISD::SETEQ && Imm == 1) || + (CC == ISD::SETNE && Imm == 0) || + (CC == ISD::SETGT && Imm == 0) || + (CC == ISD::SETUGT && Imm == 0) || + (CC == ISD::SETGE && Imm == 1) || + (CC == ISD::SETUGE && Imm == 1); + }; + + assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) && + "unsupported condition"); SDLoc dl(Int); - SDValue Chain = N->getOperand(0); + SelectionDAG &DAG = DCI.DAG; SDValue Elements = Int.getOperand(2); - SDValue ExitBlock = 
N->getOperand(2); + unsigned IntOp = cast(Int->getOperand(1))->getZExtValue(); + assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR) + && "expected single br user"); + SDNode *Br = *N->use_begin(); + SDValue OtherTarget = Br->getOperand(1); - // TODO: Once we start supporting tail predication, we can add another - // operand to WLS for the number of elements processed in a vector loop. + // Update the unconditional branch to branch to the given Dest. + auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) { + SDValue NewBrOps[] = { Br->getOperand(0), Dest }; + SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps); + DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr); + }; - SDValue Ops[] = { Chain, Elements, ExitBlock }; - SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); - DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); - return Res; + if (IntOp == Intrinsic::test_set_loop_iterations) { + SDValue Res; + // We expect this 'instruction' to branch when the counter is zero. + if (IsTrueIfZero(CC, Imm)) { + SDValue Ops[] = { Chain, Elements, Dest }; + Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + } else { + // The logic is the reverse of what we need for WLS, so find the other + // basic block target: the target of the proceeding br. + UpdateUncondBr(Br, Dest, DAG); + + SDValue Ops[] = { Chain, Elements, OtherTarget }; + Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + } + DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); + return Res; + } else { + SDValue Size = DAG.getTargetConstant( + cast(Int.getOperand(3))->getZExtValue(), dl, MVT::i32); + SDValue Args[] = { Int.getOperand(0), Elements, Size, }; + SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl, + DAG.getVTList(MVT::i32, MVT::Other), Args); + DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode()); + + // We expect this instruction to branch when the count is not zero. 
+ SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget; + + // Update the unconditional branch to target the loop preheader if we've + // found the condition has been reversed. + if (Target == OtherTarget) + UpdateUncondBr(Br, Dest, DAG); + + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + SDValue(LoopDec.getNode(), 1), Chain); + + SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target }; + return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs); + } + return SDValue(); } /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. @@ -13298,14 +14376,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); - case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget); + case ISD::BRCOND: + case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); - case ISD::STORE: return PerformSTORECombine(N, DCI); + case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); @@ -13334,6 +14413,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ARMISD::PREDICATE_CAST: + return PerformPREDICATE_CASTCombine(N, DCI); case ARMISD::SMULWB: { unsigned BitWidth = 
N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); @@ -13348,7 +14429,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } - case ARMISD::SMLALBB: { + case ARMISD::SMLALBB: + case ARMISD::QADD16b: + case ARMISD::QSUB16b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || @@ -13384,6 +14467,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::QADD8b: + case ARMISD::QSUB8b: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { @@ -13457,47 +14549,38 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, if (!Subtarget->hasMVEIntegerOps()) return false; - if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 && - Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 && - Ty != MVT::v2f64 && - // These are for truncated stores - Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16) - return false; - if (Subtarget->isLittle()) { - // In little-endian MVE, the store instructions VSTRB.U8, - // VSTRH.U16 and VSTRW.U32 all store the vector register in - // exactly the same format, and differ only in the range of - // their immediate offset field and the required alignment. - // - // In particular, VSTRB.U8 can store a vector at byte alignment. - // So at this stage we can simply say that loads/stores of all - // 128-bit wide vector types are permitted at any alignment, - // because we know at least _one_ instruction can manage that. 
- // - // Later on we might find that some of those loads are better - // generated as VLDRW.U32 if alignment permits, to take - // advantage of the larger immediate range. But for the moment, - // all that matters is that if we don't lower the load then - // _some_ instruction can handle it. + // These are for predicates + if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) { + if (Fast) + *Fast = true; + return true; + } + + // These are for truncated stores/narrowing loads. They are fine so long as + // the alignment is at least the size of the item being loaded + if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) && + Alignment >= VT.getScalarSizeInBits() / 8) { + if (Fast) + *Fast = true; + return true; + } + + // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and + // VSTRW.U32 all store the vector register in exactly the same format, and + // differ only in the range of their immediate offset field and the required + // alignment. So there is always a store that can be used, regardless of + // actual type. + // + // For big endian, that is not the case. But can still emit a (VSTRB.U8; + // VREV64.8) pair and get the same effect. This will likely be better than + // aligning the vector through the stack. + if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 || + Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 || + Ty == MVT::v2f64) { if (Fast) *Fast = true; return true; - } else { - // In big-endian MVE, those instructions aren't so similar - // after all, because they reorder the bytes of the vector - // differently. So this time we can only store a particular - // kind of vector if its alignment is at least the element - // type. And we can't store vectors of i64 or f64 at all - // without having to do some postprocessing, because there's - // no VSTRD.U64. 
- if (Ty == MVT::v16i8 || - ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) || - ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) { - if (Fast) - *Fast = true; - return true; - } } return false; @@ -13617,22 +14700,60 @@ static bool areExtractExts(Value *Ext1, Value *Ext2) { /// sext/zext can be folded into vsubl. bool ARMTargetLowering::shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const { - if (!Subtarget->hasNEON() || !I->getType()->isVectorTy()) + if (!I->getType()->isVectorTy()) return false; - switch (I->getOpcode()) { - case Instruction::Sub: - case Instruction::Add: { - if (!areExtractExts(I->getOperand(0), I->getOperand(1))) + if (Subtarget->hasNEON()) { + switch (I->getOpcode()) { + case Instruction::Sub: + case Instruction::Add: { + if (!areExtractExts(I->getOperand(0), I->getOperand(1))) + return false; + Ops.push_back(&I->getOperandUse(0)); + Ops.push_back(&I->getOperandUse(1)); + return true; + } + default: return false; - Ops.push_back(&I->getOperandUse(0)); - Ops.push_back(&I->getOperandUse(1)); - return true; + } } - default: + + if (!Subtarget->hasMVEIntegerOps()) + return false; + + auto IsSinker = [](Instruction *I, int Operand) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Mul: + return true; + case Instruction::Sub: + return Operand == 1; + default: + return false; + } + }; + + int Op = 0; + if (!isa(I->getOperand(Op))) + Op = 1; + if (!IsSinker(I, Op)) + return false; + if (!match(I->getOperand(Op), + m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()), + m_Undef(), m_Zero()))) { return false; } - return false; + Instruction *Shuffle = cast(I->getOperand(Op)); + // All uses of the shuffle should be sunk to avoid duplicating it across gpr + // and vector registers + for (Use &U : Shuffle->uses()) { + Instruction *Insn = cast(U.getUser()); + if (!IsSinker(Insn, U.getOperandNo())) + return false; + } + Ops.push_back(&Shuffle->getOperandUse(0)); + 
Ops.push_back(&I->getOperandUse(Op)); + return true; } bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { @@ -13641,6 +14762,11 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { if (!isTypeLegal(VT)) return false; + if (auto *Ld = dyn_cast(ExtVal.getOperand(0))) { + if (Ld->isExpandingLoad()) + return false; + } + // Don't create a loadext if we can fold the extension into a wide/long // instruction. // If there's more than one user instruction, the loadext is desirable no @@ -14028,6 +15154,52 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, return false; } +static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, + bool isSEXTLoad, bool isLE, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + if (!isa(Ptr->getOperand(1))) + return false; + + ConstantSDNode *RHS = cast(Ptr->getOperand(1)); + int RHSC = (int)RHS->getZExtValue(); + + auto IsInRange = [&](int RHSC, int Limit, int Scale) { + if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) { + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) { + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } + return false; + }; + + // Try to find a matching instruction based on s/zext, Alignment, Offset and + // (in BE) type. 
+ Base = Ptr->getOperand(0); + if (VT == MVT::v4i16) { + if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) + return true; + } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { + if (IsInRange(RHSC, 0x80, 1)) + return true; + } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + IsInRange(RHSC, 0x80, 4)) + return true; + else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + IsInRange(RHSC, 0x80, 2)) + return true; + else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + return true; + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -14041,25 +15213,35 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); } else return false; bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, + isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + } if 
(!isLegal) return false; @@ -14077,15 +15259,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); } else return false; @@ -14108,12 +15293,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + } if (!isLegal) return false; @@ -14369,7 +15561,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { /// constraint it is for this target. 
ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + if (S == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; @@ -14377,12 +15570,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const { case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; - case 'j': return C_Other; // Constant for movw. - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as an 'r' memory constraint. + case 'j': return C_Immediate; // Constant for movw. + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as an 'r' memory constraint. case 'Q': return C_Memory; } - } else if (Constraint.size() == 2) { + } else if (S == 2) { switch (Constraint[0]) { default: break; case 'T': return C_RegisterClass; @@ -14535,7 +15728,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'j': // Constant suitable for movw, must be between 0 and // 65535. - if (Subtarget->hasV6T2Ops()) + if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps())) if (CVal >= 0 && CVal <= 65535) break; return; @@ -14643,7 +15836,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'N': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 31, for shift amounts. if (CVal >= 0 && CVal <= 31) break; @@ -14651,7 +15844,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'O': - if (Subtarget->isThumb()) { // FIXME thumb2 + if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between -508 and 508, for // ADD/SUB sp = sp + immediate. 
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) @@ -14874,6 +16067,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { // without FP16. So we must do a function call. SDLoc Loc(Op); RTLIB::Libcall LC; + MakeLibCallOptions CallOptions; if (SrcSz == 16) { // Instruction from 16 -> 32 if (Subtarget->hasFP16()) @@ -14884,7 +16078,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected type for custom-lowering FP_EXTEND"); SrcVal = - makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first; + makeLibCall(DAG, LC, MVT::f32, SrcVal, CallOptions, Loc).first; } } @@ -14897,7 +16091,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { LC = RTLIB::getFPEXT(MVT::f32, MVT::f64); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected type for custom-lowering FP_EXTEND"); - return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first; + return makeLibCall(DAG, LC, MVT::f64, SrcVal, CallOptions, Loc).first; } SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { @@ -14923,7 +16117,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected type for custom-lowering FP_ROUND"); - return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first; + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions, Loc).first; } void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl &Results, @@ -15015,7 +16210,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast(AlignArg)->getZExtValue(); + Info.align = MaybeAlign(cast(AlignArg)->getZExtValue()); // volatile loads with 
NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -15030,7 +16225,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -15056,7 +16251,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); - Info.align = cast(AlignArg)->getZExtValue(); + Info.align = MaybeAlign(cast(AlignArg)->getZExtValue()); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; @@ -15077,7 +16272,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 0; + Info.align.reset(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; @@ -15090,7 +16285,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } @@ -15102,7 +16297,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); + Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType())); 
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } @@ -15112,7 +16307,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; - Info.align = 8; + Info.align = Align(8); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; @@ -15122,7 +16317,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 8; + Info.align = Align(8); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; @@ -15473,6 +16668,12 @@ bool ARMTargetLowering::isLegalInterleavedAccessType( return VecSize == 64 || VecSize % 128 == 0; } +unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const { + if (Subtarget->hasNEON()) + return 4; + return TargetLoweringBase::getMaxSupportedInterleaveFactor(); +} + /// Lower an interleaved load into a vldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -15792,15 +16993,15 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, } /// Return the correct alignment for the current calling convention. -unsigned -ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { +Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + const Align ABITypeAlign(DL.getABITypeAlignment(ArgTy)); if (!ArgTy->isVectorTy()) - return DL.getABITypeAlignment(ArgTy); + return ABITypeAlign; // Avoid over-aligning vector parameters. It would require realigning the // stack and waste space for no real benefit. 
- return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment()); + return std::min(ABITypeAlign, DL.getStackAlignment()); } /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of @@ -15861,7 +17062,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 1675ec59a35..53813fad5af 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -103,6 +103,7 @@ class VectorType; ADDE, // Add using carry SUBC, // Sub with carry SUBE, // Sub using carry + LSLS, // Shift left producing carry VMOVRRD, // double to two gprs. VMOVDRR, // Two gprs to double. @@ -126,17 +127,13 @@ class VectorType; WIN__DBZCHK, // Windows' divide by zero check WLS, // Low-overhead loops, While Loop Start + LOOP_DEC, // Really a part of LE, performs the sub + LE, // Low-overhead loops, Loop End - VCEQ, // Vector compare equal. - VCEQZ, // Vector compare equal to zero. - VCGE, // Vector compare greater than or equal. - VCGEZ, // Vector compare greater than or equal to zero. - VCLEZ, // Vector compare less than or equal to zero. - VCGEU, // Vector compare unsigned greater than or equal. - VCGT, // Vector compare greater than. - VCGTZ, // Vector compare greater than zero. - VCLTZ, // Vector compare less than zero. - VCGTU, // Vector compare unsigned greater than. + PREDICATE_CAST, // Predicate cast for MVE i1 types + + VCMP, // Vector compare. + VCMPZ, // Vector compare to zero. VTST, // Vector test bits. 
// Vector shift by vector @@ -200,6 +197,7 @@ class VectorType; VTRN, // transpose VTBL1, // 1-register shuffle with mask VTBL2, // 2-register shuffle with mask + VMOVN, // MVE vmovn // Vector multiply long: VMULLs, // ...signed @@ -221,6 +219,12 @@ class VectorType; SMMLAR, // Signed multiply long, round and add SMMLSR, // Signed multiply long, subtract and round + // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for. + QADD8b, + QSUB8b, + QADD16b, + QSUB16b, + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -243,6 +247,11 @@ class VectorType; // instructions. MEMCPY, + // V8.1MMainline condition select + CSINV, // Conditional select invert. + CSNEG, // Conditional select negate. + CSINC, // Conditional select increment. + // Vector load N-element structure to all lanes: VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD2DUP, @@ -539,7 +548,7 @@ class VectorType; Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const override; - unsigned getMaxSupportedInterleaveFactor() const override { return 4; } + unsigned getMaxSupportedInterleaveFactor() const override; bool lowerInterleavedLoad(LoadInst *LI, ArrayRef Shuffles, @@ -608,8 +617,8 @@ class VectorType; void finalizeLowering(MachineFunction &MF) const override; /// Return the correct alignment for the current calling convention. 
- unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override; + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override; bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; @@ -670,6 +679,8 @@ class VectorType; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -721,8 +732,8 @@ class VectorType; void lowerABS(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const override; @@ -814,7 +825,7 @@ class VectorType; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl, bool InvalidOnQNaN) const; + const SDLoc &dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -838,7 +849,7 @@ class VectorType; void setAllExpand(MVT VT); }; - enum NEONModImmType { + enum VMOVModImmType { VMOVModImm, VMVNModImm, MVEVMVNModImm, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index bc93a058720..1da32ad2af6 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -188,6 +188,13 @@ def 
s_cc_out : OptionalDefOperand { let DecoderMethod = "DecodeCCOutOperand"; } +// Transform to generate the inverse of a condition code during ISel +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(ARMCC::getOppositeCondition(CC), SDLoc(N), + MVT::i32); +}]>; + // VPT predicate def VPTPredNOperand : AsmOperandClass { @@ -401,6 +408,8 @@ class InstTemplate(f), "Pseudo"); @@ -412,6 +421,7 @@ class InstTemplate let isCodeGenOnly = 0; // So we get asm matcher for it. let AsmString = asm; let isPseudo = 1; + let hasNoSchedulingInfo = 1; } class ARMAsmPseudo @@ -2282,7 +2293,7 @@ class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, let Inst{24} = SIMM{7}; let Inst{18-16} = SIMM{6-4}; let Inst{3-0} = SIMM{3-0}; - let DecoderMethod = "DecodeNEONModImmInstruction"; + let DecoderMethod = "DecodeVMOVModImmInstruction"; } // NEON 2 vector register format. @@ -2724,6 +2735,16 @@ def complexrotateopodd : Operand { let PrintMethod = "printComplexRotationOp<180, 90>"; } +def MveSaturateOperand : AsmOperandClass { + let PredicateMethod = "isMveSaturateOp"; + let DiagnosticString = "saturate operand must be 48 or 64"; + let Name = "MveSaturate"; +} +def saturateop : Operand { + let ParserMatchClass = MveSaturateOperand; + let PrintMethod = "printMveSaturateOp"; +} + // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. 
def : TokenAlias<".s8", ".i8">; def : TokenAlias<".u8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 388c889349b..a802d5a06f0 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -117,7 +117,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineInstrBuilder MIB; MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e3514546385..fe696222ec7 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -51,8 +51,6 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, SDTCisVT<2, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -108,14 +106,24 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>, // TODO Add another operand for 'Size' so that we can re-use this node when we // start supporting *TP versions. 
-def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, OtherVT>]>; +def SDT_ARMLoLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, OtherVT>]>; def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; +def SDT_ARMCSel : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<3>, + SDTCisVT<3, i32>]>; + +def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>; +def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>; +def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>; + def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -194,6 +202,7 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, [SDNPCommutative]>; def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>; +def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>; def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>; def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; @@ -229,6 +238,11 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; +def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; +def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; +def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; + // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -265,8 +279,16 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; def ARMvshlu : 
SDNode<"ARMISD::VSHLu", SDTARMVSH>; -def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop, - [SDNPHasChain]>; +def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>; + +def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>; +def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>; + +def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>; +def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>; +def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -1948,7 +1970,7 @@ multiclass AI_str1nopc; @@ -2361,6 +2383,12 @@ let isCall = 1, def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, Requires<[IsARM]>, Sched<[WriteBr]>; + + // push lr before the call + def BL_PUSHLR : ARMPseudoInst<(outs), (ins GPRlr:$ra, arm_bl_target:$func), + 4, IIC_Br, + []>, + Requires<[IsARM]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1 in { @@ -3727,6 +3755,23 @@ let DecoderMethod = "DecodeQADDInstruction" in [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; } +def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b), + (QADD GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b), + (QSUB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (QDADD rGPR:$Rm, rGPR:$Rn)>; +def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)), + (QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (QSUB16 rGPR:$Rm, rGPR:$Rn)>; + def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", 
int_arm_uqadd16>; def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; @@ -4870,14 +4915,13 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>, let hasSideEffects = 1; } -let usesCustomInserter = 1, Defs = [CPSR] in { - -// Pseudo instruction that combines movs + predicated rsbmi -// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { + // Pseudo instruction that combines movs + predicated rsbmi + // to implement integer ABS def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; } -let usesCustomInserter = 1, Defs = [CPSR] in { +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { def COPY_STRUCT_BYVAL_I32 : PseudoInst< (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), NoItinerary, @@ -5085,8 +5129,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt), def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]>, Requires<[IsARM,PreV8]> { bits<4> opc1; bits<4> CRn; @@ -5109,8 +5153,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]>, Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; bits<4> opc1; @@ -5289,15 +5333,15 @@ multiclass LdSt2Cop pattern> { } } -defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, 
imm:$CRd, addrmode5:$addr)]>; -defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; } // DecoderNamespace = "CoProc" @@ -5333,8 +5377,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, 
ComplexDeprecationPredicate<"MCR">; def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -5347,8 +5391,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; -def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; class MovRCopro2 pattern> @@ -5379,8 +5423,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -5394,9 +5438,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; -def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, - imm:$CRm, imm:$opc2), - (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, + timm:$CRm, timm:$opc2), + (MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; class MovRRCopro pattern = []> @@ -5422,8 +5466,8 @@ class MovRRCopro def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), - [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, - GPRnopc:$Rt2, imm:$CRm)]>; + [(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, timm:$CRm)]>; def MRRC : 
MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */, (outs GPRnopc:$Rt, GPRnopc:$Rt2), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>; @@ -5455,8 +5499,8 @@ class MovRRCopro2; + [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, timm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */, (outs GPRnopc:$Rt, GPRnopc:$Rt2), @@ -5579,12 +5623,12 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, [SDNPHasChain, SDNPSideEffect]>; -let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; -let usesCustomInserter = 1, Defs = [CPSR] in +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary, [(win__dbzchk tGPR:$divisor)]>; @@ -6131,10 +6175,10 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, ComplexDeprecationPredicate<"IT">; -let mayLoad = 1, mayStore =1, hasSideEffects = 1 in +let mayLoad = 1, mayStore =1, hasSideEffects = 1, hasNoSchedulingInfo = 1 in def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), NoItinerary, - [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; + [(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>; //===---------------------------------- // Atomic cmpxchg for -O0 @@ -6174,4 +6218,5 @@ def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, let hasSideEffects = 1; let Size = 0; let AsmString = "@ COMPILER BARRIER"; + let hasNoSchedulingInfo = 1; } diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td 
index 3e7ae55c7fc..4f67cd6e47c 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand : AsmOperandClass { let RenderMethod = "addMemImmOffsetOperands"; } -class taddrmode_imm7 : MemOperand { +class taddrmode_imm7 : MemOperand, + ComplexPattern", []> { let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand; // They are printed the same way as the T2 imm8 version let PrintMethod = "printT2AddrModeImm8Operand"; @@ -221,7 +222,9 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>; def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand { +class t2am_imm7_offset : MemOperand, + ComplexPattern", + [], [SDNPWantRoot]> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = @@ -371,6 +374,8 @@ class MVE_ScalarShiftSRegReg op5_4, list pattern=[]> let Inst{7-6} = 0b00; let Inst{5-4} = op5_4{1-0}; let Inst{3-0} = 0b1101; + + let Unpredictable{8-6} = 0b111; } def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>; @@ -403,18 +408,17 @@ class MVE_ScalarShiftDRegImm op5_4, bit op16, let Inst{3-0} = 0b1111; } -class MVE_ScalarShiftDRegReg pattern=[]> +class MVE_ScalarShiftDRegRegBase pattern=[]> : MVE_ScalarShiftDoubleReg< - iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), - "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo," - "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src", + iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo," + "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src", pattern> { bits<4> Rm; let Inst{16} = op16; let Inst{15-12} = Rm{3-0}; - let Inst{7-6} = 0b00; + let Inst{6} = 0b0; let Inst{5} = op5; let Inst{4} = 0b0; let Inst{3-0} = 0b1101; @@ -427,27 +431,44 @@ class MVE_ScalarShiftDRegReg pattern=[]> + : MVE_ScalarShiftDRegRegBase< + 
iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), + "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> { + + let Inst{7} = 0b0; +} + +class MVE_ScalarShiftDRegRegWithSat pattern=[]> + : MVE_ScalarShiftDRegRegBase< + iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat), + "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> { + bit sat; + + let Inst{7} = sat; +} + +def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; -def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; +def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; -def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>; +def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>; def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>; def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>; -def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0, 0b1>; +def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>; def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>; def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>; @@ -531,6 +552,19 @@ defm MVE_VADDVu8 : 
MVE_VADDV_A<"u8", 0b1, 0b00>; defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>; defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu32acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu16acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu8acc $src2, $src1))>; + +} + class MVE_VADDLV pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname, @@ -636,6 +670,35 @@ multiclass MVE_VMINMAXV_ty pattern=[]> { defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>; + def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>; + def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>; + + def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>; + def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>; + def : Pat<(i32 (vecreduce_smin (v4i32 
MQPR:$src))), + (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>; + def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>; + def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; + +} + multiclass MVE_VMINMAXAV_ty pattern=[]> { def s8 : MVE_VMINMAXV; def s16 : MVE_VMINMAXV; @@ -667,56 +730,56 @@ class MVE_VMLAMLSDAV pattern=[]> { - def _noexch : MVE_VMLAMLSDAV; - def _exch : MVE_VMLAMLSDAV; +multiclass MVE_VMLAMLSDAV_A pattern=[]> { + def ""#x#suffix : MVE_VMLAMLSDAV; + def "a"#x#suffix : MVE_VMLAMLSDAV; } -multiclass MVE_VMLAMLSDAV_XA pattern=[]> { - defm _noacc : MVE_VMLAMLSDAV_X; - defm _acc : MVE_VMLAMLSDAV_X; +multiclass MVE_VMLAMLSDAV_AX pattern=[]> { + defm "" : MVE_VMLAMLSDAV_A; + defm "" : MVE_VMLAMLSDAV_A; } -multiclass MVE_VMLADAV_multi pattern=[]> { - defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>; -} - -defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>; -defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>; -defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>; -defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>; - -defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>; -defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>; - -// vmlav aliases vmladav -foreach acc = ["_acc", "_noacc"] in { - foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in { - def : MVEInstAlias("MVE_VMLADAV"#suffix#acc#"_noexch") - tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; - } +multiclass MVE_VMLADAV_multi pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix, + sz, 0b0, bit_8, 0b0, pattern>; + defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix, + sz, 0b1, 0b0, bit_8, 0b0, pattern>; } multiclass MVE_VMLSDAV_multi pattern=[]> { - defm "" : 
MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>; + list pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix, + sz, bit_28, 0b0, 0b1, pattern>; } -defm MVE_VMLSDAVs8 : MVE_VMLSDAV_multi<"s8", 0, 0b1>; -defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>; -defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>; + +defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>; + +// vmlav aliases vmladav +foreach acc = ["", "a"] in { + foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in { + def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm", + (!cast("MVE_VMLADAV"#acc#suffix) + tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; + } +} // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH class MVE_VMLALDAVBase pattern=[]> { - def _noexch : MVE_VMLALDAVBase; - def _exch : MVE_VMLALDAVBase; -} - -multiclass MVE_VMLALDAVBase_XA pattern=[]> { - defm _noacc : MVE_VMLALDAVBase_X< - iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, bit_8, bit_0, pattern>; - defm _acc : MVE_VMLALDAVBase_X< - iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, - MQPR:$Qn, MQPR:$Qm), +multiclass MVE_VMLALDAVBase_A pattern=[]> { + def ""#x#suffix : MVE_VMLALDAVBase< + iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", + sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + def "a"#x#suffix : MVE_VMLALDAVBase< + iname # "a" # x, suffix, + (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm), "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", - sz, bit_28, 0b1, bit_8, bit_0, pattern>; + sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; } -multiclass MVE_VRMLALDAVH_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA< - "vrmlaldavh", suffix, 
0b0, U, 0b1, 0b0, pattern>; + +multiclass MVE_VMLALDAVBase_AX pattern=[]> { + defm "" : MVE_VMLALDAVBase_A; + defm "" : MVE_VMLALDAVBase_A; } -defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>; -defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>; +multiclass MVE_VRMLALDAVH_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix, + 0b0, 0b0, 0b1, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix, + 0b0, 0b1, 0b0, 0b1, 0b0, pattern>; +} + +defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">; // vrmlalvh aliases for vrmlaldavh def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_noacc_noexch + (MVE_VRMLALDAVHs32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_acc_noexch + (MVE_VRMLALDAVHas32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_noacc_noexch + (MVE_VRMLALDAVHu32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_acc_noexch + (MVE_VRMLALDAVHau32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -multiclass MVE_VMLALDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>; +multiclass MVE_VMLALDAV_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix, + sz, 0b1, 0b0, 0b0, 0b0, pattern>; } -defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>; -defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>; -defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>; -defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>; +defm 
MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>; // vmlalv aliases vmlaldav -foreach acc = ["_acc", "_noacc"] in { +foreach acc = ["", "a"] in { foreach suffix = ["s16", "s32", "u16", "u32"] in { - def : MVEInstAlias("MVE_VMLALDAV"#suffix#acc#"_noexch") + def : MVEInstAlias<"vmlalv" # acc # "${vp}." # suffix # + "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm", + (!cast("MVE_VMLALDAV"#acc#suffix) tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; } } multiclass MVE_VMLSLDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA; + bit bit_28, list pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX; } -defm MVE_VMLSLDAVs16 : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; -defm MVE_VMLSLDAVs32 : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; -defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; +defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; // end of mve_rDest instructions @@ -967,11 +1031,12 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } -class MVE_VREV size, bits<2> bit_8_7> +class MVE_VREV size, bits<2> bit_8_7, string cstr=""> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, - suffix, "$Qd, $Qm", ""> { + suffix, "$Qd, $Qm", cstr> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; @@ -985,15 +1050,22 @@ class MVE_VREV size, bits<2> bit_8_7> let Inst{0} = 0b0; } -def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00>; -def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>; -def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>; +def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, 
"@earlyclobber $Qd">; def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>; def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))), + (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>; + def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))), + (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>; +} + let Predicates = [HasMVEInt] in { def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))), (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>; @@ -1026,6 +1098,7 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), let Inst{12-6} = 0b0010111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } let Predicates = [HasMVEInt] in { @@ -1054,6 +1127,7 @@ class MVE_bit_ops bit_21_20, bit bit_28> let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>; @@ -1145,6 +1219,7 @@ class MVE_bit_cmode cmode, dag inOps> class MVE_VORR cmode, ExpandImm imm_type> : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b0; + let validForTailPredication = 1; } def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>; @@ -1173,6 +1248,7 @@ def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm", class MVE_VBIC cmode, ExpandImm imm_type> : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>; @@ -1315,8 +1391,12 @@ let Predicates = [HasMVEInt] in { def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane), (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>; - def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane), - (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane), + (EXTRACT_SUBREG MQPR:$src, 
(SSubReg_f16_reg imm_even:$lane))>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane), + (COPY_TO_REGCLASS + (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), + HPR)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; @@ -1408,6 +1488,7 @@ class MVE_VADDSUB size, bit subtract, let Inst{12-8} = 0b01000; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } class MVE_VADD size, list pattern=[]> @@ -1442,8 +1523,8 @@ let Predicates = [HasMVEInt] in { } class MVE_VQADDSUB size, list pattern=[]> - : MVE_int { + bits<2> size, ValueType vt> + : MVE_int { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1453,26 +1534,49 @@ class MVE_VQADDSUB size, list pattern=[]> - : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>; -class MVE_VQSUB size, list pattern=[]> - : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>; +class MVE_VQADD size, ValueType VT> + : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>; +class MVE_VQSUB size, ValueType VT> + : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>; -def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>; -def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>; -def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>; -def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00>; -def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>; -def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>; +def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>; +def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>; +def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>; +def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>; +def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>; +def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>; + +def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>; +def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>; +def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>; +def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>; +def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, 
v8i16>; +def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>; + +let Predicates = [HasMVEInt] in { + foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; +} -def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>; -def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>; -def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>; -def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>; -def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>; -def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>; class MVE_VABD_int size, list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { @@ -1483,6 +1587,7 @@ class MVE_VABD_int size, list pattern=[]> let Inst{12-8} = 0b00111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; @@ -1501,6 +1606,7 @@ class MVE_VRHADD size, list pattern=[]> let Inst{12-8} = 0b00001; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>; @@ -1522,6 +1628,7 @@ class MVE_VHADDSUB size, @@ -1545,6 +1652,60 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (add (v16i8 MQPR:$v1), 
(v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDs8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBs8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; +} + class MVE_VDUP pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { @@ -1563,6 +1724,7 @@ class MVE_VDUP pattern=[]> let Inst{6} = 0b0; let Inst{5} = E; 
let Inst{4-0} = 0b10000; + let validForTailPredication = 1; } def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>; @@ -1625,6 +1787,7 @@ class MVE_VCLSCLZ size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>; @@ -1635,6 +1798,15 @@ def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>; def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>; def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>; + def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>; + def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>; +} + class MVE_VABSNEG_int size, bit negate, list pattern=[]> : MVEIntSingleSrc { @@ -1648,6 +1820,7 @@ class MVE_VABSNEG_int size, bit negate, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>; @@ -1689,6 +1862,7 @@ class MVE_VQABSNEG size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>; @@ -1720,6 +1894,7 @@ class MVE_mod_imm cmode, bit op, let Inst{3-0} = imm{3-0}; let DecoderMethod = "DecodeMVEModImmInstruction"; + let validForTailPredication = 1; } let isReMaterializable = 1 in { @@ -2115,6 +2290,7 @@ class MVE_shift_by_vec { @@ -2163,6 +2339,7 @@ class MVE_shift_with_imm @@ -2175,6 +2352,7 @@ class MVE_VSxI_imm let Inst{21-16} = imm; let Inst{10-9} = 0b10; let Inst{8} = bit_8; + let validForTailPredication = 1; } def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> { @@ -2427,6 +2605,7 @@ class MVE_VRINT op, string suffix, bits<2> size, let Inst{11-10} = 0b01; let Inst{9-7} = op{2-0}; let Inst{4} = 0b0; + let validForTailPredication = 1; } @@ 
-2489,6 +2668,7 @@ class MVE_VMUL_fp pattern=[]> let Inst{12-8} = 0b01101; let Inst{7} = Qn{3}; let Inst{4} = 0b1; + let validForTailPredication = 1; } def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>; @@ -2556,8 +2736,38 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1, def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; -def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; -def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +let Predicates = [HasMVEFloat, UseFusedMAC] in { + def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>; + + def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>; +} + +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))), + (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>; + def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))), + (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>; +} + + +let validForTailPredication = 1 in { + def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; + def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2566,8 +2776,11 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; -def MVE_VSUBf16 : 
MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; + +let validForTailPredication = 1 in { + def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; + def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2576,10 +2789,10 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -class MVE_VCADD pattern=[]> +class MVE_VCADD pattern=[]> : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bit rot; @@ -2598,7 +2811,7 @@ class MVE_VCADD pattern=[]> } def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>; -def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>; +def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">; class MVE_VABD_fp : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), @@ -2617,6 +2830,7 @@ class MVE_VABD_fp let Inst{11-8} = 0b1101; let Inst{7} = Qn{3}; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>; @@ -2643,6 +2857,7 @@ class MVE_VCVT_fix : AsmOperandClass { @@ -2693,6 +2908,7 @@ class MVE_VCVT_fp_int_anpm size, bit op, string anpm, let Inst{9-8} = rm; let Inst{7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } multiclass MVE_VCVT_fp_int_anpm_multi size, bit op, @@ -2727,6 +2943,7 @@ class MVE_VCVT_fp_int size, bits<2> op, let Inst{12-9} = 0b0011; let Inst{8-7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } // The unsuffixed VCVT for float->int implicitly rounds toward zero, @@ -2776,6 +2993,7 @@ class MVE_VABSNEG_fp size, bit negate, let Inst{11-8} = 0b0111; let Inst{7} = negate; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>; @@ -2863,6 
+3081,7 @@ class MVE_VCMPqq bits_21_20, // decoder to emit an operand that isn't affected by any instruction // bit. let DecoderMethod = "DecodeMVEVCMP"; + let validForTailPredication = 1; } class MVE_VCMPqqf @@ -2927,6 +3146,7 @@ class MVE_VCMPqr bits_21_20, let Constraints = ""; // Custom decoder method, for the same reason as MVE_VCMPqq let DecoderMethod = "DecodeMVEVCMP"; + let validForTailPredication = 1; } class MVE_VCMPqrf @@ -2966,6 +3186,168 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>; def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; +multiclass unpred_vcmp_z { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmp_r { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32") 
(v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; + + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), + (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), + (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))), + (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_z { + def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), + (v4i1 
(MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_r { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; + + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; +} + +let Predicates = [HasMVEInt] in { + defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; + defm MVE_VCGEZ : 
unpred_vcmp_z<"s", 10>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", 0>; + defm MVE_VCNE : unpred_vcmp_r<"i", 1>; + defm MVE_VCGE : unpred_vcmp_r<"s", 10>; + defm MVE_VCLT : unpred_vcmp_r<"s", 11>; + defm MVE_VCGT : unpred_vcmp_r<"s", 12>; + defm MVE_VCLE : unpred_vcmp_r<"s", 13>; + defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; + defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; +} + +let Predicates = [HasMVEFloat] in { + defm MVE_VFCEQZ : unpred_vcmpf_z<0>; + defm MVE_VFCNEZ : unpred_vcmpf_z<1>; + defm MVE_VFCGEZ : unpred_vcmpf_z<10>; + defm MVE_VFCLTZ : unpred_vcmpf_z<11>; + defm MVE_VFCGTZ : unpred_vcmpf_z<12>; + defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + + defm MVE_VFCEQ : unpred_vcmpf_r<0>; + defm MVE_VFCNE : unpred_vcmpf_r<1>; + defm MVE_VFCGE : unpred_vcmpf_r<10>; + defm MVE_VFCLT : unpred_vcmpf_r<11>; + defm MVE_VFCGT : unpred_vcmpf_r<12>; + defm MVE_VFCLE : unpred_vcmpf_r<13>; +} + + +// Extra "worst case" and/or/xor patterns, going into and out of GPR +multiclass two_predops { + def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))), + (v16i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))), + VCCR))>; + def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))), + (v8i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))), + VCCR))>; + def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))), + (v4i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))), + VCCR))>; +} + +let Predicates = [HasMVEInt] in { + defm POR : two_predops; + defm PAND : two_predops; + defm PEOR : two_predops; +} + +// Occasionally we need to cast between 
a i32 and a boolean vector, for +// example when moving between rGPR and VPR.P0 as part of predicate vector +// shuffles. We also sometimes need to cast between different predicate +// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. + +def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; + +let Predicates = [HasMVEInt] in { + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(i32 (predicate_cast (VT VCCR:$src))), + (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>; + def : Pat<(VT (predicate_cast (i32 VCCR:$src))), + (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>; + + foreach VT2 = [ v4i1, v8i1, v16i1 ] in + def : Pat<(VT (predicate_cast (VT2 VCCR:$src))), + (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; + } +} + // end of MVE compares // start of MVE_qDest_qSrc @@ -2989,10 +3371,10 @@ class MVE_qDest_qSrc size, list pattern=[]> + string suffix, bits<2> size, string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src"#cstr, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -3009,7 +3391,7 @@ multiclass MVE_VQxDMLxDH_multi { def s8 : MVE_VQxDMLxDH; def s16 : MVE_VQxDMLxDH; - def s32 : MVE_VQxDMLxDH; + def s32 : MVE_VQxDMLxDH; } defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>; @@ -3021,10 +3403,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>; defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL pattern=[]> +class MVE_VCMUL pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bits<2> rot; @@ -3041,13 +3423,13 @@ class MVE_VCMUL pattern=[]> } def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>; -def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>; +def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">; class MVE_VMULL bits_21_20, - bit T, list pattern=[]> + bit T, string cstr, list pattern=[]> : 
MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -3063,9 +3445,9 @@ class MVE_VMULL bits_21_20, } multiclass MVE_VMULL_multi bits_21_20> { - def bh : MVE_VMULL; - def th : MVE_VMULL; + bit bit_28, bits<2> bits_21_20, string cstr=""> { + def bh : MVE_VMULL; + def th : MVE_VMULL; } // For integer multiplies, bits 21:20 encode size, and bit 28 signedness. @@ -3074,10 +3456,10 @@ multiclass MVE_VMULL_multi; defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>; -defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>; +defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>; defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>; -defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>; +defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>; defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>; @@ -3144,6 +3526,18 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; +let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))), + (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))), + (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))), + (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))), + (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; +} + class 
MVE_VCVT_ff pattern=[]> : MVE_qDest_qSrc; defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>; class MVE_VxCADD size, bit halve, - list pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bit rot; @@ -3186,11 +3579,11 @@ class MVE_VxCADD size, bit halve, def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>; def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>; -def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>; +def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">; def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>; def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>; -def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>; +def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">; class MVE_VADCSBC pattern=[]> @@ -3220,10 +3613,10 @@ def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>; def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>; class MVE_VQDMULL pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qn; let Inst{28} = size; @@ -3236,13 +3629,13 @@ class MVE_VQDMULL { - def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_halves { + def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>; -defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>; +defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">; // end of mve_qDest_qSrc @@ -3267,9 +3660,9 @@ class MVE_qr_base pattern=[]> +class MVE_qDest_rSrc pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "", + NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, pattern>; class 
MVE_qDestSrc_rSrc pattern=[]> @@ -3291,7 +3684,7 @@ class MVE_qDest_single_rSrc pattern=[]> class MVE_VADDSUB_qr size, bit bit_5, bit bit_12, bit bit_16, bit bit_28, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -3299,6 +3692,7 @@ class MVE_VADDSUB_qr size, let Inst{12} = bit_12; let Inst{8} = 0b1; let Inst{5} = bit_5; + let validForTailPredication = 1; } multiclass MVE_VADDSUB_qr_sizes; defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>; defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VQDMULL_qr pattern=[]> - : MVE_qDest_rSrc { + bit T, string cstr="", list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -3332,18 +3744,18 @@ class MVE_VQDMULL_qr { - def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_qr_halves { + def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL_qr<"vqdmullt", 
suffix, size, 0b1, cstr>; } defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>; -defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>; +defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">; class MVE_VxADDSUB_qr bits_21_20, bit subtract, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3351,6 +3763,7 @@ class MVE_VxADDSUB_qr; @@ -3388,6 +3801,7 @@ class MVE_VxSHL_qr size, let Inst{12-8} = 0b11110; let Inst{7} = bit_7; let Inst{6-4} = 0b110; + let validForTailPredication = 1; } multiclass MVE_VxSHL_qr_types { @@ -3421,7 +3835,7 @@ let Predicates = [HasMVEInt] in { } class MVE_VBRSR size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -3429,15 +3843,27 @@ class MVE_VBRSR size, list pattern=[]> let Inst{12} = 0b1; let Inst{8} = 0b0; let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>; def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>; def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>; + + def : Pat<(v4i32 ( bitreverse (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32)) ))>; + + def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>; +} + class MVE_VMUL_qr_int size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -3445,15 +3871,25 @@ class MVE_VMUL_qr_int; def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>; def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : 
Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VxxMUL_qr bits_21_20, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3471,14 +3907,14 @@ def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>; def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>; def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>; -let Predicates = [HasMVEFloat] in { +let Predicates = [HasMVEFloat], validForTailPredication = 1 in { def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>; def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>; } class MVE_VFMAMLA_qr bits_21_20, bit S, - list pattern=[]> + bit bit_28, bits<2> bits_21_20, bit S, + list pattern=[]> : MVE_qDestSrc_rSrc { let Inst{28} = bit_28; @@ -3487,6 +3923,7 @@ class MVE_VFMAMLA_qr; @@ -3503,6 +3940,21 @@ def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>; def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>; def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 (add (v4i32 MQPR:$src1), + (v4i32 (mul (v4i32 MQPR:$src2), + (v4i32 (ARMvdup (i32 rGPR:$x))))))), + (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$src1), + (v8i16 (mul (v8i16 MQPR:$src2), + (v8i16 (ARMvdup (i32 rGPR:$x))))))), + (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>; + def : Pat<(v16i8 (add (v16i8 MQPR:$src1), + (v16i8 (mul (v16i8 MQPR:$src2), + (v16i8 (ARMvdup (i32 rGPR:$x))))))), + (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>; +} + let Predicates = [HasMVEFloat] in { def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>; def MVE_VFMA_qr_f32 : 
MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>; @@ -3555,6 +4007,7 @@ class MVE_VxDUP size, bit bit_12, let Inst{7} = imm{1}; let Inst{6-1} = 0b110111; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>; @@ -3589,6 +4042,7 @@ class MVE_VxWDUP size, bit bit_12, let Inst{6-4} = 0b110; let Inst{3-1} = Rm{3-1}; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>; @@ -3599,6 +4053,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let hasSideEffects = 1 in class MVE_VCTP size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { @@ -3614,6 +4069,7 @@ class MVE_VCTP size, list pattern=[]> let Constraints = ""; let DecoderMethod = "DecodeMveVCTP"; + let validForTailPredication = 1; } def MVE_VCTP8 : MVE_VCTP<"8", 0b00>; @@ -3621,6 +4077,15 @@ def MVE_VCTP16 : MVE_VCTP<"16", 0b01>; def MVE_VCTP32 : MVE_VCTP<"32", 0b10>; def MVE_VCTP64 : MVE_VCTP<"64", 0b11>; +let Predicates = [HasMVEInt] in { + def : Pat<(int_arm_vctp8 rGPR:$Rn), + (v16i1 (MVE_VCTP8 rGPR:$Rn))>; + def : Pat<(int_arm_vctp16 rGPR:$Rn), + (v8i1 (MVE_VCTP16 rGPR:$Rn))>; + def : Pat<(int_arm_vctp32 rGPR:$Rn), + (v4i1 (MVE_VCTP32 rGPR:$Rn))>; +} + // end of mve_qDest_rSrc // start of coproc mov @@ -3863,6 +4328,7 @@ class MVE_VLDRSTR_base size, dag iops, string asm, list patte let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [VPR, P0]; + let Defs = [VPR]; + let validForTailPredication = 1; } class MVE_VPTt1 size, dag iops> @@ -4177,11 +4644,12 @@ class MVE_VPTt1 size, dag iops> let Inst{5} = Qm{3}; let Inst{3-1} = Qm{2-0}; let Inst{0} = fc{1}; + let validForTailPredication = 1; } class MVE_VPTt1i size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> { let Inst{12} = 
0b0; let Inst{0} = 0b0; } @@ -4192,7 +4660,7 @@ def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>; class MVE_VPTt1u size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b1; } @@ -4203,7 +4671,7 @@ def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>; class MVE_VPTt1s size> : MVE_VPTt1 { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4225,7 +4693,7 @@ class MVE_VPTt2 size, dag iops> class MVE_VPTt2i size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b0; } @@ -4236,7 +4704,7 @@ def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>; class MVE_VPTt2u size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b1; } @@ -4247,7 +4715,7 @@ def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>; class MVE_VPTt2s size> : MVE_VPTt2 { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4276,12 +4744,13 @@ class MVE_VPTf pattern= let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [P0]; + let Defs = [VPR]; let Predicates = [HasMVEFloat]; + let validForTailPredication = 1; } class MVE_VPTft1 - : MVE_VPTf { bits<3> fc; bits<4> Qm; @@ -4296,7 +4765,7 @@ def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>; def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>; class MVE_VPTft2 - : MVE_VPTf { bits<3> fc; bits<4> Rm; @@ -4322,7 +4791,8 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; + let Uses = [VPR]; + let validForTailPredication = 1; } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, @@ -4346,6 +4816,7 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, let Inst{4} = 0b0; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b1; + let validForTailPredication = 1; } foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", @@ 
-4353,19 +4824,113 @@ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm", (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary, +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + // Pred <-> Int + def : Pat<(v16i8 
(zext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))), + (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))), + (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))), + (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>; +} + +let Predicates = [HasMVEFloat] in { + // Pred <-> Float + // 112 is 1.0 in float + def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + // 2620 in 1.0 in half + def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + // 240 is -1.0 in float + def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + // 2748 is -1.0 
in half + def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + + def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; +} + +def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, "vpnot", "", "", vpred_n, "", []> { let Inst{31-0} = 0b11111110001100010000111101001101; let Unpredictable{19-17} = 0b111; let Unpredictable{12} = 0b1; let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; - let Uses = [P0]; let Constraints = ""; + let DecoderMethod = "DecodeMVEVPNOT"; } +let Predicates = [HasMVEInt] in { + def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))), + (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>; + def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))), + (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>; + def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))), + (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>; +} + + class MVE_loltp_start size> : t2LOL<(outs GPRlr:$LR), iops, asm, ops> { bits<4> Rn; @@ -4433,159 +4998,440 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { // Patterns //===----------------------------------------------------------------------===// -class MVE_unpred_vector_store_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; +class MVE_vector_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, (i32 1), VCCR:$pred)>; + +multiclass MVE_vector_store { 
+ def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; +} + +class MVE_vector_load_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), + (Ty (RegImmInst t2addrmode_imm7:$addr))>; +class MVE_vector_maskedload_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7:$addr, (i32 1), VCCR:$pred))>; + +multiclass MVE_vector_load { + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; +} + +class MVE_vector_offset_store_typed - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; -multiclass MVE_unpred_vector_store { - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; } -class MVE_unpred_vector_load_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), - (Ty (RegImmInst t2addrmode_imm7:$addr))>; +def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned32_post_store : PatFrag<(ops 
node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; +def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; -multiclass MVE_unpred_vector_load { - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; -} + +def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + return Ld->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; +}]>; +def 
sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; +}]>; + +def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore8 node:$val, node:$ptr, node:$pred), [{ + return cast(N)->isTruncatingStore(); +}]>; +def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore16 node:$val, node:$ptr, node:$pred), [{ + return cast(N)->isTruncatingStore(); +}]>; +def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = 
St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; let Predicates = [HasMVEInt, IsLE] in { - defm : MVE_unpred_vector_store; - defm : MVE_unpred_vector_store; - defm : MVE_unpred_vector_store; + // Stores + defm : MVE_vector_store; + defm : MVE_vector_store; + defm : MVE_vector_store; - defm : MVE_unpred_vector_load; - defm : MVE_unpred_vector_load; - defm : MVE_unpred_vector_load; + // Loads + defm : MVE_vector_load; + defm : MVE_vector_load; + defm : MVE_vector_load; - def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)), - (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)), - (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)), - (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; + // Pre/post inc stores + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; + defm : MVE_vector_offset_store; } let Predicates = [HasMVEInt, IsBE] in { - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; - def : MVE_unpred_vector_store_typed; + // Aligned Stores + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; - def : MVE_unpred_vector_load_typed; + // Aligned Loads + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + + // Other unaligned loads/stores need to go though a VREV + def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)), + (v2f64 
(MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)), + (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)), + (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)), + (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)), + (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)), + (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + + // Pre/Post inc stores + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; } +let Predicates = [HasMVEInt] in { + // Aligned masked store, shared between LE and BE + def : MVE_vector_maskedstore_typed; 
+ def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + // Truncating stores + def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), + (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + // Aligned masked loads + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + // Extending masked loads. + def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : 
Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; +} // Widening/Narrowing Loads/Stores +let MinAlignment = 2 in { + def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>; + def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; + + def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, 
t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; +} + + +let MinAlignment = 2 in { + def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; } multiclass MVEExtLoad { + string Align, Operand am> { def _Any : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("extloadvi" # SrcElemBits) am:$addr)), + (!cast("extloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _Z : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("zextloadvi" # SrcElemBits) am:$addr)), + (!cast("zextloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _S : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("sextloadvi" # SrcElemBits) am:$addr)), + (!cast("sextloadvi" # SrcElemBits # Align) am:$addr)), (!cast("MVE_VLDR" # SrcElemType # "S" # DestElemBits) am:$addr)>; } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>; + defm : MVEExtLoad<"4", "32", 
"8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; } // Bit convert patterns let Predicates = [HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>; - def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>; } let Predicates = [IsLE,HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>; - def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 
(bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>; - def : Pat<(v8f16 
(bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert 
(v4f32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>; +} + +let Predicates = [IsBE,HasMVEInt] in { + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 
(bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 806681df102..60ca92e5804 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -15,22 +15,22 @@ // NEON-specific Operands. 
//===----------------------------------------------------------------------===// def nModImm : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; } def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } def nImmSplatI8 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI8AsmOperand; } def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } def nImmSplatI16 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI16AsmOperand; } def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } def nImmSplatI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } @@ -43,7 +43,7 @@ def nImmSplatNotI32 : Operand { } def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } @@ -62,18 +62,18 @@ class nImmVINVIAsmOperandReplicate } class nImmVMOVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVIAsmOperandReplicate; } class nImmVINVIReplicate : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVINVIAsmOperandReplicate; } def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32NegAsmOperand; } def 
nImmVMOVF32 : Operand { @@ -82,7 +82,7 @@ def nImmVMOVF32 : Operand { } def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI64AsmOperand; } @@ -478,20 +478,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// -def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; -def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; - -def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; -def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; -def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; -def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; -def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; -def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; -def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; -def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; -def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; -def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; -def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; +def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. 
The "SHX" version is for long and // narrow operations where the source and destination vectors have different @@ -559,14 +547,14 @@ def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -3326,30 +3314,30 @@ class N2VCvtQ op11_8, bit op7, bit op4, // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, SDNode OpNode> { + string asm, int fc> { // 64-bit vector types. 
def v8i8 : N2V; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>; def v4i16 : N2V; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>; def v2i32 : N2V; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>; def v2f32 : N2V { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3358,30 +3346,83 @@ multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, def v16i8 : N2V; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>; def v8i16 : N2V; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>; def v4i32 : N2V; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>; def v4f32 : N2V { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } } +// Neon 3-register comparisons. +class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VD_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +multiclass N3V_QHS_cmp op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + int fc, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD_cmp; + def v4i16 : N3VD_cmp; + def v2i32 : N3VD_cmp; + + // 128-bit vector types. + def v16i8 : N3VQ_cmp; + def v8i16 : N3VQ_cmp; + def v4i32 : N3VQ_cmp; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -5026,67 +5067,67 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - NEONvceq, 1>; -def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - NEONvceq, 1>, +defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", 0, 1>; +def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + 0, 1>; +def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + 0, 1>; +def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; -def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - NEONvceq, 1>, +def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", NEONvceqz>; + "$Vd, $Vm, #0", 0>; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : 
N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - NEONvcge, 0>; -def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - NEONvcge, 0>, +defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", 10, 0>; +defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", 2, 0>; +def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + 10, 0>; +def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + 10, 0>; +def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - NEONvcge, 0>, +def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", NEONvcgez>; + "$Vd, $Vm, #0", 10>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", NEONvclez>; + "$Vd, $Vm, #0", 13>; } // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", 
"f32", v4i32, v4f32, - NEONvcgt, 0>; -def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - NEONvcgt, 0>, +defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", 12, 0>; +defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", 8, 0>; +def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + 12, 0>; +def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + 12, 0>; +def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - NEONvcgt, 0>, +def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", NEONvcgtz>; + "$Vd, $Vm, #0", 12>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", NEONvcltz>; + "$Vd, $Vm, #0", 11>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index cfeb13c6acb..18bcbda4458 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -565,6 +565,13 @@ let isCall = 1, 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>; + + // Also used for Thumb2 + // push lr before the call + def tBL_PUSHLR : tPseudoInst<(outs), (ins GPRlr:$ra, pred:$p, thumb_bl_target:$func), + 4, IIC_Br, + []>, + Requires<[IsThumb]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -592,6 +599,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { let Size = 
2; + let isNotDuplicable = 1; list Predicates = [IsThumb, IsThumb1Only]; } } @@ -1362,6 +1370,12 @@ let hasPostISelHook = 1, Defs = [CPSR] in { [(set tGPR:$Rd, CPSR, (ARMsubc 0, tGPR:$Rn))]>, Requires<[IsThumb1Only]>, Sched<[WriteALU]>; + + def tLSLSri : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, imm0_31:$imm5), + 2, IIC_iALUr, + [(set tGPR:$Rd, CPSR, (ARMlsls tGPR:$Rn, imm0_31:$imm5))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; } @@ -1465,7 +1479,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, - isIndirectBranch = 1 in { + isIndirectBranch = 1, isNotDuplicable = 1 in { def tTBB_JT : tPseudoInst<(outs), (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 7cbfaba7a8e..25a45b39fa0 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -45,7 +45,8 @@ def mve_shift_imm : AsmOperandClass { let RenderMethod = "addImmOperands"; let DiagnosticString = "operand must be an immediate in the range [1,32]"; } -def long_shift : Operand { +def long_shift : Operand, + ImmLeaf 0 && Imm <= 32; }]> { let ParserMatchClass = mve_shift_imm; let DecoderMethod = "DecodeLongShiftOperand"; } @@ -2394,6 +2395,23 @@ def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(saddsat(saddsat rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat 
rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (t2QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (t2QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>; + // Signed/Unsigned add/subtract def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; @@ -4085,7 +4103,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), // Pseudo isntruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { +let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in { def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), NoItinerary, []>, Requires<[IsThumb2]>; } @@ -4175,15 +4193,15 @@ multiclass t2LdStCop op31_28, bit load, bit Dbit, string asm, list } let DecoderNamespace = "Thumb2CoProc" in { -defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2STC 
: t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; } @@ -4368,8 +4386,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>, + [(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]>, ComplexDeprecationPredicate<"MCR">; def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, @@ -4377,8 +4395,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Predicates = [IsThumb2, PreV8]; } def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", @@ -4402,24 +4420,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, 
imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; -def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; -def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; +def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2), + (t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>; /* from ARM core register to coprocessor */ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, - imm:$CRm)]>; + [(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2, + timm:$CRm)]>; def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, - GPR:$Rt2, imm:$CRm)]> { + [(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt, + GPR:$Rt2, timm:$CRm)]> { let Predicates = [IsThumb2, PreV8]; } @@ -4439,8 +4457,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2), def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Inst{27-24} = 0b1110; bits<4> opc1; @@ -4465,8 +4483,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp2", "\t$cop, $opc1, $CRd, $CRn, 
$CRm, $opc2", - [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + [(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn, + timm:$CRm, timm:$opc2)]> { let Inst{27-24} = 0b1110; bits<4> opc1; @@ -5087,6 +5105,7 @@ def t2BF_LabelPseudo : t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> { let isTerminator = 1; let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; + let hasNoSchedulingInfo = 1; } def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p), @@ -5217,11 +5236,13 @@ def t2LoopDec : t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, []>, Sched<[WriteBr]>; -let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in { +let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { +// Set WhileLoopStart and LoopEnd to occupy 8 bytes because they may +// get converted into t2CMP and t2Bcc. def t2WhileLoopStart : t2PseudoInst<(outs), (ins rGPR:$elts, brtarget:$target), - 4, IIC_Br, []>, + 8, IIC_Br, []>, Sched<[WriteBr]>; def t2LoopEnd : @@ -5233,7 +5254,7 @@ def t2LoopEnd : } // end isNotDuplicable class CS opcode, list pattern=[]> - : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond), + : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond), AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> { bits<4> Rd; bits<4> Rm; @@ -5255,6 +5276,25 @@ def t2CSINC : CS<"csinc", 0b1001>; def t2CSINV : CS<"csinv", 0b1010>; def t2CSNEG : CS<"csneg", 0b1011>; +let Predicates = [HasV8_1MMainline] in { + def : T2Pat<(ARMcsinc GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSINC GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcsinv GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSINV GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcsneg GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), + (t2CSNEG GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + 
+ multiclass ModifiedV8_1CSEL { + def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; + def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, + (i32 (inv_cond_XFORM imm:$imm)))>; + } + defm : ModifiedV8_1CSEL; + defm : ModifiedV8_1CSEL; + defm : ModifiedV8_1CSEL; +} // CS aliases. let Predicates = [HasV8_1MMainline] in { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index a0dd25de07e..fdd961bfbb2 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -19,7 +19,7 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; @@ -324,7 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", // However, there is no UAL syntax for them, so we keep them around for // (dis)assembly only. 
multiclass vfp_ldstx_mult { - let Predicates = [HasFPRegs] in { + let Predicates = [HasFPRegs], hasNoSchedulingInfo = 1 in { // Unknown precision def XIA : AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), @@ -548,12 +548,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -562,17 +562,17 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 1))]>; + [/* For disassembly only; pattern left blank */]>; def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -581,7 +581,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins HPR:$Sd, HPR:$Sm), IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", - [(arm_cmpfp HPR:$Sd, HPR:$Sm, (i32 0))]>; + [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -611,7 +611,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -619,7 +619,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -631,7 +631,7 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 1))]> { + [/* For disassembly only; pattern left blank */]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -639,7 +639,7 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> { + [(arm_cmpfp0 (f64 DPR:$Dd))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -647,7 +647,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 SPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -659,7 +659,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, def 
VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins HPR:$Sd), IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", - [(arm_cmpfp0 HPR:$Sd, (i32 0))]> { + [(arm_cmpfp0 HPR:$Sd)]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -1732,7 +1732,8 @@ def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1740,7 +1741,8 @@ def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1748,7 +1750,8 @@ def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -2297,6 +2300,8 @@ class MovFromVFP opc19_16, dag oops, dag iops, string opc, string asm, let Inst{6-5} = 0b00; let Inst{4} = 1; let Inst{3-0} = 0b0000; + let Unpredictable{7-5} = 0b111; + let Unpredictable{3-0} = 0b1111; } let DecoderMethod = "DecodeForVMRSandVMSR" in { @@ -2370,63 +2375,65 @@ class MovToVFP opc19_16, dag oops, dag iops, string opc, string asm, VFPAI { // Instruction operand. - bits<4> src; - - // Encode instruction operand. - let Inst{15-12} = src; + bits<4> Rt; let Inst{27-20} = 0b11101110; let Inst{19-16} = opc19_16; + let Inst{15-12} = Rt; let Inst{11-8} = 0b1010; let Inst{7} = 0; + let Inst{6-5} = 0b00; let Inst{4} = 1; + let Inst{3-0} = 0b0000; let Predicates = [HasVFP2]; + let Unpredictable{7-5} = 0b111; + let Unpredictable{3-0} = 0b1111; } let DecoderMethod = "DecodeForVMRSandVMSR" in { let Defs = [FPSCR] in { let Predicates = [HasFPRegs] in // Application level GPR -> FPSCR - def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpscr, $src", - [(int_arm_set_fpscr GPRnopc:$src)]>; + def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpscr, $Rt", + [(int_arm_set_fpscr GPRnopc:$Rt)]>; // System level GPR -> FPEXC - def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpexc, $src", []>; + def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpexc, $Rt", []>; // System level GPR -> FPSID - def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpsid, $src", []>; - def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpinst, $src", []>; - def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src), - "vmsr", "\tfpinst2, $src", []>; + def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpsid, $Rt", []>; + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$Rt), 
+ "vmsr", "\tfpinst, $Rt", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$Rt), + "vmsr", "\tfpinst2, $Rt", []>; } let Predicates = [HasV8_1MMainline, Has8MSecExt] in { // System level GPR -> FPSCR with context saving for security extensions - def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src), - "vmsr", "\tfpcxtns, $src", []>; + def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$Rt), + "vmsr", "\tfpcxtns, $Rt", []>; } let Predicates = [HasV8_1MMainline, Has8MSecExt] in { // System level GPR -> FPSCR with context saving for security extensions - def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src), - "vmsr", "\tfpcxts, $src", []>; + def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$Rt), + "vmsr", "\tfpcxts, $Rt", []>; } let Predicates = [HasV8_1MMainline, HasFPRegs] in { // System level GPR -> FPSCR_NZCVQC def VMSR_FPSCR_NZCVQC : MovToVFP<0b0010 /* fpscr_nzcvqc */, - (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src), - "vmsr", "\tfpscr_nzcvqc, $src", []>; + (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$Rt), + "vmsr", "\tfpscr_nzcvqc, $Rt", []>; } let Predicates = [HasV8_1MMainline, HasMVEInt] in { // System level GPR -> VPR/P0 let Defs = [VPR] in - def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src), - "vmsr", "\tvpr, $src", []>; + def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$Rt), + "vmsr", "\tvpr, $Rt", []>; - def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src), - "vmsr", "\tp0, $src", []>; + def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$Rt), + "vmsr", "\tp0, $Rt", []>; } } @@ -2614,7 +2621,8 @@ def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ let Inst{21-16} = 0b011111; let Inst{15-12} = regs{11-8}; let Inst{11-8} = 0b1011; - let Inst{7-0} = regs{7-0}; + let Inst{7-1} = regs{7-1}; + let Inst{0} = 0; let DecoderMethod = "DecodeVSCCLRM"; diff --git 
a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 4485a474a6d..8e5e474c0f5 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -34,7 +34,7 @@ public: ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -210,8 +210,8 @@ static const TargetRegisterClass *guessRegClass(unsigned Reg, static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = I.getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) return true; const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); @@ -236,17 +236,17 @@ static bool selectMergeValues(MachineInstrBuilder &MIB, // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. 
- unsigned VReg0 = MIB->getOperand(0).getReg(); + Register VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && "Unsupported operand for G_MERGE_VALUES"); - unsigned VReg1 = MIB->getOperand(1).getReg(); + Register VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_MERGE_VALUES"); - unsigned VReg2 = MIB->getOperand(2).getReg(); + Register VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && @@ -268,17 +268,17 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB, // We only support G_UNMERGE_VALUES as a way to break up one DPR into two // GPRs. - unsigned VReg0 = MIB->getOperand(0).getReg(); + Register VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_UNMERGE_VALUES"); - unsigned VReg1 = MIB->getOperand(1).getReg(); + Register VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && "Unsupported operand for G_UNMERGE_VALUES"); - unsigned VReg2 = MIB->getOperand(2).getReg(); + Register VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 64 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && @@ -833,8 +833,7 @@ void ARMInstructionSelector::renderVFPF64Imm( NewInstBuilder.addImm(FPImmEncoding); } -bool ARMInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool ARMInstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a 
basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -851,7 +850,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, using namespace TargetOpcode; - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; MachineInstrBuilder MIB{MF, I}; @@ -874,10 +873,10 @@ bool ARMInstructionSelector::select(MachineInstr &I, MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); if (isSExt) { - unsigned SExtResult = I.getOperand(0).getReg(); + Register SExtResult = I.getOperand(0).getReg(); // Use a new virtual register for the result of the AND - unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(AndResult); auto InsertBefore = std::next(I.getIterator()); @@ -928,7 +927,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size"); assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size"); - unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); auto InsertBefore = std::next(I.getIterator()); auto MovI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD)) @@ -1039,7 +1038,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_FCMP: { assert(STI.hasVFP2Base() && "Can't select fcmp without VFP"); - unsigned OpReg = I.getOperand(2).getReg(); + Register OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); if (Size == 64 && !STI.hasFP64()) { @@ -1077,12 +1076,12 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_STORE: case G_LOAD: { const auto &MemOp = **I.memoperands_begin(); - if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + if (MemOp.isAtomic()) { LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n"); return false; } - unsigned Reg = 
I.getOperand(0).getReg(); + Register Reg = I.getOperand(0).getReg(); unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); LLT ValTy = MRI.getType(Reg); @@ -1097,9 +1096,9 @@ bool ARMInstructionSelector::select(MachineInstr &I, if (ValSize == 1 && NewOpc == Opcodes.STORE8) { // Before storing a 1-bit value, make sure to clear out any unneeded bits. - unsigned OriginalValue = I.getOperand(0).getReg(); + Register OriginalValue = I.getOperand(0).getReg(); - unsigned ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass); I.getOperand(0).setReg(ValueToStore); auto InsertBefore = I.getIterator(); @@ -1159,7 +1158,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, case G_PHI: { I.setDesc(TII.get(PHI)); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { break; diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 73a57b297ad..81414e6d76f 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -84,6 +84,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16}); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32}) .minScalar(0, s32); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 90a1ce238c3..4a193fed04a 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -509,7 +509,7 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, Offset = MO.getImm() - WordOffset * getImmScale(Opc); // If storing the base register, it needs to be reset first. 
- unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); + Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); @@ -859,7 +859,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { // Determine list of registers and list of implicit super-register defs. for (const MachineInstr *MI : Cand.Instrs) { const MachineOperand &MO = getLoadStoreRegOp(*MI); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool IsKill = MO.isKill(); if (IsKill) KilledRegs.insert(Reg); @@ -874,7 +874,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; assert(MO.isImplicit()); - unsigned DefReg = MO.getReg(); + Register DefReg = MO.getReg(); if (is_contained(ImpDefs, DefReg)) continue; @@ -893,7 +893,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { iterator InsertBefore = std::next(iterator(LatestMI)); MachineBasicBlock &MBB = *LatestMI->getParent(); unsigned Offset = getMemoryOpOffset(*First); - unsigned Base = getLoadStoreBaseOp(*First).getReg(); + Register Base = getLoadStoreBaseOp(*First).getReg(); bool BaseKill = LatestMI->killsRegister(Base); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg); @@ -1005,7 +1005,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { const MachineInstr *MI = MemOps[SIndex].MI; int Offset = MemOps[SIndex].Offset; const MachineOperand &PMO = getLoadStoreRegOp(*MI); - unsigned PReg = PMO.getReg(); + Register PReg = PMO.getReg(); unsigned PRegNum = PMO.isUndef() ? 
std::numeric_limits::max() : TRI->getEncodingValue(PReg); unsigned Latest = SIndex; @@ -1052,7 +1052,7 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { if (NewOffset != Offset + (int)Size) break; const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == ARM::SP || Reg == ARM::PC) break; if (Count == Limit) @@ -1261,7 +1261,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { if (isThumb1) return false; const MachineOperand &BaseOP = MI->getOperand(0); - unsigned Base = BaseOP.getReg(); + Register Base = BaseOP.getReg(); bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); @@ -1387,7 +1387,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - unsigned Base = getLoadStoreBaseOp(*MI).getReg(); + Register Base = getLoadStoreBaseOp(*MI).getReg(); bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); @@ -1512,7 +1512,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { // Behaviour for writeback is undefined if base register is the same as one // of the others. 
const MachineOperand &BaseOp = MI.getOperand(2); - unsigned Base = BaseOp.getReg(); + Register Base = BaseOp.getReg(); const MachineOperand &Reg0Op = MI.getOperand(0); const MachineOperand &Reg1Op = MI.getOperand(1); if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) @@ -1655,9 +1655,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; const MachineOperand &BaseOp = MI->getOperand(2); - unsigned BaseReg = BaseOp.getReg(); - unsigned EvenReg = MI->getOperand(0).getReg(); - unsigned OddReg = MI->getOperand(1).getReg(); + Register BaseReg = BaseOp.getReg(); + Register EvenReg = MI->getOperand(0).getReg(); + Register OddReg = MI->getOperand(1).getReg(); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); @@ -1783,8 +1783,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemoryOp(*MBBI)) { unsigned Opcode = MBBI->getOpcode(); const MachineOperand &MO = MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); + Register Reg = MO.getReg(); + Register Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg); int Offset = getMemoryOpOffset(*MBBI); @@ -2121,7 +2121,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineOperand &MO = I->getOperand(j); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; if (Reg != Base && !MemRegs.count(Reg)) @@ -2415,7 +2415,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { int Opc = MI.getOpcode(); bool isLd = isLoadSingle(Opc); - unsigned Base = MI.getOperand(1).getReg(); + Register Base = MI.getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); bool StopHere = false; auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) { diff --git 
a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp index cedf3bd3c74..e1c5a9c3e22 100644 --- a/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -11,8 +11,7 @@ /// The expectation is that the loop contains three pseudo instructions: /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop /// form should be in the preheader, whereas the while form should be in the -/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the -/// pre-preheader? +/// preheaders only predecessor. /// - t2LoopDec - placed within in the loop body. /// - t2LoopEnd - the loop latch terminator. /// @@ -35,6 +34,7 @@ using namespace llvm; namespace { class ARMLowOverheadLoops : public MachineFunctionPass { + MachineFunction *MF = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr BBUtils = nullptr; @@ -52,17 +52,6 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; - bool ProcessLoop(MachineLoop *ML); - - void RevertWhile(MachineInstr *MI) const; - - void RevertLoopDec(MachineInstr *MI) const; - - void RevertLoopEnd(MachineInstr *MI) const; - - void Expand(MachineLoop *ML, MachineInstr *Start, - MachineInstr *Dec, MachineInstr *End, bool Revert); - MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -71,36 +60,156 @@ namespace { StringRef getPassName() const override { return ARM_LOW_OVERHEAD_LOOPS_NAME; } + + private: + bool ProcessLoop(MachineLoop *ML); + + MachineInstr * IsSafeToDefineLR(MachineInstr *MI); + + bool RevertNonLoops(); + + void RevertWhile(MachineInstr *MI) const; + + bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const; + + void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; + + void Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, + 
MachineInstr *End, bool Revert); + }; } - + char ARMLowOverheadLoops::ID = 0; INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { - if (!static_cast(MF.getSubtarget()).hasLOB()) +bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { + const ARMSubtarget &ST = static_cast(mf.getSubtarget()); + if (!ST.hasLOB()) return false; - LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); + MF = &mf; + LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); auto &MLI = getAnalysis(); - MRI = &MF.getRegInfo(); - TII = static_cast( - MF.getSubtarget().getInstrInfo()); - BBUtils = std::unique_ptr(new ARMBasicBlockUtils(MF)); + MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + MRI = &MF->getRegInfo(); + TII = static_cast(ST.getInstrInfo()); + BBUtils = std::unique_ptr(new ARMBasicBlockUtils(*MF)); BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&MF.front()); + BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; for (auto ML : MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Changed |= RevertNonLoops(); return Changed; } +static bool IsLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + +template +static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) { + for(auto &MI : make_range(T(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + return &MI; + } + } + return nullptr; +} + +static MachineInstr* SearchForUse(MachineInstr *Begin, + MachineBasicBlock::iterator End, + unsigned Reg) { + for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + return &MI; + 
} + } + return nullptr; +} + +// Is it safe to define LR with DLS/WLS? +// LR can defined if it is the operand to start, because it's the same value, +// or if it's going to be equivalent to the operand to Start. +MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) { + + auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + return MI->getOpcode() == ARM::tMOVr && + MI->getOperand(0).getReg() == ARM::LR && + MI->getOperand(1).getReg() == Reg && + MI->getOperand(2).getImm() == ARMCC::AL; + }; + + MachineBasicBlock *MBB = Start->getParent(); + unsigned CountReg = Start->getOperand(0).getReg(); + // Walk forward and backward in the block to find the closest instructions + // that define LR. Then also filter them out if they're not a mov lr. + MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR); + if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) + PredLRDef = nullptr; + + MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR); + if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) + SuccLRDef = nullptr; + + // We've either found one, two or none mov lr instructions... Now figure out + // if they are performing the equilvant mov that the Start instruction will. + // Do this by scanning forward and backward to see if there's a def of the + // register holding the count value. If we find a suitable def, return it as + // the insert point. Later, if InsertPt != Start, then we can remove the + // redundant instruction. + if (SuccLRDef) { + MachineBasicBlock::iterator End(SuccLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return SuccLRDef; + } else + SuccLRDef = nullptr; + } + if (PredLRDef) { + MachineBasicBlock::reverse_iterator End(PredLRDef); + if (!SearchForDef(Start, End, CountReg)) { + return PredLRDef; + } else + PredLRDef = nullptr; + } + + // We can define LR because LR already contains the same value. 
+ if (Start->getOperand(0).getReg() == ARM::LR) + return Start; + + // We've found no suitable LR def and Start doesn't use LR directly. Can we + // just define LR anyway? + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Not if we've haven't found a suitable mov and LR is live out. + if (LiveRegs.contains(ARM::LR)) + return nullptr; + + // If LR is not live out, we can insert the instruction if nothing else + // uses LR after it. + if (!SearchForUse(Start, MBB->end(), ARM::LR)) + return Start; + + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" + << " LR\n"); + return nullptr; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -111,15 +220,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); - auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2WhileLoopStart; - }; - // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. std::function SearchForStart = - [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { + [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; @@ -165,41 +269,62 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; - else if (MI.getDesc().isCall()) + else if (IsLoopStart(MI)) + Start = &MI; + else if (MI.getDesc().isCall()) { // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be // faster than performing a sub,cmp,br or even subs,br. 
Revert = true; + LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n"); + } - if (!Dec) + if (!Dec || End) continue; - // If we find that we load/store LR between LoopDec and LoopEnd, expect - // that the decremented value has been spilled to the stack. Because - // this value isn't actually going to be produced until the latch, by LE, - // we would need to generate a real sub. The value is also likely to be - // reloaded for use of LoopEnd - in which in case we'd need to perform - // an add because it gets negated again by LE! The other option is to - // then generate the other form of LE which doesn't perform the sub. - if (MI.mayLoad() || MI.mayStore()) - Revert = - MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR; + // If we find that LR has been written or read between LoopDec and + // LoopEnd, expect that the decremented value is being used else where. + // Because this value isn't actually going to be produced until the + // latch, by LE, we would need to generate a real sub. The value is also + // likely to be copied/reloaded for use of LoopEnd - in which in case + // we'd need to perform an add because it gets subtracted again by LE! + // The other option is to then generate the other form of LE which doesn't + // perform the sub. 
+ for (auto &MO : MI.operands()) { + if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() && + MO.getReg() == ARM::LR) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI); + Revert = true; + break; + } + } } if (Dec && End && Revert) break; } + LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; + if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; + if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; - } if (!(Start && Dec && End)) { - report_fatal_error("Failed to find all loop components"); + } else if (!(Start && Dec && End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); + return false; } - if (!End->getOperand(1).isMBB() || - End->getOperand(1).getMBB() != ML->getHeader()) - report_fatal_error("Expected LoopEnd to target Loop Header"); + if (!End->getOperand(1).isMBB()) + report_fatal_error("Expected LoopEnd to target basic block"); + + // TODO Maybe there's cases where the target doesn't have to be the header, + // but for now be safe and revert. + if (End->getOperand(1).getMBB() != ML->getHeader()) { + LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n"); + Revert = true; + } // The WLS and LE instructions have 12-bits for the label offset. WLS // requires a positive offset, while LE uses negative. @@ -216,41 +341,57 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Revert = true; } - LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start - << " - Found Loop Dec: " << *Dec - << " - Found Loop End: " << *End); + MachineInstr *InsertPt = Revert ? 
nullptr : IsSafeToDefineLR(Start); + if (!InsertPt) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); + Revert = true; + } else + LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - Expand(ML, Start, Dec, End, Revert); + Expand(ML, Start, InsertPt, Dec, End, Revert); return true; } // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a // beq that branches to the exit branch. -// FIXME: Need to check that we're not trashing the CPSR when generating the -// cmp. We could also try to generate a cbz if the value in LR is also in +// TODO: We could also try to generate a cbz if the value in LR is also in // another low register. void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI); MachineBasicBlock *MBB = MI->getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); + MIB.add(MI->getOperand(0)); MIB.addImm(0); MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + MIB.addReg(ARM::NoRegister); + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO: Try to use tBcc instead - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::EQ); // condition code MIB.addReg(ARM::CPSR); MI->eraseFromParent(); } -// TODO: Check flags so that we can possibly generate a tSubs or tSub. -void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { +bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, + bool AllowFlags) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); + + // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS. 
+ bool SetFlags = false; + if (AllowFlags) { + if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) { + if (!SearchForUse(MI, MBB->end(), ARM::CPSR) && + Def->getOpcode() == ARM::t2LoopEnd) + SetFlags = true; + } + } + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); MIB.addDef(ARM::LR); @@ -258,28 +399,39 @@ void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { MIB.add(MI->getOperand(2)); MIB.addImm(ARMCC::AL); MIB.addReg(0); - MIB.addReg(0); + + if (SetFlags) { + MIB.addReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } else + MIB.addReg(0); + MI->eraseFromParent(); + return SetFlags; } // Generate a subs, or sub and cmp, and a branch instead of an LE. -// FIXME: Need to check that we're not trashing the CPSR when generating -// the cmp. -void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { +void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI); - // Create cmp MachineBasicBlock *MBB = MI->getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(ARM::t2CMPri)); - MIB.addReg(ARM::LR); - MIB.addImm(0); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::CPSR); + // Create cmp + if (!SkipCmp) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(ARM::t2CMPri)); + MIB.addReg(ARM::LR); + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); + } + + MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); + unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? + ARM::tBcc : ARM::t2Bcc; - // TODO Try to use tBcc instead. 
// Create bne - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc)); + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); @@ -287,33 +439,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const { } void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt, MachineInstr *Dec, MachineInstr *End, bool Revert) { - auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) { - // The trip count should already been held in LR since the instructions - // within the loop can only read and write to LR. So, there should be a - // mov to setup the count. WLS/DLS perform this move, so find the original - // and delete it - inserting WLS/DLS in its place. - MachineBasicBlock *MBB = Start->getParent(); - MachineInstr *InsertPt = Start; - for (auto &I : MRI->def_instructions(ARM::LR)) { - if (I.getParent() != MBB) - continue; - - // Always execute. - if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL) - continue; - - // Only handle move reg, if the trip count it will need moving into a reg - // before the setup instruction anyway. - if (!I.getDesc().isMoveReg() || - !I.getOperand(1).isIdenticalTo(Start->getOperand(0))) - continue; - InsertPt = &I; - break; - } - + auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start, + MachineInstr *InsertPt) { + MachineBasicBlock *MBB = InsertPt->getParent(); unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ? 
ARM::t2DLS : ARM::t2WLS; MachineInstrBuilder MIB = @@ -369,16 +501,54 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, RevertWhile(Start); else Start->eraseFromParent(); - RevertLoopDec(Dec); - RevertLoopEnd(End); + bool FlagsAlreadySet = RevertLoopDec(Dec, true); + RevertLoopEnd(End, FlagsAlreadySet); } else { - Start = ExpandLoopStart(ML, Start); + Start = ExpandLoopStart(ML, Start, InsertPt); RemoveDeadBranch(Start); End = ExpandLoopEnd(ML, Dec, End); RemoveDeadBranch(End); } } +bool ARMLowOverheadLoops::RevertNonLoops() { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); + bool Changed = false; + + for (auto &MBB : *MF) { + SmallVector Starts; + SmallVector Decs; + SmallVector Ends; + + for (auto &I : MBB) { + if (IsLoopStart(I)) + Starts.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopDec) + Decs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEnd) + Ends.push_back(&I); + } + + if (Starts.empty() && Decs.empty() && Ends.empty()) + continue; + + Changed = true; + + for (auto *Start : Starts) { + if (Start->getOpcode() == ARM::t2WhileLoopStart) + RevertWhile(Start); + else + Start->eraseFromParent(); + } + for (auto *Dec : Decs) + RevertLoopDec(Dec); + + for (auto *End : Ends) + RevertLoopEnd(End); + } + return Changed; +} + FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 90c5ad025e5..c92689f4942 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -74,8 +74,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: - // Ignore all non-CPSR implicit register operands. - if (MO.isImplicit() && MO.getReg() != ARM::CPSR) + // Ignore all implicit register operands. 
+ if (MO.isImplicit()) return false; assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::createReg(MO.getReg()); diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 90d794cd27b..bb136e92329 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/Support/ErrorHandling.h" #include @@ -130,6 +131,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// The amount the literal pool has been increasedby due to promoted globals. int PromotedGlobalsIncrease = 0; + /// True if r0 will be preserved by a call to this function (e.g. C++ + /// con/destructors). + bool PreservesR0 = false; + public: ARMFunctionInfo() = default; @@ -247,6 +252,9 @@ public: } DenseMap EHPrologueRemappedRegs; + + void setPreservesR0() { PreservesR0 = true; } + bool getPreservesR0() const { return PreservesR0; } }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp index 5389d09bf7d..ae5657a0a2c 100644 --- a/lib/Target/ARM/ARMParallelDSP.cpp +++ b/lib/Target/ARM/ARMParallelDSP.cpp @@ -1,4 +1,4 @@ -//===- ParallelDSP.cpp - Parallel DSP Pass --------------------------------===// +//===- ARMParallelDSP.cpp - Parallel DSP Pass -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -18,13 +18,11 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/NoFolder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/PassSupport.h" @@ -45,54 +43,39 @@ static cl::opt DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false), cl::desc("Disable the ARM Parallel DSP pass")); +static cl::opt +NumLoadLimit("arm-parallel-dsp-load-limit", cl::Hidden, cl::init(16), + cl::desc("Limit the number of loads analysed")); + namespace { - struct OpChain; - struct BinOpChain; + struct MulCandidate; class Reduction; - using OpChainList = SmallVector, 8>; - using ReductionList = SmallVector; - using ValueList = SmallVector; - using MemInstList = SmallVector; - using PMACPair = std::pair; - using PMACPairList = SmallVector; - using Instructions = SmallVector; - using MemLocList = SmallVector; + using MulCandList = SmallVector, 8>; + using MemInstList = SmallVectorImpl; + using MulPairList = SmallVector, 8>; - struct OpChain { + // 'MulCandidate' holds the multiplication instructions that are candidates + // for parallel execution. + struct MulCandidate { Instruction *Root; - ValueList AllValues; - MemInstList VecLd; // List of all load instructions. - MemInstList Loads; + Value* LHS; + Value* RHS; + bool Exchange = false; bool ReadOnly = true; + bool Paired = false; + SmallVector VecLd; // Container for loads to widen. 
- OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { } - virtual ~OpChain() = default; + MulCandidate(Instruction *I, Value *lhs, Value *rhs) : + Root(I), LHS(lhs), RHS(rhs) { } - void PopulateLoads() { - for (auto *V : AllValues) { - if (auto *Ld = dyn_cast(V)) - Loads.push_back(Ld); - } + bool HasTwoLoadInputs() const { + return isa(LHS) && isa(RHS); } - unsigned size() const { return AllValues.size(); } - }; - - // 'BinOpChain' holds the multiplication instructions that are candidates - // for parallel execution. - struct BinOpChain : public OpChain { - ValueList LHS; // List of all (narrow) left hand operands. - ValueList RHS; // List of all (narrow) right hand operands. - bool Exchange = false; - - BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) : - OpChain(I, lhs), LHS(lhs), RHS(rhs) { - for (auto *V : RHS) - AllValues.push_back(V); - } - - bool AreSymmetrical(BinOpChain *Other); + LoadInst *getBaseLoad() const { + return VecLd.front(); + } }; /// Represent a sequence of multiply-accumulate operations with the aim to @@ -100,9 +83,9 @@ namespace { class Reduction { Instruction *Root = nullptr; Value *Acc = nullptr; - OpChainList Muls; - PMACPairList MulPairs; - SmallPtrSet Adds; + MulCandList Muls; + MulPairList MulPairs; + SetVector Adds; public: Reduction() = delete; @@ -112,10 +95,35 @@ namespace { /// Record an Add instruction that is a part of the this reduction. void InsertAdd(Instruction *I) { Adds.insert(I); } - /// Record a BinOpChain, rooted at a Mul instruction, that is a part of - /// this reduction. - void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) { - Muls.push_back(make_unique(I, LHS, RHS)); + /// Create MulCandidates, each rooted at a Mul instruction, that is a part + /// of this reduction. 
+ void InsertMuls() { + auto GetMulOperand = [](Value *V) -> Instruction* { + if (auto *SExt = dyn_cast(V)) { + if (auto *I = dyn_cast(SExt->getOperand(0))) + if (I->getOpcode() == Instruction::Mul) + return I; + } else if (auto *I = dyn_cast(V)) { + if (I->getOpcode() == Instruction::Mul) + return I; + } + return nullptr; + }; + + auto InsertMul = [this](Instruction *I) { + Value *LHS = cast(I->getOperand(0))->getOperand(0); + Value *RHS = cast(I->getOperand(1))->getOperand(0); + Muls.push_back(std::make_unique(I, LHS, RHS)); + }; + + for (auto *Add : Adds) { + if (Add == Acc) + continue; + if (auto *Mul = GetMulOperand(Add->getOperand(0))) + InsertMul(Mul); + if (auto *Mul = GetMulOperand(Add->getOperand(1))) + InsertMul(Mul); + } } /// Add the incoming accumulator value, returns true if a value had not @@ -128,9 +136,17 @@ namespace { return true; } - /// Set two BinOpChains, rooted at muls, that can be executed as a single + /// Set two MulCandidates, rooted at muls, that can be executed as a single /// parallel operation. - void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) { + void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1, + bool Exchange = false) { + LLVM_DEBUG(dbgs() << "Pairing:\n" + << *Mul0->Root << "\n" + << *Mul1->Root << "\n"); + Mul0->Paired = true; + Mul1->Paired = true; + if (Exchange) + Mul1->Exchange = true; MulPairs.push_back(std::make_pair(Mul0, Mul1)); } @@ -141,24 +157,40 @@ namespace { /// Return the add instruction which is the root of the reduction. Instruction *getRoot() { return Root; } + bool is64Bit() const { return Root->getType()->isIntegerTy(64); } + + Type *getType() const { return Root->getType(); } + /// Return the incoming value to be accumulated. This maybe null. Value *getAccumulator() { return Acc; } /// Return the set of adds that comprise the reduction. 
- SmallPtrSetImpl &getAdds() { return Adds; } + SetVector &getAdds() { return Adds; } - /// Return the BinOpChain, rooted at mul instruction, that comprise the + /// Return the MulCandidate, rooted at mul instruction, that comprise the /// the reduction. - OpChainList &getMuls() { return Muls; } + MulCandList &getMuls() { return Muls; } - /// Return the BinOpChain, rooted at mul instructions, that have been + /// Return the MulCandidate, rooted at mul instructions, that have been /// paired for parallel execution. - PMACPairList &getMulPairs() { return MulPairs; } + MulPairList &getMulPairs() { return MulPairs; } /// To finalise, replace the uses of the root with the intrinsic call. void UpdateRoot(Instruction *SMLAD) { Root->replaceAllUsesWith(SMLAD); } + + void dump() { + LLVM_DEBUG(dbgs() << "Reduction:\n"; + for (auto *Add : Adds) + LLVM_DEBUG(dbgs() << *Add << "\n"); + for (auto &Mul : Muls) + LLVM_DEBUG(dbgs() << *Mul->Root << "\n" + << " " << *Mul->LHS << "\n" + << " " << *Mul->RHS << "\n"); + LLVM_DEBUG(if (Acc) dbgs() << "Acc in: " << *Acc << "\n") + ); + } }; class WidenedLoad { @@ -176,13 +208,11 @@ namespace { } }; - class ARMParallelDSP : public LoopPass { + class ARMParallelDSP : public FunctionPass { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; DominatorTree *DT; - LoopInfo *LI; - Loop *L; const DataLayout *DL; Module *M; std::map LoadPairs; @@ -190,13 +220,12 @@ namespace { std::map> WideLoads; template - bool IsNarrowSequence(Value *V, ValueList &VL); - + bool IsNarrowSequence(Value *V); + bool Search(Value *V, BasicBlock *BB, Reduction &R); bool RecordMemoryOps(BasicBlock *BB); void InsertParallelMACs(Reduction &Reduction); bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem); - LoadInst* CreateWideLoad(SmallVectorImpl &Loads, - IntegerType *LoadTy); + LoadInst* CreateWideLoad(MemInstList &Loads, IntegerType *LoadTy); bool CreateParallelPairs(Reduction &R); /// Try to match and generate: SMLAD, SMLADX - 
Signed Multiply Accumulate @@ -204,60 +233,38 @@ namespace { /// products to a 32-bit accumulate operand. Optionally, the instruction can /// exchange the halfwords of the second operand before performing the /// arithmetic. - bool MatchSMLAD(Loop *L); + bool MatchSMLAD(Function &F); public: static char ID; - ARMParallelDSP() : LoopPass(ID) { } - - bool doInitialization(Loop *L, LPPassManager &LPM) override { - LoadPairs.clear(); - WideLoads.clear(); - return true; - } + ARMParallelDSP() : FunctionPass(ID) { } void getAnalysisUsage(AnalysisUsage &AU) const override { - LoopPass::getAnalysisUsage(AU); + FunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } - bool runOnLoop(Loop *TheLoop, LPPassManager &) override { + bool runOnFunction(Function &F) override { if (DisableParallelDSP) return false; - L = TheLoop; + if (skipFunction(F)) + return false; + SE = &getAnalysis().getSE(); AA = &getAnalysis().getAAResults(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); DT = &getAnalysis().getDomTree(); - LI = &getAnalysis().getLoopInfo(); auto &TPC = getAnalysis(); - BasicBlock *Header = TheLoop->getHeader(); - if (!Header) - return false; - - // TODO: We assume the loop header and latch to be the same block. - // This is not a fundamental restriction, but lifting this would just - // require more work to do the transformation and then patch up the CFG. 
- if (Header != TheLoop->getLoopLatch()) { - LLVM_DEBUG(dbgs() << "The loop header is not the loop latch: not " - "running pass ARMParallelDSP\n"); - return false; - } - - if (!TheLoop->getLoopPreheader()) - InsertPreheaderForLoop(L, DT, LI, nullptr, true); - - Function &F = *Header->getParent(); M = F.getParent(); DL = &M->getDataLayout(); @@ -282,17 +289,10 @@ namespace { return false; } - LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI); - LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n"); LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n"); - if (!RecordMemoryOps(Header)) { - LLVM_DEBUG(dbgs() << " - No sequential loads found.\n"); - return false; - } - - bool Changes = MatchSMLAD(L); + bool Changes = MatchSMLAD(F); return Changes; } }; @@ -331,40 +331,14 @@ bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, // TODO: we currently only collect i16, and will support i8 later, so that's // why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth. template -bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) { - ConstantInt *CInt; - - if (match(V, m_ConstantInt(CInt))) { - // TODO: if a constant is used, it needs to fit within the bit width. - return false; - } - - auto *I = dyn_cast(V); - if (!I) - return false; - - Value *Val, *LHS, *RHS; - if (match(V, m_Trunc(m_Value(Val)))) { - if (cast(I)->getDestTy()->getIntegerBitWidth() == MaxBitWidth) - return IsNarrowSequence(Val, VL); - } else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) { - // TODO: we need to implement sadd16/sadd8 for this, which enables to - // also do the rewrite for smlad8.ll, but it is unsupported for now. 
- return false; - } else if (match(V, m_ZExtOrSExt(m_Value(Val)))) { - if (cast(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) +bool ARMParallelDSP::IsNarrowSequence(Value *V) { + if (auto *SExt = dyn_cast(V)) { + if (SExt->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) return false; - if (match(Val, m_Load(m_Value()))) { - auto *Ld = cast(Val); - - // Check that these load could be paired. - if (!LoadPairs.count(Ld) && !OffsetLoads.count(Ld)) - return false; - - VL.push_back(Val); - VL.push_back(I); - return true; + if (auto *Ld = dyn_cast(SExt->getOperand(0))) { + // Check that this load could be paired. + return LoadPairs.count(Ld) || OffsetLoads.count(Ld); } } return false; @@ -375,6 +349,9 @@ bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) { bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { SmallVector Loads; SmallVector Writes; + LoadPairs.clear(); + WideLoads.clear(); + OrderedBasicBlock OrderedBB(BB); // Collect loads and instruction that may write to memory. For now we only // record loads which are simple, sign-extended and have a single user. @@ -389,21 +366,24 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { Loads.push_back(Ld); } + if (Loads.empty() || Loads.size() > NumLoadLimit) + return false; + using InstSet = std::set; using DepMap = std::map; DepMap RAWDeps; // Record any writes that may alias a load. 
const auto Size = LocationSize::unknown(); - for (auto Read : Loads) { - for (auto Write : Writes) { + for (auto Write : Writes) { + for (auto Read : Loads) { MemoryLocation ReadLoc = MemoryLocation(Read->getPointerOperand(), Size); if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc), ModRefInfo::ModRef))) continue; - if (DT->dominates(Write, Read)) + if (OrderedBB.dominates(Write, Read)) RAWDeps[Read].insert(Write); } } @@ -411,17 +391,16 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { // Check whether there's not a write between the two loads which would // prevent them from being safely merged. auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) { - LoadInst *Dominator = DT->dominates(Base, Offset) ? Base : Offset; - LoadInst *Dominated = DT->dominates(Base, Offset) ? Offset : Base; + LoadInst *Dominator = OrderedBB.dominates(Base, Offset) ? Base : Offset; + LoadInst *Dominated = OrderedBB.dominates(Base, Offset) ? Offset : Base; if (RAWDeps.count(Dominated)) { InstSet &WritesBefore = RAWDeps[Dominated]; for (auto Before : WritesBefore) { - // We can't move the second load backward, past a write, to merge // with the first load. - if (DT->dominates(Dominator, Before)) + if (OrderedBB.dominates(Dominator, Before)) return false; } } @@ -431,7 +410,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { // Record base, offset load pairs. for (auto *Base : Loads) { for (auto *Offset : Loads) { - if (Base == Offset) + if (Base == Offset || OffsetLoads.count(Offset)) continue; if (AreSequentialAccesses(Base, Offset, *DL, *SE) && @@ -453,7 +432,54 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { return LoadPairs.size() > 1; } -// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector +// Search recursively back through the operands to find a tree of values that +// form a multiply-accumulate chain. 
The search records the Add and Mul +// instructions that form the reduction and allows us to find a single value +// to be used as the initial input to the accumlator. +bool ARMParallelDSP::Search(Value *V, BasicBlock *BB, Reduction &R) { + // If we find a non-instruction, try to use it as the initial accumulator + // value. This may have already been found during the search in which case + // this function will return false, signaling a search fail. + auto *I = dyn_cast(V); + if (!I) + return R.InsertAcc(V); + + if (I->getParent() != BB) + return false; + + switch (I->getOpcode()) { + default: + break; + case Instruction::PHI: + // Could be the accumulator value. + return R.InsertAcc(V); + case Instruction::Add: { + // Adds should be adding together two muls, or another add and a mul to + // be within the mac chain. One of the operands may also be the + // accumulator value at which point we should stop searching. + R.InsertAdd(I); + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + bool ValidLHS = Search(LHS, BB, R); + bool ValidRHS = Search(RHS, BB, R); + + if (ValidLHS && ValidRHS) + return true; + + return R.InsertAcc(I); + } + case Instruction::Mul: { + Value *MulOp0 = I->getOperand(0); + Value *MulOp1 = I->getOperand(1); + return IsNarrowSequence<16>(MulOp0) && IsNarrowSequence<16>(MulOp1); + } + case Instruction::SExt: + return Search(I->getOperand(0), BB, R); + } + return false; +} + +// The pass needs to identify integer add/sub reductions of 16-bit vector // multiplications. // To use SMLAD: // 1) we first need to find integer add then look for this pattern: @@ -484,88 +510,39 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { // If loop invariants are used instead of loads, these need to be packed // before the loop begins. // -bool ARMParallelDSP::MatchSMLAD(Loop *L) { - // Search recursively back through the operands to find a tree of values that - // form a multiply-accumulate chain. 
The search records the Add and Mul - // instructions that form the reduction and allows us to find a single value - // to be used as the initial input to the accumlator. - std::function Search = [&] - (Value *V, Reduction &R) -> bool { - - // If we find a non-instruction, try to use it as the initial accumulator - // value. This may have already been found during the search in which case - // this function will return false, signaling a search fail. - auto *I = dyn_cast(V); - if (!I) - return R.InsertAcc(V); - - switch (I->getOpcode()) { - default: - break; - case Instruction::PHI: - // Could be the accumulator value. - return R.InsertAcc(V); - case Instruction::Add: { - // Adds should be adding together two muls, or another add and a mul to - // be within the mac chain. One of the operands may also be the - // accumulator value at which point we should stop searching. - bool ValidLHS = Search(I->getOperand(0), R); - bool ValidRHS = Search(I->getOperand(1), R); - if (!ValidLHS && !ValidLHS) - return false; - else if (ValidLHS && ValidRHS) { - R.InsertAdd(I); - return true; - } else { - R.InsertAdd(I); - return R.InsertAcc(I); - } - } - case Instruction::Mul: { - Value *MulOp0 = I->getOperand(0); - Value *MulOp1 = I->getOperand(1); - if (isa(MulOp0) && isa(MulOp1)) { - ValueList LHS; - ValueList RHS; - if (IsNarrowSequence<16>(MulOp0, LHS) && - IsNarrowSequence<16>(MulOp1, RHS)) { - R.InsertMul(I, LHS, RHS); - return true; - } - } - return false; - } - case Instruction::SExt: - return Search(I->getOperand(0), R); - } - return false; - }; - +bool ARMParallelDSP::MatchSMLAD(Function &F) { bool Changed = false; - SmallPtrSet AllAdds; - BasicBlock *Latch = L->getLoopLatch(); - for (Instruction &I : reverse(*Latch)) { - if (I.getOpcode() != Instruction::Add) + for (auto &BB : F) { + SmallPtrSet AllAdds; + if (!RecordMemoryOps(&BB)) continue; - if (AllAdds.count(&I)) - continue; + for (Instruction &I : reverse(BB)) { + if (I.getOpcode() != Instruction::Add) + continue; - 
const auto *Ty = I.getType(); - if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64)) - continue; + if (AllAdds.count(&I)) + continue; - Reduction R(&I); - if (!Search(&I, R)) - continue; + const auto *Ty = I.getType(); + if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64)) + continue; - if (!CreateParallelPairs(R)) - continue; + Reduction R(&I); + if (!Search(&I, &BB, R)) + continue; - InsertParallelMACs(R); - Changed = true; - AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + R.InsertMuls(); + LLVM_DEBUG(dbgs() << "After search, Reduction:\n"; R.dump()); + + if (!CreateParallelPairs(R)) + continue; + + InsertParallelMACs(R); + Changed = true; + AllAdds.insert(R.getAdds().begin(), R.getAdds().end()); + } } return Changed; @@ -578,87 +555,57 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) { return false; // Check that the muls operate directly upon sign extended loads. - for (auto &MulChain : R.getMuls()) { - // A mul has 2 operands, and a narrow op consist of sext and a load; thus - // we expect at least 4 items in this operand value list. - if (MulChain->size() < 4) { - LLVM_DEBUG(dbgs() << "Operand list too short.\n"); + for (auto &MulCand : R.getMuls()) { + if (!MulCand->HasTwoLoadInputs()) return false; - } - MulChain->PopulateLoads(); - ValueList &LHS = static_cast(MulChain.get())->LHS; - ValueList &RHS = static_cast(MulChain.get())->RHS; - - // Use +=2 to skip over the expected extend instructions. - for (unsigned i = 0, e = LHS.size(); i < e; i += 2) { - if (!isa(LHS[i]) || !isa(RHS[i])) - return false; - } } - auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) { - if (!PMul0->AreSymmetrical(PMul1)) - return false; - + auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) { // The first elements of each vector should be loads with sexts. If we // find that its two pairs of consecutive loads, then these can be // transformed into two wider loads and the users can be replaced with // DSP intrinsics. 
- for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) { - auto *Ld0 = dyn_cast(PMul0->LHS[x]); - auto *Ld1 = dyn_cast(PMul1->LHS[x]); - auto *Ld2 = dyn_cast(PMul0->RHS[x]); - auto *Ld3 = dyn_cast(PMul1->RHS[x]); + auto Ld0 = static_cast(PMul0->LHS); + auto Ld1 = static_cast(PMul1->LHS); + auto Ld2 = static_cast(PMul0->RHS); + auto Ld3 = static_cast(PMul1->RHS); - if (!Ld0 || !Ld1 || !Ld2 || !Ld3) - return false; - - LLVM_DEBUG(dbgs() << "Loads:\n" - << " - " << *Ld0 << "\n" - << " - " << *Ld1 << "\n" - << " - " << *Ld2 << "\n" - << " - " << *Ld3 << "\n"); - - if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) { - if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { - LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - R.AddMulPair(PMul0, PMul1); - return true; - } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) { - LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n"); - PMul1->Exchange = true; - R.AddMulPair(PMul0, PMul1); - return true; - } - } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) && - AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { + if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) { + if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); - LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n"); - LLVM_DEBUG(dbgs() << " and swapping muls\n"); - PMul0->Exchange = true; - // Only the second operand can be exchanged, so swap the muls. 
- R.AddMulPair(PMul1, PMul0); + R.AddMulPair(PMul0, PMul1); + return true; + } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) { + LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); + LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n"); + R.AddMulPair(PMul0, PMul1, true); return true; } + } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) && + AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) { + LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n"); + LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n"); + LLVM_DEBUG(dbgs() << " and swapping muls\n"); + // Only the second operand can be exchanged, so swap the muls. + R.AddMulPair(PMul1, PMul0, true); + return true; } return false; }; - OpChainList &Muls = R.getMuls(); + MulCandList &Muls = R.getMuls(); const unsigned Elems = Muls.size(); - SmallPtrSet Paired; for (unsigned i = 0; i < Elems; ++i) { - BinOpChain *PMul0 = static_cast(Muls[i].get()); - if (Paired.count(PMul0->Root)) + MulCandidate *PMul0 = static_cast(Muls[i].get()); + if (PMul0->Paired) continue; for (unsigned j = 0; j < Elems; ++j) { if (i == j) continue; - BinOpChain *PMul1 = static_cast(Muls[j].get()); - if (Paired.count(PMul1->Root)) + MulCandidate *PMul1 = static_cast(Muls[j].get()); + if (PMul1->Paired) continue; const Instruction *Mul0 = PMul0->Root; @@ -668,29 +615,19 @@ bool ARMParallelDSP::CreateParallelPairs(Reduction &R) { assert(PMul0 != PMul1 && "expected different chains"); - if (CanPair(R, PMul0, PMul1)) { - Paired.insert(Mul0); - Paired.insert(Mul1); + if (CanPair(R, PMul0, PMul1)) break; - } } } return !R.getMulPairs().empty(); } - void ARMParallelDSP::InsertParallelMACs(Reduction &R) { - auto CreateSMLADCall = [&](SmallVectorImpl &VecLd0, - SmallVectorImpl &VecLd1, - Value *Acc, bool Exchange, - Instruction *InsertAfter) { + auto CreateSMLAD = [&](LoadInst* WideLd0, LoadInst *WideLd1, + Value *Acc, bool Exchange, + Instruction *InsertAfter) { // Replace the reduction chain with an intrinsic call - IntegerType 
*Ty = IntegerType::get(M->getContext(), 32); - LoadInst *WideLd0 = WideLoads.count(VecLd0[0]) ? - WideLoads[VecLd0[0]]->getLoad() : CreateWideLoad(VecLd0, Ty); - LoadInst *WideLd1 = WideLoads.count(VecLd1[0]) ? - WideLoads[VecLd1[0]]->getLoad() : CreateWideLoad(VecLd1, Ty); Value* Args[] = { WideLd0, WideLd1, Acc }; Function *SMLAD = nullptr; @@ -704,34 +641,95 @@ void ARMParallelDSP::InsertParallelMACs(Reduction &R) { Intrinsic::getDeclaration(M, Intrinsic::arm_smlald); IRBuilder Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); + BasicBlock::iterator(InsertAfter)); Instruction *Call = Builder.CreateCall(SMLAD, Args); NumSMLAD++; return Call; }; - Instruction *InsertAfter = R.getRoot(); + // Return the instruction after the dominated instruction. + auto GetInsertPoint = [this](Value *A, Value *B) { + assert((isa(A) || isa(B)) && + "expected at least one instruction"); + + Value *V = nullptr; + if (!isa(A)) + V = B; + else if (!isa(B)) + V = A; + else + V = DT->dominates(cast(A), cast(B)) ? B : A; + + return &*++BasicBlock::iterator(cast(V)); + }; + Value *Acc = R.getAccumulator(); - if (!Acc) - Acc = ConstantInt::get(IntegerType::get(M->getContext(), 32), 0); - LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n" - << "Acc: " << *Acc << "\n"); + // For any muls that were discovered but not paired, accumulate their values + // as before. 
+ IRBuilder Builder(R.getRoot()->getParent()); + MulCandList &MulCands = R.getMuls(); + for (auto &MulCand : MulCands) { + if (MulCand->Paired) + continue; + + Instruction *Mul = cast(MulCand->Root); + LLVM_DEBUG(dbgs() << "Accumulating unpaired mul: " << *Mul << "\n"); + + if (R.getType() != Mul->getType()) { + assert(R.is64Bit() && "expected 64-bit result"); + Builder.SetInsertPoint(&*++BasicBlock::iterator(Mul)); + Mul = cast(Builder.CreateSExt(Mul, R.getRoot()->getType())); + } + + if (!Acc) { + Acc = Mul; + continue; + } + + // If Acc is the original incoming value to the reduction, it could be a + // phi. But the phi will dominate Mul, meaning that Mul will be the + // insertion point. + Builder.SetInsertPoint(GetInsertPoint(Mul, Acc)); + Acc = Builder.CreateAdd(Mul, Acc); + } + + if (!Acc) { + Acc = R.is64Bit() ? + ConstantInt::get(IntegerType::get(M->getContext(), 64), 0) : + ConstantInt::get(IntegerType::get(M->getContext(), 32), 0); + } else if (Acc->getType() != R.getType()) { + Builder.SetInsertPoint(R.getRoot()); + Acc = Builder.CreateSExt(Acc, R.getType()); + } + + // Roughly sort the mul pairs in their program order. + OrderedBasicBlock OrderedBB(R.getRoot()->getParent()); + llvm::sort(R.getMulPairs(), [&OrderedBB](auto &PairA, auto &PairB) { + const Instruction *A = PairA.first->Root; + const Instruction *B = PairB.first->Root; + return OrderedBB.dominates(A, B); + }); + + IntegerType *Ty = IntegerType::get(M->getContext(), 32); for (auto &Pair : R.getMulPairs()) { - BinOpChain *PMul0 = Pair.first; - BinOpChain *PMul1 = Pair.second; - LLVM_DEBUG(dbgs() << "Muls:\n" - << "- " << *PMul0->Root << "\n" - << "- " << *PMul1->Root << "\n"); + MulCandidate *LHSMul = Pair.first; + MulCandidate *RHSMul = Pair.second; + LoadInst *BaseLHS = LHSMul->getBaseLoad(); + LoadInst *BaseRHS = RHSMul->getBaseLoad(); + LoadInst *WideLHS = WideLoads.count(BaseLHS) ? 
+ WideLoads[BaseLHS]->getLoad() : CreateWideLoad(LHSMul->VecLd, Ty); + LoadInst *WideRHS = WideLoads.count(BaseRHS) ? + WideLoads[BaseRHS]->getLoad() : CreateWideLoad(RHSMul->VecLd, Ty); - Acc = CreateSMLADCall(PMul0->VecLd, PMul1->VecLd, Acc, PMul1->Exchange, - InsertAfter); - InsertAfter = cast(Acc); + Instruction *InsertAfter = GetInsertPoint(WideLHS, WideRHS); + InsertAfter = GetInsertPoint(InsertAfter, Acc); + Acc = CreateSMLAD(WideLHS, WideRHS, Acc, RHSMul->Exchange, InsertAfter); } R.UpdateRoot(cast(Acc)); } -LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, +LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads, IntegerType *LoadTy) { assert(Loads.size() == 2 && "currently only support widening two loads"); @@ -758,8 +756,8 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, return; Source->moveBefore(Sink); - for (auto &U : Source->uses()) - MoveBefore(Source, U.getUser()); + for (auto &Op : Source->operands()) + MoveBefore(Op, Source); }; // Insert the load at the point of the original dominating load. @@ -784,57 +782,30 @@ LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl &Loads, // Loads[0] needs trunc while Loads[1] needs a lshr and trunc. // TODO: Support big-endian as well. 
Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType()); - BaseSExt->setOperand(0, Bottom); + Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType()); + BaseSExt->replaceAllUsesWith(NewBaseSExt); IntegerType *OffsetTy = cast(Offset->getType()); Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth()); Value *Top = IRB.CreateLShr(WideLoad, ShiftVal); Value *Trunc = IRB.CreateTrunc(Top, OffsetTy); - OffsetSExt->setOperand(0, Trunc); + Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType()); + OffsetSExt->replaceAllUsesWith(NewOffsetSExt); + LLVM_DEBUG(dbgs() << "From Base and Offset:\n" + << *Base << "\n" << *Offset << "\n" + << "Created Wide Load:\n" + << *WideLoad << "\n" + << *Bottom << "\n" + << *NewBaseSExt << "\n" + << *Top << "\n" + << *Trunc << "\n" + << *NewOffsetSExt << "\n"); WideLoads.emplace(std::make_pair(Base, - make_unique(Loads, WideLoad))); + std::make_unique(Loads, WideLoad))); return WideLoad; } -// Compare the value lists in Other to this chain. -bool BinOpChain::AreSymmetrical(BinOpChain *Other) { - // Element-by-element comparison of Value lists returning true if they are - // instructions with the same opcode or constants with the same value. 
- auto CompareValueList = [](const ValueList &VL0, - const ValueList &VL1) { - if (VL0.size() != VL1.size()) { - LLVM_DEBUG(dbgs() << "Muls are mismatching operand list lengths: " - << VL0.size() << " != " << VL1.size() << "\n"); - return false; - } - - const unsigned Pairs = VL0.size(); - - for (unsigned i = 0; i < Pairs; ++i) { - const Value *V0 = VL0[i]; - const Value *V1 = VL1[i]; - const auto *Inst0 = dyn_cast(V0); - const auto *Inst1 = dyn_cast(V1); - - if (!Inst0 || !Inst1) - return false; - - if (Inst0->isSameOperationAs(Inst1)) - continue; - - const APInt *C0, *C1; - if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1)) - return false; - } - - return true; - }; - - return CompareValueList(LHS, Other->LHS) && - CompareValueList(RHS, Other->RHS); -} - Pass *llvm::createARMParallelDSPPass() { return new ARMParallelDSP(); } @@ -842,6 +813,6 @@ Pass *llvm::createARMParallelDSPPass() { char ARMParallelDSP::ID = 0; INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp", - "Transform loops to use DSP intrinsics", false, false) + "Transform functions to use DSP intrinsics", false, false) INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp", - "Transform loops to use DSP intrinsics", false, false) + "Transform functions to use DSP intrinsics", false, false) diff --git a/lib/Target/ARM/ARMPredicates.td b/lib/Target/ARM/ARMPredicates.td index 0b6b40de80d..b008d3e2e29 100644 --- a/lib/Target/ARM/ARMPredicates.td +++ b/lib/Target/ARM/ARMPredicates.td @@ -71,7 +71,7 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">, - AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">; + AssemblerPredicate<"FeatureVFP2_SP", "VFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">, AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">, diff --git 
a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 92ae26b3729..56055a15483 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -180,7 +180,7 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // models the APSR when it's accessed by some special instructions. In such cases // it has the same encoding as PC. def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; +def APSR : ARMReg<15, "apsr">; def APSR_NZCV : ARMReg<15, "apsr_nzcv">; def SPSR : ARMReg<2, "spsr">; def FPSCR : ARMReg<3, "fpscr">; @@ -486,12 +486,20 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], // Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP. // These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs. -def Tuples2R : RegisterTuples<[gsub_0, gsub_1], - [(add R0, R2, R4, R6, R8, R10, R12), - (add R1, R3, R5, R7, R9, R11, SP)]>; +def Tuples2Rnosp : RegisterTuples<[gsub_0, gsub_1], + [(add R0, R2, R4, R6, R8, R10), + (add R1, R3, R5, R7, R9, R11)]>; + +def Tuples2Rsp : RegisterTuples<[gsub_0, gsub_1], + [(add R12), (add SP)]>; // Register class representing a pair of even-odd GPRs. -def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> { +def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp, Tuples2Rsp)> { + let Size = 64; // 2 x 32 bits, we have no predefined type of that size. +} + +// Register class representing a pair of even-odd GPRs, except (R12, SP). +def GPRPairnosp : RegisterClass<"ARM", [untyped], 64, (add Tuples2Rnosp)> { let Size = 64; // 2 x 32 bits, we have no predefined type of that size. 
} diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 21d32bde471..3f0b71afd97 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -2239,9 +2239,9 @@ def A9WriteLMfpPostRA : SchedWriteVariant<[ // Distinguish between our multiple MI-level forms of the same // VLDM/VSTM instructions. def A9PreRA : SchedPredicate< - "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">; + "Register::isVirtualRegister(MI->getOperand(0).getReg())">; def A9PostRA : SchedPredicate< - "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">; + "Register::isPhysicalRegister(MI->getOperand(0).getReg())">; // VLDM represents all destination registers as a single register // tuple, unlike LDM. So the number of write operands is not variadic. diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td index 38c8ea2b4f3..bfa5fc0d713 100644 --- a/lib/Target/ARM/ARMScheduleM4.td +++ b/lib/Target/ARM/ARMScheduleM4.td @@ -18,6 +18,9 @@ def CortexM4Model : SchedMachineModel { let PostRAScheduler = 1; let CompleteModel = 0; + let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt, + IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8, + HasV8_3a, HasTrustZone, HasDFB, IsWindows]; } @@ -50,6 +53,7 @@ def : M4UnitL2; def : M4UnitL2; def : M4UnitL2I<(instregex "(t|t2)LDM")>; +def : M4UnitL2I<(instregex "(t|t2)LDR")>; // Stores we use a latency of 1 as they have no outputs @@ -78,9 +82,20 @@ def : M4UnitL1; def : M4UnitL1; def : M4UnitL1I<(instregex "(t|t2)MOV")>; def : M4UnitL1I<(instrs COPY)>; -def : M4UnitL1I<(instregex "t2IT")>; -def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", - "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>; +def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>; +def : M4UnitL1I<(instregex "t2CLREX")>; +def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]", + "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", 
"t2USADA8", "(t|t2)REV")>; + +// These instructions are not of much interest to scheduling as they will not +// be generated or it is not very useful to schedule them. They are here to make +// the model more complete. +def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>; +def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>; +def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>; +def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>; +def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>; +def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>; def : ReadAdvance; def : ReadAdvance; @@ -112,6 +127,9 @@ def : M4UnitL1; def : M4UnitL1; def : M4UnitL1; def : M4UnitL1; +def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>; +def : M4UnitL2I<(instregex "VMOVD")>; +def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>; def : ReadAdvance; def : ReadAdvance; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 978faed776b..09603057b2c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -125,7 +125,7 @@ const CallLowering *ARMSubtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *ARMSubtarget::getInstructionSelector() const { +InstructionSelector *ARMSubtarget::getInstructionSelector() const { return InstSelector.get(); } @@ -205,9 +205,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { NoARM = true; if (isAAPCS_ABI()) - stackAlignment = 8; + stackAlignment = Align(8); if (isTargetNaCl() || isAAPCS16_ABI()) - stackAlignment = 16; + stackAlignment = Align(16); // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: // emitEpilogue is not ready for them. 
Thumb tail calls also use t2B, as @@ -253,6 +253,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isRWPI()) ReserveR9 = true; + // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2 + if (MVEVectorCostFactor == 0) + MVEVectorCostFactor = 2; + // FIXME: Teach TableGen to deal with these instead of doing it manually here. switch (ARMProcFamily) { case Others: @@ -296,13 +300,15 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { LdStMultipleTiming = SingleIssuePlusExtras; MaxInterleaveFactor = 4; if (!isThumb()) - PrefLoopAlignment = 3; + PrefLoopLogAlignment = 3; break; case Kryo: break; case Krait: PreISelOperandLatencyAdjustment = 1; break; + case NeoverseN1: + break; case Swift: MaxInterleaveFactor = 2; LdStMultipleTiming = SingleIssuePlusExtras; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index c2b0f052b84..ef460342a69 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -71,6 +71,7 @@ protected: Exynos, Krait, Kryo, + NeoverseN1, Swift }; enum ARMProcClassEnum { @@ -179,11 +180,9 @@ protected: bool HasVFPv3SP = false; bool HasVFPv4SP = false; bool HasFPARMv8SP = false; - bool HasVFPv2D16 = false; bool HasVFPv3D16 = false; bool HasVFPv4D16 = false; bool HasFPARMv8D16 = false; - bool HasVFPv2D16SP = false; bool HasVFPv3D16SP = false; bool HasVFPv4D16SP = false; bool HasFPARMv8D16SP = false; @@ -450,7 +449,7 @@ protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment = 4; + Align stackAlignment = Align(4); /// CPUString - String name of used CPU. std::string CPUString; @@ -469,7 +468,12 @@ protected: int PreISelOperandLatencyAdjustment = 2; /// What alignment is preferred for loop bodies, in log2(bytes). 
- unsigned PrefLoopAlignment = 0; + unsigned PrefLoopLogAlignment = 0; + + /// The cost factor for MVE instructions, representing the multiple beats an + // instruction can take. The default is 2, (set in initSubtargetFeatures so + // that we can use subtarget features less than 2). + unsigned MVEVectorCostFactor = 0; /// OptMinSize - True if we're optimising for minimum code size, equal to /// the function attribute. @@ -535,7 +539,7 @@ public: } const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; @@ -600,7 +604,7 @@ public: bool hasARMOps() const { return !NoARM; } - bool hasVFP2Base() const { return HasVFPv2D16SP; } + bool hasVFP2Base() const { return HasVFPv2SP; } bool hasVFP3Base() const { return HasVFPv3D16SP; } bool hasVFP4Base() const { return HasVFPv4D16SP; } bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } @@ -668,6 +672,12 @@ public: bool hasSB() const { return HasSB; } bool genLongCalls() const { return GenLongCalls; } bool genExecuteOnly() const { return GenExecuteOnly; } + bool hasBaseDSP() const { + if (isThumb()) + return hasDSP(); + else + return hasV5TEOps(); + } bool hasFP16() const { return HasFP16; } bool hasD32() const { return HasD32; } @@ -812,7 +822,7 @@ public: /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
- unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } @@ -853,9 +863,9 @@ public: return isROPI() || !isTargetELF(); } - unsigned getPrefLoopAlignment() const { - return PrefLoopAlignment; - } + unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } + + unsigned getMVEVectorCostFactor() const { return MVEVectorCostFactor; } bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7f0aae1739b..5c8007f101d 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -96,15 +96,16 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); initializeMVEVPTBlockPass(Registry); + initializeMVETailPredicationPass(Registry); initializeARMLowOverheadLoopsPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSWindows()) - return llvm::make_unique(); - return llvm::make_unique(); + return std::make_unique(); + return std::make_unique(); } static ARMBaseTargetMachine::ARMABI @@ -282,7 +283,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, isLittle, + I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle, F.hasMinSize()); if (!I->isThumb() && !I->hasARMOps()) @@ -447,8 +448,10 @@ bool ARMPassConfig::addPreISel() { MergeExternalByDefault)); } - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() != CodeGenOpt::None) { addPass(createHardwareLoopsPass()); + addPass(createMVETailPredicationPass()); + } return false; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2a8ec734a05..86c8684d14d 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -36,8 +36,12 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +static cl::opt EnableMaskedLoadStores( + "enable-arm-maskedldst", cl::Hidden, cl::init(false), + cl::desc("Enable the generation of masked loads and stores")); + static cl::opt DisableLowOverheadLoops( - "disable-arm-loloops", cl::Hidden, cl::init(true), + "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); bool ARMTTIImpl::areInlineCompatible(const Function *Caller, @@ -167,6 +171,42 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); + // The extend of a load is free + if (I && isa(I->getOperand(0))) { + static const TypeConversionCostTblEntry LoadConversionTbl[] = { + {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0}, + {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0}, + {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0}, + {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0}, + {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0}, + {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0}, + {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1}, + {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1}, + 
{ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1}, + {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1}, + }; + if (const auto *Entry = ConvertCostTableLookup( + LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return Entry->Cost; + + static const TypeConversionCostTblEntry MVELoadConversionTbl[] = { + {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0}, + {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0}, + {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0}, + {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0}, + {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0}, + {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0}, + }; + if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { + if (const auto *Entry = + ConvertCostTableLookup(MVELoadConversionTbl, ISD, + DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return Entry->Cost; + } + } + // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. @@ -313,6 +353,31 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } + // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one + // instruction, i8->i32 is two. i64 zexts are an VAND with a constant, sext + // are linearised so take more. 
+ static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 }, + }; + + if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { + if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl, + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost * ST->getMVEVectorCostFactor(); + } + // Scalar integer conversion costs. static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. @@ -332,7 +397,10 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Entry->Cost; } - return BaseT::getCastInstrCost(Opcode, Dst, Src); + int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, @@ -343,8 +411,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) return 3; - if ((Opcode == Instruction::InsertElement || - Opcode == Instruction::ExtractElement)) { + if (ST->hasNEON() && (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement)) { // Cross-class copies are expensive on many microarchitectures, // so assume they are expensive by default. 
if (ValTy->getVectorElementType()->isIntegerTy()) @@ -357,6 +425,17 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); } + if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement)) { + // We say MVE moves costs at least the MVEVectorCostFactor, even though + // they are scalar instructions. This helps prevent mixing scalar and + // vector, to prevent vectorising where we end up just scalarising the + // result anyway. + return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), + ST->getMVEVectorCostFactor()) * + ValTy->getVectorNumElements() / 2; + } + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } @@ -385,7 +464,10 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return LT.first; } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -397,13 +479,37 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, unsigned NumVectorInstToHideOverhead = 10; int MaxMergeDistance = 64; - if (Ty->isVectorTy() && SE && - !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) - return NumVectorInstToHideOverhead; + if (ST->hasNEON()) { + if (Ty->isVectorTy() && SE && + !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) + return NumVectorInstToHideOverhead; - // In many cases the address computation is not merged into the instruction - // addressing mode. - return 1; + // In many cases the address computation is not merged into the instruction + // addressing mode. 
+ return 1; + } + return BaseT::getAddressComputationCost(Ty, SE, Ptr); +} + +bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) { + if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps()) + return false; + + if (auto *VecTy = dyn_cast(DataTy)) { + // Don't support v2i1 yet. + if (VecTy->getNumElements() == 2) + return false; + + // We don't support extending fp types. + unsigned VecWidth = DataTy->getPrimitiveSizeInBits(); + if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy()) + return false; + } + + unsigned EltWidth = DataTy->getScalarSizeInBits(); + return (EltWidth == 32 && (!Alignment || Alignment >= 4)) || + (EltWidth == 16 && (!Alignment || Alignment >= 2)) || + (EltWidth == 8); } int ARMTTIImpl::getMemcpyCost(const Instruction *I) { @@ -442,78 +548,96 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) { int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Broadcast) { - static const CostTblEntry NEONDupTbl[] = { - // VDUP handles these cases. - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, + if (ST->hasNEON()) { + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry NEONDupTbl[] = { + // VDUP handles these cases. 
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, - LT.second)) - return LT.first * Entry->Cost; + if (const auto *Entry = + CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } + if (Kind == TTI::SK_Reverse) { + static const CostTblEntry NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + + if (const auto *Entry = + CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } + if (Kind == TTI::SK_Select) { + static const CostTblEntry NEONSelShuffleTbl[] = { + // Select shuffle cost table for ARM. Cost is the number of + // instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + } } - if (Kind == TTI::SK_Reverse) { - static const CostTblEntry NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a - // double word (vrev) or two if we shuffle a quad word (vrev, vext). - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, + if (ST->hasMVEIntegerOps()) { + if (Kind == TTI::SK_Broadcast) { + static const CostTblEntry MVEDupTbl[] = { + // VDUP handles these cases. 
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) - return LT.first * Entry->Cost; - - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(); + } } - if (Kind == TTI::SK_Select) { - static const CostTblEntry NEONSelShuffleTbl[] = { - // Select shuffle cost table for ARM. Cost is the number of instructions - // required to create the shuffled vector. - - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, - - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, - - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, - - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - } - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } int ARMTTIImpl::getArithmeticInstrCost( @@ -567,38 +691,64 @@ int ARMTTIImpl::getArithmeticInstrCost( // Multiplication. 
}; - if (ST->hasNEON()) + if (ST->hasNEON()) { if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) return LT.first * Entry->Cost; - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); - // This is somewhat of a hack. The problem that we are facing is that SROA - // creates a sequence of shift, and, or instructions to construct values. - // These sequences are recognized by the ISel and have zero-cost. Not so for - // the vectorized code. Because we have support for v2i64 but not i64 those - // sequences look particularly beneficial to vectorize. - // To work around this we increase the cost of v2i64 operations to make them - // seem less beneficial. - if (LT.second == MVT::v2i64 && - Op2Info == TargetTransformInfo::OK_UniformConstantValue) - Cost += 4; + // This is somewhat of a hack. The problem that we are facing is that SROA + // creates a sequence of shift, and, or instructions to construct values. + // These sequences are recognized by the ISel and have zero-cost. Not so for + // the vectorized code. Because we have support for v2i64 but not i64 those + // sequences look particularly beneficial to vectorize. + // To work around this we increase the cost of v2i64 operations to make them + // seem less beneficial. + if (LT.second == MVT::v2i64 && + Op2Info == TargetTransformInfo::OK_UniformConstantValue) + Cost += 4; - return Cost; + return Cost; + } + + int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + + // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost, + // without treating floats as more expensive that scalars or increasing the + // costs for custom operations. The results is also multiplied by the + // MVEVectorCostFactor where appropriate. 
+ if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second)) + return LT.first * BaseCost; + + // Else this is expand, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost; + } + + return BaseCost; } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - if (Src->isVectorTy() && Alignment != 16 && + if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { // Unaligned loads/stores are extremely inefficient. // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr. return LT.first * 4; } - return LT.first; + int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() + ? ST->getMVEVectorCostFactor() + : 1; + return BaseCost * LT.first; } int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, @@ -893,6 +1043,11 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, } return; } + // Don't unroll vectorised loop. MVE does not benefit from it as much as + // scalar code. 
+ if (I.getType()->isVectorTy()) + return; + SmallVector Operands(I.value_op_begin(), I.value_op_end()); Cost += getUserCost(&I, Operands); @@ -914,3 +1069,28 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (Cost < 12) UP.Force = true; } + +bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + assert(isa(Ty) && "Expected Ty to be a vector type"); + unsigned ScalarBits = Ty->getScalarSizeInBits(); + if (!ST->hasMVEIntegerOps()) + return false; + + switch (Opcode) { + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Mul: + case Instruction::FCmp: + return false; + case Instruction::ICmp: + case Instruction::Add: + return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128; + default: + llvm_unreachable("Unhandled reduction opcode"); + } + return false; +} diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 52f6ea4a6e2..a878fdcfe3c 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -101,9 +101,9 @@ public: /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD - /// is IEEE-754 compliant, but it's not covered in this target. + /// and Arm MVE are IEEE-754 compliant. 
bool isFPVectorizationPotentiallyUnsafe() { - return !ST->isTargetDarwin(); + return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); } /// \name Scalar TTI Implementations @@ -122,10 +122,13 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { if (ST->hasNEON()) return 16; + if (ST->hasMVEIntegerOps()) + return 8; return 0; } @@ -138,6 +141,8 @@ public: if (Vector) { if (ST->hasNEON()) return 128; + if (ST->hasMVEIntegerOps()) + return 128; return 0; } @@ -148,10 +153,23 @@ public: return ST->getMaxInterleaveFactor(); } + bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment); + + bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) { + return isLegalMaskedLoad(DataTy, Alignment); + } + int getMemcpyCost(const Instruction *I); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const; + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return false; + } + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 1da9452f1d2..d2c355c1da7 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2275,6 +2275,14 @@ public: return Value >= 1 && Value <= 32; } + bool isMveSaturateOp() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + uint64_t Value = CE->getValue(); + return Value == 48 || Value == 64; + } + bool isITCondCodeNoAL() const { if (!isITCondCode()) return false; ARMCC::CondCodes CC = getCondCode(); @@ -2479,28 +2487,28 @@ public: void addModImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of 
operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue()); Inst.addOperand(MCOperand::createImm(Enc)); } void addModImmNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue()); Inst.addOperand(MCOperand::createImm(Enc)); } void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Val = -CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint32_t Val = -CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } @@ -2523,19 +2531,19 @@ public: void addFBits16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(16 - CE->getValue())); } void addFBits32Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(32 - CE->getValue())); } void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); Inst.addOperand(MCOperand::createImm(Val)); } @@ -2544,7 +2552,7 @@ public: assert(N == 1 && 
"Invalid number of operands!"); // FIXME: We really want to scale the value here, but the LDRD/STRD // instruction don't encode operands that way yet. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -2552,35 +2560,31 @@ public: assert(N == 1 && "Invalid number of operands!"); // FIXME: We really want to scale the value here, but the VSTR/VLDR_VSYSR // instruction don't encode operands that way yet. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift0Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift1Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Shift2Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } void addImm7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); - assert(CE != nullptr && "Invalid operand type!"); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -2588,7 +2592,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The 
immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 4)); } @@ -2596,7 +2600,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(CE->getValue() / 4))); } @@ -2604,7 +2608,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 4)); } @@ -2612,7 +2616,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction // the bits as encoded, so subtract off one here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() - 1)); } @@ -2620,7 +2624,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction // the bits as encoded, so subtract off one here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() - 1)); } @@ -2628,7 +2632,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate, except for 32, which encodes as // zero. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Imm = CE->getValue(); Inst.addOperand(MCOperand::createImm((Imm == 32 ? 0 : Imm))); } @@ -2637,7 +2641,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // An ASR value of 32 encodes as 0, so that's how we want to add it to // the instruction as well. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = CE->getValue(); Inst.addOperand(MCOperand::createImm(Val == 32 ? 0 : Val)); } @@ -2646,7 +2650,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its bitwise // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue())); } @@ -2654,7 +2658,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } @@ -2662,7 +2666,7 @@ public: assert(N == 1 && "Invalid number of operands!"); // The operand is actually an imm0_4095, but we have its // negation in the assembly source, so twiddle it here. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } @@ -2671,9 +2675,7 @@ public: Inst.addOperand(MCOperand::createImm(CE->getValue() >> 2)); return; } - - const MCSymbolRefExpr *SR = dyn_cast(Imm.Val); - assert(SR && "Unknown value type!"); + const MCSymbolRefExpr *SR = cast(Imm.Val); Inst.addOperand(MCOperand::createExpr(SR)); } @@ -2685,10 +2687,7 @@ public: Inst.addOperand(MCOperand::createImm(CE->getValue())); return; } - - const MCSymbolRefExpr *SR = dyn_cast(Imm.Val); - - assert(SR && "Unknown value type!"); + const MCSymbolRefExpr *SR = cast(Imm.Val); Inst.addOperand(MCOperand::createExpr(SR)); return; } @@ -2750,7 +2749,7 @@ public: return; } - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); int Val = CE->getValue(); Inst.addOperand(MCOperand::createImm(Val)); } @@ -3130,7 +3129,7 @@ public: void addPowerTwoOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue())); } @@ -3225,14 +3224,14 @@ public: assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. // Mask in that this is an i8 splat. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() | 0xe00)); } void addNEONi16splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi16splat(Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3241,7 +3240,7 @@ public: void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff); Inst.addOperand(MCOperand::createImm(Value)); @@ -3250,7 +3249,7 @@ public: void addNEONi32splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi32splat(Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3259,7 +3258,7 @@ public: void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = CE->getValue(); Value = ARM_AM::encodeNEONi32splat(~Value); Inst.addOperand(MCOperand::createImm(Value)); @@ -3267,7 +3266,7 @@ public: void addNEONi8ReplicateOperands(MCInst &Inst, bool Inv) const { // The immediate encodes the type of constant as well as the value. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv8i8 || Inst.getOpcode() == ARM::VMOVv16i8) && "All instructions that wants to replicate non-zero byte " @@ -3298,7 +3297,7 @@ public: void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = encodeNeonVMOVImmediate(CE->getValue()); Inst.addOperand(MCOperand::createImm(Value)); } @@ -3310,7 +3309,7 @@ public: void addNEONvmovi16ReplicateOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv4i16 || Inst.getOpcode() == ARM::VMOVv8i16 || Inst.getOpcode() == ARM::VMVNv4i16 || @@ -3327,14 +3326,14 @@ public: void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); unsigned Value = encodeNeonVMOVImmediate(~CE->getValue()); Inst.addOperand(MCOperand::createImm(Value)); } void addNEONvmovi32ReplicateOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); assert((Inst.getOpcode() == ARM::VMOVv2i32 || Inst.getOpcode() == ARM::VMOVv4i32 || Inst.getOpcode() == ARM::VMVNv2i32 || @@ -3349,7 +3348,7 @@ public: void addNEONi64splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
- const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint64_t Value = CE->getValue(); unsigned Imm = 0; for (unsigned i = 0; i < 8; ++i, Value >>= 8) { @@ -3360,20 +3359,28 @@ public: void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm(CE->getValue() / 90)); } void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180)); } + void addMveSaturateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = cast(getImm()); + unsigned Imm = CE->getValue(); + assert((Imm == 48 || Imm == 64) && "Invalid saturate operand"); + Inst.addOperand(MCOperand::createImm(Imm == 48 ? 
1 : 0)); + } + void print(raw_ostream &OS) const override; static std::unique_ptr CreateITMask(unsigned Mask, SMLoc S) { - auto Op = make_unique(k_ITCondMask); + auto Op = std::make_unique(k_ITCondMask); Op->ITMask.Mask = Mask; Op->StartLoc = S; Op->EndLoc = S; @@ -3382,7 +3389,7 @@ public: static std::unique_ptr CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - auto Op = make_unique(k_CondCode); + auto Op = std::make_unique(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -3391,7 +3398,7 @@ public: static std::unique_ptr CreateVPTPred(ARMVCC::VPTCodes CC, SMLoc S) { - auto Op = make_unique(k_VPTPred); + auto Op = std::make_unique(k_VPTPred); Op->VCC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; @@ -3399,7 +3406,7 @@ public: } static std::unique_ptr CreateCoprocNum(unsigned CopVal, SMLoc S) { - auto Op = make_unique(k_CoprocNum); + auto Op = std::make_unique(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -3407,7 +3414,7 @@ public: } static std::unique_ptr CreateCoprocReg(unsigned CopVal, SMLoc S) { - auto Op = make_unique(k_CoprocReg); + auto Op = std::make_unique(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; @@ -3416,7 +3423,7 @@ public: static std::unique_ptr CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_CoprocOption); + auto Op = std::make_unique(k_CoprocOption); Op->Cop.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3424,7 +3431,7 @@ public: } static std::unique_ptr CreateCCOut(unsigned RegNum, SMLoc S) { - auto Op = make_unique(k_CCOut); + auto Op = std::make_unique(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; @@ -3432,7 +3439,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - auto Op = make_unique(k_Token); + auto Op = std::make_unique(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -3442,7 +3449,7 @@ public: static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc 
E) { - auto Op = make_unique(k_Register); + auto Op = std::make_unique(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; @@ -3453,7 +3460,7 @@ public: CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, unsigned ShiftReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShiftedRegister); + auto Op = std::make_unique(k_ShiftedRegister); Op->RegShiftedReg.ShiftTy = ShTy; Op->RegShiftedReg.SrcReg = SrcReg; Op->RegShiftedReg.ShiftReg = ShiftReg; @@ -3466,7 +3473,7 @@ public: static std::unique_ptr CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShiftedImmediate); + auto Op = std::make_unique(k_ShiftedImmediate); Op->RegShiftedImm.ShiftTy = ShTy; Op->RegShiftedImm.SrcReg = SrcReg; Op->RegShiftedImm.ShiftImm = ShiftImm; @@ -3477,7 +3484,7 @@ public: static std::unique_ptr CreateShifterImm(bool isASR, unsigned Imm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ShifterImmediate); + auto Op = std::make_unique(k_ShifterImmediate); Op->ShifterImm.isASR = isASR; Op->ShifterImm.Imm = Imm; Op->StartLoc = S; @@ -3487,7 +3494,7 @@ public: static std::unique_ptr CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_RotateImmediate); + auto Op = std::make_unique(k_RotateImmediate); Op->RotImm.Imm = Imm; Op->StartLoc = S; Op->EndLoc = E; @@ -3496,7 +3503,7 @@ public: static std::unique_ptr CreateModImm(unsigned Bits, unsigned Rot, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ModifiedImmediate); + auto Op = std::make_unique(k_ModifiedImmediate); Op->ModImm.Bits = Bits; Op->ModImm.Rot = Rot; Op->StartLoc = S; @@ -3506,7 +3513,7 @@ public: static std::unique_ptr CreateConstantPoolImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_ConstantPoolImmediate); + auto Op = std::make_unique(k_ConstantPoolImmediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3515,7 +3522,7 @@ public: static std::unique_ptr 
CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { - auto Op = make_unique(k_BitfieldDescriptor); + auto Op = std::make_unique(k_BitfieldDescriptor); Op->Bitfield.LSB = LSB; Op->Bitfield.Width = Width; Op->StartLoc = S; @@ -3543,16 +3550,15 @@ public: Kind = k_SPRRegisterList; } - // Sort based on the register encoding values. - array_pod_sort(Regs.begin(), Regs.end()); - if (Kind == k_RegisterList && Regs.back().second == ARM::APSR) Kind = k_RegisterListWithAPSR; - auto Op = make_unique(Kind); - for (SmallVectorImpl>::const_iterator - I = Regs.begin(), E = Regs.end(); I != E; ++I) - Op->Registers.push_back(I->second); + assert(std::is_sorted(Regs.begin(), Regs.end()) && + "Register list must be sorted by encoding"); + + auto Op = std::make_unique(Kind); + for (const auto &P : Regs) + Op->Registers.push_back(P.second); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; @@ -3563,7 +3569,7 @@ public: unsigned Count, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorList); + auto Op = std::make_unique(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -3575,7 +3581,7 @@ public: static std::unique_ptr CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorListAllLanes); + auto Op = std::make_unique(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -3587,7 +3593,7 @@ public: static std::unique_ptr CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index, bool isDoubleSpaced, SMLoc S, SMLoc E) { - auto Op = make_unique(k_VectorListIndexed); + auto Op = std::make_unique(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; @@ -3599,7 +3605,7 @@ public: static std::unique_ptr CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, 
MCContext &Ctx) { - auto Op = make_unique(k_VectorIndex); + auto Op = std::make_unique(k_VectorIndex); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; @@ -3608,7 +3614,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Immediate); + auto Op = std::make_unique(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -3620,7 +3626,7 @@ public: unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType, unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S, SMLoc E, SMLoc AlignmentLoc = SMLoc()) { - auto Op = make_unique(k_Memory); + auto Op = std::make_unique(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; Op->Memory.OffsetRegNum = OffsetRegNum; @@ -3637,7 +3643,7 @@ public: static std::unique_ptr CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy, unsigned ShiftImm, SMLoc S, SMLoc E) { - auto Op = make_unique(k_PostIndexRegister); + auto Op = std::make_unique(k_PostIndexRegister); Op->PostIdxReg.RegNum = RegNum; Op->PostIdxReg.isAdd = isAdd; Op->PostIdxReg.ShiftTy = ShiftTy; @@ -3649,7 +3655,7 @@ public: static std::unique_ptr CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - auto Op = make_unique(k_MemBarrierOpt); + auto Op = std::make_unique(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3658,7 +3664,7 @@ public: static std::unique_ptr CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) { - auto Op = make_unique(k_InstSyncBarrierOpt); + auto Op = std::make_unique(k_InstSyncBarrierOpt); Op->ISBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3667,7 +3673,7 @@ public: static std::unique_ptr CreateTraceSyncBarrierOpt(ARM_TSB::TraceSyncBOpt Opt, SMLoc S) { - auto Op = make_unique(k_TraceSyncBarrierOpt); + auto Op = std::make_unique(k_TraceSyncBarrierOpt); Op->TSBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; @@ -3676,7 +3682,7 @@ public: static std::unique_ptr 
CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - auto Op = make_unique(k_ProcIFlags); + auto Op = std::make_unique(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; @@ -3684,7 +3690,7 @@ public: } static std::unique_ptr CreateMSRMask(unsigned MMask, SMLoc S) { - auto Op = make_unique(k_MSRMask); + auto Op = std::make_unique(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -3692,7 +3698,7 @@ public: } static std::unique_ptr CreateBankedReg(unsigned Reg, SMLoc S) { - auto Op = make_unique(k_BankedReg); + auto Op = std::make_unique(k_BankedReg); Op->BankedReg.Val = Reg; Op->StartLoc = S; Op->EndLoc = S; @@ -4253,6 +4259,24 @@ static unsigned getNextRegister(unsigned Reg) { } } +// Insert an pair in an ordered vector. Return true on +// success, or false, if duplicate encoding found. +static bool +insertNoDuplicates(SmallVectorImpl> &Regs, + unsigned Enc, unsigned Reg) { + Regs.emplace_back(Enc, Reg); + for (auto I = Regs.rbegin(), J = I + 1, E = Regs.rend(); J != E; ++I, ++J) { + if (J->first == Enc) { + Regs.erase(J.base()); + return false; + } + if (J->first < Enc) + break; + std::swap(*I, *J); + } + return true; +} + /// Parse a register list. bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder) { @@ -4278,7 +4302,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); ++Reg; } const MCRegisterClass *RC; @@ -4295,7 +4319,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, // Store the register. 
EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); // This starts immediately after the first register token in the list, // so we can see either a comma or a minus (range separator) as a legal @@ -4326,7 +4350,11 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, while (Reg != EndReg) { Reg = getNextRegister(Reg); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(AfterMinusLoc, StringRef("duplicated register (") + + ARMInstPrinter::getRegisterName(Reg) + + ") in register list"); + } } continue; } @@ -4350,11 +4378,16 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, // subset of GPRRegClassId except it contains APSR as well. RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID]; } - if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] || - RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) { + if (Reg == ARM::VPR && + (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] || + RC == &ARMMCRegisterClasses[ARM::DPRRegClassID] || + RC == &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID])) { RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID]; EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + } continue; } // The register must be in the same register class as the first. 
@@ -4371,21 +4404,19 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) return Error(RegLoc, "register list not in ascending order"); } - if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) { - Warning(RegLoc, "duplicated register (" + RegTok.getString() + - ") in register list"); - continue; - } // VFP register lists must also be contiguous. if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] && Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); EReg = MRI->getEncodingValue(Reg); - Registers.push_back(std::pair(EReg, Reg)); + if (!insertNoDuplicates(Registers, EReg, Reg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + } if (isQReg) { EReg = MRI->getEncodingValue(++Reg); - Registers.push_back(std::pair(EReg, Reg)); + Registers.emplace_back(EReg, Reg); } } @@ -5702,14 +5733,16 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { return false; } - // If we have a '#', it's an immediate offset, else assume it's a register - // offset. Be friendly and also accept a plain integer (without a leading - // hash) for gas compatibility. + // If we have a '#' or '$', it's an immediate offset, else assume it's a + // register offset. Be friendly and also accept a plain integer or expression + // (without a leading hash) for gas compatibility. if (Parser.getTok().is(AsmToken::Hash) || Parser.getTok().is(AsmToken::Dollar) || + Parser.getTok().is(AsmToken::LParen) || Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().isNot(AsmToken::Integer)) - Parser.Lex(); // Eat '#' or '$'. 
+ if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) + Parser.Lex(); // Eat '#' or '$' E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -11308,7 +11341,7 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { SmallVector Opcodes; auto parseOne = [&]() -> bool { - const MCExpr *OE; + const MCExpr *OE = nullptr; SMLoc OpcodeLoc = getLexer().getLoc(); if (check(getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE), @@ -11694,14 +11727,14 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { { ARM::AEK_CRYPTO, {Feature_HasV8Bit}, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, {Feature_HasV8Bit}, - {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, + {ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} }, { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureMP} }, { ARM::AEK_SIMD, {Feature_HasV8Bit}, - {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, + {ARM::FeatureNEON, ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} }, @@ -11775,19 +11808,19 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, // immediate in the syntax. 
switch (Kind) { default: break; - case MCK__35_0: + case MCK__HASH_0: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 0) return Match_Success; break; - case MCK__35_8: + case MCK__HASH_8: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 8) return Match_Success; break; - case MCK__35_16: + case MCK__HASH_16: if (Op.isImm()) if (const MCConstantExpr *CE = dyn_cast(Op.getImm())) if (CE->getValue() == 16) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 673691ebd93..eabc26d05f4 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -314,7 +314,7 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVMOVModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); @@ -561,6 +561,8 @@ static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -3445,7 +3447,7 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, +DecodeVMOVModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t 
Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5679,7 +5681,7 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, } } } - return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder); } if (!(imm & 0x20)) return MCDisassembler::Fail; @@ -5738,7 +5740,7 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, } } } - return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + return DecodeVMOVModImmInstruction(Inst, Insn, Address, Decoder); } if (!(imm & 0x20)) return MCDisassembler::Fail; @@ -6481,6 +6483,12 @@ static DecodeStatus DecodeMVEOverlappingLongShift( if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + if (fieldFromInstruction (Insn, 6, 3) != 4) + return MCDisassembler::SoftFail; + + if (Rda == Rm) + return MCDisassembler::SoftFail; + return S; } @@ -6503,6 +6511,13 @@ static DecodeStatus DecodeMVEOverlappingLongShift( if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + if (Inst.getOpcode() == ARM::MVE_SQRSHRL || + Inst.getOpcode() == ARM::MVE_UQRSHLL) { + unsigned Saturate = fieldFromInstruction(Insn, 7, 1); + // Saturate, the bit position for saturation + Inst.addOperand(MCOperand::createImm(Saturate)); + } + return S; } @@ -6572,3 +6587,11 @@ static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address, return MCDisassembler::Fail; return S; } + +static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + Inst.addOperand(MCOperand::createReg(ARM::VPR)); + Inst.addOperand(MCOperand::createReg(ARM::VPR)); + return S; +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 7732a6485a8..24a9fabf097 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h 
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -518,10 +518,10 @@ namespace ARM_AM { // Valid alignments depend on the specific instruction. //===--------------------------------------------------------------------===// - // NEON Modified Immediates + // NEON/MVE Modified Immediates //===--------------------------------------------------------------------===// // - // Several NEON instructions (e.g., VMOV) take a "modified immediate" + // Several NEON and MVE instructions (e.g., VMOV) take a "modified immediate" // vector operand, where a small immediate encoded in the instruction // specifies a full NEON vector value. These modified immediates are // represented here as encoded integers. The low 8 bits hold the immediate @@ -529,20 +529,20 @@ namespace ARM_AM { // the "Cmode" field of the instruction. The interfaces below treat the // Op and Cmode values as a single 5-bit value. - inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + inline unsigned createVMOVModImm(unsigned OpCmode, unsigned Val) { return (OpCmode << 8) | Val; } - inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + inline unsigned getVMOVModImmOpCmode(unsigned ModImm) { return (ModImm >> 8) & 0x1f; } - inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; } + inline unsigned getVMOVModImmVal(unsigned ModImm) { return ModImm & 0xff; } - /// decodeNEONModImm - Decode a NEON modified immediate value into the + /// decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the /// element value and the element size in bits. (If the element size is /// smaller than the vector, it is splatted into all the elements.) 
- inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { - unsigned OpCmode = getNEONModImmOpCmode(ModImm); - unsigned Imm8 = getNEONModImmVal(ModImm); + inline uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits) { + unsigned OpCmode = getVMOVModImmOpCmode(ModImm); + unsigned Imm8 = getVMOVModImmVal(ModImm); uint64_t Val = 0; if (OpCmode == 0xe) { @@ -572,7 +572,7 @@ namespace ARM_AM { } EltBits = 64; } else { - llvm_unreachable("Unsupported NEON immediate"); + llvm_unreachable("Unsupported VMOV immediate"); } return Val; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index aeab5be78ab..6196881a9b8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -233,7 +233,7 @@ static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) { const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup, uint64_t Value) const { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case ARM::fixup_arm_thumb_br: { // Relaxing tB to t2B. tB has a signed 12-bit displacement with the // low bit being an implied zero. There's an implied +4 offset for the @@ -870,7 +870,7 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCValue &Target) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; - const unsigned FixupKind = Fixup.getKind() ; + const unsigned FixupKind = Fixup.getKind(); if (FixupKind == FK_NONE) return true; if (FixupKind == ARM::fixup_arm_thumb_bl) { @@ -1105,28 +1105,28 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( if (Instrs.empty()) return 0; // Start off assuming CFA is at SP+0. 
- int CFARegister = ARM::SP; + unsigned CFARegister = ARM::SP; int CFARegisterOffset = 0; // Mark savable registers as initially unsaved DenseMap RegOffsets; int FloatRegCount = 0; // Process each .cfi directive and build up compact unwind info. for (size_t i = 0, e = Instrs.size(); i != e; ++i) { - int Reg; + unsigned Reg; const MCCFIInstruction &Inst = Instrs[i]; switch (Inst.getOperation()) { case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa CFARegisterOffset = -Inst.getOffset(); - CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true); break; case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset CFARegisterOffset = -Inst.getOffset(); break; case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register - CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + CFARegister = *MRI.getLLVMRegNum(Inst.getRegister(), true); break; case MCCFIInstruction::OpOffset: // DW_CFA_offset - Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RegOffsets[Reg] = Inst.getOffset(); else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index c4daafe8ee9..6293a246230 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -393,6 +393,9 @@ namespace ARMII { // in an IT block). ThumbArithFlagSetting = 1 << 19, + // Whether an instruction can be included in an MVE tail-predicated loop. + ValidForTailPredication = 1 << 20, + //===------------------------------------------------------------------===// // Code domain. 
DomainShift = 15, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index fda19eea1de..1fee38821a4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -82,7 +82,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; @@ -145,7 +145,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return ELF::R_ARM_THM_BF18; } } - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; @@ -263,5 +263,5 @@ void ARMELFObjectWriter::addTargetSectionFlags(MCContext &Ctx, std::unique_ptr llvm::createARMELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index 45be1ee9634..a1def61b58d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -1334,12 +1334,12 @@ void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, << markup(">"); } -void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, +void ARMInstPrinter::printVMOVModImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; - uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + uint64_t Val = ARM_AM::decodeVMOVModImm(EncodedImm, EltBits); O << markup(""); @@ -1676,3 +1676,11 @@ void 
ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum, O.write_hex(Val); O << markup(">"); } + +void ARMInstPrinter::printMveSaturateOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + uint32_t Val = MI->getOperand(OpNum).getImm(); + assert(Val <= 1 && "Invalid MVE saturate operand"); + O << "#" << (Val == 1 ? 48 : 64); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index 69026956b60..eeb811e216f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -191,7 +191,7 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + void printVMOVModImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); @@ -262,7 +262,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printExpandedImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - + void printMveSaturateOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); private: unsigned DefaultAltIdx = ARM::NoRegAltName; }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index dca6fe37d49..268fe7efd9c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1720,7 +1720,6 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, unsigned Reg = MI.getOperand(Op).getReg(); bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); - bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM; 
unsigned Binary = 0; @@ -1739,21 +1738,13 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { const MCRegisterInfo &MRI = *CTX.getRegisterInfo(); - if (!CLRMRegs) { - assert(std::is_sorted(MI.begin() + Op, MI.end(), - [&](const MCOperand &LHS, const MCOperand &RHS) { - return MRI.getEncodingValue(LHS.getReg()) < - MRI.getEncodingValue(RHS.getReg()); - })); - } - + assert(std::is_sorted(MI.begin() + Op, MI.end(), + [&](const MCOperand &LHS, const MCOperand &RHS) { + return MRI.getEncodingValue(LHS.getReg()) < + MRI.getEncodingValue(RHS.getReg()); + })); for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo; - if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) { - RegNo = 15; - } else { - RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg()); - } + unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index c49885023cb..ed4000c7e5b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -204,7 +204,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // relocation entry in the low 16 bits of r_address field. unsigned ThumbBit = 0; unsigned MovtBit = 0; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; case ARM::fixup_arm_movt_hi16: MovtBit = 1; @@ -480,7 +480,7 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, // PAIR. I.e. it's correct that we insert the high bits of the addend in the // MOVW case here. relocation entries. 
uint32_t Value = 0; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movw_lo16: @@ -506,5 +506,5 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, std::unique_ptr llvm::createARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index b863517c0cc..7b30a61e8cc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -249,12 +249,12 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { : ARM::FK_VFPV3_D16) : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD))); - else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP)) + else if (STI.hasFeature(ARM::FeatureVFP2_SP)) emitFPU(ARM::FK_VFPV2); } // ABI_HardFP_use attribute to indicate single precision FP. 
- if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64)) + if (STI.hasFeature(ARM::FeatureVFP2_SP) && !STI.hasFeature(ARM::FeatureFP64)) emitAttribute(ARMBuildAttrs::ABI_HardFP_use, ARMBuildAttrs::HardFPSinglePrecision); diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 054a95dd1e1..900c5fe3036 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -92,7 +92,7 @@ namespace llvm { std::unique_ptr createARMWinCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index 2e816bea5e9..b3c8146a9bd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -22,20 +22,10 @@ public: std::unique_ptr OW) : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)) {} - void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; void FinishImpl() override; }; -void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { - switch (Flag) { - default: llvm_unreachable("not implemented"); - case MCAF_SyntaxUnified: - case MCAF_Code16: - break; - } -} - void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 4b25986b90a..cc31929899b 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -86,8 +86,8 @@ void MLxExpansion::pushStack(MachineInstr *MI) { MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { // Look past COPY and INSERT_SUBREG instructions to find the // real definition MI. 
This is important for _sfp instructions. - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(Reg)) return nullptr; MachineBasicBlock *MBB = MI->getParent(); @@ -97,13 +97,13 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { break; if (DefMI->isCopyLike()) { Reg = DefMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } } else if (DefMI->isInsertSubreg()) { Reg = DefMI->getOperand(2).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } @@ -114,9 +114,8 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { } unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->hasOneNonDBGUse(Reg)) + Register Reg = MI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg)) return Reg; MachineBasicBlock *MBB = MI->getParent(); @@ -126,8 +125,7 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { while (UseMI->isCopy() || UseMI->isInsertSubreg()) { Reg = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->hasOneNonDBGUse(Reg)) + if (Register::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg)) return Reg; UseMI = &*MRI->use_instr_nodbg_begin(Reg); if (UseMI->getParent() != MBB) @@ -140,8 +138,8 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { /// hasLoopHazard - Check whether an MLx instruction is chained to itself across /// a single-MBB loop. 
bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { - unsigned Reg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(Reg)) return false; MachineBasicBlock *MBB = MI->getParent(); @@ -154,8 +152,8 @@ outer_continue: if (DefMI->isPHI()) { for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { if (DefMI->getOperand(i + 1).getMBB() == MBB) { - unsigned SrcReg = DefMI->getOperand(i).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register SrcReg = DefMI->getOperand(i).getReg(); + if (Register::isVirtualRegister(SrcReg)) { DefMI = MRI->getVRegDef(SrcReg); goto outer_continue; } @@ -163,13 +161,13 @@ outer_continue: } } else if (DefMI->isCopyLike()) { Reg = DefMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } } else if (DefMI->isInsertSubreg()) { Reg = DefMI->getOperand(2).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { DefMI = MRI->getVRegDef(Reg); continue; } @@ -271,23 +269,23 @@ void MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, unsigned MulOpc, unsigned AddSubOpc, bool NegAcc, bool HasLane) { - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); bool DstDead = MI->getOperand(0).isDead(); - unsigned AccReg = MI->getOperand(1).getReg(); - unsigned Src1Reg = MI->getOperand(2).getReg(); - unsigned Src2Reg = MI->getOperand(3).getReg(); + Register AccReg = MI->getOperand(1).getReg(); + Register Src1Reg = MI->getOperand(2).getReg(); + Register Src2Reg = MI->getOperand(3).getReg(); bool Src1Kill = MI->getOperand(2).isKill(); bool Src2Kill = MI->getOperand(3).isKill(); unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; unsigned NextOp = HasLane ? 
5 : 4; ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); - unsigned PredReg = MI->getOperand(++NextOp).getReg(); + Register PredReg = MI->getOperand(++NextOp).getReg(); const MCInstrDesc &MCID1 = TII->get(MulOpc); const MCInstrDesc &MCID2 = TII->get(AddSubOpc); const MachineFunction &MF = *MI->getParent()->getParent(); - unsigned TmpReg = MRI->createVirtualRegister( - TII->getRegClass(MCID1, 0, TRI, MF)); + Register TmpReg = + MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI, MF)); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) diff --git a/lib/Target/ARM/MVETailPredication.cpp b/lib/Target/ARM/MVETailPredication.cpp new file mode 100644 index 00000000000..4db8ab17c49 --- /dev/null +++ b/lib/Target/ARM/MVETailPredication.cpp @@ -0,0 +1,519 @@ +//===- MVETailPredication.cpp - MVE Tail Predication ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead +/// branches to help accelerate DSP applications. These two extensions can be +/// combined to provide implicit vector predication within a low-overhead loop. +/// The HardwareLoops pass inserts intrinsics identifying loops that the +/// backend will attempt to convert into a low-overhead loop. The vectorizer is +/// responsible for generating a vectorized loop in which the lanes are +/// predicated upon the iteration counter. This pass looks at these predicated +/// vector loops, that are targets for low-overhead loops, and prepares it for +/// code generation. 
Once the vectorizer has produced a masked loop, there's a +/// couple of final forms: +/// - A tail-predicated loop, with implicit predication. +/// - A loop containing multiple VCPT instructions, predicating multiple VPT +/// blocks of instructions operating on different vector types. + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "ARM.h" +#include "ARMSubtarget.h" + +using namespace llvm; + +#define DEBUG_TYPE "mve-tail-predication" +#define DESC "Transform predicated vector loops to use MVE tail predication" + +static cl::opt +DisableTailPredication("disable-mve-tail-predication", cl::Hidden, + cl::init(true), + cl::desc("Disable MVE Tail Predication")); +namespace { + +class MVETailPredication : public LoopPass { + SmallVector MaskedInsts; + Loop *L = nullptr; + ScalarEvolution *SE = nullptr; + TargetTransformInfo *TTI = nullptr; + +public: + static char ID; + + MVETailPredication() : LoopPass(ID) { } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } + + bool runOnLoop(Loop *L, LPPassManager&) override; + +private: + + /// Perform the relevant checks on the loop and convert if possible. + bool TryConvert(Value *TripCount); + + /// Return whether this is a vectorized loop, that contains masked + /// load/stores. + bool IsPredicatedVectorLoop(); + + /// Compute a value for the total number of elements that the predicated + /// loop will process. 
+ Value *ComputeElements(Value *TripCount, VectorType *VecTy); + + /// Is the icmp that generates an i1 vector, based upon a loop counter + /// and a limit that is defined outside the loop. + bool isTailPredicate(Instruction *Predicate, Value *NumElements); +}; + +} // end namespace + +static bool IsDecrement(Instruction &I) { + auto *Call = dyn_cast(&I); + if (!Call) + return false; + + Intrinsic::ID ID = Call->getIntrinsicID(); + return ID == Intrinsic::loop_decrement_reg; +} + +static bool IsMasked(Instruction *I) { + auto *Call = dyn_cast(I); + if (!Call) + return false; + + Intrinsic::ID ID = Call->getIntrinsicID(); + // TODO: Support gather/scatter expand/compress operations. + return ID == Intrinsic::masked_store || ID == Intrinsic::masked_load; +} + +bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { + if (skipLoop(L) || DisableTailPredication) + return false; + + Function &F = *L->getHeader()->getParent(); + auto &TPC = getAnalysis(); + auto &TM = TPC.getTM(); + auto *ST = &TM.getSubtarget(F); + TTI = &getAnalysis().getTTI(F); + SE = &getAnalysis().getSE(); + this->L = L; + + // The MVE and LOB extensions are combined to enable tail-predication, but + // there's nothing preventing us from generating VCTP instructions for v8.1m. + if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) { + LLVM_DEBUG(dbgs() << "TP: Not a v8.1m.main+mve target.\n"); + return false; + } + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + + auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* { + for (auto &I : *BB) { + auto *Call = dyn_cast(&I); + if (!Call) + continue; + + Intrinsic::ID ID = Call->getIntrinsicID(); + if (ID == Intrinsic::set_loop_iterations || + ID == Intrinsic::test_set_loop_iterations) + return cast(&I); + } + return nullptr; + }; + + // Look for the hardware loop intrinsic that sets the iteration count. 
+ IntrinsicInst *Setup = FindLoopIterations(Preheader); + + // The test.set iteration could live in the pre- preheader. + if (!Setup) { + if (!Preheader->getSinglePredecessor()) + return false; + Setup = FindLoopIterations(Preheader->getSinglePredecessor()); + if (!Setup) + return false; + } + + // Search for the hardware loop intrinic that decrements the loop counter. + IntrinsicInst *Decrement = nullptr; + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (IsDecrement(I)) { + Decrement = cast(&I); + break; + } + } + } + + if (!Decrement) + return false; + + LLVM_DEBUG(dbgs() << "TP: Running on Loop: " << *L + << *Setup << "\n" + << *Decrement << "\n"); + bool Changed = TryConvert(Setup->getArgOperand(0)); + return Changed; +} + +bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) { + // Look for the following: + + // %trip.count.minus.1 = add i32 %N, -1 + // %broadcast.splatinsert10 = insertelement <4 x i32> undef, + // i32 %trip.count.minus.1, i32 0 + // %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, + // <4 x i32> undef, + // <4 x i32> zeroinitializer + // ... + // ... + // %index = phi i32 + // %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 + // %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, + // <4 x i32> undef, + // <4 x i32> zeroinitializer + // %induction = add <4 x i32> %broadcast.splat, + // %pred = icmp ule <4 x i32> %induction, %broadcast.splat11 + + // And return whether V == %pred. + + using namespace PatternMatch; + + CmpInst::Predicate Pred; + Instruction *Shuffle = nullptr; + Instruction *Induction = nullptr; + + // The vector icmp + if (!match(I, m_ICmp(Pred, m_Instruction(Induction), + m_Instruction(Shuffle))) || + Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle)) + return false; + + // First find the stuff outside the loop which is setting up the limit + // vector.... 
+ // The invariant shuffle that broadcast the limit into a vector. + Instruction *Insert = nullptr; + if (!match(Shuffle, m_ShuffleVector(m_Instruction(Insert), m_Undef(), + m_Zero()))) + return false; + + // Insert the limit into a vector. + Instruction *BECount = nullptr; + if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(BECount), + m_Zero()))) + return false; + + // The limit calculation, backedge count. + Value *TripCount = nullptr; + if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes()))) + return false; + + if (TripCount != NumElements) + return false; + + // Now back to searching inside the loop body... + // Find the add with takes the index iv and adds a constant vector to it. + Instruction *BroadcastSplat = nullptr; + Constant *Const = nullptr; + if (!match(Induction, m_Add(m_Instruction(BroadcastSplat), + m_Constant(Const)))) + return false; + + // Check that we're adding <0, 1, 2, 3... + if (auto *CDS = dyn_cast(Const)) { + for (unsigned i = 0; i < CDS->getNumElements(); ++i) { + if (CDS->getElementAsInteger(i) != i) + return false; + } + } else + return false; + + // The shuffle which broadcasts the index iv into a vector. + if (!match(BroadcastSplat, m_ShuffleVector(m_Instruction(Insert), m_Undef(), + m_Zero()))) + return false; + + // The insert element which initialises a vector with the index iv. + Instruction *IV = nullptr; + if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(IV), m_Zero()))) + return false; + + // The index iv. + auto *Phi = dyn_cast(IV); + if (!Phi) + return false; + + // TODO: Don't think we need to check the entry value. 
+ Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader()); + if (!match(OnEntry, m_Zero())) + return false; + + Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch()); + unsigned Lanes = cast(Insert->getType())->getNumElements(); + + Instruction *LHS = nullptr; + if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes)))) + return false; + + return LHS == Phi; +} + +static VectorType* getVectorType(IntrinsicInst *I) { + unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1; + auto *PtrTy = cast(I->getOperand(TypeOp)->getType()); + return cast(PtrTy->getElementType()); +} + +bool MVETailPredication::IsPredicatedVectorLoop() { + // Check that the loop contains at least one masked load/store intrinsic. + // We only support 'normal' vector instructions - other than masked + // load/stores. + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (IsMasked(&I)) { + VectorType *VecTy = getVectorType(cast(&I)); + unsigned Lanes = VecTy->getNumElements(); + unsigned ElementWidth = VecTy->getScalarSizeInBits(); + // MVE vectors are 128-bit, but don't support 128 x i1. + // TODO: Can we support vectors larger than 128-bits? + unsigned MaxWidth = TTI->getRegisterBitWidth(true); + if (Lanes * ElementWidth != MaxWidth || Lanes == MaxWidth) + return false; + MaskedInsts.push_back(cast(&I)); + } else if (auto *Int = dyn_cast(&I)) { + for (auto &U : Int->args()) { + if (isa(U->getType())) + return false; + } + } + } + } + + return !MaskedInsts.empty(); +} + +Value* MVETailPredication::ComputeElements(Value *TripCount, + VectorType *VecTy) { + const SCEV *TripCountSE = SE->getSCEV(TripCount); + ConstantInt *VF = ConstantInt::get(cast(TripCount->getType()), + VecTy->getNumElements()); + + if (VF->equalsInt(1)) + return nullptr; + + // TODO: Support constant trip counts. 
+ auto VisitAdd = [&](const SCEVAddExpr *S) -> const SCEVMulExpr* { + if (auto *Const = dyn_cast(S->getOperand(0))) { + if (Const->getAPInt() != -VF->getValue()) + return nullptr; + } else + return nullptr; + return dyn_cast(S->getOperand(1)); + }; + + auto VisitMul = [&](const SCEVMulExpr *S) -> const SCEVUDivExpr* { + if (auto *Const = dyn_cast(S->getOperand(0))) { + if (Const->getValue() != VF) + return nullptr; + } else + return nullptr; + return dyn_cast(S->getOperand(1)); + }; + + auto VisitDiv = [&](const SCEVUDivExpr *S) -> const SCEV* { + if (auto *Const = dyn_cast(S->getRHS())) { + if (Const->getValue() != VF) + return nullptr; + } else + return nullptr; + + if (auto *RoundUp = dyn_cast(S->getLHS())) { + if (auto *Const = dyn_cast(RoundUp->getOperand(0))) { + if (Const->getAPInt() != (VF->getValue() - 1)) + return nullptr; + } else + return nullptr; + + return RoundUp->getOperand(1); + } + return nullptr; + }; + + // TODO: Can we use SCEV helpers, such as findArrayDimensions, and friends to + // determine the numbers of elements instead? Looks like this is what is used + // for delinearization, but I'm not sure if it can be applied to the + // vectorized form - at least not without a bit more work than I feel + // comfortable with. 
+ + // Search for Elems in the following SCEV: + // (1 + ((-VF + (VF * (((VF - 1) + %Elems) /u VF))) /u VF)) + const SCEV *Elems = nullptr; + if (auto *TC = dyn_cast(TripCountSE)) + if (auto *Div = dyn_cast(TC->getOperand(1))) + if (auto *Add = dyn_cast(Div->getLHS())) + if (auto *Mul = VisitAdd(Add)) + if (auto *Div = VisitMul(Mul)) + if (auto *Res = VisitDiv(Div)) + Elems = Res; + + if (!Elems) + return nullptr; + + Instruction *InsertPt = L->getLoopPreheader()->getTerminator(); + if (!isSafeToExpandAt(Elems, InsertPt, *SE)) + return nullptr; + + auto DL = L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "elements"); + return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt); +} + +// Look through the exit block to see whether there's a duplicate predicate +// instruction. This can happen when we need to perform a select on values +// from the last and previous iteration. Instead of doing a straight +// replacement of that predicate with the vctp, clone the vctp and place it +// in the block. This means that the VPR doesn't have to be live into the +// exit block which should make it easier to convert this loop into a proper +// tail predicated loop. +static void Cleanup(DenseMap &NewPredicates, + SetVector &MaybeDead, Loop *L) { + if (BasicBlock *Exit = L->getUniqueExitBlock()) { + for (auto &Pair : NewPredicates) { + Instruction *OldPred = Pair.first; + Instruction *NewPred = Pair.second; + + for (auto &I : *Exit) { + if (I.isSameOperationAs(OldPred)) { + Instruction *PredClone = NewPred->clone(); + PredClone->insertBefore(&I); + I.replaceAllUsesWith(PredClone); + MaybeDead.insert(&I); + break; + } + } + } + } + + // Drop references and add operands to check for dead. 
+ SmallPtrSet Dead; + while (!MaybeDead.empty()) { + auto *I = MaybeDead.front(); + MaybeDead.remove(I); + if (I->hasNUsesOrMore(1)) + continue; + + for (auto &U : I->operands()) { + if (auto *OpI = dyn_cast(U)) + MaybeDead.insert(OpI); + } + I->dropAllReferences(); + Dead.insert(I); + } + + for (auto *I : Dead) + I->eraseFromParent(); + + for (auto I : L->blocks()) + DeleteDeadPHIs(I); +} + +bool MVETailPredication::TryConvert(Value *TripCount) { + if (!IsPredicatedVectorLoop()) + return false; + + LLVM_DEBUG(dbgs() << "TP: Found predicated vector loop.\n"); + + // Walk through the masked intrinsics and try to find whether the predicate + // operand is generated from an induction variable. + Module *M = L->getHeader()->getModule(); + Type *Ty = IntegerType::get(M->getContext(), 32); + SetVector Predicates; + DenseMap NewPredicates; + + for (auto *I : MaskedInsts) { + Intrinsic::ID ID = I->getIntrinsicID(); + unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3; + auto *Predicate = dyn_cast(I->getArgOperand(PredOp)); + if (!Predicate || Predicates.count(Predicate)) + continue; + + VectorType *VecTy = getVectorType(I); + Value *NumElements = ComputeElements(TripCount, VecTy); + if (!NumElements) + continue; + + if (!isTailPredicate(Predicate, NumElements)) { + LLVM_DEBUG(dbgs() << "TP: Not tail predicate: " << *Predicate << "\n"); + continue; + } + + LLVM_DEBUG(dbgs() << "TP: Found tail predicate: " << *Predicate << "\n"); + Predicates.insert(Predicate); + + // Insert a phi to count the number of elements processed by the loop. + IRBuilder<> Builder(L->getHeader()->getFirstNonPHI()); + PHINode *Processed = Builder.CreatePHI(Ty, 2); + Processed->addIncoming(NumElements, L->getLoopPreheader()); + + // Insert the intrinsic to represent the effect of tail predication. 
+ Builder.SetInsertPoint(cast(Predicate)); + ConstantInt *Factor = + ConstantInt::get(cast(Ty), VecTy->getNumElements()); + Intrinsic::ID VCTPID; + switch (VecTy->getNumElements()) { + default: + llvm_unreachable("unexpected number of lanes"); + case 2: VCTPID = Intrinsic::arm_vctp64; break; + case 4: VCTPID = Intrinsic::arm_vctp32; break; + case 8: VCTPID = Intrinsic::arm_vctp16; break; + case 16: VCTPID = Intrinsic::arm_vctp8; break; + } + Function *VCTP = Intrinsic::getDeclaration(M, VCTPID); + Value *TailPredicate = Builder.CreateCall(VCTP, Processed); + Predicate->replaceAllUsesWith(TailPredicate); + NewPredicates[Predicate] = cast(TailPredicate); + + // Add the incoming value to the new phi. + // TODO: This add likely already exists in the loop. + Value *Remaining = Builder.CreateSub(Processed, Factor); + Processed->addIncoming(Remaining, L->getLoopLatch()); + LLVM_DEBUG(dbgs() << "TP: Insert processed elements phi: " + << *Processed << "\n" + << "TP: Inserted VCTP: " << *TailPredicate << "\n"); + } + + // Now clean up. + Cleanup(NewPredicates, Predicates, L); + return true; +} + +Pass *llvm::createMVETailPredicationPass() { + return new MVETailPredication(); +} + +char MVETailPredication::ID = 0; + +INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false) +INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false) diff --git a/lib/Target/ARM/MVEVPTBlockPass.cpp b/lib/Target/ARM/MVEVPTBlockPass.cpp new file mode 100644 index 00000000000..bc0a80b177e --- /dev/null +++ b/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -0,0 +1,278 @@ +//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "arm-mve-vpt" + +namespace { + class MVEVPTBlock : public MachineFunctionPass { + public: + static char ID; + const Thumb2InstrInfo *TII; + const TargetRegisterInfo *TRI; + + MVEVPTBlock() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "MVE VPT block insertion pass"; + } + + private: + bool InsertVPTBlocks(MachineBasicBlock &MBB); + }; + + char MVEVPTBlock::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) + +enum VPTMaskValue { + T = 8, // 0b1000 + TT = 4, // 0b0100 + TE = 12, // 0b1100 + TTT = 2, // 0b0010 + TTE = 6, // 0b0110 + TEE = 10, // 0b1010 + TET = 14, // 0b1110 + TTTT = 1, // 0b0001 + TTTE = 3, // 0b0011 + TTEE = 5, // 0b0101 + TTET = 7, // 0b0111 + TEEE = 9, // 0b1001 + TEET = 11, // 0b1011 + TETT 
= 13, // 0b1101 + TETE = 15 // 0b1111 +}; + +static unsigned VCMPOpcodeToVPT(unsigned Opcode) { + switch (Opcode) { + case ARM::MVE_VCMPf32: + return ARM::MVE_VPTv4f32; + case ARM::MVE_VCMPf16: + return ARM::MVE_VPTv8f16; + case ARM::MVE_VCMPi8: + return ARM::MVE_VPTv16i8; + case ARM::MVE_VCMPi16: + return ARM::MVE_VPTv8i16; + case ARM::MVE_VCMPi32: + return ARM::MVE_VPTv4i32; + case ARM::MVE_VCMPu8: + return ARM::MVE_VPTv16u8; + case ARM::MVE_VCMPu16: + return ARM::MVE_VPTv8u16; + case ARM::MVE_VCMPu32: + return ARM::MVE_VPTv4u32; + case ARM::MVE_VCMPs8: + return ARM::MVE_VPTv16s8; + case ARM::MVE_VCMPs16: + return ARM::MVE_VPTv8s16; + case ARM::MVE_VCMPs32: + return ARM::MVE_VPTv4s32; + + case ARM::MVE_VCMPf32r: + return ARM::MVE_VPTv4f32r; + case ARM::MVE_VCMPf16r: + return ARM::MVE_VPTv8f16r; + case ARM::MVE_VCMPi8r: + return ARM::MVE_VPTv16i8r; + case ARM::MVE_VCMPi16r: + return ARM::MVE_VPTv8i16r; + case ARM::MVE_VCMPi32r: + return ARM::MVE_VPTv4i32r; + case ARM::MVE_VCMPu8r: + return ARM::MVE_VPTv16u8r; + case ARM::MVE_VCMPu16r: + return ARM::MVE_VPTv8u16r; + case ARM::MVE_VCMPu32r: + return ARM::MVE_VPTv4u32r; + case ARM::MVE_VCMPs8r: + return ARM::MVE_VPTv16s8r; + case ARM::MVE_VCMPs16r: + return ARM::MVE_VPTv8s16r; + case ARM::MVE_VCMPs32r: + return ARM::MVE_VPTv4s32r; + + default: + return 0; + } +} + +static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, + const TargetRegisterInfo *TRI, + unsigned &NewOpcode) { + // Search backwards to the instruction that defines VPR. This may or not + // be a VCMP, we check that after this loop. If we find another instruction + // that reads cpsr, we return nullptr. 
+ MachineBasicBlock::iterator CmpMI = MI; + while (CmpMI != MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->modifiesRegister(ARM::VPR, TRI)) + break; + if (CmpMI->readsRegister(ARM::VPR, TRI)) + break; + } + + if (CmpMI == MI) + return nullptr; + NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); + if (NewOpcode == 0) + return nullptr; + + // Search forward from CmpMI to MI, checking if either register was def'd + if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), + MI, TRI)) + return nullptr; + return &*CmpMI; +} + +bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { + bool Modified = false; + MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); + MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); + + while (MBIter != EndIter) { + MachineInstr *MI = &*MBIter; + unsigned PredReg = 0; + DebugLoc dl = MI->getDebugLoc(); + + ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); + + // The idea of the predicate is that None, Then and Else are for use when + // handling assembly language: they correspond to the three possible + // suffixes "", "t" and "e" on the mnemonic. So when instructions are read + // from assembly source or disassembled from object code, you expect to see + // a mixture whenever there's a long VPT block. But in code generation, we + // hope we'll never generate an Else as input to this pass. + assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); + + if (Pred == ARMVCC::None) { + ++MBIter; + continue; + } + + LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump()); + int VPTInstCnt = 1; + ARMVCC::VPTCodes NextPred; + + // Look at subsequent instructions, checking if they can be in the same VPT + // block. 
+ ++MBIter; + while (MBIter != EndIter && VPTInstCnt < 4) { + NextPred = getVPTInstrPredicate(*MBIter, PredReg); + assert(NextPred != ARMVCC::Else && + "VPT block pass does not expect Else preds"); + if (NextPred != Pred) + break; + LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); + ++VPTInstCnt; + ++MBIter; + }; + + unsigned BlockMask = 0; + switch (VPTInstCnt) { + case 1: + BlockMask = VPTMaskValue::T; + break; + case 2: + BlockMask = VPTMaskValue::TT; + break; + case 3: + BlockMask = VPTMaskValue::TTT; + break; + case 4: + BlockMask = VPTMaskValue::TTTT; + break; + default: + llvm_unreachable("Unexpected number of instruction in a VPT block"); + }; + + // Search back for a VCMP that can be folded to create a VPT, or else create + // a VPST directly + MachineInstrBuilder MIBuilder; + unsigned NewOpcode; + MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); + if (VCMP) { + LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); + MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); + MIBuilder.addImm(BlockMask); + MIBuilder.add(VCMP->getOperand(1)); + MIBuilder.add(VCMP->getOperand(2)); + MIBuilder.add(VCMP->getOperand(3)); + VCMP->eraseFromParent(); + } else { + MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); + MIBuilder.addImm(BlockMask); + } + + finalizeBundle( + Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); + + Modified = true; + } + return Modified; +} + +bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { + const ARMSubtarget &STI = + static_cast(Fn.getSubtarget()); + + if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) + return false; + + TII = static_cast(STI.getInstrInfo()); + TRI = STI.getRegisterInfo(); + + LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" + << "********** Function: " << Fn.getName() << '\n'); + + bool Modified = false; + for (MachineBasicBlock &MBB : Fn) + Modified |= InsertVPTBlocks(MBB); + + LLVM_DEBUG(dbgs() << 
"**************************************\n"); + return Modified; +} + +/// createMVEVPTBlock - Returns an instance of the MVE VPT block +/// insertion pass. +FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 426e9a0ed9b..956d474f1d7 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -164,7 +164,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc dl; - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; @@ -459,8 +459,8 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { else if (MI.getOpcode() == ARM::tPOP) { return true; } else if (MI.getOpcode() == ARM::tMOVr) { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && ARM::hGPRRegClass.contains(Dst)); } @@ -483,7 +483,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - unsigned FramePtr = RegInfo->getFrameRegister(MF); + Register FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index f57d93a2e83..fccaa4c9cc8 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -80,12 +80,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { 
assert((RC == &ARM::tGPRRegClass || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - isARMLowRegister(SrcReg))) && "Unknown regclass!"); + (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && + "Unknown regclass!"); if (RC == &ARM::tGPRRegClass || - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - isARMLowRegister(SrcReg))) { + (Register::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -108,13 +107,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) || - (TargetRegisterInfo::isPhysicalRegister(DestReg) && - isARMLowRegister(DestReg))) && "Unknown regclass!"); + assert( + (RC->hasSuperClassEq(&ARM::tGPRRegClass) || + (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && + "Unknown regclass!"); if (RC->hasSuperClassEq(&ARM::tGPRRegClass) || - (TargetRegisterInfo::isPhysicalRegister(DestReg) && - isARMLowRegister(DestReg))) { + (Register::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 3143eb9840e..786fc78d023 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -87,7 +87,7 @@ static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses, for (auto &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP) continue; if (MO.isUse()) @@ -145,8 +145,8 @@ Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI, MI->getOperand(1).getSubReg() == 0 && "Sub-register indices still around?"); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = 
MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); // First check if it's safe to move it. if (Uses.count(DstReg) || Defs.count(SrcReg)) @@ -308,131 +308,3 @@ bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) { /// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks /// insertion pass. FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); } - -#undef DEBUG_TYPE -#define DEBUG_TYPE "arm-mve-vpt" - -namespace { - class MVEVPTBlock : public MachineFunctionPass { - public: - static char ID; - const Thumb2InstrInfo *TII; - const TargetRegisterInfo *TRI; - - MVEVPTBlock() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &Fn) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - - StringRef getPassName() const override { - return "MVE VPT block insertion pass"; - } - - private: - bool InsertVPTBlocks(MachineBasicBlock &MBB); - }; - - char MVEVPTBlock::ID = 0; - -} // end anonymous namespace - -INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) - -enum VPTMaskValue { - T = 8, // 0b1000 - TT = 4, // 0b0100 - TE = 12, // 0b1100 - TTT = 2, // 0b0010 - TTE = 6, // 0b0110 - TEE = 10, // 0b1010 - TET = 14, // 0b1110 - TTTT = 1, // 0b0001 - TTTE = 3, // 0b0011 - TTEE = 5, // 0b0101 - TTET = 7, // 0b0111 - TEEE = 9, // 0b1001 - TEET = 11, // 0b1011 - TETT = 13, // 0b1101 - TETE = 15 // 0b1111 -}; - -bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { - bool Modified = false; - MachineBasicBlock::iterator MBIter = Block.begin(); - MachineBasicBlock::iterator EndIter = Block.end(); - - while (MBIter != EndIter) { - MachineInstr *MI = &*MBIter; - unsigned PredReg = 0; - DebugLoc dl = MI->getDebugLoc(); - - ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); - - // 
The idea of the predicate is that None, Then and Else are for use when - // handling assembly language: they correspond to the three possible - // suffixes "", "t" and "e" on the mnemonic. So when instructions are read - // from assembly source or disassembled from object code, you expect to see - // a mixture whenever there's a long VPT block. But in code generation, we - // hope we'll never generate an Else as input to this pass. - - assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); - - if (Pred == ARMVCC::None) { - ++MBIter; - continue; - } - - MachineInstrBuilder MIBuilder = - BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST)); - // The mask value for the VPST instruction is T = 0b1000 = 8 - MIBuilder.addImm(VPTMaskValue::T); - - MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr(); - int VPTInstCnt = 1; - ARMVCC::VPTCodes NextPred; - - do { - ++MBIter; - NextPred = getVPTInstrPredicate(*MBIter, PredReg); - } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4); - - MachineInstr *LastMI = &*MBIter; - finalizeBundle(Block, VPSTInsertPos.getInstrIterator(), - ++LastMI->getIterator()); - - Modified = true; - LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump();); - - ++MBIter; - } - return Modified; -} - -bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { - const ARMSubtarget &STI = - static_cast(Fn.getSubtarget()); - - if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) - return false; - - TII = static_cast(STI.getInstrInfo()); - TRI = STI.getRegisterInfo(); - - LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" - << "********** Function: " << Fn.getName() << '\n'); - - bool Modified = false; - for (MachineBasicBlock &MBB : Fn) - Modified |= InsertVPTBlocks(MBB); - - LLVM_DEBUG(dbgs() << "**************************************\n"); - return Modified; -} - -/// createMVEVPTBlock - Returns an instance of the MVE VPT block -/// insertion pass. 
-FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 5a965f7a6b9..af1f0aeb27b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -159,9 +159,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass); + MRI->constrainRegClass(SrcReg, &ARM::GPRPairnospRegClass); } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); @@ -200,10 +200,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for // gsub_0, but needs an extra constraint for gsub_1 (which could be sp // otherwise). 
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { MachineRegisterInfo *MRI = &MF.getRegInfo(); - MRI->constrainRegClass(DestReg, - &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass); + MRI->constrainRegClass(DestReg, &ARM::GPRPairnospRegClass); } MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); @@ -211,7 +210,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); return; } @@ -470,12 +469,17 @@ immediateOffsetOpcode(unsigned opcode) bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, + const TargetRegisterInfo *TRI) { unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; + MachineFunction &MF = *MI.getParent()->getParent(); + const TargetRegisterClass *RegClass = + TII.getRegClass(Desc, FrameRegIdx, TRI, MF); + // Memory operands in inline assembly always use AddrModeT2_i12. if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? @@ -554,7 +558,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // register then we change to an immediate version. 
unsigned NewOpc = Opcode; if (AddrMode == ARMII::AddrModeT2_so) { - unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); + Register OffsetReg = MI.getOperand(FrameRegIdx + 1).getReg(); if (OffsetReg != 0) { MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); return Offset == 0; @@ -645,10 +649,21 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1); // Attempt to fold address computation - // Common case: small offset, fits into instruction. + // Common case: small offset, fits into instruction. We need to make sure + // the register class is correct too, for instructions like the MVE + // VLDRH.32, which only accepts low tGPR registers. int ImmedOffset = Offset / Scale; unsigned Mask = (1 << NumBits) - 1; - if ((unsigned)Offset <= Mask * Scale) { + if ((unsigned)Offset <= Mask * Scale && + (Register::isVirtualRegister(FrameReg) || + RegClass->contains(FrameReg))) { + if (Register::isVirtualRegister(FrameReg)) { + // Make sure the register class for the virtual register is correct + MachineRegisterInfo *MRI = &MF.getRegInfo(); + if (!MRI->constrainRegClass(FrameReg, RegClass)) + llvm_unreachable("Unable to constrain virtual register class."); + } + // Replace the FrameIndex with fp/sp MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (isSub) { @@ -681,7 +696,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } Offset = (isSub) ? 
-Offset : Offset; - return Offset == 0; + return Offset == 0 && (Register::isVirtualRegister(FrameReg) || + RegClass->contains(FrameReg)); } ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 37a85fa3841..c5a62aa3399 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -300,7 +300,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { for (const MachineOperand &MO : CPSRDef->operands()) { if (!MO.isReg() || MO.isUndef() || MO.isUse()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0 || Reg == ARM::CPSR) continue; Defs.insert(Reg); @@ -309,7 +309,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) { for (const MachineOperand &MO : Use->operands()) { if (!MO.isReg() || MO.isUndef() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Defs.count(Reg)) return false; } @@ -380,7 +380,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0 || Reg == ARM::CPSR) continue; if (isPCOk && Reg == ARM::PC) @@ -464,11 +464,11 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // For this reason we can't reuse the logic at the end of this function; we // have to implement the MI building here. bool IsStore = Entry.WideOpc == ARM::t2STR_POST; - unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); - unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg(); + Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg(); + Register Rn = MI->getOperand(IsStore ? 
0 : 1).getReg(); unsigned Offset = MI->getOperand(3).getImm(); unsigned PredImm = MI->getOperand(4).getImm(); - unsigned PredReg = MI->getOperand(5).getReg(); + Register PredReg = MI->getOperand(5).getReg(); assert(isARMLowRegister(Rt)); assert(isARMLowRegister(Rn)); @@ -496,7 +496,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, return true; } case ARM::t2LDMIA: { - unsigned BaseReg = MI->getOperand(0).getReg(); + Register BaseReg = MI->getOperand(0).getReg(); assert(isARMLowRegister(BaseReg)); // For the non-writeback version (this one), the base register must be @@ -524,7 +524,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, break; case ARM::t2LDMIA_RET: { - unsigned BaseReg = MI->getOperand(1).getReg(); + Register BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET @@ -537,7 +537,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2STMDB_UPD: { OpNum = 0; - unsigned BaseReg = MI->getOperand(1).getReg(); + Register BaseReg = MI->getOperand(1).getReg(); if (BaseReg == ARM::SP && (Entry.WideOpc == ARM::t2LDMIA_UPD || Entry.WideOpc == ARM::t2STMDB_UPD)) { @@ -743,11 +743,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // are optimizing for size. return false; - unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); + Register Reg0 = MI->getOperand(0).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. if (MI->getOpcode() == ARM::t2MUL) { - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); // Early exit if the regs aren't all low regs. 
if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) || !isARMLowRegister(Reg2)) @@ -782,7 +782,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (Imm > Limit) return false; } else { - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) return false; } @@ -868,7 +868,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp index a96417ffce4..b0ba58d8dc4 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -107,8 +107,9 @@ void ThumbRegisterInfo::emitLoadConstPool( MachineFunction &MF = *MBB.getParent(); const ARMSubtarget &STI = MF.getSubtarget(); if (STI.isThumb1Only()) { - assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) && - "Thumb1 does not have ldr to high register"); + assert( + (isARMLowRegister(DestReg) || Register::isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred, PredReg, MIFlags); } @@ -141,7 +142,7 @@ static void emitThumbRegPlusImmInReg( unsigned LdReg = DestReg; if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); - if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) + if (!isARMLowRegister(DestReg) && !Register::isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { @@ -371,7 +372,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if (Opcode == ARM::tADDframe) { Offset += 
MI.getOperand(FrameRegIdx+1).getImm(); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, *this); @@ -509,7 +510,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. - unsigned TmpReg = MI.getOperand(0).getReg(); + Register TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; if (Opcode == ARM::tLDRspi) { if (FrameReg == ARM::SP || STI.genExecuteOnly()) diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index 7586bd7b78f..1db6b2236b4 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -97,7 +97,7 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, assert(RegOp.isReg() && "Operand must be a register when you're" "using 'A'..'Z' operand extracodes."); - unsigned Reg = RegOp.getReg(); + Register Reg = RegOp.getReg(); unsigned ByteNumber = ExtraCode[0] - 'A'; diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp index c45b2d0e39c..83d0f684533 100644 --- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -140,8 +140,8 @@ bool AVRExpandPseudo:: expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool DstIsKill = MI.getOperand(1).isKill(); bool SrcIsKill = MI.getOperand(2).isKill(); @@ -173,8 +173,8 @@ bool AVRExpandPseudo:: expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg; - 
unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool DstIsKill = MI.getOperand(1).isKill(); bool SrcIsKill = MI.getOperand(2).isKill(); @@ -220,7 +220,7 @@ bool AVRExpandPseudo:: expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned DstLoReg, DstHiReg; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); bool ImpIsDead = MI.getOperand(3).isDead(); @@ -874,7 +874,7 @@ unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { // Exclude all the registers being used by the instruction. for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index 5e91bb8632c..e6c48de5a78 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -30,7 +30,8 @@ namespace llvm { AVRFrameLowering::AVRFrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 1, -2) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align::None(), + -2) {} bool AVRFrameLowering::canSimplifyCallFramePseudos( const MachineFunction &MF) const { @@ -323,7 +324,7 @@ static void fixStackStores(MachineBasicBlock &MBB, "Invalid register, should be SP!"); if (insertPushes) { // Replace this instruction with a push. 
- unsigned SrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); bool SrcIsKill = MI.getOperand(2).isKill(); // We can't use PUSHWRr here because when expanded the order of the new diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp index 5cb4441c438..4c4f4faa050 100644 --- a/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -251,7 +251,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, RegisterSDNode *RegNode = cast(CopyFromRegOp->getOperand(1)); Reg = RegNode->getReg(); - CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) || + CanHandleRegImmOpt &= (Register::isVirtualRegister(Reg) || AVR::PTRDISPREGSRegClass.contains(Reg)); } else { CanHandleRegImmOpt = false; diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index b6ba5f22faf..f12c59b7d8c 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -236,7 +236,7 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, setLibcallName(RTLIB::SIN_F32, "sin"); setLibcallName(RTLIB::COS_F32, "cos"); - setMinFunctionAlignment(1); + setMinFunctionAlignment(Align(2)); setMinimumJumpTableEntries(UINT_MAX); } @@ -1517,11 +1517,11 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass); unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass); - unsigned ShiftReg = RI.createVirtualRegister(RC); - unsigned ShiftReg2 = RI.createVirtualRegister(RC); - unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register ShiftReg = RI.createVirtualRegister(RC); + Register ShiftReg2 = RI.createVirtualRegister(RC); + Register ShiftAmtSrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = 
MI.getOperand(0).getReg(); // BB: // cpi N, 0 @@ -1568,7 +1568,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, static bool isCopyMulResult(MachineBasicBlock::iterator const &I) { if (I->getOpcode() == AVR::COPY) { - unsigned SrcReg = I->getOperand(1).getReg(); + Register SrcReg = I->getOperand(1).getReg(); return (SrcReg == AVR::R0 || SrcReg == AVR::R1); } @@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html switch (Constraint[0]) { + default: + break; case 'a': // Simple upper registers case 'b': // Base pointer registers pairs case 'd': // Upper register @@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { case 'O': // Integer constant (Range: 8, 16, 24) case 'P': // Integer constant (Range: 1) case 'R': // Integer constant (Range: -6 to 5)x - return C_Other; - default: - break; + return C_Immediate; } } @@ -2006,10 +2006,9 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -unsigned AVRTargetLowering::getRegisterByName(const char *RegName, - EVT VT, - SelectionDAG &DAG) const { - unsigned Reg; +Register AVRTargetLowering::getRegisterByName(const char *RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg; if (VT == MVT::i8) { Reg = StringSwitch(RegName) diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h index ed2d0835903..6c722fa5414 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -125,8 +125,8 @@ public: std::vector &Ops, SelectionDAG &DAG) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; bool 
shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const override { diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index a6b36f80485..8fce05c933b 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -158,7 +158,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // We need to materialize the offset via an add instruction. unsigned Opcode; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); II++; // Skip over the FRMIDX (and now MOVW) instruction. diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index a36c8b0f964..25304280d00 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -50,7 +50,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), SubTarget(TT, getCPU(CPU), FS, *this) { - this->TLOF = make_unique(); + this->TLOF = std::make_unique(); initAsmInfo(); } diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index aac5644711e..af60bc4fdc9 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -199,22 +199,22 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - return make_unique(Str, S); + return std::make_unique(Str, S); } static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(RegNum, S, E); + return std::make_unique(RegNum, S, E); } static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(Val, S, E); + return std::make_unique(Val, S, E); } static std::unique_ptr CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(RegNum, Val, S, E); 
+ return std::make_unique(RegNum, Val, S, E); } void makeToken(StringRef Token) { diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp index 6025e4b2437..1c69fea5962 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp @@ -152,7 +152,7 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, } std::unique_ptr createAVRELFObjectWriter(uint8_t OSABI) { - return make_unique(OSABI); + return std::make_unique(OSABI); } } // end of namespace llvm diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp index 75885fd058a..ce1d2ecd9d2 100644 --- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -194,7 +194,7 @@ public: } static std::unique_ptr createToken(StringRef Str, SMLoc S) { - auto Op = make_unique(Token); + auto Op = std::make_unique(Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; @@ -203,7 +203,7 @@ public: static std::unique_ptr createReg(unsigned RegNo, SMLoc S, SMLoc E) { - auto Op = make_unique(Register); + auto Op = std::make_unique(Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; @@ -212,7 +212,7 @@ public: static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(Immediate); + auto Op = std::make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; diff --git a/lib/Target/BPF/BPF.h b/lib/Target/BPF/BPF.h index d311fc15409..6e4f35f4c5d 100644 --- a/lib/Target/BPF/BPF.h +++ b/lib/Target/BPF/BPF.h @@ -15,17 +15,19 @@ namespace llvm { class BPFTargetMachine; -ModulePass *createBPFAbstractMemberAccess(); +ModulePass *createBPFAbstractMemberAccess(BPFTargetMachine *TM); FunctionPass *createBPFISelDag(BPFTargetMachine &TM); FunctionPass *createBPFMISimplifyPatchablePass(); FunctionPass *createBPFMIPeepholePass(); +FunctionPass 
*createBPFMIPeepholeTruncElimPass(); FunctionPass *createBPFMIPreEmitPeepholePass(); FunctionPass *createBPFMIPreEmitCheckingPass(); void initializeBPFAbstractMemberAccessPass(PassRegistry&); void initializeBPFMISimplifyPatchablePass(PassRegistry&); void initializeBPFMIPeepholePass(PassRegistry&); +void initializeBPFMIPeepholeTruncElimPass(PassRegistry&); void initializeBPFMIPreEmitPeepholePass(PassRegistry&); void initializeBPFMIPreEmitCheckingPass(PassRegistry&); } diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 51d4cbc8a42..400701c4e5c 100644 --- a/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -50,6 +50,28 @@ // addr = preserve_struct_access_index(base, gep_index, di_index) // !llvm.preserve.access.index // +// Bitfield member access needs special attention. User cannot take the +// address of a bitfield acceess. To facilitate kernel verifier +// for easy bitfield code optimization, a new clang intrinsic is introduced: +// uint32_t __builtin_preserve_field_info(member_access, info_kind) +// In IR, a chain with two (or more) intrinsic calls will be generated: +// ... +// addr = preserve_struct_access_index(base, 1, 1) !struct s +// uint32_t result = bpf_preserve_field_info(addr, info_kind) +// +// Suppose the info_kind is FIELD_SIGNEDNESS, +// The above two IR intrinsics will be replaced with +// a relocatable insn: +// signness = /* signness of member_access */ +// and signness can be changed by bpf loader based on the +// types on the host. 
+// +// User can also test whether a field exists or not with +// uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE) +// The field will be always available (result = 1) during initial +// compilation, but bpf loader can patch with the correct value +// on the target host where the member_access may or may not be available +// //===----------------------------------------------------------------------===// #include "BPF.h" @@ -65,13 +87,12 @@ #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include #define DEBUG_TYPE "bpf-abstract-member-access" namespace llvm { const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama"; -const std::string BPFCoreSharedInfo::PatchableExtSecName = - ".BPF.patchable_externs"; } // namespace llvm using namespace llvm; @@ -87,40 +108,62 @@ class BPFAbstractMemberAccess final : public ModulePass { public: static char ID; - BPFAbstractMemberAccess() : ModulePass(ID) {} + TargetMachine *TM; + // Add optional BPFTargetMachine parameter so that BPF backend can add the phase + // with target machine to find out the endianness. The default constructor (without + // parameters) is used by the pass manager for managing purposes. + BPFAbstractMemberAccess(BPFTargetMachine *TM = nullptr) : ModulePass(ID), TM(TM) {} + + struct CallInfo { + uint32_t Kind; + uint32_t AccessIndex; + MDNode *Metadata; + Value *Base; + }; + typedef std::stack> CallInfoStack; private: enum : uint32_t { BPFPreserveArrayAI = 1, BPFPreserveUnionAI = 2, BPFPreserveStructAI = 3, + BPFPreserveFieldInfoAI = 4, }; std::map GEPGlobals; // A map to link preserve_*_access_index instrinsic calls. - std::map> AIChain; + std::map> AIChain; // A map to hold all the base preserve_*_access_index instrinsic calls. - // The base call is not an input of any other preserve_*_access_index + // The base call is not an input of any other preserve_* // intrinsics. 
- std::map BaseAICalls; + std::map BaseAICalls; bool doTransformation(Module &M); - void traceAICall(CallInst *Call, uint32_t Kind); - void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind); - void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind); + void traceAICall(CallInst *Call, CallInfo &ParentInfo); + void traceBitCast(BitCastInst *BitCast, CallInst *Parent, + CallInfo &ParentInfo); + void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, + CallInfo &ParentInfo); void collectAICallChains(Module &M, Function &F); - bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind); + bool IsPreserveDIAccessIndexCall(const CallInst *Call, CallInfo &Cinfo); + bool IsValidAIChain(const MDNode *ParentMeta, uint32_t ParentAI, + const MDNode *ChildMeta); bool removePreserveAccessIndexIntrinsic(Module &M); void replaceWithGEP(std::vector &CallList, uint32_t NumOfZerosIndex, uint32_t DIIndex); + bool HasPreserveFieldInfoCall(CallInfoStack &CallStack); + void GetStorageBitRange(DICompositeType *CTy, DIDerivedType *MemberTy, + uint32_t AccessIndex, uint32_t &StartBitOffset, + uint32_t &EndBitOffset); + uint32_t GetFieldInfo(uint32_t InfoKind, DICompositeType *CTy, + uint32_t AccessIndex, uint32_t PatchImm); - Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr, - std::string &AccessKey, uint32_t Kind, - MDNode *&TypeMeta); - bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); - bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); + Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo, + std::string &AccessKey, MDNode *&BaseMeta); + uint64_t getConstant(const Value *IndexValue); + bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo); }; } // End anonymous namespace @@ -128,23 +171,65 @@ char BPFAbstractMemberAccess::ID = 0; INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE, "abstracting struct/union member accessees", false, false) -ModulePass 
*llvm::createBPFAbstractMemberAccess() { - return new BPFAbstractMemberAccess(); +ModulePass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) { + return new BPFAbstractMemberAccess(TM); } bool BPFAbstractMemberAccess::runOnModule(Module &M) { LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n"); // Bail out if no debug info. - if (empty(M.debug_compile_units())) + if (M.debug_compile_units().empty()) return false; return doTransformation(M); } +static bool SkipDIDerivedTag(unsigned Tag) { + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type && + Tag != dwarf::DW_TAG_member) + return false; + return true; +} + +static DIType * stripQualifiers(DIType *Ty) { + while (auto *DTy = dyn_cast(Ty)) { + if (!SkipDIDerivedTag(DTy->getTag())) + break; + Ty = DTy->getBaseType(); + } + return Ty; +} + +static const DIType * stripQualifiers(const DIType *Ty) { + while (auto *DTy = dyn_cast(Ty)) { + if (!SkipDIDerivedTag(DTy->getTag())) + break; + Ty = DTy->getBaseType(); + } + return Ty; +} + +static uint32_t calcArraySize(const DICompositeType *CTy, uint32_t StartDim) { + DINodeArray Elements = CTy->getElements(); + uint32_t DimSize = 1; + for (uint32_t I = StartDim; I < Elements.size(); ++I) { + if (auto *Element = dyn_cast_or_null(Elements[I])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) { + const DISubrange *SR = cast(Element); + auto *CI = SR->getCount().dyn_cast(); + DimSize *= CI->getSExtValue(); + } + } + + return DimSize; +} + /// Check whether a call is a preserve_*_access_index intrinsic call or not. 
bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, - uint32_t &Kind) { + CallInfo &CInfo) { if (!Call) return false; @@ -152,15 +237,40 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, if (!GV) return false; if (GV->getName().startswith("llvm.preserve.array.access.index")) { - Kind = BPFPreserveArrayAI; + CInfo.Kind = BPFPreserveArrayAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); + CInfo.Base = Call->getArgOperand(0); return true; } if (GV->getName().startswith("llvm.preserve.union.access.index")) { - Kind = BPFPreserveUnionAI; + CInfo.Kind = BPFPreserveUnionAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(1)); + CInfo.Base = Call->getArgOperand(0); return true; } if (GV->getName().startswith("llvm.preserve.struct.access.index")) { - Kind = BPFPreserveStructAI; + CInfo.Kind = BPFPreserveStructAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic"); + CInfo.AccessIndex = getConstant(Call->getArgOperand(2)); + CInfo.Base = Call->getArgOperand(0); + return true; + } + if (GV->getName().startswith("llvm.bpf.preserve.field.info")) { + CInfo.Kind = BPFPreserveFieldInfoAI; + CInfo.Metadata = nullptr; + // Check validity of info_kind as clang did not check this. 
+ uint64_t InfoKind = getConstant(Call->getArgOperand(1)); + if (InfoKind >= BPFCoreSharedInfo::MAX_FIELD_RELOC_KIND) + report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic"); + CInfo.AccessIndex = InfoKind; return true; } @@ -173,8 +283,7 @@ void BPFAbstractMemberAccess::replaceWithGEP(std::vector &CallList, for (auto Call : CallList) { uint32_t Dimension = 1; if (DimensionIndex > 0) - Dimension = cast(Call->getArgOperand(DimensionIndex)) - ->getZExtValue(); + Dimension = getConstant(Call->getArgOperand(DimensionIndex)); Constant *Zero = ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0); @@ -200,14 +309,14 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { for (auto &BB : F) for (auto &I : BB) { auto *Call = dyn_cast(&I); - uint32_t Kind; - if (!IsPreserveDIAccessIndexCall(Call, Kind)) + CallInfo CInfo; + if (!IsPreserveDIAccessIndexCall(Call, CInfo)) continue; Found = true; - if (Kind == BPFPreserveArrayAI) + if (CInfo.Kind == BPFPreserveArrayAI) PreserveArrayIndexCalls.push_back(Call); - else if (Kind == BPFPreserveUnionAI) + else if (CInfo.Kind == BPFPreserveUnionAI) PreserveUnionIndexCalls.push_back(Call); else PreserveStructIndexCalls.push_back(Call); @@ -233,79 +342,146 @@ bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) { return Found; } -void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) { +/// Check whether the access index chain is valid. We check +/// here because there may be type casts between two +/// access indexes. We want to ensure memory access still valid. +bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode *ParentType, + uint32_t ParentAI, + const MDNode *ChildType) { + if (!ChildType) + return true; // preserve_field_info, no type comparison needed. 
+ + const DIType *PType = stripQualifiers(cast(ParentType)); + const DIType *CType = stripQualifiers(cast(ChildType)); + + // Child is a derived/pointer type, which is due to type casting. + // Pointer type cannot be in the middle of chain. + if (isa(CType)) + return false; + + // Parent is a pointer type. + if (const auto *PtrTy = dyn_cast(PType)) { + if (PtrTy->getTag() != dwarf::DW_TAG_pointer_type) + return false; + return stripQualifiers(PtrTy->getBaseType()) == CType; + } + + // Otherwise, struct/union/array types + const auto *PTy = dyn_cast(PType); + const auto *CTy = dyn_cast(CType); + assert(PTy && CTy && "ParentType or ChildType is null or not composite"); + + uint32_t PTyTag = PTy->getTag(); + assert(PTyTag == dwarf::DW_TAG_array_type || + PTyTag == dwarf::DW_TAG_structure_type || + PTyTag == dwarf::DW_TAG_union_type); + + uint32_t CTyTag = CTy->getTag(); + assert(CTyTag == dwarf::DW_TAG_array_type || + CTyTag == dwarf::DW_TAG_structure_type || + CTyTag == dwarf::DW_TAG_union_type); + + // Multi dimensional arrays, base element should be the same + if (PTyTag == dwarf::DW_TAG_array_type && PTyTag == CTyTag) + return PTy->getBaseType() == CTy->getBaseType(); + + DIType *Ty; + if (PTyTag == dwarf::DW_TAG_array_type) + Ty = PTy->getBaseType(); + else + Ty = dyn_cast(PTy->getElements()[ParentAI]); + + return dyn_cast(stripQualifiers(Ty)) == CTy; +} + +void BPFAbstractMemberAccess::traceAICall(CallInst *Call, + CallInfo &ParentInfo) { for (User *U : Call->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Call, Kind); + traceBitCast(BI, Call, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Call, Kind); - traceAICall(CI, CIKind); + CallInfo ChildInfo; + + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + 
AIChain[CI] = std::make_pair(Call, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Call] = Kind; + BaseAICalls[Call] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Call, Kind); + traceGEP(GI, Call, ParentInfo); else - BaseAICalls[Call] = Kind; + BaseAICalls[Call] = ParentInfo; + } else { + BaseAICalls[Call] = ParentInfo; } } } void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast, - CallInst *Parent, uint32_t Kind) { + CallInst *Parent, + CallInfo &ParentInfo) { for (User *U : BitCast->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Parent, Kind); + traceBitCast(BI, Parent, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Parent, Kind); - traceAICall(CI, CIKind); + CallInfo ChildInfo; + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + AIChain[CI] = std::make_pair(Parent, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Parent, Kind); + traceGEP(GI, Parent, ParentInfo); else - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; + } else { + BaseAICalls[Parent] = ParentInfo; } } } void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent, - uint32_t Kind) { + CallInfo &ParentInfo) { for (User *U : GEP->users()) { Instruction *Inst = dyn_cast(U); if (!Inst) continue; if (auto *BI = dyn_cast(Inst)) { - traceBitCast(BI, Parent, Kind); + traceBitCast(BI, Parent, ParentInfo); } else if (auto *CI = dyn_cast(Inst)) { - uint32_t CIKind; - if (IsPreserveDIAccessIndexCall(CI, CIKind)) { - AIChain[CI] = std::make_pair(Parent, Kind); - traceAICall(CI, 
CIKind); + CallInfo ChildInfo; + if (IsPreserveDIAccessIndexCall(CI, ChildInfo) && + IsValidAIChain(ParentInfo.Metadata, ParentInfo.AccessIndex, + ChildInfo.Metadata)) { + AIChain[CI] = std::make_pair(Parent, ParentInfo); + traceAICall(CI, ChildInfo); } else { - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; } } else if (auto *GI = dyn_cast(Inst)) { if (GI->hasAllZeroIndices()) - traceGEP(GI, Parent, Kind); + traceGEP(GI, Parent, ParentInfo); else - BaseAICalls[Parent] = Kind; + BaseAICalls[Parent] = ParentInfo; + } else { + BaseAICalls[Parent] = ParentInfo; } } } @@ -316,92 +492,345 @@ void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) { for (auto &BB : F) for (auto &I : BB) { - uint32_t Kind; + CallInfo CInfo; auto *Call = dyn_cast(&I); - if (!IsPreserveDIAccessIndexCall(Call, Kind) || + if (!IsPreserveDIAccessIndexCall(Call, CInfo) || AIChain.find(Call) != AIChain.end()) continue; - traceAICall(Call, Kind); + traceAICall(Call, CInfo); } } -/// Get access index from the preserve_*_access_index intrinsic calls. -bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, - uint64_t &AccessIndex) { +uint64_t BPFAbstractMemberAccess::getConstant(const Value *IndexValue) { const ConstantInt *CV = dyn_cast(IndexValue); - if (!CV) - return false; - - AccessIndex = CV->getValue().getZExtValue(); - return true; + assert(CV); + return CV->getValue().getZExtValue(); } -/// Compute the base of the whole preserve_*_access_index chains, i.e., the base +/// Get the start and the end of storage offset for \p MemberTy. +/// The storage bits are corresponding to the LLVM internal types, +/// and the storage bits for the member determines what load width +/// to use in order to extract the bitfield value. 
+void BPFAbstractMemberAccess::GetStorageBitRange(DICompositeType *CTy, + DIDerivedType *MemberTy, + uint32_t AccessIndex, + uint32_t &StartBitOffset, + uint32_t &EndBitOffset) { + auto SOff = dyn_cast(MemberTy->getStorageOffsetInBits()); + assert(SOff); + StartBitOffset = SOff->getZExtValue(); + + EndBitOffset = CTy->getSizeInBits(); + uint32_t Index = AccessIndex + 1; + for (; Index < CTy->getElements().size(); ++Index) { + auto Member = cast(CTy->getElements()[Index]); + if (!Member->getStorageOffsetInBits()) { + EndBitOffset = Member->getOffsetInBits(); + break; + } + SOff = dyn_cast(Member->getStorageOffsetInBits()); + assert(SOff); + unsigned BitOffset = SOff->getZExtValue(); + if (BitOffset != StartBitOffset) { + EndBitOffset = BitOffset; + break; + } + } +} + +uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind, + DICompositeType *CTy, + uint32_t AccessIndex, + uint32_t PatchImm) { + if (InfoKind == BPFCoreSharedInfo::FIELD_EXISTENCE) + return 1; + + uint32_t Tag = CTy->getTag(); + if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_OFFSET) { + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + PatchImm += AccessIndex * calcArraySize(CTy, 1) * + (EltTy->getSizeInBits() >> 3); + } else if (Tag == dwarf::DW_TAG_structure_type) { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + if (!MemberTy->isBitField()) { + PatchImm += MemberTy->getOffsetInBits() >> 3; + } else { + auto SOffset = dyn_cast(MemberTy->getStorageOffsetInBits()); + assert(SOffset); + PatchImm += SOffset->getZExtValue() >> 3; + } + } + return PatchImm; + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_BYTE_SIZE) { + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + return calcArraySize(CTy, 1) * (EltTy->getSizeInBits() >> 3); + } else { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + uint32_t SizeInBits = MemberTy->getSizeInBits(); + if (!MemberTy->isBitField()) + return SizeInBits 
>> 3; + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + SizeInBits = NextSBitOffset - SBitOffset; + if (SizeInBits & (SizeInBits - 1)) + report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info"); + return SizeInBits >> 3; + } + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_SIGNEDNESS) { + const DIType *BaseTy; + if (Tag == dwarf::DW_TAG_array_type) { + // Signedness only checked when final array elements are accessed. + if (CTy->getElements().size() != 1) + report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info"); + BaseTy = stripQualifiers(CTy->getBaseType()); + } else { + auto *MemberTy = cast(CTy->getElements()[AccessIndex]); + BaseTy = stripQualifiers(MemberTy->getBaseType()); + } + + // Only basic types and enum types have signedness. + const auto *BTy = dyn_cast(BaseTy); + while (!BTy) { + const auto *CompTy = dyn_cast(BaseTy); + // Report an error if the field expression does not have signedness. + if (!CompTy || CompTy->getTag() != dwarf::DW_TAG_enumeration_type) + report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info"); + BaseTy = stripQualifiers(CompTy->getBaseType()); + BTy = dyn_cast(BaseTy); + } + uint32_t Encoding = BTy->getEncoding(); + return (Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char); + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_LSHIFT_U64) { + // The value is loaded into a value with FIELD_BYTE_SIZE size, + // and then zero or sign extended to U64. + // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations + // to extract the original value. 
+ const Triple &Triple = TM->getTargetTriple(); + DIDerivedType *MemberTy = nullptr; + bool IsBitField = false; + uint32_t SizeInBits; + + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); + } else { + MemberTy = cast(CTy->getElements()[AccessIndex]); + SizeInBits = MemberTy->getSizeInBits(); + IsBitField = MemberTy->isBitField(); + } + + if (!IsBitField) { + if (SizeInBits > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + return 64 - SizeInBits; + } + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + if (NextSBitOffset - SBitOffset > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + + unsigned OffsetInBits = MemberTy->getOffsetInBits(); + if (Triple.getArch() == Triple::bpfel) + return SBitOffset + 64 - OffsetInBits - SizeInBits; + else + return OffsetInBits + 64 - NextSBitOffset; + } + + if (InfoKind == BPFCoreSharedInfo::FIELD_RSHIFT_U64) { + DIDerivedType *MemberTy = nullptr; + bool IsBitField = false; + uint32_t SizeInBits; + if (Tag == dwarf::DW_TAG_array_type) { + auto *EltTy = stripQualifiers(CTy->getBaseType()); + SizeInBits = calcArraySize(CTy, 1) * EltTy->getSizeInBits(); + } else { + MemberTy = cast(CTy->getElements()[AccessIndex]); + SizeInBits = MemberTy->getSizeInBits(); + IsBitField = MemberTy->isBitField(); + } + + if (!IsBitField) { + if (SizeInBits > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + return 64 - SizeInBits; + } + + unsigned SBitOffset, NextSBitOffset; + GetStorageBitRange(CTy, MemberTy, AccessIndex, SBitOffset, NextSBitOffset); + if (NextSBitOffset - SBitOffset > 64) + report_fatal_error("too big field size for llvm.bpf.preserve.field.info"); + + return 64 - SizeInBits; + } + + llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind"); +} + +bool 
BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack &CallStack) { + // This is called in error return path, no need to maintain CallStack. + while (CallStack.size()) { + auto StackElem = CallStack.top(); + if (StackElem.second.Kind == BPFPreserveFieldInfoAI) + return true; + CallStack.pop(); + } + return false; +} + +/// Compute the base of the whole preserve_* intrinsics chains, i.e., the base /// pointer of the first preserve_*_access_index call, and construct the access /// string, which will be the name of a global variable. -Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, - std::string &AccessStr, +Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, + CallInfo &CInfo, std::string &AccessKey, - uint32_t Kind, MDNode *&TypeMeta) { Value *Base = nullptr; - std::vector AccessIndices; - uint64_t TypeNameIndex = 0; - std::string LastTypeName; + std::string TypeName; + CallInfoStack CallStack; + // Put the access chain into a stack with the top as the head of the chain. while (Call) { - // Base of original corresponding GEP - Base = Call->getArgOperand(0); - - // Type Name - std::string TypeName; - MDNode *MDN; - if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) { - MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index); - if (!MDN) - return nullptr; - - DIType *Ty = dyn_cast(MDN); - if (!Ty) - return nullptr; - - TypeName = Ty->getName(); - } - - // Access Index - uint64_t AccessIndex; - uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2; - if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex)) - return nullptr; - - AccessIndices.push_back(AccessIndex); - if (TypeName.size()) { - TypeNameIndex = AccessIndices.size() - 1; - LastTypeName = TypeName; - TypeMeta = MDN; - } - - Kind = AIChain[Call].second; + CallStack.push(std::make_pair(Call, CInfo)); + CInfo = AIChain[Call].second; Call = AIChain[Call].first; } - // The intial type name is required. 
- // FIXME: if the initial type access is an array index, e.g., - // &a[3].b.c, only one dimentional array is supported. - if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) - return nullptr; + // The access offset from the base of the head of chain is also + // calculated here as all debuginfo types are available. - // Construct the type string AccessStr. - for (unsigned I = 0; I < AccessIndices.size(); ++I) - AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr; + // Get type name and calculate the first index. + // We only want to get type name from structure or union. + // If user wants a relocation like + // int *p; ... __builtin_preserve_access_index(&p[4]) ... + // or + // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ... + // we will skip them. + uint32_t FirstIndex = 0; + uint32_t PatchImm = 0; // AccessOffset or the requested field info + uint32_t InfoKind = BPFCoreSharedInfo::FIELD_BYTE_OFFSET; + while (CallStack.size()) { + auto StackElem = CallStack.top(); + Call = StackElem.first; + CInfo = StackElem.second; - if (TypeNameIndex == AccessIndices.size() - 1) - AccessStr = "0:" + AccessStr; + if (!Base) + Base = CInfo.Base; - // Access key is the type name + access string, uniquely identifying - // one kernel memory access. - AccessKey = LastTypeName + ":" + AccessStr; + DIType *Ty = stripQualifiers(cast(CInfo.Metadata)); + if (CInfo.Kind == BPFPreserveUnionAI || + CInfo.Kind == BPFPreserveStructAI) { + // struct or union type + TypeName = Ty->getName(); + TypeMeta = Ty; + PatchImm += FirstIndex * (Ty->getSizeInBits() >> 3); + break; + } + + assert(CInfo.Kind == BPFPreserveArrayAI); + + // Array entries will always be consumed for accumulative initial index. 
+ CallStack.pop(); + + // BPFPreserveArrayAI + uint64_t AccessIndex = CInfo.AccessIndex; + + DIType *BaseTy = nullptr; + bool CheckElemType = false; + if (const auto *CTy = dyn_cast(Ty)) { + // array type + assert(CTy->getTag() == dwarf::DW_TAG_array_type); + + + FirstIndex += AccessIndex * calcArraySize(CTy, 1); + BaseTy = stripQualifiers(CTy->getBaseType()); + CheckElemType = CTy->getElements().size() == 1; + } else { + // pointer type + auto *DTy = cast(Ty); + assert(DTy->getTag() == dwarf::DW_TAG_pointer_type); + + BaseTy = stripQualifiers(DTy->getBaseType()); + CTy = dyn_cast(BaseTy); + if (!CTy) { + CheckElemType = true; + } else if (CTy->getTag() != dwarf::DW_TAG_array_type) { + FirstIndex += AccessIndex; + CheckElemType = true; + } else { + FirstIndex += AccessIndex * calcArraySize(CTy, 0); + } + } + + if (CheckElemType) { + auto *CTy = dyn_cast(BaseTy); + if (!CTy) { + if (HasPreserveFieldInfoCall(CallStack)) + report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); + return nullptr; + } + + unsigned CTag = CTy->getTag(); + if (CTag == dwarf::DW_TAG_structure_type || CTag == dwarf::DW_TAG_union_type) { + TypeName = CTy->getName(); + } else { + if (HasPreserveFieldInfoCall(CallStack)) + report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic"); + return nullptr; + } + TypeMeta = CTy; + PatchImm += FirstIndex * (CTy->getSizeInBits() >> 3); + break; + } + } + assert(TypeName.size()); + AccessKey += std::to_string(FirstIndex); + + // Traverse the rest of access chain to complete offset calculation + // and access key construction. + while (CallStack.size()) { + auto StackElem = CallStack.top(); + CInfo = StackElem.second; + CallStack.pop(); + + if (CInfo.Kind == BPFPreserveFieldInfoAI) + break; + + // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI, + // the action will be extracting field info. 
+ if (CallStack.size()) { + auto StackElem2 = CallStack.top(); + CallInfo CInfo2 = StackElem2.second; + if (CInfo2.Kind == BPFPreserveFieldInfoAI) { + InfoKind = CInfo2.AccessIndex; + assert(CallStack.size() == 1); + } + } + + // Access Index + uint64_t AccessIndex = CInfo.AccessIndex; + AccessKey += ":" + std::to_string(AccessIndex); + + MDNode *MDN = CInfo.Metadata; + // At this stage, it cannot be pointer type. + auto *CTy = cast(stripQualifiers(cast(MDN))); + PatchImm = GetFieldInfo(InfoKind, CTy, AccessIndex, PatchImm); + } + + // Access key is the type name + reloc type + patched imm + access string, + // uniquely identifying one relocation. + AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" + + std::to_string(PatchImm) + "$" + AccessKey; return Base; } @@ -409,38 +838,51 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, /// Call/Kind is the base preserve_*_access_index() call. Attempts to do /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, - uint32_t Kind) { - std::string AccessStr, AccessKey; - MDNode *TypeMeta = nullptr; + CallInfo &CInfo) { + std::string AccessKey; + MDNode *TypeMeta; Value *Base = - computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta); + computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta); if (!Base) return false; - // Do the transformation + BasicBlock *BB = Call->getParent(); + GlobalVariable *GV; + + if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { + IntegerType *VarType; + if (CInfo.Kind == BPFPreserveFieldInfoAI) + VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value + else + VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith + + GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage, + NULL, AccessKey); + GV->addAttribute(BPFCoreSharedInfo::AmaAttr); + GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); + GEPGlobals[AccessKey] = GV; + 
} else { + GV = GEPGlobals[AccessKey]; + } + + if (CInfo.Kind == BPFPreserveFieldInfoAI) { + // Load the global variable which represents the returned field info. + auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV); + BB->getInstList().insert(Call->getIterator(), LDInst); + Call->replaceAllUsesWith(LDInst); + Call->eraseFromParent(); + return true; + } + // For any original GEP Call and Base %2 like // %4 = bitcast %struct.net_device** %dev1 to i64* // it is transformed to: - // %6 = load __BTF_0:sk_buff:0:0:2:0: + // %6 = load sk_buff:50:$0:0:0:2:0 // %7 = bitcast %struct.sk_buff* %2 to i8* // %8 = getelementptr i8, i8* %7, %6 // %9 = bitcast i8* %8 to i64* // using %9 instead of %4 // The original Call inst is removed. - BasicBlock *BB = Call->getParent(); - GlobalVariable *GV; - - if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { - GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, - GlobalVariable::ExternalLinkage, NULL, AccessStr); - GV->addAttribute(BPFCoreSharedInfo::AmaAttr); - // Set the metadata (debuginfo types) for the global. - if (TypeMeta) - GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta); - GEPGlobals[AccessKey] = GV; - } else { - GV = GEPGlobals[AccessKey]; - } // Load the global variable. auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV); diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index e61e7346805..218b0302927 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -59,7 +59,7 @@ bool BPFAsmPrinter::doInitialization(Module &M) { AsmPrinter::doInitialization(M); // Only emit BTF when debuginfo available. 
- if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) { + if (MAI->doesSupportDebugInformation() && !M.debug_compile_units().empty()) { BTF = new BTFDebug(this); Handlers.push_back(HandlerInfo(std::unique_ptr(BTF), "emit", "Debug Info Emission", "BTF", diff --git a/lib/Target/BPF/BPFCORE.h b/lib/Target/BPF/BPFCORE.h index e0950d95f8d..ed4778353e5 100644 --- a/lib/Target/BPF/BPFCORE.h +++ b/lib/Target/BPF/BPFCORE.h @@ -13,10 +13,18 @@ namespace llvm { class BPFCoreSharedInfo { public: - /// The attribute attached to globals representing a member offset + enum OffsetRelocKind : uint32_t { + FIELD_BYTE_OFFSET = 0, + FIELD_BYTE_SIZE, + FIELD_EXISTENCE, + FIELD_SIGNEDNESS, + FIELD_LSHIFT_U64, + FIELD_RSHIFT_U64, + + MAX_FIELD_RELOC_KIND, + }; + /// The attribute attached to globals representing a field access static const std::string AmaAttr; - /// The section name to identify a patchable external global - static const std::string PatchableExtSecName; }; } // namespace llvm diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index 2dc6277d224..a546351ec6c 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -21,7 +21,7 @@ class BPFSubtarget; class BPFFrameLowering : public TargetFrameLowering { public: explicit BPFFrameLowering(const BPFSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0) {} void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 1bd705c5518..f2be0ff070d 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -45,9 +45,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel { public: explicit BPFDAGToDAGISel(BPFTargetMachine &TM) - : 
SelectionDAGISel(TM), Subtarget(nullptr) { - curr_func_ = nullptr; - } + : SelectionDAGISel(TM), Subtarget(nullptr) {} StringRef getPassName() const override { return "BPF DAG->DAG Pattern Instruction Selection"; @@ -92,14 +90,8 @@ private: val_vec_type &Vals, int Offset); bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset, uint64_t Size, unsigned char *ByteSeq); - bool checkLoadDef(unsigned DefReg, unsigned match_load_op); - // Mapping from ConstantStruct global value to corresponding byte-list values std::map cs_vals_; - // Mapping from vreg to load memory opcode - std::map load_to_vreg_; - // Current function - const Function *curr_func_; }; } // namespace @@ -325,32 +317,13 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, } void BPFDAGToDAGISel::PreprocessISelDAG() { - // Iterate through all nodes, interested in the following cases: + // Iterate through all nodes, interested in the following case: // // . loads from ConstantStruct or ConstantArray of constructs // which can be turns into constant itself, with this we can // avoid reading from read-only section at runtime. // - // . reg truncating is often the result of 8/16/32bit->64bit or - // 8/16bit->32bit conversion. If the reg value is loaded with - // masked byte width, the AND operation can be removed since - // BPF LOAD already has zero extension. - // - // This also solved a correctness issue. - // In BPF socket-related program, e.g., __sk_buff->{data, data_end} - // are 32-bit registers, but later on, kernel verifier will rewrite - // it with 64-bit value. Therefore, truncating the value after the - // load will result in incorrect code. - - // clear the load_to_vreg_ map so that we have a clean start - // for this function. - if (!curr_func_) { - curr_func_ = FuncInfo->Fn; - } else if (curr_func_ != FuncInfo->Fn) { - load_to_vreg_.clear(); - curr_func_ = FuncInfo->Fn; - } - + // . Removing redundant AND for intrinsic narrow loads. 
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { @@ -358,8 +331,6 @@ void BPFDAGToDAGISel::PreprocessISelDAG() { unsigned Opcode = Node->getOpcode(); if (Opcode == ISD::LOAD) PreprocessLoad(Node, I); - else if (Opcode == ISD::CopyToReg) - PreprocessCopyToReg(Node); else if (Opcode == ISD::AND) PreprocessTrunc(Node, I); } @@ -491,37 +462,6 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, return true; } -void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) { - const RegisterSDNode *RegN = dyn_cast(Node->getOperand(1)); - if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) - return; - - const LoadSDNode *LD = dyn_cast(Node->getOperand(2)); - if (!LD) - return; - - // Assign a load value to a virtual register. record its load width - unsigned mem_load_op = 0; - switch (LD->getMemOperand()->getSize()) { - default: - return; - case 4: - mem_load_op = BPF::LDW; - break; - case 2: - mem_load_op = BPF::LDH; - break; - case 1: - mem_load_op = BPF::LDB; - break; - } - - LLVM_DEBUG(dbgs() << "Find Load Value to VReg " - << TargetRegisterInfo::virtReg2Index(RegN->getReg()) - << '\n'); - load_to_vreg_[RegN->getReg()] = mem_load_op; -} - void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator &I) { ConstantSDNode *MaskN = dyn_cast(Node->getOperand(1)); @@ -535,112 +475,26 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, // which the generic optimizer doesn't understand their results are // zero extended. 
SDValue BaseV = Node->getOperand(0); - if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) { - unsigned IntNo = cast(BaseV->getOperand(1))->getZExtValue(); - uint64_t MaskV = MaskN->getZExtValue(); - - if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || - (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || - (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) - return; - - LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; - Node->dump(); dbgs() << '\n'); - - I--; - CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); - I++; - CurDAG->DeleteNode(Node); - - return; - } - - // Multiple basic blocks case. - if (BaseV.getOpcode() != ISD::CopyFromReg) + if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN) return; - unsigned match_load_op = 0; - switch (MaskN->getZExtValue()) { - default: + unsigned IntNo = cast(BaseV->getOperand(1))->getZExtValue(); + uint64_t MaskV = MaskN->getZExtValue(); + + if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || + (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || + (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) return; - case 0xFFFFFFFF: - match_load_op = BPF::LDW; - break; - case 0xFFFF: - match_load_op = BPF::LDH; - break; - case 0xFF: - match_load_op = BPF::LDB; - break; - } - const RegisterSDNode *RegN = - dyn_cast(BaseV.getNode()->getOperand(1)); - if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) - return; - unsigned AndOpReg = RegN->getReg(); - LLVM_DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n'); - - // Examine the PHI insns in the MachineBasicBlock to found out the - // definitions of this virtual register. At this stage (DAG2DAG - // transformation), only PHI machine insns are available in the machine basic - // block. 
- MachineBasicBlock *MBB = FuncInfo->MBB; - MachineInstr *MII = nullptr; - for (auto &MI : *MBB) { - for (unsigned i = 0; i < MI.getNumOperands(); ++i) { - const MachineOperand &MOP = MI.getOperand(i); - if (!MOP.isReg() || !MOP.isDef()) - continue; - unsigned Reg = MOP.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) { - MII = &MI; - break; - } - } - } - - if (MII == nullptr) { - // No phi definition in this block. - if (!checkLoadDef(AndOpReg, match_load_op)) - return; - } else { - // The PHI node looks like: - // %2 = PHI %0, <%bb.1>, %1, <%bb.3> - // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3) - // The AND operation can be removed if both %0 in %bb.1 and %1 in - // %bb.3 are defined with a load matching the MaskN. - LLVM_DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); - unsigned PrevReg = -1; - for (unsigned i = 0; i < MII->getNumOperands(); ++i) { - const MachineOperand &MOP = MII->getOperand(i); - if (MOP.isReg()) { - if (MOP.isDef()) - continue; - PrevReg = MOP.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(PrevReg)) - return; - if (!checkLoadDef(PrevReg, match_load_op)) - return; - } - } - } - - LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Remove the redundant AND operation in: "; + Node->dump(); dbgs() << '\n'); I--; CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); I++; CurDAG->DeleteNode(Node); -} -bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) { - auto it = load_to_vreg_.find(DefReg); - if (it == load_to_vreg_.end()) - return false; // The definition of register is not exported yet. 
- - return it->second == match_load_op; + return; } FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index ff69941d26f..56e0288f26c 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -132,9 +132,9 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); - // Function alignments (log2) - setMinFunctionAlignment(3); - setPrefFunctionAlignment(3); + // Function alignments + setMinFunctionAlignment(Align(8)); + setPrefFunctionAlignment(Align(8)); if (BPFExpandMemcpyInOrder) { // LLVM generic code will try to expand memcpy into load/store pairs at this @@ -236,9 +236,8 @@ SDValue BPFTargetLowering::LowerFormalArguments( } case MVT::i32: case MVT::i64: - unsigned VReg = RegInfo.createVirtualRegister(SimpleTy == MVT::i64 ? - &BPF::GPRRegClass : - &BPF::GPR32RegClass); + Register VReg = RegInfo.createVirtualRegister( + SimpleTy == MVT::i64 ? 
&BPF::GPRRegClass : &BPF::GPR32RegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); @@ -571,9 +570,9 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, DebugLoc DL = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned PromotedReg0 = RegInfo.createVirtualRegister(RC); - unsigned PromotedReg1 = RegInfo.createVirtualRegister(RC); - unsigned PromotedReg2 = RegInfo.createVirtualRegister(RC); + Register PromotedReg0 = RegInfo.createVirtualRegister(RC); + Register PromotedReg1 = RegInfo.createVirtualRegister(RC); + Register PromotedReg2 = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1) .addReg(PromotedReg0).addImm(32); @@ -699,7 +698,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, report_fatal_error("unimplemented select CondCode " + Twine(CC)); } - unsigned LHS = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(1).getReg(); bool isSignedCmp = (CC == ISD::SETGT || CC == ISD::SETGE || CC == ISD::SETLT || @@ -716,7 +715,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp); if (isSelectRROp) { - unsigned RHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(2).getReg(); if (is32BitCmp && !HasJmp32) RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp); diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index 932f718d549..6de3a4084d3 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -43,11 +43,11 @@ void BPFInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void BPFInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = 
MI->getOperand(1).getReg(); uint64_t CopyLen = MI->getOperand(2).getImm(); uint64_t Alignment = MI->getOperand(3).getImm(); - unsigned ScratchReg = MI->getOperand(4).getReg(); + Register ScratchReg = MI->getOperand(4).getReg(); MachineBasicBlock *BB = MI->getParent(); DebugLoc dl = MI->getDebugLoc(); unsigned LdOpc, StOpc; diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index c44702a78ec..ae5a82a9930 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -473,7 +473,7 @@ class CALL class CALLX : TYPE_ALU_JMP { bits<32> BrDst; diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp index 4c46289656b..f82f166eda4 100644 --- a/lib/Target/BPF/BPFMIChecking.cpp +++ b/lib/Target/BPF/BPFMIChecking.cpp @@ -19,6 +19,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/Target/BPF/BPFMIPeephole.cpp b/lib/Target/BPF/BPFMIPeephole.cpp index 156ba793e35..e9eecc55c3c 100644 --- a/lib/Target/BPF/BPFMIPeephole.cpp +++ b/lib/Target/BPF/BPFMIPeephole.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -71,7 +72,7 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) { MF = &MFParm; MRI = &MF->getRegInfo(); TII = MF->getSubtarget().getInstrInfo(); - LLVM_DEBUG(dbgs() << "*** BPF MachineSSA peephole pass ***\n\n"); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); } bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) @@ -104,10 +105,10 @@ bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) if (!opnd.isReg()) return false; - unsigned Reg = opnd.getReg(); - if ((TargetRegisterInfo::isVirtualRegister(Reg) && + Register Reg = opnd.getReg(); + if 
((Register::isVirtualRegister(Reg) && MRI->getRegClass(Reg) == &BPF::GPRRegClass)) - return false; + return false; } LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); @@ -134,8 +135,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { // SRL_ri rB, rB, 32 if (MI.getOpcode() == BPF::SRL_ri && MI.getOperand(2).getImm() == 32) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned ShfReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register ShfReg = MI.getOperand(1).getReg(); MachineInstr *SllMI = MRI->getVRegDef(ShfReg); LLVM_DEBUG(dbgs() << "Starting SRL found:"); @@ -159,7 +160,7 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { LLVM_DEBUG(dbgs() << " Type cast Mov found:"); LLVM_DEBUG(MovMI->dump()); - unsigned SubReg = MovMI->getOperand(1).getReg(); + Register SubReg = MovMI->getOperand(1).getReg(); if (!isMovFrom32Def(MovMI)) { LLVM_DEBUG(dbgs() << " One ZExt elim sequence failed qualifying elim.\n"); @@ -186,7 +187,8 @@ bool BPFMIPeephole::eliminateZExtSeq(void) { } // end default namespace INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, - "BPF MachineSSA Peephole Optimization", false, false) + "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", + false, false) char BPFMIPeephole::ID = 0; FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } @@ -253,12 +255,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { // enabled. The special type cast insn MOV_32_64 involves different // register class on src (i32) and dst (i64), RA could generate useless // instruction due to this. 
- if (MI.getOpcode() == BPF::MOV_32_64) { - unsigned dst = MI.getOperand(0).getReg(); - unsigned dst_sub = TRI->getSubReg(dst, BPF::sub_32); - unsigned src = MI.getOperand(1).getReg(); + unsigned Opcode = MI.getOpcode(); + if (Opcode == BPF::MOV_32_64 || + Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { + Register dst = MI.getOperand(0).getReg(); + Register src = MI.getOperand(1).getReg(); - if (dst_sub != src) + if (Opcode == BPF::MOV_32_64) + dst = TRI->getSubReg(dst, BPF::sub_32); + + if (dst != src) continue; ToErase = &MI; @@ -281,3 +287,177 @@ FunctionPass* llvm::createBPFMIPreEmitPeepholePass() { return new BPFMIPreEmitPeephole(); } + +STATISTIC(TruncElemNum, "Number of truncation eliminated"); + +namespace { + +struct BPFMIPeepholeTruncElim : public MachineFunctionPass { + + static char ID; + const BPFInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + + BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { + initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + bool eliminateTruncSeq(void); + +public: + + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + initialize(MF); + + return eliminateTruncSeq(); + } +}; + +static bool TruncSizeCompatible(int TruncSize, unsigned opcode) +{ + if (TruncSize == 1) + return opcode == BPF::LDB || opcode == BPF::LDB32; + + if (TruncSize == 2) + return opcode == BPF::LDH || opcode == BPF::LDH32; + + if (TruncSize == 4) + return opcode == BPF::LDW || opcode == BPF::LDW32; + + return false; +} + +// Initialize class variables. 
+void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { + MF = &MFParm; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); +} + +// Reg truncating is often the result of 8/16/32bit->64bit or +// 8/16bit->32bit conversion. If the reg value is loaded with +// masked byte width, the AND operation can be removed since +// BPF LOAD already has zero extension. +// +// This also solved a correctness issue. +// In BPF socket-related program, e.g., __sk_buff->{data, data_end} +// are 32-bit registers, but later on, kernel verifier will rewrite +// it with 64-bit value. Therefore, truncating the value after the +// load will result in incorrect code. +bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { + MachineInstr* ToErase = nullptr; + bool Eliminated = false; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + // The second insn to remove if the eliminate candidate is a pair. + MachineInstr *MI2 = nullptr; + Register DstReg, SrcReg; + MachineInstr *DefMI; + int TruncSize = -1; + + // If the previous instruction was marked for elimination, remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate + // for BPF ANDI is i32, and this case only happens on ALU64. + if (MI.getOpcode() == BPF::SRL_ri && + MI.getOperand(2).getImm() == 32) { + SrcReg = MI.getOperand(1).getReg(); + MI2 = MRI->getVRegDef(SrcReg); + DstReg = MI.getOperand(0).getReg(); + + if (!MI2 || + MI2->getOpcode() != BPF::SLL_ri || + MI2->getOperand(2).getImm() != 32) + continue; + + // Update SrcReg. 
+ SrcReg = MI2->getOperand(1).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + TruncSize = 4; + } else if (MI.getOpcode() == BPF::AND_ri || + MI.getOpcode() == BPF::AND_ri_32) { + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + DefMI = MRI->getVRegDef(SrcReg); + + if (!DefMI) + continue; + + int64_t imm = MI.getOperand(2).getImm(); + if (imm == 0xff) + TruncSize = 1; + else if (imm == 0xffff) + TruncSize = 2; + } + + if (TruncSize == -1) + continue; + + // The definition is PHI node, check all inputs. + if (DefMI->isPHI()) { + bool CheckFail = false; + + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + MachineOperand &opnd = DefMI->getOperand(i); + if (!opnd.isReg()) { + CheckFail = true; + break; + } + + MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); + if (!PhiDef || PhiDef->isPHI() || + !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { + CheckFail = true; + break; + } + } + + if (CheckFail) + continue; + } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { + continue; + } + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) + .addReg(SrcReg); + + if (MI2) + MI2->eraseFromParent(); + + // Mark it to ToErase, and erase in the next iteration. 
+ ToErase = &MI; + TruncElemNum++; + Eliminated = true; + } + } + + return Eliminated; +} + +} // end default namespace + +INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", + "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", + false, false) + +char BPFMIPeepholeTruncElim::ID = 0; +FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() +{ + return new BPFMIPeepholeTruncElim(); +} diff --git a/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/lib/Target/BPF/BPFMISimplifyPatchable.cpp index e9114d7187e..9c689aed641 100644 --- a/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -11,19 +11,15 @@ // ldd r2, r1, 0 // add r3, struct_base_reg, r2 // -// Here @global should either present a AMA (abstruct member access) or -// a patchable extern variable. And these two kinds of accesses -// are subject to bpf load time patching. After this pass, the +// Here @global should represent an AMA (abstruct member access). +// Such an access is subject to bpf load time patching. 
After this pass, the // code becomes // ld_imm64 r1, @global // add r3, struct_base_reg, r1 // // Eventually, at BTF output stage, a relocation record will be generated // for ld_imm64 which should be replaced later by bpf loader: -// r1 = or -// add r3, struct_base_reg, r1 -// or -// ld_imm64 r1, +// r1 = // add r3, struct_base_reg, r1 // //===----------------------------------------------------------------------===// @@ -34,6 +30,7 @@ #include "BPFTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -100,9 +97,8 @@ bool BPFMISimplifyPatchable::removeLD() { if (!MI.getOperand(2).isImm() || MI.getOperand(2).getImm()) continue; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - int64_t ImmVal = MI.getOperand(2).getImm(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg); if (!DefInst) @@ -118,17 +114,8 @@ bool BPFMISimplifyPatchable::removeLD() { // Global variables representing structure offset or // patchable extern globals. 
if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - assert(ImmVal == 0); + assert(MI.getOperand(2).getImm() == 0); IsCandidate = true; - } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == - BPFCoreSharedInfo::PatchableExtSecName) { - if (ImmVal == 0) - IsCandidate = true; - else - errs() << "WARNING: unhandled patchable extern " - << GVar->getName() << " with load offset " << ImmVal - << "\n"; } } } diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 714af06e11d..8de81a469b8 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -77,7 +77,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(i).getIndex(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -86,7 +86,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, WarnSize(Offset, MF, DL); MI.getOperand(i).ChangeToRegister(FrameReg, false); - unsigned reg = MI.getOperand(i - 1).getReg(); + Register reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg) .addReg(reg) .addImm(Offset); @@ -105,7 +105,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // architecture does not really support FI_ri, replace it with // MOV_rr , frame_reg // ADD_ri , imm - unsigned reg = MI.getOperand(i - 1).getReg(); + Register reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) .addReg(FrameReg); diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 24c0ff0f7f1..0c4f2c74e7a 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -36,6 +36,7 @@ extern "C" void LLVMInitializeBPFTarget() { 
PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeBPFAbstractMemberAccessPass(PR); initializeBPFMIPeepholePass(PR); + initializeBPFMIPeepholeTruncElimPass(PR); } // DataLayout: little or big endian @@ -61,7 +62,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); @@ -94,7 +95,7 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { void BPFPassConfig::addIRPasses() { - addPass(createBPFAbstractMemberAccess()); + addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine())); TargetPassConfig::addIRPasses(); } @@ -115,15 +116,16 @@ void BPFPassConfig::addMachineSSAOptimization() { TargetPassConfig::addMachineSSAOptimization(); const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); - if (Subtarget->getHasAlu32() && !DisableMIPeephole) - addPass(createBPFMIPeepholePass()); + if (!DisableMIPeephole) { + if (Subtarget->getHasAlu32()) + addPass(createBPFMIPeepholePass()); + addPass(createBPFMIPeepholeTruncElimPass()); + } } void BPFPassConfig::addPreEmitPass() { - const BPFSubtarget *Subtarget = getBPFTargetMachine().getSubtargetImpl(); - addPass(createBPFMIPreEmitCheckingPass()); if (getOptLevel() != CodeGenOpt::None) - if (Subtarget->getHasAlu32() && !DisableMIPeephole) + if (!DisableMIPeephole) addPass(createBPFMIPreEmitPeepholePass()); } diff --git a/lib/Target/BPF/BTF.h b/lib/Target/BPF/BTF.h index ad56716710a..a13c862bf84 100644 --- a/lib/Target/BPF/BTF.h +++ b/lib/Target/BPF/BTF.h @@ -17,7 +17,7 @@ /// /// The binary layout for .BTF.ext section: /// struct ExtHeader -/// FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections +/// FuncInfo, LineInfo, FieldReloc and ExternReloc subsections /// The FuncInfo subsection is defined as below: /// 
BTFFuncInfo Size /// struct SecFuncInfo for ELF section #1 @@ -32,19 +32,12 @@ /// struct SecLineInfo for ELF section #2 /// A number of struct BPFLineInfo for ELF section #2 /// ... -/// The OffsetReloc subsection is defined as below: -/// BPFOffsetReloc Size -/// struct SecOffsetReloc for ELF section #1 -/// A number of struct BPFOffsetReloc for ELF section #1 -/// struct SecOffsetReloc for ELF section #2 -/// A number of struct BPFOffsetReloc for ELF section #2 -/// ... -/// The ExternReloc subsection is defined as below: -/// BPFExternReloc Size -/// struct SecExternReloc for ELF section #1 -/// A number of struct BPFExternReloc for ELF section #1 -/// struct SecExternReloc for ELF section #2 -/// A number of struct BPFExternReloc for ELF section #2 +/// The FieldReloc subsection is defined as below: +/// BPFFieldReloc Size +/// struct SecFieldReloc for ELF section #1 +/// A number of struct BPFFieldReloc for ELF section #1 +/// struct SecFieldReloc for ELF section #2 +/// A number of struct BPFFieldReloc for ELF section #2 /// ... /// /// The section formats are also defined at @@ -63,7 +56,7 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 }; /// Sizes in bytes of various things in the BTF format. enum { HeaderSize = 24, - ExtHeaderSize = 40, + ExtHeaderSize = 32, CommonTypeSize = 12, BTFArraySize = 12, BTFEnumSize = 8, @@ -72,12 +65,10 @@ enum { BTFDataSecVarSize = 12, SecFuncInfoSize = 8, SecLineInfoSize = 8, - SecOffsetRelocSize = 8, - SecExternRelocSize = 8, + SecFieldRelocSize = 8, BPFFuncInfoSize = 8, BPFLineInfoSize = 16, - BPFOffsetRelocSize = 12, - BPFExternRelocSize = 8, + BPFFieldRelocSize = 16, }; /// The .BTF section header definition. 
@@ -213,10 +204,8 @@ struct ExtHeader { uint32_t FuncInfoLen; ///< Length of func info section uint32_t LineInfoOff; ///< Offset of line info section uint32_t LineInfoLen; ///< Length of line info section - uint32_t OffsetRelocOff; ///< Offset of offset reloc section - uint32_t OffsetRelocLen; ///< Length of offset reloc section - uint32_t ExternRelocOff; ///< Offset of extern reloc section - uint32_t ExternRelocLen; ///< Length of extern reloc section + uint32_t FieldRelocOff; ///< Offset of offset reloc section + uint32_t FieldRelocLen; ///< Length of offset reloc section }; /// Specifying one function info. @@ -247,28 +236,17 @@ struct SecLineInfo { }; /// Specifying one offset relocation. -struct BPFOffsetReloc { +struct BPFFieldReloc { uint32_t InsnOffset; ///< Byte offset in this section uint32_t TypeID; ///< TypeID for the relocation uint32_t OffsetNameOff; ///< The string to traverse types + uint32_t RelocKind; ///< What to patch the instruction }; /// Specifying offset relocation's in one section. -struct SecOffsetReloc { +struct SecFieldReloc { uint32_t SecNameOff; ///< Section name index in the .BTF string table - uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section -}; - -/// Specifying one offset relocation. -struct BPFExternReloc { - uint32_t InsnOffset; ///< Byte offset in this section - uint32_t ExternNameOff; ///< The string for external variable -}; - -/// Specifying extern relocation's in one section. -struct SecExternReloc { - uint32_t SecNameOff; ///< Section name index in the .BTF string table - uint32_t NumExternReloc; ///< Number of extern reloc's in this section + uint32_t NumFieldReloc; ///< Number of offset reloc's in this section }; } // End namespace BTF. 
diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp index fa35c6619e2..db551e739bd 100644 --- a/lib/Target/BPF/BTFDebug.cpp +++ b/lib/Target/BPF/BTFDebug.cpp @@ -184,9 +184,7 @@ void BTFTypeEnum::emitType(MCStreamer &OS) { } } -BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, - uint32_t NumElems) - : ElemSize(ElemSize) { +BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems) { Kind = BTF::BTF_KIND_ARRAY; BTFType.NameOff = 0; BTFType.Info = Kind << 24; @@ -216,12 +214,6 @@ void BTFTypeArray::emitType(MCStreamer &OS) { OS.EmitIntValue(ArrayInfo.Nelems, 4); } -void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset, - uint32_t &ElementTypeId) { - ElementTypeId = ArrayInfo.ElemType; - LocOffset = Loc * ElemSize; -} - /// Represent either a struct or a union. BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField, uint32_t Vlen) @@ -251,7 +243,8 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType()); + const auto *BaseTy = DDTy->getBaseType(); + BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } } @@ -268,15 +261,6 @@ void BTFTypeStruct::emitType(MCStreamer &OS) { std::string BTFTypeStruct::getName() { return STy->getName(); } -void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset, - uint32_t &MemberType) { - MemberType = Members[Loc].Type; - MemberOffset = - HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset; -} - -uint32_t BTFTypeStruct::getStructSize() { return STy->getSizeInBits() >> 3; } - /// The Func kind represents both subprogram and pointee of function /// pointers. If the FuncName is empty, it represents a pointee of function /// pointer. Otherwise, it represents a subprogram. 
The func arg names @@ -428,7 +412,7 @@ void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) { // Create a BTF type instance for this DIBasicType and put it into // DIToIdMap for cross-type reference check. - auto TypeEntry = llvm::make_unique( + auto TypeEntry = std::make_unique( Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); TypeId = addType(std::move(TypeEntry), BTy); } @@ -447,7 +431,7 @@ void BTFDebug::visitSubroutineType( // a function pointer has an empty name. The subprogram type will // not be added to DIToIdMap as it should not be referenced by // any other types. - auto TypeEntry = llvm::make_unique(STy, VLen, FuncArgNames); + auto TypeEntry = std::make_unique(STy, VLen, FuncArgNames); if (ForSubprog) TypeId = addType(std::move(TypeEntry)); // For subprogram else @@ -478,7 +462,7 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, } auto TypeEntry = - llvm::make_unique(CTy, IsStruct, HasBitField, VLen); + std::make_unique(CTy, IsStruct, HasBitField, VLen); StructTypes.push_back(TypeEntry.get()); TypeId = addType(std::move(TypeEntry), CTy); @@ -489,35 +473,29 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct, void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { // Visit array element type. - uint32_t ElemTypeId, ElemSize; + uint32_t ElemTypeId; const DIType *ElemType = CTy->getBaseType(); visitTypeEntry(ElemType, ElemTypeId, false, false); - ElemSize = ElemType->getSizeInBits() >> 3; - if (!CTy->getSizeInBits()) { - auto TypeEntry = llvm::make_unique(ElemTypeId, 0, 0); - ArrayTypes.push_back(TypeEntry.get()); - ElemTypeId = addType(std::move(TypeEntry), CTy); - } else { - // Visit array dimensions. 
- DINodeArray Elements = CTy->getElements(); - for (int I = Elements.size() - 1; I >= 0; --I) { - if (auto *Element = dyn_cast_or_null(Elements[I])) - if (Element->getTag() == dwarf::DW_TAG_subrange_type) { - const DISubrange *SR = cast(Element); - auto *CI = SR->getCount().dyn_cast(); - int64_t Count = CI->getSExtValue(); + // Visit array dimensions. + DINodeArray Elements = CTy->getElements(); + for (int I = Elements.size() - 1; I >= 0; --I) { + if (auto *Element = dyn_cast_or_null(Elements[I])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) { + const DISubrange *SR = cast(Element); + auto *CI = SR->getCount().dyn_cast(); + int64_t Count = CI->getSExtValue(); - auto TypeEntry = - llvm::make_unique(ElemTypeId, ElemSize, Count); - ArrayTypes.push_back(TypeEntry.get()); - if (I == 0) - ElemTypeId = addType(std::move(TypeEntry), CTy); - else - ElemTypeId = addType(std::move(TypeEntry)); - ElemSize = ElemSize * Count; - } - } + // For struct s { int b; char c[]; }, the c[] will be represented + // as an array with Count = -1. + auto TypeEntry = + std::make_unique(ElemTypeId, + Count >= 0 ? Count : 0); + if (I == 0) + ElemTypeId = addType(std::move(TypeEntry), CTy); + else + ElemTypeId = addType(std::move(TypeEntry)); + } } // The array TypeId is the type id of the outermost dimension. @@ -526,7 +504,7 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { // The IR does not have a type for array index while BTF wants one. // So create an array index type if there is none. 
if (!ArrayIndexTypeId) { - auto TypeEntry = llvm::make_unique(dwarf::DW_ATE_unsigned, 32, + auto TypeEntry = std::make_unique(dwarf::DW_ATE_unsigned, 32, 0, "__ARRAY_SIZE_TYPE__"); ArrayIndexTypeId = addType(std::move(TypeEntry)); } @@ -538,7 +516,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) { if (VLen > BTF::MAX_VLEN) return; - auto TypeEntry = llvm::make_unique(CTy, VLen); + auto TypeEntry = std::make_unique(CTy, VLen); TypeId = addType(std::move(TypeEntry), CTy); // No need to visit base type as BTF does not encode it. } @@ -546,7 +524,7 @@ void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) { /// Handle structure/union forward declarations. void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion, uint32_t &TypeId) { - auto TypeEntry = llvm::make_unique(CTy->getName(), IsUnion); + auto TypeEntry = std::make_unique(CTy->getName(), IsUnion); TypeId = addType(std::move(TypeEntry), CTy); } @@ -588,7 +566,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, /// Find a candidate, generate a fixup. Later on the struct/union /// pointee type will be replaced with either a real type or /// a forward declaration. 
- auto TypeEntry = llvm::make_unique(DTy, Tag, true); + auto TypeEntry = std::make_unique(DTy, Tag, true); auto &Fixup = FixupDerivedTypes[CTy->getName()]; Fixup.first = CTag == dwarf::DW_TAG_union_type; Fixup.second.push_back(TypeEntry.get()); @@ -602,7 +580,7 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { - auto TypeEntry = llvm::make_unique(DTy, Tag, false); + auto TypeEntry = std::make_unique(DTy, Tag, false); TypeId = addType(std::move(TypeEntry), DTy); } else if (Tag != dwarf::DW_TAG_member) { return; @@ -669,7 +647,7 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { } auto TypeEntry = - llvm::make_unique(CTy, true, HasBitField, Elements.size()); + std::make_unique(CTy, true, HasBitField, Elements.size()); StructTypes.push_back(TypeEntry.get()); TypeId = addType(std::move(TypeEntry), CTy); @@ -774,9 +752,10 @@ void BTFDebug::emitBTFSection() { } void BTFDebug::emitBTFExtSection() { - // Do not emit section if empty FuncInfoTable and LineInfoTable. + // Do not emit section if empty FuncInfoTable and LineInfoTable + // and FieldRelocTable. if (!FuncInfoTable.size() && !LineInfoTable.size() && - !OffsetRelocTable.size() && !ExternRelocTable.size()) + !FieldRelocTable.size()) return; MCContext &Ctx = OS.getContext(); @@ -788,8 +767,8 @@ void BTFDebug::emitBTFExtSection() { // Account for FuncInfo/LineInfo record size as well. uint32_t FuncLen = 4, LineLen = 4; - // Do not account for optional OffsetReloc/ExternReloc. - uint32_t OffsetRelocLen = 0, ExternRelocLen = 0; + // Do not account for optional FieldReloc. 
+ uint32_t FieldRelocLen = 0; for (const auto &FuncSec : FuncInfoTable) { FuncLen += BTF::SecFuncInfoSize; FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize; @@ -798,28 +777,20 @@ void BTFDebug::emitBTFExtSection() { LineLen += BTF::SecLineInfoSize; LineLen += LineSec.second.size() * BTF::BPFLineInfoSize; } - for (const auto &OffsetRelocSec : OffsetRelocTable) { - OffsetRelocLen += BTF::SecOffsetRelocSize; - OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize; - } - for (const auto &ExternRelocSec : ExternRelocTable) { - ExternRelocLen += BTF::SecExternRelocSize; - ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize; + for (const auto &FieldRelocSec : FieldRelocTable) { + FieldRelocLen += BTF::SecFieldRelocSize; + FieldRelocLen += FieldRelocSec.second.size() * BTF::BPFFieldRelocSize; } - if (OffsetRelocLen) - OffsetRelocLen += 4; - if (ExternRelocLen) - ExternRelocLen += 4; + if (FieldRelocLen) + FieldRelocLen += 4; OS.EmitIntValue(0, 4); OS.EmitIntValue(FuncLen, 4); OS.EmitIntValue(FuncLen, 4); OS.EmitIntValue(LineLen, 4); OS.EmitIntValue(FuncLen + LineLen, 4); - OS.EmitIntValue(OffsetRelocLen, 4); - OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4); - OS.EmitIntValue(ExternRelocLen, 4); + OS.EmitIntValue(FieldRelocLen, 4); // Emit func_info table. OS.AddComment("FuncInfo"); @@ -853,35 +824,20 @@ void BTFDebug::emitBTFExtSection() { } } - // Emit offset reloc table. 
- if (OffsetRelocLen) { - OS.AddComment("OffsetReloc"); - OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4); - for (const auto &OffsetRelocSec : OffsetRelocTable) { - OS.AddComment("Offset reloc section string offset=" + - std::to_string(OffsetRelocSec.first)); - OS.EmitIntValue(OffsetRelocSec.first, 4); - OS.EmitIntValue(OffsetRelocSec.second.size(), 4); - for (const auto &OffsetRelocInfo : OffsetRelocSec.second) { - Asm->EmitLabelReference(OffsetRelocInfo.Label, 4); - OS.EmitIntValue(OffsetRelocInfo.TypeID, 4); - OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4); - } - } - } - - // Emit extern reloc table. - if (ExternRelocLen) { - OS.AddComment("ExternReloc"); - OS.EmitIntValue(BTF::BPFExternRelocSize, 4); - for (const auto &ExternRelocSec : ExternRelocTable) { - OS.AddComment("Extern reloc section string offset=" + - std::to_string(ExternRelocSec.first)); - OS.EmitIntValue(ExternRelocSec.first, 4); - OS.EmitIntValue(ExternRelocSec.second.size(), 4); - for (const auto &ExternRelocInfo : ExternRelocSec.second) { - Asm->EmitLabelReference(ExternRelocInfo.Label, 4); - OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4); + // Emit field reloc table. 
+ if (FieldRelocLen) { + OS.AddComment("FieldReloc"); + OS.EmitIntValue(BTF::BPFFieldRelocSize, 4); + for (const auto &FieldRelocSec : FieldRelocTable) { + OS.AddComment("Field reloc section string offset=" + + std::to_string(FieldRelocSec.first)); + OS.EmitIntValue(FieldRelocSec.first, 4); + OS.EmitIntValue(FieldRelocSec.second.size(), 4); + for (const auto &FieldRelocInfo : FieldRelocSec.second) { + Asm->EmitLabelReference(FieldRelocInfo.Label, 4); + OS.EmitIntValue(FieldRelocInfo.TypeID, 4); + OS.EmitIntValue(FieldRelocInfo.OffsetNameOff, 4); + OS.EmitIntValue(FieldRelocInfo.RelocKind, 4); } } } @@ -942,7 +898,7 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) { // Construct subprogram func type auto FuncTypeEntry = - llvm::make_unique(SP->getName(), ProtoTypeId); + std::make_unique(SP->getName(), ProtoTypeId); uint32_t FuncTypeId = addType(std::move(FuncTypeEntry)); for (const auto &TypeEntry : TypeEntries) @@ -980,71 +936,27 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) { return Id; } -// Find struct/array debuginfo types given a type id. -void BTFDebug::setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType, - BTFTypeArray **PrevArrayType) { - for (const auto &StructType : StructTypes) { - if (StructType->getId() == TypeId) { - *PrevStructType = StructType; - return; - } - } - for (const auto &ArrayType : ArrayTypes) { - if (ArrayType->getId() == TypeId) { - *PrevArrayType = ArrayType; - return; - } - } -} - -/// Generate a struct member offset relocation. -void BTFDebug::generateOffsetReloc(const MachineInstr *MI, +/// Generate a struct member field relocation. 
+void BTFDebug::generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, DIType *RootTy, StringRef AccessPattern) { - BTFTypeStruct *PrevStructType = nullptr; - BTFTypeArray *PrevArrayType = nullptr; unsigned RootId = populateStructType(RootTy); - setTypeFromId(RootId, &PrevStructType, &PrevArrayType); - unsigned RootTySize = PrevStructType->getStructSize(); + size_t FirstDollar = AccessPattern.find_first_of('$'); + size_t FirstColon = AccessPattern.find_first_of(':'); + size_t SecondColon = AccessPattern.find_first_of(':', FirstColon + 1); + StringRef IndexPattern = AccessPattern.substr(FirstDollar + 1); + StringRef RelocKindStr = AccessPattern.substr(FirstColon + 1, + SecondColon - FirstColon); + StringRef PatchImmStr = AccessPattern.substr(SecondColon + 1, + FirstDollar - SecondColon); - BTFOffsetReloc OffsetReloc; - OffsetReloc.Label = ORSym; - OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back()); - OffsetReloc.TypeID = RootId; - - uint32_t Start = 0, End = 0, Offset = 0; - bool FirstAccess = true; - for (auto C : AccessPattern) { - if (C != ':') { - End++; - } else { - std::string SubStr = AccessPattern.substr(Start, End - Start); - int Loc = std::stoi(SubStr); - - if (FirstAccess) { - Offset = Loc * RootTySize; - FirstAccess = false; - } else if (PrevStructType) { - uint32_t MemberOffset, MemberTypeId; - PrevStructType->getMemberInfo(Loc, MemberOffset, MemberTypeId); - - Offset += MemberOffset >> 3; - PrevStructType = nullptr; - setTypeFromId(MemberTypeId, &PrevStructType, &PrevArrayType); - } else if (PrevArrayType) { - uint32_t LocOffset, ElementTypeId; - PrevArrayType->getLocInfo(Loc, LocOffset, ElementTypeId); - - Offset += LocOffset; - PrevArrayType = nullptr; - setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType); - } - Start = End + 1; - End = Start; - } - } - AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset; - OffsetRelocTable[SecNameOff].push_back(OffsetReloc); + BTFFieldReloc FieldReloc; + 
FieldReloc.Label = ORSym; + FieldReloc.OffsetNameOff = addString(IndexPattern); + FieldReloc.TypeID = RootId; + FieldReloc.RelocKind = std::stoull(RelocKindStr); + PatchImms[AccessPattern.str()] = std::stoul(PatchImmStr); + FieldRelocTable[SecNameOff].push_back(FieldReloc); } void BTFDebug::processLDimm64(const MachineInstr *MI) { @@ -1052,7 +964,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { // will generate an .BTF.ext record. // // If the insn is "r2 = LD_imm64 @__BTF_...", - // add this insn into the .BTF.ext OffsetReloc subsection. + // add this insn into the .BTF.ext FieldReloc subsection. // Relocation looks like: // . SecName: // . InstOffset @@ -1083,16 +995,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); - generateOffsetReloc(MI, ORSym, Ty, GVar->getName()); - } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - MCSymbol *ORSym = OS.getContext().createTempSymbol(); - OS.EmitLabel(ORSym); - - BTFExternReloc ExternReloc; - ExternReloc.Label = ORSym; - ExternReloc.ExternNameOff = addString(GVar->getName()); - ExternRelocTable[SecNameOff].push_back(ExternReloc); + generateFieldReloc(MI, ORSym, Ty, GVar->getName()); } } } @@ -1200,12 +1103,12 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { ? 
BTF::VAR_GLOBAL_ALLOCATED : BTF::VAR_STATIC; auto VarEntry = - llvm::make_unique(Global.getName(), GVTypeId, GVarInfo); + std::make_unique(Global.getName(), GVTypeId, GVarInfo); uint32_t VarId = addType(std::move(VarEntry)); // Find or create a DataSec if (DataSecEntries.find(SecName) == DataSecEntries.end()) { - DataSecEntries[SecName] = llvm::make_unique(Asm, SecName); + DataSecEntries[SecName] = std::make_unique(Asm, SecName); } // Calculate symbol size @@ -1224,30 +1127,12 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { const GlobalValue *GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); - DIType *Ty = dyn_cast(MDN); - std::string TypeName = Ty->getName(); - int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()]; - - // Emit "mov ri, " for abstract member accesses. + // Emit "mov ri, " for patched immediate. + uint32_t Imm = PatchImms[GVar->getName().str()]; OutMI.setOpcode(BPF::MOV_ri); OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); OutMI.addOperand(MCOperand::createImm(Imm)); return true; - } else if (GVar && !GVar->hasInitializer() && - GVar->hasExternalLinkage() && - GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) { - const IntegerType *IntTy = dyn_cast(GVar->getValueType()); - assert(IntTy); - // For patchable externals, emit "LD_imm64, ri, 0" if the external - // variable is 64bit width, emit "mov ri, 0" otherwise. 
- if (IntTy->getBitWidth() == 64) - OutMI.setOpcode(BPF::LD_imm64); - else - OutMI.setOpcode(BPF::MOV_ri); - OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); - OutMI.addOperand(MCOperand::createImm(0)); - return true; } } } @@ -1281,7 +1166,7 @@ void BTFDebug::endModule() { } if (StructTypeId == 0) { - auto FwdTypeEntry = llvm::make_unique(TypeName, IsUnion); + auto FwdTypeEntry = std::make_unique(TypeName, IsUnion); StructTypeId = addType(std::move(FwdTypeEntry)); } diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h index 6c0cdde17d9..c01e0d1d161 100644 --- a/lib/Target/BPF/BTFDebug.h +++ b/lib/Target/BPF/BTFDebug.h @@ -104,15 +104,13 @@ public: /// Handle array type. class BTFTypeArray : public BTFTypeBase { - uint32_t ElemSize; struct BTF::BTFArray ArrayInfo; public: - BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems); + BTFTypeArray(uint32_t ElemTypeId, uint32_t NumElems); uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; } void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); - void getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId); }; /// Handle struct/union type. @@ -130,8 +128,6 @@ public: void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); std::string getName(); - void getMemberInfo(uint32_t Loc, uint32_t &Offset, uint32_t &MemberType); - uint32_t getStructSize(); }; /// Handle function pointer. @@ -199,7 +195,7 @@ class BTFStringTable { /// A mapping from string table offset to the index /// of the Table. It is used to avoid putting /// duplicated strings in the table. - std::unordered_map OffsetToIdMap; + std::map OffsetToIdMap; /// A vector of strings to represent the string table. std::vector Table; @@ -228,16 +224,11 @@ struct BTFLineInfo { }; /// Represent one offset relocation. 
-struct BTFOffsetReloc { +struct BTFFieldReloc { const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc uint32_t TypeID; ///< Type ID uint32_t OffsetNameOff; ///< The string to traverse types -}; - -/// Represent one extern relocation. -struct BTFExternReloc { - const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc - uint32_t ExternNameOff; ///< The extern variable name + uint32_t RelocKind; ///< What to patch the instruction }; /// Collect and emit BTF information. @@ -253,13 +244,11 @@ class BTFDebug : public DebugHandlerBase { std::unordered_map DIToIdMap; std::map> FuncInfoTable; std::map> LineInfoTable; - std::map> OffsetRelocTable; - std::map> ExternRelocTable; + std::map> FieldRelocTable; StringMap> FileContent; std::map> DataSecEntries; std::vector StructTypes; - std::vector ArrayTypes; - std::map AccessOffsets; + std::map PatchImms; std::map>> FixupDerivedTypes; @@ -305,13 +294,9 @@ class BTFDebug : public DebugHandlerBase { void processGlobals(bool ProcessingMapDef); /// Generate one offset relocation record. - void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym, + void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, DIType *RootTy, StringRef AccessPattern); - /// Set the to-be-traversed Struct/Array Type based on TypeId. - void setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType, - BTFTypeArray **PrevArrayType); - /// Populating unprocessed struct type. 
unsigned populateStructType(const DIType *Ty); diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 057bbf5c3b0..ef4e324c3bd 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -39,7 +39,7 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { // determine the type of the relocation - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); case FK_SecRel_8: @@ -85,5 +85,5 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, std::unique_ptr llvm::createBPFELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 0881bf841f9..590c4a2eb69 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -702,7 +702,7 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { // Make sure we have a number (false is returned if expression is a number) if (!getParser().parseExpression(Value)) { // Make sure this is a number that is in range - const MCConstantExpr *MCE = dyn_cast(Value); + auto *MCE = cast(Value); uint64_t IntValue = MCE->getValue(); if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue)) return Error(ExprLoc, "literal value out of range (256) for falign"); diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index b7e95caf24f..efd5ed91512 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -84,7 +84,7 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) - OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + OS << 'v' 
<< Register::virtReg2Index(PV.R); else OS << 's'; return OS; @@ -201,7 +201,7 @@ BitTracker::~BitTracker() { bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { // An example when "meet" can be invoked with SelfR == 0 is a phi node // with a physical register as an operand. - assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR)); + assert(SelfR == 0 || Register::isVirtualRegister(SelfR)); bool Changed = false; for (uint16_t i = 0, n = Bits.size(); i < n; ++i) { const BitValue &RCV = RC[i]; @@ -335,12 +335,13 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // 1. find a physical register PhysR from the same class as RR.Reg, // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, // 3. find a register class that contains PhysS. - if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + if (Register::isVirtualRegister(RR.Reg)) { const auto &VC = composeWithSubRegIndex(*MRI.getRegClass(RR.Reg), RR.Sub); return TRI.getRegSizeInBits(VC); } - assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); - unsigned PhysR = (RR.Sub == 0) ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub); + assert(Register::isPhysicalRegister(RR.Reg)); + Register PhysR = + (RR.Sub == 0) ? Register(RR.Reg) : TRI.getSubReg(RR.Reg, RR.Sub); return getPhysRegBitWidth(PhysR); } @@ -350,10 +351,10 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, // Physical registers are assumed to be present in the map with an unknown // value. Don't actually insert anything in the map, just return the cell. - if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + if (Register::isPhysicalRegister(RR.Reg)) return RegisterCell::self(0, BW); - assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + assert(Register::isVirtualRegister(RR.Reg)); // For virtual registers that belong to a class that is not tracked, // generate an "unknown" value as well. 
const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); @@ -376,7 +377,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, // While updating the cell map can be done in a meaningful way for // a part of a register, it makes little sense to implement it as the // SSA representation would never contain such "partial definitions". - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return; assert(RR.Sub == 0 && "Unexpected sub-register in definition"); // Eliminate all ref-to-reg-0 bit values: replace them with "self". @@ -711,7 +712,7 @@ BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { } uint16_t BT::MachineEvaluator::getPhysRegBitWidth(unsigned Reg) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); const TargetRegisterClass &PC = *TRI.getMinimalPhysRegClass(Reg); return TRI.getRegSizeInBits(PC); } @@ -874,7 +875,7 @@ void BT::visitNonBranch(const MachineInstr &MI) { continue; RegisterRef RD(MO); assert(RD.Sub == 0 && "Unexpected sub-register in definition"); - if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + if (!Register::isVirtualRegister(RD.Reg)) continue; bool Changed = false; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index b07d15609ed..3d771d388e2 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -130,7 +130,7 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!MO.isReg()) return true; - unsigned RegNumber = MO.getReg(); + Register RegNumber = MO.getReg(); // This should be an assert in the frontend. if (Hexagon::DoubleRegsRegClass.contains(RegNumber)) RegNumber = TRI->getSubReg(RegNumber, ExtraCode[0] == 'L' ? 
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 7b75d251ccd..3068fb6f962 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -147,11 +147,11 @@ namespace { } static inline unsigned v2x(unsigned v) { - return TargetRegisterInfo::virtReg2Index(v); + return Register::virtReg2Index(v); } static inline unsigned x2v(unsigned x) { - return TargetRegisterInfo::index2VirtReg(x); + return Register::index2VirtReg(x); } }; @@ -290,8 +290,8 @@ void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI, for (auto &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; Defs.insert(R); } @@ -302,8 +302,8 @@ void HexagonBitSimplify::getInstrUses(const MachineInstr &MI, for (auto &Op : MI.operands()) { if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; Uses.insert(R); } @@ -353,8 +353,7 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR)) return false; auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); decltype(End) NextI; @@ -367,8 +366,7 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || 
!Register::isVirtualRegister(NewR)) return false; if (hasTiedUse(OldR, MRI, NewSR)) return false; @@ -384,8 +382,7 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(OldR) || - !TargetRegisterInfo::isVirtualRegister(NewR)) + if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR)) return false; if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR)) return false; @@ -896,7 +893,7 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, // register class. const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return nullptr; auto *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) @@ -927,8 +924,8 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( // with a 32-bit register. bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD, const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) || - !TargetRegisterInfo::isVirtualRegister(RS.Reg)) + if (!Register::isVirtualRegister(RD.Reg) || + !Register::isVirtualRegister(RS.Reg)) return false; // Return false if one (or both) classes are nullptr. 
auto *DRC = getFinalVRegClass(RD, MRI); @@ -979,7 +976,7 @@ bool DeadCodeElimination::isDead(unsigned R) const { continue; if (UseI->isPHI()) { assert(!UseI->getOperand(0).getSubReg()); - unsigned DR = UseI->getOperand(0).getReg(); + Register DR = UseI->getOperand(0).getReg(); if (DR == R) continue; } @@ -1018,8 +1015,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) { + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R) || !isDead(R)) { AllDead = false; break; } @@ -1220,8 +1217,8 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { return false; MachineInstr &UseI = *I->getParent(); if (UseI.isPHI() || UseI.isCopy()) { - unsigned DefR = UseI.getOperand(0).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefR)) + Register DefR = UseI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(DefR)) return false; Pending.push_back(DefR); } else { @@ -1345,7 +1342,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, // If found, replace the instruction with a COPY. const DebugLoc &DL = MI->getDebugLoc(); const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); MachineInstr *CopyI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(RS.Reg, 0, RS.Sub); @@ -1412,7 +1409,7 @@ bool ConstGeneration::isTfrConst(const MachineInstr &MI) { // register class and the actual value being transferred. 
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { - unsigned Reg = MRI.createVirtualRegister(RC); + Register Reg = MRI.createVirtualRegister(RC); if (RC == &Hexagon::IntRegsRegClass) { BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) .addImm(int32_t(C)); @@ -1470,7 +1467,7 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { if (Defs.count() != 1) continue; unsigned DR = Defs.find_first(); - if (!TargetRegisterInfo::isVirtualRegister(DR)) + if (!Register::isVirtualRegister(DR)) continue; uint64_t U; const BitTracker::RegisterCell &DRC = BT.lookup(DR); @@ -1609,7 +1606,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, auto *FRC = HBS::getFinalVRegClass(R, MRI); if (findMatch(R, MR, AVB)) { - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR) .addReg(MR.Reg, 0, MR.Sub); BT.put(BitTracker::RegisterRef(NewR), BT.get(MR)); @@ -1628,7 +1625,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B, BitTracker::RegisterRef ML, MH; if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) { auto *FRC = HBS::getFinalVRegClass(R, MRI); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR) .addReg(ML.Reg, 0, ML.Sub) .addImm(SubLo) @@ -1819,7 +1816,7 @@ bool BitSimplification::matchHalf(unsigned SelfR, if (Reg == 0 || Reg == SelfR) // Don't match "self". 
return false; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return false; if (!BT.has(Reg)) return false; @@ -2025,7 +2022,7 @@ bool BitSimplification::genPackhl(MachineInstr *MI, return false; MachineBasicBlock &B = *MI->getParent(); - unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); DebugLoc DL = MI->getDebugLoc(); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); @@ -2097,7 +2094,7 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI, MachineBasicBlock &B = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); - unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); BuildMI(B, At, DL, HII.get(COpc), NewR) @@ -2154,7 +2151,7 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, if (!validateReg(RS, NewOpc, 1)) continue; - unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR) @@ -2368,7 +2365,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, return true; } } else if (V.is(0) || V.is(1)) { - unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); unsigned NewOpc = V.is(0) ? 
Hexagon::PS_false : Hexagon::PS_true; BuildMI(B, At, DL, HII.get(NewOpc), NewR); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2541,7 +2538,7 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock &B = *MI->getParent(); - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); auto At = MI->isPHI() ? B.getFirstNonPHI() : MachineBasicBlock::iterator(MI); auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR) @@ -2612,8 +2609,8 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI, KnownNZ = true; } - auto ReplaceWithConst = [&] (int C) { - unsigned NewR = MRI.createVirtualRegister(FRC); + auto ReplaceWithConst = [&](int C) { + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR) .addImm(C); HBS::replaceReg(RD.Reg, NewR, MRI); @@ -2678,7 +2675,7 @@ bool BitSimplification::simplifyRCmp0(MachineInstr *MI, // replace the comparison with a C2_muxii, using the same predicate // register, but with operands substituted with 0/1 accordingly. 
if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) { - unsigned NewR = MRI.createVirtualRegister(FRC); + Register NewR = MRI.createVirtualRegister(FRC); BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR) .addReg(InpDef->getOperand(1).getReg()) .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi)) @@ -3071,7 +3068,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, DenseMap RegMap; const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR); - unsigned PhiR = MRI->createVirtualRegister(PhiRC); + Register PhiR = MRI->createVirtualRegister(PhiRC); BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR) .addReg(NewPredR) .addMBB(&PB) @@ -3083,7 +3080,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, const MachineInstr *SI = G.Ins[i-1]; unsigned DR = getDefReg(SI); const TargetRegisterClass *RC = MRI->getRegClass(DR); - unsigned NewDR = MRI->createVirtualRegister(RC); + Register NewDR = MRI->createVirtualRegister(RC); DebugLoc DL = SI->getDebugLoc(); auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR); @@ -3162,7 +3159,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { if (Defs.count() != 1) continue; unsigned DefR = Defs.find_first(); - if (!TargetRegisterInfo::isVirtualRegister(DefR)) + if (!Register::isVirtualRegister(DefR)) continue; if (!isBitShuffle(&*I, DefR)) continue; diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index ba50faac2cf..ebd060ce503 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -111,7 +111,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } uint16_t HexagonEvaluator::getPhysRegBitWidth(unsigned Reg) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); using namespace Hexagon; const auto &HST = MF.getSubtarget(); @@ -1042,8 +1042,8 @@ unsigned 
HexagonEvaluator::getUniqueDefVReg(const MachineInstr &MI) const { for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || !Op.isDef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (DefReg != 0) return 0; @@ -1220,7 +1220,7 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, RegisterRef RD = MI.getOperand(0); RegisterRef RS = MI.getOperand(1); assert(RD.Sub == 0); - if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + if (!Register::isPhysicalRegister(RS.Reg)) return false; RegExtMap::const_iterator F = VRX.find(RD.Reg); if (F == VRX.end()) diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 999150fc8c6..d1d1b8ee7d4 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -268,14 +268,14 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs( return SRs; } - if (TargetRegisterInfo::isPhysicalRegister(R.Reg)) { + if (Register::isPhysicalRegister(R.Reg)) { MCSubRegIterator I(R.Reg, &TRI); if (!I.isValid()) SRs.insert({R.Reg, 0}); for (; I.isValid(); ++I) SRs.insert({*I, 0}); } else { - assert(TargetRegisterInfo::isVirtualRegister(R.Reg)); + assert(Register::isVirtualRegister(R.Reg)); auto &RC = *MRI.getRegClass(R.Reg); unsigned PReg = *RC.begin(); MCSubRegIndexIterator I(PReg, &TRI); @@ -321,7 +321,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, if (!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; RegisterRef R = { Op.getReg(), Op.getSubReg() }; - if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) + if (Register::isPhysicalRegister(R.Reg) && Reserved[R.Reg]) continue; bool IsKill = Op.isKill(); for (auto S : expandToSubRegs(R, MRI, TRI)) { @@ -338,7 +338,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, continue; 
RegisterRef R = { Op.getReg(), Op.getSubReg() }; for (auto S : expandToSubRegs(R, MRI, TRI)) { - if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg]) + if (Register::isPhysicalRegister(S.Reg) && Reserved[S.Reg]) continue; if (Op.isDead()) Clobbers.insert(S); @@ -374,7 +374,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, // Update maps for defs. for (RegisterRef S : Defs) { // Defs should already be expanded into subregs. - assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + assert(!Register::isPhysicalRegister(S.Reg) || !MCSubRegIterator(S.Reg, &TRI, false).isValid()); if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) closeRange(S); @@ -383,7 +383,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, // Update maps for clobbers. for (RegisterRef S : Clobbers) { // Clobbers should already be expanded into subregs. - assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) || + assert(!Register::isPhysicalRegister(S.Reg) || !MCSubRegIterator(S.Reg, &TRI, false).isValid()); if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None) closeRange(S); @@ -482,7 +482,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( } } for (auto &P : LiveMap) - if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) + if (Register::isVirtualRegister(P.first.Reg)) addDeadRanges(P.first); LLVM_DEBUG(dbgs() << __func__ << ": dead map\n" diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp index ee93739b2c7..08f74080687 100644 --- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp +++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -105,12 +105,11 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, // offset of the current instruction from the start. 
unsigned InstOffset = 0; for (auto &B : MF) { - if (B.getAlignment()) { + if (B.getAlignment() != Align::None()) { // Although we don't know the exact layout of the final code, we need // to account for alignment padding somehow. This heuristic pads each // aligned basic block according to the alignment value. - int ByteAlign = (1u << B.getAlignment()) - 1; - InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + InstOffset = alignTo(InstOffset, B.getAlignment()); } OffsetMap[&B] = InstOffset; for (auto &MI : B.instrs()) { diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp index cfed0ecef27..ddc9b847ef1 100644 --- a/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -14,9 +14,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Pass.h" #include #include #include @@ -235,24 +236,24 @@ namespace { Reg = Op.getReg(); Sub = Op.getSubReg(); } else if (Op.isFI()) { - Reg = TargetRegisterInfo::index2StackSlot(Op.getIndex()); + Reg = llvm::Register::index2StackSlot(Op.getIndex()); } return *this; } bool isVReg() const { - return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) && - TargetRegisterInfo::isVirtualRegister(Reg); + return Reg != 0 && !llvm::Register::isStackSlot(Reg) && + llvm::Register::isVirtualRegister(Reg); } bool isSlot() const { - return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg); + return Reg != 0 && llvm::Register::isStackSlot(Reg); } operator MachineOperand() const { if (isVReg()) return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false, /*Kill*/false, /*Dead*/false, /*Undef*/false, /*EarlyClobber*/false, Sub); - if (TargetRegisterInfo::isStackSlot(Reg)) { - int FI = 
TargetRegisterInfo::stackSlot2Index(Reg); + if (llvm::Register::isStackSlot(Reg)) { + int FI = llvm::Register::stackSlot2Index(Reg); return MachineOperand::CreateFI(FI); } llvm_unreachable("Cannot create MachineOperand"); @@ -1524,7 +1525,7 @@ void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, } HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { - unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + llvm::Register DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); MachineBasicBlock &MBB = *DefL.Block; MachineBasicBlock::iterator At = DefL.At; DebugLoc dl = DefL.Block->findDebugLoc(DefL.At); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index d1fde5da5fe..a82501cabb9 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -208,14 +208,14 @@ namespace { bool has(unsigned R) const { // All non-virtual registers are considered "bottom". - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return true; MapType::const_iterator F = Map.find(R); return F != Map.end(); } const LatticeCell &get(unsigned R) const { - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return Bottom; MapType::const_iterator F = Map.find(R); if (F != Map.end()) @@ -623,7 +623,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) { const MachineOperand &MD = PN.getOperand(0); RegisterSubReg DefR(MD); - assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg)); + assert(Register::isVirtualRegister(DefR.Reg)); bool Changed = false; @@ -652,7 +652,7 @@ Bottomize: RegisterSubReg UseR(SO); // If the input is not a virtual register, we don't really know what // value it holds. 
- if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg)) + if (!Register::isVirtualRegister(UseR.Reg)) goto Bottomize; // If there is no cell for an input register, it means top. if (!Cells.has(UseR.Reg)) @@ -694,7 +694,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { continue; RegisterSubReg DefR(MO); // Only track virtual registers. - if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + if (!Register::isVirtualRegister(DefR.Reg)) continue; bool Changed = false; // If the evaluation failed, set cells for all output registers to bottom. @@ -1070,7 +1070,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) { bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs, LatticeCell &RC) { - if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + if (!Register::isVirtualRegister(R.Reg)) return false; const LatticeCell &L = Inputs.get(R.Reg); if (!R.SubReg) { @@ -1926,7 +1926,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI, unsigned Opc = MI.getOpcode(); RegisterSubReg DefR(MD); assert(!DefR.SubReg); - if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg)) + if (!Register::isVirtualRegister(DefR.Reg)) return false; if (MI.isCopy()) { @@ -2793,7 +2793,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) continue; RegisterSubReg R(MO); - if (!TargetRegisterInfo::isVirtualRegister(R.Reg)) + if (!Register::isVirtualRegister(R.Reg)) continue; HasUse = true; // PHIs can legitimately have "top" cells after propagation. 
@@ -2813,7 +2813,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse() || MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); dbgs() << printReg(R, &TRI) << ": " << Inputs.get(R) << "\n"; } } @@ -2831,8 +2831,8 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; assert(!MO.getSubReg()); assert(Inputs.has(R)); @@ -2871,7 +2871,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, const MCInstrDesc *NewD = (Ps & P::Zero) ? &HII.get(Hexagon::PS_false) : &HII.get(Hexagon::PS_true); - unsigned NewR = MRI->createVirtualRegister(PredRC); + Register NewR = MRI->createVirtualRegister(PredRC); const MachineInstrBuilder &MIB = BuildMI(B, At, DL, *NewD, NewR); (void)MIB; #ifndef NDEBUG @@ -2893,7 +2893,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, NewRC = &Hexagon::IntRegsRegClass; else NewRC = &Hexagon::DoubleRegsRegClass; - unsigned NewR = MRI->createVirtualRegister(NewRC); + Register NewR = MRI->createVirtualRegister(NewRC); const MachineInstr *NewMI; if (W == 32) { @@ -3009,7 +3009,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, if (V < 0) V = -V; const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); const MachineOperand &Src1 = MI.getOperand(1); NewMI = BuildMI(B, At, DL, D, NewR) .addReg(Src1.getReg(), getRegState(Src1), Src1.getSubReg()) @@ -3111,8 +3111,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg, unsigned ToReg) { - 
assert(TargetRegisterInfo::isVirtualRegister(FromReg)); - assert(TargetRegisterInfo::isVirtualRegister(ToReg)); + assert(Register::isVirtualRegister(FromReg)); + assert(Register::isVirtualRegister(ToReg)); for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) { MachineOperand &O = *I; ++I; diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index a09ccab483c..394a329ac44 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -133,8 +133,8 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg() && Op1.isReg()); - unsigned DestReg = Op0.getReg(); - unsigned SrcReg = Op1.getReg(); + Register DestReg = Op0.getReg(); + Register SrcReg = Op1.getReg(); return Hexagon::IntRegsRegClass.contains(DestReg) && Hexagon::IntRegsRegClass.contains(SrcReg); } @@ -146,7 +146,7 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII, const MachineOperand &Op1 = MI.getOperand(1); assert(Op0.isReg()); - unsigned DestReg = Op0.getReg(); + Register DestReg = Op0.getReg(); // Ensure that TargetFlags are MO_NO_FLAG for a global. 
This is a // workaround for an ABI bug that prevents GOT relocations on combine // instructions @@ -226,7 +226,7 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI, } static bool isEvenReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); if (Hexagon::IntRegsRegClass.contains(Reg)) return (Reg - Hexagon::R0) % 2 == 0; if (Hexagon::HvxVRRegClass.contains(Reg)) @@ -265,7 +265,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, unsigned I1DestReg, unsigned I2DestReg, bool &DoInsertAtI1) { - unsigned I2UseReg = UseReg(I2.getOperand(1)); + Register I2UseReg = UseReg(I2.getOperand(1)); // It is not safe to move I1 and I2 into one combine if I2 has a true // dependence on I1. @@ -332,7 +332,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // At O3 we got better results (dhrystone) by being more conservative here. if (!ShouldCombineAggressively) End = std::next(MachineBasicBlock::iterator(I2)); - unsigned I1UseReg = UseReg(I1.getOperand(1)); + Register I1UseReg = UseReg(I1.getOperand(1)); // Track killed operands. If we move across an instruction that kills our // operand, we need to update the kill information on the moved I1. It kills // the operand now. @@ -410,7 +410,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { continue; // Look for the defining instruction. 
- unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); MachineInstr *DefInst = LastDef[Reg]; if (!DefInst) continue; @@ -442,7 +442,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { if (Op.isReg()) { if (!Op.isDef() || !Op.getReg()) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); if (Hexagon::DoubleRegsRegClass.contains(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) LastDef[*SubRegs] = &MI; @@ -528,7 +528,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, while (I2 != I1.getParent()->end() && I2->isDebugInstr()) ++I2; - unsigned I1DestReg = I1.getOperand(0).getReg(); + Register I1DestReg = I1.getOperand(0).getReg(); for (MachineBasicBlock::iterator End = I1.getParent()->end(); I2 != End; ++I2) { @@ -544,7 +544,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, if (ShouldCombineAggressively && PotentiallyNewifiableTFR.count(&*I2)) continue; - unsigned I2DestReg = I2->getOperand(0).getReg(); + Register I2DestReg = I2->getOperand(0).getReg(); // Check that registers are adjacent and that the first destination register // is even. @@ -579,8 +579,8 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2, ++MI; // Figure out whether I1 or I2 goes into the lowreg part. - unsigned I1DestReg = I1.getOperand(0).getReg(); - unsigned I2DestReg = I2.getOperand(0).getReg(); + Register I1DestReg = I1.getOperand(0).getReg(); + Register I2DestReg = I2.getOperand(0).getReg(); bool IsI1Loreg = (I2DestReg - I1DestReg) == 1; unsigned LoRegDef = IsI1Loreg ? 
I1DestReg : I2DestReg; unsigned SubLo; @@ -758,7 +758,7 @@ void HexagonCopyToCombine::emitCombineIR(MachineBasicBlock::iterator &InsertPt, unsigned DoubleDestReg, MachineOperand &HiOperand, MachineOperand &LoOperand) { - unsigned LoReg = LoOperand.getReg(); + Register LoReg = LoOperand.getReg(); unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); DebugLoc DL = InsertPt->getDebugLoc(); @@ -807,7 +807,7 @@ void HexagonCopyToCombine::emitCombineRI(MachineBasicBlock::iterator &InsertPt, MachineOperand &HiOperand, MachineOperand &LoOperand) { unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); - unsigned HiReg = HiOperand.getReg(); + Register HiReg = HiOperand.getReg(); DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); @@ -857,8 +857,8 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt, MachineOperand &LoOperand) { unsigned LoRegKillFlag = getKillRegState(LoOperand.isKill()); unsigned HiRegKillFlag = getKillRegState(HiOperand.isKill()); - unsigned LoReg = LoOperand.getReg(); - unsigned HiReg = HiOperand.getReg(); + Register LoReg = LoOperand.getReg(); + Register HiReg = HiOperand.getReg(); DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); diff --git a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index 2ce1419e479..e4a2ba0ec29 100644 --- a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -37,12 +37,12 @@ def: Pat<(int_hexagon_F2_sfmax IntRegs:$src1, IntRegs:$src2), (F2_sfmax IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vabswsat DoubleRegs:$src1), (A2_vabswsat DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asr_i_p 
DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred:$src2), - (A4_combineri IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_combineri IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vpmpyh_acc DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -75,8 +75,8 @@ def: Pat<(int_hexagon_A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddws DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_maxup DoubleRegs:$src1, DoubleRegs:$src2), (A2_maxup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmphgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_interleave DoubleRegs:$src1), (S2_interleave DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpyi_s0 DoubleRegs:$src1, DoubleRegs:$src2), @@ -89,10 +89,10 @@ def: Pat<(int_hexagon_C2_cmpgtu IntRegs:$src1, IntRegs:$src2), (C2_cmpgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: 
Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2), - (A4_cmphgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2), - (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (A4_cmphgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C2_cmpgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyi IntRegs:$src1, IntRegs:$src2), (M2_mpyi IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2uw_chop DoubleRegs:$src1), @@ -103,12 +103,12 @@ def: Pat<(int_hexagon_M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_vrcnegh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S2_extractup DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3), + (S2_extractup DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_S4_ntstbit_r IntRegs:$src1, IntRegs:$src2), (S4_ntstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_w2sf IntRegs:$src1), @@ -125,10 +125,10 @@ def: Pat<(int_hexagon_A4_cmpbgt IntRegs:$src1, IntRegs:$src2), (A4_cmpbgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_rcmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_subacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_orp DoubleRegs:$src1, DoubleRegs:$src2), @@ -137,28 +137,28 @@ def: Pat<(int_hexagon_M2_mpyu_up IntRegs:$src1, IntRegs:$src2), (M2_mpyu_up IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: 
Pat<(int_hexagon_S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_asr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cmpbgtu IntRegs:$src1, IntRegs:$src2), (A4_cmpbgtu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2), (A4_vcmpbeq_any DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2), - (A4_cmpbgti IntRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_cmpbgti IntRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addsp IntRegs:$src1, DoubleRegs:$src2), (A2_addsp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2), (S4_vxsubaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpheqi 
DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpheqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2), (S4_vxsubaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_pmpyw IntRegs:$src1, IntRegs:$src2), @@ -177,10 +177,10 @@ def: Pat<(int_hexagon_A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), (A2_pxorf PredRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asl_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrminuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffma IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -199,10 +199,10 @@ def: Pat<(int_hexagon_M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2), (M4_vrmpyoh_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_bitsset IntRegs:$src1, IntRegs:$src2), (C2_bitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2), - (M2_mpysip IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2), - (M2_mpysin IntRegs:$src1, 
u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2), + (M2_mpysip IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2), + (M2_mpysin IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_boundscheck IntRegs:$src1, DoubleRegs:$src2), (A4_boundscheck IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmpybuu DoubleRegs:$src1, DoubleRegs:$src2), @@ -225,10 +225,10 @@ def: Pat<(int_hexagon_F2_conv_ud2df DoubleRegs:$src1), (F2_conv_ud2df DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_subi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_subi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vzxthw IntRegs:$src1), (S2_vzxthw IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfadd IntRegs:$src1, IntRegs:$src2), @@ -241,12 +241,12 @@ def: Pat<(int_hexagon_M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$sr (M2_vmac2su_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2), (M2_dpmpyss_s0 
IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4), - (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3, u5_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4), + (S2_insert IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_packhl IntRegs:$src1, IntRegs:$src2), (S2_packhl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpwgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguwr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -259,8 +259,8 @@ def: Pat<(int_hexagon_M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_and_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_d2df DoubleRegs:$src1), (F2_conv_d2df DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2), - (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (C2_cmpgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vconj DoubleRegs:$src1), (A2_vconj DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_vw DoubleRegs:$src1, IntRegs:$src2), @@ -279,8 +279,8 @@ def: Pat<(int_hexagon_C2_any8 PredRegs:$src1), (C2_any8 PredRegs:$src1)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_S2_togglebit_r IntRegs:$src1, IntRegs:$src2), (S2_togglebit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_togglebit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_togglebit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_uw2sf IntRegs:$src1), (F2_conv_uw2sf IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsathb_nopack DoubleRegs:$src1), @@ -303,10 +303,10 @@ def: Pat<(int_hexagon_C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3) (C4_or_andn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmpwgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyoh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyoh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -323,34 +323,34 @@ def: Pat<(int_hexagon_M2_vrcmacr_s0c DoubleRegs:$src1, 
DoubleRegs:$src2, DoubleR (M2_vrcmacr_s0c DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgwcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyi_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred:$src1), - (A2_tfrsi s32_0ImmPred:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfrsi s32_0ImmPred_timm:$src1), + (A2_tfrsi s32_0ImmPred_timm:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svnavgh IntRegs:$src1, IntRegs:$src2), (A2_svnavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$src1, 
u5_0ImmPred_timm:$src2), + (S2_lsr_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_vmac2 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmphgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svavgh IntRegs:$src1, IntRegs:$src2), (A2_svavgh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyeh_acc_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M4_vrmpyeh_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_lsr_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_combine_hl IntRegs:$src1, IntRegs:$src2), (A2_combine_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_up IntRegs:$src1, IntRegs:$src2), @@ -381,10 +381,10 @@ def: Pat<(int_hexagon_M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3 (M2_cmacr_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_or_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3), - 
(M4_mpyrr_addi u32_0ImmPred:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3), + (M4_mpyrr_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_andi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_hl_s1 IntRegs:$src1, IntRegs:$src2), @@ -453,8 +453,8 @@ def: Pat<(int_hexagon_M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (F2_sffms_lib IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2), - (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C4_cmpneqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_and_xor IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_sat DoubleRegs:$src1), @@ -469,8 +469,8 @@ def: Pat<(int_hexagon_A2_svavghs IntRegs:$src1, IntRegs:$src2), (A2_svavghs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (A2_vrsadub_acc DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: 
Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2), - (C2_bitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2), + (C2_bitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2), (A2_subh_h16_sat_hh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_h16_sat_hl IntRegs:$src1, IntRegs:$src2), @@ -535,10 +535,10 @@ def: Pat<(int_hexagon_C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3 (C2_vmux PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_parityp DoubleRegs:$src1, DoubleRegs:$src2), (S2_parityp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -557,30 +557,30 @@ def: Pat<(int_hexagon_M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src (M2_cnacsc_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, 
Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_cnacsc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3), + (S4_subaddi IntRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_tstbit_r IntRegs:$src1, IntRegs:$src2), (S2_tstbit_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3), - (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3), + (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmachs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmachs_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_tstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_tstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_tstbit_i IntRegs:$src1, 
u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_up_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2), (S2_extractu_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyuh_rs0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_lsr_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), @@ -605,14 +605,14 @@ def: Pat<(int_hexagon_F2_conv_w2df IntRegs:$src1), (F2_conv_w2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C2_cmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vcnegh DoubleRegs:$src1, IntRegs:$src2), (S2_vcnegh DoubleRegs:$src1, IntRegs:$src2)>, 
Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpweqi DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vdmpyrs_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vdmpyrs_s1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -633,8 +633,8 @@ def: Pat<(int_hexagon_S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3 (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0p DoubleRegs:$src1), (S2_cl0p DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_valignib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupd IntRegs:$src1, IntRegs:$src2), (F2_sffixupd IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hl_s1 IntRegs:$src1, IntRegs:$src2), @@ -653,8 +653,8 @@ def: Pat<(int_hexagon_M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyul_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S4_ntstbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ntstbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S4_ntstbit_i IntRegs:$src1, 
u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffixupr IntRegs:$src1), (F2_sffixupr IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -669,32 +669,32 @@ def: Pat<(int_hexagon_C2_andn PredRegs:$src1, PredRegs:$src2), (C2_andn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2), (M2_vmpy2s_s0pack IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_addaddi IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_ll_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_sat_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_rcmpeqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_and IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_and DoubleRegs:$src1, 
DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyuh_rs1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_round_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_max IntRegs:$src1, IntRegs:$src2), (A2_max IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_round_rr IntRegs:$src1, IntRegs:$src2), (A4_round_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2), - (A4_combineii s8_0ImmPred:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_combineir s32_0ImmPred:$src1, IntRegs:$src2), - (A4_combineir s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2), + (A4_combineii s8_0ImmPred_timm:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A4_combineir s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), (C4_and_orn PredRegs:$src1, PredRegs:$src2, PredRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vmacbuu DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -703,8 +703,8 @@ def: Pat<(int_hexagon_A4_rcmpeq IntRegs:$src1, IntRegs:$src2), (A4_rcmpeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2), 
(M4_cmpyr_whc DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vzxtbh IntRegs:$src1), (S2_vzxtbh IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacuhs_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -721,8 +721,8 @@ def: Pat<(int_hexagon_M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyi_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_nbitsset IntRegs:$src1, IntRegs:$src2), (C4_nbitsset IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -745,10 +745,10 @@ def: Pat<(int_hexagon_M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs (M2_mpyd_acc_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred:$src1), - (F2_sfimm_p 
u10_0ImmPred:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred:$src1), - (F2_sfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_p u10_0ImmPred_timm:$src1), + (F2_sfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfimm_n u10_0ImmPred_timm:$src1), + (F2_sfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2), (M4_cmpyr_wh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -759,14 +759,14 @@ def: Pat<(int_hexagon_F2_conv_d2sf DoubleRegs:$src1), (F2_conv_d2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2), - (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_cmpbeqi IntRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfcmpuo IntRegs:$src1, IntRegs:$src2), (F2_sfcmpuo IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavguw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwh_nopack DoubleRegs:$src1), (S2_vsatwh_nopack DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_hh_s0 IntRegs:$src1, IntRegs:$src2), 
@@ -783,8 +783,8 @@ def: Pat<(int_hexagon_M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_or_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_minp DoubleRegs:$src1, DoubleRegs:$src2), (A2_minp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_andix IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpy_rnd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_rnd_lh_s1 IntRegs:$src1, IntRegs:$src2), @@ -817,16 +817,16 @@ def: Pat<(int_hexagon_S4_extract_rp IntRegs:$src1, DoubleRegs:$src2), (S4_extract_rp IntRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2), - (C4_cmplteui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (C4_cmplteui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_tfrcpp CtrRegs64:$src1), (A4_tfrcpp CtrRegs64:$src1)>, Requires<[HasV5]>; -def: 
Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmphgti IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_svw_trun DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmphgti IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrminh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrminw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), @@ -837,8 +837,8 @@ def: Pat<(int_hexagon_S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (S2_insertp_rp DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vnavghcr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_subi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_subi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vh DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -851,14 +851,14 @@ def: Pat<(int_hexagon_S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_A2_satb IntRegs:$src1), (A2_satb IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4), - (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3, u6_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4), + (S2_insertp DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3, u6_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2), (S2_extractup_rp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S4_vxaddsubw DoubleRegs:$src1, DoubleRegs:$src2), @@ -925,8 +925,8 @@ def: Pat<(int_hexagon_M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyr_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2), (M2_dpmpyss_rnd_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxri PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3), + (C2_muxri 
PredRegs:$src1, s32_0ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_vmac2es_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmac2es_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -937,8 +937,8 @@ def: Pat<(int_hexagon_M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyu_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_acc_hl_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_acc_hl_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -947,8 +947,8 @@ def: Pat<(int_hexagon_S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddh DoubleRegs:$src1, 
DoubleRegs:$src2), (A2_vaddh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -957,16 +957,16 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_lh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3), - (M4_mpyri_addi u32_0ImmPred:$src1, IntRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3), + (M4_mpyri_addi u32_0ImmPred_timm:$src1, IntRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M2_macsip IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfrcrr CtrRegs:$src1), (A2_tfrcrr CtrRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_macsin IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M2_macsin 
IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_orn PredRegs:$src1, PredRegs:$src2), (C2_orn PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_and_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1005,8 +1005,8 @@ def: Pat<(int_hexagon_M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntR (M2_vrcmpys_acc_s1 DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfcmpge DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_accii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2), (A5_vaddhubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxw DoubleRegs:$src1, DoubleRegs:$src2), @@ -1017,10 +1017,10 @@ def: Pat<(int_hexagon_A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxthw IntRegs:$src1), (S2_vsxthw IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_andi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_andi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; 
+def: Pat<(int_hexagon_S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgt IntRegs:$src1, IntRegs:$src2), @@ -1035,22 +1035,22 @@ def: Pat<(int_hexagon_F2_conv_sf2w IntRegs:$src1), (F2_conv_sf2w IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2), - (F2_sfclass IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_sfclass IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_xor_andn IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3), - (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_addasl_rrri IntRegs:$src1, IntRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2), (M5_vdmpybsu DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s0 IntRegs:$src1, IntRegs:$src2, 
IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyu_nac_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred:$src2), - (A2_addi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_addi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addp DoubleRegs:$src1, DoubleRegs:$src2), (A2_addp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2s_s1pack IntRegs:$src1, IntRegs:$src2), @@ -1063,8 +1063,8 @@ def: Pat<(int_hexagon_M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_nacci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2), (S2_shuffeh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1131,12 +1131,12 @@ def: Pat<(int_hexagon_C2_and PredRegs:$src1, PredRegs:$src2), (C2_and PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S5_popcountp DoubleRegs:$src1), (S5_popcountp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S4_extractp DoubleRegs:$src1, u6_0ImmPred:$src2, 
u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3), + (S4_extractp DoubleRegs:$src1, u6_0ImmPred_timm:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_cl0 IntRegs:$src1), (S2_cl0 IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2), - (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2), + (A4_vcmpbgti DoubleRegs:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M2_mmacls_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmacls_s0 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1167,8 +1167,8 @@ def: Pat<(int_hexagon_M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2), - (A4_bitspliti IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_bitspliti IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vmaxub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_hh_s0 IntRegs:$src1, IntRegs:$src2), @@ -1185,26 +1185,26 @@ def: Pat<(int_hexagon_F2_conv_sf2d IntRegs:$src1), (F2_conv_sf2d IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, 
Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred:$src1), - (F2_dfimm_n u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_n u10_0ImmPred_timm:$src1), + (F2_dfimm_n u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cmphgt IntRegs:$src1, IntRegs:$src2), (A4_cmphgt IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred:$src1), - (F2_dfimm_p u10_0ImmPred:$src1)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfimm_p u10_0ImmPred_timm:$src1), + (F2_dfimm_p u10_0ImmPred_timm:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_acc_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3), - (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred:$src2, IntRegs:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3), + (M4_mpyri_addr_u2 IntRegs:$src1, u6_2ImmPred_timm:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s1_sat_i DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (M5_vrmacbuu DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3), - (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred:$src3)>, Requires<[HasV5]>; 
+def: Pat<(int_hexagon_S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3), + (S2_vspliceib DoubleRegs:$src1, DoubleRegs:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cnacs_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1215,20 +1215,20 @@ def: Pat<(int_hexagon_A2_maxu IntRegs:$src1, IntRegs:$src2), (A2_maxu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_maxp DoubleRegs:$src1, DoubleRegs:$src2), (A2_maxp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_andir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_andir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfrecipa IntRegs:$src1, IntRegs:$src2), (F2_sfrecipa IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2), - (A2_combineii s32_0ImmPred:$src1, s8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2), + (A2_combineii s32_0ImmPred_timm:$src1, s8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_orn IntRegs:$src1, IntRegs:$src2), (A4_orn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2), - (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2), + (A4_cmpbgtui IntRegs:$src1, u32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsr_r_r_or IntRegs:$src1, 
IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2), - (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2), + (A4_vcmpbeqi DoubleRegs:$src1, u8_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r IntRegs:$src1, IntRegs:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2), @@ -1251,16 +1251,16 @@ def: Pat<(int_hexagon_A2_satub IntRegs:$src1), (A2_satub IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2), (M2_vrcmpys_s1 DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (S4_or_ori IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2), (C4_fastcorner9_not PredRegs:$src1, PredRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfrih IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred:$src2), - (A2_tfril IntRegs:$src1, u16_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3), - (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfrih IntRegs:$src1, u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_tfril IntRegs:$src1, u16_0ImmPred_timm:$src2), + (A2_tfril IntRegs:$src1, 
u16_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3), + (M4_mpyri_addr IntRegs:$src1, IntRegs:$src2, u32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunehb DoubleRegs:$src1), (S2_vtrunehb DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vabsw DoubleRegs:$src1), @@ -1269,14 +1269,14 @@ def: Pat<(int_hexagon_A2_vabsh DoubleRegs:$src1), (A2_vabsh DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sfsub IntRegs:$src1, IntRegs:$src2), (F2_sfsub IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src2, s8_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3), + (C2_muxii PredRegs:$src1, s32_0ImmPred_timm:$src2, s8_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (C2_muxir PredRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_swiz IntRegs:$src1), (A2_swiz IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrsc_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrsc_s1 
IntRegs:$src1, IntRegs:$src2), @@ -1295,44 +1295,44 @@ def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs (M2_mpy_nac_sat_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_sat_ll_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S4_extract IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S4_extract IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2), (A2_vcmpweq DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_ud2sf DoubleRegs:$src1), (F2_conv_ud2sf DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_tfr IntRegs:$src1), (A2_tfr 
IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_subri s32_0ImmPred:$src1, IntRegs:$src2), - (A2_subri s32_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2), + (A2_subri s32_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuw DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vmpybuu IntRegs:$src1, IntRegs:$src2), (M5_vmpybuu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (A4_vrmaxuh DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_vw DoubleRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgw DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_brev IntRegs:$src1), (S2_brev IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavgh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_clrbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: 
Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_clrbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_asl_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_rs1 DoubleRegs:$src1, DoubleRegs:$src2), @@ -1343,8 +1343,8 @@ def: Pat<(int_hexagon_M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyl_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2), (M2_mmpyl_s1 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3), - (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3), + (M2_naccii IntRegs:$src1, IntRegs:$src2, s32_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vrndpackwhs DoubleRegs:$src1), (S2_vrndpackwhs DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vtrunewh DoubleRegs:$src1, DoubleRegs:$src2), @@ -1357,24 +1357,24 @@ def: Pat<(int_hexagon_M2_mpyd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_ll_s1 IntRegs:$src1, 
IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M4_mac_up_s1_sat IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4), - (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred:$src4)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4), + (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_uw2df IntRegs:$src1), (F2_conv_uw2df IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2), (A2_vaddubs DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred:$src2), - (A2_orir IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A2_orir IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_andp DoubleRegs:$src1, DoubleRegs:$src2), (A2_andp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2), (S2_lfsp DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_min IntRegs:$src1, IntRegs:$src2), (A2_min IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2), - (M2_mpysmi IntRegs:$src1, m32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2), + (M2_mpysmi IntRegs:$src1, m32_0ImmPred_timm:$src2)>, 
Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2), (M2_vcmpy_s0_sat_r DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyu_acc_ll_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1397,10 +1397,10 @@ def: Pat<(int_hexagon_M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyd_lh_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2w DoubleRegs:$src1), (F2_conv_df2w DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2), - (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S5_asrhub_sat DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2d DoubleRegs:$src1), (F2_conv_df2d DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mmaculs_s1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), @@ -1423,8 +1423,8 @@ def: Pat<(int_hexagon_A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2), (A2_vavghr DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4), (F2_sffma_sc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3, PredRegs:$src4)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2), - (F2_dfclass DoubleRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_F2_dfclass DoubleRegs:$src1, u5_0ImmPred_timm:$src2), + (F2_dfclass DoubleRegs:$src1, 
u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2ud DoubleRegs:$src1), (F2_conv_df2ud DoubleRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_conv_df2uw DoubleRegs:$src1), @@ -1433,16 +1433,16 @@ def: Pat<(int_hexagon_M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2), (M2_cmpyrs_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2), - (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2), + (C4_cmpltei IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_cmplteu IntRegs:$src1, IntRegs:$src2), (C4_cmplteu IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_vsubb_map DoubleRegs:$src1, DoubleRegs:$src2), (A2_vsubub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2), (A2_subh_l16_ll IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrmpy_s0 DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_rnd_hh_s1 IntRegs:$src1, IntRegs:$src2), @@ -1471,14 +1471,14 @@ def: Pat<(int_hexagon_M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_hl_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2), (M2_vrcmpyi_s0c DoubleRegs:$src1, DoubleRegs:$src2)>, 
Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2), - (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S2_asr_i_p_rnd DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2), (A2_addpsat DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svaddhs IntRegs:$src1, IntRegs:$src2), (A2_svaddhs IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_ori_lsr_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_ori_lsr_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpy_sat_rnd_ll_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_sat_rnd_ll_s0 IntRegs:$src1, IntRegs:$src2), @@ -1499,8 +1499,8 @@ def: Pat<(int_hexagon_M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2), (M2_mpyud_lh_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_lsli s6_0ImmPred:$src1, IntRegs:$src2), - (S4_lsli s6_0ImmPred:$src1, IntRegs:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2), + (S4_lsli s6_0ImmPred_timm:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2), (S2_lsl_r_vw DoubleRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_hh_s1 IntRegs:$src1, 
IntRegs:$src2), @@ -1529,8 +1529,8 @@ def: Pat<(int_hexagon_A4_cmpbeq IntRegs:$src1, IntRegs:$src2), (A4_cmpbeq IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_negp DoubleRegs:$src1), (A2_negp DoubleRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2), - (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_asl_i_r_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2), (A2_addh_l16_sat_hl IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsatwuh DoubleRegs:$src1), @@ -1541,10 +1541,10 @@ def: Pat<(int_hexagon_S2_svsathb IntRegs:$src1), (S2_svsathb IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2), (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2), - (A4_cround_ri IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2), - (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_cround_ri IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbpaddi DoubleRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_cround_rr IntRegs:$src1, IntRegs:$src2), (A4_cround_rr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C2_mux PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1563,12 +1563,12 @@ def: Pat<(int_hexagon_A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2), (A2_vminuh DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_A2_vminub DoubleRegs:$src1, DoubleRegs:$src2), (A2_vminub DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu IntRegs:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3), + (S2_extractu IntRegs:$src1, u5_0ImmPred_timm:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A2_svsubh IntRegs:$src1, IntRegs:$src2), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2), - (S4_clbaddi IntRegs:$src1, s6_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2), + (S4_clbaddi IntRegs:$src1, s6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (F2_sffms IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_vsxtbh IntRegs:$src1), @@ -1589,16 +1589,16 @@ def: Pat<(int_hexagon_M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$sr (M2_mpy_acc_hh_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_acc_hh_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S4_addi_asl_ri u32_0ImmPred:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S4_addi_asl_ri u32_0ImmPred_timm:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s1 DoubleRegs:$src1, 
IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyd_nac_hh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2), - (A4_cmpheqi IntRegs:$src1, s32_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2), + (A4_cmpheqi IntRegs:$src1, s32_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_acc_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1623,8 +1623,8 @@ def: Pat<(int_hexagon_M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntReg (M2_mpyud_nac_lh_s1 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpyud_nac_lh_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2), - (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2), + (A4_round_ri_sat IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mpy_nac_hl_s0 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; def: 
Pat<(int_hexagon_M2_mpy_nac_hl_s1 IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), @@ -1637,10 +1637,10 @@ def: Pat<(int_hexagon_M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRe (M2_mmacls_rs1 DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_cmaci_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2), - (S2_setbit_i IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S2_setbit_i IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_andn IntRegs:$src1, IntRegs:$src2), (A4_andn IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M5_vrmpybsu DoubleRegs:$src1, DoubleRegs:$src2), @@ -1655,8 +1655,8 @@ def: Pat<(int_hexagon_C2_bitsclr IntRegs:$src1, IntRegs:$src2), (C2_bitsclr IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_xor_xacc IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2), - (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2), + (A4_vcmpbgtui DoubleRegs:$src1, u7_0ImmPred_timm:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_A4_ornp DoubleRegs:$src1, DoubleRegs:$src2), (A4_ornp 
DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_C4_and_or PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), @@ -1673,14 +1673,14 @@ def: Pat<(int_hexagon_M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s1 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; def: Pat<(int_hexagon_M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2), (M2_vmpy2su_s0 IntRegs:$src1, IntRegs:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2), - (C4_nbitsclri IntRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2), - (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred:$src2)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2), + (C4_nbitsclri IntRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2), + (S2_lsr_i_vh DoubleRegs:$src1, u4_0ImmPred_timm:$src2)>, Requires<[HasV5]>; +def: Pat<(int_hexagon_S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV5]>; // V55 Scalar Instructions. @@ -1689,30 +1689,30 @@ def: Pat<(int_hexagon_A5_ACS DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src // V60 Scalar Instructions. 
-def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2), - (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3), - (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2), - (S6_rol_i_r IntRegs:$src1, u5_0ImmPred:$src2)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3), - (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred:$src3)>, Requires<[HasV60]>; -def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, 
u6_0ImmPred:$src3), - (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_and DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_xacc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_and IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_acc IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_xacc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2), + (S6_rol_i_p DoubleRegs:$src1, u6_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_nac DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_acc DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_or IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2), + (S6_rol_i_r IntRegs:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV60]>; +def: 
Pat<(int_hexagon_S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3), + (S6_rol_i_r_nac IntRegs:$src1, IntRegs:$src2, u5_0ImmPred_timm:$src3)>, Requires<[HasV60]>; +def: Pat<(int_hexagon_S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3), + (S6_rol_i_p_or DoubleRegs:$src1, DoubleRegs:$src2, u6_0ImmPred_timm:$src3)>, Requires<[HasV60]>; // V62 Scalar Instructions. @@ -1744,8 +1744,8 @@ def: Pat<(int_hexagon_F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2), (F2_dfadd DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV66]>; def: Pat<(int_hexagon_M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), (M2_mnaci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>, Requires<[HasV66]>; -def: Pat<(int_hexagon_S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2), - (S2_mask u5_0ImmPred:$src1, u5_0ImmPred:$src2)>, Requires<[HasV66]>; +def: Pat<(int_hexagon_S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2), + (S2_mask u5_0ImmPred_timm:$src1, u5_0ImmPred_timm:$src2)>, Requires<[HasV66]>; // V60 HVX Instructions. 
@@ -1773,10 +1773,10 @@ def: Pat<(int_hexagon_V6_vaddh_dv HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddh_dv_128B HvxWR:$src1, HvxWR:$src2), (V6_vaddh_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpybusi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vshufoh HvxVR:$src1, HvxVR:$src2), (V6_vshufoh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vshufoh_128B HvxVR:$src1, HvxVR:$src2), @@ -1789,10 +1789,10 @@ def: Pat<(int_hexagon_V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhsuisat_128B HvxWR:$src1, IntRegs:$src2), (V6_vdmpyhsuisat HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: 
Pat<(int_hexagon_V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrsadubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vnavgw HvxVR:$src1, HvxVR:$src2), (V6_vnavgw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vnavgw_128B HvxVR:$src1, HvxVR:$src2), @@ -2369,10 +2369,10 @@ def: Pat<(int_hexagon_V6_vsubhsat HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubhsat_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubhsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpyubi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vabsw HvxVR:$src1), (V6_vabsw HvxVR:$src1)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vabsw_128B HvxVR:$src1), @@ -2489,10 +2489,10 @@ def: 
Pat<(int_hexagon_V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vmpybv_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vmpybv_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrsadubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrsadubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vdmpyhb_dv_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vdmpyhb_dv_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), @@ -2677,10 +2677,10 @@ def: Pat<(int_hexagon_V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddbnq_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_vaddbnq HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, 
u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlalignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlalignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsatwh HvxVR:$src1, HvxVR:$src2), (V6_vsatwh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsatwh_128B HvxVR:$src1, HvxVR:$src2), @@ -2721,10 +2721,10 @@ def: Pat<(int_hexagon_V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_veqh_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), (V6_veqh_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_valignbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_valignbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddwsat HvxVR:$src1, HvxVR:$src2), (V6_vaddwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -2885,10 +2885,10 @@ def: Pat<(int_hexagon_V6_vsubh HvxVR:$src1, HvxVR:$src2), (V6_vsubh 
HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubh_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubh HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3), - (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred:$src3)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpyubi_128B HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3), + (V6_vrmpyubi HvxWR:$src1, IntRegs:$src2, u1_0ImmPred_timm:$src3)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vminw HvxVR:$src1, HvxVR:$src2), (V6_vminw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vminw_128B HvxVR:$src1, HvxVR:$src2), @@ -2929,10 +2929,10 @@ def: Pat<(int_hexagon_V6_vsubuhw HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuhw_128B HvxVR:$src1, HvxVR:$src2), (V6_vsubuhw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV60, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4), - (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred:$src4)>, Requires<[HasV60, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, 
HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vrmpybusi_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4), + (V6_vrmpybusi_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3, u1_0ImmPred_timm:$src4)>, Requires<[HasV60, UseHVX128B]>; def: Pat<(int_hexagon_V6_vasrw HvxVR:$src1, IntRegs:$src2), (V6_vasrw HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV60, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrw_128B HvxVR:$src1, IntRegs:$src2), @@ -3016,10 +3016,10 @@ def: Pat<(int_hexagon_V6_vlsrb HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vlsrb_128B HvxVR:$src1, IntRegs:$src2), (V6_vlsrb HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwhi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvwhi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2), (V6_vaddububb_sat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vaddububb_sat_128B HvxVR:$src1, HvxVR:$src2), @@ -3032,10 +3032,10 @@ def: Pat<(int_hexagon_V6_ldtp0 PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldtp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldtp0 PredRegs:$src1, 
IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvb_oracci_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvvb_oracci HvxVR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2), (V6_vsubuwsat_dv HvxWR:$src1, HvxWR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_dv_128B HvxWR:$src1, HvxWR:$src2), @@ -3124,10 +3124,10 @@ def: Pat<(int_hexagon_V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$sr (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vasrwuhrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasrwuhrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3), - (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred:$src3)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, 
HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvvbi_128B HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3), + (V6_vlutvvbi HvxVR:$src1, HvxVR:$src2, u3_0ImmPred_timm:$src3)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubuwsat HvxVR:$src1, HvxVR:$src2), (V6_vsubuwsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubuwsat_128B HvxVR:$src1, HvxVR:$src2), @@ -3188,10 +3188,10 @@ def: Pat<(int_hexagon_V6_ldcnp0 PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_ldcnp0_128B PredRegs:$src1, IntRegs:$src2), (V6_ldcnp0 PredRegs:$src1, IntRegs:$src2)>, Requires<[HasV62, UseHVX128B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX64B]>; -def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4), - (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred:$src4)>, Requires<[HasV62, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vlutvwh_oracci_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4), + (V6_vlutvwh_oracci HvxWR:$src1, HvxVR:$src2, HvxVR:$src3, u3_0ImmPred_timm:$src4)>, Requires<[HasV62, UseHVX128B]>; def: Pat<(int_hexagon_V6_vsubbsat HvxVR:$src1, HvxVR:$src2), (V6_vsubbsat HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV62, UseHVX64B]>; def: Pat<(int_hexagon_V6_vsubbsat_128B HvxVR:$src1, HvxVR:$src2), diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td index fdba7b97125..8a94d96522c 100644 --- 
a/lib/Target/Hexagon/HexagonDepOperands.td +++ b/lib/Target/Hexagon/HexagonDepOperands.td @@ -8,120 +8,125 @@ // Automatically generated file, please consult code owner before editing. //===----------------------------------------------------------------------===// +multiclass ImmOpPred { + def "" : PatLeaf<(vt imm), pred>; + def _timm : PatLeaf<(vt timm), pred>; +} + def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_0Imm : Operand { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; } -def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>; +defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>; def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s29_3Imm : Operand { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; } -def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>; +defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>; def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; } def u6_0Imm : Operand { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>; +defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>; def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def a30_2Imm : Operand { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; +defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def u29_3ImmOperand : AsmOperandClass { let 
Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; } def u29_3Imm : Operand { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>; +defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>; def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s8_0Imm : Operand { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; } -def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>; +defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>; def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; } def u32_0Imm : Operand { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>; +defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>; def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; } def u4_2Imm : Operand { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>; +defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>; def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; } def u3_0Imm : Operand { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>; +defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>; def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; } def 
b15_2Imm : Operand { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>; +defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>; def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; } def u11_3Imm : Operand { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>; +defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>; def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_3Imm : Operand { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; } -def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>; +defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>; def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; } def m32_0Imm : Operand { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; +defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; } def u3_1Imm : Operand { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>; +defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>; def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; } def u1_0Imm : Operand { let 
ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>; +defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>; def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s31_1Imm : Operand { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; } -def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; +defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s3_0Imm : Operand { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; } -def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; +defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s30_2Imm : Operand { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; } -def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; +defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; } def u4_0Imm : Operand { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>; +defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>; def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_0Imm : Operand { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = 
"s6_0ImmDecoder"; } -def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>; +defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>; def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; } def u5_3Imm : Operand { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>; +defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>; def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s32_0Imm : Operand { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; } -def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; +defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>; def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; } def s6_3Imm : Operand { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; } -def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>; +defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>; def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; } def u10_0Imm : Operand { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>; +defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>; def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; } def u31_1Imm : Operand { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u31_1ImmPred : PatLeaf<(i32 imm), [{ 
return isShiftedUInt<32, 1>(N->getSExtValue());}]>; +defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>; def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_1Imm : Operand { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; } -def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>; +defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>; def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; } def u16_0Imm : Operand { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>; +defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>; def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; } def u6_1Imm : Operand { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>; +defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>; def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; } def u5_2Imm : Operand { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>; +defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>; def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; } def u26_6Imm : Operand { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>; +defm 
u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>; def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; } def u6_2Imm : Operand { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>; +defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>; def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; } def u7_0Imm : Operand { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>; +defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>; def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b13_2Imm : Operand { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>; +defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>; def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; } def u5_0Imm : Operand { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>; +defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>; def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; } def u2_0Imm : Operand { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>; +defm u2_0ImmPred : ImmOpPred<[{ return 
isShiftedUInt<2, 0>(N->getSExtValue());}]>; def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_2Imm : Operand { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; } -def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>; +defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>; def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def b30_2Imm : Operand { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; } -def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; +defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; } def u8_0Imm : Operand { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>; +defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>; def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; } def u30_2Imm : Operand { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; } -def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>; +defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>; diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index c1f32e54e98..0844fb8a862 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -250,7 +250,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, unsigned Opc = T1I->getOpcode(); if (Opc != 
Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) return false; - unsigned PredR = T1I->getOperand(0).getReg(); + Register PredR = T1I->getOperand(0).getReg(); // Get the layout successor, or 0 if B does not have one. MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); @@ -384,8 +384,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (!isPredicate(R)) continue; @@ -401,8 +401,8 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; const MachineInstr *DefI = MRI->getVRegDef(R); // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. 
@@ -437,7 +437,7 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { break; if (usesUndefVReg(&MI)) return false; - unsigned DefR = MI.getOperand(0).getReg(); + Register DefR = MI.getOperand(0).getReg(); if (isPredicate(DefR)) return false; } @@ -491,8 +491,8 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (isPredicate(R)) PredDefs++; @@ -798,7 +798,7 @@ unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B, const MCInstrDesc &D = HII->get(Opc); DebugLoc DL = B->findBranchDebugLoc(); - unsigned MuxR = MRI->createVirtualRegister(DRC); + Register MuxR = MRI->createVirtualRegister(DRC); BuildMI(*B, At, DL, D, MuxR) .addReg(PredR) .addReg(TR, 0, TSR) @@ -837,7 +837,7 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, unsigned MuxR = 0, MuxSR = 0; if (TR && FR) { - unsigned DR = PN->getOperand(0).getReg(); + Register DR = PN->getOperand(0).getReg(); const TargetRegisterClass *RC = MRI->getRegClass(DR); MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC, FP.PredR, TR, TSR, FR, FSR); @@ -988,8 +988,8 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { MachineInstr *PN = &*I; assert(PN->getNumOperands() == 3 && "Invalid phi node"); MachineOperand &UO = PN->getOperand(1); - unsigned UseR = UO.getReg(), UseSR = UO.getSubReg(); - unsigned DefR = PN->getOperand(0).getReg(); + Register UseR = UO.getReg(), UseSR = UO.getSubReg(); + Register DefR = PN->getOperand(0).getReg(); unsigned NewR = UseR; if (UseSR) { // MRI.replaceVregUsesWith does not allow to update the subregister, diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index c343e426ac7..8984ee82960 100644 --- 
a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -285,7 +285,7 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { } LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : MRI->getMaxLaneMaskForVReg(Reg); } @@ -364,7 +364,7 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); if (Range.empty()) return; @@ -372,8 +372,8 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair { if (!Op.isReg() || !Op.isDef()) return { false, false }; - unsigned DR = Op.getReg(), DSR = Op.getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) + Register DR = Op.getReg(), DSR = Op.getSubReg(); + if (!Register::isVirtualRegister(DR) || DR != Reg) return { false, false }; LaneBitmask SLM = getLaneMask(DR, DSR); LaneBitmask A = SLM & LM; @@ -551,8 +551,8 @@ void HexagonExpandCondsets::updateLiveness(std::set &RegSet, bool Recalc, bool UpdateKills, bool UpdateDeads) { UpdateKills |= UpdateDeads; for (unsigned R : RegSet) { - if (!TargetRegisterInfo::isVirtualRegister(R)) { - assert(TargetRegisterInfo::isPhysicalRegister(R)); + if (!Register::isVirtualRegister(R)) { + assert(Register::isPhysicalRegister(R)); // There shouldn't be any physical registers as operands, except // possibly reserved registers. 
assert(MRI->isReserved(R)); @@ -579,17 +579,17 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, using namespace Hexagon; if (SO.isReg()) { - unsigned PhysR; + Register PhysR; RegisterRef RS = SO; - if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { + if (Register::isVirtualRegister(RS.Reg)) { const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); assert(VC->begin() != VC->end() && "Empty register class"); PhysR = *VC->begin(); } else { - assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); + assert(Register::isPhysicalRegister(RS.Reg)); PhysR = RS.Reg; } - unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); + Register PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); switch (TRI->getRegSizeInBits(*RC)) { case 32: @@ -671,7 +671,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, MachineOperand &MD = MI.getOperand(0); // Definition MachineOperand &MP = MI.getOperand(1); // Predicate register assert(MD.isDef()); - unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + Register DR = MD.getReg(), DSR = MD.getSubReg(); bool ReadUndef = MD.isUndef(); MachineBasicBlock::iterator At = MI; @@ -802,7 +802,7 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, // For physical register we would need to check register aliases, etc. // and we don't want to bother with that. It would be of little value // before the actual register rewriting (from virtual to physical). - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; // No redefs for any operand. 
if (isRefInMap(RR, Defs, Exec_Then)) @@ -954,7 +954,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, return false; RegisterRef RT(MS); - unsigned PredR = MP.getReg(); + Register PredR = MP.getReg(); MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); if (!DefI || !isPredicable(DefI)) return false; @@ -999,7 +999,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, // subregisters are other physical registers, and we are not checking // that. RegisterRef RR = Op; - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; ReferenceMap &Map = Op.isDef() ? Defs : Uses; @@ -1091,7 +1091,7 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, } bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { - if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + if (!Register::isVirtualRegister(RR.Reg)) return false; const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); if (RC == &Hexagon::IntRegsRegClass) { diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index f7edc168de4..d21de8ccb5a 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -114,12 +114,11 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { // First pass - compute the offset of each basic block. for (const MachineBasicBlock &MBB : MF) { - if (MBB.getAlignment()) { + if (MBB.getAlignment() != Align::None()) { // Although we don't know the exact layout of the final code, we need // to account for alignment padding somehow. This heuristic pads each // aligned basic block according to the alignment value. 
- int ByteAlign = (1u << MBB.getAlignment()) - 1; - InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); + InstOffset = alignTo(InstOffset, MBB.getAlignment()); } BlockToInstOffset[&MBB] = InstOffset; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 3368ee4fb3b..bfa3372d7fa 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -303,10 +303,10 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, if (MO.isFI()) return true; if (MO.isReg()) { - unsigned R = MO.getReg(); + Register R = MO.getReg(); // Virtual registers will need scavenging, which then may require // a stack slot. - if (TargetRegisterInfo::isVirtualRegister(R)) + if (Register::isVirtualRegister(R)) return true; for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S) if (CSR[*S]) @@ -973,8 +973,8 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, // understand paired registers for cfi_offset. // Eg .cfi_offset r1:0, -64 - unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); - unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); + Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi); + Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo); unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, @@ -1377,10 +1377,10 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( } MFI.setLocalFrameSize(LFS); - unsigned A = MFI.getLocalFrameMaxAlign(); + Align A = MFI.getLocalFrameMaxAlign(); assert(A <= 8 && "Unexpected local frame alignment"); - if (A == 0) - MFI.setLocalFrameMaxAlign(8); + if (A == 1) + MFI.setLocalFrameMaxAlign(Align(8)); MFI.setUseLocalStackAllocationBlock(true); // Set the physical aligned-stack base address register. 
@@ -1570,13 +1570,13 @@ bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, const HexagonInstrInfo &HII, SmallVectorImpl &NewRegs) const { MachineInstr *MI = &*It; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); - unsigned SrcR = MI->getOperand(1).getReg(); + Register DstR = MI->getOperand(0).getReg(); + Register SrcR = MI->getOperand(1).getReg(); if (!Hexagon::ModRegsRegClass.contains(DstR) || !Hexagon::ModRegsRegClass.contains(SrcR)) return false; - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1)); BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR) .addReg(TmpR, RegState::Kill); @@ -1595,13 +1595,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); // TmpR = C2_tfrpr SrcR if SrcR is a predicate register // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? 
Hexagon::C2_tfrpr : Hexagon::A2_tfrcrr; BuildMI(B, It, DL, HII.get(TfrOpc), TmpR) @@ -1628,11 +1628,11 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, DebugLoc DL = MI->getDebugLoc(); unsigned Opc = MI->getOpcode(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); // TmpR = L2_loadri_io FI, 0 - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR) .addFrameIndex(FI) .addImm(0) @@ -1658,7 +1658,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); auto *RC = &Hexagon::HvxVRRegClass; @@ -1667,8 +1667,8 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, // TmpR0 = A2_tfrsi 0x01010101 // TmpR1 = V6_vandqrt Qx, TmpR0 // store FI, 0, TmpR1 - unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned TmpR1 = MRI.createVirtualRegister(RC); + Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR1 = MRI.createVirtualRegister(RC); BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) .addImm(0x01010101); @@ -1695,15 +1695,15 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); auto *RC = &Hexagon::HvxVRRegClass; // TmpR0 = A2_tfrsi 0x01010101 // TmpR1 = load FI, 0 // DstR = V6_vandvrt TmpR1, TmpR0 - unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned TmpR1 = MRI.createVirtualRegister(RC); + Register TmpR0 
= MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR1 = MRI.createVirtualRegister(RC); BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) .addImm(0x01010101); @@ -1745,9 +1745,9 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, } DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); - unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); + Register SrcR = MI->getOperand(2).getReg(); + Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); @@ -1793,9 +1793,9 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, return false; DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); - unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); - unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); + Register DstR = MI->getOperand(0).getReg(); + Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); + Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); int FI = MI->getOperand(1).getIndex(); unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1834,7 +1834,7 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, auto &HRI = *MF.getSubtarget().getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned SrcR = MI->getOperand(2).getReg(); + Register SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); @@ -1863,7 +1863,7 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, auto &HRI = *MF.getSubtarget().getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); @@ -2299,7 +2299,7 @@ void 
HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, int TFI; if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI) continue; - unsigned DstR = MI.getOperand(0).getReg(); + Register DstR = MI.getOperand(0).getReg(); assert(MI.getOperand(0).getSubReg() == 0); MachineInstr *CopyOut = nullptr; if (DstR != FoundR) { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 65e8c768664..27265dd5379 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -30,7 +30,7 @@ class TargetRegisterClass; class HexagonFrameLowering : public TargetFrameLowering { public: explicit HexagonFrameLowering() - : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} + : TargetFrameLowering(StackGrowsDown, Align(8), 0, Align::None(), true) {} // All of the prolog/epilog functionality, including saving and restoring // callee-saved registers is handled in emitPrologue. This is to have the diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index 3417c74e359..caa0e4d8039 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -184,7 +184,7 @@ bool HexagonGenExtract::convert(Instruction *In) { // The width of the extracted field is the minimum of the original bits // that remain after the shifts and the number of contiguous 1s in the mask. 
uint32_t W = std::min(U, T); - if (W == 0) + if (W == 0 || W == 1) return false; // Check if the extracted bits are contained within the mask that it is diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 81025c1c532..48881e02f4d 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -163,11 +163,11 @@ namespace { } static inline unsigned v2x(unsigned v) { - return TargetRegisterInfo::virtReg2Index(v); + return Register::virtReg2Index(v); } static inline unsigned x2v(unsigned x) { - return TargetRegisterInfo::index2VirtReg(x); + return Register::index2VirtReg(x); } }; @@ -267,7 +267,7 @@ namespace { CellMapShadow(const BitTracker &T) : BT(T) {} const BitTracker::RegisterCell &lookup(unsigned VR) { - unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + unsigned RInd = Register::virtReg2Index(VR); // Grow the vector to at least 32 elements. if (RInd >= CVect.size()) CVect.resize(std::max(RInd+16, 32U), nullptr); @@ -606,9 +606,9 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { - unsigned R = MO.getReg(); + Register R = MO.getReg(); assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); - if (TargetRegisterInfo::isVirtualRegister(R)) + if (Register::isVirtualRegister(R)) RO.insert(std::make_pair(R, Index++)); } } @@ -724,8 +724,8 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; Defs.insert(R); } @@ -737,8 +737,8 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI, const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || 
!MO.isUse()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) continue; Uses.insert(R); } @@ -1399,7 +1399,7 @@ bool HexagonGenInsert::generateInserts() { for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { unsigned VR = I->first; const TargetRegisterClass *RC = MRI->getRegClass(VR); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); RegMap[VR] = NewVR; } @@ -1477,9 +1477,8 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R) || - !MRI->use_nodbg_empty(R)) { + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R) || !MRI->use_nodbg_empty(R)) { AllDead = false; break; } @@ -1598,7 +1597,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { IterListType Out; for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { - unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + unsigned Idx = Register::virtReg2Index(I->first); if (Idx >= Cutoff) Out.push_back(I); } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index cdafbc20ab8..b559e7bbbb6 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -171,7 +171,7 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); BitVector &Set = MO.isDef() ? 
Defs : Uses; expandReg(R, Set); } @@ -239,14 +239,14 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned Opc = MI->getOpcode(); if (!isCondTransfer(Opc)) continue; - unsigned DR = MI->getOperand(0).getReg(); + Register DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; MachineOperand &PredOp = MI->getOperand(1); if (PredOp.isUndef()) continue; - unsigned PR = PredOp.getReg(); + Register PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index e991fa8b61c..24d33c91a29 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -133,7 +133,7 @@ INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", "Hexagon generate predicate operations", false, false) bool HexagonGenPredicate::isPredReg(unsigned R) { - if (!TargetRegisterInfo::isVirtualRegister(R)) + if (!Register::isVirtualRegister(R)) return false; const TargetRegisterClass *RC = MRI->getRegClass(R); return RC == &Hexagon::PredRegsRegClass; @@ -213,7 +213,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { case TargetOpcode::COPY: if (isPredReg(MI->getOperand(1).getReg())) { RegisterSubReg RD = MI->getOperand(0); - if (TargetRegisterInfo::isVirtualRegister(RD.R)) + if (Register::isVirtualRegister(RD.R)) PredGPRs.insert(RD); } break; @@ -245,7 +245,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) { // Create a predicate register for a given Reg. The newly created register // will have its value copied from Reg, so that it can be later used as // an operand in other instructions. 
- assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + assert(Register::isVirtualRegister(Reg.R)); RegToRegMap::iterator F = G2P.find(Reg); if (F != G2P.end()) return F->second; @@ -265,7 +265,7 @@ RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) { MachineBasicBlock &B = *DefI->getParent(); DebugLoc DL = DefI->getDebugLoc(); const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; - unsigned NewPR = MRI->createVirtualRegister(PredRC); + Register NewPR = MRI->createVirtualRegister(PredRC); // For convertible instructions, do not modify them, so that they can // be converted later. Generate a copy from Reg to NewPR. @@ -432,7 +432,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR // with NewGPR. const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); - unsigned NewOutR = MRI->createVirtualRegister(RC); + Register NewOutR = MRI->createVirtualRegister(RC); BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) .addReg(NewPR.R, 0, NewPR.S); MRI->replaceRegWith(OutR.R, NewOutR); @@ -471,9 +471,9 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { continue; RegisterSubReg DR = MI.getOperand(0); RegisterSubReg SR = MI.getOperand(1); - if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + if (!Register::isVirtualRegister(DR.R)) continue; - if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + if (!Register::isVirtualRegister(SR.R)) continue; if (MRI->getRegClass(DR.R) != PredRC) continue; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index cecbaedb6d7..62291790f0f 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -435,17 +435,17 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, if (Phi->getOperand(i+1).getMBB() != Latch) continue; - unsigned PhiOpReg = Phi->getOperand(i).getReg(); + Register 
PhiOpReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiOpReg); if (DI->getDesc().isAdd()) { // If the register operand to the add is the PHI we're looking at, this // meets the induction pattern. - unsigned IndReg = DI->getOperand(1).getReg(); + Register IndReg = DI->getOperand(1).getReg(); MachineOperand &Opnd2 = DI->getOperand(2); int64_t V; if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { - unsigned UpdReg = DI->getOperand(0).getReg(); + Register UpdReg = DI->getOperand(0).getReg(); IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -694,7 +694,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, Cmp = Comparison::getSwappedComparison(Cmp); if (InitialValue->isReg()) { - unsigned R = InitialValue->getReg(); + Register R = InitialValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) { int64_t V; @@ -704,7 +704,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, OldInsts.push_back(MRI->getVRegDef(R)); } if (EndValue->isReg()) { - unsigned R = EndValue->getReg(); + Register R = EndValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) { int64_t V; @@ -910,7 +910,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, (RegToImm ? 
TII->get(Hexagon::A2_subri) : TII->get(Hexagon::A2_addi)); if (RegToReg || RegToImm) { - unsigned SubR = MRI->createVirtualRegister(IntRC); + Register SubR = MRI->createVirtualRegister(IntRC); MachineInstrBuilder SubIB = BuildMI(*PH, InsertPos, DL, SubD, SubR); @@ -931,7 +931,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, EndValInstr->getOperand(2).getImm() == StartV) { DistR = EndValInstr->getOperand(1).getReg(); } else { - unsigned SubR = MRI->createVirtualRegister(IntRC); + Register SubR = MRI->createVirtualRegister(IntRC); MachineInstrBuilder SubIB = BuildMI(*PH, InsertPos, DL, SubD, SubR); SubIB.addReg(End->getReg(), 0, End->getSubReg()) @@ -950,7 +950,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, AdjSR = DistSR; } else { // Generate CountR = ADD DistR, AdjVal - unsigned AddR = MRI->createVirtualRegister(IntRC); + Register AddR = MRI->createVirtualRegister(IntRC); MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); BuildMI(*PH, InsertPos, DL, AddD, AddR) .addReg(DistR, 0, DistSR) @@ -971,7 +971,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, unsigned Shift = Log2_32(IVBump); // Generate NormR = LSR DistR, Shift. 
- unsigned LsrR = MRI->createVirtualRegister(IntRC); + Register LsrR = MRI->createVirtualRegister(IntRC); const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); BuildMI(*PH, InsertPos, DL, LsrD, LsrR) .addReg(AdjR, 0, AdjSR) @@ -1038,7 +1038,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MRI->use_nodbg_empty(Reg)) continue; @@ -1058,7 +1058,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI, if (!OPO.isReg() || !OPO.isDef()) continue; - unsigned OPReg = OPO.getReg(); + Register OPReg = OPO.getReg(); use_nodbg_iterator nextJ; for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); J != End; J = nextJ) { @@ -1092,7 +1092,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); MachineRegisterInfo::use_iterator nextI; for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), E = MRI->use_end(); I != E; I = nextI) { @@ -1244,7 +1244,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, if (TripCount->isReg()) { // Create a copy of the loop count register. - unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); // Add the Loop instruction to the beginning of the loop. @@ -1257,7 +1257,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, // create a new virtual register. 
int64_t CountImm = TripCount->getImm(); if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) { - unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) .addImm(CountImm); BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) @@ -1333,7 +1333,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, return true; // Out of order. - unsigned PredR = CmpI->getOperand(0).getReg(); + Register PredR = CmpI->getOperand(0).getReg(); bool FoundBump = false; instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { @@ -1428,10 +1428,10 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( if (checkForImmediate(*InitVal, Imm)) return (EndVal->getImm() == Imm); - unsigned Reg = InitVal->getReg(); + Register Reg = InitVal->getReg(); // We don't know the value of a physical register. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return true; MachineInstr *Def = MRI->getVRegDef(Reg); @@ -1508,8 +1508,8 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, // processed to handle potential subregisters in MO. 
int64_t TV; - unsigned R = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = MO.getReg(); + if (!Register::isVirtualRegister(R)) return false; MachineInstr *DI = MRI->getVRegDef(R); unsigned DOpc = DI->getOpcode(); @@ -1582,11 +1582,11 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { } assert(MO.isReg()); - unsigned R = MO.getReg(); + Register R = MO.getReg(); MachineInstr *DI = MRI->getVRegDef(R); const TargetRegisterClass *RC = MRI->getRegClass(R); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); MachineBasicBlock &B = *DI->getParent(); DebugLoc DL = DI->getDebugLoc(); BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); @@ -1634,17 +1634,17 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (Phi->getOperand(i+1).getMBB() != Latch) continue; - unsigned PhiReg = Phi->getOperand(i).getReg(); + Register PhiReg = Phi->getOperand(i).getReg(); MachineInstr *DI = MRI->getVRegDef(PhiReg); if (DI->getDesc().isAdd()) { // If the register operand to the add/sub is the PHI we are looking // at, this meets the induction pattern. 
- unsigned IndReg = DI->getOperand(1).getReg(); + Register IndReg = DI->getOperand(1).getReg(); MachineOperand &Opnd2 = DI->getOperand(2); int64_t V; if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { - unsigned UpdReg = DI->getOperand(0).getReg(); + Register UpdReg = DI->getOperand(0).getReg(); IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); } } @@ -1702,7 +1702,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (!Cond[CSz-1].isReg()) return false; - unsigned P = Cond[CSz-1].getReg(); + Register P = Cond[CSz - 1].getReg(); MachineInstr *PredDef = MRI->getVRegDef(P); if (!PredDef->isCompare()) @@ -1903,15 +1903,15 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); NewPH->insert(NewPH->end(), NewPN); - unsigned PR = PN->getOperand(0).getReg(); + Register PR = PN->getOperand(0).getReg(); const TargetRegisterClass *RC = MRI->getRegClass(PR); - unsigned NewPR = MRI->createVirtualRegister(RC); + Register NewPR = MRI->createVirtualRegister(RC); NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); // Copy all non-latch operands of a header's PHI node to the newly // created PHI node in the preheader. 
for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { - unsigned PredR = PN->getOperand(i).getReg(); + Register PredR = PN->getOperand(i).getReg(); unsigned PredRSub = PN->getOperand(i).getSubReg(); MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); if (PredB == Latch) diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 605fcfc2555..4684d8e4781 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -697,7 +697,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { // void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { SDLoc dl(N); - ConstantFPSDNode *CN = dyn_cast(N); + auto *CN = cast(N); APInt A = CN->getValueAPF().bitcastToAPInt(); if (N->getValueType(0) == MVT::f32) { SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index fef5a98cdb0..8a8986e232a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -240,12 +240,12 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return true; } -unsigned HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register HexagonTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &) const { // Just support r19, the linux kernel uses it. 
- unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("r19", Hexagon::R19) - .Default(0); + .Default(Register()); if (Reg) return Reg; @@ -286,7 +286,7 @@ SDValue HexagonTargetLowering::LowerCallResult( SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), MVT::i32, Glue); // FR0 = (Value, Chain, Glue) - unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR, FR0.getValue(0), FR0.getValue(2)); // TPR = (Chain, Glue) @@ -736,7 +736,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments( RegVT = VA.getValVT(); const TargetRegisterClass *RC = getRegClassFor(RegVT); - unsigned VReg = MRI.createVirtualRegister(RC); + Register VReg = MRI.createVirtualRegister(RC); SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // Treat values of type MVT::i1 specially: they are passed in @@ -870,15 +870,20 @@ SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue PredOp = Op.getOperand(0); SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2); - EVT OpVT = Op1.getValueType(); - SDLoc DL(Op); + MVT OpTy = ty(Op1); + const SDLoc &dl(Op); - if (OpVT == MVT::v2i16) { - SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1); - SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2); - SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2); - SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL); - return TR; + if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) { + MVT ElemTy = OpTy.getVectorElementType(); + assert(ElemTy.isScalarInteger()); + MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()), + OpTy.getVectorNumElements()); + // Generate (trunc (select (_, sext, sext))). 
+ return DAG.getSExtOrTrunc( + DAG.getSelect(dl, WideTy, PredOp, + DAG.getSExtOrTrunc(Op1, dl, WideTy), + DAG.getSExtOrTrunc(Op2, dl, WideTy)), + dl, OpTy); } return SDValue(); @@ -1230,9 +1235,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, Subtarget(ST) { auto &HRI = *Subtarget.getRegisterInfo(); - setPrefLoopAlignment(4); - setPrefFunctionAlignment(4); - setMinFunctionAlignment(2); + setPrefLoopAlignment(Align(16)); + setMinFunctionAlignment(Align(4)); + setPrefFunctionAlignment(Align(16)); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); setBooleanContents(TargetLoweringBase::UndefinedBooleanContent); setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent); @@ -1434,12 +1439,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE }; - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { for (unsigned VectExpOp : VectExpOps) setOperationAction(VectExpOp, VT, Expand); // Expand all extending loads and truncating stores: - for (MVT TargetVT : MVT::vector_valuetypes()) { + for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) { if (TargetVT == VT) continue; setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand); @@ -1496,16 +1501,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, VT, Custom); } - for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) { - setCondCodeAction(ISD::SETLT, VT, Expand); + for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16, + MVT::v2i32}) { + setCondCodeAction(ISD::SETNE, VT, Expand); setCondCodeAction(ISD::SETLE, VT, Expand); - setCondCodeAction(ISD::SETULT, VT, Expand); + setCondCodeAction(ISD::SETGE, VT, Expand); + setCondCodeAction(ISD::SETLT, VT, Expand); setCondCodeAction(ISD::SETULE, VT, Expand); + setCondCodeAction(ISD::SETUGE, VT, Expand); + setCondCodeAction(ISD::SETULT, VT, 
Expand); } // Custom-lower bitcasts from i8 to v8i1. setOperationAction(ISD::BITCAST, MVT::i8, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i8, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); @@ -1554,6 +1564,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSUB, MVT::f64, Legal); } + setTargetDAGCombine(ISD::VSELECT); + if (Subtarget.useHVXOps()) initializeHVXLowering(); @@ -1643,6 +1655,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; + case HexagonISD::PTRUE: return "HexagonISD::PTRUE"; + case HexagonISD::PFALSE: return "HexagonISD::PFALSE"; case HexagonISD::VZERO: return "HexagonISD::VZERO"; case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW"; case HexagonISD::D2P: return "HexagonISD::D2P"; @@ -1783,7 +1797,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // The offset value comes through Modifier register. For now, assume the // offset is 0. 
Info.offset = 0; - Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)); + Info.align = + MaybeAlign(DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont))); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -1805,7 +1820,8 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(VecTy); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8; + Info.align = + MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; @@ -1817,6 +1833,10 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } +bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'tstbit' +} + bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2)); } @@ -1844,26 +1864,33 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef Mask, TargetLoweringBase::LegalizeTypeAction HexagonTargetLowering::getPreferredVectorAction(MVT VT) const { - if (VT.getVectorNumElements() == 1) + unsigned VecLen = VT.getVectorNumElements(); + MVT ElemTy = VT.getVectorElementType(); + + if (VecLen == 1 || VT.isScalableVector()) return TargetLoweringBase::TypeScalarizeVector; - // Always widen vectors of i1. - MVT ElemTy = VT.getVectorElementType(); - if (ElemTy == MVT::i1) - return TargetLoweringBase::TypeWidenVector; - if (Subtarget.useHVXOps()) { + unsigned HwLen = Subtarget.getVectorLength(); // If the size of VT is at least half of the vector length, // widen the vector. Note: the threshold was not selected in // any scientific way. 
ArrayRef Tys = Subtarget.getHVXElementTypes(); if (llvm::find(Tys, ElemTy) != Tys.end()) { - unsigned HwWidth = 8*Subtarget.getVectorLength(); + unsigned HwWidth = 8*HwLen; unsigned VecWidth = VT.getSizeInBits(); if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) return TargetLoweringBase::TypeWidenVector; } + // Split vectors of i1 that correspond to (byte) vector pairs. + if (ElemTy == MVT::i1 && VecLen == 2*HwLen) + return TargetLoweringBase::TypeSplitVector; } + + // Always widen (remaining) vectors of i1. + if (ElemTy == MVT::i1) + return TargetLoweringBase::TypeWidenVector; + return TargetLoweringBase::TypeSplitVector; } @@ -2452,6 +2479,23 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return buildVector64(Ops, dl, VecTy, DAG); if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) { + // Check if this is a special case or all-0 or all-1. + bool All0 = true, All1 = true; + for (SDValue P : Ops) { + auto *CN = dyn_cast(P.getNode()); + if (CN == nullptr) { + All0 = All1 = false; + break; + } + uint32_t C = CN->getZExtValue(); + All0 &= (C == 0); + All1 &= (C == 1); + } + if (All0) + return DAG.getNode(HexagonISD::PFALSE, dl, VecTy); + if (All1) + return DAG.getNode(HexagonISD::PTRUE, dl, VecTy); + // For each i1 element in the resulting predicate register, put 1 // shifted by the index of the element into a general-purpose register, // then or them together and transfer it back into a predicate register. @@ -2629,7 +2673,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) DoDefault = true; if (!AlignLoads) { - if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), *LN->getMemOperand())) + if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(), + *LN->getMemOperand())) return Op; DoDefault = true; } @@ -2637,7 +2682,8 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)". 
MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign) : MVT::getVectorVT(MVT::i8, HaveAlign); - DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, *LN->getMemOperand()); + DoDefault = + allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand()); } if (DoDefault) { std::pair P = expandUnalignedLoad(LN, DAG); @@ -2865,12 +2911,54 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N, if (N->getValueType(0) == MVT::i8) { SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, N->getOperand(0), DAG); - Results.push_back(P); + SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8); + Results.push_back(T); } break; } } +SDValue +HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + SDValue Op(N, 0); + if (isHvxOperation(Op)) { + if (SDValue V = PerformHvxDAGCombine(N, DCI)) + return V; + return SDValue(); + } + + const SDLoc &dl(Op); + unsigned Opc = Op.getOpcode(); + + if (Opc == HexagonISD::P2D) { + SDValue P = Op.getOperand(0); + switch (P.getOpcode()) { + case HexagonISD::PTRUE: + return DCI.DAG.getConstant(-1, dl, ty(Op)); + case HexagonISD::PFALSE: + return getZero(dl, ty(Op), DCI.DAG); + default: + break; + } + } else if (Opc == ISD::VSELECT) { + // This is pretty much duplicated in HexagonISelLoweringHVX... + // + // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0) + SDValue Cond = Op.getOperand(0); + if (Cond->getOpcode() == ISD::XOR) { + SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); + if (C1->getOpcode() == HexagonISD::PTRUE) { + SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, + Op.getOperand(2), Op.getOperand(1)); + return VSel; + } + } + } + + return SDValue(); +} + /// Returns relocation base for the given PIC jumptable. 
SDValue HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 4e467cb2272..75f553bfec7 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -68,6 +68,8 @@ namespace HexagonISD { EH_RETURN, DCFETCH, READCYCLE, + PTRUE, + PFALSE, D2P, // Convert 8-byte value to 8-bit predicate register. [*] P2D, // Convert 8-bit predicate register to 8-byte value. [*] V2Q, // Convert HVX vector to a vector predicate reg. [*] @@ -127,6 +129,8 @@ namespace HexagonISD { bool isCheapToSpeculateCtlz() const override { return true; } bool isCtlzFast() const override { return true; } + bool hasBitTest(SDValue X, SDValue Y) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if an FMA operation is faster than a pair of mul and add @@ -221,10 +225,12 @@ namespace HexagonISD { const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. @@ -299,7 +305,8 @@ namespace HexagonISD { const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) + const override; /// Returns relocation base for the given PIC jumptable. 
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) @@ -456,6 +463,8 @@ namespace HexagonISD { bool isHvxOperation(SDValue Op) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; + + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 345c657787a..bc8a9959c91 100644 --- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -193,6 +193,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); } + + setTargetDAGCombine(ISD::VSELECT); } SDValue @@ -1580,6 +1582,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Unhandled HVX operation"); } +SDValue +HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + const SDLoc &dl(N); + SDValue Op(N, 0); + + unsigned Opc = Op.getOpcode(); + if (Opc == ISD::VSELECT) { + // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) + SDValue Cond = Op.getOperand(0); + if (Cond->getOpcode() == ISD::XOR) { + SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); + if (C1->getOpcode() == HexagonISD::QTRUE) { + SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, + Op.getOperand(2), Op.getOperand(1)); + return VSel; + } + } + } + return SDValue(); +} + bool HexagonTargetLowering::isHvxOperation(SDValue Op) const { // If the type of the result, or any operand type are HVX vector types, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index a156de5ba12..767538f92ed 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -193,7 +193,7 @@ static inline void parseOperands(const MachineInstr &MI, if (!MO.isReg()) continue; - unsigned Reg = 
MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; @@ -674,86 +674,96 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, return 2; } -/// Analyze the loop code to find the loop induction variable and compare used -/// to compute the number of iterations. Currently, we analyze loop that are -/// controlled using hardware loops. In this case, the induction variable -/// instruction is null. For all other cases, this function returns true, which -/// means we're unable to analyze it. -bool HexagonInstrInfo::analyzeLoop(MachineLoop &L, - MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { +namespace { +class HexagonPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop; + MachineFunction *MF; + const HexagonInstrInfo *TII; + int64_t TripCount; + Register LoopCount; + DebugLoc DL; - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); +public: + HexagonPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop) + : Loop(Loop), EndLoop(EndLoop), MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()), + DL(Loop->getDebugLoc()) { + // Inspect the Loop instruction up-front, as it may be deleted when we call + // createTripCountGreaterCondition. + TripCount = Loop->getOpcode() == Hexagon::J2_loop0r + ? -1 + : Loop->getOperand(1).getImm(); + if (TripCount == -1) + LoopCount = Loop->getOperand(1).getReg(); + } + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; + } + + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + if (TripCount == -1) { + // Check if we're done with the loop. 
+ unsigned Done = TII->createVR(MF, MVT::i1); + MachineInstr *NewCmp = BuildMI(&MBB, DL, + TII->get(Hexagon::C2_cmpgtui), Done) + .addReg(LoopCount) + .addImm(TC); + Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); + Cond.push_back(NewCmp->getOperand(0)); + return {}; + } + + return TripCount > TC; + } + + void setPreheader(MachineBasicBlock *NewPreheader) override { + NewPreheader->splice(NewPreheader->getFirstTerminator(), Loop->getParent(), + Loop); + } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (Loop->getOpcode() == Hexagon::J2_loop0i || + Loop->getOpcode() == Hexagon::J2_loop1i) { + int64_t TripCount = Loop->getOperand(1).getImm() + TripCountAdjust; + assert(TripCount > 0 && "Can't create an empty or negative loop!"); + Loop->getOperand(1).setImm(TripCount); + return; + } + + // The loop trip count is a run-time value. We generate code to subtract + // one from the trip count, and update the loop instruction. + Register LoopCount = Loop->getOperand(1).getReg(); + Register NewLoopCount = TII->createVR(MF, MVT::i32); + BuildMI(*Loop->getParent(), Loop, Loop->getDebugLoc(), + TII->get(Hexagon::A2_addi), NewLoopCount) + .addReg(LoopCount) + .addImm(TripCountAdjust); + Loop->getOperand(1).setReg(NewLoopCount); + } + + void disposed() override { Loop->eraseFromParent(); } +}; +} // namespace + +std::unique_ptr +HexagonInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { // We really "analyze" only hardware loops right now. - if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; - } - return true; -} + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); -/// Generate code to reduce the loop iteration by one and check if the loop is -/// finished. Return the value/register of the new loop count. this function -/// assumes the nth iteration is peeled first. 
-unsigned HexagonInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isEndLoopN(Cmp.getOpcode()) - && "Expecting a hardware loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - SmallPtrSet VisitedBBs; - MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), - Cmp.getOperand(0).getMBB(), VisitedBBs); - if (!Loop) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (Loop->getOpcode() == Hexagon::J2_loop0i || - Loop->getOpcode() == Hexagon::J2_loop1i) { - int64_t Offset = Loop->getOperand(1).getImm(); - if (Offset <= 1) - Loop->eraseFromParent(); - else - Loop->getOperand(1).setImm(Offset - 1); - return Offset - 1; + if (I != LoopBB->end() && isEndLoopN(I->getOpcode())) { + SmallPtrSet VisitedBBs; + MachineInstr *LoopInst = findLoopInstr( + LoopBB, I->getOpcode(), I->getOperand(0).getMBB(), VisitedBBs); + if (LoopInst) + return std::make_unique(LoopInst, &*I); } - // The loop trip count is a run-time value. We generate code to subtract - // one from the trip count, and update the loop instruction. - assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction"); - unsigned LoopCount = Loop->getOperand(1).getReg(); - // Check if we're done with the loop. - unsigned LoopEnd = createVR(MF, MVT::i1); - MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd). - addReg(LoopCount).addImm(1); - unsigned NewLoopCount = createVR(MF, MVT::i32); - MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount). 
- addReg(LoopCount).addImm(-1); - const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); - // Update the previously generated instructions with the new loop counter. - for (SmallVectorImpl::iterator I = PrevInsts.begin(), - E = PrevInsts.end(); I != E; ++I) - (*I)->substituteRegister(LoopCount, NewLoopCount, 0, HRI); - PrevInsts.clear(); - PrevInsts.push_back(NewCmp); - PrevInsts.push_back(NewAdd); - // Insert the new loop instruction if this is the last time the loop is - // decremented. - if (Iter == MaxIter) - BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)). - addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount); - // Delete the old loop instruction. - if (Iter == 0) - Loop->eraseFromParent(); - Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf)); - Cond.push_back(NewCmp->getOperand(0)); - return NewLoopCount; + return nullptr; } bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, @@ -839,8 +849,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } if (Hexagon::HvxWRRegClass.contains(SrcReg, DestReg)) { - unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); - unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg) .addReg(HiSrc, KillFlag) .addReg(LoSrc, KillFlag); @@ -1017,7 +1027,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); auto RealCirc = [&](unsigned Opc, bool HasImm, unsigned MxOp) { - unsigned Mx = MI.getOperand(MxOp).getReg(); + Register Mx = MI.getOperand(MxOp).getReg(); unsigned CSx = (Mx == Hexagon::M0 ? Hexagon::CS0 : Hexagon::CS1); BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrrcr), CSx) .add(MI.getOperand((HasImm ? 
5 : 4))); @@ -1049,8 +1059,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; case Hexagon::V6_vassignp: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned Kill = getKillRegState(MI.getOperand(1).isKill()); BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg) .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill) @@ -1059,18 +1069,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::V6_lo: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubLo); return true; } case Hexagon::V6_hi: { - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill()); MBB.erase(MI); MRI.clearKillFlags(SrcSubHi); @@ -1079,9 +1089,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Hexagon::PS_vstorerw_ai: case Hexagon::PS_vstorerwu_ai: { bool Aligned = Opc == Hexagon::PS_vstorerw_ai; - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi); - unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); + Register SrcReg = MI.getOperand(2).getReg(); + Register SrcSubHi = 
HRI.getSubReg(SrcReg, Hexagon::vsub_hi); + Register SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo); unsigned NewOpc = Aligned ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32Ub_ai; unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1103,7 +1113,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Hexagon::PS_vloadrw_ai: case Hexagon::PS_vloadrwu_ai: { bool Aligned = Opc == Hexagon::PS_vloadrw_ai; - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned NewOpc = Aligned ? Hexagon::V6_vL32b_ai : Hexagon::V6_vL32Ub_ai; unsigned Offset = HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -1122,7 +1132,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_true: { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -1130,7 +1140,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_false: { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg) .addReg(Reg, RegState::Undef) .addReg(Reg, RegState::Undef); @@ -1152,7 +1162,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case Hexagon::PS_vdd0: { - unsigned Vd = MI.getOperand(0).getReg(); + Register Vd = MI.getOperand(0).getReg(); BuildMI(MBB, MI, DL, get(Hexagon::V6_vsubw_dv), Vd) .addReg(Vd, RegState::Undef) .addReg(Vd, RegState::Undef); @@ -1161,13 +1171,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case Hexagon::PS_vmulw: { // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. 
- unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi), HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) @@ -1185,16 +1195,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case Hexagon::PS_vmulw_acc: { // Expand 64-bit vector multiply with addition into 2 scalar multiplies. 
- unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - unsigned Src3Reg = MI.getOperand(3).getReg(); - unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); - unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); - unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); - unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); - unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi); - unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo); + Register DstReg = MI.getOperand(0).getReg(); + Register Src1Reg = MI.getOperand(1).getReg(); + Register Src2Reg = MI.getOperand(2).getReg(); + Register Src3Reg = MI.getOperand(3).getReg(); + Register Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi); + Register Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo); + Register Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi); + Register Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo); + Register Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi); + Register Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo); BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci), HRI.getSubReg(DstReg, Hexagon::isub_hi)) .addReg(Src1SubHi) @@ -1219,10 +1229,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - unsigned Rd = Op0.getReg(); - unsigned Pu = Op1.getReg(); - unsigned Rs = Op2.getReg(); - unsigned Rt = Op3.getReg(); + Register Rd = Op0.getReg(); + Register Pu = Op1.getReg(); + Register Rs = Op2.getReg(); + Register Rt = Op3.getReg(); DebugLoc DL = MI.getDebugLoc(); unsigned K1 = getKillRegState(Op1.isKill()); unsigned K2 = getKillRegState(Op2.isKill()); @@ -1246,7 +1256,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); 
getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); - unsigned PReg = Op1.getReg(); + Register PReg = Op1.getReg(); assert(Op1.getSubReg() == 0); unsigned PState = getRegState(Op1); @@ -1280,15 +1290,15 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); - unsigned PReg = Op1.getReg(); + Register PReg = Op1.getReg(); assert(Op1.getSubReg() == 0); unsigned PState = getRegState(Op1); if (Op0.getReg() != Op2.getReg()) { unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill : PState; - unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); + Register SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) .add(Op0) .addReg(PReg, S) @@ -1299,8 +1309,8 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { IsDestLive = true; } if (Op0.getReg() != Op3.getReg()) { - unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo); - unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); + Register SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo); + Register SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) .add(Op0) .addReg(PReg, PState) @@ -1856,8 +1866,7 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( // S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1] // Currently AA considers the addresses in these instructions to be aliasing. 
bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; @@ -1872,7 +1881,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( if (!getBaseAndOffsetPosition(MIa, BasePosA, OffsetPosA)) return false; const MachineOperand &BaseA = MIa.getOperand(BasePosA); - unsigned BaseRegA = BaseA.getReg(); + Register BaseRegA = BaseA.getReg(); unsigned BaseSubA = BaseA.getSubReg(); // Get the base register in MIb. @@ -1880,7 +1889,7 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( if (!getBaseAndOffsetPosition(MIb, BasePosB, OffsetPosB)) return false; const MachineOperand &BaseB = MIb.getOperand(BasePosB); - unsigned BaseRegB = BaseB.getReg(); + Register BaseRegB = BaseB.getReg(); unsigned BaseSubB = BaseB.getSubReg(); if (BaseRegA != BaseRegB || BaseSubA != BaseSubB) @@ -1984,7 +1993,7 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { llvm_unreachable("Cannot handle this register class"); } - unsigned NewReg = MRI.createVirtualRegister(TRC); + Register NewReg = MRI.createVirtualRegister(TRC); return NewReg; } @@ -2094,12 +2103,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, if (RegA == RegB) return true; - if (TargetRegisterInfo::isPhysicalRegister(RegA)) + if (Register::isPhysicalRegister(RegA)) for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) if (RegB == *SubRegs) return true; - if (TargetRegisterInfo::isPhysicalRegister(RegB)) + if (Register::isPhysicalRegister(RegB)) for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) if (RegA == *SubRegs) return true; @@ -2605,7 +2614,7 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { if 
(mayBeCurLoad(MI1)) { // if (result of SU is used in Next) return true; - unsigned DstReg = MI1.getOperand(0).getReg(); + Register DstReg = MI1.getOperand(0).getReg(); int N = MI2.getNumOperands(); for (int I = 0; I < N; I++) if (MI2.getOperand(I).isReg() && DstReg == MI2.getOperand(I).getReg()) @@ -3374,7 +3383,7 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, if ((GA.getOpcode() != Hexagon::C2_cmpeqi) || (GB.getOpcode() != Hexagon::J2_jumptnew)) return -1u; - unsigned DestReg = GA.getOperand(0).getReg(); + Register DestReg = GA.getOperand(0).getReg(); if (!GB.readsRegister(DestReg)) return -1u; if (DestReg != Hexagon::P0 && DestReg != Hexagon::P1) @@ -4091,7 +4100,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // Get DefIdx and UseIdx for super registers. const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - if (DefMO.isReg() && HRI.isPhysicalRegister(DefMO.getReg())) { + if (DefMO.isReg() && Register::isPhysicalRegister(DefMO.getReg())) { if (DefMO.isImplicit()) { for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) { int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index e0a999d0f4c..60298cd666b 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -129,21 +129,10 @@ public: const DebugLoc &DL, int *BytesAdded = nullptr) const override; - /// Analyze the loop code, return true if it cannot be understood. Upon - /// success, this function returns false and returns information about the - /// induction variable and compare instruction used at the end. - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. 
We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the nth iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; /// Return true if it's profitable to predicate /// instructions with accumulated instruction latency of "NumCycles" @@ -299,8 +288,7 @@ public: // memory addresses and false otherwise. bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; /// For instructions with a base and offset, return the position of the /// base register and offset operands. 
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index cabfd783eff..c5e3cfd080d 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -22,14 +22,14 @@ class T_RP_pat def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt), (A2_add IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16), +def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16), (A2_addi IntRegs:$Rs, imm:$s16)>; def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt), (A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>; def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt), (A2_sub IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs), +def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs), (A2_subri imm:$s10, IntRegs:$Rs)>; def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt), (A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>; @@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt), def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt), (M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5), (S2_asl_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5), (S2_lsr_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5), +def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5), (S2_asr_i_r IntRegs:$Rs, imm:$u5)>; -def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6), +def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6), (S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>; -def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6), +def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6), (S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>; -def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6), +def: 
Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6), (S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>; def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt), (A2_and IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10), +def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10), (A2_andir IntRegs:$Rs, imm:$s10)>; def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt), (A2_or IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10), +def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10), (A2_orir IntRegs:$Rs, imm:$s10)>; def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt), (A2_xor IntRegs:$Rs, IntRegs:$Rt)>; @@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)), (S2_vsathub I64:$Rs)>; } -def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm), +def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm), (S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm), +def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm), (S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm), +def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), (S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; -def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm), +def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm), (S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; def ImmExt64: SDNodeXForm; -def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2), +def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2), (C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>; -def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2), +def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, 
u32_0ImmPred_timm:$src2), (C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>; def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0), @@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2), //===----------------------------------------------------------------------===// class S2op_tableidx_pat - : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4), + : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4), (OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3, (XformImm u5_0ImmPred:$src4))>; @@ -197,11 +197,11 @@ class T_stc_pat : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), (MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; -def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; +def: T_stc_pat; multiclass MaskedStore { def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3), diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index ac48e1dc30b..bda3eccac0c 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -93,9 +93,9 @@ static cl::opt OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops")); -cl::opt HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", - cl::Hidden, cl::init(false), - cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); +static cl::opt HexagonVolatileMemcpy( + "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), + cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); static cl::opt SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); @@ -632,9 +632,9 @@ Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { if (!isa(InitV) || 
!cast(InitV)->isZero()) continue; Value *IterV = PN->getIncomingValueForBlock(BB); - if (!isa(IterV)) - continue; auto *BO = dyn_cast(IterV); + if (!BO) + continue; if (BO->getOpcode() != Instruction::Add) continue; Value *IncV = nullptr; @@ -2020,7 +2020,7 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. - LoadInst *LI = dyn_cast(SI->getValueOperand()); + auto *LI = cast(SI->getValueOperand()); auto *LoadEv = cast(SE->getSCEV(LI->getPointerOperand())); // The trip count of the loop and the base pointer of the addrec SCEV is @@ -2426,7 +2426,8 @@ bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DL = &L->getHeader()->getModule()->getDataLayout(); DT = &getAnalysis().getDomTree(); LF = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI( + *L->getHeader()->getParent()); SE = &getAnalysis().getSE(); HasMemcpy = TLI->has(LibFunc_memcpy); diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index db44901ca70..680d01e12af 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -177,7 +177,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, (II->getOperand(i).isUse() || II->getOperand(i).isDef())) { MachineBasicBlock::iterator localII = II; ++localII; - unsigned Reg = II->getOperand(i).getReg(); + Register Reg = II->getOperand(i).getReg(); for (MachineBasicBlock::iterator localBegin = localII; localBegin != end; ++localBegin) { if (localBegin == skip) @@ -290,7 +290,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // at machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. 
MachineRegisterInfo &MRI = MF.getRegInfo(); - if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { + if (secondReg && !Register::isPhysicalRegister(cmpOp2)) { MachineInstr *def = MRI.getVRegDef(cmpOp2); if (def->getOpcode() == TargetOpcode::COPY) return false; @@ -516,7 +516,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpPos = MII; jmpInstr = &MI; predReg = MI.getOperand(0).getReg(); - afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); + afterRA = Register::isPhysicalRegister(predReg); // If ifconverter had not messed up with the kill flags of the // operands, the following check on the kill flag would suffice. @@ -603,7 +603,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { (isSecondOpReg && MI.getOperand(0).getReg() == (unsigned)cmpOp2))) { - unsigned feederReg = MI.getOperand(0).getReg(); + Register feederReg = MI.getOperand(0).getReg(); // First try to see if we can get the feeder from the first operand // of the compare. 
If we can not, and if secondOpReg is true @@ -651,7 +651,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { for (MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; - unsigned UseR = MO.getReg(); + Register UseR = MO.getReg(); for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) { if (I == cmpPos) continue; diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 547da9fd598..9121115020a 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -162,7 +162,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr AddAslSN, if (!OffsetOp.isImm() || OffsetOp.getImm() > 3) return false; - unsigned OffsetReg = MI.getOperand(2).getReg(); + Register OffsetReg = MI.getOperand(2).getReg(); RegisterRef OffsetRR; NodeId OffsetRegRD = 0; for (NodeAddr UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) { @@ -348,7 +348,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, MachineInstr *AddMI, const NodeList &UNodeList) { - unsigned AddDefR = AddMI->getOperand(0).getReg(); + Register AddDefR = AddMI->getOperand(0).getReg(); for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr UN = *I; NodeAddr SN = UN.Addr->getOwner(*DFG); @@ -381,7 +381,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, // Ex: Rx= add(Rt,#10) // memw(Rx+#0) = Rs // will be replaced with => memw(Rt+#10) = Rs - unsigned BaseReg = AddMI->getOperand(1).getReg(); + Register BaseReg = AddMI->getOperand(1).getReg(); if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList)) return false; } @@ -411,7 +411,7 @@ bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI) { const MachineOperand ImmOp = AddMI->getOperand(2); const MachineOperand AddRegOp = AddMI->getOperand(1); - unsigned newReg = AddRegOp.getReg(); + Register newReg = AddRegOp.getReg(); const MCInstrDesc &MID = UseMI->getDesc(); MachineOperand 
&BaseOp = MID.mayLoad() ? UseMI->getOperand(1) @@ -543,7 +543,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum) { bool Changed = false; - unsigned OpStart; + unsigned OpStart = 0; unsigned OpEnd = OldMI->getNumOperands(); MachineBasicBlock *BB = OldMI->getParent(); auto UsePos = MachineBasicBlock::iterator(OldMI); @@ -724,7 +724,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { } short SizeInc = 0; - unsigned DefR = MI->getOperand(0).getReg(); + Register DefR = MI->getOperand(0).getReg(); InstrEvalMap InstrEvalResult; // Analyze all uses and calculate increase in size. Perform the optimization diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index fb731f56bfb..485e658e1c8 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -99,13 +99,21 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def SDTVecLeaf: + SDTypeProfile<1, 0, [SDTCisVec<0>]>; def SDTVecVecIntOp: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, SDTCisVT<3,i32>]>; +def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>; +def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>; def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; +def ptrue: PatFrag<(ops), (HexagonPTRUE)>; +def pfalse: PatFrag<(ops), (HexagonPFALSE)>; +def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>; + def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; @@ -154,6 +162,11 @@ def IsNPow2_64H: PatLeaf<(i64 imm), [{ return isPowerOf2_64(NV) && Log2_64(NV) >= 32; }]>; +class IsULE: PatLeaf<(i32 imm), + 
"uint64_t V = N->getZExtValue();" # + "return isUInt<" # Width # ">(V) && V <= " # Arg # ";" +>; + class IsUGT: PatLeaf<(i32 imm), "uint64_t V = N->getZExtValue();" # "return isUInt<" # Width # ">(V) && V > " # Arg # ";" @@ -320,6 +333,24 @@ multiclass SelMinMax_pats; } +multiclass MinMax_pats { + def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vt, CmpPred:$Vs), + (PickT CmpPred:$Vs, CmpPred:$Vt)>; + def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vs, CmpPred:$Vt), + (PickS CmpPred:$Vs, CmpPred:$Vt)>; +} + +// Bitcasts between same-size vector types are no-ops, except for the +// actual type change. +multiclass NopCast_pat { + def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>; + def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>; +} + // Frags for commonly used SDNodes. def Add: pf2; def And: pf2; def Sra: pf2; @@ -403,17 +434,18 @@ def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; -multiclass Cast_pat { - def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; - def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; -} - -// Bit convert vector types to integers. -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; -defm: Cast_pat; +// Bit convert 32- and 64-bit types. +// All of these are bitcastable to one another: i32, v2i16, v4i8. +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8. 
+defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; +defm: NopCast_pat; // --(3) Extend/truncate ------------------------------------------------- @@ -497,7 +529,9 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)), // def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; -def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>; +def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>; +def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>; +def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>; def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; multiclass BoolOpR_RR_pat { @@ -816,14 +850,6 @@ def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; -def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), - (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; -def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), - (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; -def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), - (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), - (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; - def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), @@ -831,6 +857,14 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; +def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt), + (C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>; +def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt), + (C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>; +def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt), + (C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>; + + // From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), (C2_or (C2_and I1:$Pu, I1:$Pv), @@ -863,32 +897,44 @@ let AddedComplexity = 200 in { } let AddedComplexity = 200 in { - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; } let AddedComplexity = 100 in { - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; - defm: SelMinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; } +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; +defm: MinMax_pats; // --(7) Insert/extract -------------------------------------------------- // @@ -1639,19 +1685,19 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), // // Count leading zeros. -def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; // Count trailing zeros. -def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; // Count leading ones. 
-def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; // Count trailing ones. -def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; // Define leading/trailing patterns that require zero-extensions to 64 bits. @@ -1706,6 +1752,7 @@ let AddedComplexity = 20 in { // Complexity greater than and/or/xor (i32 (LoReg $Rss)))>; } + let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), (S2_tstbit_i IntRegs:$Rs, imm:$u5)>; @@ -1717,6 +1764,20 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; } +def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1), + (I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>; +def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1), + (ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>; +def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1), + (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>; + +def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1), + (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>; +def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1), + (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>; +def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1), + (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>; + let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), (C2_bitsclri IntRegs:$Rs, imm:$u6)>; @@ -1737,23 +1798,28 @@ def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), (S2_tstbit_r I32:$Rs, I32:$Rt)>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
- def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S4_ntstbit_i I32:$Rs, imm:$u5)>; - def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), - (S4_ntstbit_r I32:$Rs, I32:$Rt)>; -} - // Add extra complexity to prefer these instructions over bitsset/bitsclr. // The reason is that tstbit/ntstbit can be folded into a compound instruction: // if ([!]tstbit(...)) jump ... -let AddedComplexity = 100 in -def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S4_ntstbit_r I32:$Rs, I32:$Rt)>; + def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S2_tstbit_r I32:$Rs, I32:$Rt)>; +} -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>; +def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)), + (S2_tstbit_i (LoReg $Rs), (Log2_32 imm:$u6))>; +def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)), + (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_32 imm:$u6))))>; // Do not increase complexity of these patterns. 
In the DAG, "cmp i8" may be // represented as a compare against "value & 0xFF", which is an exact match @@ -1773,10 +1839,18 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), let AddedComplexity = 100 in { // Avoid A4_rcmp[n]eqi in these cases: - def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), - (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; + def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))), + (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; + def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>; + def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), + (I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>; } // --(11) PIC ------------------------------------------------------------ diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td index a4cfca9ac7d..078a7135c55 100644 --- a/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -1,5 +1,3 @@ -def SDTVecLeaf: - SDTypeProfile<1, 0, [SDTCisVec<0>]>; def SDTVecBinOp: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; @@ -162,23 +160,14 @@ let Predicates = [UseHVX] in { // Bitcasts between same-size vector types are no-ops, except for the // actual type change. 
-class Bitcast - : Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>; - let Predicates = [UseHVX] in { - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; - def: Bitcast; + defm: NopCast_pat; + defm: NopCast_pat; + defm: NopCast_pat; } let Predicates = [UseHVX] in { @@ -259,6 +248,21 @@ class Vneg1 class Vnot : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1)>; +let Predicates = [UseHVX] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + } +} + let Predicates = [UseHVX] in { let AddedComplexity = 200 in { def: Pat<(Vnot HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 8f761d2d480..0ccfe64ad1e 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -136,11 +136,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); // Just handle virtual registers. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Map the following: // %170 = SXTW %166 // PeepholeMap[170] = %166 @@ -157,8 +157,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MachineOperand &Src2 = MI.getOperand(2); if (Src1.getImm() != 0) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src2.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src2.getReg(); PeepholeMap[DstReg] = SrcReg; } @@ -174,8 +174,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MachineOperand &Src2 = MI.getOperand(2); if (Src2.getImm() != 32) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src1.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = std::make_pair(*&SrcReg, Hexagon::isub_hi); } @@ -185,11 +185,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); MachineOperand &Src = MI.getOperand(1); - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); // Just handle virtual registers. 
- if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Map the following: // %170 = NOT_xx %166 // PeepholeMap[170] = %166 @@ -208,10 +208,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (Src.getSubReg() != Hexagon::isub_lo) continue; - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { + Register DstReg = Dst.getReg(); + Register SrcReg = Src.getReg(); + if (Register::isVirtualRegister(DstReg) && + Register::isVirtualRegister(SrcReg)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { // Change the 1st operand. @@ -237,12 +237,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { bool Done = false; if (QII->isPredicated(MI)) { MachineOperand &Op0 = MI.getOperand(0); - unsigned Reg0 = Op0.getReg(); + Register Reg0 = Op0.getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); if (RC0->getID() == Hexagon::PredRegsRegClassID) { // Handle instructions that have a prediate register in op0 // (most cases of predicable instructions). - if (TargetRegisterInfo::isVirtualRegister(Reg0)) { + if (Register::isVirtualRegister(Reg0)) { // Try to find in the map. if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { // Change the 1st operand and, flip the opcode. 
@@ -275,7 +275,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { break; } if (NewOp) { - unsigned PSrc = MI.getOperand(PR).getReg(); + Register PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), QII->get(NewOp), MI.getOperand(0).getReg()) diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 4f5f750e584..b7171fb1427 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -217,7 +217,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If the offset is not valid, calculate the address in a temporary // register and use it with offset 0. auto &MRI = MF.getRegInfo(); - unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(MB, II, DL, HII.get(Hexagon::A2_addi), TmpR) .addReg(BP) @@ -249,8 +249,8 @@ bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!SmallSrc && !SmallDst) return true; - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); const SlotIndexes &Indexes = *LIS.getSlotIndexes(); auto HasCall = [&Indexes] (const LiveInterval::Segment &S) { for (SlotIndex I = S.start.getBaseIndex(), E = S.end.getBaseIndex(); diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index bd4254aea27..f9fb14c190f 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -76,18 +76,18 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { unsigned Opc = MI.getOpcode(); if (Opc == Hexagon::CONST32) { - 
unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); uint64_t ImmValue = MI.getOperand(1).getImm(); const DebugLoc &DL = MI.getDebugLoc(); BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestReg) .addImm(ImmValue); B.erase(&MI); } else if (Opc == Hexagon::CONST64) { - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); int64_t ImmValue = MI.getOperand(1).getImm(); const DebugLoc &DL = MI.getDebugLoc(); - unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo); - unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi); + Register DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo); + Register DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi); int32_t LowWord = (ImmValue & 0xFFFFFFFF); int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF; diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 013eede2d41..55f31c62885 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -210,8 +210,8 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { for (auto &Op : MI->operands()) { if (!Op.isReg()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) return true; } return false; @@ -224,14 +224,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { unsigned NumRegs = MRI->getNumVirtRegs(); BitVector DoubleRegs(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - unsigned R = TargetRegisterInfo::index2VirtReg(i); + unsigned R = Register::index2VirtReg(i); if (MRI->getRegClass(R) == DoubleRC) DoubleRegs.set(i); } BitVector FixedRegs(NumRegs); for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); MachineInstr *DefI = MRI->getVRegDef(R); // 
In some cases a register may exist, but never be defined or used. // It should never appear anywhere, but mark it as "fixed", just to be @@ -244,7 +244,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { if (FixedRegs[x]) continue; - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~"); USet &Asc = AssocMap[R]; for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); @@ -258,14 +258,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { // Skip non-registers or registers with subregisters. if (&MO == &Op || !MO.isReg() || MO.getSubReg()) continue; - unsigned T = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(T)) { + Register T = MO.getReg(); + if (!Register::isVirtualRegister(T)) { FixedRegs.set(x); continue; } if (MRI->getRegClass(T) != DoubleRC) continue; - unsigned u = TargetRegisterInfo::virtReg2Index(T); + unsigned u = Register::virtReg2Index(T); if (FixedRegs[u]) continue; LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI)); @@ -281,7 +281,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { unsigned NextP = 1; USet Visited; for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { - unsigned R = TargetRegisterInfo::index2VirtReg(x); + unsigned R = Register::index2VirtReg(x); if (Visited.count(R)) continue; // Create a new partition for R. 
@@ -372,8 +372,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { case Hexagon::A2_andp: case Hexagon::A2_orp: case Hexagon::A2_xorp: { - unsigned Rs = MI->getOperand(1).getReg(); - unsigned Rt = MI->getOperand(2).getReg(); + Register Rs = MI->getOperand(1).getReg(); + Register Rt = MI->getOperand(2).getReg(); return profit(Rs) + profit(Rt); } @@ -400,7 +400,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { } int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); + assert(Register::isVirtualRegister(Reg)); const MachineInstr *DefI = MRI->getVRegDef(Reg); switch (DefI->getOpcode()) { @@ -499,7 +499,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, return; assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch"); // Expect a predicate register. - unsigned PR = Cond[1].getReg(); + Register PR = Cond[1].getReg(); assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); // Get the registers on which the loop controlling compare instruction @@ -535,7 +535,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, if (!MI.isPHI()) break; const MachineOperand &MD = MI.getOperand(0); - unsigned R = MD.getReg(); + Register R = MD.getReg(); if (MRI->getRegClass(R) == DoubleRC) DP.push_back(R); } @@ -551,7 +551,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, // Get the output from the add. If it is one of the inputs to the // loop-controlling compare instruction, then R is likely an induc- // tion register. - unsigned T = UseI->getOperand(0).getReg(); + Register T = UseI->getOperand(0).getReg(); if (T == CmpR1 || T == CmpR2) return false; } @@ -603,9 +603,9 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, continue; } // For register operands, set the subregister. 
- unsigned R = Op.getReg(); + Register R = Op.getReg(); unsigned SR = Op.getSubReg(); - bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isVirtReg = Register::isVirtualRegister(R); bool isKill = Op.isKill(); if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { isKill = false; @@ -674,7 +674,7 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, : MI->getOperand(2).getImm(); MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); - unsigned NewR = MRI->createVirtualRegister(RC); + Register NewR = MRI->createVirtualRegister(RC); assert(!UpdOp.getSubReg() && "Def operand with subreg"); BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) .addReg(AdrOp.getReg(), RSA) @@ -789,8 +789,8 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); assert(F != PairMap.end()); const UUPair &P = F->second; - unsigned LoR = P.first; - unsigned HiR = P.second; + Register LoR = P.first; + Register HiR = P.second; unsigned Opc = MI->getOpcode(); bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); @@ -813,7 +813,7 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, .addReg(Op1.getReg(), RS, HiSR); } else if (S < 32) { const TargetRegisterClass *IntRC = &IntRegsRegClass; - unsigned TmpR = MRI->createVirtualRegister(IntRC); + Register TmpR = MRI->createVirtualRegister(IntRC); // Expansion: // Shift left: DR = shl R, #s // LoR = shl R.lo, #s @@ -953,12 +953,12 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) .addImm(S); - unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + Register TmpR1 = MRI->createVirtualRegister(IntRC); BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) .addImm(S) .addImm(32-S); - unsigned TmpR2 = 
MRI->createVirtualRegister(IntRC); + Register TmpR2 = MRI->createVirtualRegister(IntRC); BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) .addReg(Op1.getReg(), RS1, HiSR) .addReg(TmpR1); @@ -1002,7 +1002,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, switch (Opc) { case TargetOpcode::PHI: case TargetOpcode::COPY: { - unsigned DstR = MI->getOperand(0).getReg(); + Register DstR = MI->getOperand(0).getReg(); if (MRI->getRegClass(DstR) == DoubleRC) { createHalfInstr(Opc, MI, PairMap, isub_lo); createHalfInstr(Opc, MI, PairMap, isub_hi); @@ -1079,7 +1079,7 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) continue; - unsigned R = Op.getReg(); + Register R = Op.getReg(); UUPairMap::const_iterator F = PairMap.find(R); if (F == PairMap.end()) continue; @@ -1104,8 +1104,8 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(R)) + Register R = Op.getReg(); + if (!Register::isVirtualRegister(R)) continue; if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) continue; @@ -1113,7 +1113,7 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, if (F == PairMap.end()) continue; const UUPair &Pr = F->second; - unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + Register NewDR = MRI->createVirtualRegister(DoubleRC); BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) .addReg(Pr.first) .addImm(Hexagon::isub_lo) @@ -1145,8 +1145,8 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { U != W; ++U) SplitIns.insert(U->getParent()); - unsigned LoR = MRI->createVirtualRegister(IntRC); - unsigned HiR = MRI->createVirtualRegister(IntRC); + Register LoR = MRI->createVirtualRegister(IntRC); + Register HiR = MRI->createVirtualRegister(IntRC); LLVM_DEBUG(dbgs() << "Created mapping: 
" << printReg(DR, TRI) << " -> " << printReg(HiR, TRI) << ':' << printReg(LoR, TRI) << '\n'); diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp index b8b61517ff9..27fefa5f5e2 100644 --- a/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -441,7 +441,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); - unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + Register VReg = MF->getRegInfo().createVirtualRegister(RC); MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) .addImm(int(Acc)); NG.push_back(TfrI); diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 7ec63a642b0..6c706fea096 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -119,7 +119,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { FeatureBitset Features = getFeatureBits(); if (HexagonDisableDuplex) - setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); return *this; @@ -230,7 +230,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { else if (SchedRetvalOptimization) { const MachineInstr *MI = DAG->SUnits[su].getInstr(); if (MI->isCopy() && - TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) { + Register::isPhysicalRegister(MI->getOperand(1).getReg())) { // %vregX = COPY %r0 VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg(); LastVRegUse.erase(MI->getOperand(1).getReg()); @@ -243,8 +243,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { 
VRegHoldingReg.count(MO.getReg())) { // LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su]; - } else if (MO.isDef() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + } else if (MO.isDef() && Register::isPhysicalRegister(MO.getReg())) { for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid(); ++AI) { if (LastVRegUse.count(*AI) && @@ -345,7 +344,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine // the correct latency. if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) { - unsigned DReg = DstInst->getOperand(0).getReg(); + Register DReg = DstInst->getOperand(0).getReg(); MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr(); unsigned UseIdx = -1; for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { @@ -375,15 +374,15 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, void HexagonSubtarget::getPostRAMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); } void HexagonSubtarget::getSMSMutations( std::vector> &Mutations) const { - Mutations.push_back(llvm::make_unique()); - Mutations.push_back(llvm::make_unique()); + Mutations.push_back(std::make_unique()); + Mutations.push_back(std::make_unique()); } // Pin the vtable to this file. 
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 007423ef190..31157a0065d 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -228,7 +228,7 @@ public: } bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const { - if (!VecTy.isVector() || !useHVXOps()) + if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector()) return false; MVT ElemTy = VecTy.getVectorElementType(); if (!IncludeBool && ElemTy == MVT::i1) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 80b8480448f..d709a82be66 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -111,10 +111,10 @@ int HexagonTargetMachineModule = 0; static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { ScheduleDAGMILive *DAG = - new VLIWMachineScheduler(C, make_unique()); - DAG->addMutation(make_unique()); - DAG->addMutation(make_unique()); - DAG->addMutation(make_unique()); + new VLIWMachineScheduler(C, std::make_unique()); + DAG->addMutation(std::make_unique()); + DAG->addMutation(std::make_unique()); + DAG->addMutation(std::make_unique()); DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -218,7 +218,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), (HexagonNoOpt ? CodeGenOpt::None : OL)), - TLOF(make_unique()) { + TLOF(std::make_unique()) { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } @@ -244,7 +244,7 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this); + I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 38062e8e922..ddbc5543348 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -45,6 +45,8 @@ bool HexagonTTIImpl::useHVX() const { bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const { assert(VecTy->isVectorTy()); + if (cast(VecTy)->isScalable()) + return false; // Avoid types like <2 x i32*>. if (!cast(VecTy)->getElementType()->isIntegerTy()) return false; diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 27e8fc01900..12ede503af8 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -68,8 +68,8 @@ public: bool shouldFavorPostInc() const; // L1 cache prefetch. 
- unsigned getPrefetchDistance() const; - unsigned getCacheLineSize() const; + unsigned getPrefetchDistance() const override; + unsigned getCacheLineSize() const override; /// @} diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp index a9692f42e46..0c0266a6839 100644 --- a/lib/Target/Hexagon/HexagonVExtract.cpp +++ b/lib/Target/Hexagon/HexagonVExtract.cpp @@ -67,9 +67,9 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR, MachineRegisterInfo &MRI) { MachineBasicBlock &ExtB = *ExtI->getParent(); DebugLoc DL = ExtI->getDebugLoc(); - unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); - unsigned ExtIdxR = ExtI->getOperand(2).getReg(); + Register ExtIdxR = ExtI->getOperand(2).getReg(); unsigned ExtIdxS = ExtI->getOperand(2).getSubReg(); // Simplified check for a compile-time constant value of ExtIdxR. @@ -86,7 +86,7 @@ unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR, } } - unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR) .add(ExtI->getOperand(2)) .addImm(-4); @@ -111,7 +111,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { unsigned Opc = MI.getOpcode(); if (Opc != Hexagon::V6_extractw) continue; - unsigned VecR = MI.getOperand(1).getReg(); + Register VecR = MI.getOperand(1).getReg(); VExtractMap[VecR].push_back(&MI); } } @@ -144,13 +144,13 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &ExtB = *ExtI->getParent(); DebugLoc DL = ExtI->getDebugLoc(); - unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + Register BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR) .addFrameIndex(FI) .addImm(SR == 0 
? 0 : VecSize/2); unsigned ElemR = genElemLoad(ExtI, BaseR, MRI); - unsigned ExtR = ExtI->getOperand(0).getReg(); + Register ExtR = ExtI->getOperand(0).getReg(); MRI.replaceRegWith(ExtR, ElemR); ExtB.erase(ExtI); Changed = true; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 3619e4c239d..fab5edefb55 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -57,9 +58,9 @@ static cl::opt DisablePacketizer("disable-packetizer", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon packetizer pass")); -cl::opt Slot1Store("slot1-store-slot0-load", cl::Hidden, - cl::ZeroOrMore, cl::init(true), - cl::desc("Allow slot1 store and slot0 load")); +static cl::opt Slot1Store("slot1-store-slot0-load", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Allow slot1 store and slot0 load")); static cl::opt PacketizeVolatiles("hexagon-packetize-volatiles", cl::ZeroOrMore, cl::Hidden, cl::init(true), @@ -129,16 +130,16 @@ INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, - MachineLoopInfo &MLI, AliasAnalysis *AA, + MachineLoopInfo &MLI, AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal) : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI), Minimal(Minimal) { HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); - addMutation(llvm::make_unique()); - addMutation(llvm::make_unique()); - addMutation(llvm::make_unique()); + addMutation(std::make_unique()); + addMutation(std::make_unique()); + 
addMutation(std::make_unique()); } // Check if FirstI modifies a register that SecondI reads. @@ -148,7 +149,7 @@ static bool hasWriteToReadDep(const MachineInstr &FirstI, for (auto &MO : FirstI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (SecondI.readsRegister(R, TRI)) return true; } @@ -422,7 +423,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, dbgs() << "Checking CUR against "; MJ.dump(); }); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); bool FoundMatch = false; for (auto &MO : MJ.operands()) if (MO.isReg() && MO.getReg() == DestReg) @@ -515,7 +516,7 @@ bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) { unsigned BPJ, OPJ; if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ)) return false; - unsigned Reg = MI.getOperand(BPI).getReg(); + Register Reg = MI.getOperand(BPI).getReg(); if (Reg != MJ.getOperand(BPJ).getReg()) return false; // Make sure that the dependences do not restrict adding MI to the packet. @@ -788,7 +789,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, return false; if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (R == DepReg || HRI->isSuperRegister(DepReg, R)) return false; } @@ -1208,7 +1209,7 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I, for (auto &MO : J.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.isDead()) continue; - unsigned R = MO.getReg(); + Register R = MO.getReg(); if (R != Hexagon::USR_OVF && DeadDefs[R]) return true; } @@ -1585,7 +1586,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // subset of the volatile register set. 
for (const MachineOperand &Op : I.operands()) { if (Op.isReg() && Op.isDef()) { - unsigned R = Op.getReg(); + Register R = Op.getReg(); if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI)) continue; } else if (!Op.isRegMask()) { @@ -1763,6 +1764,16 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndMI) { // Replace VLIWPacketizerList::endPacket(MBB, EndMI). + LLVM_DEBUG({ + if (!CurrentPacketMIs.empty()) { + dbgs() << "Finalizing packet:\n"; + unsigned Idx = 0; + for (MachineInstr *MI : CurrentPacketMIs) { + unsigned R = ResourceTracker->getUsedResources(Idx++); + dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; + } + } + }); bool memShufDisabled = getmemShufDisabled(); if (memShufDisabled && !foundLSInPacket()) { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index daa86b6f539..943b9ac7ecc 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -69,8 +69,7 @@ private: public: HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA, - const MachineBranchProbabilityInfo *MBPI, + AAResults *AA, const MachineBranchProbabilityInfo *MBPI, bool Minimal); // initPacketizerState - initialize some internal flags. 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 7c0770926ab..75cb398d409 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -201,9 +201,7 @@ public: bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override { - MCFixupKind Kind = Fixup.getKind(); - - switch((unsigned)Kind) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unknown Fixup Kind!"); @@ -583,7 +581,7 @@ public: return false; // If we cannot resolve the fixup value, it requires relaxation. if (!Resolved) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case fixup_Hexagon_B22_PCREL: // GetFixupCount assumes B22 won't relax LLVM_FALLTHROUGH; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index f678bf49322..cdbeae38b3a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -44,7 +44,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, MCFixup const &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Variant = Target.getAccessVariant(); - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: report_fatal_error("Unrecognized relocation type"); break; @@ -299,5 +299,5 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU) { - return llvm::make_unique(OSABI, CPU); + return std::make_unique(OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index fcd3758600c..8b262bd0248 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ 
b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -726,9 +726,6 @@ void HexagonMCChecker::reportNote(SMLoc Loc, llvm::Twine const &Msg) { } void HexagonMCChecker::reportWarning(Twine const &Msg) { - if (ReportErrors) { - auto SM = Context.getSourceManager(); - if (SM) - SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg); - } + if (ReportErrors) + Context.reportWarning(MCB.getLoc(), Msg); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index f2432883af6..a799f7f7c0b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -116,8 +116,8 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, } // Update the maximum alignment of the section if necessary. - if (ByteAlignment > Section.getAlignment()) - Section.setAlignment(ByteAlignment); + if (Align(ByteAlignment) > Section.getAlignment()) + Section.setAlignment(Align(ByteAlignment)); SwitchSection(P.first, P.second); } else { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 9c50b25156c..870ab9e94a6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -72,7 +72,6 @@ cl::opt MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"), cl::init(false)); cl::opt MV66("mv66", cl::Hidden, cl::desc("Build for Hexagon V66"), cl::init(false)); -} // namespace cl::opt EnableHVX("mhvx", @@ -86,6 +85,7 @@ cl::opt clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. 
cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); +} // namespace static cl::opt DisableHVX("mno-hvx", cl::Hidden, @@ -264,14 +264,12 @@ createHexagonObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { } static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) { - uint64_t FB = STI->getFeatureBits().to_ullong(); - if (FB & (1ULL << F)) + if (STI->getFeatureBits()[F]) STI->ToggleFeature(F); } static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) { - uint64_t FB = STI->getFeatureBits().to_ullong(); - return (FB & (1ULL << F)) != 0; + return STI->getFeatureBits()[F]; } namespace { @@ -398,7 +396,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS); if (HexagonDisableDuplex) { llvm::FeatureBitset Features = X->getFeatureBits(); - X->setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + X->setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); } X->setFeatureBits(completeHVXFeatures(X->getFeatureBits())); diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp index 7702024f87b..a9d39fd4b2d 100644 --- a/lib/Target/Hexagon/RDFCopy.cpp +++ b/lib/Target/Hexagon/RDFCopy.cpp @@ -45,8 +45,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) { const MachineOperand &Src = MI->getOperand(1); RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg()); RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg()); - assert(TargetRegisterInfo::isPhysicalRegister(DstR.Reg)); - assert(TargetRegisterInfo::isPhysicalRegister(SrcR.Reg)); + assert(Register::isPhysicalRegister(DstR.Reg)); + assert(Register::isPhysicalRegister(SrcR.Reg)); const TargetRegisterInfo &TRI = DFG.getTRI(); if (TRI.getMinimalPhysRegClass(DstR.Reg) != TRI.getMinimalPhysRegClass(SrcR.Reg)) diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp 
index 52178931aa6..af86c7b1956 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 9d8f706b8a0..0cb35dc9881 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -633,7 +633,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) // uses or defs, and those lists do not allow sub-registers. if (Op.getSubReg() != 0) return false; - RegisterId Reg = Op.getReg(); + Register Reg = Op.getReg(); const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs() : D.getImplicitUses(); if (!ImpR) @@ -963,7 +963,7 @@ void DataFlowGraph::build(unsigned Options) { RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const { assert(PhysicalRegisterInfo::isRegMaskId(Reg) || - TargetRegisterInfo::isPhysicalRegister(Reg)); + Register::isPhysicalRegister(Reg)); assert(Reg != 0); if (Sub != 0) Reg = TRI.getSubReg(Reg, Sub); @@ -1291,8 +1291,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R)) continue; uint16_t Flags = NodeAttrs::None; if (TOI.isPreserving(In, OpN)) { @@ -1336,8 +1336,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isDef() || !Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R) || DoneDefs.test(R)) 
continue; RegisterRef RR = makeRegRef(Op); uint16_t Flags = NodeAttrs::None; @@ -1365,8 +1365,8 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { MachineOperand &Op = In.getOperand(OpN); if (!Op.isReg() || !Op.isUse()) continue; - unsigned R = Op.getReg(); - if (!R || !TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!R || !Register::isPhysicalRegister(R)) continue; uint16_t Flags = NodeAttrs::None; if (Op.isUndef()) diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 9cd304aa10b..7d7b89462ff 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -889,8 +889,8 @@ void Liveness::resetKills(MachineBasicBlock *B) { // implicit defs. if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!Register::isPhysicalRegister(R)) continue; for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) Live.reset(*SR); @@ -898,8 +898,8 @@ void Liveness::resetKills(MachineBasicBlock *B) { for (auto &Op : MI->operands()) { if (!Op.isReg() || !Op.isUse() || Op.isUndef()) continue; - unsigned R = Op.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(R)) + Register R = Op.getReg(); + if (!Register::isPhysicalRegister(R)) continue; bool IsLive = false; for (MCRegAliasIterator AR(R, &TRI, true); AR.isValid(); ++AR) { diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp index 6e0f33695f0..b5675784e34 100644 --- a/lib/Target/Hexagon/RDFRegisters.cpp +++ b/lib/Target/Hexagon/RDFRegisters.cpp @@ -101,7 +101,7 @@ RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { // Do not include RR in the alias set. 
std::set AS; - assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg)); if (isRegMaskId(Reg)) { // XXX SLOW const uint32_t *MB = getRegMaskBits(Reg); @@ -129,8 +129,8 @@ std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { } bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const { - assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg)); - assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg)); + assert(Register::isPhysicalRegister(RA.Reg)); + assert(Register::isPhysicalRegister(RB.Reg)); MCRegUnitMaskIterator UMA(RA.Reg, &TRI); MCRegUnitMaskIterator UMB(RB.Reg, &TRI); @@ -160,7 +160,7 @@ bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const { } bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const { - assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg)); + assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg)); const uint32_t *MB = getRegMaskBits(RM.Reg); bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32)); // If the lane mask information is "full", e.g. 
when the given lane mask diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h index 646233bacda..4afaf80e465 100644 --- a/lib/Target/Hexagon/RDFRegisters.h +++ b/lib/Target/Hexagon/RDFRegisters.h @@ -99,15 +99,15 @@ namespace rdf { const MachineFunction &mf); static bool isRegMaskId(RegisterId R) { - return TargetRegisterInfo::isStackSlot(R); + return Register::isStackSlot(R); } RegisterId getRegMaskId(const uint32_t *RM) const { - return TargetRegisterInfo::index2StackSlot(RegMasks.find(RM)); + return Register::index2StackSlot(RegMasks.find(RM)); } const uint32_t *getRegMaskBits(RegisterId R) const { - return RegMasks.get(TargetRegisterInfo::stackSlot2Index(R)); + return RegMasks.get(Register::stackSlot2Index(R)); } RegisterRef normalize(RegisterRef RR) const; @@ -125,7 +125,7 @@ namespace rdf { } const BitVector &getMaskUnits(RegisterId MaskId) const { - return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units; + return MaskInfos[Register::stackSlot2Index(MaskId)].Units; } RegisterRef mapTo(RegisterRef RR, unsigned R) const; diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 9af8a0b35b2..ec82e3a41f2 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -469,13 +469,14 @@ public: else if (isa(getImm())) { #ifndef NDEBUG const LanaiMCExpr *SymbolRefExpr = dyn_cast(getImm()); - assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); + assert(SymbolRefExpr && + SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - assert(isa(BinaryExpr->getLHS()) && + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && cast(BinaryExpr->getLHS())->getKind() == LanaiMCExpr::VK_Lanai_ABS_LO); #endif @@ -499,13 +500,14 @@ public: else if (isa(getImm())) { #ifndef 
NDEBUG const LanaiMCExpr *SymbolRefExpr = dyn_cast(getImm()); - assert(SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); + assert(SymbolRefExpr && + SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - assert(isa(BinaryExpr->getLHS()) && + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && cast(BinaryExpr->getLHS())->getKind() == LanaiMCExpr::VK_Lanai_ABS_HI); #endif @@ -544,10 +546,9 @@ public: } else if (isa(getImm())) { #ifndef NDEBUG const MCBinaryExpr *BinaryExpr = dyn_cast(getImm()); - const LanaiMCExpr *SymbolRefExpr = - dyn_cast(BinaryExpr->getLHS()); - assert(SymbolRefExpr && - SymbolRefExpr->getKind() == LanaiMCExpr::VK_Lanai_None); + assert(BinaryExpr && isa(BinaryExpr->getLHS()) && + cast(BinaryExpr->getLHS())->getKind() == + LanaiMCExpr::VK_Lanai_None); #endif Inst.addOperand(MCOperand::createExpr(getImm())); } else @@ -580,7 +581,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc Start) { - auto Op = make_unique(TOKEN); + auto Op = std::make_unique(TOKEN); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = Start; @@ -590,7 +591,7 @@ public: static std::unique_ptr createReg(unsigned RegNum, SMLoc Start, SMLoc End) { - auto Op = make_unique(REGISTER); + auto Op = std::make_unique(REGISTER); Op->Reg.RegNum = RegNum; Op->StartLoc = Start; Op->EndLoc = End; @@ -599,7 +600,7 @@ public: static std::unique_ptr createImm(const MCExpr *Value, SMLoc Start, SMLoc End) { - auto Op = make_unique(IMMEDIATE); + auto Op = std::make_unique(IMMEDIATE); Op->Imm.Value = Value; Op->StartLoc = Start; Op->EndLoc = End; diff --git a/lib/Target/Lanai/LanaiAsmPrinter.cpp b/lib/Target/Lanai/LanaiAsmPrinter.cpp index 64d963475e1..12a3202446a 100644 --- a/lib/Target/Lanai/LanaiAsmPrinter.cpp +++ b/lib/Target/Lanai/LanaiAsmPrinter.cpp @@ -133,7 +133,7 @@ bool 
LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << LanaiInstPrinter::getRegisterName(Reg); return false; } diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp index 09c63dca23e..b9e577d201f 100644 --- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp +++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with useful instructions. +// Simple pass to fill delay slots with useful instructions. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/Lanai/LanaiFrameLowering.cpp b/lib/Target/Lanai/LanaiFrameLowering.cpp index 142c09c504c..eddc2b8e61f 100644 --- a/lib/Target/Lanai/LanaiFrameLowering.cpp +++ b/lib/Target/Lanai/LanaiFrameLowering.cpp @@ -72,8 +72,8 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const { MachineInstr &MI = *MBBI++; if (MI.getOpcode() == Lanai::ADJDYNALLOC) { DebugLoc DL = MI.getDebugLoc(); - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst) .addReg(Src) diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h index 5fe4535543e..380d63df730 100644 --- a/lib/Target/Lanai/LanaiFrameLowering.h +++ b/lib/Target/Lanai/LanaiFrameLowering.h @@ -31,7 +31,7 @@ protected: public: explicit LanaiFrameLowering(const LanaiSubtarget &Subtarget) : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/8, + /*StackAlignment=*/Align(8), /*LocalAreaOffset=*/0), STI(Subtarget) {} diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp 
b/lib/Target/Lanai/LanaiISelLowering.cpp index 1ed078bb433..43933d062a7 100644 --- a/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/lib/Target/Lanai/LanaiISelLowering.cpp @@ -144,9 +144,9 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); - // Function alignments (log2) - setMinFunctionAlignment(2); - setPrefFunctionAlignment(2); + // Function alignments + setMinFunctionAlignment(Align(4)); + setPrefFunctionAlignment(Align(4)); setJumpIsExpensive(true); @@ -212,10 +212,11 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op, // Lanai Inline Assembly Support //===----------------------------------------------------------------------===// -unsigned LanaiTargetLowering::getRegisterByName(const char *RegName, EVT /*VT*/, - SelectionDAG & /*DAG*/) const { +Register LanaiTargetLowering::getRegisterByName( + const char *RegName, EVT /*VT*/, + const MachineFunction & /*MF*/) const { // Only unallocatable registers should be matched here. 
- unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("pc", Lanai::PC) .Case("sp", Lanai::SP) .Case("fp", Lanai::FP) @@ -459,7 +460,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments( EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { case MVT::i32: { - unsigned VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass); + Register VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h index e7b5755e904..4c35a2c6fb8 100644 --- a/lib/Target/Lanai/LanaiISelLowering.h +++ b/lib/Target/Lanai/LanaiISelLowering.h @@ -90,8 +90,8 @@ public: SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char *RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char *RegName, EVT VT, + const MachineFunction &MF) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp index 700a8606910..b950fd0424e 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -86,8 +86,7 @@ void LanaiInstrInfo::loadRegFromStackSlot( } bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis * /*AA*/) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); @@ -457,7 +456,7 @@ bool LanaiInstrInfo::analyzeSelect(const MachineInstr &MI, // return the defining instruction. 
static MachineInstr *canFoldIntoSelect(unsigned Reg, const MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) return nullptr; @@ -479,7 +478,7 @@ static MachineInstr *canFoldIntoSelect(unsigned Reg, // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) return nullptr; - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return nullptr; if (MO.isDef() && !MO.isDead()) return nullptr; @@ -505,7 +504,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI, // Find new register class to use. MachineOperand FalseReg = MI.getOperand(Invert ? 1 : 2); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) return nullptr; diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h index d71424aeb0b..59a04d2cc38 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.h +++ b/lib/Target/Lanai/LanaiInstrInfo.h @@ -36,8 +36,7 @@ public: } bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const override; + const MachineInstr &MIb) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp index d3056a1eba8..7c28debb94d 100644 --- a/lib/Target/Lanai/LanaiRegisterInfo.cpp +++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp @@ -155,7 +155,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!HasFP || (needsStackRealignment(MF) && FrameIndex >= 0)) Offset += MF.getFrameInfo().getStackSize(); - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); if 
(FrameIndex >= 0) { if (hasBasePointer(MF)) FrameReg = getBaseRegister(); diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index 4313fa5a82b..919d43ad9b9 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -88,5 +88,5 @@ bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCSymbol & /*SD*/, std::unique_ptr llvm::createLanaiELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp index a0ec14ae238..85dcc0f152f 100644 --- a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp +++ b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp @@ -191,33 +191,33 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - return make_unique(Str, S); + return std::make_unique(Str, S); } static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_Reg, RegNum, S, E); + return std::make_unique(k_Reg, RegNum, S, E); } static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(Val, S, E); + return std::make_unique(Val, S, E); } static std::unique_ptr CreateMem(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) { - return make_unique(RegNum, Val, S, E); + return std::make_unique(RegNum, Val, S, E); } static std::unique_ptr CreateIndReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_IndReg, RegNum, S, E); + return std::make_unique(k_IndReg, RegNum, S, E); } static std::unique_ptr CreatePostIndReg(unsigned RegNum, SMLoc S, SMLoc E) { - return make_unique(k_PostIndReg, RegNum, S, E); + return std::make_unique(k_PostIndReg, RegNum, S, E); } SMLoc getStartLoc() const { return Start; } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp 
b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp index 38b7da32c24..0cdd1f4f701 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp @@ -31,7 +31,7 @@ protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override { // Translate fixup kind to ELF relocation type. - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { case FK_Data_1: return ELF::R_MSP430_8; case FK_Data_2: return ELF::R_MSP430_16_BYTE; case FK_Data_4: return ELF::R_MSP430_32; @@ -54,5 +54,5 @@ protected: std::unique_ptr llvm::createMSP430ELFObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 3a71a084d1a..a3b91acdc6d 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -159,8 +159,9 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { MCSection *Cur = OutStreamer->getCurrentSectionOnly(); const auto *F = &ISR.getFunction(); - assert(F->hasFnAttribute("interrupt") && - "Functions with MSP430_INTR CC should have 'interrupt' attribute"); + if (F->getCallingConv() != CallingConv::MSP430_INTR) { + report_fatal_error("Functions with 'interrupt' attribute must have msp430_intrcc CC"); + } StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString(); MCSection *IV = OutStreamer->getContext().getELFSection( "__interrupt_vector_" + IVIdx, @@ -174,8 +175,9 @@ void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) { bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit separate section for an interrupt vector if ISR - if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR) + if 
(MF.getFunction().hasFnAttribute("interrupt")) { EmitInterruptVectorSection(MF); + } SetupMachineFunction(MF); EmitFunctionBody(); diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 45e7c26e4d3..ce5affdc25b 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index 33ce3c70a2a..70e28405302 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -22,7 +22,8 @@ protected: public: explicit MSP430FrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2, + Align(2)) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index fedfb857bd0..64169d1f5eb 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -327,8 +327,8 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::MSP430_BUILTIN); // TODO: __mspabi_srall, __mspabi_srlll, __mspabi_sllll - setMinFunctionAlignment(1); - setPrefFunctionAlignment(1); + setMinFunctionAlignment(Align(2)); + setPrefFunctionAlignment(Align(2)); } SDValue MSP430TargetLowering::LowerOperation(SDValue Op, @@ -353,6 +353,9 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, } } +unsigned MSP430TargetLowering::getShiftAmountThreshold(EVT VT) const { + return 2; +} //===----------------------------------------------------------------------===// // MSP430 Inline Assembly Support //===----------------------------------------------------------------------===// @@ -632,7 +635,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments( llvm_unreachable(nullptr); } case MVT::i16: - unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); + Register VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); @@ -1446,8 +1449,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI, case MSP430::Rrcl16: { BuildMI(*BB, MI, dl, TII.get(MSP430::BIC16rc), MSP430::SR) .addReg(MSP430::SR).addImm(1); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); unsigned RrcOpc = MI.getOpcode() == MSP430::Rrcl16 ? 
MSP430::RRC16r : MSP430::RRC8r; BuildMI(*BB, MI, dl, TII.get(RrcOpc), DstReg) @@ -1479,13 +1482,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI, LoopBB->addSuccessor(RemBB); LoopBB->addSuccessor(LoopBB); - unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass); - unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass); - unsigned ShiftReg = RI.createVirtualRegister(RC); - unsigned ShiftReg2 = RI.createVirtualRegister(RC); - unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass); + Register ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass); + Register ShiftReg = RI.createVirtualRegister(RC); + Register ShiftReg2 = RI.createVirtualRegister(RC); + Register ShiftAmtSrcReg = MI.getOperand(2).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); // BB: // cmp 0, N diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index ee6b6316d7a..9224e5e3d00 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -124,6 +124,8 @@ namespace llvm { bool isZExtFree(EVT VT1, EVT VT2) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; + unsigned getShiftAmountThreshold(EVT VT) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index afbb2f213b4..bec357a1548 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -139,7 +139,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; // We need to materialize the offset via add instruction. 
- unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if (Offset < 0) BuildMI(MBB, std::next(II), dl, TII.get(MSP430::SUB16ri), DstReg) .addReg(DstReg).addImm(-Offset); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 8c4ca982c96..e9aeba76de8 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -46,7 +46,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 1f7d095bf49..21d0df74d45 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -39,6 +39,7 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -233,9 +234,14 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); - bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU, - SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI); + bool expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadDoubleImmToFPR(MCInst &Inst, bool 
Is64FPU, SMLoc IDLoc, + MCStreamer &Out, const MCSubtargetInfo *STI); bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, const MCOperand &Offset, bool Is32BitAddress, @@ -512,11 +518,11 @@ public: // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - llvm::make_unique(getSTI().getFeatureBits())); + std::make_unique(getSTI().getFeatureBits())); // Create an assembler options environment for the user to modify. AssemblerOptions.push_back( - llvm::make_unique(getSTI().getFeatureBits())); + std::make_unique(getSTI().getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -844,7 +850,7 @@ private: const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_RegisterIndex, Parser); + auto Op = std::make_unique(k_RegisterIndex, Parser); Op->RegIdx.Index = Index; Op->RegIdx.RegInfo = RegInfo; Op->RegIdx.Kind = RegKind; @@ -1446,7 +1452,7 @@ public: static std::unique_ptr CreateToken(StringRef Str, SMLoc S, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Token, Parser); + auto Op = std::make_unique(k_Token, Parser); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -1521,7 +1527,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Immediate, Parser); + auto Op = std::make_unique(k_Immediate, Parser); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -1531,7 +1537,7 @@ public: static std::unique_ptr CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, SMLoc E, MipsAsmParser &Parser) { - auto Op = llvm::make_unique(k_Memory, Parser); + auto Op = std::make_unique(k_Memory, Parser); Op->Mem.Base = Base.release(); Op->Mem.Off = Off; Op->StartLoc = S; @@ -1544,7 +1550,7 @@ public: MipsAsmParser &Parser) { assert(Regs.size() > 0 && "Empty list not allowed"); - auto Op = llvm::make_unique(k_RegList, Parser); + auto Op = 
std::make_unique(k_RegList, Parser); Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; @@ -1804,8 +1810,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), - 1LL << (inMicroMipsMode() ? 1 : 2))) + if (offsetToAlignment(Offset.getImm(), + (inMicroMipsMode() ? Align(2) : Align(4)))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BGEZ: @@ -1834,8 +1840,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), - 1LL << (inMicroMipsMode() ? 1 : 2))) + if (offsetToAlignment(Offset.getImm(), + (inMicroMipsMode() ? Align(2) : Align(4)))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BGEC: case Mips::BGEC_MMR6: @@ -1850,7 +1856,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BLEZC: case Mips::BLEZC_MMR6: @@ -1863,7 +1869,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. 
if (!isIntN(18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZC: case Mips::BEQZC_MMR6: @@ -1874,7 +1880,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isIntN(23, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 2)) + if (offsetToAlignment(Offset.getImm(), Align(4))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZ16_MM: @@ -1887,7 +1893,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; // We'll deal with this situation later on when applying fixups. if (!isInt<8>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 2LL)) + if (offsetToAlignment(Offset.getImm(), Align(2))) return Error(IDLoc, "branch to misaligned address"); break; } @@ -2454,25 +2460,21 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, : MER_Success; case Mips::LoadImmSingleGPR: - return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmSingleFGR: - return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadSingleImmToFPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleGPR: - return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToGPR(Inst, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR: - return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI) - ? 
MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, true, IDLoc, Out, STI) ? MER_Fail + : MER_Success; case Mips::LoadImmDoubleFGR_32: - return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI) - ? MER_Fail - : MER_Success; + return expandLoadDoubleImmToFPR(Inst, false, IDLoc, Out, STI) ? MER_Fail + : MER_Success; + case Mips::Ulh: return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::Ulhu: @@ -2868,12 +2870,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { - // FIXME: These expansions do not respect -mxgot. MipsTargetStreamer &TOut = getTargetStreamer(); - bool UseSrcReg = SrcReg != Mips::NoRegister; + bool UseSrcReg = SrcReg != Mips::NoRegister && SrcReg != Mips::ZERO && + SrcReg != Mips::ZERO_64; warnIfNoMacro(IDLoc); - if (inPicMode() && ABI.IsO32()) { + if (inPicMode()) { MCValue Res; if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { Error(IDLoc, "expected relocatable expression"); @@ -2884,46 +2886,41 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return true; } + bool IsPtr64 = ABI.ArePtrs64bit(); + bool IsLocalSym = + Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL); + bool UseXGOT = STI->getFeatureBits()[Mips::FeatureXGOT] && !IsLocalSym; + // The case where the result register is $25 is somewhat special. If the // symbol in the final relocation is external and not modified with a - // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16. + // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16 + // or R_MIPS_CALL16 instead of R_MIPS_GOT_DISP in 64-bit case. 
if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && - !(Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary() || - (Res.getSymA()->getSymbol().isELF() && - cast(Res.getSymA()->getSymbol()).getBinding() == - ELF::STB_LOCAL))) { - const MCExpr *CallExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr), - IDLoc, STI); + Res.getConstant() == 0 && !IsLocalSym) { + if (UseXGOT) { + const MCExpr *CallHiExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_HI16, + SymExpr, getContext()); + const MCExpr *CallLoExpr = MipsMCExpr::create(MipsMCExpr::MEK_CALL_LO16, + SymExpr, getContext()); + TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(CallHiExpr), IDLoc, + STI); + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, DstReg, GPReg, + IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, DstReg, + MCOperand::createExpr(CallLoExpr), IDLoc, STI); + } else { + const MCExpr *CallExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, DstReg, GPReg, + MCOperand::createExpr(CallExpr), IDLoc, STI); + } return false; } - // The remaining cases are: - // External GOT: lw $tmp, %got(symbol+offset)($gp) - // >addiu $tmp, $tmp, %lo(offset) - // >addiu $rd, $tmp, $rs - // Local GOT: lw $tmp, %got(symbol+offset)($gp) - // addiu $tmp, $tmp, %lo(symbol+offset)($gp) - // >addiu $rd, $tmp, $rs - // The addiu's marked with a '>' may be omitted if they are redundant. If - // this happens then the last instruction must use $rd as the result - // register. 
- const MipsMCExpr *GotExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext()); - const MCExpr *LoExpr = nullptr; - if (Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary()) - LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext()); - else if (Res.getConstant() != 0) { - // External symbols fully resolve the symbol with just the %got(symbol) - // but we must still account for any offset to the symbol for expressions - // like symbol+8. - LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); - } - unsigned TmpReg = DstReg; if (UseSrcReg && getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, @@ -2936,94 +2933,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, TmpReg = ATReg; } - TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, - STI); + if (UseXGOT) { + // Loading address from XGOT + // External GOT: lui $tmp, %got_hi(symbol)($gp) + // addu $tmp, $tmp, $gp + // lw $tmp, %got_lo(symbol)($tmp) + // >addiu $tmp, $tmp, offset + // >addiu $rd, $tmp, $rs + // The addiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + const MCExpr *CallHiExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_HI16, SymExpr, getContext()); + const MCExpr *CallLoExpr = MipsMCExpr::create( + MipsMCExpr::MEK_GOT_LO16, Res.getSymA(), getContext()); - if (LoExpr) - TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + TOut.emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(CallHiExpr), IDLoc, + STI); + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, GPReg, IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, TmpReg, + MCOperand::createExpr(CallLoExpr), IDLoc, STI); - if (UseSrcReg) - TOut.emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); + if (Res.getConstant() != 0) + TOut.emitRRX(IsPtr64 ? 
Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(MCConstantExpr::create( + Res.getConstant(), getContext())), + IDLoc, STI); - return false; - } - - if (inPicMode() && ABI.ArePtrs64bit()) { - MCValue Res; - if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { - Error(IDLoc, "expected relocatable expression"); - return true; - } - if (Res.getSymB() != nullptr) { - Error(IDLoc, "expected relocatable expression with only one symbol"); - return true; - } - - // The case where the result register is $25 is somewhat special. If the - // symbol in the final relocation is external and not modified with a - // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP. - if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && - !(Res.getSymA()->getSymbol().isInSection() || - Res.getSymA()->getSymbol().isTemporary() || - (Res.getSymA()->getSymbol().isELF() && - cast(Res.getSymA()->getSymbol()).getBinding() == - ELF::STB_LOCAL))) { - const MCExpr *CallExpr = - MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr), - IDLoc, STI); + if (UseSrcReg) + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg, + IDLoc, STI); return false; } - // The remaining cases are: - // Small offset: ld $tmp, %got_disp(symbol)($gp) - // >daddiu $tmp, $tmp, offset - // >daddu $rd, $tmp, $rs - // The daddiu's marked with a '>' may be omitted if they are redundant. If - // this happens then the last instruction must use $rd as the result - // register. - const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, - Res.getSymA(), - getContext()); + const MipsMCExpr *GotExpr = nullptr; const MCExpr *LoExpr = nullptr; - if (Res.getConstant() != 0) { - // Symbols fully resolve with just the %got_disp(symbol) but we - // must still account for any offset to the symbol for - // expressions like symbol+8. 
- LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); + if (IsPtr64) { + // The remaining cases are: + // Small offset: ld $tmp, %got_disp(symbol)($gp) + // >daddiu $tmp, $tmp, offset + // >daddu $rd, $tmp, $rs + // The daddiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, Res.getSymA(), + getContext()); + if (Res.getConstant() != 0) { + // Symbols fully resolve with just the %got_disp(symbol) but we + // must still account for any offset to the symbol for + // expressions like symbol+8. + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); - // FIXME: Offsets greater than 16 bits are not yet implemented. - // FIXME: The correct range is a 32-bit sign-extended number. - if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { - Error(IDLoc, "macro instruction uses large offset, which is not " - "currently supported"); - return true; + // FIXME: Offsets greater than 16 bits are not yet implemented. + // FIXME: The correct range is a 32-bit sign-extended number. + if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { + Error(IDLoc, "macro instruction uses large offset, which is not " + "currently supported"); + return true; + } + } + } else { + // The remaining cases are: + // External GOT: lw $tmp, %got(symbol)($gp) + // >addiu $tmp, $tmp, offset + // >addiu $rd, $tmp, $rs + // Local GOT: lw $tmp, %got(symbol+offset)($gp) + // addiu $tmp, $tmp, %lo(symbol+offset)($gp) + // >addiu $rd, $tmp, $rs + // The addiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. 
+ if (IsLocalSym) { + GotExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT, SymExpr, getContext()); + LoExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, SymExpr, getContext()); + } else { + // External symbols fully resolve the symbol with just the %got(symbol) + // but we must still account for any offset to the symbol for + // expressions like symbol+8. + GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT, Res.getSymA(), + getContext()); + if (Res.getConstant() != 0) + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); } } - unsigned TmpReg = DstReg; - if (UseSrcReg && - getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, - SrcReg)) { - // If $rs is the same as $rd, we need to use AT. - // If it is not available we exit. - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) - return true; - TmpReg = ATReg; - } - - TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, - STI); + TOut.emitRRX(IsPtr64 ? Mips::LD : Mips::LW, TmpReg, GPReg, + MCOperand::createExpr(GotExpr), IDLoc, STI); if (LoExpr) - TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), - IDLoc, STI); + TOut.emitRRX(IsPtr64 ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); if (UseSrcReg) - TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); + TOut.emitRRR(IsPtr64 ? Mips::DADDu : Mips::ADDu, DstReg, TmpReg, SrcReg, + IDLoc, STI); return false; } @@ -3289,10 +3294,43 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, return false; } -bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, - bool Is64FPU, SMLoc IDLoc, - MCStreamer &Out, - const MCSubtargetInfo *STI) { +static uint64_t convertIntToDoubleImm(uint64_t ImmOp64) { + // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the + // exponent field), convert it to double (e.g. 
1 to 1.0) + if ((Hi_32(ImmOp64) & 0x7ff00000) == 0) { + APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); + ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + } + return ImmOp64; +} + +static uint32_t covertDoubleImmToSingleImm(uint64_t ImmOp64) { + // Conversion of a double in an uint64_t to a float in a uint32_t, + // retaining the bit pattern of a float. + double DoubleImm = BitsToDouble(ImmOp64); + float TmpFloat = static_cast(DoubleImm); + return FloatToBits(TmpFloat); +} + +bool MipsAsmParser::expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(convertIntToDoubleImm(ImmOp64)); + + return loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, false, IDLoc, + Out, STI); +} + +bool MipsAsmParser::expandLoadSingleImmToFPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { MipsTargetStreamer &TOut = getTargetStreamer(); assert(Inst.getNumOperands() == 2 && "Invalid operand count"); assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && @@ -3301,166 +3339,184 @@ bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, unsigned FirstReg = Inst.getOperand(0).getReg(); uint64_t ImmOp64 = Inst.getOperand(1).getImm(); - uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; - // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the - // exponent field), convert it to double (e.g. 
1 to 1.0) - if ((HiImmOp64 & 0x7ff00000) == 0) { - APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); - ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + uint32_t ImmOp32 = covertDoubleImmToSingleImm(ImmOp64); + + unsigned TmpReg = Mips::ZERO; + if (ImmOp32 != 0) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; } - uint32_t LoImmOp64 = ImmOp64 & 0xffffffff; - HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + if (Lo_32(ImmOp64) == 0) { + if (TmpReg != Mips::ZERO && loadImmediate(ImmOp32, TmpReg, Mips::NoRegister, + true, false, IDLoc, Out, STI)) + return true; + TOut.emitRR(Mips::MTC1, FirstReg, TmpReg, IDLoc, STI); + return false; + } - if (IsSingle) { - // Conversion of a double in an uint64_t to a float in a uint32_t, - // retaining the bit pattern of a float. - uint32_t ImmOp32; - double doubleImm = BitsToDouble(ImmOp64); - float tmp_float = static_cast(doubleImm); - ImmOp32 = FloatToBits(tmp_float); + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. 
+ MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - if (IsGPR) { - if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, - Out, STI)) + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(ImmOp32, 4); + getStreamer().SwitchSection(CS); + + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Mips::LWC1, FirstReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + return false; +} + +bool MipsAsmParser::expandLoadDoubleImmToGPR(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + if (Lo_32(ImmOp64) == 0) { + if (isGP64bit()) { + if (loadImmediate(ImmOp64, FirstReg, Mips::NoRegister, false, false, + IDLoc, Out, STI)) return true; - return false; } else { - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) + if (loadImmediate(Hi_32(ImmOp64), FirstReg, Mips::NoRegister, true, false, + IDLoc, Out, STI)) return true; - if (LoImmOp64 == 0) { - if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc, - Out, STI)) - return true; - TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI); - return false; - } - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. 
- MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); - - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(ImmOp32, 4); - getStreamer().SwitchSection(CS); - - if(emitPartialAddress(TOut, IDLoc, Sym)) + if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, false, + IDLoc, Out, STI)) return true; - TOut.emitRRX(Mips::LWC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); } return false; } - // if(!IsSingle) - unsigned ATReg = getATReg(IDLoc); - if (!ATReg) + MCSection *CS = getStreamer().getCurrentSectionOnly(); + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + unsigned TmpReg = getATReg(IDLoc); + if (!TmpReg) return true; - if (IsGPR) { - if (LoImmOp64 == 0) { - if(isABI_N32() || isABI_N64()) { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true, - IDLoc, Out, STI)) - return true; - return false; - } else { - if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return true; + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; - if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, - IDLoc, Out, STI)) - return 
true; - return false; - } - } + TOut.emitRRX(isABI_N64() ? Mips::DADDiu : Mips::ADDiu, TmpReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); - MCSection *CS = getStreamer().getCurrentSectionOnly(); - MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + if (isGP64bit()) + TOut.emitRRI(Mips::LD, FirstReg, TmpReg, 0, IDLoc, STI); + else { + TOut.emitRRI(Mips::LW, FirstReg, TmpReg, 0, IDLoc, STI); + TOut.emitRRI(Mips::LW, nextReg(FirstReg), TmpReg, 4, IDLoc, STI); + } + return false; +} - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); +bool MipsAsmParser::expandLoadDoubleImmToFPR(MCInst &Inst, bool Is64FPU, + SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); - if(emitPartialAddress(TOut, IDLoc, Sym)) + ImmOp64 = convertIntToDoubleImm(ImmOp64); + + unsigned TmpReg = Mips::ZERO; + if (ImmOp64 != 0) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) return true; - if(isABI_N64()) - TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); - else - TOut.emitRRX(Mips::ADDiu, ATReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); + } - if(isABI_N32() || isABI_N64()) - TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI); - else { - TOut.emitRRI(Mips::LW, FirstReg, 
ATReg, 0, IDLoc, STI); - TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI); - } - return false; - } else { // if(!IsGPR && !IsSingle) - if ((LoImmOp64 == 0) && - !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { - // FIXME: In the case where the constant is zero, we can load the - // register directly from the zero register. - if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc, + if ((Lo_32(ImmOp64) == 0) && + !((Hi_32(ImmOp64) & 0xffff0000) && (Hi_32(ImmOp64) & 0x0000ffff))) { + if (isGP64bit()) { + if (TmpReg != Mips::ZERO && + loadImmediate(ImmOp64, TmpReg, Mips::NoRegister, false, false, IDLoc, Out, STI)) return true; - if (isABI_N32() || isABI_N64()) - TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI); - else if (hasMips32r2()) { - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI); - } else { - TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI); - TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); - } + TOut.emitRR(Mips::DMTC1, FirstReg, TmpReg, IDLoc, STI); return false; } - MCSection *CS = getStreamer().getCurrentSectionOnly(); - // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections - // where appropriate. 
- MCSection *ReadOnlySection = getContext().getELFSection( - ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - - MCSymbol *Sym = getContext().createTempSymbol(); - const MCExpr *LoSym = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); - const MipsMCExpr *LoExpr = - MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); - - getStreamer().SwitchSection(ReadOnlySection); - getStreamer().EmitLabel(Sym, IDLoc); - getStreamer().EmitIntValue(HiImmOp64, 4); - getStreamer().EmitIntValue(LoImmOp64, 4); - getStreamer().SwitchSection(CS); - - if(emitPartialAddress(TOut, IDLoc, Sym)) + if (TmpReg != Mips::ZERO && + loadImmediate(Hi_32(ImmOp64), TmpReg, Mips::NoRegister, true, false, + IDLoc, Out, STI)) return true; - TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg, - MCOperand::createExpr(LoExpr), IDLoc, STI); + + if (hasMips32r2()) { + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, TmpReg, IDLoc, STI); + } else { + TOut.emitRR(Mips::MTC1, nextReg(FirstReg), TmpReg, IDLoc, STI); + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + } + return false; } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitValueToAlignment(8); + getStreamer().EmitIntValue(ImmOp64, 8); + getStreamer().SwitchSection(CS); + + if (emitPartialAddress(TOut, IDLoc, Sym)) + return true; + + TOut.emitRRX(Is64FPU ? 
Mips::LDC164 : Mips::LDC1, FirstReg, TmpReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; } @@ -3489,7 +3545,7 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, } else { if (!isInt<17>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << 1)) + if (offsetToAlignment(Offset.getImm(), Align(2))) return Error(IDLoc, "branch to misaligned address"); Inst.clear(); Inst.setOpcode(Mips::BEQ_MM); @@ -3581,7 +3637,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, assert(DstRegOp.isReg() && "expected register operand kind"); const MCOperand &BaseRegOp = Inst.getOperand(1); assert(BaseRegOp.isReg() && "expected register operand kind"); - const MCOperand &OffsetOp = Inst.getOperand(2); MipsTargetStreamer &TOut = getTargetStreamer(); unsigned DstReg = DstRegOp.getReg(); @@ -3603,6 +3658,26 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return; } + if (Inst.getNumOperands() > 3) { + const MCOperand &BaseRegOp = Inst.getOperand(2); + assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &ExprOp = Inst.getOperand(3); + assert(ExprOp.isExpr() && "expected expression oprand kind"); + + unsigned BaseReg = BaseRegOp.getReg(); + const MCExpr *ExprOffset = ExprOp.getExpr(); + + MCOperand LoOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MCOperand HiOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); + TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, + LoOperand, TmpReg, IDLoc, STI); + return; + } + + const MCOperand &OffsetOp = Inst.getOperand(2); + if (OffsetOp.isImm()) { int64_t LoOffset = OffsetOp.getImm() & 0xffff; int64_t HiOffset = OffsetOp.getImm() & ~0xffff; @@ -3625,21 +3700,54 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer 
&Out, TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI); - } else { - assert(OffsetOp.isExpr() && "expected expression operand kind"); - const MCExpr *ExprOffset = OffsetOp.getExpr(); - MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); - MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); - - if (IsLoad) - TOut.emitLoadWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); - else - TOut.emitStoreWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); + return; } + + if (OffsetOp.isExpr()) { + if (inPicMode()) { + // FIXME: + // c) Check that immediates of R_MIPS_GOT16/R_MIPS_LO16 relocations + // do not exceed 16-bit. + // d) Use R_MIPS_GOT_PAGE/R_MIPS_GOT_OFST relocations instead + // of R_MIPS_GOT_DISP in appropriate cases to reduce number + // of GOT entries. + MCValue Res; + if (!OffsetOp.getExpr()->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return; + } + + loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg, + !ABI.ArePtrs64bit(), IDLoc, Out, STI); + TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc, + STI); + } else { + // FIXME: Implement 64-bit case. 
+ // 1) lw $8, sym => lui $8, %hi(sym) + // lw $8, %lo(sym)($8) + // 2) sw $8, sym => lui $at, %hi(sym) + // sw $8, %lo(sym)($at) + const MCExpr *ExprOffset = OffsetOp.getExpr(); + MCOperand LoOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MCOperand HiOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); + + // Generate the base address in TmpReg. + TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); + if (BaseReg != Mips::ZERO) + TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + // Emit the load or store with the adjusted base and offset. + TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI); + } + return; + } + + llvm_unreachable("unexpected operand type"); } bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, @@ -6976,7 +7084,7 @@ bool MipsAsmParser::parseSetPushDirective() { // Create a copy of the current assembler options environment and push it. AssemblerOptions.push_back( - llvm::make_unique(AssemblerOptions.back().get())); + std::make_unique(AssemblerOptions.back().get())); getTargetStreamer().emitDirectiveSetPush(); return false; diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ef13507fe63..c3e98fe410c 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -267,6 +267,13 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, uint64_t Address, const void *Decoder); +// DecodeJumpTargetXMM - Decode microMIPS jump and link exchange target, +// which is shifted left by 2 bit. 
+static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -2291,6 +2298,15 @@ static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeJumpTargetXMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; + Inst.addOperand(MCOperand::createImm(JumpOffset)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeAddiur2Simm7(MCInst &Inst, unsigned Value, uint64_t Address, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 859f9cbbca0..70f2a7bdf10 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -304,7 +304,6 @@ Optional MipsAsmBackend::getFixupKind(StringRef Name) const { return StringSwitch>(Name) .Case("R_MIPS_NONE", FK_NONE) .Case("R_MIPS_32", FK_Data_4) - .Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE) .Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16) .Case("R_MIPS_CALL_LO16", (MCFixupKind)Mips::fixup_Mips_CALL_LO16) .Case("R_MIPS_CALL16", (MCFixupKind)Mips::fixup_Mips_CALL16) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 4d7e36995ae..cca75dfc45c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -66,9 +66,9 @@ public: /// fixupNeedsRelaxation - Target specific predicate for whether a given /// fixup requires the associated instruction to be relaxed. 
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override { + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { // FIXME. llvm_unreachable("RelaxInstruction() unimplemented"); return false; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index cf7bae98a27..cc3168790b9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -219,7 +219,7 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, const MCFixup &Fixup, bool IsPCRel) const { // Determine the type of the relocation. - unsigned Kind = (unsigned)Fixup.getKind(); + unsigned Kind = Fixup.getTargetKind(); switch (Kind) { case FK_NONE: @@ -690,6 +690,6 @@ llvm::createMipsELFObjectWriter(const Triple &TT, bool IsN32) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); bool IsN64 = TT.isArch64Bit() && !IsN32; bool HasRelocationAddend = TT.isArch64Bit(); - return llvm::make_unique(OSABI, HasRelocationAddend, + return std::make_unique(OSABI, HasRelocationAddend, IsN64); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 759a7fdb32b..142e9cebb79 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -485,8 +485,11 @@ getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo, assert(MO.isExpr() && "getJumpOffset16OpValue expects only expressions or an immediate"); - // TODO: Push fixup. - return 0; + const MCExpr *Expr = MO.getExpr(); + Mips::Fixups FixupKind = + isMicroMips(STI) ? 
Mips::fixup_MICROMIPS_LO16 : Mips::fixup_Mips_LO16; + Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); + return 0; } /// getJumpTargetOpValue - Return binary encoding of the jump diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index ad5aff6552f..a84ca8ccfb2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -10,11 +10,12 @@ #define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCNACL_H #include "llvm/MC/MCELFStreamer.h" +#include "llvm/Support/Alignment.h" namespace llvm { -// Log2 of the NaCl MIPS sandbox's instruction bundle size. -static const unsigned MIPS_NACL_BUNDLE_ALIGN = 4u; +// NaCl MIPS sandbox's instruction bundle size. +static const Align MIPS_NACL_BUNDLE_ALIGN = Align(16); bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, bool *IsStore = nullptr); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index ddeec03ba78..79c47d1b650 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -143,12 +143,15 @@ public: return false; switch (Info->get(Inst.getOpcode()).OpInfo[NumOps - 1].OperandType) { case MCOI::OPERAND_UNKNOWN: - case MCOI::OPERAND_IMMEDIATE: - // jal, bal ... - Target = Inst.getOperand(NumOps - 1).getImm(); + case MCOI::OPERAND_IMMEDIATE: { + // j, jal, jalx, jals + // Absolute branch within the current 256 MB-aligned region + uint64_t Region = Addr & ~uint64_t(0xfffffff); + Target = Region + Inst.getOperand(NumOps - 1).getImm(); return true; + } case MCOI::OPERAND_PCREL: - // b, j, beq ... + // b, beq ... 
Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; default: diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index c050db8a17f..2d53750ad0e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -270,7 +270,7 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, S->getAssembler().setRelaxAll(true); // Set bundle-alignment as required by the NaCl ABI for the target. - S->EmitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN); + S->EmitBundleAlignMode(Log2(MIPS_NACL_BUNDLE_ALIGN)); return S; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index b4ebb9d18b7..3ff9c722484 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -37,7 +37,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { Context.getELFSection(".MIPS.options", ELF::SHT_MIPS_OPTIONS, ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, 1, ""); MCA.registerSection(*Sec); - Sec->setAlignment(8); + Sec->setAlignment(Align(8)); Streamer->SwitchSection(Sec); Streamer->EmitIntValue(ELF::ODK_REGINFO, 1); // kind @@ -55,7 +55,7 @@ void MipsRegInfoRecord::EmitMipsOptionRecord() { MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC, 24, ""); MCA.registerSection(*Sec); - Sec->setAlignment(MTS->getABI().IsN32() ? 8 : 4); + Sec->setAlignment(MTS->getABI().IsN32() ? 
Align(8) : Align(4)); Streamer->SwitchSection(Sec); Streamer->EmitIntValue(ri_gprmask, 4); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index e3bdb3b140a..b6dae9f6dea 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -34,6 +34,15 @@ static cl::opt RoundSectionSizes( cl::desc("Round section sizes up to the section alignment"), cl::Hidden); } // end anonymous namespace +static bool isMipsR6(const MCSubtargetInfo *STI) { + return STI->getFeatureBits()[Mips::FeatureMips32r6] || + STI->getFeatureBits()[Mips::FeatureMips64r6]; +} + +static bool isMicroMips(const MCSubtargetInfo *STI) { + return STI->getFeatureBits()[Mips::FeatureMicroMips]; +} + MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; @@ -216,6 +225,19 @@ void MipsTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); } +void MipsTargetStreamer::emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, + unsigned Reg2, MCOperand Op3, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createReg(Reg2)); + TmpInst.addOperand(Op3); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -264,8 +286,7 @@ void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc, } void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { - const FeatureBitset &Features = STI->getFeatureBits(); - if 
(Features[Mips::FeatureMicroMips]) + if (isMicroMips(STI)) emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI); else emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); @@ -311,21 +332,34 @@ void MipsTargetStreamer::emitStoreWithImmOffset( emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI); } -/// Emit a store instruction with an symbol offset. Symbols are assumed to be -/// out of range for a simm16 will be expanded to appropriate instructions. -void MipsTargetStreamer::emitStoreWithSymOffset( - unsigned Opcode, unsigned SrcReg, unsigned BaseReg, MCOperand &HiOperand, - MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // sw $8, sym => lui $at, %hi(sym) - // sw $8, %lo(sym)($at) +/// Emit a store instruction with an symbol offset. +void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, + unsigned BaseReg, + MCOperand &HiOperand, + MCOperand &LoOperand, + unsigned ATReg, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + // sc $8, sym => lui $at, %hi(sym) + // sc $8, %lo(sym)($at) // Generate the base address in ATReg. emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); - // Emit the store with the adjusted base and offset. - emitRRX(Opcode, SrcReg, ATReg, LoOperand, IDLoc, STI); + if (!isMicroMips(STI) && isMipsR6(STI)) { + // For non-micromips r6 offset for 'sc' is not in the lower 16 bits so we + // put it in 'at'. + // sc $8, sym => lui $at, %hi(sym) + // addiu $at, $at, %lo(sym) + // sc $8, 0($at) + emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI); + MCOperand Offset = MCOperand::createImm(0); + // Emit the store with the adjusted base and offset. + emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI); + } else { + if (BaseReg != Mips::ZERO) + emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); + // Emit the store with the adjusted base and offset. 
+ emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI); + } } /// Emit a load instruction with an immediate offset. DstReg and TmpReg are @@ -364,30 +398,6 @@ void MipsTargetStreamer::emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, emitRRI(Opcode, DstReg, TmpReg, LoOffset, IDLoc, STI); } -/// Emit a load instruction with an symbol offset. Symbols are assumed to be -/// out of range for a simm16 will be expanded to appropriate instructions. -/// DstReg and TmpReg are permitted to be the same register iff DstReg is a -/// GPR. It is the callers responsibility to identify such cases and pass the -/// appropriate register in TmpReg. -void MipsTargetStreamer::emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg, - unsigned BaseReg, - MCOperand &HiOperand, - MCOperand &LoOperand, - unsigned TmpReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // 1) lw $8, sym => lui $8, %hi(sym) - // lw $8, %lo(sym)($8) - // 2) ldc1 $f0, sym => lui $at, %hi(sym) - // ldc1 $f0, %lo(sym)($at) - - // Generate the base address in TmpReg. - emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); - // Emit the load with the adjusted base and offset. 
- emitRRX(Opcode, DstReg, TmpReg, LoOperand, IDLoc, STI); -} - MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : MipsTargetStreamer(S), OS(OS) {} @@ -891,9 +901,9 @@ void MipsTargetELFStreamer::finish() { MCSection &BSSSection = *OFI.getBSSSection(); MCA.registerSection(BSSSection); - TextSection.setAlignment(std::max(16u, TextSection.getAlignment())); - DataSection.setAlignment(std::max(16u, DataSection.getAlignment())); - BSSSection.setAlignment(std::max(16u, BSSSection.getAlignment())); + TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment()))); + DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment()))); + BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment()))); if (RoundSectionSizes) { // Make sections sizes a multiple of the alignment. This is useful for @@ -1016,7 +1026,7 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); - Sec->setAlignment(4); + Sec->setAlignment(Align(4)); OS.PushSection(); @@ -1306,7 +1316,7 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() { MCSectionELF *Sec = Context.getELFSection( ".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24, ""); MCA.registerSection(*Sec); - Sec->setAlignment(8); + Sec->setAlignment(Align(8)); OS.SwitchSection(Sec); OS << ABIFlagsSection; diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td index 5a12568893a..9a1e47e5ecc 100644 --- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td +++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -360,7 +360,7 @@ class RDDSP_MM_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins uimm7:$mask); string AsmString = !strconcat("rddsp", "\t$rt, $mask"); - list Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))]; + list Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))]; InstrItinClass 
Itinerary = NoItinerary; } @@ -383,7 +383,7 @@ class WRDSP_MM_DESC { dag OutOperandList = (outs); dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask); string AsmString = !strconcat("wrdsp", "\t$rt, $mask"); - list Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)]; + list Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)]; InstrItinClass Itinerary = NoItinerary; bit isMoveReg = 1; } diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 9b7f7b25fa9..8cc0029fc89 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -955,17 +955,18 @@ let DecoderNamespace = "MicroMips" in { EXT_FM_MM<0x0c>, ISA_MICROMIPS32_NOT_MIPS32R6; /// Jump Instructions - let DecoderMethod = "DecodeJumpTargetMM" in + let DecoderMethod = "DecodeJumpTargetMM" in { def J_MM : MMRel, JumpFJ, J_FM_MM<0x35>, AdditionalRequires<[RelocNotPIC]>, IsBranch, ISA_MICROMIPS32_NOT_MIPS32R6; - - let DecoderMethod = "DecodeJumpTargetMM" in { def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>, ISA_MICROMIPS32_NOT_MIPS32R6; + } + + let DecoderMethod = "DecodeJumpTargetXMM" in def JALX_MM : MMRel, JumpLink<"jalx", calltarget>, J_FM_MM<0x3c>, ISA_MICROMIPS32_NOT_MIPS32R6; - } + def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>, ISA_MICROMIPS32_NOT_MIPS32R6; def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>, diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp index 70af95592aa..db93b3d80ed 100644 --- a/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -361,7 +361,7 @@ static bool CheckXWPInstr(MachineInstr *MI, bool ReduceToLwp, MI->getOpcode() == Mips::SW16_MM)) return false; - unsigned reg = MI->getOperand(0).getReg(); + Register reg = MI->getOperand(0).getReg(); if (reg == Mips::RA) return false; @@ -403,8 +403,8 @@ static bool ConsecutiveInstr(MachineInstr *MI1, 
MachineInstr *MI2) { if (!GetImm(MI2, 2, Offset2)) return false; - unsigned Reg1 = MI1->getOperand(0).getReg(); - unsigned Reg2 = MI2->getOperand(0).getReg(); + Register Reg1 = MI1->getOperand(0).getReg(); + Register Reg2 = MI2->getOperand(0).getReg(); return ((Offset1 == (Offset2 - 4)) && (ConsecutiveRegisters(Reg1, Reg2))); } @@ -475,8 +475,8 @@ bool MicroMipsSizeReduce::ReduceXWtoXWP(ReduceEntryFunArgs *Arguments) { if (!CheckXWPInstr(MI2, ReduceToLwp, Entry)) return false; - unsigned Reg1 = MI1->getOperand(1).getReg(); - unsigned Reg2 = MI2->getOperand(1).getReg(); + Register Reg1 = MI1->getOperand(1).getReg(); + Register Reg2 = MI2->getOperand(1).getReg(); if (Reg1 != Reg2) return false; @@ -621,8 +621,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) { MachineInstr *MI1 = Arguments->MI; MachineInstr *MI2 = &*NextMII; - unsigned RegDstMI1 = MI1->getOperand(0).getReg(); - unsigned RegSrcMI1 = MI1->getOperand(1).getReg(); + Register RegDstMI1 = MI1->getOperand(0).getReg(); + Register RegSrcMI1 = MI1->getOperand(1).getReg(); if (!IsMovepSrcRegister(RegSrcMI1)) return false; @@ -633,8 +633,8 @@ bool MicroMipsSizeReduce::ReduceMoveToMovep(ReduceEntryFunArgs *Arguments) { if (MI2->getOpcode() != Entry.WideOpc()) return false; - unsigned RegDstMI2 = MI2->getOperand(0).getReg(); - unsigned RegSrcMI2 = MI2->getOperand(1).getReg(); + Register RegDstMI2 = MI2->getOperand(0).getReg(); + Register RegSrcMI2 = MI2->getOperand(1).getReg(); if (!IsMovepSrcRegister(RegSrcMI2)) return false; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 7b83ea8535a..a5908362e81 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -25,6 +25,8 @@ class PredicateControl { list GPRPredicates = []; // Predicates for the PTR size such as IsPTR64bit list PTRPredicates = []; + // Predicates for a symbol's size such as hasSym32. 
+ list SYMPredicates = []; // Predicates for the FGR size and layout such as IsFP64bit list FGRPredicates = []; // Predicates for the instruction group membership such as ISA's. @@ -38,6 +40,7 @@ class PredicateControl { list Predicates = !listconcat(EncodingPredicates, GPRPredicates, PTRPredicates, + SYMPredicates, FGRPredicates, InsnPredicates, HardFloatPredicate, @@ -206,6 +209,9 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true", "Disable use of the jal instruction">; +def FeatureXGOT + : SubtargetFeature<"xgot", "UseXGOT", "true", "Assume 32-bit GOT">; + def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard", "UseIndirectJumpsHazard", "true", "Use indirect jump" @@ -257,3 +263,9 @@ def Mips : Target { let AssemblyParserVariants = [MipsAsmParserVariant]; let AllowRegisterRenaming = 1; } + +//===----------------------------------------------------------------------===// +// Pfm Counters +//===----------------------------------------------------------------------===// + +include "MipsPfmCounters.td" diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 3ab4f1e064d..768d54fc9c2 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -72,7 +72,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL; - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + Register V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass; V0 = RegInfo.createVirtualRegister(RC); diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 6d8e5aef2a3..5a5b78c9d5f 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ 
b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -708,8 +708,8 @@ Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned regX = MI.getOperand(0).getReg(); - unsigned regY = MI.getOperand(1).getReg(); + Register regX = MI.getOperand(0).getReg(); + Register regY = MI.getOperand(1).getReg(); MachineBasicBlock *target = MI.getOperand(2).getMBB(); BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(CmpOpc)) .addReg(regX) @@ -725,7 +725,7 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned regX = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(0).getReg(); int64_t imm = MI.getOperand(1).getImm(); MachineBasicBlock *target = MI.getOperand(2).getMBB(); unsigned CmpOpc; @@ -758,9 +758,9 @@ Mips16TargetLowering::emitFEXT_CCRX16_ins(unsigned SltOpc, MachineInstr &MI, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CC = MI.getOperand(0).getReg(); - unsigned regX = MI.getOperand(1).getReg(); - unsigned regY = MI.getOperand(2).getReg(); + Register CC = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(1).getReg(); + Register regY = MI.getOperand(2).getReg(); BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SltOpc)) .addReg(regX) .addReg(regY); @@ -777,8 +777,8 @@ Mips16TargetLowering::emitFEXT_CCRXI16_ins(unsigned SltiOpc, unsigned SltiXOpc, if (DontExpandCondPseudos16) return BB; const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CC = MI.getOperand(0).getReg(); - unsigned regX = MI.getOperand(1).getReg(); + Register CC = MI.getOperand(0).getReg(); + Register regX = MI.getOperand(1).getReg(); int64_t Imm = MI.getOperand(2).getImm(); unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); BuildMI(*BB, MI, MI.getDebugLoc(), 
TII->get(SltOpc)).addReg(regX).addImm(Imm); diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index c234c309d76..0d735c20ec2 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -358,7 +358,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm, for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { MachineOperand &MO = II->getOperand(i); if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + !Register::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 7f35280f793..cc15949b0d5 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -16,6 +16,7 @@ // shamt must fit in 6 bits. def immZExt6 : ImmLeaf; +def timmZExt6 : TImmLeaf; // Node immediate fits as 10-bit sign extended on target immediate. // e.g. 
seqi, snei @@ -651,6 +652,7 @@ def : MipsPat<(MipsTlsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>, let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)), (JAL texternalsym:$dst)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), (LUi64 tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(MipsHighest (i64 tblockaddress:$in)), @@ -682,6 +684,20 @@ let AdditionalPredicates = [NotInMicroMips] in { (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; + + def : MipsPat<(MipsHi (i64 tglobaladdr:$in)), + (DADDiu ZERO_64, tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tblockaddress:$in)), + (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tjumptable:$in)), + (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 tconstpool:$in)), + (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsHi (i64 texternalsym:$in)), + (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; @@ -692,6 +708,23 @@ let AdditionalPredicates = [NotInMicroMips] in { (DADDiu GPR64:$hi, tjumptable:$lo)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))), (DADDiu GPR64:$hi, tconstpool:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; + + def : MipsPat<(MipsLo (i64 tglobaladdr:$in)), + (DADDiu ZERO_64, 
tglobaladdr:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tblockaddress:$in)), + (DADDiu ZERO_64, tblockaddress:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tjumptable:$in)), + (DADDiu ZERO_64, tjumptable:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tconstpool:$in)), + (DADDiu ZERO_64, tconstpool:$in)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 tglobaltlsaddr:$in)), + (DADDiu ZERO_64, tglobaltlsaddr:$in)>, + ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(MipsLo (i64 texternalsym:$in)), + (DADDiu ZERO_64, texternalsym:$in)>, ISA_MIPS3, GPR_64, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), (DADDiu GPR64:$hi, tglobaladdr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; @@ -705,6 +738,9 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))), (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MIPS3, GPR_64, SYM_64; + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 texternalsym:$lo))), + (DADDiu GPR64:$hi, texternalsym:$lo)>, + ISA_MIPS3, GPR_64, SYM_64; } // gp_rel relocs diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index db83fe49cec..2201545adc9 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include #include @@ -376,7 +377,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() { void MipsAsmPrinter::emitFrameDirective() { const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo(); - unsigned stackReg = RI.getFrameRegister(*MF); + Register stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); unsigned stackSize = MF->getFrameInfo().getStackSize(); @@ -571,7 +572,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const 
MachineInstr *MI, unsigned OpNum, // for 2 for 32 bit mode and 1 for 64 bit mode. if (NumVals != 2) { if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } @@ -597,7 +598,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } @@ -780,7 +781,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU()); StringRef FS = TM.getTargetFeatureString(); const MipsTargetMachine &MTM = static_cast(TM); - const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, 0); + const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, None); bool IsABICalls = STI.isABICalls(); const MipsABIInfo &ABI = MTM.getABI(); @@ -821,6 +822,9 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // option has changed the default (i.e. FPXX) and omit it otherwise. if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX())) TS.emitDirectiveModuleOddSPReg(); + + // Switch to the .text section. 
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); } void MipsAsmPrinter::emitInlineAsmStart() const { diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp index da65689ecff..cad82953af5 100644 --- a/lib/Target/Mips/MipsCallLowering.cpp +++ b/lib/Target/Mips/MipsCallLowering.cpp @@ -106,6 +106,7 @@ private: Register ArgsReg, const EVT &VT) override; virtual void markPhysRegUsed(unsigned PhysReg) { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } @@ -357,7 +358,7 @@ bool OutgoingValueHandler::handleSplit(SmallVectorImpl &VRegs, return true; } -static bool isSupportedType(Type *T) { +static bool isSupportedArgumentType(Type *T) { if (T->isIntegerTy()) return true; if (T->isPointerTy()) @@ -367,6 +368,18 @@ static bool isSupportedType(Type *T) { return false; } +static bool isSupportedReturnType(Type *T) { + if (T->isIntegerTy()) + return true; + if (T->isPointerTy()) + return true; + if (T->isFloatingPointTy()) + return true; + if (T->isAggregateType()) + return true; + return false; +} + static CCValAssign::LocInfo determineLocInfo(const MVT RegisterVT, const EVT VT, const ISD::ArgFlagsTy &Flags) { // > does not mean loss of information as type RegisterVT can't hold type VT, @@ -403,7 +416,7 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA); - if (Val != nullptr && !isSupportedType(Val->getType())) + if (Val != nullptr && !isSupportedReturnType(Val->getType())) return false; if (!VRegs.empty()) { @@ -411,21 +424,13 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Function &F = MF.getFunction(); const DataLayout &DL = MF.getDataLayout(); const MipsTargetLowering &TLI = *getTLI(); - LLVMContext &Ctx = Val->getType()->getContext(); - - SmallVector SplitEVTs; - ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs); - assert(VRegs.size() == SplitEVTs.size() && - "For each 
split Type there should be exactly one VReg."); SmallVector RetInfos; SmallVector OrigArgIndices; - for (unsigned i = 0; i < SplitEVTs.size(); ++i) { - ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)}; - setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(CurArgInfo, 0, RetInfos, OrigArgIndices); - } + ArgInfo ArgRetInfo(VRegs, Val->getType()); + setArgFlags(ArgRetInfo, AttributeList::ReturnIndex, DL, F); + splitToValueTypes(DL, ArgRetInfo, 0, RetInfos, OrigArgIndices); SmallVector Outs; subTargetRegTypeForCallingConv(F, RetInfos, OrigArgIndices, Outs); @@ -453,12 +458,8 @@ bool MipsCallLowering::lowerFormalArguments( if (F.arg_empty()) return true; - if (F.isVarArg()) { - return false; - } - for (auto &Arg : F.args()) { - if (!isSupportedType(Arg.getType())) + if (!isSupportedArgumentType(Arg.getType())) return false; } @@ -472,7 +473,8 @@ bool MipsCallLowering::lowerFormalArguments( for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[i], Arg.getType()); setArgFlags(AInfo, i + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(AInfo, i, ArgInfos, OrigArgIndices); + ArgInfos.push_back(AInfo); + OrigArgIndices.push_back(i); ++i; } @@ -495,30 +497,64 @@ bool MipsCallLowering::lowerFormalArguments( if (!Handler.handle(ArgLocs, ArgInfos)) return false; + if (F.isVarArg()) { + ArrayRef ArgRegs = ABI.GetVarArgRegs(); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + + int VaArgOffset; + unsigned RegSize = 4; + if (ArgRegs.size() == Idx) + VaArgOffset = alignTo(CCInfo.getNextStackOffset(), RegSize); + else { + VaArgOffset = + (int)ABI.GetCalleeAllocdArgSizeInBytes(CCInfo.getCallingConv()) - + (int)(RegSize * (ArgRegs.size() - Idx)); + } + + MachineFrameInfo &MFI = MF.getFrameInfo(); + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + MF.getInfo()->setVarArgsFrameIndex(FI); + + for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { + MIRBuilder.getMBB().addLiveIn(ArgRegs[I]); + + 
MachineInstrBuilder Copy = + MIRBuilder.buildCopy(LLT::scalar(RegSize * 8), Register(ArgRegs[I])); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + MachinePointerInfo MPO = MachinePointerInfo::getFixedStack(MF, FI); + MachineInstrBuilder FrameIndex = + MIRBuilder.buildFrameIndex(LLT::pointer(MPO.getAddrSpace(), 32), FI); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, RegSize, + /* Alignment */ RegSize); + MIRBuilder.buildStore(Copy, FrameIndex, *MMO); + } + } + return true; } bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + CallLoweringInfo &Info) const { - if (CallConv != CallingConv::C) + if (Info.CallConv != CallingConv::C) return false; - for (auto &Arg : OrigArgs) { - if (!isSupportedType(Arg.Ty)) + for (auto &Arg : Info.OrigArgs) { + if (!isSupportedArgumentType(Arg.Ty)) return false; - if (Arg.Flags.isByVal() || Arg.Flags.isSRet()) + if (Arg.Flags[0].isByVal()) + return false; + if (Arg.Flags[0].isSRet() && !Arg.Ty->isPointerTy()) return false; } - if (OrigRet.Regs[0] && !isSupportedType(OrigRet.Ty)) + if (!Info.OrigRet.Ty->isVoidTy() && !isSupportedReturnType(Info.OrigRet.Ty)) return false; MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); + const DataLayout &DL = MF.getDataLayout(); const MipsTargetLowering &TLI = *getTLI(); const MipsTargetMachine &TM = static_cast(MF.getTarget()); @@ -528,37 +564,38 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN); const bool IsCalleeGlobalPIC = - Callee.isGlobal() && TM.isPositionIndependent(); + Info.Callee.isGlobal() && TM.isPositionIndependent(); MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert( - Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL); + Info.Callee.isReg() || IsCalleeGlobalPIC ? 
Mips::JALRPseudo : Mips::JAL); MIB.addDef(Mips::SP, RegState::Implicit); if (IsCalleeGlobalPIC) { Register CalleeReg = MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32)); MachineInstr *CalleeGlobalValue = - MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal()); - if (!Callee.getGlobal()->hasLocalLinkage()) + MIRBuilder.buildGlobalValue(CalleeReg, Info.Callee.getGlobal()); + if (!Info.Callee.getGlobal()->hasLocalLinkage()) CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL); MIB.addUse(CalleeReg); } else - MIB.add(Callee); + MIB.add(Info.Callee); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv())); TargetLowering::ArgListTy FuncOrigArgs; - FuncOrigArgs.reserve(OrigArgs.size()); + FuncOrigArgs.reserve(Info.OrigArgs.size()); SmallVector ArgInfos; SmallVector OrigArgIndices; unsigned i = 0; - for (auto &Arg : OrigArgs) { + for (auto &Arg : Info.OrigArgs) { TargetLowering::ArgListEntry Entry; Entry.Ty = Arg.Ty; FuncOrigArgs.push_back(Entry); - splitToValueTypes(Arg, i, ArgInfos, OrigArgIndices); + ArgInfos.push_back(Arg); + OrigArgIndices.push_back(i); ++i; } @@ -566,11 +603,17 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, subTargetRegTypeForCallingConv(F, ArgInfos, OrigArgIndices, Outs); SmallVector ArgLocs; - MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, + bool IsCalleeVarArg = false; + if (Info.Callee.isGlobal()) { + const Function *CF = static_cast(Info.Callee.getGlobal()); + IsCalleeVarArg = CF->isVarArg(); + } + MipsCCState CCInfo(F.getCallingConv(), IsCalleeVarArg, MF, ArgLocs, F.getContext()); - CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - const char *Call = Callee.isSymbol() ? Callee.getSymbolName() : nullptr; + CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(Info.CallConv), 1); + const char *Call = + Info.Callee.isSymbol() ? 
Info.Callee.getSymbolName() : nullptr; CCInfo.AnalyzeCallOperands(Outs, TLI.CCAssignFnForCall(), FuncOrigArgs, Call); setLocInfo(ArgLocs, Outs); @@ -599,11 +642,11 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, *STI.getRegBankInfo()); } - if (OrigRet.Regs[0]) { + if (!Info.OrigRet.Ty->isVoidTy()) { ArgInfos.clear(); SmallVector OrigRetIndices; - splitToValueTypes(OrigRet, 0, ArgInfos, OrigRetIndices); + splitToValueTypes(DL, Info.OrigRet, 0, ArgInfos, OrigRetIndices); SmallVector Ins; subTargetRegTypeForCallingConv(F, ArgInfos, OrigRetIndices, Ins); @@ -612,7 +655,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MipsCCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); - CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), OrigRet.Ty, Call); + CCInfo.AnalyzeCallResult(Ins, TLI.CCAssignFnForReturn(), Info.OrigRet.Ty, Call); setLocInfo(ArgLocs, Ins); CallReturnHandler Handler(MIRBuilder, MF.getRegInfo(), MIB); @@ -642,12 +685,12 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( F.getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i < NumRegs; ++i) { - ISD::ArgFlagsTy Flags = Arg.Flags; + ISD::ArgFlagsTy Flags = Arg.Flags[0]; if (i == 0) Flags.setOrigAlign(TLI.getABIAlignmentForCallingConv(Arg.Ty, DL)); else - Flags.setOrigAlign(1); + Flags.setOrigAlign(Align::None()); ISDArgs.emplace_back(Flags, RegisterVT, VT, true, OrigArgIndices[ArgNo], 0); @@ -657,12 +700,21 @@ void MipsCallLowering::subTargetRegTypeForCallingConv( } void MipsCallLowering::splitToValueTypes( - const ArgInfo &OrigArg, unsigned OriginalIndex, + const DataLayout &DL, const ArgInfo &OrigArg, unsigned OriginalIndex, SmallVectorImpl &SplitArgs, SmallVectorImpl &SplitArgsOrigIndices) const { - // TODO : perform structure and array split. For now we only deal with - // types that pass isSupportedType check. 
- SplitArgs.push_back(OrigArg); - SplitArgsOrigIndices.push_back(OriginalIndex); + SmallVector SplitEVTs; + SmallVector SplitVRegs; + const MipsTargetLowering &TLI = *getTLI(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitEVTs); + + for (unsigned i = 0; i < SplitEVTs.size(); ++i) { + ArgInfo Info = ArgInfo{OrigArg.Regs[i], SplitEVTs[i].getTypeForEVT(Ctx)}; + Info.Flags = OrigArg.Flags; + SplitArgs.push_back(Info); + SplitArgsOrigIndices.push_back(OriginalIndex); + } } diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h index 11c2d53ad35..a284cf5e26c 100644 --- a/lib/Target/Mips/MipsCallLowering.h +++ b/lib/Target/Mips/MipsCallLowering.h @@ -68,9 +68,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: /// Based on registers available on target machine split or extend @@ -83,7 +82,8 @@ private: /// Split structures and arrays, save original argument indices since /// Mips calling convention needs info about original argument type. 
- void splitToValueTypes(const ArgInfo &OrigArg, unsigned OriginalIndex, + void splitToValueTypes(const DataLayout &DL, const ArgInfo &OrigArg, + unsigned OriginalIndex, SmallVectorImpl &SplitArgs, SmallVectorImpl &SplitArgsOrigIndices) const; }; diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index eea28df7eda..f5064052173 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -222,12 +222,7 @@ namespace { BasicBlockInfo() = default; - // FIXME: ignore LogAlign for this patch - // - unsigned postOffset(unsigned LogAlign = 0) const { - unsigned PO = Offset + Size; - return PO; - } + unsigned postOffset() const { return Offset + Size; } }; std::vector BBInfo; @@ -376,7 +371,7 @@ namespace { void doInitialPlacement(std::vector &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - unsigned getCPELogAlign(const MachineInstr &CPEMI); + Align getCPEAlign(const MachineInstr &CPEMI); void initializeFunctionInfo(const std::vector &CPEMIs); unsigned getOffsetOf(MachineInstr *MI) const; unsigned getUserOffset(CPUser&) const; @@ -534,11 +529,11 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { MF->push_back(BB); // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). - unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + const Align MaxAlign(MCP->getConstantPoolAlignment()); // Mark the basic block as required by the const-pool. // If AlignConstantIslands isn't set, use 4-byte alignment for everything. - BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + BB->setAlignment(AlignConstantIslands ? MaxAlign : Align(4)); // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. @@ -548,7 +543,8 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // alignment of all entries as long as BB is sufficiently aligned. 
Keep // track of the insertion point for each alignment. We are going to bucket // sort the entries as they are created. - SmallVector InsPoint(MaxAlign + 1, BB->end()); + SmallVector InsPoint(Log2(MaxAlign) + 1, + BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. @@ -576,7 +572,7 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // Ensure that future entries with higher alignment get inserted before // CPEMI. This is bucket sort with iterators. - for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + for (unsigned a = LogAlign + 1; a <= Log2(MaxAlign); ++a) if (InsPoint[a] == InsAt) InsPoint[a] = CPEMI; // Add a new CPEntry, but no corresponding CPUser yet. @@ -621,20 +617,18 @@ MipsConstantIslands::CPEntry return nullptr; } -/// getCPELogAlign - Returns the required alignment of the constant pool entry +/// getCPEAlign - Returns the required alignment of the constant pool entry /// represented by CPEMI. Alignment is measured in log2(bytes) units. -unsigned MipsConstantIslands::getCPELogAlign(const MachineInstr &CPEMI) { +Align MipsConstantIslands::getCPEAlign(const MachineInstr &CPEMI) { assert(CPEMI.getOpcode() == Mips::CONSTPOOL_ENTRY); // Everything is 4-byte aligned unless AlignConstantIslands is set. 
if (!AlignConstantIslands) - return 2; + return Align(4); unsigned CPI = CPEMI.getOperand(1).getIndex(); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); - unsigned Align = MCP->getConstants()[CPI].getAlignment(); - assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); - return Log2_32(Align); + return Align(MCP->getConstants()[CPI].getAlignment()); } /// initializeFunctionInfo - Do the initial scan of the function, building up @@ -940,13 +934,13 @@ bool MipsConstantIslands::isOffsetInRange(unsigned UserOffset, bool MipsConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { - unsigned CPELogAlign = getCPELogAlign(*U.CPEMI); - unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); - unsigned NextBlockOffset, NextBlockAlignment; + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(); + unsigned NextBlockOffset; + Align NextBlockAlignment; MachineFunction::const_iterator NextBlock = ++Water->getIterator(); if (NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); - NextBlockAlignment = 0; + NextBlockAlignment = Align::None(); } else { NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; NextBlockAlignment = NextBlock->getAlignment(); @@ -961,7 +955,7 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset, Growth = CPEEnd - NextBlockOffset; // Compute the padding that would go at the end of the CPE to align the next // block. - Growth += OffsetToAlignment(CPEEnd, 1ULL << NextBlockAlignment); + Growth += offsetToAlignment(CPEEnd, NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise // the offset of the instruction. 
Also account for unknown alignment padding @@ -1221,7 +1215,6 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPELogAlign = getCPELogAlign(*CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; @@ -1231,7 +1224,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, // Size of branch to insert. unsigned Delta = 2; // Compute the offset where the CPE will begin. - unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; + unsigned CPEOffset = UserBBI.postOffset() + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) @@ -1257,9 +1250,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, // Try to split the block so it's fully aligned. Compute the latest split // point where we can add a 4-byte branch instruction, and then align to - // LogAlign which is the largest possible alignment in the function. - unsigned LogAlign = MF->getAlignment(); - assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + // Align which is the largest possible alignment in the function. + const Align Align = MF->getAlignment(); unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1270,7 +1262,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, BaseInsertOffset -= 4; LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) - << " la=" << LogAlign << '\n'); + << " la=" << Log2(Align) << '\n'); // This could point off the end of the block if we've already got constant // pool entries following this block; only the last one is in the water list. 
@@ -1295,8 +1287,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift intertion point by one unit of alignment so it is within reach. - BaseInsertOffset -= 1u << LogAlign; - EndInsertOffset -= 1u << LogAlign; + BaseInsertOffset -= Align.value(); + EndInsertOffset -= Align.value(); } // This is overly conservative, as we don't account for CPEMIs being // reused within the block, but it doesn't matter much. Also assume CPEs @@ -1399,7 +1391,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { ++NumCPEs; // Mark the basic block as aligned as required by the const-pool entry. - NewIsland->setAlignment(getCPELogAlign(*U.CPEMI)); + NewIsland->setAlignment(getCPEAlign(*U.CPEMI)); // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; @@ -1431,10 +1423,11 @@ void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { BBInfo[CPEBB->getNumber()].Size = 0; // This block no longer needs to be aligned. - CPEBB->setAlignment(0); - } else + CPEBB->setAlignment(Align(1)); + } else { // Entries are sorted by descending alignment, so realign from the front. - CPEBB->setAlignment(getCPELogAlign(*CPEBB->begin())); + CPEBB->setAlignment(getCPEAlign(*CPEBB->begin())); + } adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if @@ -1529,7 +1522,7 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { // We should have a way to back out this alignment restriction if we "can" later. // but it is not harmful. 
// - DestBB->setAlignment(2); + DestBB->setAlignment(Align(4)); Br.MaxDisp = ((1<<24)-1) * 2; MI->setDesc(TII->get(Mips::JalB16)); } diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index daca8b90708..d3e68c014fb 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -12,12 +12,19 @@ // ImmLeaf def immZExt1 : ImmLeaf(Imm);}]>; +def timmZExt1 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt2 : ImmLeaf(Imm);}]>; +def timmZExt2 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt3 : ImmLeaf(Imm);}]>; +def timmZExt3 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt4 : ImmLeaf(Imm);}]>; +def timmZExt4 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt8 : ImmLeaf(Imm);}]>; +def timmZExt8 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immZExt10 : ImmLeaf(Imm);}]>; +def timmZExt10 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immSExt6 : ImmLeaf(Imm);}]>; +def timmSExt6 : ImmLeaf(Imm);}], NOOP_SDNodeXForm, timm>; def immSExt10 : ImmLeaf(Imm);}]>; // Mips-specific dsp nodes @@ -306,7 +313,7 @@ class PRECR_SRA_PH_W_DESC_BASE Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; + list Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; string BaseOpcode = instr_asm; @@ -443,7 +450,7 @@ class RDDSP_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; + list Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; @@ -454,7 +461,7 @@ class WRDSP_DESC_BASE Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; + list Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; @@ -1096,14 +1103,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>; // Misc 
-class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5, +class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5, NoItinerary>; -class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2, +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2, NoItinerary>; class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, - immZExt5, NoItinerary>; + timmZExt5, NoItinerary>; // Pseudos. def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASEgetOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Mask = I->getOperand(2).getReg(); - unsigned ShiftCmpVal = I->getOperand(3).getReg(); - unsigned Mask2 = I->getOperand(4).getReg(); - unsigned ShiftNewVal = I->getOperand(5).getReg(); - unsigned ShiftAmnt = I->getOperand(6).getReg(); - unsigned Scratch = I->getOperand(7).getReg(); - unsigned Scratch2 = I->getOperand(8).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Mask = I->getOperand(2).getReg(); + Register ShiftCmpVal = I->getOperand(3).getReg(); + Register Mask2 = I->getOperand(4).getReg(); + Register ShiftNewVal = I->getOperand(5).getReg(); + Register ShiftAmnt = I->getOperand(6).getReg(); + Register Scratch = I->getOperand(7).getReg(); + Register Scratch2 = I->getOperand(8).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); @@ -240,11 +240,11 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, MOVE = Mips::OR64; } - unsigned Dest = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned OldVal = I->getOperand(2).getReg(); - unsigned NewVal = I->getOperand(3).getReg(); - unsigned Scratch = I->getOperand(4).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = 
I->getOperand(3).getReg(); + Register Scratch = I->getOperand(4).getReg(); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB.getBasicBlock(); @@ -374,15 +374,15 @@ bool MipsExpandPseudo::expandAtomicBinOpSubword( llvm_unreachable("Unknown subword atomic pseudo for expansion!"); } - unsigned Dest = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Incr = I->getOperand(2).getReg(); - unsigned Mask = I->getOperand(3).getReg(); - unsigned Mask2 = I->getOperand(4).getReg(); - unsigned ShiftAmnt = I->getOperand(5).getReg(); - unsigned OldVal = I->getOperand(6).getReg(); - unsigned BinOpRes = I->getOperand(7).getReg(); - unsigned StoreVal = I->getOperand(8).getReg(); + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register Mask = I->getOperand(3).getReg(); + Register Mask2 = I->getOperand(4).getReg(); + Register ShiftAmnt = I->getOperand(5).getReg(); + Register OldVal = I->getOperand(6).getReg(); + Register BinOpRes = I->getOperand(7).getReg(); + Register StoreVal = I->getOperand(8).getReg(); const BasicBlock *LLVM_BB = BB.getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -513,10 +513,10 @@ bool MipsExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, BEQ = Mips::BEQ64; } - unsigned OldVal = I->getOperand(0).getReg(); - unsigned Ptr = I->getOperand(1).getReg(); - unsigned Incr = I->getOperand(2).getReg(); - unsigned Scratch = I->getOperand(3).getReg(); + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register Scratch = I->getOperand(3).getReg(); unsigned Opcode = 0; unsigned OR = 0; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 123d3cc242f..80f288ac500 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -1162,14 +1162,20 @@ 
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F12); } else if (ArgVT == MVT::f64) { - VA.convertToReg(Mips::D6); + if (Subtarget->isFP64bit()) + VA.convertToReg(Mips::D6_64); + else + VA.convertToReg(Mips::D6); } } else if (i == 1) { if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) { if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F14); } else if (ArgVT == MVT::f64) { - VA.convertToReg(Mips::D7); + if (Subtarget->isFP64bit()) + VA.convertToReg(Mips::D7_64); + else + VA.convertToReg(Mips::D7); } } } @@ -1722,7 +1728,7 @@ bool MipsFastISel::selectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); + Register DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 0537cfd1cb3..612b2b712fa 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -24,8 +24,9 @@ protected: const MipsSubtarget &STI; public: - explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment) - : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {} + explicit MipsFrameLowering(const MipsSubtarget &sti, Align Alignment) + : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) { + } static const MipsFrameLowering *create(const MipsSubtarget &ST); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 9ba54d6bb73..e5997af3bcc 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -65,7 +65,7 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. 
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { - unsigned GlobalBaseReg = MF->getInfo()->getGlobalBaseReg(); + Register GlobalBaseReg = MF->getInfo()->getGlobalBaseReg(); return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy( CurDAG->getDataLayout())) .getNode(); @@ -217,6 +217,51 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { return false; } +/// Convert vector addition with vector subtraction if that allows to encode +/// constant as an immediate and thus avoid extra 'ldi' instruction. +/// add X, <-1, -1...> --> sub X, <1, 1...> +bool MipsDAGToDAGISel::selectVecAddAsVecSubIfProfitable(SDNode *Node) { + assert(Node->getOpcode() == ISD::ADD && "Should only get 'add' here."); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue C = Node->getOperand(1); + + auto *BVN = dyn_cast(C); + if (!BVN) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, !Subtarget->isLittle())) + return false; + + auto IsInlineConstant = [](const APInt &Imm) { return Imm.isIntN(5); }; + + if (IsInlineConstant(SplatValue)) + return false; // Can already be encoded as an immediate. + + APInt NegSplatValue = 0 - SplatValue; + if (!IsInlineConstant(NegSplatValue)) + return false; // Even if we negate it it won't help. + + SDLoc DL(Node); + + SDValue NegC = CurDAG->FoldConstantArithmetic( + ISD::SUB, DL, VT, CurDAG->getConstant(0, DL, VT).getNode(), C.getNode()); + assert(NegC && "Constant-folding failed!"); + SDValue NewNode = CurDAG->getNode(ISD::SUB, DL, VT, X, NegC); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions void MipsDAGToDAGISel::Select(SDNode *Node) { @@ -236,6 +281,12 @@ void MipsDAGToDAGISel::Select(SDNode *Node) { switch(Opcode) { default: break; + case ISD::ADD: + if (Node->getSimpleValueType(0).isVector() && + selectVecAddAsVecSubIfProfitable(Node)) + return; + break; + // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: ReplaceNode(Node, getGlobalBaseReg()); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index bae3bbf71f3..a768589b374 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -125,6 +125,11 @@ private: /// starting at bit zero. virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + /// Convert vector addition with vector subtraction if that allows to encode + /// constant as an immediate and thus avoid extra 'ldi' instruction. + /// add X, <-1, -1...> --> sub X, <1, 1...> + bool selectVecAddAsVecSubIfProfitable(SDNode *Node); + void Select(SDNode *N) override; virtual bool trySelect(SDNode *Node) = 0; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 0ff09007da4..bf1b4756b24 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -82,10 +82,6 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt -LargeGOT("mxgot", cl::Hidden, - cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); - static cl::opt NoZeroDivCheck("mno-check-zero-division", cl::Hidden, cl::desc("MIPS: Don't trap on integer division by zero."), @@ -330,7 +326,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, } // Set LoadExtAction for f16 vectors to Expand - for (MVT VT : MVT::fp_vector_valuetypes()) { + for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); if (F16VT.isValid()) setLoadExtAction(ISD::EXTLOAD, VT, F16VT, 
Expand); @@ -518,11 +514,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2); + setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4)); // The arguments on the stack are defined in terms of 4-byte slots on O32 // and 8-byte slots on N32/N64. - setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? 8 : 4); + setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? Align(8) + : Align(4)); setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); @@ -552,8 +549,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.inMicroMipsMode(); // Disable if either of the following is true: - // We do not generate PIC, the ABI is not O32, LargeGOT is being used. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) + // We do not generate PIC, the ABI is not O32, XGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || + Subtarget.useXGOT()) UseFastISel = false; return UseFastISel ? 
Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -1257,7 +1255,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { - unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + Register VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); return VReg; } @@ -1477,10 +1475,10 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, llvm_unreachable("Unknown pseudo atomic for replacement!"); } - unsigned OldVal = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned Incr = MI.getOperand(2).getReg(); - unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); + Register OldVal = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register Incr = MI.getOperand(2).getReg(); + Register Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); MachineBasicBlock::iterator II(MI); @@ -1519,8 +1517,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, // containing the word. 
// - unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); - unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); + Register PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); + Register IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); @@ -1556,7 +1554,7 @@ MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); - unsigned ScrReg = RegInfo.createVirtualRegister(RC); + Register ScrReg = RegInfo.createVirtualRegister(RC); assert(Size < 32); int64_t ShiftImm = 32 - (Size * 8); @@ -1581,21 +1579,21 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned Incr = MI.getOperand(2).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register Incr = MI.getOperand(2).getReg(); - unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); - unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); - unsigned Mask = RegInfo.createVirtualRegister(RC); - unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned Incr2 = RegInfo.createVirtualRegister(RC); - unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); - unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); - unsigned MaskUpper = RegInfo.createVirtualRegister(RC); - unsigned Scratch = RegInfo.createVirtualRegister(RC); - unsigned Scratch2 = RegInfo.createVirtualRegister(RC); - unsigned Scratch3 = RegInfo.createVirtualRegister(RC); + Register AlignedAddr = RegInfo.createVirtualRegister(RCp); + Register ShiftAmt = 
RegInfo.createVirtualRegister(RC); + Register Mask = RegInfo.createVirtualRegister(RC); + Register Mask2 = RegInfo.createVirtualRegister(RC); + Register Incr2 = RegInfo.createVirtualRegister(RC); + Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); + Register PtrLSB2 = RegInfo.createVirtualRegister(RC); + Register MaskUpper = RegInfo.createVirtualRegister(RC); + Register Scratch = RegInfo.createVirtualRegister(RC); + Register Scratch2 = RegInfo.createVirtualRegister(RC); + Register Scratch3 = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = 0; switch (MI.getOpcode()) { @@ -1678,7 +1676,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { - unsigned Off = RegInfo.createVirtualRegister(RC); + Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); @@ -1738,12 +1736,12 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 
Mips::ATOMIC_CMP_SWAP_I32_POSTRA : Mips::ATOMIC_CMP_SWAP_I64_POSTRA; - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned OldVal = MI.getOperand(2).getReg(); - unsigned NewVal = MI.getOperand(3).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register OldVal = MI.getOperand(2).getReg(); + Register NewVal = MI.getOperand(3).getReg(); - unsigned Scratch = MRI.createVirtualRegister(RC); + Register Scratch = MRI.createVirtualRegister(RC); MachineBasicBlock::iterator II(MI); // We need to create copies of the various registers and kill them at the @@ -1751,9 +1749,9 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, // after fast register allocation, the spills will end up outside of the // blocks that their values are defined in, causing livein errors. - unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); - unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); - unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); + Register PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); + Register OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); + Register NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal); @@ -1790,22 +1788,22 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Ptr = MI.getOperand(1).getReg(); - unsigned CmpVal = MI.getOperand(2).getReg(); - unsigned NewVal = MI.getOperand(3).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Ptr = MI.getOperand(1).getReg(); + Register CmpVal = MI.getOperand(2).getReg(); + Register NewVal = MI.getOperand(3).getReg(); - 
unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); - unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); - unsigned Mask = RegInfo.createVirtualRegister(RC); - unsigned Mask2 = RegInfo.createVirtualRegister(RC); - unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); - unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); - unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); - unsigned MaskUpper = RegInfo.createVirtualRegister(RC); - unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); - unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + Register AlignedAddr = RegInfo.createVirtualRegister(RCp); + Register ShiftAmt = RegInfo.createVirtualRegister(RC); + Register Mask = RegInfo.createVirtualRegister(RC); + Register Mask2 = RegInfo.createVirtualRegister(RC); + Register ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + Register ShiftedNewVal = RegInfo.createVirtualRegister(RC); + Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); + Register PtrLSB2 = RegInfo.createVirtualRegister(RC); + Register MaskUpper = RegInfo.createVirtualRegister(RC); + Register MaskedCmpVal = RegInfo.createVirtualRegister(RC); + Register MaskedNewVal = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8 ? Mips::ATOMIC_CMP_SWAP_I8_POSTRA : Mips::ATOMIC_CMP_SWAP_I16_POSTRA; @@ -1820,8 +1818,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( // value isn't a problem. // The Dead flag is needed as the value in scratch isn't used by any other // instruction. Kill isn't used as Dead is more precise. 
- unsigned Scratch = RegInfo.createVirtualRegister(RC); - unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + Register Scratch = RegInfo.createVirtualRegister(RC); + Register Scratch2 = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1859,7 +1857,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { - unsigned Off = RegInfo.createVirtualRegister(RC); + Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); @@ -1967,10 +1965,10 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64()); - // %hi/%lo relocation + // %hi/%lo relocation return Subtarget.hasSym32() ? 
getAddrNonPIC(N, SDLoc(N), Ty, DAG) - // %highest/%higher/%hi/%lo relocation - : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); + // %highest/%higher/%hi/%lo relocation + : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); } // Every other architecture would use shouldAssumeDSOLocal in here, but @@ -1987,7 +1985,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, if (GV->hasLocalLinkage()) return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); - if (LargeGOT) + if (Subtarget.useXGOT()) return getAddrGlobalLargeGOT( N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, DAG.getEntryNode(), @@ -2149,7 +2147,8 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); SDValue Chain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const Align Align = + llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; @@ -2166,14 +2165,13 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { // when the pointer is still aligned from the last va_arg (or pair of // va_args for the i64 on O32 case). 
if (Align > getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + VAList = DAG.getNode( + ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); - VAList = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, DL, VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, DL, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, DL, - VAList.getValueType())); + VAList = DAG.getNode( + ISD::AND, DL, VAList.getValueType(), VAList, + DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg. @@ -2870,7 +2868,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, #include "MipsGenCallingConv.inc" CCAssignFn *MipsTargetLowering::CCAssignFnForCall() const{ - return CC_Mips; + return CC_Mips_FixedArg; } CCAssignFn *MipsTargetLowering::CCAssignFnForReturn() const{ @@ -3167,7 +3165,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg, DAG.getConstant(1, DL, MVT::i32)); if (!Subtarget.isLittle()) std::swap(Lo, Hi); - unsigned LocRegLo = VA.getLocReg(); + Register LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); @@ -3270,7 +3268,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InternalLinkage) Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64()); - else if (LargeGOT) { + else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Val)); @@ -3292,7 +3290,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsPIC) // static Callee = DAG.getTargetExternalSymbol( Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); - else if 
(LargeGOT) { + else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Sym)); @@ -3523,7 +3521,7 @@ SDValue MipsTargetLowering::LowerFormalArguments( // Arguments stored on registers if (IsRegLoc) { MVT RegVT = VA.getLocVT(); - unsigned ArgReg = VA.getLocReg(); + Register ArgReg = VA.getLocReg(); const TargetRegisterClass *RC = getRegClassFor(RegVT); // Transform the arguments stored on @@ -4568,20 +4566,20 @@ MachineBasicBlock *MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { // Named registers is expected to be fairly rare. For now, just support $28 // since the linux kernel uses it. if (Subtarget.isGP64bit()) { - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("$28", Mips::GP_64) - .Default(0); + .Default(Register()); if (Reg) return Reg; } else { - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("$28", Mips::GP) - .Default(0); + .Default(Register()); if (Reg) return Reg; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2db60e9801f..0a5cddd45af 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -304,11 +304,12 @@ class TargetRegisterClass; unsigned &NumIntermediates, MVT &RegisterVT) const override; /// Return the correct alignment for the current calling convention. 
- unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const override { + Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const override { + const Align ABIAlign(DL.getABITypeAlignment(ArgTy)); if (ArgTy->isVectorTy()) - return std::min(DL.getABITypeAlignment(ArgTy), 8U); - return DL.getABITypeAlignment(ArgTy); + return std::min(ABIAlign, Align(8)); + return ABIAlign; } ISD::NodeType getExtendForAtomicOps() const override { @@ -347,8 +348,8 @@ class TargetRegisterClass; void HandleByVal(CCState *, unsigned &, unsigned) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index fbd56206b24..6bb25ee5754 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -677,7 +677,8 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc, return MIB; } -bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool MipsInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { assert(!MI.isBundle() && "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index a626c0c3fdb..092a960b4ba 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -148,7 +148,7 @@ public: MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, MachineBasicBlock::iterator I) const; - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; /// Perform target specific 
instruction verification. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index a4e85a38ab2..58167e0f344 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -211,9 +211,9 @@ def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; def NotCnMips : Predicate<"!Subtarget->hasCnMips()">, AssemblerPredicate<"!FeatureCnMips">; -def IsSym32 : Predicate<"Subtarget->HasSym32()">, +def IsSym32 : Predicate<"Subtarget->hasSym32()">, AssemblerPredicate<"FeatureSym32">; -def IsSym64 : Predicate<"!Subtarget->HasSym32()">, +def IsSym64 : Predicate<"!Subtarget->hasSym32()">, AssemblerPredicate<"!FeatureSym32">; def IsN64 : Predicate<"Subtarget->isABI_N64()">; def IsNotN64 : Predicate<"!Subtarget->isABI_N64()">; @@ -1263,6 +1263,7 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 7-bit zero extended on target immediate. def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; +def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>; // Node immediate fits as 16-bit zero extended on target immediate. // The LO16 param means that only the lower 16 bits of the node @@ -1295,6 +1296,7 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; // shamt field must fit in 5 bits. 
def immZExt5 : ImmLeaf; +def timmZExt5 : TImmLeaf; def immZExt5Plus1 : PatLeaf<(imm), [{ return isUInt<5>(N->getZExtValue() - 1); @@ -3142,25 +3144,31 @@ multiclass MipsHiLoRelocs; def : MipsPat<(MipsHi texternalsym:$in), (Lui texternalsym:$in)>; - def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + def : MipsPat<(MipsLo tglobaladdr:$in), + (Addiu ZeroReg, tglobaladdr:$in)>; def : MipsPat<(MipsLo tblockaddress:$in), (Addiu ZeroReg, tblockaddress:$in)>; - def : MipsPat<(MipsLo tjumptable:$in), (Addiu ZeroReg, tjumptable:$in)>; - def : MipsPat<(MipsLo tconstpool:$in), (Addiu ZeroReg, tconstpool:$in)>; + def : MipsPat<(MipsLo tjumptable:$in), + (Addiu ZeroReg, tjumptable:$in)>; + def : MipsPat<(MipsLo tconstpool:$in), + (Addiu ZeroReg, tconstpool:$in)>; def : MipsPat<(MipsLo tglobaltlsaddr:$in), (Addiu ZeroReg, tglobaltlsaddr:$in)>; - def : MipsPat<(MipsLo texternalsym:$in), (Addiu ZeroReg, texternalsym:$in)>; + def : MipsPat<(MipsLo texternalsym:$in), + (Addiu ZeroReg, texternalsym:$in)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), - (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tblockaddress:$lo)), - (Addiu GPROpnd:$hi, tblockaddress:$lo)>; + (Addiu GPROpnd:$hi, tblockaddress:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tjumptable:$lo)), - (Addiu GPROpnd:$hi, tjumptable:$lo)>; + (Addiu GPROpnd:$hi, tjumptable:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tconstpool:$lo)), - (Addiu GPROpnd:$hi, tconstpool:$lo)>; + (Addiu GPROpnd:$hi, tconstpool:$lo)>; def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaltlsaddr:$lo)), - (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>; + (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>; + def : MipsPat<(add GPROpnd:$hi, (MipsLo texternalsym:$lo)), + (Addiu GPROpnd:$hi, texternalsym:$lo)>; } // wrapper_pic diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp index 45a47ad3c08..f8fc7cb0898 
100644 --- a/lib/Target/Mips/MipsInstructionSelector.cpp +++ b/lib/Target/Mips/MipsInstructionSelector.cpp @@ -17,6 +17,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #define DEBUG_TYPE "mips-isel" @@ -33,7 +34,7 @@ public: MipsInstructionSelector(const MipsTargetMachine &TM, const MipsSubtarget &STI, const MipsRegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -44,6 +45,8 @@ private: const TargetRegisterClass * getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB, const RegisterBankInfo &RBI) const; + unsigned selectLoadStoreOpCode(MachineInstr &I, + MachineRegisterInfo &MRI) const; const MipsTargetMachine &TM; const MipsSubtarget &STI; @@ -84,7 +87,7 @@ MipsInstructionSelector::MipsInstructionSelector( bool MipsInstructionSelector::selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const { Register DstReg = I.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (Register::isPhysicalRegister(DstReg)) return true; const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); @@ -158,9 +161,15 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm, } /// Returning Opc indicates that we failed to select MIPS instruction opcode. 
-static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, - unsigned RegBank, bool isFP64) { - bool isStore = Opc == TargetOpcode::G_STORE; +unsigned +MipsInstructionSelector::selectLoadStoreOpCode(MachineInstr &I, + MachineRegisterInfo &MRI) const { + STI.getRegisterInfo(); + const Register DestReg = I.getOperand(0).getReg(); + const unsigned RegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID(); + const unsigned MemSizeInBytes = (*I.memoperands_begin())->getSize(); + unsigned Opc = I.getOpcode(); + const bool isStore = Opc == TargetOpcode::G_STORE; if (RegBank == Mips::GPRBRegBankID) { if (isStore) switch (MemSizeInBytes) { @@ -192,10 +201,24 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, case 4: return isStore ? Mips::SWC1 : Mips::LWC1; case 8: - if (isFP64) + if (STI.isFP64bit()) return isStore ? Mips::SDC164 : Mips::LDC164; else return isStore ? Mips::SDC1 : Mips::LDC1; + case 16: { + assert(STI.hasMSA() && "Vector instructions require target with MSA."); + const unsigned VectorElementSizeInBytes = + MRI.getType(DestReg).getElementType().getSizeInBytes(); + if (VectorElementSizeInBytes == 1) + return isStore ? Mips::ST_B : Mips::LD_B; + if (VectorElementSizeInBytes == 2) + return isStore ? Mips::ST_H : Mips::LD_H; + if (VectorElementSizeInBytes == 4) + return isStore ? Mips::ST_W : Mips::LD_W; + if (VectorElementSizeInBytes == 8) + return isStore ? 
Mips::ST_D : Mips::LD_D; + return Opc; + } default: return Opc; } @@ -203,8 +226,7 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes, return Opc; } -bool MipsInstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool MipsInstructionSelector::select(MachineInstr &I) { MachineBasicBlock &MBB = *I.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -231,7 +253,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, return true; } - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; MachineInstr *MI = nullptr; @@ -265,6 +287,11 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(2)); break; } + case G_INTTOPTR: + case G_PTRTOINT: { + I.setDesc(TII.get(COPY)); + return selectCopy(I, MRI); + } case G_FRAME_INDEX: { MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu)) .add(I.getOperand(0)) @@ -279,12 +306,71 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(1)); break; } + case G_BRJT: { + unsigned EntrySize = + MF.getJumpTableInfo()->getEntrySize(MF.getDataLayout()); + assert(isPowerOf2_32(EntrySize) && + "Non-power-of-two jump-table entry size not supported."); + + Register JTIndex = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *SLL = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SLL)) + .addDef(JTIndex) + .addUse(I.getOperand(2).getReg()) + .addImm(Log2_32(EntrySize)); + if (!constrainSelectedInstRegOperands(*SLL, TII, TRI, RBI)) + return false; + + Register DestAddress = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu)) + .addDef(DestAddress) + .addUse(I.getOperand(0).getReg()) + .addUse(JTIndex); + if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI)) + return false; + + Register Dest = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *LW = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW)) + 
.addDef(Dest) + .addUse(DestAddress) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_LO) + .addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, 4, 4)); + if (!constrainSelectedInstRegOperands(*LW, TII, TRI, RBI)) + return false; + + if (MF.getTarget().isPositionIndependent()) { + Register DestTmp = MRI.createVirtualRegister(&Mips::GPR32RegClass); + LW->getOperand(0).setReg(DestTmp); + MachineInstr *ADDu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu)) + .addDef(Dest) + .addUse(DestTmp) + .addUse(MF.getInfo() + ->getGlobalBaseRegForGlobalISel()); + if (!constrainSelectedInstRegOperands(*ADDu, TII, TRI, RBI)) + return false; + } + + MachineInstr *Branch = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch)) + .addUse(Dest); + if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; + } + case G_BRINDIRECT: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoIndirectBranch)) + .add(I.getOperand(0)); + break; + } case G_PHI: { const Register DestReg = I.getOperand(0).getReg(); const unsigned OpSize = MRI.getType(DestReg).getSizeInBits(); const TargetRegisterClass *DefRC = nullptr; - if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + if (Register::isPhysicalRegister(DestReg)) DefRC = TRI.getRegClass(DestReg); else DefRC = getRegClassForTypeOnBank(OpSize, @@ -297,26 +383,35 @@ bool MipsInstructionSelector::select(MachineInstr &I, case G_LOAD: case G_ZEXTLOAD: case G_SEXTLOAD: { - const Register DestReg = I.getOperand(0).getReg(); - const unsigned DestRegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID(); - const unsigned OpSize = MRI.getType(DestReg).getSizeInBits(); - const unsigned OpMemSizeInBytes = (*I.memoperands_begin())->getSize(); - - if (DestRegBank == Mips::GPRBRegBankID && OpSize != 32) - return false; - - if (DestRegBank == Mips::FPRBRegBankID && OpSize != 32 && OpSize != 64) - return false; - - const 
unsigned NewOpc = selectLoadStoreOpCode( - I.getOpcode(), OpMemSizeInBytes, DestRegBank, STI.isFP64bit()); + const unsigned NewOpc = selectLoadStoreOpCode(I, MRI); if (NewOpc == I.getOpcode()) return false; + MachineOperand BaseAddr = I.getOperand(1); + int64_t SignedOffset = 0; + // Try to fold load/store + G_GEP + G_CONSTANT + // %SignedOffset:(s32) = G_CONSTANT i32 16_bit_signed_immediate + // %Addr:(p0) = G_GEP %BaseAddr, %SignedOffset + // %LoadResult/%StoreSrc = load/store %Addr(p0) + // into: + // %LoadResult/%StoreSrc = NewOpc %BaseAddr(p0), 16_bit_signed_immediate + + MachineInstr *Addr = MRI.getVRegDef(I.getOperand(1).getReg()); + if (Addr->getOpcode() == G_GEP) { + MachineInstr *Offset = MRI.getVRegDef(Addr->getOperand(2).getReg()); + if (Offset->getOpcode() == G_CONSTANT) { + APInt OffsetValue = Offset->getOperand(1).getCImm()->getValue(); + if (OffsetValue.isSignedIntN(16)) { + BaseAddr = Addr->getOperand(1); + SignedOffset = OffsetValue.getSExtValue(); + } + } + } + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc)) .add(I.getOperand(0)) - .add(I.getOperand(1)) - .addImm(0) + .add(BaseAddr) + .addImm(SignedOffset) .addMemOperand(*I.memoperands_begin()); break; } @@ -356,6 +451,18 @@ bool MipsInstructionSelector::select(MachineInstr &I, .add(I.getOperand(3)); break; } + case G_IMPLICIT_DEF: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::IMPLICIT_DEF)) + .add(I.getOperand(0)); + + // Set class based on register bank, there can be fpr and gpr implicit def. + MRI.setRegClass(MI->getOperand(0).getReg(), + getRegClassForTypeOnBank( + MRI.getType(I.getOperand(0).getReg()).getSizeInBits(), + *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI), + RBI)); + break; + } case G_CONSTANT: { MachineIRBuilder B(I); if (!materialize32BitImm(I.getOperand(0).getReg(), @@ -423,7 +530,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, Opcode = Mips::TRUNC_W_S; else Opcode = STI.isFP64bit() ? 
Mips::TRUNC_W_D64 : Mips::TRUNC_W_D32; - unsigned ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass); + Register ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass); MachineInstr *Trunc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode)) .addDef(ResultInFPR) .addUse(I.getOperand(1).getReg()); @@ -496,6 +603,24 @@ bool MipsInstructionSelector::select(MachineInstr &I, I.eraseFromParent(); return true; } + case G_JUMP_TABLE: { + if (MF.getTarget().isPositionIndependent()) { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW)) + .addDef(I.getOperand(0).getReg()) + .addReg(MF.getInfo() + ->getGlobalBaseRegForGlobalISel()) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_GOT) + .addMemOperand( + MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF), + MachineMemOperand::MOLoad, 4, 4)); + } else { + MI = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi)) + .addDef(I.getOperand(0).getReg()) + .addJumpTableIndex(I.getOperand(1).getIndex(), MipsII::MO_ABS_HI); + } + break; + } case G_ICMP: { struct Instr { unsigned Opcode; @@ -626,7 +751,7 @@ bool MipsInstructionSelector::select(MachineInstr &I, // MipsFCMPCondCode, result is inverted i.e. MOVT_I is used. unsigned MoveOpcode = isLogicallyNegated ? 
Mips::MOVT_I : Mips::MOVF_I; - unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); + Register TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu)) .addDef(TrueInReg) .addUse(Mips::ZERO) @@ -654,6 +779,33 @@ bool MipsInstructionSelector::select(MachineInstr &I, I.eraseFromParent(); return true; } + case G_FENCE: { + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SYNC)).addImm(0); + break; + } + case G_VASTART: { + MipsFunctionInfo *FuncInfo = MF.getInfo(); + int FI = FuncInfo->getVarArgsFrameIndex(); + + Register LeaReg = MRI.createVirtualRegister(&Mips::GPR32RegClass); + MachineInstr *LEA_ADDiu = + BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LEA_ADDiu)) + .addDef(LeaReg) + .addFrameIndex(FI) + .addImm(0); + if (!constrainSelectedInstRegOperands(*LEA_ADDiu, TII, TRI, RBI)) + return false; + + MachineInstr *Store = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::SW)) + .addUse(LeaReg) + .addUse(I.getOperand(0).getReg()) + .addImm(0); + if (!constrainSelectedInstRegOperands(*Store, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; + } default: return false; } diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp index e442a81837e..bb4a1d902d7 100644 --- a/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -16,18 +16,65 @@ using namespace llvm; +struct TypesAndMemOps { + LLT ValTy; + LLT PtrTy; + unsigned MemSize; + bool MustBeNaturallyAligned; +}; + +static bool +CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query, + std::initializer_list SupportedValues) { + for (auto &Val : SupportedValues) { + if (Val.ValTy != Query.Types[0]) + continue; + if (Val.PtrTy != Query.Types[1]) + continue; + if (Val.MemSize != Query.MMODescrs[0].SizeInBits) + continue; + if (Val.MustBeNaturallyAligned && + Query.MMODescrs[0].SizeInBits % Query.MMODescrs[0].AlignInBits != 0) + continue; + return true; + } + 
return false; +} + +static bool CheckTyN(unsigned N, const LegalityQuery &Query, + std::initializer_list SupportedValues) { + for (auto &Val : SupportedValues) + if (Val == Query.Types[N]) + return true; + return false; +} + MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { using namespace TargetOpcode; const LLT s1 = LLT::scalar(1); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); const LLT p0 = LLT::pointer(0, 32); - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) + getActionDefinitionsBuilder({G_SUB, G_MUL}) .legalFor({s32}) .clampScalar(0, s32, s32); + getActionDefinitionsBuilder(G_ADD) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (CheckTyN(0, Query, {s32})) + return true; + if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64})) + return true; + return false; + }) + .clampScalar(0, s32, s32); + getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO}) .lowerFor({{s32, s1}}); @@ -36,13 +83,26 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .maxScalar(0, s32); getActionDefinitionsBuilder({G_LOAD, G_STORE}) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}, - {s32, p0, 32, 8}, - {s64, p0, 64, 8}, - {p0, p0, 32, 8}}) + .legalIf([=, &ST](const LegalityQuery &Query) { + if (CheckTy0Ty1MemSizeAlign(Query, {{s32, p0, 8, ST.hasMips32r6()}, + {s32, p0, 16, ST.hasMips32r6()}, + {s32, p0, 32, ST.hasMips32r6()}, + {p0, p0, 32, ST.hasMips32r6()}, + {s64, p0, 64, ST.hasMips32r6()}})) + return true; + if (ST.hasMSA() && + CheckTy0Ty1MemSizeAlign(Query, {{v16s8, p0, 128, false}, + {v8s16, p0, 128, false}, + {v4s32, p0, 128, false}, + {v2s64, p0, 128, false}})) + return true; + return false; + }) .minScalar(0, s32); + getActionDefinitionsBuilder(G_IMPLICIT_DEF) + .legalFor({s32, s64}); + 
getActionDefinitionsBuilder(G_UNMERGE_VALUES) .legalFor({{s32, s64}}); @@ -50,9 +110,17 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({{s64, s32}}); getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD}) - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, - {s32, p0, 16, 8}}) - .minScalar(0, s32); + .legalForTypesWithMemDesc({{s32, p0, 8, 8}, + {s32, p0, 16, 8}}) + .clampScalar(0, s32, s32); + + getActionDefinitionsBuilder({G_ZEXT, G_SEXT}) + .legalIf([](const LegalityQuery &Query) { return false; }) + .maxScalar(0, s32); + + getActionDefinitionsBuilder(G_TRUNC) + .legalIf([](const LegalityQuery &Query) { return false; }) + .maxScalar(1, s32); getActionDefinitionsBuilder(G_SELECT) .legalForCartesianProduct({p0, s32, s64}, {s32}) @@ -63,6 +131,12 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({s32}) .minScalar(0, s32); + getActionDefinitionsBuilder(G_BRJT) + .legalFor({{p0, s32}}); + + getActionDefinitionsBuilder(G_BRINDIRECT) + .legalFor({p0}); + getActionDefinitionsBuilder(G_PHI) .legalFor({p0, s32, s64}) .minScalar(0, s32); @@ -77,8 +151,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .libcallFor({s64}); getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) - .legalFor({s32, s32}) - .minScalar(1, s32); + .legalFor({{s32, s32}}) + .clampScalar(1, s32, s32) + .clampScalar(0, s32, s32); getActionDefinitionsBuilder(G_ICMP) .legalForCartesianProduct({s32}, {s32, p0}) @@ -89,15 +164,24 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .legalFor({s32}) .clampScalar(0, s32, s32); - getActionDefinitionsBuilder(G_GEP) + getActionDefinitionsBuilder({G_GEP, G_INTTOPTR}) .legalFor({{p0, s32}}); + getActionDefinitionsBuilder(G_PTRTOINT) + .legalFor({{s32, p0}}); + getActionDefinitionsBuilder(G_FRAME_INDEX) .legalFor({p0}); - getActionDefinitionsBuilder(G_GLOBAL_VALUE) + getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE}) .legalFor({p0}); + 
getActionDefinitionsBuilder(G_DYN_STACKALLOC) + .lowerFor({{p0, s32}}); + + getActionDefinitionsBuilder(G_VASTART) + .legalFor({p0}); + // FP instructions getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}); @@ -126,6 +210,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_FPTOUI) .libcallForCartesianProduct({s64}, {s64, s32}) + .lowerForCartesianProduct({s32}, {s64, s32}) .minScalar(0, s32); // Int to FP conversion instructions @@ -136,8 +221,11 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_UITOFP) .libcallForCartesianProduct({s64, s32}, {s64}) + .customForCartesianProduct({s64, s32}, {s32}) .minScalar(1, s32); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -150,6 +238,134 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; MIRBuilder.setInstr(MI); + const MipsSubtarget &STI = + static_cast(MIRBuilder.getMF().getSubtarget()); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); - return false; + switch (MI.getOpcode()) { + case G_UITOFP: { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (SrcTy != s32) + return false; + if (DstTy != s32 && DstTy != s64) + return false; + + // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert + // unsigned to double. Mantissa has 52 bits so we use following trick: + // First make floating point bit mask 0x43300000ABCDEFGH. + // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 . + // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it. + // Done. Trunc double to float if needed. + + MachineInstrBuilder Bitcast = MIRBuilder.buildInstr( + STI.isFP64bit() ? 
Mips::BuildPairF64_64 : Mips::BuildPairF64, {s64}, + {Src, MIRBuilder.buildConstant(s32, UINT32_C(0x43300000))}); + Bitcast.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + + MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant( + s64, BitsToDouble(UINT64_C(0x4330000000000000))); + + if (DstTy == s64) + MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP); + else { + MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP); + MIRBuilder.buildFPTrunc(Dst, ResF64); + } + + MI.eraseFromParent(); + break; + } + default: + return false; + } + + return true; +} + +static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode, + MachineIRBuilder &MIRBuilder, + const MipsSubtarget &ST) { + assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA."); + if (!MIRBuilder.buildInstr(Opcode) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), + *ST.getRegBankInfo())) + return false; + MI.eraseFromParent(); + return true; +} + +static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, + MachineIRBuilder &MIRBuilder, + const MipsSubtarget &ST) { + assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA."); + MIRBuilder.buildInstr(Opcode) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + MI.eraseFromParent(); + return true; +} + +bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + const MipsSubtarget &ST = + static_cast(MI.getMF()->getSubtarget()); + const MipsInstrInfo &TII = *ST.getInstrInfo(); + const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); + const RegisterBankInfo &RBI = *ST.getRegBankInfo(); + MIRBuilder.setInstr(MI); + + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + if (createMemLibcall(MIRBuilder, MRI, MI) == 
+ LegalizerHelper::UnableToLegalize) + return false; + MI.eraseFromParent(); + return true; + case Intrinsic::trap: { + MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI); + } + case Intrinsic::vacopy: { + Register Tmp = MRI.createGenericVirtualRegister(LLT::pointer(0, 32)); + MachinePointerInfo MPO; + MIRBuilder.buildLoad(Tmp, MI.getOperand(2), + *MI.getMF()->getMachineMemOperand( + MPO, MachineMemOperand::MOLoad, 4, 4)); + MIRBuilder.buildStore(Tmp, MI.getOperand(1), + *MI.getMF()->getMachineMemOperand( + MPO, MachineMemOperand::MOStore, 4, 4)); + MI.eraseFromParent(); + return true; + } + case Intrinsic::mips_addv_b: + case Intrinsic::mips_addv_h: + case Intrinsic::mips_addv_w: + case Intrinsic::mips_addv_d: + return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST); + case Intrinsic::mips_addvi_b: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST); + case Intrinsic::mips_addvi_h: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST); + case Intrinsic::mips_addvi_w: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST); + case Intrinsic::mips_addvi_d: + return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST); + default: + break; + } + return true; } diff --git a/lib/Target/Mips/MipsLegalizerInfo.h b/lib/Target/Mips/MipsLegalizerInfo.h index e5021e08189..9696c262b2d 100644 --- a/lib/Target/Mips/MipsLegalizerInfo.h +++ b/lib/Target/Mips/MipsLegalizerInfo.h @@ -28,6 +28,9 @@ public: bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override; + + bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; }; } // end namespace llvm #endif diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 907ed9ef746..f585d9c1a14 100644 --- 
a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -60,6 +60,11 @@ def immZExt2Ptr : ImmLeaf(Imm);}]>; def immZExt3Ptr : ImmLeaf(Imm);}]>; def immZExt4Ptr : ImmLeaf(Imm);}]>; +def timmZExt1Ptr : TImmLeaf(Imm);}]>; +def timmZExt2Ptr : TImmLeaf(Imm);}]>; +def timmZExt3Ptr : TImmLeaf(Imm);}]>; +def timmZExt4Ptr : TImmLeaf(Imm);}]>; + // Operands def immZExt2Lsa : ImmLeaf(Imm - 1);}]>; @@ -1270,7 +1275,7 @@ class MSA_I8_SHF_DESC_BASE Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))]; + list Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))]; InstrItinClass Itinerary = itin; } @@ -2299,13 +2304,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1, MSA128DOpnd>; class LD_DESC_BASE; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SAT_S_D_DESC : 
MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2546,16 +2551,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd, MSA128BOpnd, uimm4, - immZExt4>; + timmZExt4>; class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd, MSA128HOpnd, uimm3, - immZExt3>; + timmZExt3>; class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd, MSA128WOpnd, uimm2, - immZExt2>; + timmZExt2>; class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd, MSA128DOpnd, uimm1, - immZExt1>; + timmZExt1>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2609,13 +2614,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", 
int_mips_srari_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2637,13 +2642,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, - immZExt3, MSA128BOpnd>; + timmZExt3, MSA128BOpnd>; class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, - immZExt4, MSA128HOpnd>; + timmZExt4, MSA128HOpnd>; class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, - immZExt5, MSA128WOpnd>; + timmZExt5, MSA128WOpnd>; class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, - immZExt6, MSA128DOpnd>; + timmZExt6, MSA128DOpnd>; class ST_DESC_BASEgetParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - unsigned SrcReg = I->getOperand(0).getReg(); + Register SrcReg = I->getOperand(0).getReg(); unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64; BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg) .addReg(SrcReg); diff --git a/lib/Target/Mips/MipsPfmCounters.td b/lib/Target/Mips/MipsPfmCounters.td new file mode 100644 index 00000000000..c7779b474b9 --- /dev/null +++ b/lib/Target/Mips/MipsPfmCounters.td @@ -0,0 +1,18 @@ +//===-- MipsPfmCounters.td - Mips Hardware Counters --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This describes the available hardware counters for Mips. +// +//===----------------------------------------------------------------------===// + +def CpuCyclesPfmCounter : PfmCounter<"CYCLES">; + +def DefaultPfmCounters : ProcPfmCounters { + let CycleCounter = CpuCyclesPfmCounter; +} +def : PfmCountersDefaultBinding; diff --git a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp index 85076590d40..ace0735652b 100644 --- a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -27,7 +27,8 @@ class MipsPreLegalizerCombinerInfo : public CombinerInfo { public: MipsPreLegalizerCombinerInfo() : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr) {} + /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, + /*EnableOptSize*/ false, /*EnableMinSize*/ false) {} virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, MachineIRBuilder &B) const override; }; diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp index d8bcf16afd5..d334366e727 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -12,6 +12,7 @@ #include "MipsRegisterBankInfo.h" #include "MipsInstrInfo.h" +#include "MipsTargetMachine.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" @@ -27,20 +28,23 @@ enum PartialMappingIdx { PMI_GPR, PMI_SPR, PMI_DPR, + PMI_MSA, PMI_Min = PMI_GPR, }; RegisterBankInfo::PartialMapping PartMappings[]{ {0, 32, GPRBRegBank}, {0, 32, FPRBRegBank}, - {0, 64, FPRBRegBank} + {0, 64, FPRBRegBank}, + {0, 128, FPRBRegBank} }; enum ValueMappingIdx { InvalidIdx = 0, GPRIdx = 1, 
SPRIdx = 4, - DPRIdx = 7 + DPRIdx = 7, + MSAIdx = 10 }; RegisterBankInfo::ValueMapping ValueMappings[] = { @@ -50,14 +54,18 @@ RegisterBankInfo::ValueMapping ValueMappings[] = { {&PartMappings[PMI_GPR - PMI_Min], 1}, {&PartMappings[PMI_GPR - PMI_Min], 1}, {&PartMappings[PMI_GPR - PMI_Min], 1}, - // up to 3 ops operands FPRs - single precission + // up to 3 operands in FPRs - single precission {&PartMappings[PMI_SPR - PMI_Min], 1}, {&PartMappings[PMI_SPR - PMI_Min], 1}, {&PartMappings[PMI_SPR - PMI_Min], 1}, - // up to 3 ops operands FPRs - double precission + // up to 3 operands in FPRs - double precission {&PartMappings[PMI_DPR - PMI_Min], 1}, {&PartMappings[PMI_DPR - PMI_Min], 1}, - {&PartMappings[PMI_DPR - PMI_Min], 1} + {&PartMappings[PMI_DPR - PMI_Min], 1}, + // up to 3 operands in FPRs - MSA + {&PartMappings[PMI_MSA - PMI_Min], 1}, + {&PartMappings[PMI_MSA - PMI_Min], 1}, + {&PartMappings[PMI_MSA - PMI_Min], 1} }; } // end namespace Mips @@ -86,6 +94,10 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass( case Mips::FGR32RegClassID: case Mips::FGR64RegClassID: case Mips::AFGR64RegClassID: + case Mips::MSA128BRegClassID: + case Mips::MSA128HRegClassID: + case Mips::MSA128WRegClassID: + case Mips::MSA128DRegClassID: return getRegBank(Mips::FPRBRegBankID); default: llvm_unreachable("Register class not supported"); @@ -149,6 +161,7 @@ static bool isAmbiguous(unsigned Opc) { case TargetOpcode::G_STORE: case TargetOpcode::G_PHI: case TargetOpcode::G_SELECT: + case TargetOpcode::G_IMPLICIT_DEF: return true; default: return false; @@ -163,8 +176,7 @@ void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addDefUses( MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI); // Copy with many uses. 
if (NonCopyInstr->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister( - NonCopyInstr->getOperand(0).getReg())) + !Register::isPhysicalRegister(NonCopyInstr->getOperand(0).getReg())) addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI); else DefUses.push_back(skipCopiesOutgoing(&UseMI)); @@ -186,7 +198,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesOutgoing( const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; while (Ret->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(0).getReg()) && + !Register::isPhysicalRegister(Ret->getOperand(0).getReg()) && MRI.hasOneUse(Ret->getOperand(0).getReg())) { Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg())); } @@ -200,7 +212,7 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesIncoming( const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineInstr *Ret = MI; while (Ret->getOpcode() == TargetOpcode::COPY && - !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(1).getReg())) + !Register::isPhysicalRegister(Ret->getOperand(1).getReg())) Ret = MRI.getVRegDef(Ret->getOperand(1).getReg()); return Ret; } @@ -231,6 +243,9 @@ MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer( addUseDef(MI->getOperand(2).getReg(), MRI); addUseDef(MI->getOperand(3).getReg(), MRI); } + + if (MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) + addDefUses(MI->getOperand(0).getReg(), MRI); } bool MipsRegisterBankInfo::TypeInfoForMF::visit( @@ -318,8 +333,7 @@ void MipsRegisterBankInfo::TypeInfoForMF::setTypes(const MachineInstr *MI, void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister( const MachineInstr *MI, const MachineInstr *CopyInst, unsigned Op) { - assert((TargetRegisterInfo::isPhysicalRegister( - CopyInst->getOperand(Op).getReg())) && + assert((Register::isPhysicalRegister(CopyInst->getOperand(Op).getReg())) && "Copies of non physical registers should not be 
considered here.\n"); const MachineFunction &MF = *CopyInst->getMF(); @@ -353,6 +367,31 @@ void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction( } } +static const MipsRegisterBankInfo::ValueMapping * +getMSAMapping(const MachineFunction &MF) { + assert(static_cast(MF.getSubtarget()).hasMSA() && + "MSA mapping not available on target without MSA."); + return &Mips::ValueMappings[Mips::MSAIdx]; +} + +static const MipsRegisterBankInfo::ValueMapping *getFprbMapping(unsigned Size) { + return Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] + : &Mips::ValueMappings[Mips::DPRIdx]; +} + +static const unsigned CustomMappingID = 1; + +// Only 64 bit mapping is available in fprb and will be marked as custom, i.e. +// will be split into two 32 bit registers in gprb. +static const MipsRegisterBankInfo::ValueMapping * +getGprbOrCustomMapping(unsigned Size, unsigned &MappingID) { + if (Size == 32) + return &Mips::ValueMappings[Mips::GPRIdx]; + + MappingID = CustomMappingID; + return &Mips::ValueMappings[Mips::DPRIdx]; +} + const RegisterBankInfo::InstructionMapping & MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { @@ -377,17 +416,35 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned NumOperands = MI.getNumOperands(); const ValueMapping *OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; unsigned MappingID = DefaultMappingID; - const unsigned CustomMappingID = 1; + + // Check if LLT sizes match sizes of available register banks. 
+ for (const MachineOperand &Op : MI.operands()) { + if (Op.isReg()) { + LLT RegTy = MRI.getType(Op.getReg()); + + if (RegTy.isScalar() && + (RegTy.getSizeInBits() != 32 && RegTy.getSizeInBits() != 64)) + return getInvalidInstructionMapping(); + + if (RegTy.isVector() && RegTy.getSizeInBits() != 128) + return getInvalidInstructionMapping(); + } + } + + const LLT Op0Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned Op0Size = Op0Ty.getSizeInBits(); + InstType InstTy = InstType::Integer; switch (Opc) { case G_TRUNC: - case G_ADD: case G_SUB: case G_MUL: case G_UMULH: case G_ZEXTLOAD: case G_SEXTLOAD: case G_GEP: + case G_INTTOPTR: + case G_PTRTOINT: case G_AND: case G_OR: case G_XOR: @@ -398,66 +455,42 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_UDIV: case G_SREM: case G_UREM: + case G_BRINDIRECT: + case G_VASTART: OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; break; - case G_LOAD: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { - InstTy = TI.determineInstType(&MI); + case G_ADD: + OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; + if (Op0Size == 128) + OperandsMapping = getMSAMapping(MF); + break; + case G_STORE: + case G_LOAD: + if (Op0Size == 128) { + OperandsMapping = getOperandsMapping( + {getMSAMapping(MF), &Mips::ValueMappings[Mips::GPRIdx]}); + break; } + if (!Op0Ty.isPointer()) + InstTy = TI.determineInstType(&MI); + if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb + (Op0Size == 64 && InstTy == InstType::Ambiguous)) + OperandsMapping = getOperandsMapping( + {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]}); + else OperandsMapping = - getOperandsMapping({Size == 32 ? 
&Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], + getOperandsMapping({getGprbOrCustomMapping(Op0Size, MappingID), &Mips::ValueMappings[Mips::GPRIdx]}); - break; - } else { // gprb - OperandsMapping = - getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - if (Size == 64) - MappingID = CustomMappingID; - } break; - } - case G_STORE: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { + case G_PHI: + if (!Op0Ty.isPointer()) InstTy = TI.determineInstType(&MI); - } - - if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb - OperandsMapping = - getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - break; - } else { // gprb - OperandsMapping = - getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - if (Size == 64) - MappingID = CustomMappingID; - } - break; - } - case G_PHI: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { - InstTy = TI.determineInstType(&MI); - } // PHI is copylike and should have one regbank in mapping for def register. - if (InstTy == InstType::Integer && Size == 64) { // fprb + if (InstTy == InstType::Integer && Op0Size == 64) { OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]}); return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping, @@ -465,80 +498,63 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } // Use default handling for PHI, i.e. 
set reg bank of def operand to match // register banks of use operands. - const RegisterBankInfo::InstructionMapping &Mapping = - getInstrMappingImpl(MI); - return Mapping; - } + return getInstrMappingImpl(MI); case G_SELECT: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - InstType InstTy = InstType::Integer; - if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) { + if (!Op0Ty.isPointer()) InstTy = TI.determineInstType(&MI); - } if (InstTy == InstType::FloatingPoint || - (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb - const RegisterBankInfo::ValueMapping *Bank = - Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + (Op0Size == 64 && InstTy == InstType::Ambiguous)) { + const RegisterBankInfo::ValueMapping *Bank = getFprbMapping(Op0Size); OperandsMapping = getOperandsMapping( {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank}); break; - } else { // gprb + } else { const RegisterBankInfo::ValueMapping *Bank = - Size <= 32 ? 
&Mips::ValueMappings[Mips::GPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + getGprbOrCustomMapping(Op0Size, MappingID); OperandsMapping = getOperandsMapping( {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank}); - if (Size == 64) - MappingID = CustomMappingID; } break; } - case G_UNMERGE_VALUES: { + case G_IMPLICIT_DEF: + if (!Op0Ty.isPointer()) + InstTy = TI.determineInstType(&MI); + + if (InstTy == InstType::FloatingPoint) + OperandsMapping = getFprbMapping(Op0Size); + else + OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID); + + break; + case G_UNMERGE_VALUES: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::DPRIdx]}); MappingID = CustomMappingID; break; - } - case G_MERGE_VALUES: { + case G_MERGE_VALUES: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx], &Mips::ValueMappings[Mips::GPRIdx], &Mips::ValueMappings[Mips::GPRIdx]}); MappingID = CustomMappingID; break; - } case G_FADD: case G_FSUB: case G_FMUL: case G_FDIV: case G_FABS: - case G_FSQRT:{ - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - OperandsMapping = Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + case G_FSQRT: + OperandsMapping = getFprbMapping(Op0Size); break; - } - case G_FCONSTANT: { - unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - const RegisterBankInfo::ValueMapping *FPRValueMapping = - Size == 32 ? 
&Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; - OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr}); + case G_FCONSTANT: + OperandsMapping = getOperandsMapping({getFprbMapping(Op0Size), nullptr}); break; - } case G_FCMP: { - unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - assert((Size == 32 || Size == 64) && "Unsupported floating point size"); - const RegisterBankInfo::ValueMapping *FPRValueMapping = - Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx]; + unsigned Op2Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, - FPRValueMapping, FPRValueMapping}); + getFprbMapping(Op2Size), getFprbMapping(Op2Size)}); break; } case G_FPEXT: @@ -550,36 +566,31 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &Mips::ValueMappings[Mips::DPRIdx]}); break; case G_FPTOSI: { + assert((Op0Size == 32) && "Unsupported integer size"); unsigned SizeFP = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - assert((MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 32) && + OperandsMapping = getOperandsMapping( + {&Mips::ValueMappings[Mips::GPRIdx], getFprbMapping(SizeFP)}); + break; + } + case G_SITOFP: + assert((MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() == 32) && "Unsupported integer size"); - assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size"); - OperandsMapping = getOperandsMapping({ - &Mips::ValueMappings[Mips::GPRIdx], - SizeFP == 32 ? 
&Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - }); + OperandsMapping = getOperandsMapping( + {getFprbMapping(Op0Size), &Mips::ValueMappings[Mips::GPRIdx]}); break; - } - case G_SITOFP: { - unsigned SizeInt = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned SizeFP = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - (void)SizeInt; - assert((SizeInt == 32) && "Unsupported integer size"); - assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size"); - OperandsMapping = - getOperandsMapping({SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx] - : &Mips::ValueMappings[Mips::DPRIdx], - &Mips::ValueMappings[Mips::GPRIdx]}); - break; - } case G_CONSTANT: case G_FRAME_INDEX: case G_GLOBAL_VALUE: + case G_JUMP_TABLE: case G_BRCOND: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr}); break; + case G_BRJT: + OperandsMapping = + getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, + &Mips::ValueMappings[Mips::GPRIdx]}); + break; case G_ICMP: OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr, @@ -609,11 +620,41 @@ public: }; } // end anonymous namespace -/// Here we have to narrowScalar s64 operands to s32, combine away -/// G_MERGE/G_UNMERGE and erase instructions that became dead in the process. -/// We manually assign 32 bit gprb to register operands of all new instructions -/// that got created in the process since they will not end up in RegBankSelect -/// loop. Careful not to delete instruction after MI i.e. MI.getIterator()++. +void MipsRegisterBankInfo::setRegBank(MachineInstr &MI, + MachineRegisterInfo &MRI) const { + Register Dest = MI.getOperand(0).getReg(); + switch (MI.getOpcode()) { + case TargetOpcode::G_STORE: + // No def operands, skip this instruction. 
+ break; + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_LOAD: + case TargetOpcode::G_SELECT: + case TargetOpcode::G_PHI: + case TargetOpcode::G_IMPLICIT_DEF: { + assert(MRI.getType(Dest) == LLT::scalar(32) && "Unexpected operand type."); + MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID)); + break; + } + case TargetOpcode::G_GEP: { + assert(MRI.getType(Dest).isPointer() && "Unexpected operand type."); + MRI.setRegBank(Dest, getRegBank(Mips::GPRBRegBankID)); + break; + } + default: + llvm_unreachable("Unexpected opcode."); + } +} + +static void +combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner, + MachineInstr &MI) { + SmallVector DeadInstrs; + ArtCombiner.tryCombineMerges(MI, DeadInstrs); + for (MachineInstr *DeadMI : DeadInstrs) + DeadMI->eraseFromParent(); +} + void MipsRegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -621,18 +662,19 @@ void MipsRegisterBankInfo::applyMappingImpl( MachineIRBuilder B(MI); MachineFunction *MF = MI.getMF(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); + const LegalizerInfo &LegInfo = *MF->getSubtarget().getLegalizerInfo(); InstManager NewInstrObserver(NewInstrs); GISelObserverWrapper WrapperObserver(&NewInstrObserver); LegalizerHelper Helper(*MF, WrapperObserver, B); - LegalizationArtifactCombiner ArtCombiner( - B, MF->getRegInfo(), *MF->getSubtarget().getLegalizerInfo()); + LegalizationArtifactCombiner ArtCombiner(B, MF->getRegInfo(), LegInfo); switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: case TargetOpcode::G_PHI: - case TargetOpcode::G_SELECT: { + case TargetOpcode::G_SELECT: + case TargetOpcode::G_IMPLICIT_DEF: { Helper.narrowScalar(MI, 0, LLT::scalar(32)); // Handle new instructions. while (!NewInstrs.empty()) { @@ -640,35 +682,21 @@ void MipsRegisterBankInfo::applyMappingImpl( // This is new G_UNMERGE that was created during narrowScalar and will // not be considered for regbank selection. 
RegBankSelect for mips // visits/makes corresponding G_MERGE first. Combine them here. - if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { - SmallVector DeadInstrs; - ArtCombiner.tryCombineMerges(*NewMI, DeadInstrs); - for (MachineInstr *DeadMI : DeadInstrs) - DeadMI->eraseFromParent(); - } + if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) + combineAwayG_UNMERGE_VALUES(ArtCombiner, *NewMI); // This G_MERGE will be combined away when its corresponding G_UNMERGE // gets regBankSelected. else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) continue; else - // Manually set register banks for all register operands to 32 bit gprb. - for (auto Op : NewMI->operands()) { - if (Op.isReg()) { - assert(MRI.getType(Op.getReg()).getSizeInBits() == 32 && - "Only 32 bit gprb is handled here.\n"); - MRI.setRegBank(Op.getReg(), getRegBank(Mips::GPRBRegBankID)); - } - } + // Manually set register banks for def operands to 32 bit gprb. + setRegBank(*NewMI, MRI); } return; } - case TargetOpcode::G_UNMERGE_VALUES: { - SmallVector DeadInstrs; - ArtCombiner.tryCombineMerges(MI, DeadInstrs); - for (MachineInstr *DeadMI : DeadInstrs) - DeadMI->eraseFromParent(); + case TargetOpcode::G_UNMERGE_VALUES: + combineAwayG_UNMERGE_VALUES(ArtCombiner, MI); return; - } default: break; } diff --git a/lib/Target/Mips/MipsRegisterBankInfo.h b/lib/Target/Mips/MipsRegisterBankInfo.h index 176813c031e..fa0f1c7bc94 100644 --- a/lib/Target/Mips/MipsRegisterBankInfo.h +++ b/lib/Target/Mips/MipsRegisterBankInfo.h @@ -38,8 +38,17 @@ public: const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override; + /// Here we have to narrowScalar s64 operands to s32, combine away G_MERGE or + /// G_UNMERGE and erase instructions that became dead in the process. We + /// manually assign bank to def operand of all new instructions that were + /// created in the process since they will not end up in RegBankSelect loop. 
void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + /// RegBankSelect determined that s64 operand is better to be split into two + /// s32 operands in gprb. Here we manually set register banks of def operands + /// of newly created instructions since they will not get regbankselected. + void setRegBank(MachineInstr &MI, MachineRegisterInfo &MRI) const; + private: /// Some instructions are used with both floating point and integer operands. /// We assign InstType to such instructions as it helps us to avoid cross bank diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td index 14a0181f8f1..7d11475884c 100644 --- a/lib/Target/Mips/MipsRegisterBanks.td +++ b/lib/Target/Mips/MipsRegisterBanks.td @@ -11,4 +11,4 @@ def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>; -def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>; +def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64, MSA128D]>; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 4c6cc1ef771..166ddea0431 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -171,8 +171,8 @@ void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); - unsigned VR = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR = MRI.createVirtualRegister(RC); + Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) @@ -186,8 +186,8 @@ void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(4); - unsigned VR = 
MRI.createVirtualRegister(RC); - unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR = MRI.createVirtualRegister(RC); + Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) .addReg(Src, getKillRegState(I->getOperand(0).isKill())); @@ -204,11 +204,11 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); - unsigned VR0 = MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); - unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); - unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); + Register Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); + Register Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); @@ -229,9 +229,9 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); - unsigned VR0 = MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); + Register Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); unsigned SrcKill = getKillRegState(I->getOperand(0).isKill()); DebugLoc DL = I->getDebugLoc(); @@ -242,7 +242,7 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, } bool 
ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { - unsigned Src = I->getOperand(1).getReg(); + Register Src = I->getOperand(1).getReg(); std::pair Opcodes = getMFHiLoOpc(Src); if (!Opcodes.first) @@ -262,11 +262,11 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); - unsigned VR0 = MRI.createVirtualRegister(RC); - unsigned VR1 = MRI.createVirtualRegister(RC); + Register VR0 = MRI.createVirtualRegister(RC); + Register VR1 = MRI.createVirtualRegister(RC); unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); - unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); - unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); + Register DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); + Register DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); DebugLoc DL = I->getDebugLoc(); BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src); @@ -304,9 +304,9 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, // stack is used. if (I->getNumOperands() == 4 && I->getOperand(3).isReg() && I->getOperand(3).getReg() == Mips::SP) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(); - unsigned HiReg = I->getOperand(2).getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register LoReg = I->getOperand(1).getReg(); + Register HiReg = I->getOperand(2).getReg(); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are // the cases where mthc1 is not available). 
64-bit architectures and @@ -346,7 +346,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, const MachineOperand &Op2 = I->getOperand(2); if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) { - unsigned DstReg = I->getOperand(0).getReg(); + Register DstReg = I->getOperand(0).getReg(); BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg); return true; } @@ -369,8 +369,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // stack is used. if (I->getNumOperands() == 4 && I->getOperand(3).isReg() && I->getOperand(3).getReg() == Mips::SP) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = Op1.getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register SrcReg = Op1.getReg(); unsigned N = Op2.getImm(); int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N)); @@ -538,7 +538,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, if (RegInfo.needsStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment // andi $sp, $sp, $Reg - unsigned VR = MF.getRegInfo().createVirtualRegister(RC); + Register VR = MF.getRegInfo().createVirtualRegister(RC); assert(isInt<16>(MFI.getMaxAlignment()) && "Function's alignment size requirement is not supported."); int MaxAlign = -(int)MFI.getMaxAlignment(); @@ -865,12 +865,15 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); + unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA; unsigned FP = ABI.GetFramePtr(); unsigned BP = ABI.IsN64() ? Mips::S7_64 : Mips::S7; - // Mark $fp as used if function has dedicated frame pointer. - if (hasFP(MF)) + // Mark $ra and $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) { + setAliasRegs(MF, SavedRegs, RA); setAliasRegs(MF, SavedRegs, FP); + } // Mark $s7 as used if function has dedicated base pointer. 
if (hasBP(MF)) setAliasRegs(MF, SavedRegs, BP); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 703f99f37dd..c8313240a67 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -124,6 +124,33 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, return true; } +void MipsSEDAGToDAGISel::emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF) { + MachineInstrBuilder MIB(MF, &MI); + if (!Subtarget->isABI_O32()) { // N32, N64 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR64)) + .addDef(Mips::AT_64) + .addUse(Mips::RA_64, RegState::Undef) + .addUse(Mips::ZERO_64); + // Stops instruction above from being removed later on. + MIB.addUse(Mips::AT_64, RegState::Implicit); + } else { // O32 + // Save current return address. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::OR)) + .addDef(Mips::AT) + .addUse(Mips::RA, RegState::Undef) + .addUse(Mips::ZERO); + // _mcount pops 2 words from stack. + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Mips::ADDiu)) + .addDef(Mips::SP) + .addUse(Mips::SP) + .addImm(-8); + // Stops first instruction above from being removed later on. 
+ MIB.addUse(Mips::AT, RegState::Implicit); + } +} + void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { MF.getInfo()->initGlobalBaseReg(); @@ -150,6 +177,24 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1()) MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); break; + case Mips::JAL: + case Mips::JAL_MM: + if (MI.getOperand(0).isGlobal() && + MI.getOperand(0).getGlobal()->getGlobalIdentifier() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALRPseudo: + case Mips::JALR64Pseudo: + case Mips::JALR16_MM: + if (MI.getOperand(2).isMCSymbol() && + MI.getOperand(2).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; + case Mips::JALR: + if (MI.getOperand(3).isMCSymbol() && + MI.getOperand(3).getMCSymbol()->getName() == "_mcount") + emitMCountABI(MI, MBB, MF); + break; default: replaceUsesWithZeroReg(MRI, MI); } @@ -247,7 +292,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset( Base = Addr.getOperand(0); // If base is a FI, additional offset calculation is done in // eliminateFrameIndex, otherwise we need to check the alignment - if (OffsetToAlignment(CN->getZExtValue(), 1ull << ShiftAmount) != 0) + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) return false; } @@ -719,7 +765,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast(Node); + auto *CN = cast(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { if (Subtarget->isGP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, @@ -743,7 +789,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast(Node); + auto *CN = cast(Node); int64_t Imm = CN->getSExtValue(); unsigned Size = CN->getValueSizeInBits(0); @@ -969,7 +1015,7 @@ bool 
MipsSEDAGToDAGISel::trySelect(SDNode *Node) { break; } - SDNode *Res; + SDNode *Res = nullptr; // If we have a signed 10 bit integer, we can splat it directly. // diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index ce594e1fb4f..39f665be571 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -120,7 +120,7 @@ private: /// power of 2. bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits - /// ending at the most significant bit + /// ending at the most significant bit. bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override; /// Select constant vector splats whose value is a run of set bits /// starting at bit zero. @@ -128,6 +128,10 @@ private: bool trySelect(SDNode *Node) override; + // Emits proper ABI for _mcount profiling calls. + void emitMCountABI(MachineInstr &MI, MachineBasicBlock &MBB, + MachineFunction &MF); + void processFunctionAfterISel(MachineFunction &MF) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index edf57a3840d..5bd234f955b 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -71,8 +71,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, if (Subtarget.hasDSP() || Subtarget.hasMSA()) { // Expand all truncating stores and extending loads. 
- for (MVT VT0 : MVT::vector_valuetypes()) { - for (MVT VT1 : MVT::vector_valuetypes()) { + for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { + for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT0, VT1, Expand); setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); @@ -327,6 +327,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::UNDEF, Ty, Legal); setOperationAction(ISD::ADD, Ty, Legal); setOperationAction(ISD::AND, Ty, Legal); @@ -2595,7 +2596,8 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SDLoc DL(Op); return DAG.getNode(MipsISD::SHF, DL, ResTy, - DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0)); + DAG.getTargetConstant(Imm, DL, MVT::i32), + Op->getOperand(0)); } /// Determine whether a range fits a regular pattern of values. @@ -3062,13 +3064,13 @@ MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI, BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB); // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); + Register VR2 = RegInfo.createVirtualRegister(RC); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) .addReg(Mips::ZERO).addImm(0); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); + Register VR1 = RegInfo.createVirtualRegister(RC); BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) .addReg(Mips::ZERO).addImm(1); @@ -3131,13 +3133,13 @@ MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo( .addMBB(TBB); // Fill $FBB. 
- unsigned RD1 = RegInfo.createVirtualRegister(RC); + Register RD1 = RegInfo.createVirtualRegister(RC); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1) .addReg(Mips::ZERO).addImm(0); BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); // Fill $TBB. - unsigned RD2 = RegInfo.createVirtualRegister(RC); + Register RD2 = RegInfo.createVirtualRegister(RC); BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2) .addReg(Mips::ZERO).addImm(1); @@ -3169,8 +3171,8 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Fd = MI.getOperand(0).getReg(); - unsigned Ws = MI.getOperand(1).getReg(); + Register Fd = MI.getOperand(0).getReg(); + Register Ws = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); if (Lane == 0) { @@ -3185,9 +3187,9 @@ MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); } else { - unsigned Wt = RegInfo.createVirtualRegister( - Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : - &Mips::MSA128WEvensRegClass); + Register Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass + : &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); @@ -3214,15 +3216,15 @@ MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - unsigned Fd = MI.getOperand(0).getReg(); - unsigned Ws = MI.getOperand(1).getReg(); + Register Fd = MI.getOperand(0).getReg(); + Register Ws = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm() * 2; DebugLoc DL = MI.getDebugLoc(); if (Lane == 0) BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64); else { - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64); @@ -3244,13 +3246,13 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Wd_in = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Wd_in = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); - unsigned Fs = MI.getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister( - Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : - &Mips::MSA128WEvensRegClass); + Register Fs = MI.getOperand(3).getReg(); + Register Wt = RegInfo.createVirtualRegister( + Subtarget.useOddSPReg() ? 
&Mips::MSA128WRegClass + : &Mips::MSA128WEvensRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) @@ -3280,11 +3282,11 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Wd_in = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Wd_in = MI.getOperand(1).getReg(); unsigned Lane = MI.getOperand(2).getImm(); - unsigned Fs = MI.getOperand(3).getReg(); - unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Fs = MI.getOperand(3).getReg(); + Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) @@ -3326,10 +3328,10 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned SrcVecReg = MI.getOperand(1).getReg(); - unsigned LaneReg = MI.getOperand(2).getReg(); - unsigned SrcValReg = MI.getOperand(3).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register SrcVecReg = MI.getOperand(1).getReg(); + Register LaneReg = MI.getOperand(2).getReg(); + Register SrcValReg = MI.getOperand(3).getReg(); const TargetRegisterClass *VecRC = nullptr; // FIXME: This should be true for N32 too. 
@@ -3370,7 +3372,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( } if (IsFP) { - unsigned Wt = RegInfo.createVirtualRegister(VecRC); + Register Wt = RegInfo.createVirtualRegister(VecRC); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) .addImm(0) .addReg(SrcValReg) @@ -3380,7 +3382,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( // Convert the lane index into a byte index if (EltSizeInBytes != 1) { - unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); + Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1) .addReg(LaneReg) .addImm(EltLog2Size); @@ -3388,13 +3390,13 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( } // Rotate bytes around so that the desired lane is element zero - unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); + Register WdTmp1 = RegInfo.createVirtualRegister(VecRC); BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) .addReg(SrcVecReg) .addReg(SrcVecReg) .addReg(LaneReg, 0, SubRegIdx); - unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); + Register WdTmp2 = RegInfo.createVirtualRegister(VecRC); if (IsFP) { // Use insve.df to insert to element zero BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) @@ -3413,7 +3415,7 @@ MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX( // Rotate elements the rest of the way for a full rotation. // sld.df inteprets $rt modulo the number of columns so we only need to negate // the lane index to do this. - unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); + Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB), LaneTmp2) .addReg(Subtarget.isABI_N64() ? 
Mips::ZERO_64 : Mips::ZERO) @@ -3440,12 +3442,12 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); - unsigned Wt1 = RegInfo.createVirtualRegister( + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); + Register Wt1 = RegInfo.createVirtualRegister( Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : &Mips::MSA128WEvensRegClass); - unsigned Wt2 = RegInfo.createVirtualRegister( + Register Wt2 = RegInfo.createVirtualRegister( Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass : &Mips::MSA128WEvensRegClass); @@ -3475,10 +3477,10 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); - unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); - unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); + Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); + Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1); BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2) @@ -3509,8 +3511,8 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Ws = MI.getOperand(0).getReg(); - unsigned Rt = MI.getOperand(1).getReg(); + Register Ws = MI.getOperand(0).getReg(); + Register Rt = MI.getOperand(1).getReg(); const 
MachineMemOperand &MMO = **MI.memoperands_begin(); unsigned Imm = MMO.getOffset(); @@ -3522,11 +3524,11 @@ MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); if(!UsingMips32) { - unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) .addImm(0) .addReg(Rs) @@ -3564,7 +3566,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); + Register Wd = MI.getOperand(0).getReg(); // Caution: A load via the GOT can expand to a GPR32 operand, a load via // spill and reload can expand as a GPR64 operand. Examine the @@ -3575,7 +3577,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, : &Mips::GPR64RegClass); const bool UsingMips32 = RC == &Mips::GPR32RegClass; - unsigned Rt = RegInfo.createVirtualRegister(RC); + Register Rt = RegInfo.createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? 
Mips::LH : Mips::LH64), Rt); @@ -3583,7 +3585,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, MIB.add(MI.getOperand(i)); if(!UsingMips32) { - unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); + Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); Rt = Tmp; } @@ -3658,11 +3660,11 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Wd = MI.getOperand(0).getReg(); - unsigned Fs = MI.getOperand(1).getReg(); + Register Wd = MI.getOperand(0).getReg(); + Register Fs = MI.getOperand(1).getReg(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); const TargetRegisterClass *GPRRC = IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; unsigned MFC1Opc = IsFGR64onMips64 @@ -3671,16 +3673,16 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; // Perform the register class copy as mentioned above. 
- unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); + Register Rtemp = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); unsigned WPHI = Wtemp; if (IsFGR64onMips32) { - unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); + Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); - unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); - unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) .addReg(Wtemp) .addReg(Rtemp2) @@ -3693,7 +3695,7 @@ MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, } if (IsFGR64) { - unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) .addReg(WPHI) .addReg(WPHI); @@ -3817,8 +3819,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128WRegClass; - unsigned Ws1 = RegInfo.createVirtualRegister(RC); - unsigned Ws2 = RegInfo.createVirtualRegister(RC); + Register Ws1 = RegInfo.createVirtualRegister(RC); + Register Ws2 = RegInfo.createVirtualRegister(RC); DebugLoc DL = MI.getDebugLoc(); // Splat 1.0 into a vector @@ -3846,8 +3848,8 @@ MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128DRegClass; - unsigned Ws1 = RegInfo.createVirtualRegister(RC); 
- unsigned Ws2 = RegInfo.createVirtualRegister(RC); + Register Ws1 = RegInfo.createVirtualRegister(RC); + Register Ws2 = RegInfo.createVirtualRegister(RC); DebugLoc DL = MI.getDebugLoc(); // Splat 1.0 into a vector diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 4e49f5e7d9d..2126a1bda49 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -628,7 +628,7 @@ unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. - unsigned Reg = RegInfo.createVirtualRegister(RC); + Register Reg = RegInfo.createVirtualRegister(RC); if (Inst->Opc == LUi) BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd)); @@ -734,9 +734,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. 
if (HasExplicitDef) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); - unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); + Register DstReg = I->getOperand(0).getReg(); + Register DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); + Register DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } @@ -773,14 +773,14 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); + Register DstReg = I->getOperand(0).getReg(); + Register SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo; - unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + Register SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. @@ -815,7 +815,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { - unsigned DstReg = I->getOperand(0).getReg(); + Register DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); @@ -883,8 +883,8 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA; unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9; unsigned ZERO = Subtarget.isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; - unsigned OffsetReg = I->getOperand(0).getReg(); - unsigned TargetReg = I->getOperand(1).getReg(); + Register OffsetReg = I->getOperand(0).getReg(); + Register TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index f4b164d5c0a..a48088c2891 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -212,11 +212,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // element size), otherwise it is a 16-bit signed immediate. unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(MI.getOpcode(), MI.getOperand(OpNo - 1)); - unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode()); - + const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode())); if (OffsetBitSize < 16 && isInt<16>(Offset) && - (!isIntN(OffsetBitSize, Offset) || - OffsetToAlignment(Offset, OffsetAlign) != 0)) { + (!isIntN(OffsetBitSize, Offset) || !isAligned(OffsetAlign, Offset))) { // If we have an offset that needs to fit into a signed n-bit immediate // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu MachineBasicBlock &MBB = *MI.getParent(); @@ -224,7 +222,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() ? 
&Mips::GPR64RegClass : &Mips::GPR32RegClass; MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); - unsigned Reg = RegInfo.createVirtualRegister(PtrRC); + Register Reg = RegInfo.createVirtualRegister(PtrRC); const MipsSEInstrInfo &TII = *static_cast( MBB.getParent()->getSubtarget().getInstrInfo()); diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index d021b3d021b..b9245c9fc0e 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -69,7 +69,7 @@ void MipsSubtarget::anchor() {} MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, const MipsTargetMachine &TM, - unsigned StackAlignOverride) + MaybeAlign StackAlignOverride) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false), NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true), @@ -81,10 +81,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false), HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false), - StackAlignOverride(StackAlignOverride), - TM(TM), TargetTriple(TT), TSInfo(), - InstrInfo( - MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), + StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), + TSInfo(), InstrInfo(MipsInstrInfo::create( + initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { @@ -248,12 +247,12 @@ MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, InMips16HardFloat = true; if (StackAlignOverride) - stackAlignment = StackAlignOverride; + stackAlignment = *StackAlignOverride; else if (isABI_N32() || isABI_N64()) - stackAlignment = 16; + stackAlignment = Align(16); else { 
assert(isABI_O32() && "Unknown ABI for stack alignment!"); - stackAlignment = 8; + stackAlignment = Align(8); } return *this; @@ -286,6 +285,6 @@ const RegisterBankInfo *MipsSubtarget::getRegBankInfo() const { return RegBankInfo.get(); } -const InstructionSelector *MipsSubtarget::getInstructionSelector() const { +InstructionSelector *MipsSubtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index aa1200579fc..0a8c2ef8ae5 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -189,12 +189,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Disable use of the `jal` instruction. bool UseLongCalls = false; + // Assume 32-bit GOT. + bool UseXGOT = false; + /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment; + Align stackAlignment; /// The overridden stack alignment. - unsigned StackAlignOverride; + MaybeAlign StackAlignOverride; InstrItineraryData InstrItins; @@ -227,7 +230,7 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, - const MipsTargetMachine &TM, unsigned StackAlignOverride); + const MipsTargetMachine &TM, MaybeAlign StackAlignOverride); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
@@ -323,6 +326,8 @@ public: bool useLongCalls() const { return UseLongCalls; } + bool useXGOT() const { return UseXGOT; } + bool enableLongBranchPass() const { return hasStandardEncoding() || inMicroMipsMode() || allowMixed16_32(); } @@ -344,7 +349,7 @@ public: // really use them if in addition we are in mips16 mode static bool useConstantIslands(); - unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } // Grab relocation model Reloc::Model getRelocationModel() const; @@ -391,7 +396,7 @@ public: const CallLowering *getCallLowering() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index c878abb042e..e58f316791b 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -117,14 +117,17 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, CPU, FS, Options, getEffectiveRelocModel(JIT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - isLittle(isLittle), TLOF(llvm::make_unique()), + isLittle(isLittle), TLOF(std::make_unique()), ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), - Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this, - Options.StackAlignmentOverride), + Subtarget(nullptr), + DefaultSubtarget(TT, CPU, FS, isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)), NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16", - isLittle, *this, Options.StackAlignmentOverride), + isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)), Mips16Subtarget(TT, CPU, FS.empty() ? 
"+mips16" : FS.str() + ",+mips16", - isLittle, *this, Options.StackAlignmentOverride) { + isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)) { Subtarget = &DefaultSubtarget; initAsmInfo(); } @@ -196,8 +199,9 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, isLittle, *this, - Options.StackAlignmentOverride); + I = std::make_unique( + TargetTriple, CPU, FS, isLittle, *this, + MaybeAlign(Options.StackAlignmentOverride)); } return I.get(); } diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 1fa8ebadd64..298d056ce2c 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -130,6 +130,8 @@ public: SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, + MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, @@ -154,17 +156,13 @@ public: unsigned BaseReg, int64_t Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitStoreWithSymOffset(unsigned Opcode, unsigned SrcReg, - unsigned BaseReg, MCOperand &HiOperand, - MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); + void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg, + MCOperand &HiOperand, MCOperand &LoOperand, + unsigned ATReg, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, int64_t Offset, unsigned TmpReg, 
SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, - MCOperand &HiOperand, MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); void emitGPRestore(int Offset, SMLoc IDLoc, const MCSubtargetInfo *STI); void forbidModuleDirective() { ModuleDirectiveAllowed = false; } diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 6530c40ea10..0acbace5f84 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -44,7 +44,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM); -BasicBlockPass *createNVPTXLowerAllocaPass(); +FunctionPass *createNVPTXLowerAllocaPass(); MachineFunctionPass *createNVPTXPeephole(); MachineFunctionPass *createNVPTXProxyRegErasurePass(); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 5f38b4a3c4c..307f4d58c3a 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -282,7 +282,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, } unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); DenseMap &RegMap = VRegMapping[RC]; @@ -434,7 +434,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( return false; } -void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { +void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { AsmPrinter::EmitBasicBlockStart(MBB); if (isLoopHeaderOfNoUnroll(MBB)) OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); @@ -507,8 +507,8 @@ const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const { } void 
NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { - unsigned RegNo = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegNo)) { + Register RegNo = MI->getOperand(0).getReg(); + if (Register::isVirtualRegister(RegNo)) { OutStreamer->AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { @@ -1397,7 +1397,7 @@ static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { auto *FTy = dyn_cast(Ty); if (FTy) - return DL.getPointerPrefAlignment(); + return DL.getPointerPrefAlignment().value(); return DL.getPrefTypeAlignment(Ty); } @@ -1473,12 +1473,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex); - if (align == 0) - align = DL.getABITypeAlignment(Ty); + const Align align = DL.getValueOrABITypeAlignment( + PAL.getParamAlignment(paramIndex), Ty); unsigned sz = DL.getTypeAllocSize(Ty); - O << "\t.param .align " << align << " .b8 "; + O << "\t.param .align " << align.value() << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1559,9 +1558,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex); - if (align == 0) - align = DL.getABITypeAlignment(ETy); + Align align = + DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy); // Work around a bug in ptxas. When PTX code takes address of // byval parameter with alignment < 4, ptxas generates code to // spill argument into memory. 
Alas on sm_50+ ptxas generates @@ -1573,10 +1571,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // TODO: this will need to be undone when we get to support multi-TU // device-side compilation as it breaks ABI compatibility with nvcc. // Hopefully ptxas bug is fixed by then. - if (!isKernelFunc && align < 4) - align = 4; + if (!isKernelFunc && align < Align(4)) + align = Align(4); unsigned sz = DL.getTypeAllocSize(ETy); - O << "\t.param .align " << align << " .b8 "; + O << "\t.param .align " << align.value() << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1653,7 +1651,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( // We use the per class virtual register number in the ptx output. unsigned int numVRs = MRI->getNumVirtRegs(); for (unsigned i = 0; i < numVRs; i++) { - unsigned int vr = TRI->index2VirtReg(i); + unsigned int vr = Register::index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); DenseMap ®map = VRegMapping[RC]; int n = regmap.size(); @@ -1861,7 +1859,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::HalfTyID: case Type::FloatTyID: case Type::DoubleTyID: { - const ConstantFP *CFP = dyn_cast(CPV); + const auto *CFP = cast(CPV); Type *Ty = CFP->getType(); if (Ty == Type::getHalfTy(CPV->getContext())) { APInt API = CFP->getValueAPF().bitcastToAPInt(); @@ -2212,7 +2210,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { if (MO.getReg() == NVPTX::VRDepot) O << DEPOTNAME << getFunctionNumber(); else diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 43ae57ac126..7a66854d32f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ 
b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -200,7 +200,7 @@ private: const Function *F; std::string CurrentFnName; - void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; + void EmitBasicBlockStart(const MachineBasicBlock &MBB) override; void EmitFunctionEntryLabel() override; void EmitFunctionBodyStart() override; void EmitFunctionBodyEnd() override; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 46f08b23d31..d26912f47e5 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -25,7 +25,7 @@ using namespace llvm; NVPTXFrameLowering::NVPTXFrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, Align(8), 0) {} bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ae1aa98da0e..9acd0bea66f 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -480,7 +480,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::TRAP, MVT::Other, Legal); // Register custom handling for vector loads/stores - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -1291,8 +1291,8 @@ std::string NVPTXTargetLowering::getPrototype( O << ".param .b" << size << " _"; } else if (isa(retTy)) { O << ".param .b" << PtrVT.getSizeInBits() << " _"; - } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { - auto &DL = CS.getCalledFunction()->getParent()->getDataLayout(); + } else if (retTy->isAggregateType() || retTy->isVectorTy() || + retTy->isIntegerTy(128)) { O << ".param .align " << retAlignment << " .b8 
_[" << DL.getTypeAllocSize(retTy) << "]"; } else { @@ -2230,8 +2230,8 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::v2f16) { LoadSDNode *Load = cast(Op); EVT MemVT = Load->getMemoryVT(); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - *Load->getMemOperand())) { + if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + MemVT, *Load->getMemOperand())) { SDValue Ops[2]; std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); return DAG.getMergeValues(Ops, SDLoc(Op)); @@ -2273,8 +2273,8 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v2f16 is legal, so we can't rely on legalizer to handle unaligned // stores and have to handle it here. if (VT == MVT::v2f16 && - !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *Store->getMemOperand())) + !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + VT, *Store->getMemOperand())) return expandUnalignedStore(Store, DAG); if (VT.isVector()) @@ -3497,7 +3497,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col: @@ -3521,7 +3521,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3547,7 +3547,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3585,7 +3585,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 4; + Info.align = Align(4); return true; 
} @@ -3606,7 +3606,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3627,7 +3627,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3648,7 +3648,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3665,7 +3665,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3686,7 +3686,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3707,7 +3707,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3728,7 +3728,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 16; + Info.align = Align(16); return true; } @@ -3745,7 +3745,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOStore; - Info.align = 8; + Info.align = Align(8); return true; } @@ -3780,7 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - Info.align = 0; + Info.align.reset(); return true; } @@ 
-3798,7 +3798,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); + Info.align = + MaybeAlign(cast(I.getArgOperand(1))->getZExtValue()); return true; } @@ -3817,7 +3818,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); + Info.align = + MaybeAlign(cast(I.getArgOperand(1))->getZExtValue()); return true; } @@ -3883,7 +3885,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_tex_1d_v4s32_s32: @@ -4003,7 +4005,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i8_clamp: @@ -4056,7 +4058,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i16_clamp: @@ -4109,7 +4111,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i32_clamp: @@ -4162,7 +4164,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + Info.align = Align(16); return true; case Intrinsic::nvvm_suld_1d_i64_clamp: @@ -4200,7 +4202,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = nullptr; Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = 16; + 
Info.align = Align(16); return true; } return false; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 62da3c79f46..fe7a84f9a36 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -143,12 +143,17 @@ def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; +def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">; def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">; def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">; +// non-sync shfl instructions are not available on sm_70+ in PTX6.4+ +def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" + "&& Subtarget->getPTXVersion() >= 64)">; + def useShortPtr : Predicate<"useShortPointers()">; def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; @@ -2908,7 +2913,7 @@ def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; // ctz instruction always returns a 32-bit value. For ctlz.i64, convert the // ptx value to 64 bits to match the ISD node's semantics, unless we know we're // truncating back down to 32 bits. -def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; // For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the @@ -2925,10 +2930,10 @@ def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; // and then ctlz that value. This way we don't have to subtract 16 from the // result. Unfortunately today we don't have a way to generate // "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. 
-def : Pat<(ctlz Int16Regs:$a), +def : Pat<(i16 (ctlz Int16Regs:$a)), (SUBi16ri (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; -def : Pat<(i32 (zext (ctlz Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))), (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; // Population count @@ -2953,7 +2958,7 @@ def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>; // If we know that we're storing into an i32, we can avoid the final trunc. def : Pat<(ctpop Int16Regs:$a), (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; -def : Pat<(i32 (zext (ctpop Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))), (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; // fpround f32 -> f16 diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 1752d3e0575..c52195fb044 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -56,6 +56,10 @@ class RegSeq { []); } +class THREADMASK_INFO { + list ret = !if(sync, [0,1], [0]); +} + //----------------------------------- // Synchronization and shuffle functions //----------------------------------- @@ -129,121 +133,64 @@ def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>, Requires<[hasPTX60, hasSM30]>; - -// shfl.{up,down,bfly,idx}.b32 -multiclass SHFL { - // The last two parameters to shfl can be regs or imms. ptxas is smart - // enough to inline constant registers, so strictly speaking we don't need to - // handle immediates here. But it's easy enough, and it makes our ptx more - // readable. 
- def reg : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>; - - def imm1 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>; - - def imm2 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>; - - def imm3 : NVPTXInst< - (outs regclass:$dst), - (ins regclass:$src, i32imm:$offset, i32imm:$mask), - !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), - [(set regclass:$dst, (IntOp regclass:$src, imm:$offset, imm:$mask))]>; +class SHFL_INSTR + : NVPTXInst<(outs), (ins), "?", []> { + NVPTXRegClass rc = !cond( + !eq(reg, "i32"): Int32Regs, + !eq(reg, "f32"): Float32Regs); + string IntrName = "int_nvvm_shfl_" + # !if(sync, "sync_", "") + # mode + # "_" # reg + # !if(return_pred, "p", ""); + Intrinsic Intr = !cast(IntrName); + let InOperandList = !con( + !if(sync, + !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]), + (ins)), + (ins rc:$src), + !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]), + !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]) + ); + let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst)); + let AsmString = "shfl." 
+ # !if(sync, "sync.", "") + # mode # ".b32\t" + # "$dst" + # !if(return_pred, "|$pred", "") # ", " + # "$src, $offset, $mask" + # !if(sync, ", $threadmask", "") + # ";" + ; + let Pattern = [!con( + !foreach(tmp, OutOperandList, + !subst(outs, set, + !subst(i32imm, imm, tmp))), + (set !foreach(tmp, InOperandList, + !subst(ins, Intr, + !subst(i32imm, imm, tmp)))) + )]; } -defm INT_SHFL_DOWN_I32 : SHFL; -defm INT_SHFL_DOWN_F32 : SHFL; -defm INT_SHFL_UP_I32 : SHFL; -defm INT_SHFL_UP_F32 : SHFL; -defm INT_SHFL_BFLY_I32 : SHFL; -defm INT_SHFL_BFLY_F32 : SHFL; -defm INT_SHFL_IDX_I32 : SHFL; -defm INT_SHFL_IDX_F32 : SHFL; - -multiclass SHFL_SYNC { - // Threadmask and the last two parameters to shfl.sync can be regs or imms. - // ptxas is smart enough to inline constant registers, so strictly speaking we - // don't need to handle immediates here. But it's easy enough, and it makes - // our ptx more readable. - def rrr : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - Int32Regs:$offset, Int32Regs:$mask))]>; - - def rri : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - Int32Regs:$offset, imm:$mask))]>; - - def rir : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - imm:$offset, Int32Regs:$mask))]>; - - def rii : NVPTXInst< - (outs regclass:$dst), - (ins Int32Regs:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask), - 
!strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src, - imm:$offset, imm:$mask))]>; - - def irr : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - Int32Regs:$offset, Int32Regs:$mask))]>; - - def iri : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, Int32Regs:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - Int32Regs:$offset, imm:$mask))]>; - - def iir : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, Int32Regs:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - imm:$offset, Int32Regs:$mask))]>; - - def iii : NVPTXInst< - (outs regclass:$dst), - (ins i32imm:$threadmask, regclass:$src, i32imm:$offset, i32imm:$mask), - !strconcat("shfl.sync.", mode, ".b32 $dst, $src, $offset, $mask, $threadmask;"), - [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src, - imm:$offset, imm:$mask))]>; +foreach sync = [0, 1] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach regclass = ["i32", "f32"] in { + foreach return_pred = [0, 1] in { + foreach offset_imm = [0, 1] in { + foreach mask_imm = [0, 1] in { + foreach threadmask_imm = THREADMASK_INFO.ret in { + def : SHFL_INSTR, + Requires; + } + } + } + } + } + } } -// On sm_70 these don't have to be convergent, so we may eventually want to -// implement non-convergent variant of this intrinsic. 
-defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC; -defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC; -defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC; - - // vote.{all,any,uni,ballot} multiclass VOTE { def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 0743a298671..83039241a7c 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -103,7 +103,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Do the transformation of an aggr load/copy/set to a loop // for (LoadInst *LI : AggrLoads) { - StoreInst *SI = dyn_cast(*LI->user_begin()); + auto *SI = cast(*LI->user_begin()); Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp index 76fb9f3fa69..945b7286b03 100644 --- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -41,12 +41,12 @@ void initializeNVPTXLowerAllocaPass(PassRegistry &); } namespace { -class NVPTXLowerAlloca : public BasicBlockPass { - bool runOnBasicBlock(BasicBlock &BB) override; +class NVPTXLowerAlloca : public FunctionPass { + bool runOnFunction(Function &F) override; public: static char ID; // Pass identification, replacement for typeid - NVPTXLowerAlloca() : BasicBlockPass(ID) {} + NVPTXLowerAlloca() : FunctionPass(ID) {} StringRef getPassName() const override { return "convert address space of alloca'ed memory to local"; } @@ -61,58 +61,61 @@ INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", // ============================================================================= // Main function for 
this pass. // ============================================================================= -bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) { - if (skipBasicBlock(BB)) +bool NVPTXLowerAlloca::runOnFunction(Function &F) { + if (skipFunction(F)) return false; bool Changed = false; - for (auto &I : BB) { - if (auto allocaInst = dyn_cast(&I)) { - Changed = true; - auto PTy = dyn_cast(allocaInst->getType()); - auto ETy = PTy->getElementType(); - auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); - auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); - auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); - auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal, - GenericAddrTy, ""); - NewASCToLocal->insertAfter(allocaInst); - NewASCToGeneric->insertAfter(NewASCToLocal); - for (Value::use_iterator UI = allocaInst->use_begin(), - UE = allocaInst->use_end(); - UI != UE; ) { - // Check Load, Store, GEP, and BitCast Uses on alloca and make them - // use the converted generic address, in order to expose non-generic - // addrspacecast to NVPTXInferAddressSpaces. For other types - // of instructions this is unnecessary and may introduce redundant - // address cast. 
- const auto &AllocaUse = *UI++; - auto LI = dyn_cast(AllocaUse.getUser()); - if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { - LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto SI = dyn_cast(AllocaUse.getUser()); - if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) { - SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto GI = dyn_cast(AllocaUse.getUser()); - if (GI && GI->getPointerOperand() == allocaInst) { - GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); - continue; - } - auto BI = dyn_cast(AllocaUse.getUser()); - if (BI && BI->getOperand(0) == allocaInst) { - BI->setOperand(0, NewASCToGeneric); - continue; + for (auto &BB : F) + for (auto &I : BB) { + if (auto allocaInst = dyn_cast(&I)) { + Changed = true; + auto PTy = dyn_cast(allocaInst->getType()); + auto ETy = PTy->getElementType(); + auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); + auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); + auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); + auto NewASCToGeneric = + new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, ""); + NewASCToLocal->insertAfter(allocaInst); + NewASCToGeneric->insertAfter(NewASCToLocal); + for (Value::use_iterator UI = allocaInst->use_begin(), + UE = allocaInst->use_end(); + UI != UE;) { + // Check Load, Store, GEP, and BitCast Uses on alloca and make them + // use the converted generic address, in order to expose non-generic + // addrspacecast to NVPTXInferAddressSpaces. For other types + // of instructions this is unnecessary and may introduce redundant + // address cast. 
+ const auto &AllocaUse = *UI++; + auto LI = dyn_cast(AllocaUse.getUser()); + if (LI && LI->getPointerOperand() == allocaInst && + !LI->isVolatile()) { + LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto SI = dyn_cast(AllocaUse.getUser()); + if (SI && SI->getPointerOperand() == allocaInst && + !SI->isVolatile()) { + SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto GI = dyn_cast(AllocaUse.getUser()); + if (GI && GI->getPointerOperand() == allocaInst) { + GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto BI = dyn_cast(AllocaUse.getUser()); + if (BI && BI->getOperand(0) == allocaInst) { + BI->setOperand(0, NewASCToGeneric); + continue; + } } } } - } return Changed; } -BasicBlockPass *llvm::createNVPTXLowerAllocaPass() { +FunctionPass *llvm::createNVPTXLowerAllocaPass() { return new NVPTXLowerAlloca(); } diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp index c5e02e34e25..c3c5f6fbcba 100644 --- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { // Set the alignment to alignment of the byval parameter. This is because, // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. 
- AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo())); + AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo()))); Arg->replaceAllUsesWith(AllocA); Value *ArgInParam = new AddrSpaceCastInst( diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index 629757db870..5e6411c61ea 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -81,7 +81,7 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { auto &Op = Root.getOperand(1); const auto &MRI = MF.getRegInfo(); MachineInstr *GenericAddrDef = nullptr; - if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) { + if (Op.isReg() && Register::isVirtualRegister(Op.getReg())) { GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); } diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 4c5a9adf1f6..a7127b0e9a9 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -178,7 +178,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign(); + unsigned Align = MFI.getLocalFrameMaxAlign().value(); // Adjust to alignment boundary. 
Offset = (Offset + Align - 1) / Align * Align; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 11b3fe2fa3d..f58fb571777 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -116,7 +116,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, CPU, FS, Options, Reloc::PIC_, getEffectiveCodeModel(CM, CodeModel::Small), OL), is64bit(is64bit), UseShortPointers(UseShortPointersOpt), - TLOF(llvm::make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { if (TT.getOS() == Triple::NVCL) drvInterface = NVPTX::NVCL; diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 665eb138325..43c2e992040 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -19,10 +19,11 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/Mutex.h" #include #include #include +#include #include #include @@ -38,12 +39,12 @@ static ManagedStatic annotationCache; static sys::Mutex Lock; void clearAnnotationCache(const Module *Mod) { - MutexGuard Guard(Lock); + std::lock_guard Guard(Lock); annotationCache->erase(Mod); } static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { - MutexGuard Guard(Lock); + std::lock_guard Guard(Lock); assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); // start index = 1, to skip the global variable key @@ -69,7 +70,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { } static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { - MutexGuard Guard(Lock); + std::lock_guard Guard(Lock); NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations"); if (!NMD) return; @@ -103,7 +104,7 @@ static void cacheAnnotationFromMD(const 
Module *m, const GlobalValue *gv) { bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, unsigned &retval) { - MutexGuard Guard(Lock); + std::lock_guard Guard(Lock); const Module *m = gv->getParent(); if ((*annotationCache).find(m) == (*annotationCache).end()) cacheAnnotationFromMD(m, gv); @@ -117,7 +118,7 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, std::vector &retval) { - MutexGuard Guard(Lock); + std::lock_guard Guard(Lock); const Module *m = gv->getParent(); if ((*annotationCache).find(m) == (*annotationCache).end()) cacheAnnotationFromMD(m, gv); diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index c9524da93ac..aedf5b713c3 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -579,7 +579,7 @@ public: static std::unique_ptr CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { - auto Op = make_unique(Token); + auto Op = std::make_unique(Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -608,7 +608,7 @@ public: static std::unique_ptr CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { - auto Op = make_unique(Immediate); + auto Op = std::make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -618,7 +618,7 @@ public: static std::unique_ptr CreateExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) { - auto Op = make_unique(Expression); + auto Op = std::make_unique(Expression); Op->Expr.Val = Val; Op->Expr.CRVal = EvaluateCRExpr(Val); Op->StartLoc = S; @@ -629,7 +629,7 @@ public: static std::unique_ptr CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) { - auto Op = make_unique(TLSRegister); + auto Op = std::make_unique(TLSRegister); Op->TLSReg.Sym = Sym; Op->StartLoc = S; Op->EndLoc = E; @@ -639,7 +639,7 @@ public: static 
std::unique_ptr CreateContextImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { - auto Op = make_unique(ContextImmediate); + auto Op = std::make_unique(ContextImmediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 7a8af57961c..3597fd15eeb 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -167,12 +167,6 @@ static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, QFRegs); } -static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { - return decodeRegisterClass(Inst, RegNo, RRegs); -} - static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 042ddf48d5d..20f752c3041 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -78,7 +78,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, // determine the type of the relocation unsigned Type; if (IsPCRel) { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented"); case PPC::fixup_ppc_br24: @@ -131,7 +131,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, break; } } else { - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("invalid fixup kind!"); case FK_NONE: Type = ELF::R_PPC_NONE; @@ -443,5 +443,5 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, std::unique_ptr llvm::createPPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) { - return 
llvm::make_unique(Is64Bit, OSABI); + return std::make_unique(Is64Bit, OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 0e64ae55ab1..7fc231618fa 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -66,6 +66,31 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { + // Customize printing of the addis instruction on AIX. When an operand is a + // symbol reference, the instruction syntax is changed to look like a load + // operation, i.e: + // Transform: addis $rD, $rA, $src --> addis $rD, $src($rA). + if (TT.isOSAIX() && + (MI->getOpcode() == PPC::ADDIS8 || MI->getOpcode() == PPC::ADDIS) && + MI->getOperand(2).isExpr()) { + assert((MI->getOperand(0).isReg() && MI->getOperand(1).isReg()) && + "The first and the second operand of an addis instruction" + " should be registers."); + + assert(isa(MI->getOperand(2).getExpr()) && + "The third operand of an addis instruction should be a symbol " + "reference expression if it is an expression at all."); + + O << "\taddis "; + printOperand(MI, 0, O); + O << ", "; + printOperand(MI, 2, O); + O << "("; + printOperand(MI, 1, O); + O << ")"; + return; + } + // Check for slwi/srwi mnemonics. 
if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 5f0005ea1d7..1216cd72728 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -86,4 +86,5 @@ void PPCXCOFFMCAsmInfo::anchor() {} PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4; + ZeroDirective = "\t.space\t"; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index d467f5c4a43..fb9dd5d7aa7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -19,8 +19,8 @@ using namespace llvm; const PPCMCExpr* PPCMCExpr::create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin); + bool IsDarwin, MCContext &Ctx) { + return new (Ctx) PPCMCExpr(Kind, Expr, IsDarwin); } void PPCMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 449e2c34f74..ad145456616 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -45,21 +45,21 @@ public: /// @{ static const PPCMCExpr *create(VariantKind Kind, const MCExpr *Expr, - bool isDarwin, MCContext &Ctx); + bool IsDarwin, MCContext &Ctx); static const PPCMCExpr *createLo(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_LO, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_LO, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHi(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HI, Expr, 
isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HI, Expr, IsDarwin, Ctx); } static const PPCMCExpr *createHa(const MCExpr *Expr, - bool isDarwin, MCContext &Ctx) { - return create(VK_PPC_HA, Expr, isDarwin, Ctx); + bool IsDarwin, MCContext &Ctx) { + return create(VK_PPC_HA, Expr, IsDarwin, Ctx); } /// @} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 4cf7fd15fa7..672f910ab08 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -178,7 +178,7 @@ static uint32_t getFixupOffset(const MCAsmLayout &Layout, uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); // On Mach-O, ppc_fixup_half16 relocations must refer to the // start of the instruction, not the second halfword, as ELF does - if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16) + if (Fixup.getTargetKind() == PPC::fixup_ppc_half16) FixupOffset &= ~uint32_t(3); return FixupOffset; } @@ -376,5 +376,5 @@ void PPCMachObjectWriter::RecordPPCRelocation( std::unique_ptr llvm::createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index 9c661286d45..7fdbb8990b5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -25,5 +25,5 @@ PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit) std::unique_ptr llvm::createPPCXCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td index 2a10322d3f4..f6cd8ed00c8 100644 --- 
a/lib/Target/PowerPC/P9InstrResources.td +++ b/lib/Target/PowerPC/P9InstrResources.td @@ -64,6 +64,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], XXLAND, XXLANDC, XXLEQV, + XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, @@ -124,8 +125,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "SRAD(I)?$"), (instregex "EXTSWSLI_32_64$"), (instregex "MFV(S)?RD$"), - (instregex "MTVSRD$"), - (instregex "MTVSRW(A|Z)$"), + (instregex "MTV(S)?RD$"), + (instregex "MTV(S)?RW(A|Z)$"), (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), (instregex "SUBF(I)?C(8)?$"), @@ -148,7 +149,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), (instregex "NEG(8)?$"), - (instregex "ADDI(S)?toc(HA|L)$"), + (instregex "ADDI(S)?toc(HA|L)(8)?$"), COPY, MCRF, MCRXRX, @@ -158,6 +159,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], XSNEGDP, XSCPSGNDP, MFVSRWZ, + MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index c6951ab67b0..0534773c4c9 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -50,10 +50,10 @@ namespace llvm { FunctionPass *createPPCExpandISELPass(); FunctionPass *createPPCPreEmitPeepholePass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin); + AsmPrinter &AP, bool IsDarwin); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin); + bool IsDarwin); void initializePPCCTRLoopsPass(PassRegistry&); #ifndef NDEBUG @@ -86,8 +86,8 @@ namespace llvm { MO_NO_FLAG, /// On a symbol operand "FOO", this indicates that the reference is actually - /// to "FOO@plt". This is used for calls and jumps to external functions on - /// for PIC calls on Linux and ELF systems. + /// to "FOO@plt". 
This is used for calls and jumps to external functions + /// and for PIC calls on 32-bit ELF systems. MO_PLT = 1, /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index bd87ce06b4f..66236b72a1a 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -51,9 +51,11 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -76,7 +78,7 @@ namespace { class PPCAsmPrinter : public AsmPrinter { protected: - MapVector TOC; + MapVector TOC; const PPCSubtarget *Subtarget; StackMaps SM; @@ -87,7 +89,7 @@ public: StringRef getPassName() const override { return "PowerPC Assembly Printer"; } - MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); + MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym); bool doInitialization(Module &M) override { if (!TOC.empty()) @@ -164,6 +166,14 @@ public: : PPCAsmPrinter(TM, std::move(Streamer)) {} StringRef getPassName() const override { return "AIX PPC Assembly Printer"; } + + void SetupMachineFunction(MachineFunction &MF) override; + + void EmitGlobalVariable(const GlobalVariable *GV) override; + + void EmitFunctionDescriptor() override; + + void EmitEndOfAsmFile(Module &) override; }; } // end anonymous namespace @@ -265,7 +275,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return true; // This operand uses VSX numbering. // If the operand is a VMX register, convert it to a VSX register. 
- unsigned Reg = MI->getOperand(OpNo).getReg(); + Register Reg = MI->getOperand(OpNo).getReg(); if (PPCInstrInfo::isVRRegister(Reg)) Reg = PPC::VSX32 + (Reg - PPC::V0); else if (PPCInstrInfo::isVFRegister(Reg)) @@ -328,7 +338,7 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. -MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { +MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym) { MCSymbol *&TOCEntry = TOC[Sym]; if (!TOCEntry) TOCEntry = createTempSymbol("C"); @@ -378,7 +388,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { if (CallTarget) { assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); + Register ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 0; // Materialize the jump address: EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8) @@ -502,13 +512,32 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, .addExpr(SymVar)); } +/// Map a machine operand for a TOC pseudo-machine instruction to its +/// corresponding MCSymbol. 
+static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, + AsmPrinter &AP) { + switch (MO.getType()) { + case MachineOperand::MO_GlobalAddress: + return AP.getSymbol(MO.getGlobal()); + case MachineOperand::MO_ConstantPoolIndex: + return AP.GetCPISymbol(MO.getIndex()); + case MachineOperand::MO_JumpTableIndex: + return AP.GetJTISymbol(MO.getIndex()); + case MachineOperand::MO_BlockAddress: + return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + default: + llvm_unreachable("Unexpected operand type to get symbol."); + } +} + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; - bool isPPC64 = Subtarget->isPPC64(); - bool isDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsDarwin = TM.getTargetTriple().isOSDarwin(); + const bool IsPPC64 = Subtarget->isPPC64(); + const bool IsAIX = Subtarget->isAIXABI(); const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); @@ -517,7 +546,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (!MI->isInlineAsm()) { for (const MachineOperand &MO: MI->operands()) { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Subtarget->hasSPE()) { if (PPC::F4RCRegClass.contains(Reg) || PPC::F8RCRegClass.contains(Reg) || @@ -595,7 +624,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha // addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l // Get the offset from the GOT Base Register to the GOT - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); if (Subtarget->isSecurePlt() && isPositionIndependent() ) { unsigned PICR = TmpInst.getOperand(0).getReg(); MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol( @@ -646,43 +675,57 @@ void PPCAsmPrinter::EmitInstruction(const 
MachineInstr *MI) { } } case PPC::LWZtoc: { - // Transform %r3 = LWZtoc @min1, %r2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + assert(!IsDarwin && "TOC is an ELF/XCOFF construct."); - // Change the opcode to LWZ, and the global address operand to be a - // reference to the GOT entry we will synthesize later. + // Transform %rN = LWZtoc @op1, %r2 + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to LWZ. TmpInst.setOpcode(PPC::LWZ); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for LWZtoc."); - // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + // Map the operand to its corresponding MCSymbol. + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); - if (PL == PICLevel::SmallPIC) { + // Create a reference to the GOT entry for the symbol. The GOT entry will be + // synthesized later. + if (PL == PICLevel::SmallPIC && !IsAIX) { const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_GOT, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); - } else { - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); - - const MCExpr *Exp = - MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, - OutContext); - const MCExpr *PB = - MCSymbolRefExpr::create(OutContext.getOrCreateSymbol(Twine(".LTOC")), - OutContext); - Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); - TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; } + + // Otherwise, use the TOC. 
'TOCEntry' is a label used to reference the + // storage allocated in the TOC which contains the address of + // 'MOSymbol'. Said TOC entry will be synthesized later. + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = + MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext); + + // AIX uses the label directly as the lwz displacement operand for + // references into the toc section. The displacement value will be generated + // relative to the toc-base. + if (IsAIX) { + assert( + TM.getCodeModel() == CodeModel::Small && + "This pseudo should only be selected for 32-bit small code model."); + TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + + // Create an explicit subtract expression between the local symbol and + // '.LTOC' to manifest the toc-relative offset. + const MCExpr *PB = MCSymbolRefExpr::create( + OutContext.getOrCreateSymbol(Twine(".LTOC")), OutContext); + Exp = MCBinaryExpr::createSub(Exp, PB, OutContext); + TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } @@ -690,72 +733,121 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtocBA: case PPC::LDtoc: { + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); + // Transform %x3 = LDtoc @min1, %x2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to LD, and the global address operand to be a - // reference to the TOC entry we will synthesize later. + // Change the opcode to LD. 
TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand!"); - // Map symbol -> label of TOC entry - assert(MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) - MOSymbol = getSymbol(MO.getGlobal()); - else if (MO.isCPI()) - MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + // Map the machine operand to its corresponding MCSymbol, then map the + // global address operand to be a reference to the TOC entry we will + // synthesize later. + MCSymbol *TOCEntry = + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; const MCExpr *Exp = - MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, - OutContext); + MCSymbolRefExpr::create(TOCEntry, VK, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } - case PPC::ADDIStocHA: { - // Transform %xd = ADDIStocHA %x2, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + assert((IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large) && + "This pseudo should only be selected for 32-bit large code model on" + " AIX."); + + // Transform %rd = ADDIStocHA %rA, @sym(%r2) + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to ADDIS. + TmpInst.setOpcode(PPC::ADDIS); - // Change the opcode to ADDIS8. If the global address is external, has - // common linkage, is a non-local function address, or is a jump table - // address, then generate a TOC entry and reference that. Otherwise - // reference the symbol directly. 
- TmpInst.setOpcode(PPC::ADDIS8); const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || - MO.isBlockAddress()) && - "Invalid operand for ADDIStocHA!"); - MCSymbol *MOSymbol = nullptr; - bool GlobalToc = false; + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for ADDIStocHA."); - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } else if (MO.isJTI()) { - MOSymbol = GetJTISymbol(MO.getIndex()); - } else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - } + // Map the machine operand to its corresponding MCSymbol. + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + // Always use TOC on AIX. Map the global address operand to be a reference + // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to + // reference the storage allocated in the TOC which contains the address of + // 'MOSymbol'. + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, + MCSymbolRefExpr::VK_PPC_U, + OutContext); + TmpInst.getOperand(2) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + case PPC::LWZtocL: { + assert(IsAIX && !IsPPC64 && TM.getCodeModel() == CodeModel::Large && + "This pseudo should only be selected for 32-bit large code model on" + " AIX."); + + // Transform %rd = LWZtocL @sym, %rs. + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to lwz. 
+ TmpInst.setOpcode(PPC::LWZ); + + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for LWZtocL."); + + // Map the machine operand to its corresponding MCSymbol. + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + // Always use TOC on AIX. Map the global address operand to be a reference + // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to + // reference the storage allocated in the TOC which contains the address of + // 'MOSymbol'. + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry, + MCSymbolRefExpr::VK_PPC_L, + OutContext); + TmpInst.getOperand(1) = MCOperand::createExpr(Exp); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + case PPC::ADDIStocHA8: { + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); + + // Transform %xd = ADDIStocHA8 %x2, @sym + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to ADDIS8. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is a + // constant pool index with large code model enabled, then generate a TOC + // entry and reference that. Otherwise, reference the symbol directly. + TmpInst.setOpcode(PPC::ADDIS8); + + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && + "Invalid operand for ADDIStocHA8!"); + + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + const bool GlobalToc = + MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || - TM.getCodeModel() == CodeModel::Large) + (MO.isCPI() && TM.getCodeModel() == CodeModel::Large)) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? 
MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA; + const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA, - OutContext); + MCSymbolRefExpr::create(MOSymbol, VK, OutContext); if (!MO.isJTI() && MO.getOffset()) Exp = MCBinaryExpr::createAdd(Exp, @@ -768,73 +860,59 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::LDtocL: { - // Transform %xd = LDtocL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + assert(!IsDarwin && "TOC is an ELF/XCOFF construct"); - // Change the opcode to LD. If the global address is external, has - // common linkage, or is a jump table address, then reference the - // associated TOC entry. Otherwise reference the symbol directly. + // Transform %xd = LDtocL @sym, %xs + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); + + // Change the opcode to LD. If the global address is the address of + // an external symbol, is a jump table address, is a block address, or is + // a constant pool index with large code model enabled, then generate a + // TOC entry and reference that. Otherwise, reference the symbol directly. 
TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for LDtocL!"); - MCSymbol *MOSymbol = nullptr; - if (MO.isJTI()) - MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else if (MO.isBlockAddress()) { - MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - if (TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } - else if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - MOSymbol = getSymbol(GV); - LLVM_DEBUG( - unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert((GVFlags & PPCII::MO_NLP_FLAG) && - "LDtocL used on symbol that could be accessed directly is " - "invalid. Must match ADDIStocHA.")); - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); - } + LLVM_DEBUG(assert( + (!MO.isGlobal() || Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. Must match ADDIStocHA8.")); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + + if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + + const MCSymbolRefExpr::VariantKind VK = + IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO; const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, - OutContext); + MCSymbolRefExpr::create(MOSymbol, VK, OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; } case PPC::ADDItocL: { // Transform %xd = ADDItocL %xs, @sym - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); - // Change the opcode to ADDI8. 
If the global address is external, then - // generate a TOC entry and reference that. Otherwise reference the + // Change the opcode to ADDI8. If the global address is external, then + // generate a TOC entry and reference that. Otherwise, reference the // symbol directly. TmpInst.setOpcode(PPC::ADDI8); - const MachineOperand &MO = MI->getOperand(2); - assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); - MCSymbol *MOSymbol = nullptr; - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - LLVM_DEBUG(unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); - assert(!(GVFlags & PPCII::MO_NLP_FLAG) && - "Interposable definitions must use indirect access.")); - MOSymbol = getSymbol(GV); - } else if (MO.isCPI()) { - MOSymbol = GetCPISymbol(MO.getIndex()); - } + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL."); + + LLVM_DEBUG(assert( + !(MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal())) && + "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, - OutContext); + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -842,13 +920,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDISgotTprelHA: { // Transform: %xd = ADDISgotTprelHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTprel = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, - OutContext); + 
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA, + OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) @@ -858,16 +936,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { // Transform %xd = LDgotTprelL @sym, %xs - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); // Change the opcode to LD. - TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ); + TmpInst.setOpcode(IsPPC64 ? PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *Exp = - MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO, - OutContext); + const MCExpr *Exp = MCSymbolRefExpr::create( + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO + : MCSymbolRefExpr::VK_PPC_GOT_TPREL, + OutContext); TmpInst.getOperand(1) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); return; @@ -920,7 +999,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsgdHA: { // Transform: %xd = ADDIStlsgdHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -943,11 +1022,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsGD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, + MOSymbol, IsPPC64 ? 
MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSGD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -965,7 +1044,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::ADDIStlsldHA: { // Transform: %xd = ADDIStlsldHA %x2, @sym // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha - assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); + assert(IsPPC64 && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -988,11 +1067,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymGotTlsLD = MCSymbolRefExpr::create( - MOSymbol, Subtarget->isPPC64() ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO - : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, + MOSymbol, IsPPC64 ? MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO + : MCSymbolRefExpr::VK_PPC_GOT_TLSLD, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -1021,7 +1100,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext); EmitToStreamer( *OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) + MCInstBuilder(IsPPC64 ? 
PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1040,7 +1119,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, OutContext); EmitToStreamer(*OutStreamer, - MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDI8 : PPC::ADDI) + MCInstBuilder(IsPPC64 ? PPC::ADDI8 : PPC::ADDI) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -1087,7 +1166,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // suite shows a handful of test cases that fail this check for // Darwin. Those need to be investigated before this sanity test // can be enabled for those subtargets. - if (!Subtarget->isDarwin()) { + if (!IsDarwin) { unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1; const MachineOperand &MO = MI->getOperand(OpNum); if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4) @@ -1098,7 +1177,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } } - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, IsDarwin); EmitToStreamer(*OutStreamer, TmpInst); } @@ -1368,15 +1447,16 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { ".got2", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); OutStreamer->SwitchSection(Section); - for (MapVector::iterator I = TOC.begin(), - E = TOC.end(); I != E; ++I) { - OutStreamer->EmitLabel(I->second); - MCSymbol *S = I->first; + for (const auto &TOCMapPair : TOC) { + const MCSymbol *const TOCEntryTarget = TOCMapPair.first; + MCSymbol *const TOCEntryLabel = TOCMapPair.second; + + OutStreamer->EmitLabel(TOCEntryLabel); if (isPPC64) { - TS.emitTCEntry(*S); + TS.emitTCEntry(*TOCEntryTarget); } else { OutStreamer->EmitValueToAlignment(4); - OutStreamer->EmitSymbolValue(S, 4); + OutStreamer->EmitSymbolValue(TOCEntryTarget, 4); } } } @@ -1602,7 +1682,7 @@ bool 
PPCDarwinAsmPrinter::doFinalization(Module &M) { if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(isPPC64 ? 3 : 2); + EmitAlignment(isPPC64 ? Align(8) : Align(4)); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { // L_foo$stub: @@ -1643,6 +1723,106 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { + // Get the function descriptor symbol. + CurrentFnDescSym = getSymbol(&MF.getFunction()); + // Set the containing csect. + MCSectionXCOFF *FnDescSec = OutStreamer->getContext().getXCOFFSection( + CurrentFnDescSym->getName(), XCOFF::XMC_DS, XCOFF::XTY_SD, + XCOFF::C_HIDEXT, SectionKind::getData()); + cast(CurrentFnDescSym)->setContainingCsect(FnDescSec); + + return AsmPrinter::SetupMachineFunction(MF); +} + +void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + // Early error checking limiting what is supported. + if (GV->isThreadLocal()) + report_fatal_error("Thread local not yet supported on AIX."); + + if (GV->hasSection()) + report_fatal_error("Custom section for Data not yet supported."); + + if (GV->hasComdat()) + report_fatal_error("COMDAT not yet supported by AIX."); + + SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); + if (!GVKind.isCommon() && !GVKind.isBSSLocal() && !GVKind.isData()) + report_fatal_error("Encountered a global variable kind that is " + "not supported yet."); + + // Create the containing csect and switch to it. + MCSectionXCOFF *CSect = cast( + getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); + OutStreamer->SwitchSection(CSect); + + // Create the symbol, set its storage class, and emit it. 
+ MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); + GVSym->setContainingCsect(CSect); + + const DataLayout &DL = GV->getParent()->getDataLayout(); + + // Handle common symbols. + if (GVKind.isCommon() || GVKind.isBSSLocal()) { + unsigned Align = + GV->getAlignment() ? GV->getAlignment() : DL.getPreferredAlignment(GV); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + + if (GVKind.isBSSLocal()) + OutStreamer->EmitXCOFFLocalCommonSymbol(GVSym, Size, Align); + else + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + return; + } + + MCSymbol *EmittedInitSym = GVSym; + EmitLinkage(GV, EmittedInitSym); + EmitAlignment(getGVAlignment(GV, DL), GV); + OutStreamer->EmitLabel(EmittedInitSym); + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); +} + +void PPCAIXAsmPrinter::EmitFunctionDescriptor() { + const DataLayout &DL = getDataLayout(); + const unsigned PointerSize = DL.getPointerSizeInBits() == 64 ? 8 : 4; + + MCSectionSubPair Current = OutStreamer->getCurrentSection(); + // Emit function descriptor. + OutStreamer->SwitchSection( + cast(CurrentFnDescSym)->getContainingCsect()); + OutStreamer->EmitLabel(CurrentFnDescSym); + // Emit function entry point address. + OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), + PointerSize); + // Emit TOC base address. + MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext), + PointerSize); + // Emit a null environment pointer. + OutStreamer->EmitIntValue(0, PointerSize); + + OutStreamer->SwitchSection(Current.first, Current.second); +} + +void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) { + // If there are no functions in this module, we will never need to reference + // the TOC base. + if (M.empty()) + return; + + // Emit TOC base. 
+ MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + MCSectionXCOFF *TOCBaseSection = OutStreamer->getContext().getXCOFFSection( + StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT, + SectionKind::getData()); + cast(TOCBaseSym)->setContainingCsect(TOCBaseSection); + // Switch to section to emit TOC base. + OutStreamer->SwitchSection(TOCBaseSection); +} + + /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the /// Darwin assembler can deal with. diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 5e9a661f8f0..d325b078979 100644 --- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -340,9 +340,10 @@ bool PPCBranchCoalescing::identicalOperands( if (Op1.isIdenticalTo(Op2)) { // filter out instructions with physical-register uses - if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg()) - // If the physical register is constant then we can assume the value - // has not changed between uses. + if (Op1.isReg() && + Register::isPhysicalRegister(Op1.getReg()) + // If the physical register is constant then we can assume the value + // has not changed between uses. && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) { LLVM_DEBUG(dbgs() << "The operands are not provably identical.\n"); return false; @@ -355,8 +356,8 @@ bool PPCBranchCoalescing::identicalOperands( // definition of the register produces the same value. If they produce the // same value, consider them to be identical. 
if (Op1.isReg() && Op2.isReg() && - TargetRegisterInfo::isVirtualRegister(Op1.getReg()) && - TargetRegisterInfo::isVirtualRegister(Op2.getReg())) { + Register::isVirtualRegister(Op1.getReg()) && + Register::isVirtualRegister(Op2.getReg())) { MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg()); MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg()); if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) { @@ -456,7 +457,7 @@ bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI, << TargetMBB.getNumber() << "\n"); for (auto &Use : MI.uses()) { - if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) { + if (Use.isReg() && Register::isVirtualRegister(Use.getReg())) { MachineInstr *DefInst = MRI->getVRegDef(Use.getReg()); if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) { LLVM_DEBUG(dbgs() << " *** Cannot move this instruction ***\n"); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 793d690baec..cdff4d383d2 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -81,21 +81,20 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { /// original Offset. unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset) { - unsigned Align = MBB.getAlignment(); - if (!Align) + const Align Alignment = MBB.getAlignment(); + if (Alignment == Align::None()) return 0; - unsigned AlignAmt = 1 << Align; - unsigned ParentAlign = MBB.getParent()->getAlignment(); + const Align ParentAlign = MBB.getParent()->getAlignment(); - if (Align <= ParentAlign) - return OffsetToAlignment(Offset, AlignAmt); + if (Alignment <= ParentAlign) + return offsetToAlignment(Offset, Alignment); // The alignment of this MBB is larger than the function's alignment, so we // can't tell whether or not it will insert nops. Assume that it will. 
if (FirstImpreciseBlock < 0) FirstImpreciseBlock = MBB.getNumber(); - return AlignAmt + OffsetToAlignment(Offset, AlignAmt); + return Alignment.value() + offsetToAlignment(Offset, Alignment); } /// We need to be careful about the offset of the first block in the function @@ -179,7 +178,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, const MachineBasicBlock *Dest, unsigned BrOffset) { int BranchSize; - unsigned MaxAlign = 2; + Align MaxAlign = Align(4); bool NeedExtraAdjustment = false; if (Dest->getNumber() <= Src->getNumber()) { // If this is a backwards branch, the delta is the offset from the @@ -192,8 +191,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, BranchSize += BlockSizes[DestBlock].first; for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) { BranchSize += BlockSizes[i].first; - MaxAlign = std::max(MaxAlign, - Fn.getBlockNumbered(i)->getAlignment()); + MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); } NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && @@ -207,8 +205,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, MaxAlign = std::max(MaxAlign, Dest->getAlignment()); for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) { BranchSize += BlockSizes[i].first; - MaxAlign = std::max(MaxAlign, - Fn.getBlockNumbered(i)->getAlignment()); + MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment()); } NeedExtraAdjustment = (FirstImpreciseBlock >= 0) && @@ -258,7 +255,7 @@ int PPCBSel::computeBranchSize(MachineFunction &Fn, // The computed offset is at most ((1 << alignment) - 4) bytes smaller // than actual offset. So we add this number to the offset for safety. if (NeedExtraAdjustment) - BranchSize += (1 << MaxAlign) - 4; + BranchSize += MaxAlign.value() - 4; return BranchSize; } @@ -339,16 +336,16 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { // 1. CR register // 2. 
Target MBB PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); - unsigned CRReg = I->getOperand(1).getReg(); + Register CRReg = I->getOperand(1).getReg(); // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. BuildMI(MBB, I, dl, TII->get(PPC::BCC)) .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2); } else if (I->getOpcode() == PPC::BC) { - unsigned CRBit = I->getOperand(0).getReg(); + Register CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BCn) { - unsigned CRBit = I->getOperand(0).getReg(); + Register CRBit = I->getOperand(0).getReg(); BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2); } else if (I->getOpcode() == PPC::BDNZ) { BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 264d6b590f9..d8425d89da9 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -162,7 +162,7 @@ class PPCFastISel final : public FastISel { bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg, const PPC::Predicate Pred); - bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt = true, unsigned FP64LoadOpc = PPC::LFD); bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr); @@ -451,7 +451,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset, // Emit a load instruction if possible, returning true if we succeeded, // otherwise false. See commentary below for how the register class of // the load is determined. 
-bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, +bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr, const TargetRegisterClass *RC, bool IsZExt, unsigned FP64LoadOpc) { unsigned Opc; @@ -469,7 +469,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, (ResultReg ? MRI.getRegClass(ResultReg) : (RC ? RC : (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) : - (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) : + (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) : (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass : &PPC::GPRC_and_GPRC_NOR0RegClass))))); @@ -612,7 +612,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { const TargetRegisterClass *RC = AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true, PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) return false; @@ -989,7 +989,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { unsigned DestReg; auto RC = MRI.getRegClass(SrcReg); if (PPCSubTarget->hasSPE()) { - DestReg = createResultReg(&PPC::SPE4RCRegClass); + DestReg = createResultReg(&PPC::GPRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD), DestReg) .addReg(SrcReg); @@ -1051,7 +1051,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } const TargetRegisterClass *RC = &PPC::F8RCRegClass; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc)) return 0; @@ -1176,7 +1176,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, const TargetRegisterClass *RC = AssignedReg ? 
MRI.getRegClass(AssignedReg) : nullptr; - unsigned ResultReg = 0; + Register ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) return 0; @@ -1229,9 +1229,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (PPCSubTarget->hasSPE()) { DestReg = createResultReg(&PPC::GPRCRegClass); if (IsSigned) - Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; + Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ; else - Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; + Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ; } else if (isVSFRCRegClass(RC)) { DestReg = createResultReg(&PPC::VSFRCRegClass); if (DstVT == MVT::i32) @@ -1717,7 +1717,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (const ConstantInt *CI = dyn_cast(RV)) { CCValAssign &VA = ValLocs[0]; - unsigned RetReg = VA.getLocReg(); + Register RetReg = VA.getLocReg(); // We still need to worry about properly extending the sign. For example, // we could have only a single bit or a constant that needs zero // extension rather than sign extension. Make sure we pass the return @@ -2002,7 +2002,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { const bool HasSPE = PPCSubTarget->hasSPE(); const TargetRegisterClass *RC; if (HasSPE) - RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass); + RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass); else RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass); @@ -2031,8 +2031,8 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) .addImm(0).addReg(TmpReg).addMemOperand(MMO); } else { - // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)). - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)). 
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx); // But for large code model, we must generate a LDtocL followed // by the LF[SD]. @@ -2085,16 +2085,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: - // LDtocL(GV, ADDIStocHA(%x2, GV)) + // LDtocL(GV, ADDIStocHA8(%x2, GV)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, GV), GV) - // Either way, start with the ADDIStocHA: + // ADDItocL(ADDIStocHA8(%x2, GV), GV) + // Either way, start with the ADDIStocHA8: unsigned HighPartReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); - if (GVFlags & PPCII::MO_NLP_FLAG) { + if (PPCSubTarget->isGVIndirectSymbol(GV)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); } else { @@ -2353,7 +2352,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, if (!PPCComputeAddress(LI->getOperand(0), Addr)) return false; - unsigned ResultReg = MI->getOperand(0).getReg(); + Register ResultReg = MI->getOperand(0).getReg(); if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt, PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD)) @@ -2464,7 +2463,7 @@ namespace llvm { const TargetLibraryInfo *LibInfo) { // Only available on 64-bit ELF for now. 
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget(); - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) + if (Subtarget.is64BitELFABI()) return new PPCFastISel(FuncInfo, LibInfo); return nullptr; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index ebfb1ef7f49..06a4d183e78 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -47,13 +47,15 @@ static const MCPhysReg VRRegNo[] = { }; static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { - if (STI.isDarwinABI()) + if (STI.isDarwinABI() || STI.isAIXABI()) return STI.isPPC64() ? 16 : 8; // SVR4 ABI: return STI.isPPC64() ? 16 : 4; } static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { + if (STI.isAIXABI()) + return STI.isPPC64() ? 40 : 20; return STI.isELFv2ABI() ? 24 : 40; } @@ -88,6 +90,11 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; } +static unsigned computeCRSaveOffset() { + // The condition register save offset needs to be updated for AIX PPC32. + return 8; +} + PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, STI.getPlatformStackAlignment(), 0), @@ -95,7 +102,8 @@ PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) TOCSaveOffset(computeTOCSaveOffset(Subtarget)), FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), LinkageSize(computeLinkageSize(Subtarget)), - BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} + BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), + CRSaveOffset(computeCRSaveOffset()) {} // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( @@ -370,8 +378,8 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { return; } - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if ((UsedRegMask & 0xFFFF) == UsedRegMask) { if (DstReg != SrcReg) @@ -781,15 +789,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); + bool isAIXABI = Subtarget.isAIXABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - assert((Subtarget.isDarwinABI() || isSVR4ABI) && - "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); + assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) && + "Unsupported PPC ABI."); // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. if (!isSVR4ABI) for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + if (isAIXABI) + report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); HandleVRSaveUpdate(*MBBI, TII); break; } @@ -819,7 +830,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool HasRedZone = isPPC64 || !isSVR4ABI; unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = RegInfo->getBaseRegister(MF); + Register BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; unsigned TOCReg = isPPC64 ? 
PPC::X2 : PPC::R2; @@ -908,6 +919,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !MustSaveCR) && "Prologue CR saving supported only in 64-bit mode"); + if (MustSaveCR && isAIXABI) + report_fatal_error("Prologue CR saving is unimplemented on AIX."); + // Check if we can move the stack update instruction (stdu) down the prologue // past the callee saves. Hopefully this will avoid the situation where the // saves are waiting for the update on the store with update to complete. @@ -966,7 +980,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, MIB.addReg(MustSaveCRs[i], CrState); BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); } @@ -1020,7 +1034,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert(HasRedZone && "A red zone is always available on PPC64"); BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); } @@ -1324,7 +1338,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // actually saved gets its own CFI record. unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); + nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; @@ -1387,7 +1401,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned BPReg = RegInfo->getBaseRegister(MF); + Register BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? 
PPC::X12 : PPC::R12; // another scratch reg @@ -1590,7 +1604,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // is live here. assert(HasRedZone && "Expecting red zone"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(SPReg); for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) @@ -1614,7 +1628,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(isPPC64 && "Expecting 64-bit mode"); assert(RBReg == SPReg && "Should be using SP as a base register"); BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) - .addImm(8) + .addImm(getCRSaveOffset()) .addReg(RBReg); } @@ -1762,8 +1776,8 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); - bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + const bool isPPC64 = Subtarget.isPPC64(); + const bool IsDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo &MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. @@ -1812,7 +1826,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. 
- if (!isPPC64 && !isDarwinABI && + if (!isPPC64 && !IsDarwinABI && (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || SavedRegs.test(PPC::CR4))) { @@ -1872,8 +1886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, assert((!MF.getInfo()->mustSaveTOC() || (Reg != PPC::X2 && Reg != PPC::R2)) && "Not expecting to try to spill R2 in a function that must save TOC"); - if (PPC::GPRCRegClass.contains(Reg) || - PPC::SPE4RCRegClass.contains(Reg)) { + if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1967,7 +1980,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, assert(FI && "No Base Pointer Save Slot!"); MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); - unsigned BP = RegInfo->getBaseRegister(MF); + Register BP = RegInfo->getBaseRegister(MF); if (PPC::G8RCRegClass.contains(BP)) { MinG8R = std::min(MinG8R, BP); HasG8SaveArea = true; @@ -2428,6 +2441,26 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +unsigned PPCFrameLowering::getTOCSaveOffset() const { + if (Subtarget.isAIXABI()) + // TOC save/restore is normally handled by the linker. + // Indirect calls should hit this limitation. 
+ report_fatal_error("TOC save is not implemented on AIX yet."); + return TOCSaveOffset; +} + +unsigned PPCFrameLowering::getFramePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("FramePointer is not implemented on AIX yet."); + return FramePointerSaveOffset; +} + +unsigned PPCFrameLowering::getBasePointerSaveOffset() const { + if (Subtarget.isAIXABI()) + report_fatal_error("BasePointer is not implemented on AIX yet."); + return BasePointerSaveOffset; +} + bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { if (MF.getInfo()->shrinkWrapDisabled()) return false; diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d116e9fd22e..a5fbc9acbb2 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -26,6 +26,7 @@ class PPCFrameLowering: public TargetFrameLowering { const unsigned FramePointerSaveOffset; const unsigned LinkageSize; const unsigned BasePointerSaveOffset; + const unsigned CRSaveOffset; /** * Find register[s] that can be used in function prologue and epilogue @@ -142,15 +143,19 @@ public: /// getTOCSaveOffset - Return the previous frame offset to save the /// TOC register -- 64-bit SVR4 ABI only. - unsigned getTOCSaveOffset() const { return TOCSaveOffset; } + unsigned getTOCSaveOffset() const; /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - unsigned getFramePointerSaveOffset() const { return FramePointerSaveOffset; } + unsigned getFramePointerSaveOffset() const; /// getBasePointerSaveOffset - Return the previous frame offset to save the /// base pointer. - unsigned getBasePointerSaveOffset() const { return BasePointerSaveOffset; } + unsigned getBasePointerSaveOffset() const; + + /// getCRSaveOffset - Return the previous frame offset to save the + /// CR register. 
+ unsigned getCRSaveOffset() const { return CRSaveOffset; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 543cac075f5..4ad6c88233f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -371,7 +371,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // by the scheduler. Detect them now. bool HasVectorVReg = false; for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + unsigned Reg = Register::index2VirtReg(i); if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { HasVectorVReg = true; break; @@ -391,8 +391,8 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Create two vregs - one to hold the VRSAVE register that is live-in to the // function and one for the value after having bits or'd into it. - unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); @@ -447,7 +447,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } else { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); - unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); + Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); @@ -5065,52 +5065,95 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - assert ((PPCSubTarget->isPPC64() || 
PPCSubTarget->isSVR4ABI()) && - "Only supported for 64-bit ABI and 32-bit SVR4"); - if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { - SDValue GA = N->getOperand(0); - SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); - transferMemOperands(N, MN); - ReplaceNode(N, MN); - return; - } + const bool isPPC64 = PPCSubTarget->isPPC64(); + const bool isELFABI = PPCSubTarget->isSVR4ABI(); + const bool isAIXABI = PPCSubTarget->isAIXABI(); - // For medium and large code model, we generate two instructions as - // described below. Otherwise we allow SelectCodeCommon to handle this, + assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + + // PowerPC only support small, medium and large code model. + const CodeModel::Model CModel = TM.getCodeModel(); + assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && + "PowerPC doesn't support tiny or kernel code models."); + + if (isAIXABI && CModel == CodeModel::Medium) + report_fatal_error("Medium code model is not supported on AIX."); + + // For 64-bit small code model, we allow SelectCodeCommon to handle this, // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - CodeModel::Model CModel = TM.getCodeModel(); - if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + if (isPPC64 && CModel == CodeModel::Small) break; - // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(@sym, ADDIStocHA(%x2, @sym)) + // Handle 32-bit small code model. + if (!isPPC64) { + // Transforms the ISD::TOC_ENTRY node to a PPCISD::LWZtoc. 
+ auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, + TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + + if (isELFABI) { + assert(TM.isPositionIndependent() && + "32-bit ELF can only have TOC entries in position independent" + " code."); + // 32-bit ELF always uses a small code model toc access. + replaceWithLWZtoc(N); + return; + } + + if (isAIXABI && CModel == CodeModel::Small) { + replaceWithLWZtoc(N); + return; + } + } + + assert(CModel != CodeModel::Small && "All small code models handled."); + + assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" + " ELF/AIX or 32-bit AIX in the following."); + + // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode + // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We + // generate two instructions as described below. The first source operand + // is a symbol reference. If it must be toc-referenced according to + // PPCSubTarget, we generate: + // [32-bit AIX] + // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) + // [64-bit ELF/AIX] + // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%x2, @sym), @sym) + // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); - SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, - TOCbase, GA); + + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; + SDNode *Tmp = CurDAG->getMachineNode( + isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); + if (PPCLowering->isAccessedAsGotIndirect(GA)) { - // If it is access as got-indirect, we need an extra LD to load + // If it is accessed as got-indirect, we need an extra LWZ/LD to load // the address. 
- SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + SDNode *MN = CurDAG->getMachineNode( + isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); + transferMemOperands(N, MN); ReplaceNode(N, MN); return; } - // Build the address relative to the TOC-pointer.. + // Build the address relative to the TOC-pointer. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. - assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && - "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); + assert(PPCSubTarget->is32BitELFABI() && + "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); @@ -6456,7 +6499,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { continue; if (!HBase.isMachineOpcode() || - HBase.getMachineOpcode() != PPC::ADDIStocHA) + HBase.getMachineOpcode() != PPC::ADDIStocHA8) continue; if (!Base.hasOneUse() || !HBase.hasOneUse()) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 24d50074860..8cf6a660b08 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -139,13 +139,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); - setMinStackArgumentAlignment(isPPC64 ? 8:4); + setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4)); // Set up the register classes. 
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { if (hasSPE()) { - addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass); + addRegisterClass(MVT::f32, &PPC::GPRCRegClass); addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); @@ -431,28 +431,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (Subtarget.isSVR4ABI()) { - if (isPPC64) { - // VAARG always uses double-word chunks, so promote anything smaller. - setOperationAction(ISD::VAARG, MVT::i1, Promote); - AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i8, Promote); - AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i16, Promote); - AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); - setOperationAction(ISD::VAARG, MVT::i32, Promote); - AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - } else { - // VAARG is custom lowered with the 32-bit SVR4 ABI. - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); - } + if (Subtarget.is64BitELFABI()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else if (Subtarget.is32BitELFABI()) { + // VAARG is custom lowered with the 32-bit SVR4 ABI. 
+ setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); } else setOperationAction(ISD::VAARG, MVT::Other, Expand); - if (Subtarget.isSVR4ABI() && !isPPC64) - // VACOPY is custom lowered with the 32-bit SVR4 ABI. + // VACOPY is custom lowered with the 32-bit SVR4 ABI. + if (Subtarget.is32BitELFABI()) setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); @@ -553,17 +551,25 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // For v2i64, these are only valid with P8Vector. This is corrected after // the loop. 
- setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); + if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + } + else { + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + } if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM, VT, Legal); @@ -646,7 +652,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -944,7 +950,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); - setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::FNEG , MVT::v4f64, Legal); @@ -1118,6 +1123,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); @@ -1172,9 +1179,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setJumpIsExpensive(); } - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); if (Subtarget.isDarwin()) - setPrefFunctionAlignment(4); + 
setPrefFunctionAlignment(Align(16)); switch (Subtarget.getDarwinDirective()) { default: break; @@ -1191,8 +1198,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: - setPrefFunctionAlignment(4); - setPrefLoopAlignment(4); + setPrefLoopAlignment(Align(16)); + setPrefFunctionAlignment(Align(16)); break; } @@ -1352,6 +1359,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; + case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; + case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -1396,7 +1405,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; + case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; + case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; } return nullptr; } @@ -1517,7 +1527,7 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = - static_cast(DAG.getSubtarget()); + static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; @@ -1769,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to -/// VSPLTB/VSPLTH/VSPLTW. +/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). 
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && - (EltSize == 1 || EltSize == 2 || EltSize == 4)); + assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && + EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. @@ -2065,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, } -/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the -/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, - SelectionDAG &DAG) { +/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is +/// appropriate for PPC mnemonics (which have a big endian bias - namely +/// elements are counted from the left of the vector register). +unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) @@ -2667,12 +2678,14 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } -static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, - SDValue GA) { +SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, + SDValue GA) const { + const bool Is64Bit = Subtarget.isPPC64(); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; - SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : - DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); - + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) + : Subtarget.isAIXABI() + ? 
DAG.getRegister(PPC::R2, VT) + : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, @@ -2688,10 +2701,10 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return getTOCEntry(DAG, SDLoc(CP), true, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2701,7 +2714,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(CP), false, GA); + return getTOCEntry(DAG, SDLoc(CP), GA); } SDValue CPIHi = @@ -2764,10 +2777,10 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return getTOCEntry(DAG, SDLoc(JT), true, GA); + return getTOCEntry(DAG, SDLoc(JT), GA); } unsigned MOHiFlag, MOLoFlag; @@ -2777,7 +2790,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, SDLoc(GA), false, GA); + return getTOCEntry(DAG, SDLoc(GA), GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -2793,14 +2806,18 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual BlockAddress is stored in the TOC. - if (Subtarget.isSVR4ABI() && - (Subtarget.isPPC64() || isPositionIndependent())) { - if (Subtarget.isPPC64()) - setUsesTOCBasePtr(DAG); + if (Subtarget.is64BitELFABI()) { + setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA); + return getTOCEntry(DAG, SDLoc(BASDN), GA); } + // 32-bit position-independent ELF stores the BlockAddress in the .got. + if (Subtarget.is32BitELFABI() && isPositionIndependent()) + return getTOCEntry( + DAG, SDLoc(BASDN), + DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset())); + unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); @@ -2913,12 +2930,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return getTOCEntry(DAG, DL, true, GA); + return getTOCEntry(DAG, DL, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2929,7 +2946,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return getTOCEntry(DAG, DL, false, GA); + return getTOCEntry(DAG, DL, GA); } SDValue GAHi = @@ -3235,8 +3252,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV, nextOffset)); } -/// FPR - The set of FP registers that should be allocated for arguments, -/// on Darwin. +/// FPR - The set of FP registers that should be allocated for arguments +/// on Darwin and AIX. static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; @@ -3377,17 +3394,17 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { - if (Subtarget.isSVR4ABI()) { - if (Subtarget.isPPC64()) - return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - else - return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } else { - return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } + if (Subtarget.is64BitELFABI()) + return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + else if (Subtarget.is32BitELFABI()) + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); + + // FIXME: We are using this for both AIX and Darwin. 
We should add appropriate + // AIX testing, and rename it appropriately. + return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( @@ -3467,7 +3484,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else if (Subtarget.hasSPE()) - RC = &PPC::SPE4RCRegClass; + RC = &PPC::GPRCRegClass; else RC = &PPC::F4RCRegClass; break; @@ -4516,7 +4533,7 @@ callsShareTOCBase(const Function *Caller, SDValue Callee, static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl &Outs) { - assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64()); + assert(Subtarget.is64BitELFABI()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); @@ -4926,7 +4943,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, ImmutableCallSite CS, const PPCSubtarget &Subtarget) { bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool isELFv2ABI = Subtarget.isELFv2ABI(); + bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); bool isAIXABI = Subtarget.isAIXABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); @@ -4997,7 +5014,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - if (isSVR4ABI && isPPC64 && !isELFv2ABI) { + if (is64BitELFv1ABI) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). 
@@ -5085,7 +5102,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) + if (is64BitELFv1ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -6730,8 +6747,12 @@ SDValue PPCTargetLowering::LowerCall_AIX( const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64) : array_lengthof(GPR_32); + const unsigned NumFPRs = array_lengthof(FPR); + assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing " + "on AIX"); + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; - unsigned GPR_idx = 0; + unsigned GPR_idx = 0, FPR_idx = 0; SmallVector, 8> RegsToPass; @@ -6768,6 +6789,20 @@ SDValue PPCTargetLowering::LowerCall_AIX( break; case MVT::f32: case MVT::f64: + if (FPR_idx != NumFPRs) { + RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + + // If we have any FPRs remaining, we may also have GPRs remaining. + // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available + // GPRs. 
+ if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) + ++GPR_idx; + } else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: @@ -8152,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { Op0.getOperand(1)); } +static const SDValue *getNormalLoadInput(const SDValue &Op) { + const SDValue *InputLoad = &Op; + if (InputLoad->getOpcode() == ISD::BITCAST) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() != ISD::LOAD) + return nullptr; + LoadSDNode *LD = cast(*InputLoad); + return ISD::isNormalLoad(LD) ? InputLoad : nullptr; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -8274,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) { + + const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); + // Handle load-and-splat patterns as we have instructions that will do this + // in one go. + if (InputLoad && DAG.isSplatValue(Op, true)) { + LoadSDNode *LD = cast(*InputLoad); + + // We have handling for 4 and 8 byte elements. + unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); + + // Checking for a single use of this load, we have to check for vector + // width (128 bits) / ElementSize uses (since each operand of the + // BUILD_VECTOR is a separate use of the value. 
+ if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) && + ((Subtarget.hasVSX() && ElementSize == 64) || + (Subtarget.hasP9Vector() && ElementSize == 32))) { + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + DAG.getValueType(Op.getValueType()) // VT + }; + return + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, + DAG.getVTList(Op.getValueType(), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + } + } + // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be // lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. @@ -8759,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned ShiftElts, InsertAtByte; bool Swap = false; + + // If this is a load-and-splat, we can do that with a single instruction + // in some cases. However if the load has multiple uses, we don't want to + // combine it because that will just produce multiple loads. + const SDValue *InputLoad = getNormalLoadInput(V1); + if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && + (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) && + InputLoad->hasOneUse()) { + bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4); + int SplatIdx = + PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG); + + LoadSDNode *LD = cast(*InputLoad); + // For 4-byte load-and-splat, we need Power9. + if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { + uint64_t Offset = 0; + if (IsFourByte) + Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; + else + Offset = isLittleEndian ? 
(1 - SplatIdx) * 8 : SplatIdx * 8; + SDValue BasePtr = LD->getBasePtr(); + if (Offset != 0) + BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + BasePtr, DAG.getIntPtrConstant(Offset, dl)); + SDValue Ops[] = { + LD->getChain(), // Chain + BasePtr, // BasePtr + DAG.getValueType(Op.getValueType()) // VT + }; + SDVTList VTL = + DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); + SDValue LdSplt = + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, + Ops, LD->getMemoryVT(), LD->getMemOperand()); + if (LdSplt.getValueType() != SVOp->getValueType(0)) + LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); + return LdSplt; + } + } if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { @@ -8835,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { - int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, @@ -9880,6 +9994,30 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { switch (Op0.getOpcode()) { default: return SDValue(); + case ISD::EXTRACT_SUBVECTOR: { + assert(Op0.getNumOperands() == 2 && + isa(Op0->getOperand(1)) && + "Node should have 2 operands with second one being a constant!"); + + if (Op0.getOperand(0).getValueType() != MVT::v4f32) + return SDValue(); + + // Custom lower is only done for high or low doubleword. + int Idx = cast(Op0.getOperand(1))->getZExtValue(); + if (Idx % 2 != 0) + return SDValue(); + + // Since input is v4f32, at this point Idx is either 0 or 2. + // Shift to get the doubleword position we want. + int DWord = Idx >> 1; + + // High and low word positions are different on little endian. 
+ if (Subtarget.isLittleEndian()) + DWord ^= 0x1; + + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, + Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32)); + } case ISD::FADD: case ISD::FMUL: case ISD::FSUB: { @@ -9891,26 +10029,25 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Generate new load node. LoadSDNode *LD = cast(LdOp); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - NewLoad[i] = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), - LD->getMemOperand()); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + NewLoad[i] = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); } - SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, - NewLoad[0], NewLoad[1], - Op0.getNode()->getFlags()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); + SDValue NewOp = + DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0], + NewLoad[1], Op0.getNode()->getFlags()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp, + DAG.getConstant(0, dl, MVT::i32)); } case ISD::LOAD: { LoadSDNode *LD = cast(Op0); - SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; - SDValue NewLd = - DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, - DAG.getVTList(MVT::v4f32, MVT::Other), - LoadOps, LD->getMemoryVT(), LD->getMemOperand()); - return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); + SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; + SDValue NewLd = DAG.getMemIntrinsicNode( + PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, + LD->getMemoryVT(), LD->getMemOperand()); + return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd, + DAG.getConstant(0, dl, MVT::i32)); } } llvm_unreachable("ERROR:Should return for all cases within swtich."); 
@@ -10048,9 +10185,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::TRUNCATE: { EVT TrgVT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); if (TrgVT.isVector() && isOperationCustom(N->getOpcode(), TrgVT) && - N->getOperand(0).getValueType().getSizeInBits() <= 128) + OpVT.getSizeInBits() <= 128 && + isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits())) Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); return; } @@ -10192,7 +10331,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, if (CmpOpcode) { // Signed comparisons of byte or halfword values must be sign-extended. if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { - unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg).addReg(dest); BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) @@ -10243,10 +10382,10 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned incr = MI.getOperand(3).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -10364,7 +10503,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend. 
- unsigned SReg = RegInfo.createVirtualRegister(GPRC); + Register SReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::AND), SReg) .addReg(TmpDestReg) .addReg(MaskReg); @@ -10375,7 +10514,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) .addReg(SReg) .addReg(ShiftReg); - unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC); + Register ValueSReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) .addReg(ValueReg); ValueReg = ValueSReg; @@ -10426,11 +10565,11 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned restoreDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && @@ -10482,10 +10621,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Prepare IP either in reg. 
const TargetRegisterClass *PtrRC = getRegClassFor(PVT); - unsigned LabelReg = MRI.createVirtualRegister(PtrRC); - unsigned BufReg = MI.getOperand(1).getReg(); + Register LabelReg = MRI.createVirtualRegister(PtrRC); + Register BufReg = MI.getOperand(1).getReg(); - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { + if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) @@ -10570,7 +10709,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; @@ -10587,7 +10726,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); - unsigned BufReg = MI.getOperand(0).getReg(); + Register BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored @@ -10662,7 +10801,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { - if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() && + if (Subtarget.is64BitELFABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. 
It can't however, because there is no @@ -10828,15 +10967,15 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); + Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); - unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg) @@ -10978,11 +11117,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, StoreMnemonic = PPC::STDCX; break; } - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11057,11 +11196,11 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, bool isLittleEndian = Subtarget.isLittleEndian(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; - unsigned dest = MI.getOperand(0).getReg(); - unsigned ptrA = MI.getOperand(1).getReg(); - unsigned ptrB = MI.getOperand(2).getReg(); - unsigned oldval = MI.getOperand(3).getReg(); - unsigned newval = 
MI.getOperand(4).getReg(); + Register dest = MI.getOperand(0).getReg(); + Register ptrA = MI.getOperand(1).getReg(); + Register ptrB = MI.getOperand(2).getReg(); + Register oldval = MI.getOperand(3).getReg(); + Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -11238,13 +11377,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src1 = MI.getOperand(1).getReg(); - unsigned Src2 = MI.getOperand(2).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); @@ -11270,7 +11409,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned Dest = RegInfo.createVirtualRegister( + Register Dest = RegInfo.createVirtualRegister( Opcode == PPC::ANDIo ? 
&PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc dl = MI.getDebugLoc(); @@ -11283,7 +11422,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) @@ -11297,7 +11436,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addReg(PPC::CR0EQ); } else if (MI.getOpcode() == PPC::SETRNDi) { DebugLoc dl = MI.getDebugLoc(); - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11378,7 +11517,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } }; - unsigned OldFPSCRReg = MI.getOperand(0).getReg(); + Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. 
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); @@ -11393,12 +11532,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // mtfsf 255, NewFPSCRReg MachineOperand SrcOp = MI.getOperand(1); MachineRegisterInfo &RegInfo = F->getRegInfo(); - unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); - unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); // The first operand of INSERT_SUBREG should be a register which has // subregisters, we only care about its RegClass, so we should use an @@ -11409,14 +11548,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .add(SrcOp) .addImm(1); - unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) .addReg(OldFPSCRTmpReg) .addReg(ExtSrcReg) .addImm(0) .addImm(62); - unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63 @@ -13113,6 +13252,61 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, return Val; } +SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, + LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const { + assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) && + "Not a reverse memop pattern!"); + + auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { + auto 
Mask = SVN->getMask(); + int i = 0; + auto I = Mask.rbegin(); + auto E = Mask.rend(); + + for (; I != E; ++I) { + if (*I != i) + return false; + i++; + } + return true; + }; + + SelectionDAG &DAG = DCI.DAG; + EVT VT = SVN->getValueType(0); + + if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) + return SDValue(); + + // Before P9, we have PPCVSXSwapRemoval pass to hack the element order. + // See comment in PPCVSXSwapRemoval.cpp. + // It is conflict with PPCVSXSwapRemoval opt. So we don't do it. + if (!Subtarget.hasP9Vector()) + return SDValue(); + + if(!IsElementReverse(SVN)) + return SDValue(); + + if (LSBase->getOpcode() == ISD::LOAD) { + SDLoc dl(SVN); + SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + if (LSBase->getOpcode() == ISD::STORE) { + SDLoc dl(LSBase); + SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), + LSBase->getBasePtr()}; + return DAG.getMemIntrinsicNode( + PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, + LSBase->getMemoryVT(), LSBase->getMemOperand()); + } + + llvm_unreachable("Expected a load or store node here"); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -13159,6 +13353,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); + case ISD::VECTOR_SHUFFLE: + if (ISD::isNormalLoad(N->getOperand(0).getNode())) { + LSBaseSDNode* LSBase = cast(N->getOperand(0)); + return combineVReverseMemOP(cast(N), LSBase, DCI); + } + break; case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); @@ -13170,6 +13370,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return Val; } + if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { + 
ShuffleVectorSDNode *SVN = cast(N->getOperand(1)); + SDValue Val= combineVReverseMemOP(SVN, cast(N), DCI); + if (Val) + return Val; + } + // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && @@ -13903,7 +14110,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, } } -unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { +Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getDarwinDirective()) { default: break; case PPC::DIR_970: @@ -13924,7 +14131,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // Actual alignment of the loop will depend on the hotness check and other // logic in alignBlocks. if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) - return 5; + return Align(32); } const PPCInstrInfo *TII = Subtarget.getInstrInfo(); @@ -13940,7 +14147,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { } if (LoopSize > 16 && LoopSize <= 32) - return 5; + return Align(32); break; } @@ -14063,7 +14270,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'f': if (Subtarget.hasSPE()) { if (VT == MVT::f32 || VT == MVT::i32) - return std::make_pair(0U, &PPC::SPE4RCRegClass); + return std::make_pair(0U, &PPC::GPRCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::SPERCRegClass); } else { @@ -14306,22 +14513,22 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); + bool IsDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) report_fatal_error("Invalid register global variable type"); bool is64Bit = isPPC64 && VT == MVT::i64; - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) - .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2) - .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : + .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2) + .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() : (is64Bit ? PPC::X13 : PPC::R13)) - .Default(0); + .Default(Register()); if (Reg) return Reg; @@ -14330,14 +14537,17 @@ unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { // 32-bit SVR4 ABI access everything as got-indirect. - if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (Subtarget.is32BitELFABI()) + return true; + + // AIX accesses everything indirectly through the TOC, which is similar to + // the GOT. + if (Subtarget.isAIXABI()) return true; CodeModel::Model CModel = getTargetMachine().getCodeModel(); // If it is small or large code model, module locals are accessed - // indirectly by loading their address from .toc/.got. The difference - // is that for large code model we have ADDISTocHa + LDtocL and for - // small code model we simply have LDtoc. + // indirectly by loading their address from .toc/.got. 
if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; @@ -14345,14 +14555,8 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { if (isa(GA) || isa(GA)) return true; - if (GlobalAddressSDNode *G = dyn_cast(GA)) { - const GlobalValue *GV = G->getGlobal(); - unsigned char GVFlags = Subtarget.classifyGlobalReference(GV); - // The NLP flag indicates that a global access has to use an - // extra indirection. - if (GVFlags & PPCII::MO_NLP_FLAG) - return true; - } + if (GlobalAddressSDNode *G = dyn_cast(GA)) + return Subtarget.isGVIndirectSymbol(G->getGlobal()); return false; } @@ -14417,7 +14621,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -14451,7 +14655,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.size = VT.getStoreSize(); - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOLoad; return true; } @@ -14503,7 +14707,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOStore; return true; } @@ -14536,7 +14740,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.size = VT.getStoreSize(); - Info.align = 1; + Info.align = Align::None(); Info.flags = MachineMemOperand::MOStore; return true; } @@ -14786,7 +14990,7 @@ void PPCTargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // 
Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be @@ -15146,7 +15350,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. - if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) + if (!Subtarget.is64BitELFABI()) return false; // If not a tail call then no need to proceed. diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 97422c6eda3..62922ea2d4c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -412,8 +412,9 @@ namespace llvm { /// representation. QBFLT, - /// Custom extend v4f32 to v2f64. - FP_EXTEND_LH, + /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or + /// lower (IDX=1) half of v4f32 to v2f64. + FP_EXTEND_HALF, /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of @@ -456,15 +457,29 @@ namespace llvm { /// an xxswapd. LXVD2X, + /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. + /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on + /// the vector type to load vector in big-endian element order. + LOAD_VEC_BE, + /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a /// v2f32 value into the lower half of a VSR register. LD_VSX_LH, + /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory + /// instructions such as LXVDSX, LXVWSX. + LD_SPLAT, + /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. STXVD2X, + /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. 
+ /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on + /// the vector type to store vector in big-endian element order. + STORE_VEC_BE, + /// Store scalar integers from VSR. ST_VSR_SCAL_INT, @@ -563,9 +578,11 @@ namespace llvm { bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the - /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. - unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); + /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is + /// appropriate for PPC mnemonics (which have a big endian bias - namely + /// elements are counted from the left of the vector register). + unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG); /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element @@ -716,8 +733,8 @@ namespace llvm { SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const override; - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, @@ -725,7 +742,7 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const override; - unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + Align getPrefLoopAlignment(MachineLoop *ML) const override; bool shouldInsertFencesForAtomic(const Instruction *I) const override { return true; @@ -834,6 +851,18 @@ namespace llvm { return true; } + bool isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const override { + // Only handle float load/store pair because float(fpr) load/store + // instruction has more cycles 
than integer(gpr) load/store in PPC. + if (Opc != ISD::LOAD && Opc != ISD::STORE) + return false; + if (VT != MVT::f32 && VT != MVT::f64) + return false; + + return true; + } + // Returns true if the address of the global is stored in TOC entry. bool isAccessedAsGotIndirect(SDValue N) const; @@ -998,6 +1027,8 @@ namespace llvm { SDValue &FPOpOut, const SDLoc &dl) const; + SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; @@ -1155,6 +1186,8 @@ namespace llvm { SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d598567f8e4..f16187149d3 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1099,8 +1099,8 @@ def LDMX : XForm_1<31, 309, (outs g8rc:$rD), (ins memrr:$src), // Support for medium and large code model. 
let hasSideEffects = 0 in { let isReMaterializable = 1 in { -def ADDIStocHA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), - "#ADDIStocHA", []>, isPPC64; +def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), + "#ADDIStocHA8", []>, isPPC64; def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", []>, isPPC64; } diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 8176c5120a8..fd3fc2af232 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -215,21 +215,21 @@ def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. def VSPLTB_get_imm : SDNodeXForm; def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast(N), 1); }], VSPLTB_get_imm>; def VSPLTH_get_imm : SDNodeXForm; def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast(N), 2); }], VSPLTH_get_imm>; def VSPLTW_get_imm : SDNodeXForm; def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ @@ -331,7 +331,7 @@ class VXBX_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VXCR_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_CR; + [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. 
@@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in { def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", IIC_LdStStore, - [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", IIC_LdStLoad, - [(int_ppc_altivec_mtvscr v4i32:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads. def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src), diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index a48eb169069..96b9c9a119c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -1209,20 +1209,13 @@ class XX3Form opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } -class XX3Form_Zero opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, +class XX3Form_SameOp opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : XX3Form { let XA = XT; let XB = XT; } -class XX3Form_SetZero opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : XX3Form { - let XB = XT; - let XA = XT; -} - class XX3Form_1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : I { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a787bdd56b9..6b10672965c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -90,7 +90,6 @@ enum SpillOpcodeKey { SOK_QuadBitSpill, SOK_SpillToVSR, SOK_SPESpill, - SOK_SPE4Spill, SOK_LastOpcodeSpill // This must be last on the enum. 
}; @@ -184,10 +183,10 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; const MachineOperand &DefMO = DefMI.getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); + Register Reg = DefMO.getReg(); bool IsRegCR; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const MachineRegisterInfo *MRI = &DefMI.getParent()->getParent()->getRegInfo(); IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) || @@ -330,11 +329,13 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, case PPC::LIS8: case PPC::QVGPCI: case PPC::ADDIStocHA: + case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: + case PPC::XXLEQVOnes: case PPC::V_SET0B: case PPC::V_SET0H: case PPC::V_SET0: @@ -448,7 +449,8 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return &MI; } -bool PPCInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { // For VSX A-Type FMA instructions, it is the first two operands that can be // commuted, however, because the non-encoded tied input operand is listed @@ -966,11 +968,11 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, getKillRegState(KillSrc); return; } else if (PPC::SPERCRegClass.contains(SrcReg) && - PPC::SPE4RCRegClass.contains(DestReg)) { + PPC::GPRCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg); getKillRegState(KillSrc); return; - } else if (PPC::SPE4RCRegClass.contains(SrcReg) && + } else if (PPC::GPRCRegClass.contains(SrcReg) && PPC::SPERCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg); getKillRegState(KillSrc); @@ -1009,8 +1011,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = 
PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; - else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) - Opc = PPC::OR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else @@ -1043,8 +1043,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1083,8 +1081,6 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1133,8 +1129,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.hasSubClassEq(RC)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { @@ -1173,8 +1167,6 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg, OpcodeIndex = SOK_Float4Spill; } else if (PPC::SPERCRegClass.contains(Reg)) { OpcodeIndex = SOK_SPESpill; - } else if (PPC::SPE4RCRegClass.contains(Reg)) { - OpcodeIndex = SOK_SPE4Spill; } else if (PPC::CRRCRegClass.contains(Reg)) { OpcodeIndex = SOK_CRSpill; } else if (PPC::CRBITRCRegClass.contains(Reg)) { @@ -1648,7 +1640,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, return false; int OpC = CmpInstr.getOpcode(); - unsigned CRReg = 
CmpInstr.getOperand(0).getReg(); + Register CRReg = CmpInstr.getOperand(0).getReg(); // FP record forms set CR1 based on the exception status bits, not a // comparison with zero. @@ -1671,7 +1663,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Look through copies unless that gets us to a physical register. unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI); - if (TargetRegisterInfo::isVirtualRegister(ActualSrc)) + if (Register::isVirtualRegister(ActualSrc)) SrcReg = ActualSrc; // Get the unique definition of SrcReg. @@ -1937,7 +1929,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Rotates are expensive instructions. If we're emitting a record-form // rotate that can just be an andi/andis, we should just emit that. if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) { - unsigned GPRRes = MI->getOperand(0).getReg(); + Register GPRRes = MI->getOperand(0).getReg(); int64_t SH = MI->getOperand(2).getImm(); int64_t MB = MI->getOperand(3).getImm(); int64_t ME = MI->getOperand(4).getImm(); @@ -2122,7 +2114,7 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { llvm_unreachable("Unknown Operation!"); } - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) @@ -2184,7 +2176,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) { MI.setDesc(get(PPC::DFLOADf64)); return expandPostRAPseudo(MI); @@ -2194,7 +2186,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_ST: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = 
MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::DFSTOREf64)); @@ -2206,7 +2198,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_LDX: { - unsigned TargetReg = MI.getOperand(0).getReg(); + Register TargetReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(TargetReg)) MI.setDesc(get(PPC::LXSDX)); else @@ -2214,7 +2206,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; } case PPC::SPILLTOVSR_STX: { - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); if (PPC::VSFRCRegClass.contains(SrcReg)) { NumStoreSPILLVSRRCAsVec++; MI.setDesc(get(PPC::STXSDX)); @@ -2279,10 +2271,10 @@ void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI, int64_t Imm) const { assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG"); // Replace the REG with the Immediate. - unsigned InUseReg = MI.getOperand(OpNo).getReg(); + Register InUseReg = MI.getOperand(OpNo).getReg(); MI.getOperand(OpNo).ChangeToImmediate(Imm); - if (empty(MI.implicit_operands())) + if (MI.implicit_operands().empty()) return; // We need to make sure that the MI didn't have any implicit use @@ -2328,6 +2320,23 @@ void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, .addImm(LII.Imm); } +MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI, + bool &SeenIntermediateUse) const { + assert(!MI.getParent()->getParent()->getRegInfo().isSSA() && + "Should be called after register allocation."); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; + It++; + SeenIntermediateUse = false; + for (; It != E; ++It) { + if (It->modifiesRegister(Reg, TRI)) + return &*It; + if (It->readsRegister(Reg, TRI)) + SeenIntermediateUse = true; + } + return nullptr; +} + MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineInstr &MI, 
unsigned &OpNoForForwarding, @@ -2342,11 +2351,11 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( for (int i = 1, e = MI.getNumOperands(); i < e; i++) { if (!MI.getOperand(i).isReg()) continue; - unsigned Reg = MI.getOperand(i).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = MI.getOperand(i).getReg(); + if (!Register::isVirtualRegister(Reg)) continue; unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI); - if (TargetRegisterInfo::isVirtualRegister(TrueReg)) { + if (Register::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { OpNoForForwarding = i; @@ -2370,7 +2379,10 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || Opc == PPC::RLWINM || Opc == PPC::RLWINMo || Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; - if (!instrHasImmForm(MI, III, true) && !ConvertibleImmForm) + bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg()) + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true)) return nullptr; // Don't convert or %X, %Y, %Y since that's just a register move. @@ -2381,29 +2393,24 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI( MachineOperand &MO = MI.getOperand(i); SeenIntermediateUse = false; if (MO.isReg() && MO.isUse() && !MO.isImplicit()) { - MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; - It++; - unsigned Reg = MI.getOperand(i).getReg(); - - // Is this register defined by some form of add-immediate (including - // load-immediate) within this basic block? 
- for ( ; It != E; ++It) { - if (It->modifiesRegister(Reg, &getRegisterInfo())) { - switch (It->getOpcode()) { - default: break; - case PPC::LI: - case PPC::LI8: - case PPC::ADDItocL: - case PPC::ADDI: - case PPC::ADDI8: - OpNoForForwarding = i; - return &*It; - } + Register Reg = MI.getOperand(i).getReg(); + // If we see another use of this reg between the def and the MI, + // we want to flat it so the def isn't deleted. + MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse); + if (DefMI) { + // Is this register defined by some form of add-immediate (including + // load-immediate) within this basic block? + switch (DefMI->getOpcode()) { + default: break; - } else if (It->readsRegister(Reg, &getRegisterInfo())) - // If we see another use of this reg between the def and the MI, - // we want to flat it so the def isn't deleted. - SeenIntermediateUse = true; + case PPC::LI: + case PPC::LI8: + case PPC::ADDItocL: + case PPC::ADDI: + case PPC::ADDI8: + OpNoForForwarding = i; + return DefMI; + } } } } @@ -2417,7 +2424,7 @@ const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, PPC::SPILL_VRSAVE, PPC::QVSTFDX, PPC::QVSTFSXs, PPC::QVSTFDXb, - PPC::SPILLTOVSR_ST, PPC::EVSTDD, PPC::SPESTW}, + PPC::SPILLTOVSR_ST, PPC::EVSTDD}, // Power 9 {PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, @@ -2433,7 +2440,7 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, PPC::RESTORE_VRSAVE, PPC::QVLFDX, PPC::QVLFSXs, PPC::QVLFDXb, - PPC::SPILLTOVSR_LD, PPC::EVLDD, PPC::SPELWZ}, + PPC::SPILLTOVSR_LD, PPC::EVLDD}, // Power 9 {PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, PPC::RESTORE_CRBIT, PPC::LVX, 
PPC::LXV, PPC::DFLOADf64, PPC::DFLOADf32, @@ -2538,12 +2545,15 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, "The forwarding operand needs to be valid at this point"); bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill(); bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled; - unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); + Register ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg(); if (KilledDef && KillFwdDefMI) *KilledDef = DefMI; ImmInstrInfo III; - bool HasImmForm = instrHasImmForm(MI, III, PostRA); + bool IsVFReg = MI.getOperand(0).isReg() + ? isVFRegister(MI.getOperand(0).getReg()) + : false; + bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA); // If this is a reg+reg instruction that has a reg+imm form, // and one of the operands is produced by an add-immediate, // try to convert it. @@ -2591,7 +2601,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, // If a compare-immediate is fed by an immediate and is itself an input of // an ISEL (the most common case) into a COPY of the correct register. bool Changed = false; - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); int64_t Comparand = MI.getOperand(2).getImm(); int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? 
(Comparand | 0xFFFFFFFFFFFF0000) : Comparand; @@ -2601,8 +2611,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) continue; unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); - unsigned TrueReg = CompareUseMI.getOperand(1).getReg(); - unsigned FalseReg = CompareUseMI.getOperand(2).getReg(); + Register TrueReg = CompareUseMI.getOperand(1).getReg(); + Register FalseReg = CompareUseMI.getOperand(2).getReg(); unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg); if (RegToCopy == PPC::NoRegister) @@ -2777,9 +2787,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, +bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const { - unsigned Opc = MI.getOpcode(); // The vast majority of the instructions would need their operand 2 replaced // with an immediate when switching to the reg+imm form. 
A marked exception // are the update form loads/stores for which a constant operand 2 would need @@ -3111,7 +3120,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSSP; else { III.ImmOpcode = PPC::LFS; @@ -3125,7 +3134,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::LXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::LXSD; else { III.ImmOpcode = PPC::LFD; @@ -3143,7 +3152,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSSPX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSSP; else { III.ImmOpcode = PPC::STFS; @@ -3157,7 +3166,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, break; case PPC::STXSDX: if (PostRA) { - if (isVFRegister(MI.getOperand(0).getReg())) + if (IsVFReg) III.ImmOpcode = PPC::STXSD; else { III.ImmOpcode = PPC::STFD; @@ -3287,7 +3296,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding( if (MRI.isSSA()) return false; - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. MachineBasicBlock::const_reverse_iterator It = MI; @@ -3511,8 +3520,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? 
III.ZeroIsSpecialOrig : III.ZeroIsSpecialNew + 1; - unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); - unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); + Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); // If R0 is in the operand where zero is special for the new instruction, // it is unsafe to transform if the constant operand isn't that operand. if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && @@ -3563,16 +3572,20 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, } else { // The 32 bit and 64 bit instructions are quite different. if (SpecialShift32) { - // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31). - uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; + // Left shifts use (N, 0, 31-N). + // Right shifts use (32-N, N, 31) if 0 < N < 32. + // use (0, 0, 31) if N == 0. + uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { - // Left shifts use (N, 63-N), right shifts use (64-N, N). - uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; + // Left shifts use (N, 63-N). + // Right shifts use (64-N, N) if 0 < N < 64. + // use (0, 0) if N == 0. + uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); @@ -3601,8 +3614,8 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, if (III.ZeroIsSpecialNew) { // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no // need to fix up register class. 
- unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); - if (TargetRegisterInfo::isVirtualRegister(RegToModify)) { + Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + if (Register::isVirtualRegister(RegToModify)) { const TargetRegisterClass *NewRC = MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; @@ -3747,7 +3760,7 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const { return false; unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); unsigned StackOffset = MI.getOperand(1).getImm(); - unsigned StackReg = MI.getOperand(2).getReg(); + Register StackReg = MI.getOperand(2).getReg(); if (StackReg == PPC::X1 && StackOffset == TOCSaveOffset) return true; @@ -3772,7 +3785,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, switch (MI.getOpcode()) { case PPC::COPY: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); // In both ELFv1 and v2 ABI, method parameters and the return value // are sign- or zero-extended. @@ -3781,7 +3794,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, // We check the ZExt/SExt flags for a method parameter. if (MI.getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock()) { - unsigned VReg = MI.getOperand(0).getReg(); + Register VReg = MI.getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? FuncInfo->isLiveInSExt(VReg) : FuncInfo->isLiveInZExt(VReg); @@ -3818,7 +3831,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, } // If this is a copy from another register, we recursively check source. 
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) @@ -3841,8 +3854,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, case PPC::XORIS8: { // logical operation with 16-bit immediate does not change the upper bits. // So, we track the operand register as we do for register copy. - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI != NULL) @@ -3870,8 +3883,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, for (unsigned I = 1; I != E; I += D) { if (MI.getOperand(I).isReg()) { - unsigned SrcReg = MI.getOperand(I).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(I).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1)) @@ -3893,12 +3906,12 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg()); - unsigned SrcReg1 = MI.getOperand(1).getReg(); - unsigned SrcReg2 = MI.getOperand(2).getReg(); + Register SrcReg1 = MI.getOperand(1).getReg(); + Register SrcReg2 = MI.getOperand(2).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) || - !TargetRegisterInfo::isVirtualRegister(SrcReg2)) - return false; + if (!Register::isVirtualRegister(SrcReg1) || + !Register::isVirtualRegister(SrcReg2)) + return false; const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1); const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2); @@ -3923,21 +3936,99 @@ bool PPCInstrInfo::isBDNZ(unsigned Opcode) const { return (Opcode == 
(Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ)); } -bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const { - MachineBasicBlock *LoopEnd = L.getBottomBlock(); - MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator(); - // We really "analyze" only CTR loops right now. - if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) { - IndVarInst = nullptr; - CmpInst = &*I; - return false; +namespace { +class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { + MachineInstr *Loop, *EndLoop, *LoopCount; + MachineFunction *MF; + const TargetInstrInfo *TII; + int64_t TripCount; + +public: + PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop, + MachineInstr *LoopCount) + : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount), + MF(Loop->getParent()->getParent()), + TII(MF->getSubtarget().getInstrInfo()) { + // Inspect the Loop instruction up-front, as it may be deleted when we call + // createTripCountGreaterCondition. + if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) + TripCount = LoopCount->getOperand(1).getImm(); + else + TripCount = -1; } - return true; + + bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { + // Only ignore the terminator. + return MI == EndLoop; + } + + Optional + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl &Cond) override { + if (TripCount == -1) { + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateReg( + MF->getSubtarget().isPPC64() ? PPC::CTR8 : PPC::CTR, + true)); + return {}; + } + + return TripCount > TC; + } + + void setPreheader(MachineBasicBlock *NewPreheader) override { + // Do nothing. We want the LOOP setup instruction to stay in the *old* + // preheader, so we can use BDZ in the prologs to adapt the loop trip count. 
+ } + + void adjustTripCount(int TripCountAdjust) override { + // If the loop trip count is a compile-time value, then just change the + // value. + if (LoopCount->getOpcode() == PPC::LI8 || + LoopCount->getOpcode() == PPC::LI) { + int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust; + LoopCount->getOperand(1).setImm(TripCount); + return; + } + + // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, + // so we don't need to generate any thing here. + } + + void disposed() override { + Loop->eraseFromParent(); + // Ensure the loop setup instruction is deleted too. + LoopCount->eraseFromParent(); + } +}; +} // namespace + +std::unique_ptr +PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + // We really "analyze" only hardware loops right now. + MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); + MachineBasicBlock *Preheader = *LoopBB->pred_begin(); + if (Preheader == LoopBB) + Preheader = *std::next(LoopBB->pred_begin()); + MachineFunction *MF = Preheader->getParent(); + + if (I != LoopBB->end() && isBDNZ(I->getOpcode())) { + SmallPtrSet Visited; + if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) { + Register LoopCountReg = LoopInst->getOperand(0).getReg(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); + return std::make_unique(LoopInst, &*I, LoopCount); + } + } + return nullptr; } -MachineInstr * -PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { +MachineInstr *PPCInstrInfo::findLoopInstr( + MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const { unsigned LOOPi = (Subtarget.isPPC64() ? 
PPC::MTCTR8loop : PPC::MTCTRloop); @@ -3948,50 +4039,6 @@ PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const { return nullptr; } -unsigned PPCInstrInfo::reduceLoopCount( - MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar, - MachineInstr &Cmp, SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, unsigned Iter, - unsigned MaxIter) const { - // We expect a hardware loop currently. This means that IndVar is set - // to null, and the compare is the ENDLOOP instruction. - assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop"); - MachineFunction *MF = MBB.getParent(); - DebugLoc DL = Cmp.getDebugLoc(); - MachineInstr *Loop = findLoopInstr(PreHeader); - if (!Loop) - return 0; - unsigned LoopCountReg = Loop->getOperand(0).getReg(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg); - - if (!LoopCount) - return 0; - // If the loop trip count is a compile-time value, then just change the - // value. - if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) { - int64_t Offset = LoopCount->getOperand(1).getImm(); - if (Offset <= 1) { - LoopCount->eraseFromParent(); - Loop->eraseFromParent(); - return 0; - } - LoopCount->getOperand(1).setImm(Offset - 1); - return Offset - 1; - } - - // The loop trip count is a run-time value. - // We need to subtract one from the trip count, - // and insert branch later to check if we're done with the loop. - - // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1, - // so we don't need to generate any thing here. - Cond.push_back(MachineOperand::CreateImm(0)); - Cond.push_back(MachineOperand::CreateReg( - Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true)); - return LoopCountReg; -} - // Return true if get the base operand, byte offset of an instruction and the // memory width. Width is the size of memory that is being loaded/stored. 
bool PPCInstrInfo::getMemOperandWithOffsetWidth( @@ -4018,8 +4065,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( } bool PPCInstrInfo::areMemAccessesTriviallyDisjoint( - const MachineInstr &MIa, const MachineInstr &MIb, - AliasAnalysis * /*AA*/) const { + const MachineInstr &MIa, const MachineInstr &MIb) const { assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 70fb757e8f1..19ab30cb090 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -248,11 +248,11 @@ public: unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; void insertNoop(MachineBasicBlock &MBB, @@ -370,8 +370,7 @@ public: /// otherwise bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. @@ -439,9 +438,14 @@ public: void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const; - bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III, + bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const; + // In PostRA phase, try to find instruction defines \p Reg before \p MI. + // \p SeenIntermediate is set to true if uses between DefMI and \p MI exist. 
+ MachineInstr *getDefMIPostRA(unsigned Reg, MachineInstr &MI, + bool &SeenIntermediateUse) const; + /// getRegNumForOperand - some operands use different numbering schemes /// for the same registers. For example, a VSX instruction may have any of /// vs0-vs63 allocated whereas an Altivec instruction could only have @@ -481,26 +485,14 @@ public: /// On PPC, we have two instructions used to set-up the hardware loop /// (MTCTRloop, MTCTR8loop) with corresponding endloop (BDNZ, BDNZ8) /// instructions to indicate the end of a loop. - MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const; + MachineInstr * + findLoopInstr(MachineBasicBlock &PreHeader, + SmallPtrSet &Visited) const; - /// Analyze the loop code to find the loop induction variable and compare used - /// to compute the number of iterations. Currently, we analyze loop that are - /// controlled using hardware loops. In this case, the induction variable - /// instruction is null. For all other cases, this function returns true, - /// which means we're unable to analyze it. \p IndVarInst and \p CmpInst will - /// return new values when we can analyze the readonly loop \p L, otherwise, - /// nothing got changed - bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, - MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop - /// is finished. Return the value/register of the new loop count. We need - /// this function when peeling off one or more iterations of a loop. This - /// function assumes the last iteration is peeled first. - unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, - MachineInstr *IndVar, MachineInstr &Cmp, - SmallVectorImpl &Cond, - SmallVectorImpl &PrevInsts, - unsigned Iter, unsigned MaxIter) const override; + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. 
+ std::unique_ptr + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index c313337047f..24183277519 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -386,7 +386,9 @@ def immZExt16 : PatLeaf<(imm), [{ // field. Used by instructions like 'ori'. return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); }], LO16>; -def immAnyExt8 : ImmLeaf(Imm) || isUInt<8>(Imm); }]>; +def immNonAllOneAnyExt8 : ImmLeaf(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF)); +}]>; def immSExt5NonZero : ImmLeaf(Imm); }]>; // imm16Shifted* - These match immediates where the low 16-bits are zero. There @@ -577,7 +579,7 @@ def sperc : RegisterOperand { def PPCRegSPE4RCAsmOperand : AsmOperandClass { let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; } -def spe4rc : RegisterOperand { +def spe4rc : RegisterOperand { let ParserMatchClass = PPCRegSPE4RCAsmOperand; } @@ -3161,7 +3163,16 @@ def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s1 def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, + (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def LWZtocL : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc_nor0:$reg), + "#LWZtocL", + [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; +def ADDIStocHA : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, tocentry32:$disp), + "#ADDIStocHA", + [(set i32:$rD, + (PPCtoc_entry i32:$reg, tglobaladdr:$disp))]>; + // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. 
def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; @@ -3177,21 +3188,21 @@ def : Pat<(srl i32:$rS, i32:$rB), def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; -def : Pat<(zextloadi1 iaddr:$src), +def : Pat<(i32 (zextloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(zextloadi1 xaddr:$src), +def : Pat<(i32 (zextloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi1 iaddr:$src), +def : Pat<(i32 (extloadi1 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), +def : Pat<(i32 (extloadi1 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), +def : Pat<(i32 (extloadi8 iaddr:$src)), (LBZ iaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), +def : Pat<(i32 (extloadi8 xaddr:$src)), (LBZX xaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), +def : Pat<(i32 (extloadi16 iaddr:$src)), (LHZ iaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), +def : Pat<(i32 (extloadi16 xaddr:$src)), (LHZX xaddr:$src)>; let Predicates = [HasFPU] in { def : Pat<(f64 (extloadf32 iaddr:$src)), @@ -3564,23 +3575,6 @@ def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; - -defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), - (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 
imm:$imm)), - (LO16 imm:$imm)), sub_eq)>; - def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), @@ -3592,17 +3586,6 @@ def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), - (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), - (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; - // SETCC for i64. def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; @@ -3632,6 +3615,47 @@ def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), + (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; +def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), + (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i32. 
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; +defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), + (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), + (LO16 imm:$imm)), sub_eq)>; + +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), + (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), + (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; + +// Instantiations of CRNotPat for i64. 
defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), @@ -3649,17 +3673,6 @@ defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), - (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; -def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), - (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; - defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), @@ -3671,6 +3684,56 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; +let Predicates = [HasFPU] in { +// Instantiations of CRNotPat for f32. +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f64. 
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), + (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; + +// Instantiations of CRNotPat for f128. +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; +defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), + (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; +} + // SETCC for f32. 
let Predicates = [HasFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), @@ -3688,21 +3751,6 @@ def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; - // SETCC for f64. def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; @@ -3719,21 +3767,6 @@ def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), - (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; - // SETCC for f128. 
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; @@ -3750,21 +3783,6 @@ def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; -defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), - (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; - } // This must be in this file because it relies on patterns defined in this file diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 07f38a61d09..2aad5860d87 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -58,8 +58,12 @@ def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ SDTCisVT<0, v4f32>, SDTCisPtrTy<1> ]>; -def SDT_PPCfpextlh : SDTypeProfile<1, 1, [ - SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32> +def SDT_PPCfpexth : SDTypeProfile<1, 2, [ + SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>, SDTCisPtrTy<2> +]>; + +def SDT_PPCldsplat : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> ]>; // Little-endian-specific nodes. 
@@ -78,12 +82,21 @@ def SDTVecConv : SDTypeProfile<1, 2, [ def SDTVabsd : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> ]>; - +def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; +def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; +def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, + [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; @@ -93,9 +106,11 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; -def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>; +def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>; def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -855,14 +870,14 @@ let Uses = [RM] in { let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins), + def XXLXORz : XX3Form_SameOp<60, 154, (outs vsrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set v4i32:$XT, (v4i32 immAllZerosV))]>; - def XXLXORdpz : XX3Form_SetZero<60, 154, + 
def XXLXORdpz : XX3Form_SameOp<60, 154, (outs vsfrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f64:$XT, (fpimm0))]>; - def XXLXORspz : XX3Form_SetZero<60, 154, + def XXLXORspz : XX3Form_SameOp<60, 154, (outs vssrc:$XT), (ins), "xxlxor $XT, $XT, $XT", IIC_VecGeneral, [(set f32:$XT, (fpimm0))]>; @@ -996,21 +1011,21 @@ def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } -// Additional fnmsub patterns: -a*c + b == -(a*c - b) -def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), - (XSNMSUBADP $B, $C, $A)>; -def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), - (XSNMSUBADP $B, $C, $A)>; +// Additional fnmsub patterns: -a*b + c == -(a*b - c) +def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C), + (XSNMSUBADP $C, $A, $B)>; +def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C), + (XSNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; -def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B), - (XVNMSUBADP $B, $C, $A)>; +def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; +def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C), + (XVNMSUBADP $C, $A, $B)>; -def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; -def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), - (XVNMSUBASP $B, $C, $A)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C), + (XVNMSUBASP $C, $A, $B)>; def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; @@ -1077,7 +1092,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)), def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)), (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; -def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>; +def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>; // Loads. 
let Predicates = [HasVSX, HasOnlySwappingMemOps] in { @@ -1088,6 +1104,19 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } + +// Load vector big endian order +let Predicates = [IsLittleEndian, HasVSX] in { + def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; +} + let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -1288,6 +1317,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), (XXLEQV $A, $B)>; + let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1, + isReMaterializable = 1 in { + def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins), + "xxleqv $XT, $XT, $XT", IIC_VecGeneral, + [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>; + } + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, @@ -1476,6 +1512,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
AltVSXFMARel; } + // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C), + (XSNMSUBASP $C, $A, $B)>; + def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C), + (XSNMSUBASP $C, $A, $B)>; + // Single Precision Conversions (FP <-> INT) def XSCVSXDSP : XX2Form<60, 312, (outs vssrc:$XT), (ins vsfrc:$XB), @@ -1564,16 +1606,33 @@ let Predicates = [HasDirectMove] in { def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), "mfvsrwz $rA, $XT", IIC_VecGeneral, [(set i32:$rA, (PPCmfvsr f64:$XT))]>; + let isCodeGenOnly = 1 in + def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + []>; def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), "mtvsrd $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i64:$rA))]>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + []>, + Requires<[In64BitMode]>; def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwa $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsra i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + []>; def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; + let isCodeGenOnly = 1 in + def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + []>; } // HasDirectMove let Predicates = [IsISA3_0, HasDirectMove] in { @@ -1597,6 +1656,22 @@ def : InstAlias<"mfvrd $rA, $XT", (MFVRD g8rc:$rA, vrrc:$XT), 0>; def : InstAlias<"mffprd $rA, $src", (MFVSRD g8rc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrd $XT, $rA", + (MTVRD vrrc:$XT, g8rc:$rA), 0>; +def : InstAlias<"mtfprd $dst, $rA", + (MTVSRD f8rc:$dst, g8rc:$rA)>; +def : 
InstAlias<"mfvrwz $rA, $XT", + (MFVRWZ gprc:$rA, vrrc:$XT), 0>; +def : InstAlias<"mffprwz $rA, $src", + (MFVSRWZ gprc:$rA, f8rc:$src)>; +def : InstAlias<"mtvrwa $XT, $rA", + (MTVRWA vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwa $dst, $rA", + (MTVSRWA f8rc:$dst, gprc:$rA)>; +def : InstAlias<"mtvrwz $XT, $rA", + (MTVRWZ vrrc:$XT, gprc:$rA), 0>; +def : InstAlias<"mtfprwz $dst, $rA", + (MTVSRWZ f8rc:$dst, gprc:$rA)>; /* Direct moves of various widths from GPR's into VSR's. Each move lines the value up into element 0 (both BE and LE). Namely, entities smaller than @@ -2581,9 +2656,9 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (fneg (int_ppc_fmaf128_round_to_odd f128:$vA, f128:$vB, (fneg f128:$vTi))))]>; - // Additional fnmsub patterns: -a*c + b == -(a*c - b) - def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>; - def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>; + // Additional fnmsub patterns: -a*b + c == -(a*b - c) + def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>; + def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>; //===--------------------------------------------------------------------===// // Quad/Double-Precision Compare Instructions: @@ -2799,12 +2874,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, [(set v4i32: $XT, - (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>; def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5, (outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB), "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, [(set v2i64: $XT, - (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>; + (int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>; //===--------------------------------------------------------------------===// @@ -3024,6 +3099,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW 
v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + + def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), + (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; + + def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), + (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; + def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), + (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { @@ -3059,7 +3144,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; - } // IsLittleEndian, HasP9Vector + } // IsBigEndian, HasP9Vector // D-Form Load/Store def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>; @@ -3858,6 +3943,10 @@ let AddedComplexity = 400 in { (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + def : Pat<(v2f64 (PPCldsplat xoaddr:$A)), + (v2f64 (LXVDSX xoaddr:$A))>; + def : Pat<(v2i64 (PPCldsplat xoaddr:$A)), + (v2i64 (LXVDSX xoaddr:$A))>; // Build vectors of floating point converted to i64. 
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -4063,27 +4152,32 @@ let AddedComplexity = 400 in { (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>; } + let Predicates = [HasP8Vector] in { + def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))), + (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))), + (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))), + (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))), + (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>; + } + let Predicates = [HasP9Vector] in { // Endianness-neutral patterns for const splats with ISA 3.0 instructions. def : Pat<(v4i32 (scalar_to_vector i32:$A)), (v4i32 (MTVSRWS $A))>; def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), (v4i32 (MTVSRWS $A))>; - def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A, - immAnyExt8:$A)), + def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, + immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)), (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>; - def : Pat<(v16i8 immAllOnesV), - (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v8i16 immAllOnesV), - (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>; - def : Pat<(v4i32 immAllOnesV), - (v4i32 (XXSPLTIB 255))>; - def : Pat<(v2i64 immAllOnesV), - (v2i64 (XXSPLTIB 255))>; def : Pat<(v4i32 (scalar_to_vector 
FltToIntLoad.A)), (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), @@ -4102,6 +4196,10 @@ let AddedComplexity = 400 in { (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; + def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), + (v4f32 (LXVWSX xoaddr:$A))>; + def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), + (v4i32 (LXVWSX xoaddr:$A))>; } let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 4d45d96d447..d252cfbd26b 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -63,8 +63,24 @@ static cl::opt MaxVars("ppc-preinc-prep-max-vars", cl::desc("Potential PHI threshold for PPC preinc loop prep")); STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form"); +STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); namespace { + struct BucketElement { + BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} + BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} + + const SCEVConstant *Offset; + Instruction *Instr; + }; + + struct Bucket { + Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), + Elements(1, BucketElement(I)) {} + + const SCEV *BaseSCEV; + SmallVector Elements; + }; class PPCLoopPreIncPrep : public FunctionPass { public: @@ -85,21 +101,47 @@ namespace { AU.addRequired(); } - bool alreadyPrepared(Loop *L, Instruction* MemI, - const SCEV *BasePtrStartSCEV, - const SCEVConstant *BasePtrIncSCEV); bool runOnFunction(Function &F) override; - bool runOnLoop(Loop *L); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); - private: PPCTargetMachine *TM = nullptr; + const PPCSubtarget *ST; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; bool PreserveLCSSA; + + bool runOnLoop(Loop *L); + + /// Check if required PHI node is already exist in Loop \p L. 
+ bool alreadyPrepared(Loop *L, Instruction* MemI, + const SCEV *BasePtrStartSCEV, + const SCEVConstant *BasePtrIncSCEV); + + /// Collect condition matched(\p isValidCandidate() returns true) + /// candidates in Loop \p L. + SmallVector + collectCandidates(Loop *L, + std::function + isValidCandidate, + unsigned MaxCandidateNum); + + /// Add a candidate to candidates \p Buckets. + void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector &Buckets, + unsigned MaxCandidateNum); + + /// Prepare all candidates in \p Buckets for update form. + bool updateFormPrep(Loop *L, SmallVector &Buckets); + + /// Prepare for one chain \p BucketChain, find the best base element and + /// update all other elements in \p BucketChain accordingly. + bool prepareBaseForUpdateFormChain(Bucket &BucketChain); + + /// Rewrite load/store instructions in \p BucketChain according to + /// preparation. + bool rewriteLoadStores(Loop *L, Bucket &BucketChain, + SmallSet &BBChanged); }; } // end anonymous namespace @@ -111,30 +153,15 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +static const std::string PHINodeNameSuffix = ".phi"; +static const std::string CastNodeNameSuffix = ".cast"; +static const std::string GEPNodeIncNameSuffix = ".inc"; +static const std::string GEPNodeOffNameSuffix = ".off"; + FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { return new PPCLoopPreIncPrep(TM); } -namespace { - - struct BucketElement { - BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} - BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - - const SCEVConstant *Offset; - Instruction *Instr; - }; - - struct Bucket { - Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), - Elements(1, BucketElement(I)) {} - - const SCEV *BaseSCEV; - SmallVector Elements; - }; - -} // end anonymous namespace - static bool 
IsPtrInBounds(Value *BasePtr) { Value *StrippedBasePtr = BasePtr; while (BitCastInst *BC = dyn_cast(StrippedBasePtr)) @@ -145,6 +172,14 @@ static bool IsPtrInBounds(Value *BasePtr) { return false; } +static std::string getInstrName(const Value *I, const std::string Suffix) { + assert(I && "Invalid paramater!"); + if (I->hasName()) + return (I->getName() + Suffix).str(); + else + return ""; +} + static Value *GetPointerOperand(Value *MemI) { if (LoadInst *LMemI = dyn_cast(MemI)) { return LMemI->getPointerOperand(); @@ -167,6 +202,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + ST = TM ? TM->getSubtargetImpl(F) : nullptr; bool MadeChange = false; @@ -177,10 +213,280 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { return MadeChange; } +void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, + SmallVector &Buckets, + unsigned MaxCandidateNum) { + assert((MemI && GetPointerOperand(MemI)) && + "Candidate should be a memory instruction."); + assert(LSCEV && "Invalid SCEV for Ptr value."); + bool FoundBucket = false; + for (auto &B : Buckets) { + const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); + if (const auto *CDiff = dyn_cast(Diff)) { + B.Elements.push_back(BucketElement(CDiff, MemI)); + FoundBucket = true; + break; + } + } + + if (!FoundBucket) { + if (Buckets.size() == MaxCandidateNum) + return; + Buckets.push_back(Bucket(LSCEV, MemI)); + } +} + +SmallVector PPCLoopPreIncPrep::collectCandidates( + Loop *L, + std::function isValidCandidate, + unsigned MaxCandidateNum) { + SmallVector Buckets; + for (const auto &BB : L->blocks()) + for (auto &J : *BB) { + Value *PtrValue; + Instruction *MemI; + + if (LoadInst *LMemI = dyn_cast(&J)) { + MemI = LMemI; + PtrValue = LMemI->getPointerOperand(); + } else if (StoreInst *SMemI = dyn_cast(&J)) { + MemI = SMemI; + PtrValue = SMemI->getPointerOperand(); 
+ } else if (IntrinsicInst *IMemI = dyn_cast(&J)) { + if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { + MemI = IMemI; + PtrValue = IMemI->getArgOperand(0); + } else continue; + } else continue; + + unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); + if (PtrAddrSpace) + continue; + + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + continue; + + if (isValidCandidate(&J, PtrValue)) + addOneCandidate(MemI, LSCEV, Buckets, MaxCandidateNum); + } + return Buckets; +} + +// TODO: implement a more clever base choosing policy. +// Currently we always choose an exist load/store offset. This maybe lead to +// suboptimal code sequences. For example, for one DS chain with offsets +// {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp +// for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a +// multipler of 4, it cannot be represented by sint16. +bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { + // We have a choice now of which instruction's memory operand we use as the + // base for the generated PHI. Always picking the first instruction in each + // bucket does not work well, specifically because that instruction might + // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, + // the choice is somewhat arbitrary, because the backend will happily + // generate direct offsets from both the pre-incremented and + // post-incremented pointer values. Thus, we'll pick the first non-prefetch + // instruction in each bucket, and adjust the recurrence and other offsets + // accordingly. 
+ for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { + if (auto *II = dyn_cast(BucketChain.Elements[j].Instr)) + if (II->getIntrinsicID() == Intrinsic::prefetch) + continue; + + // If we'd otherwise pick the first element anyway, there's nothing to do. + if (j == 0) + break; + + // If our chosen element has no offset from the base pointer, there's + // nothing to do. + if (!BucketChain.Elements[j].Offset || + BucketChain.Elements[j].Offset->isZero()) + break; + + const SCEV *Offset = BucketChain.Elements[j].Offset; + BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); + for (auto &E : BucketChain.Elements) { + if (E.Offset) + E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); + else + E.Offset = cast(SE->getNegativeSCEV(Offset)); + } + + std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); + break; + } + return true; +} + +bool PPCLoopPreIncPrep::rewriteLoadStores( + Loop *L, Bucket &BucketChain, SmallSet &BBChanged) { + bool MadeChange = false; + const SCEVAddRecExpr *BasePtrSCEV = + cast(BucketChain.BaseSCEV); + if (!BasePtrSCEV->isAffine()) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + + assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); + + // The instruction corresponding to the Bucket's BaseSCEV must be the first + // in the vector of elements. 
+ Instruction *MemI = BucketChain.Elements.begin()->Instr; + Value *BasePtr = GetPointerOperand(MemI); + assert(BasePtr && "No pointer operand"); + + Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); + Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), + BasePtr->getType()->getPointerAddressSpace()); + + const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); + if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) + return MadeChange; + + const SCEVConstant *BasePtrIncSCEV = + dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); + if (!BasePtrIncSCEV) + return MadeChange; + BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); + if (!isSafeToExpand(BasePtrStartSCEV, *SE)) + return MadeChange; + + if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) + return MadeChange; + + LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + + BasicBlock *Header = L->getHeader(); + unsigned HeaderLoopPredCount = pred_size(Header); + BasicBlock *LoopPredecessor = L->getLoopPredecessor(); + + PHINode *NewPHI = + PHINode::Create(I8PtrTy, HeaderLoopPredCount, + getInstrName(MemI, PHINodeNameSuffix), + Header->getFirstNonPHI()); + + SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); + Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, + LoopPredecessor->getTerminator()); + + // Note that LoopPredecessor might occur in the predecessor list multiple + // times, and we need to add it the right number of times. 
+ for (const auto &PI : predecessors(Header)) { + if (PI != LoopPredecessor) + continue; + + NewPHI->addIncoming(BasePtrStart, LoopPredecessor); + } + + Instruction *InsPoint = &*Header->getFirstInsertionPt(); + GetElementPtrInst *PtrInc = GetElementPtrInst::Create( + I8Ty, NewPHI, BasePtrIncSCEV->getValue(), + getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); + PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); + for (const auto &PI : predecessors(Header)) { + if (PI == LoopPredecessor) + continue; + + NewPHI->addIncoming(PtrInc, PI); + } + + Instruction *NewBasePtr; + if (PtrInc->getType() != BasePtr->getType()) + NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), + getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); + else + NewBasePtr = PtrInc; + + if (Instruction *IDel = dyn_cast(BasePtr)) + BBChanged.insert(IDel->getParent()); + BasePtr->replaceAllUsesWith(NewBasePtr); + RecursivelyDeleteTriviallyDeadInstructions(BasePtr); + + // Keep track of the replacement pointer values we've inserted so that we + // don't generate more pointer values than necessary. 
+ SmallPtrSet NewPtrs; + NewPtrs.insert(NewBasePtr); + + for (auto I = std::next(BucketChain.Elements.begin()), + IE = BucketChain.Elements.end(); I != IE; ++I) { + Value *Ptr = GetPointerOperand(I->Instr); + assert(Ptr && "No pointer operand"); + if (NewPtrs.count(Ptr)) + continue; + + Instruction *RealNewPtr; + if (!I->Offset || I->Offset->getValue()->isZero()) { + RealNewPtr = NewBasePtr; + } else { + Instruction *PtrIP = dyn_cast(Ptr); + if (PtrIP && isa(NewBasePtr) && + cast(NewBasePtr)->getParent() == PtrIP->getParent()) + PtrIP = nullptr; + else if (PtrIP && isa(PtrIP)) + PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); + else if (!PtrIP) + PtrIP = I->Instr; + + GetElementPtrInst *NewPtr = GetElementPtrInst::Create( + I8Ty, PtrInc, I->Offset->getValue(), + getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); + if (!PtrIP) + NewPtr->insertAfter(cast(PtrInc)); + NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); + RealNewPtr = NewPtr; + } + + if (Instruction *IDel = dyn_cast(Ptr)) + BBChanged.insert(IDel->getParent()); + + Instruction *ReplNewPtr; + if (Ptr->getType() != RealNewPtr->getType()) { + ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), + getInstrName(Ptr, CastNodeNameSuffix)); + ReplNewPtr->insertAfter(RealNewPtr); + } else + ReplNewPtr = RealNewPtr; + + Ptr->replaceAllUsesWith(ReplNewPtr); + RecursivelyDeleteTriviallyDeadInstructions(Ptr); + + NewPtrs.insert(RealNewPtr); + } + + MadeChange = true; + UpdFormChainRewritten++; + + return MadeChange; +} + +bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, + SmallVector &Buckets) { + bool MadeChange = false; + if (Buckets.empty()) + return MadeChange; + SmallSet BBChanged; + for (auto &Bucket : Buckets) + // The base address of each bucket is transformed into a phi and the others + // are rewritten based on new base. 
+ if (prepareBaseForUpdateFormChain(Bucket)) + MadeChange |= rewriteLoadStores(L, Bucket, BBChanged); + if (MadeChange) + for (auto &BB : L->blocks()) + if (BBChanged.count(BB)) + DeleteDeadPHIs(BB); + return MadeChange; +} + // In order to prepare for the pre-increment a PHI is added. // This function will check to see if that PHI already exists and will return -// true if it found an existing PHI with the same start and increment as the -// one we wanted to create. +// true if it found an existing PHI with the same start and increment as the +// one we wanted to create. bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, const SCEV *BasePtrStartSCEV, const SCEVConstant *BasePtrIncSCEV) { @@ -216,10 +522,10 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, continue; if (CurrentPHINode->getNumIncomingValues() == 2) { - if ( (CurrentPHINode->getIncomingBlock(0) == LatchBB && - CurrentPHINode->getIncomingBlock(1) == PredBB) || - (CurrentPHINode->getIncomingBlock(1) == LatchBB && - CurrentPHINode->getIncomingBlock(0) == PredBB) ) { + if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && + CurrentPHINode->getIncomingBlock(1) == PredBB) || + (CurrentPHINode->getIncomingBlock(1) == LatchBB && + CurrentPHINode->getIncomingBlock(0) == PredBB)) { if (PHIBasePtrSCEV->getStart() == BasePtrStartSCEV && PHIBasePtrIncSCEV == BasePtrIncSCEV) { // The existing PHI (CurrentPHINode) has the same start and increment @@ -242,89 +548,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); - BasicBlock *Header = L->getHeader(); - - const PPCSubtarget *ST = - TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - - unsigned HeaderLoopPredCount = pred_size(Header); - - // Collect buckets of comparable addresses used by loads and stores. 
- SmallVector Buckets; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); - J != JE; ++J) { - Value *PtrValue; - Instruction *MemI; - - if (LoadInst *LMemI = dyn_cast(J)) { - MemI = LMemI; - PtrValue = LMemI->getPointerOperand(); - } else if (StoreInst *SMemI = dyn_cast(J)) { - MemI = SMemI; - PtrValue = SMemI->getPointerOperand(); - } else if (IntrinsicInst *IMemI = dyn_cast(J)) { - if (IMemI->getIntrinsicID() == Intrinsic::prefetch) { - MemI = IMemI; - PtrValue = IMemI->getArgOperand(0); - } else continue; - } else continue; - - unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); - if (PtrAddrSpace) - continue; - - // There are no update forms for Altivec vector load/stores. - if (ST && ST->hasAltivec() && - PtrValue->getType()->getPointerElementType()->isVectorTy()) - continue; - - if (L->isLoopInvariant(PtrValue)) - continue; - - const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); - if (const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV)) { - if (LARSCEV->getLoop() != L) - continue; - // See getPreIndexedAddressParts, the displacement for LDU/STDU has to - // be 4's multiple (DS-form). For i64 loads/stores when the displacement - // fits in a 16-bit signed field but isn't a multiple of 4, it will be - // useless and possible to break some original well-form addressing mode - // to make this pre-inc prep for it. 
- if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { - if (const SCEVConstant *StepConst = - dyn_cast(LARSCEV->getStepRecurrence(*SE))) { - const APInt &ConstInt = StepConst->getValue()->getValue(); - if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) - continue; - } - } - } else { - continue; - } - - bool FoundBucket = false; - for (auto &B : Buckets) { - const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); - if (const auto *CDiff = dyn_cast(Diff)) { - B.Elements.push_back(BucketElement(CDiff, MemI)); - FoundBucket = true; - break; - } - } - - if (!FoundBucket) { - if (Buckets.size() == MaxVars) - return MadeChange; - Buckets.push_back(Bucket(LSCEV, MemI)); - } - } - } - - if (Buckets.empty()) - return MadeChange; - BasicBlock *LoopPredecessor = L->getLoopPredecessor(); // If there is no loop predecessor, or the loop predecessor's terminator // returns a value (which might contribute to determining the loop's @@ -335,191 +558,48 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (LoopPredecessor) MadeChange = true; } - if (!LoopPredecessor) + if (!LoopPredecessor) { + LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); return MadeChange; - - LLVM_DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); - - SmallSet BBChanged; - for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { - // The base address of each bucket is transformed into a phi and the others - // are rewritten as offsets of that variable. - - // We have a choice now of which instruction's memory operand we use as the - // base for the generated PHI. Always picking the first instruction in each - // bucket does not work well, specifically because that instruction might - // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, - // the choice is somewhat arbitrary, because the backend will happily - // generate direct offsets from both the pre-incremented and - // post-incremented pointer values. 
Thus, we'll pick the first non-prefetch - // instruction in each bucket, and adjust the recurrence and other offsets - // accordingly. - for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { - if (auto *II = dyn_cast(Buckets[i].Elements[j].Instr)) - if (II->getIntrinsicID() == Intrinsic::prefetch) - continue; - - // If we'd otherwise pick the first element anyway, there's nothing to do. - if (j == 0) - break; - - // If our chosen element has no offset from the base pointer, there's - // nothing to do. - if (!Buckets[i].Elements[j].Offset || - Buckets[i].Elements[j].Offset->isZero()) - break; - - const SCEV *Offset = Buckets[i].Elements[j].Offset; - Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); - for (auto &E : Buckets[i].Elements) { - if (E.Offset) - E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); - else - E.Offset = cast(SE->getNegativeSCEV(Offset)); - } - - std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); - break; - } - - const SCEVAddRecExpr *BasePtrSCEV = - cast(Buckets[i].BaseSCEV); - if (!BasePtrSCEV->isAffine()) - continue; - - LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); - assert(BasePtrSCEV->getLoop() == L && - "AddRec for the wrong loop?"); - - // The instruction corresponding to the Bucket's BaseSCEV must be the first - // in the vector of elements. 
- Instruction *MemI = Buckets[i].Elements.begin()->Instr; - Value *BasePtr = GetPointerOperand(MemI); - assert(BasePtr && "No pointer operand"); - - Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); - Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), - BasePtr->getType()->getPointerAddressSpace()); - - const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart(); - if (!SE->isLoopInvariant(BasePtrStartSCEV, L)) - continue; - - const SCEVConstant *BasePtrIncSCEV = - dyn_cast(BasePtrSCEV->getStepRecurrence(*SE)); - if (!BasePtrIncSCEV) - continue; - BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV); - if (!isSafeToExpand(BasePtrStartSCEV, *SE)) - continue; - - LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); - - if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV)) - continue; - - PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, - MemI->hasName() ? MemI->getName() + ".phi" : "", - Header->getFirstNonPHI()); - - SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); - Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, - LoopPredecessor->getTerminator()); - - // Note that LoopPredecessor might occur in the predecessor list multiple - // times, and we need to add it the right number of times. - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI != LoopPredecessor) - continue; - - NewPHI->addIncoming(BasePtrStart, LoopPredecessor); - } - - Instruction *InsPoint = &*Header->getFirstInsertionPt(); - GetElementPtrInst *PtrInc = GetElementPtrInst::Create( - I8Ty, NewPHI, BasePtrIncSCEV->getValue(), - MemI->hasName() ? 
MemI->getName() + ".inc" : "", InsPoint); - PtrInc->setIsInBounds(IsPtrInBounds(BasePtr)); - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - if (*PI == LoopPredecessor) - continue; - - NewPHI->addIncoming(PtrInc, *PI); - } - - Instruction *NewBasePtr; - if (PtrInc->getType() != BasePtr->getType()) - NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(), - PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint); - else - NewBasePtr = PtrInc; - - if (Instruction *IDel = dyn_cast(BasePtr)) - BBChanged.insert(IDel->getParent()); - BasePtr->replaceAllUsesWith(NewBasePtr); - RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - - // Keep track of the replacement pointer values we've inserted so that we - // don't generate more pointer values than necessary. - SmallPtrSet NewPtrs; - NewPtrs.insert( NewBasePtr); - - for (auto I = std::next(Buckets[i].Elements.begin()), - IE = Buckets[i].Elements.end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->Instr); - assert(Ptr && "No pointer operand"); - if (NewPtrs.count(Ptr)) - continue; - - Instruction *RealNewPtr; - if (!I->Offset || I->Offset->getValue()->isZero()) { - RealNewPtr = NewBasePtr; - } else { - Instruction *PtrIP = dyn_cast(Ptr); - if (PtrIP && isa(NewBasePtr) && - cast(NewBasePtr)->getParent() == PtrIP->getParent()) - PtrIP = nullptr; - else if (isa(PtrIP)) - PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); - else if (!PtrIP) - PtrIP = I->Instr; - - GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, I->Offset->getValue(), - I->Instr->hasName() ? 
I->Instr->getName() + ".off" : "", PtrIP); - if (!PtrIP) - NewPtr->insertAfter(cast(PtrInc)); - NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); - RealNewPtr = NewPtr; - } - - if (Instruction *IDel = dyn_cast(Ptr)) - BBChanged.insert(IDel->getParent()); - - Instruction *ReplNewPtr; - if (Ptr->getType() != RealNewPtr->getType()) { - ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), - Ptr->hasName() ? Ptr->getName() + ".cast" : ""); - ReplNewPtr->insertAfter(RealNewPtr); - } else - ReplNewPtr = RealNewPtr; - - Ptr->replaceAllUsesWith(ReplNewPtr); - RecursivelyDeleteTriviallyDeadInstructions(Ptr); - - NewPtrs.insert(RealNewPtr); - } - - MadeChange = true; } - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) { - if (BBChanged.count(*I)) - DeleteDeadPHIs(*I); - } + // Check if a load/store has update form. This lambda is used by function + // collectCandidates which can collect candidates for types defined by lambda. + auto isUpdateFormCandidate = [&] (const Instruction *I, + const Value *PtrValue) { + assert((PtrValue && I) && "Invalid parameter!"); + // There are no update forms for Altivec vector load/stores. + if (ST && ST->hasAltivec() && + PtrValue->getType()->getPointerElementType()->isVectorTy()) + return false; + // See getPreIndexedAddressParts, the displacement for LDU/STDU has to + // be 4's multiple (DS-form). For i64 loads/stores when the displacement + // fits in a 16-bit signed field but isn't a multiple of 4, it will be + // useless and possible to break some original well-form addressing mode + // to make this pre-inc prep for it. 
+ if (PtrValue->getType()->getPointerElementType()->isIntegerTy(64)) { + const SCEV *LSCEV = SE->getSCEVAtScope(const_cast(PtrValue), L); + const SCEVAddRecExpr *LARSCEV = dyn_cast(LSCEV); + if (!LARSCEV || LARSCEV->getLoop() != L) + return false; + if (const SCEVConstant *StepConst = + dyn_cast(LARSCEV->getStepRecurrence(*SE))) { + const APInt &ConstInt = StepConst->getValue()->getValue(); + if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) + return false; + } + } + return true; + }; + + // Collect buckets of comparable addresses used by loads, stores and prefetch + // intrinsic for update form. + SmallVector UpdateFormBuckets = + collectCandidates(L, isUpdateFormCandidate, MaxVars); + + // Prepare for update form. + if (!UpdateFormBuckets.empty()) + MadeChange |= updateFormPrep(L, UpdateFormBuckets); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 027e6bd1ba0..b6496f189a3 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -79,7 +79,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - AsmPrinter &Printer, bool isDarwin) { + AsmPrinter &Printer, bool IsDarwin) { MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; @@ -137,10 +137,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Add ha16() / lo16() markers if required. 
switch (access) { case PPCII::MO_LO: - Expr = PPCMCExpr::createLo(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createLo(Expr, IsDarwin, Ctx); break; case PPCII::MO_HA: - Expr = PPCMCExpr::createHa(Expr, isDarwin, Ctx); + Expr = PPCMCExpr::createHa(Expr, IsDarwin, Ctx); break; } @@ -148,20 +148,20 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, } void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AsmPrinter &AP, bool isDarwin) { + AsmPrinter &AP, bool IsDarwin) { OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MCOperand MCOp; if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, - isDarwin)) + IsDarwin)) OutMI.addOperand(MCOp); } } bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &OutMO, AsmPrinter &AP, - bool isDarwin) { + bool IsDarwin) { switch (MO.getType()) { default: llvm_unreachable("unknown operand type"); @@ -181,17 +181,20 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, return true; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: - OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin); + OutMO = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, IsDarwin); return true; case MachineOperand::MO_JumpTableIndex: - OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_ConstantPoolIndex: - OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin); + OutMO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, IsDarwin); return true; case MachineOperand::MO_BlockAddress: OutMO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP, - isDarwin); + IsDarwin); + return true; + case MachineOperand::MO_MCSymbol: + OutMO = GetSymbolRef(MO, MO.getMCSymbol(), AP, IsDarwin); return true; case 
MachineOperand::MO_RegisterMask: return false; diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index 446246358e9..ac8ac060f46 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -148,8 +148,8 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, if (!Op->isReg()) return nullptr; - unsigned Reg = Op->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + Register Reg = Op->getReg(); + if (!Register::isVirtualRegister(Reg)) return nullptr; return MRI->getVRegDef(Reg); @@ -344,8 +344,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned TrueReg2 = TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); - if (TrueReg1 == TrueReg2 - && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { + if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) { MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; @@ -358,7 +357,7 @@ bool PPCMIPeephole::simplifyCode(void) { return false; unsigned DefReg = TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + if (Register::isVirtualRegister(DefReg)) { MachineInstr *LoadMI = MRI->getVRegDef(DefReg); if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) return true; @@ -444,7 +443,7 @@ bool PPCMIPeephole::simplifyCode(void) { unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 
1 : 2; unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); if (!DefMI) @@ -453,8 +452,8 @@ bool PPCMIPeephole::simplifyCode(void) { auto isConvertOfSplat = [=]() -> bool { if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS) return false; - unsigned ConvReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(ConvReg)) + Register ConvReg = DefMI->getOperand(1).getReg(); + if (!Register::isVirtualRegister(ConvReg)) return false; MachineInstr *Splt = MRI->getVRegDef(ConvReg); return Splt && (Splt->getOpcode() == PPC::LXVWSX || @@ -481,9 +480,9 @@ bool PPCMIPeephole::simplifyCode(void) { // Splat fed by a shift. Usually when we align value to splat into // vector element zero. if (DefOpcode == PPC::XXSLDWI) { - unsigned ShiftRes = DefMI->getOperand(0).getReg(); - unsigned ShiftOp1 = DefMI->getOperand(1).getReg(); - unsigned ShiftOp2 = DefMI->getOperand(2).getReg(); + Register ShiftRes = DefMI->getOperand(0).getReg(); + Register ShiftOp1 = DefMI->getOperand(1).getReg(); + Register ShiftOp2 = DefMI->getOperand(2).getReg(); unsigned ShiftImm = DefMI->getOperand(3).getImm(); unsigned SplatImm = MI.getOperand(2).getImm(); if (ShiftOp1 == ShiftOp2) { @@ -507,7 +506,7 @@ bool PPCMIPeephole::simplifyCode(void) { // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. 
unsigned TrueReg = TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) + if (!Register::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); @@ -518,8 +517,8 @@ bool PPCMIPeephole::simplifyCode(void) { TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); unsigned DefsReg2 = TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); - if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) || - !TargetRegisterInfo::isVirtualRegister(DefsReg2)) + if (!Register::isVirtualRegister(DefsReg1) || + !Register::isVirtualRegister(DefsReg2)) break; MachineInstr *P1 = MRI->getVRegDef(DefsReg1); MachineInstr *P2 = MRI->getVRegDef(DefsReg2); @@ -533,8 +532,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (RoundInstr->getOpcode() == PPC::FRSP && MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) { Simplified = true; - unsigned ConvReg1 = RoundInstr->getOperand(1).getReg(); - unsigned FRSPDefines = RoundInstr->getOperand(0).getReg(); + Register ConvReg1 = RoundInstr->getOperand(1).getReg(); + Register FRSPDefines = RoundInstr->getOperand(0).getReg(); MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines)); for (int i = 0, e = Use.getNumOperands(); i < e; ++i) if (Use.getOperand(i).isReg() && @@ -566,8 +565,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSH8: case PPC::EXTSH8_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(NarrowReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -610,8 +609,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::EXTSW_32: case PPC::EXTSW_32_64: { if (!EnableSExtElimination) break; - unsigned NarrowReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + Register NarrowReg = MI.getOperand(1).getReg(); + if 
(!Register::isVirtualRegister(NarrowReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); @@ -652,8 +651,8 @@ bool PPCMIPeephole::simplifyCode(void) { // We can eliminate EXTSW if the input is known to be already // sign-extended. LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n"); - unsigned TmpReg = - MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + Register TmpReg = + MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), TmpReg); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG), @@ -679,8 +678,8 @@ bool PPCMIPeephole::simplifyCode(void) { if (MI.getOperand(2).getImm() != 0) break; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) break; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -695,8 +694,8 @@ bool PPCMIPeephole::simplifyCode(void) { SrcMI = SubRegMI; if (SubRegMI->getOpcode() == PPC::COPY) { - unsigned CopyReg = SubRegMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + Register CopyReg = SubRegMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(CopyReg)) SrcMI = MRI->getVRegDef(CopyReg); } @@ -757,7 +756,7 @@ bool PPCMIPeephole::simplifyCode(void) { break; // We don't have an ADD fed by LI's that can be transformed // Now we know that Op1 is the PHI node and Op2 is the dominator - unsigned DominatorReg = Op2.getReg(); + Register DominatorReg = Op2.getReg(); const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8 ? 
&PPC::G8RC_and_G8RC_NOX0RegClass @@ -927,7 +926,7 @@ static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1, } else if (Inst->isFullCopy()) NextReg = Inst->getOperand(1).getReg(); - if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg)) + if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg)) break; SrcReg = NextReg; } @@ -949,9 +948,8 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, (*BII).getOpcode() == PPC::BCC && (*BII).getOperand(1).isReg()) { // We optimize only if the condition code is used only by one BCC. - unsigned CndReg = (*BII).getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(CndReg) || - !MRI->hasOneNonDBGUse(CndReg)) + Register CndReg = (*BII).getOperand(1).getReg(); + if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg)) return false; MachineInstr *CMPI = MRI->getVRegDef(CndReg); @@ -961,7 +959,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB, // We skip this BB if a physical register is used in comparison. for (MachineOperand &MO : CMPI->operands()) - if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) return false; return true; @@ -1271,8 +1269,8 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { // We touch up the compare instruction in MBB2 and move it to // a previous BB to handle partially redundant case. 
if (SwapOperands) { - unsigned Op1 = CMPI2->getOperand(1).getReg(); - unsigned Op2 = CMPI2->getOperand(2).getReg(); + Register Op1 = CMPI2->getOperand(1).getReg(); + Register Op2 = CMPI2->getOperand(2).getReg(); CMPI2->getOperand(1).setReg(Op2); CMPI2->getOperand(2).setReg(Op1); } @@ -1295,7 +1293,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2)); DebugLoc DL = CMPI2->getDebugLoc(); - unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); + Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass); BuildMI(MBB2, MBB2.begin(), DL, TII->get(PPC::PHI), NewVReg) .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1) @@ -1334,8 +1332,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) { if (MI.getOpcode() != PPC::RLDICR) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1414,8 +1412,8 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (SHMI + MEMI != 63) return false; - unsigned SrcReg = MI.getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + Register SrcReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(SrcReg)) return false; MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); @@ -1428,6 +1426,12 @@ bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI, if (!MRI->hasOneNonDBGUse(SrcReg)) return false; + assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands"); + assert(SrcMI->getOperand(1).isReg() && + "EXTSW's second operand should be a register"); + if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) + return false; + LLVM_DEBUG(dbgs() << "Combining pair: "); LLVM_DEBUG(SrcMI->dump()); LLVM_DEBUG(MI.dump()); diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp 
b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index d83c9227680..b1c0433641d 100644 --- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -57,6 +57,109 @@ namespace { MachineFunctionProperties::Property::NoVRegs); } + // This function removes any redundant load immediates. It has two level + // loops - The outer loop finds the load immediates BBI that could be used + // to replace following redundancy. The inner loop scans instructions that + // after BBI to find redundancy and update kill/dead flags accordingly. If + // AfterBBI is the same as BBI, it is redundant, otherwise any instructions + // that modify the def register of BBI would break the scanning. + // DeadOrKillToUnset is a pointer to the previous operand that had the + // kill/dead flag set. It keeps track of the def register of BBI, the use + // registers of AfterBBIs and the def registers of AfterBBIs. + bool removeRedundantLIs(MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; + MBB.dump(); dbgs() << "\n"); + + DenseSet InstrsToErase; + for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { + // Skip load immediate that is marked to be erased later because it + // cannot be used to replace any other instructions. + if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) + continue; + // Skip non-load immediate. + unsigned Opc = BBI->getOpcode(); + if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && + Opc != PPC::LIS8) + continue; + // Skip load immediate, where the operand is a relocation (e.g., $r3 = + // LI target-flags(ppc-lo) %const.0). 
+ if (!BBI->getOperand(1).isImm()) + continue; + assert(BBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + + LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); + + Register Reg = BBI->getOperand(0).getReg(); + int64_t Imm = BBI->getOperand(1).getImm(); + MachineOperand *DeadOrKillToUnset = nullptr; + if (BBI->getOperand(0).isDead()) { + DeadOrKillToUnset = &BBI->getOperand(0); + LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset + << " from load immediate " << *BBI + << " is a unsetting candidate\n"); + } + // This loop scans instructions after BBI to see if there is any + // redundant load immediate. + for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); + ++AfterBBI) { + // Track the operand that kill Reg. We would unset the kill flag of + // the operand if there is a following redundant load immediate. + int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); + if (KillIdx != -1) { + assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); + DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); + LLVM_DEBUG(dbgs() + << " Kill flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + } + + if (!AfterBBI->modifiesRegister(Reg, TRI)) + continue; + // Finish scanning because Reg is overwritten by a non-load + // instruction. + if (AfterBBI->getOpcode() != Opc) + break; + assert(AfterBBI->getOperand(0).isReg() && + "Expected a register for the first operand"); + // Finish scanning because Reg is overwritten by a relocation or a + // different value. + if (!AfterBBI->getOperand(1).isImm() || + AfterBBI->getOperand(1).getImm() != Imm) + break; + + // It loads same immediate value to the same Reg, which is redundant. + // We would unset kill flag in previous Reg usage to extend live range + // of Reg first, then remove the redundancy. 
+ if (DeadOrKillToUnset) { + LLVM_DEBUG(dbgs() + << " Unset dead/kill flag of " << *DeadOrKillToUnset + << " from " << *DeadOrKillToUnset->getParent()); + if (DeadOrKillToUnset->isDef()) + DeadOrKillToUnset->setIsDead(false); + else + DeadOrKillToUnset->setIsKill(false); + } + DeadOrKillToUnset = + AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); + if (DeadOrKillToUnset) + LLVM_DEBUG(dbgs() + << " Dead flag of " << *DeadOrKillToUnset << " from " + << *AfterBBI << " is a unsetting candidate\n"); + InstrsToErase.insert(&*AfterBBI); + LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; + AfterBBI->dump()); + } + } + + for (MachineInstr *MI : InstrsToErase) { + MI->eraseFromParent(); + } + NumRemovedInPreEmit += InstrsToErase.size(); + return !InstrsToErase.empty(); + } + bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) return false; @@ -65,6 +168,7 @@ namespace { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) { + Changed |= removeRedundantLIs(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); // Detect self copies - these can result from running AADB. 
@@ -111,7 +215,7 @@ namespace { if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) continue; MachineInstr *CRSetMI = nullptr; - unsigned CRBit = Br->getOperand(0).getReg(); + Register CRBit = Br->getOperand(0).getReg(); unsigned CRReg = getCRFromCRBit(CRBit); bool SeenUse = false; MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp index 3a83cc27439..6e904264382 100644 --- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp +++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { for (auto SI = Splats.begin(); SI != Splats.end();) { MachineInstr *SMI = *SI; - unsigned SplatReg = SMI->getOperand(0).getReg(); - unsigned SrcReg = SMI->getOperand(1).getReg(); + Register SplatReg = SMI->getOperand(0).getReg(); + Register SrcReg = SMI->getOperand(1).getReg(); if (MI->modifiesRegister(SrcReg, TRI)) { switch (MI->getOpcode()) { @@ -102,7 +102,7 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { // the QPX splat source register. unsigned SubRegIndex = TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg()); - unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); + Register SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex); // Substitute both the explicit defined register, and also the // implicit def of the containing QPX register. 
diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 8eaa6dfe2bf..3b71ed219c1 100644 --- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -381,10 +381,10 @@ private: const MachineBranchProbabilityInfo *MBPI; // A vector to contain all the CR logical operations - std::vector AllCRLogicalOps; + SmallVector AllCRLogicalOps; void initialize(MachineFunction &MFParm); void collectCRLogicals(); - bool handleCROp(CRLogicalOpInfo &CRI); + bool handleCROp(unsigned Idx); bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); static bool isCRLogical(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -398,7 +398,7 @@ private: // Not using a range-based for loop here as the vector may grow while being // operated on. for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) - Changed |= handleCROp(AllCRLogicalOps[i]); + Changed |= handleCROp(i); return Changed; } @@ -535,15 +535,15 @@ MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, unsigned &Subreg, MachineInstr *&CpDef) { Subreg = -1; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; MachineInstr *Copy = MRI->getVRegDef(Reg); CpDef = Copy; if (!Copy->isCopy()) return Copy; - unsigned CopySrc = Copy->getOperand(1).getReg(); + Register CopySrc = Copy->getOperand(1).getReg(); Subreg = Copy->getOperand(1).getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) { + if (!Register::isVirtualRegister(CopySrc)) { const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); // Set the Subreg if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) @@ -578,10 +578,11 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { /// a unary CR logical might be used to change the condition code on a /// comparison feeding it. A nullary CR logical might simply be removable /// if the user of the bit it [un]sets can be transformed. 
-bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) { +bool PPCReduceCRLogicals::handleCROp(unsigned Idx) { // We can definitely split a block on the inputs to a binary CR operation // whose defs and (single) use are within the same block. bool Changed = false; + CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && CRI.DefsSingleUse) { Changed = splitBlockOnBinaryCROp(CRI); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 12554ea8d07..9ec26a19bda 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -325,13 +325,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool IsPositionIndependent = TM.isPositionIndependent(); if (hasBasePointer(MF)) { - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R29); else markSuperRegs(Reserved, PPC::R30); } - if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent) + if (Subtarget.is32BitELFABI() && IsPositionIndependent) markSuperRegs(Reserved, PPC::R30); // Reserve Altivec registers when Altivec is unavailable. 
@@ -391,7 +391,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); const PPCSubtarget &Subtarget = MF.getSubtarget(); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (!TM.isPPC64()) @@ -425,7 +425,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::G8RC_NOX0RegClassID: case PPC::GPRC_NOR0RegClassID: case PPC::SPERCRegClassID: - case PPC::SPE4RCRegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -527,7 +526,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { if (LP64) @@ -549,7 +548,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { } bool KillNegSizeReg = MI.getOperand(1).isKill(); - unsigned NegSizeReg = MI.getOperand(1).getReg(); + Register NegSizeReg = MI.getOperand(1).getReg(); // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. @@ -655,8 +654,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? 
G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. @@ -700,8 +699,8 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -744,8 +743,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register SrcReg = MI.getOperand(0).getReg(); // Search up the BB to find the definition of the CR bit. MachineBasicBlock::reverse_iterator Ins; @@ -823,8 +822,8 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(LP64 ? 
G8RC : GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CRBIT does not define its destination"); @@ -833,7 +832,7 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg); - unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + Register RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO) .addReg(getCRFromCRBit(DestReg)); @@ -870,8 +869,8 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register SrcReg = MI.getOperand(0).getReg(); BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); @@ -896,8 +895,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, DebugLoc dl = MI.getDebugLoc(); const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); - unsigned DestReg = MI.getOperand(0).getReg(); + Register Reg = MF.getRegInfo().createVirtualRegister(GPRC); + Register DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_VRSAVE does not define its destination"); @@ -1128,7 +1127,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); + Register StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git 
a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index af0dff6347a..4719e947b17 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -253,15 +253,14 @@ def RM: PPCReg<"**ROUNDING MODE**">; /// Register classes // Allocate volatiles first // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 -def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), - (sequence "R%u", 30, 13), - R31, R0, R1, FP, BP)> { +def GPRC : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "R%u", 2, 12), + (sequence "R%u", 30, 13), + R31, R0, R1, FP, BP)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. let AltOrders = [(add (sub GPRC, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -272,21 +271,19 @@ def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), // put it at the end of the list. let AltOrders = [(add (sub G8RC, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } // For some instructions r0 is special (representing the value 0 instead of // the value in the r0 register), and we use these register subclasses to // prevent r0 from being allocated for use by those instructions. -def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)> { +def GPRC_NOR0 : RegisterClass<"PPC", [i32,f32], 32, (add (sub GPRC, R0), ZERO)> { // On non-Darwin PPC64 systems, R2 can be allocated, but must be restored, so // put it at the end of the list. 
let AltOrders = [(add (sub GPRC_NOR0, R2), R2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -295,8 +292,7 @@ def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)> { // put it at the end of the list. let AltOrders = [(add (sub G8RC_NOX0, X2), X2)]; let AltOrderSelect = [{ - const PPCSubtarget &S = MF.getSubtarget(); - return S.isPPC64() && S.isSVR4ABI(); + return MF.getSubtarget().is64BitELFABI(); }]; } @@ -304,8 +300,6 @@ def SPERC : RegisterClass<"PPC", [f64], 64, (add (sequence "S%u", 2, 12), (sequence "S%u", 30, 13), S31, S0, S1)>; -def SPE4RC : RegisterClass<"PPC", [f32], 32, (add GPRC)>; - // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the // allocated non-volatile register with the lowest register number, as FP diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 6aa7528634d..10568ed4b65 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -60,7 +60,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { - StackAlignment = 16; + StackAlignment = Align(16); DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; @@ -145,7 +145,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; - if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || + if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) || + TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() || TargetTriple.isMusl()) SecurePlt = true; @@ -228,18 +229,13 @@ bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } -unsigned char 
-PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const { - // Note that currently we don't generate non-pic references. - // If a caller wants that, this will have to be updated. - +bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { // Large code model always uses the TOC even for local symbols. if (TM.getCodeModel() == CodeModel::Large) - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; - + return true; if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) - return PPCII::MO_PIC_FLAG; - return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; + return false; + return true; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 55fec1cb6d9..d96c2893aee 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,7 +78,7 @@ protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned StackAlignment; + Align StackAlignment; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -166,7 +166,7 @@ public: /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. - unsigned getStackAlignment() const { return StackAlignment; } + Align getStackAlignment() const { return StackAlignment; } /// getDarwinDirective - Returns the -m directive specified for the cpu. /// @@ -210,7 +210,11 @@ public: /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } // useSoftFloat - Return true if soft-float option is turned on. 
- bool useSoftFloat() const { return !HasHardFloat; } + bool useSoftFloat() const { + if (isAIXABI() && !HasHardFloat) + report_fatal_error("soft-float is not yet supported on AIX."); + return !HasHardFloat; + } /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only true if @@ -277,11 +281,11 @@ public: bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } - unsigned getPlatformStackAlignment() const { + Align getPlatformStackAlignment() const { if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned()) - return 32; + return Align(32); - return 16; + return Align(16); } // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no @@ -316,6 +320,9 @@ public: bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); } bool isELFv2ABI() const; + bool is64BitELFABI() const { return isSVR4ABI() && isPPC64(); } + bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); } + /// Originally, this function return hasISEL(). Now we always enable it, /// but may expand the ISEL instruction later. bool enableEarlyIfConversion() const override { return true; } @@ -337,9 +344,8 @@ public: bool enableSubRegLiveness() const override; - /// classifyGlobalReference - Classify a global variable reference for the - /// current subtarget accourding to how we should reference it. - unsigned char classifyGlobalReference(const GlobalValue *GV) const; + /// True if the GV will be accessed via an indirect symbol. 
+ bool isGVIndirectSymbol(const GlobalValue *GV) const; bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index fb826c4a32f..8f313d9d01c 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -74,8 +74,8 @@ protected: LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); - unsigned OutReg = MI.getOperand(0).getReg(); - unsigned InReg = MI.getOperand(1).getReg(); + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3; unsigned Opc1, Opc2; diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 3eb0569fb95..895ae674442 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -95,7 +95,8 @@ namespace { protected: bool hasTOCLoReloc(const MachineInstr &MI) { if (MI.getOpcode() == PPC::LDtocL || - MI.getOpcode() == PPC::ADDItocL) + MI.getOpcode() == PPC::ADDItocL || + MI.getOpcode() == PPC::LWZtocL) return true; for (const MachineOperand &MO : MI.operands()) { @@ -109,11 +110,15 @@ protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + const bool isPPC64 = + MBB.getParent()->getSubtarget().isPPC64(); + const unsigned TOCReg = isPPC64 ? 
PPC::X2 : PPC::R2; + for (auto &MI : MBB) { if (!hasTOCLoReloc(MI)) continue; - MI.addOperand(MachineOperand::CreateReg(PPC::X2, + MI.addOperand(MachineOperand::CreateReg(TOCReg, false /*IsDef*/, true /*IsImp*/)); Changed = true; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ce00f848dd7..abefee8b339 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,7 +93,7 @@ EnableMachineCombinerPass("ppc-machine-combiner", static cl::opt ReduceCRLogical("ppc-reduce-cr-logicals", cl::desc("Expand eligible cr-logical binary ops to branches"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(getThePPC32Target()); @@ -185,12 +185,13 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, } static std::unique_ptr createTLOF(const Triple &TT) { - // If it isn't a Mach-O file then it's going to be a linux ELF - // object file. 
if (TT.isOSDarwin()) - return llvm::make_unique(); + return std::make_unique(); - return llvm::make_unique(); + if (TT.isOSAIX()) + return std::make_unique(); + + return std::make_unique(); } static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, @@ -248,10 +249,19 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, report_fatal_error("Target does not support the kernel CodeModel", false); return *CM; } - if (!TT.isOSDarwin() && !JIT && - (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) - return CodeModel::Medium; - return CodeModel::Small; + + if (JIT) + return CodeModel::Small; + if (TT.isOSAIX()) + return CodeModel::Small; + + assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based."); + + if (TT.isArch32Bit()) + return CodeModel::Small; + + assert(TT.isArch64Bit() && "Unsupported PPC architecture."); + return CodeModel::Medium; } @@ -259,8 +269,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { const PPCSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? - llvm::make_unique(C) : - llvm::make_unique(C)); + std::make_unique(C) : + std::make_unique(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; @@ -271,8 +281,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( const PPCSubtarget &ST = C->MF->getSubtarget(); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? - llvm::make_unique(C) : - llvm::make_unique(C), true); + std::make_unique(C) : + std::make_unique(C), true); // add DAG Mutations here. return DAG; } @@ -328,7 +338,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = llvm::make_unique( + I = std::make_unique( TargetTriple, CPU, // FIXME: It would be good to have the subtarget additions here // not necessary. Anything that turns them on/off (overrides) ends diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index ff3dfbfaca0..f51300c656a 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } -unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { - if (Vector && !ST->hasAltivec() && !ST->hasQPX()) - return 0; - return ST->hasVSX() ? 64 : 32; +unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + assert(ClassID == GPRRC || ClassID == FPRRC || + ClassID == VRRC || ClassID == VSXRC); + if (ST->hasVSX()) { + assert(ClassID == GPRRC || ClassID == VSXRC); + return ClassID == GPRRC ? 32 : 64; + } + assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC); + return 32; +} + +unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const { + if (Vector) + return ST->hasVSX() ? VSXRC : VRRC; + else if (Ty && Ty->getScalarType()->isFloatTy()) + return ST->hasVSX() ? VSXRC : FPRRC; + else + return GPRRC; +} + +const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const { + + switch (ClassID) { + default: + llvm_unreachable("unknown register class"); + return "PPC::unknown register class"; + case GPRRC: return "PPC::GPRRC"; + case FPRRC: return "PPC::FPRRC"; + case VRRC: return "PPC::VRRC"; + case VSXRC: return "PPC::VSXRC"; + } } unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { @@ -613,7 +640,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { } -unsigned PPCTTIImpl::getCacheLineSize() { +unsigned PPCTTIImpl::getCacheLineSize() const { // Check first if the user specified a custom line size. 
if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; @@ -628,7 +655,7 @@ unsigned PPCTTIImpl::getCacheLineSize() { return 64; } -unsigned PPCTTIImpl::getPrefetchDistance() { +unsigned PPCTTIImpl::getPrefetchDistance() const { // This seems like a reasonable default for the BG/Q (this pass is enabled, by // default, only on the BG/Q). return 300; @@ -752,6 +779,35 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { return 0; return Cost; + + } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) { + if (ST->hasP9Altivec()) { + if (ISD == ISD::INSERT_VECTOR_ELT) + // A move-to VSR and a permute/insert. Assume vector operation cost + // for both (cost will be 2x on P9). + return vectorCostAdjustment(2, Opcode, Val, nullptr); + + // It's an extract. Maybe we can do a cheap move-from VSR. + unsigned EltSize = Val->getScalarSizeInBits(); + if (EltSize == 64) { + unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0; + if (Index == MfvsrdIndex) + return 1; + } else if (EltSize == 32) { + unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1; + if (Index == MfvsrwzIndex) + return 1; + } + + // We need a vector extract (or mfvsrld). Assume vector operation cost. + // The cost of the load constant for a vector extract is disregarded + // (invariant, easily schedulable). + return vectorCostAdjustment(1, Opcode, Val, nullptr); + + } else if (ST->hasDirectMove()) + // Assume permute has standard cost. + // Assume move-to/move-from VSR have 2x standard cost. + return 3; } // Estimated cost of a load-hit-store delay. 
This was obtained diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 5d76ee418b6..83a70364bf6 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -72,10 +72,16 @@ public: TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool enableInterleavedAccessVectorization(); - unsigned getNumberOfRegisters(bool Vector); + + enum PPCRegisterClass { + GPRRC, FPRRC, VRRC, VSXRC + }; + unsigned getNumberOfRegisters(unsigned ClassID) const; + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; + const char* getRegisterClassName(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize(); - unsigned getPrefetchDistance(); + unsigned getCacheLineSize() const override; + unsigned getPrefetchDistance() const override; unsigned getMaxInterleaveFactor(unsigned VF); int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 719ed7b6387..3463bbbdc5f 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -50,7 +50,7 @@ namespace { bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, MachineRegisterInfo &MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { return RC->hasSubClassEq(MRI.getRegClass(Reg)); } else if (RC->contains(Reg)) { return true; @@ -102,7 +102,7 @@ protected: IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); - unsigned NewVReg = MRI.createVirtualRegister(SrcRC); + Register NewVReg = MRI.createVirtualRegister(SrcRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) .addImm(1) // add 1, not 0, because there is no implicit clearing @@ -124,7 +124,7 @@ protected: "Unknown destination for 
a VSX copy"); // Copy the VSX value into a new VSX register of the correct subclass. - unsigned NewVReg = MRI.createVirtualRegister(DstRC); + Register NewVReg = MRI.createVirtualRegister(DstRC); BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg) .add(SrcMO); diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index ce78239df0a..5e150be544e 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -126,8 +126,8 @@ protected: if (!AddendMI->isFullCopy()) continue; - unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { + Register AddendSrcReg = AddendMI->getOperand(1).getReg(); + if (Register::isVirtualRegister(AddendSrcReg)) { if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != MRI.getRegClass(AddendSrcReg)) continue; @@ -182,12 +182,12 @@ protected: // %5 = A-form-op %5, %5, %11; // where %5 and %11 are both kills. This case would be skipped // otherwise. - unsigned OldFMAReg = MI.getOperand(0).getReg(); + Register OldFMAReg = MI.getOperand(0).getReg(); // Find one of the product operands that is killed by this instruction. unsigned KilledProdOp = 0, OtherProdOp = 0; - unsigned Reg2 = MI.getOperand(2).getReg(); - unsigned Reg3 = MI.getOperand(3).getReg(); + Register Reg2 = MI.getOperand(2).getReg(); + Register Reg3 = MI.getOperand(3).getReg(); if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() && Reg2 != OldFMAReg) { KilledProdOp = 2; @@ -208,14 +208,14 @@ protected: // legality checks above, the live range for the addend source register // could be extended), but it seems likely that such a trivial copy can // be coalesced away later, and thus is not worth the effort. - if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + if (Register::isVirtualRegister(AddendSrcReg) && !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. 
- unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg(); - unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg(); + Register KilledProdReg = MI.getOperand(KilledProdOp).getReg(); + Register OtherProdReg = MI.getOperand(OtherProdOp).getReg(); unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg(); @@ -314,7 +314,7 @@ protected: // Extend the live interval of the addend source (it might end at the // copy to be removed, or somewhere in between there and here). This // is necessary only if it is a physical register. - if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) + if (!Register::isVirtualRegister(AddendSrcReg)) for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 44175af7f9b..c3729da0b07 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -158,7 +158,7 @@ private: // Return true iff the given register is in the given class. 
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return RC->hasSubClassEq(MRI->getRegClass(Reg)); return RC->contains(Reg); } @@ -253,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; @@ -566,7 +566,7 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg, CopySrcReg = MI->getOperand(2).getReg(); } - if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) { + if (!Register::isVirtualRegister(CopySrcReg)) { if (!isScalarVecReg(CopySrcReg)) SwapVector[VecIdx].MentionsPhysVR = 1; return CopySrcReg; @@ -601,11 +601,11 @@ void PPCVSXSwapRemoval::formWebs() { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Register::isVirtualRegister(Reg)) { if (!(MI->isCopy() && isScalarVecReg(Reg))) SwapVector[EntryIdx].MentionsPhysVR = 1; continue; @@ -667,7 +667,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // than a swap instruction. else if (SwapVector[EntryIdx].IsLoad && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); // We skip debug instructions in the analysis. (Note that debug // location information is still maintained by this optimization @@ -695,9 +695,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { // other than a swap instruction. 
} else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); - unsigned DefReg = DefMI->getOperand(0).getReg(); + Register DefReg = DefMI->getOperand(0).getReg(); int DefIdx = SwapMap[DefMI]; if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad || @@ -756,7 +756,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned DefReg = MI->getOperand(0).getReg(); + Register DefReg = MI->getOperand(0).getReg(); for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) { int UseIdx = SwapMap[&UseMI]; @@ -772,7 +772,7 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { if (!SwapVector[Repr].WebRejected) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; - unsigned UseReg = MI->getOperand(0).getReg(); + Register UseReg = MI->getOperand(0).getReg(); MachineInstr *DefMI = MRI->getVRegDef(UseReg); int DefIdx = SwapMap[DefMI]; SwapVector[DefIdx].WillRemove = 1; @@ -869,8 +869,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { Selector = 3 - Selector; MI->getOperand(3).setImm(Selector); - unsigned Reg1 = MI->getOperand(1).getReg(); - unsigned Reg2 = MI->getOperand(2).getReg(); + Register Reg1 = MI->getOperand(1).getReg(); + Register Reg2 = MI->getOperand(2).getReg(); MI->getOperand(1).setReg(Reg2); MI->getOperand(2).setReg(Reg1); @@ -894,9 +894,9 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { LLVM_DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); LLVM_DEBUG(MI->dump()); - unsigned DstReg = MI->getOperand(0).getReg(); + Register DstReg = MI->getOperand(0).getReg(); const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); 
MI->getOperand(0).setReg(NewVReg); LLVM_DEBUG(dbgs() << " Into: "); @@ -910,8 +910,8 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { // prior to the swap, and from VSRC to VRRC following the swap. // Coalescing will usually remove all this mess. if (DstRC == &PPC::VRRCRegClass) { - unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); - unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + Register VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), TII->get(PPC::COPY), VSRCTmp1) diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 0172c629877..300ba8dc675 100644 --- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -79,7 +80,7 @@ class RISCVAsmParser : public MCTargetAsmParser { // Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that // synthesize the desired immedate value into the destination register. - void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out); + void emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out); // Helper to emit a combination of AUIPC and SecondOpcode. Used to implement // helpers such as emitLoadLocalAddress and emitLoadAddress. 
@@ -127,6 +128,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseRegister(OperandVector &Operands, bool AllowParens = false); OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands); + OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands); OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands); OperandMatchResultTy parseBareSymbol(OperandVector &Operands); OperandMatchResultTy parseCallSymbol(OperandVector &Operands); @@ -193,7 +195,7 @@ public: /// instruction struct RISCVOperand : public MCParsedAsmOperand { - enum KindTy { + enum class KindTy { Token, Register, Immediate, @@ -203,7 +205,7 @@ struct RISCVOperand : public MCParsedAsmOperand { bool IsRV64; struct RegOp { - unsigned RegNum; + Register RegNum; }; struct ImmOp { @@ -235,26 +237,26 @@ public: StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { - case Register: + case KindTy::Register: Reg = o.Reg; break; - case Immediate: + case KindTy::Immediate: Imm = o.Imm; break; - case Token: + case KindTy::Token: Tok = o.Tok; break; - case SystemRegister: + case KindTy::SystemRegister: SysReg = o.SysReg; break; } } - bool isToken() const override { return Kind == Token; } - bool isReg() const override { return Kind == Register; } - bool isImm() const override { return Kind == Immediate; } + bool isToken() const override { return Kind == KindTy::Token; } + bool isReg() const override { return Kind == KindTy::Register; } + bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } - bool isSystemRegister() const { return Kind == SystemRegister; } + bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { @@ -276,7 +278,7 @@ public: // modifiers and isShiftedInt(Op). 
template bool isBareSimmNLsb0() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -292,7 +294,7 @@ public: bool isBareSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -302,7 +304,7 @@ public: bool isCallSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -313,7 +315,7 @@ public: bool isTPRelAddSymbol() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm, VK)) return false; @@ -364,7 +366,7 @@ public: bool isImmXLenLI() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -372,13 +374,13 @@ public: return true; // Given only Imm, ensuring that the actually specified constant is either // a signed or unsigned 64-bit number is unfortunately impossible. - bool IsInRange = isRV64() ? 
true : isInt<32>(Imm) || isUInt<32>(Imm); - return IsConstantImm && IsInRange && VK == RISCVMCExpr::VK_RISCV_None; + return IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None && + (isRV64() || (isInt<32>(Imm) || isUInt<32>(Imm))); } bool isUImmLog2XLen() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; if (!evaluateConstantImm(getImm(), Imm, VK) || @@ -389,7 +391,7 @@ public: bool isUImmLog2XLenNonZero() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; if (!evaluateConstantImm(getImm(), Imm, VK) || @@ -402,7 +404,7 @@ public: bool isUImm5() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -411,7 +413,7 @@ public: bool isUImm5NonZero() const { int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; if (!isImm()) return false; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); @@ -422,7 +424,7 @@ public: bool isSImm6() const { if (!isImm()) return false; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && @@ -432,7 +434,7 @@ public: bool isSImm6NonZero() const { if (!isImm()) return false; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && (Imm != 0) && @@ -443,7 +445,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return 
IsConstantImm && (Imm != 0) && (isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) && @@ -454,7 +456,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<5, 2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -464,7 +466,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<6, 2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -474,7 +476,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<5, 3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -486,7 +488,7 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<6, 3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; @@ -496,14 +498,14 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) && VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm12() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -527,14 +529,14 @@ public: if (!isImm()) return false; int64_t Imm; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstantImm = 
evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } bool isUImm20LUI() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -552,7 +554,7 @@ public: } bool isUImm20AUIPC() const { - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; int64_t Imm; bool IsValid; if (!isImm()) @@ -575,6 +577,15 @@ public: bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); } + bool isImmZero() const { + if (!isImm()) + return false; + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && (Imm == 0) && VK == RISCVMCExpr::VK_RISCV_None; + } + /// getStartLoc - Gets location of the first token of this operand SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Gets location of the last token of this operand @@ -583,38 +594,38 @@ public: bool isRV64() const { return IsRV64; } unsigned getReg() const override { - assert(Kind == Register && "Invalid type access!"); - return Reg.RegNum; + assert(Kind == KindTy::Register && "Invalid type access!"); + return Reg.RegNum.id(); } StringRef getSysReg() const { - assert(Kind == SystemRegister && "Invalid access!"); + assert(Kind == KindTy::SystemRegister && "Invalid access!"); return StringRef(SysReg.Data, SysReg.Length); } const MCExpr *getImm() const { - assert(Kind == Immediate && "Invalid type access!"); + assert(Kind == KindTy::Immediate && "Invalid type access!"); return Imm.Val; } StringRef getToken() const { - assert(Kind == Token && "Invalid type access!"); + assert(Kind == KindTy::Token && "Invalid type access!"); return Tok; } void print(raw_ostream &OS) const override { switch (Kind) { - case Immediate: + case KindTy::Immediate: OS << *getImm(); break; - case Register: + case 
KindTy::Register: OS << ""; break; - case Token: + case KindTy::Token: OS << "'" << getToken() << "'"; break; - case SystemRegister: + case KindTy::SystemRegister: OS << "'; break; } @@ -622,7 +633,7 @@ public: static std::unique_ptr createToken(StringRef Str, SMLoc S, bool IsRV64) { - auto Op = make_unique(Token); + auto Op = std::make_unique(KindTy::Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; @@ -632,7 +643,7 @@ public: static std::unique_ptr createReg(unsigned RegNo, SMLoc S, SMLoc E, bool IsRV64) { - auto Op = make_unique(Register); + auto Op = std::make_unique(KindTy::Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; @@ -642,7 +653,7 @@ public: static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, SMLoc E, bool IsRV64) { - auto Op = make_unique(Immediate); + auto Op = std::make_unique(KindTy::Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -652,7 +663,7 @@ public: static std::unique_ptr createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) { - auto Op = make_unique(SystemRegister); + auto Op = std::make_unique(KindTy::SystemRegister); Op->SysReg.Data = Str.data(); Op->SysReg.Length = Str.size(); Op->SysReg.Encoding = Encoding; @@ -664,7 +675,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { assert(Expr && "Expr shouldn't be null!"); int64_t Imm = 0; - RISCVMCExpr::VariantKind VK; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; bool IsConstant = evaluateConstantImm(Expr, Imm, VK); if (IsConstant) @@ -730,46 +741,9 @@ public: #define GET_MATCHER_IMPLEMENTATION #include "RISCVGenAsmMatcher.inc" -// Return the matching FPR64 register for the given FPR32. -// FIXME: Ideally this function could be removed in favour of using -// information from TableGen. 
-unsigned convertFPR32ToFPR64(unsigned Reg) { - switch (Reg) { - default: - llvm_unreachable("Not a recognised FPR32 register"); - case RISCV::F0_32: return RISCV::F0_64; - case RISCV::F1_32: return RISCV::F1_64; - case RISCV::F2_32: return RISCV::F2_64; - case RISCV::F3_32: return RISCV::F3_64; - case RISCV::F4_32: return RISCV::F4_64; - case RISCV::F5_32: return RISCV::F5_64; - case RISCV::F6_32: return RISCV::F6_64; - case RISCV::F7_32: return RISCV::F7_64; - case RISCV::F8_32: return RISCV::F8_64; - case RISCV::F9_32: return RISCV::F9_64; - case RISCV::F10_32: return RISCV::F10_64; - case RISCV::F11_32: return RISCV::F11_64; - case RISCV::F12_32: return RISCV::F12_64; - case RISCV::F13_32: return RISCV::F13_64; - case RISCV::F14_32: return RISCV::F14_64; - case RISCV::F15_32: return RISCV::F15_64; - case RISCV::F16_32: return RISCV::F16_64; - case RISCV::F17_32: return RISCV::F17_64; - case RISCV::F18_32: return RISCV::F18_64; - case RISCV::F19_32: return RISCV::F19_64; - case RISCV::F20_32: return RISCV::F20_64; - case RISCV::F21_32: return RISCV::F21_64; - case RISCV::F22_32: return RISCV::F22_64; - case RISCV::F23_32: return RISCV::F23_64; - case RISCV::F24_32: return RISCV::F24_64; - case RISCV::F25_32: return RISCV::F25_64; - case RISCV::F26_32: return RISCV::F26_64; - case RISCV::F27_32: return RISCV::F27_64; - case RISCV::F28_32: return RISCV::F28_64; - case RISCV::F29_32: return RISCV::F29_64; - case RISCV::F30_32: return RISCV::F30_64; - case RISCV::F31_32: return RISCV::F31_64; - } +static Register convertFPR64ToFPR32(Register Reg) { + assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); + return Reg - RISCV::F0_D + RISCV::F0_F; } unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, @@ -778,17 +752,17 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (!Op.isReg()) return Match_InvalidOperand; - unsigned Reg = Op.getReg(); - bool IsRegFPR32 = - 
RISCVMCRegisterClasses[RISCV::FPR32RegClassID].contains(Reg); - bool IsRegFPR32C = - RISCVMCRegisterClasses[RISCV::FPR32CRegClassID].contains(Reg); + Register Reg = Op.getReg(); + bool IsRegFPR64 = + RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg); + bool IsRegFPR64C = + RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg); // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the - // register from FPR32 to FPR64 or FPR32C to FPR64C if necessary. - if ((IsRegFPR32 && Kind == MCK_FPR64) || - (IsRegFPR32C && Kind == MCK_FPR64C)) { - Op.Reg.RegNum = convertFPR32ToFPR64(Reg); + // register from FPR64 to FPR32 or FPR64C to FPR32C if necessary. + if ((IsRegFPR64 && Kind == MCK_FPR32) || + (IsRegFPR64C && Kind == MCK_FPR32C)) { + Op.Reg.RegNum = convertFPR64ToFPR32(Reg); return Match_Success; } return Match_InvalidOperand; @@ -853,6 +827,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return generateImmOutOfRangeError(Operands, ErrorInfo, std::numeric_limits::min(), std::numeric_limits::max()); + case Match_InvalidImmZero: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "immediate must be zero"); + } case Match_InvalidUImmLog2XLen: if (isRV64()) return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1); @@ -968,14 +946,19 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // alternative ABI names), setting RegNo to the matching register. Upon // failure, returns true and sets RegNo to 0. If IsRV32E then registers // x16-x31 will be rejected. -static bool matchRegisterNameHelper(bool IsRV32E, unsigned &RegNo, +static bool matchRegisterNameHelper(bool IsRV32E, Register &RegNo, StringRef Name) { RegNo = MatchRegisterName(Name); - if (RegNo == 0) + // The 32- and 64-bit FPRs have the same asm name. Check that the initial + // match always matches the 64-bit variant, and not the 32-bit one. 
+ assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F)); + // The default FPR register class is based on the tablegen enum ordering. + static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); + if (RegNo == RISCV::NoRegister) RegNo = MatchRegisterAltName(Name); if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) - RegNo = 0; - return RegNo == 0; + RegNo = RISCV::NoRegister; + return RegNo == RISCV::NoRegister; } bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, @@ -986,7 +969,7 @@ bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); - if (matchRegisterNameHelper(isRV32E(), RegNo, Name)) + if (matchRegisterNameHelper(isRV32E(), (Register&)RegNo, Name)) return Error(StartLoc, "invalid register name"); getParser().Lex(); // Eat identifier token. @@ -1018,10 +1001,10 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, return MatchOperand_NoMatch; case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); - unsigned RegNo; + Register RegNo; matchRegisterNameHelper(isRV32E(), RegNo, Name); - if (RegNo == 0) { + if (RegNo == RISCV::NoRegister) { if (HadParens) getLexer().UnLex(LParen); return MatchOperand_NoMatch; @@ -1208,6 +1191,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { Res = V; } else Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + MCBinaryExpr::Opcode Opcode; + switch (getLexer().getKind()) { + default: + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + break; + } + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) + return MatchOperand_ParseFail; + Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext()); 
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } @@ -1282,6 +1283,73 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) { + // Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand" + // as one of their register operands, such as `(a0)`. This just denotes that + // the register (in this case `a0`) contains a memory address. + // + // Normally, we would be able to parse these by putting the parens into the + // instruction string. However, GNU as also accepts a zero-offset memory + // operand (such as `0(a0)`), and ignores the 0. Normally this would be parsed + // with parseImmediate followed by parseMemOpBaseReg, but these instructions + // do not accept an immediate operand, and we do not want to add a "dummy" + // operand that is silently dropped. + // + // Instead, we use this custom parser. This will: allow (and discard) an + // offset if it is zero; require (and discard) parentheses; and add only the + // parsed register operand to `Operands`. + // + // These operands are printed with RISCVInstPrinter::printAtomicMemOp, which + // will only print the register surrounded by parentheses (which GNU as also + // uses as its canonical representation for these operands). + std::unique_ptr OptionalImmOp; + + if (getLexer().isNot(AsmToken::LParen)) { + // Parse an Integer token. We do not accept arbritrary constant expressions + // in the offset field (because they may include parens, which complicates + // parsing a lot). + int64_t ImmVal; + SMLoc ImmStart = getLoc(); + if (getParser().parseIntToken(ImmVal, + "expected '(' or optional integer offset")) + return MatchOperand_ParseFail; + + // Create a RISCVOperand for checking later (so the error messages are + // nicer), but we don't add it to Operands. 
+ SMLoc ImmEnd = getLoc(); + OptionalImmOp = + RISCVOperand::createImm(MCConstantExpr::create(ImmVal, getContext()), + ImmStart, ImmEnd, isRV64()); + } + + if (getLexer().isNot(AsmToken::LParen)) { + Error(getLoc(), OptionalImmOp ? "expected '(' after optional integer offset" + : "expected '(' or optional integer offset"); + return MatchOperand_ParseFail; + } + getParser().Lex(); // Eat '(' + + if (parseRegister(Operands) != MatchOperand_Success) { + Error(getLoc(), "expected register"); + return MatchOperand_ParseFail; + } + + if (getLexer().isNot(AsmToken::RParen)) { + Error(getLoc(), "expected ')'"); + return MatchOperand_ParseFail; + } + getParser().Lex(); // Eat ')' + + // Deferred Handling of non-zero offsets. This makes the error messages nicer. + if (OptionalImmOp && !OptionalImmOp->isImmZero()) { + Error(OptionalImmOp->getStartLoc(), "optional integer offset must be 0", + SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc())); + return MatchOperand_ParseFail; + } + + return MatchOperand_Success; +} + /// Looks at a token type and creates the relevant operand from this /// information, adding to Operands. If operand was parsed, returns false, else /// true. @@ -1523,12 +1591,12 @@ void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) { S.EmitInstruction((Res ? 
CInst : Inst), getSTI()); } -void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value, +void RISCVAsmParser::emitLoadImm(Register DestReg, int64_t Value, MCStreamer &Out) { RISCVMatInt::InstSeq Seq; RISCVMatInt::generateInstSeq(Value, isRV64(), Seq); - unsigned SrcReg = RISCV::X0; + Register SrcReg = RISCV::X0; for (RISCVMatInt::Inst &Inst : Seq) { if (Inst.Opc == RISCV::LUI) { emitToStreamer( @@ -1682,7 +1750,7 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, default: break; case RISCV::PseudoLI: { - unsigned Reg = Inst.getOperand(0).getReg(); + Register Reg = Inst.getOperand(0).getReg(); const MCOperand &Op1 = Inst.getOperand(1); if (Op1.isExpr()) { // We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar. diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 36200c03f70..15943ba4215 100644 --- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "TargetInfo/RISCVTargetInfo.h" #include "Utils/RISCVBaseInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -56,17 +57,6 @@ extern "C" void LLVMInitializeRISCVDisassembler() { createRISCVDisassembler); } -static const unsigned GPRDecoderTable[] = { - RISCV::X0, RISCV::X1, RISCV::X2, RISCV::X3, - RISCV::X4, RISCV::X5, RISCV::X6, RISCV::X7, - RISCV::X8, RISCV::X9, RISCV::X10, RISCV::X11, - RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, - RISCV::X16, RISCV::X17, RISCV::X18, RISCV::X19, - RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, - RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27, - RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 -}; - static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { @@ -76,38 +66,21 
@@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, .getFeatureBits(); bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; - if (RegNo > array_lengthof(GPRDecoderTable) || (IsRV32E && RegNo > 15)) + if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. - unsigned Reg = GPRDecoderTable[RegNo]; + Register Reg = RISCV::X0 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static const unsigned FPR32DecoderTable[] = { - RISCV::F0_32, RISCV::F1_32, RISCV::F2_32, RISCV::F3_32, - RISCV::F4_32, RISCV::F5_32, RISCV::F6_32, RISCV::F7_32, - RISCV::F8_32, RISCV::F9_32, RISCV::F10_32, RISCV::F11_32, - RISCV::F12_32, RISCV::F13_32, RISCV::F14_32, RISCV::F15_32, - RISCV::F16_32, RISCV::F17_32, RISCV::F18_32, RISCV::F19_32, - RISCV::F20_32, RISCV::F21_32, RISCV::F22_32, RISCV::F23_32, - RISCV::F24_32, RISCV::F25_32, RISCV::F26_32, RISCV::F27_32, - RISCV::F28_32, RISCV::F29_32, RISCV::F30_32, RISCV::F31_32 -}; - static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > array_lengthof(FPR32DecoderTable)) + if (RegNo >= 32) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. 
- unsigned Reg = FPR32DecoderTable[RegNo]; + Register Reg = RISCV::F0_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -115,35 +88,21 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) { + if (RegNo >= 8) { return MCDisassembler::Fail; } - unsigned Reg = FPR32DecoderTable[RegNo + 8]; + Register Reg = RISCV::F8_F + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static const unsigned FPR64DecoderTable[] = { - RISCV::F0_64, RISCV::F1_64, RISCV::F2_64, RISCV::F3_64, - RISCV::F4_64, RISCV::F5_64, RISCV::F6_64, RISCV::F7_64, - RISCV::F8_64, RISCV::F9_64, RISCV::F10_64, RISCV::F11_64, - RISCV::F12_64, RISCV::F13_64, RISCV::F14_64, RISCV::F15_64, - RISCV::F16_64, RISCV::F17_64, RISCV::F18_64, RISCV::F19_64, - RISCV::F20_64, RISCV::F21_64, RISCV::F22_64, RISCV::F23_64, - RISCV::F24_64, RISCV::F25_64, RISCV::F26_64, RISCV::F27_64, - RISCV::F28_64, RISCV::F29_64, RISCV::F30_64, RISCV::F31_64 -}; - static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > array_lengthof(FPR64DecoderTable)) + if (RegNo >= 32) return MCDisassembler::Fail; - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. 
- unsigned Reg = FPR64DecoderTable[RegNo]; + Register Reg = RISCV::F0_D + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -151,10 +110,10 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) { + if (RegNo >= 8) { return MCDisassembler::Fail; } - unsigned Reg = FPR64DecoderTable[RegNo + 8]; + Register Reg = RISCV::F8_D + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -182,10 +141,10 @@ static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 8) + if (RegNo >= 8) return MCDisassembler::Fail; - unsigned Reg = GPRDecoderTable[RegNo + 8]; + Register Reg = RISCV::X8 + RegNo; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } @@ -279,8 +238,80 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, return MCDisassembler::Success; } +static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + #include "RISCVGenDisassemblerTables.inc" +static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + uint64_t SImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + 
DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeGPRRegisterClass(Inst, 0, Address, Decoder); + uint64_t SImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + DecodeGPRRegisterClass(Inst, 0, Address, Decoder); + Inst.addOperand(Inst.getOperand(0)); + uint64_t UImm6 = + fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5); + DecodeStatus Result = decodeUImmOperand<6>(Inst, UImm6, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid immediate"); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 7, 5); + unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); + DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); + DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 7, 5); + unsigned Rs2 = fieldFromInstruction(Insn, 2, 5); + DecodeGPRRegisterClass(Inst, Rd, Address, Decoder); + Inst.addOperand(Inst.getOperand(0)); + DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder); + return MCDisassembler::Success; +} + DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, 
uint64_t &Size, ArrayRef Bytes, uint64_t Address, diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index ee5f760ebcb..f6b727ae37c 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -30,9 +30,16 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCValue &Target) { bool ShouldForce = false; - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: break; + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + case FK_Data_8: + if (Target.isAbsolute()) + return false; + break; case RISCV::fixup_riscv_got_hi20: case RISCV::fixup_riscv_tls_got_hi20: case RISCV::fixup_riscv_tls_gd_hi20: @@ -48,7 +55,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm, return false; } - switch ((unsigned)T->getKind()) { + switch (T->getTargetKind()) { default: llvm_unreachable("Unexpected fixup kind for pcrel_lo12"); break; @@ -83,7 +90,7 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, return true; int64_t Offset = int64_t(Value); - switch ((unsigned)Fixup.getKind()) { + switch (Fixup.getTargetKind()) { default: return false; case RISCV::fixup_riscv_rvc_branch: @@ -174,8 +181,7 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, MCContext &Ctx) { - unsigned Kind = Fixup.getKind(); - switch (Kind) { + switch (Fixup.getTargetKind()) { default: llvm_unreachable("Unknown fixup kind!"); case RISCV::fixup_riscv_got_hi20: @@ -186,6 +192,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_Data_6b: return Value; case RISCV::fixup_riscv_lo12_i: case RISCV::fixup_riscv_pcrel_lo12_i: diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp 
b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 3ccbc86d261..cab2bbcb81b 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/RISCVFixupKinds.h" +#include "MCTargetDesc/RISCVMCExpr.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" @@ -47,8 +48,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { + const MCExpr *Expr = Fixup.getValue(); // Determine the type of the relocation - unsigned Kind = Fixup.getKind(); + unsigned Kind = Fixup.getTargetKind(); if (IsPCRel) { switch (Kind) { default: @@ -87,6 +89,9 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, default: llvm_unreachable("invalid fixup kind!"); case FK_Data_4: + if (Expr->getKind() == MCExpr::Target && + cast(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL) + return ELF::R_RISCV_32_PCREL; return ELF::R_RISCV_32; case FK_Data_8: return ELF::R_RISCV_64; @@ -98,6 +103,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_ADD32; case FK_Data_Add_8: return ELF::R_RISCV_ADD64; + case FK_Data_Add_6b: + return ELF::R_RISCV_SET6; case FK_Data_Sub_1: return ELF::R_RISCV_SUB8; case FK_Data_Sub_2: @@ -106,6 +113,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_SUB32; case FK_Data_Sub_8: return ELF::R_RISCV_SUB64; + case FK_Data_Sub_6b: + return ELF::R_RISCV_SUB6; case RISCV::fixup_riscv_hi20: return ELF::R_RISCV_HI20; case RISCV::fixup_riscv_lo12_i: @@ -129,5 +138,5 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createRISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit) { - return llvm::make_unique(OSABI, Is64Bit); + return std::make_unique(OSABI, 
Is64Bit); } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index fe37b70811d..8b5fe6dd825 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -39,6 +39,30 @@ static cl::opt cl::desc("Disable the emission of assembler pseudo instructions"), cl::init(false), cl::Hidden); +static cl::opt + ArchRegNames("riscv-arch-reg-names", + cl::desc("Print architectural register names rather than the " + "ABI names (such as x2 instead of sp)"), + cl::init(false), cl::Hidden); + +// The command-line flags above are used by llvm-mc and llc. They can be used by +// `llvm-objdump`, but we override their values here to handle options passed to +// `llvm-objdump` with `-M` (which matches GNU objdump). There did not seem to +// be an easier way to allow these options in all these tools, without doing it +// this way. +bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { + if (Opt == "no-aliases") { + NoAliases = true; + return true; + } + if (Opt == "numeric") { + ArchRegNames = true; + return true; + } + + return false; +} + void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { bool Res = false; @@ -112,3 +136,20 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, static_cast(MI->getOperand(OpNo).getImm()); O << RISCVFPRndMode::roundingModeToString(FRMArg); } + +void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNo); + + assert(MO.isReg() && "printAtomicMemOp can only print register operands"); + O << "("; + printRegName(O, MO.getReg()); + O << ")"; + return; +} + +const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) { + return getRegisterName(RegNo, ArchRegNames ? 
RISCV::NoRegAltName + : RISCV::ABIRegAltName); +} diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 5ca1d3fa20f..189d72626f3 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -25,6 +25,8 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} + bool applyTargetSpecificCLOption(StringRef Opt) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &O, unsigned RegNo) const override; @@ -37,6 +39,8 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printAtomicMemOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, @@ -46,8 +50,8 @@ public: void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = RISCV::ABIRegAltName); + static const char *getRegisterName(unsigned RegNo); + static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); }; } // namespace llvm diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index 98362969288..089a2def4c2 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -11,7 +11,10 @@ //===----------------------------------------------------------------------===// #include "RISCVMCAsmInfo.h" +#include "MCTargetDesc/RISCVMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; void RISCVMCAsmInfo::anchor() {} @@ -25,3 +28,20 
@@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; } + +const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (!(Encoding & dwarf::DW_EH_PE_pcrel)) + return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); + + // The default symbol subtraction results in an ADD/SUB relocation pair. + // Processing this relocation pair is problematic when linker relaxation is + // enabled, so we follow binutils in using the R_RISCV_32_PCREL relocation + // for the FDE initial location. + MCContext &Ctx = Streamer.getContext(); + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); + return RISCVMCExpr::create(ME, RISCVMCExpr::VK_RISCV_32_PCREL, Ctx); +} diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h index 043fdb7c08c..6824baf699a 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h @@ -23,6 +23,9 @@ class RISCVMCAsmInfo : public MCAsmInfoELF { public: explicit RISCVMCAsmInfo(const Triple &TargetTriple); + + const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; }; } // namespace llvm diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 0fc775f63ed..de99960848a 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -100,7 +101,7 @@ void 
RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS, const MCSubtargetInfo &STI) const { MCInst TmpInst; MCOperand Func; - unsigned Ra; + Register Ra; if (MI.getOpcode() == RISCV::PseudoTAIL) { Func = MI.getOperand(0); Ra = RISCV::X6; @@ -266,6 +267,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, switch (RVExpr->getKind()) { case RISCVMCExpr::VK_RISCV_None: case RISCVMCExpr::VK_RISCV_Invalid: + case RISCVMCExpr::VK_RISCV_32_PCREL: llvm_unreachable("Unhandled fixup kind!"); case RISCVMCExpr::VK_RISCV_TPREL_ADD: // tprel_add is only used to indicate that a relocation should be emitted diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h index b5a292dc1b1..921df376f3d 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h @@ -36,6 +36,7 @@ public: VK_RISCV_TLS_GD_HI, VK_RISCV_CALL, VK_RISCV_CALL_PLT, + VK_RISCV_32_PCREL, VK_RISCV_Invalid }; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index bc45262ab2d..5a4c86e48f1 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -16,7 +16,9 @@ #include "RISCVMCAsmInfo.h" #include "RISCVTargetStreamer.h" #include "TargetInfo/RISCVTargetInfo.h" +#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -52,7 +54,7 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { MCAsmInfo *MAI = new RISCVMCAsmInfo(TT); - unsigned SP = MRI.getDwarfRegNum(RISCV::X2, true); + Register SP = MRI.getDwarfRegNum(RISCV::X2, true); MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0); MAI->addInitialFrameState(Inst); diff --git a/lib/Target/RISCV/RISCV.h 
b/lib/Target/RISCV/RISCV.h index 834a1d17114..f23f742a478 100644 --- a/lib/Target/RISCV/RISCV.h +++ b/lib/Target/RISCV/RISCV.h @@ -18,9 +18,12 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { +class RISCVRegisterBankInfo; +class RISCVSubtarget; class RISCVTargetMachine; class AsmPrinter; class FunctionPass; +class InstructionSelector; class MCInst; class MCOperand; class MachineInstr; @@ -39,6 +42,10 @@ void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); FunctionPass *createRISCVExpandPseudoPass(); void initializeRISCVExpandPseudoPass(PassRegistry &); + +InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, + RISCVSubtarget &, + RISCVRegisterBankInfo &); } #endif diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td index e19b70b8e70..46530a8f74a 100644 --- a/lib/Target/RISCV/RISCV.td +++ b/lib/Target/RISCV/RISCV.td @@ -43,6 +43,11 @@ def FeatureStdExtC def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">, AssemblerPredicate<"FeatureStdExtC">; +def FeatureRVCHints + : SubtargetFeature<"rvc-hints", "EnableRVCHintInstrs", "true", + "Enable RVC Hint Instructions.">; +def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, + AssemblerPredicate<"FeatureRVCHints">; def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; @@ -77,14 +82,16 @@ include "RISCVSystemOperands.td" include "RISCVRegisterInfo.td" include "RISCVCallingConv.td" include "RISCVInstrInfo.td" +include "RISCVRegisterBanks.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. 
//===----------------------------------------------------------------------===// -def : ProcessorModel<"generic-rv32", NoSchedModel, []>; +def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>; -def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; +def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit, + FeatureRVCHints]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/lib/Target/RISCV/RISCVCallLowering.cpp b/lib/Target/RISCV/RISCVCallLowering.cpp new file mode 100644 index 00000000000..c63a84739c4 --- /dev/null +++ b/lib/Target/RISCV/RISCVCallLowering.cpp @@ -0,0 +1,50 @@ +//===-- RISCVCallLowering.cpp - Call lowering -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the lowering of LLVM calls to machine code calls for +/// GlobalISel. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCVCallLowering.h" +#include "RISCVISelLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" + +using namespace llvm; + +RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI) + : CallLowering(&TLI) {} + +bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, + ArrayRef VRegs) const { + + MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET); + + if (Val != nullptr) { + return false; + } + MIRBuilder.insertInstr(Ret); + return true; +} + +bool RISCVCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { + + if (F.arg_empty()) + return true; + + return false; +} + +bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + return false; +} diff --git a/lib/Target/RISCV/RISCVCallLowering.h b/lib/Target/RISCV/RISCVCallLowering.h new file mode 100644 index 00000000000..7ce074a61f0 --- /dev/null +++ b/lib/Target/RISCV/RISCVCallLowering.h @@ -0,0 +1,42 @@ +//===-- RISCVCallLowering.h - Call lowering ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file describes how to lower LLVM calls to machine code calls. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H +#define LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/ValueTypes.h" + +namespace llvm { + +class RISCVTargetLowering; + +class RISCVCallLowering : public CallLowering { + +public: + RISCVCallLowering(const RISCVTargetLowering &TLI); + + bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val, + ArrayRef VRegs) const override; + + bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const override; + + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_RISCV_RISCVCALLLOWERING_H diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td index db13e6e8bec..025454f8fcc 100644 --- a/lib/Target/RISCV/RISCVCallingConv.td +++ b/lib/Target/RISCV/RISCVCallingConv.td @@ -18,11 +18,11 @@ def CSR_ILP32_LP64 def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, - F8_32, F9_32, (sequence "F%u_32", 18, 27))>; + F8_F, F9_F, (sequence "F%u_F", 18, 27))>; def CSR_ILP32D_LP64D : CalleeSavedRegs<(add CSR_ILP32_LP64, - F8_64, F9_64, (sequence "F%u_64", 18, 27))>; + F8_D, F9_D, (sequence "F%u_D", 18, 27))>; // Needed for implementation of RISCVRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; @@ -43,12 +43,12 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add X1, (sequence "X%u", 12, 17), (sequence "X%u", 18, 27), (sequence "X%u", 28, 31), - (sequence "F%u_32", 0, 7), - (sequence "F%u_32", 10, 11), - (sequence "F%u_32", 12, 17), - (sequence "F%u_32", 28, 31), - (sequence "F%u_32", 8, 9), - (sequence "F%u_32", 18, 27))>; + (sequence "F%u_F", 0, 7), + (sequence "F%u_F", 10, 11), + (sequence "F%u_F", 12, 17), + (sequence "F%u_F", 28, 
31), + (sequence "F%u_F", 8, 9), + (sequence "F%u_F", 18, 27))>; // Same as CSR_Interrupt, but including all 64-bit FP registers. def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1, @@ -57,9 +57,9 @@ def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1, (sequence "X%u", 12, 17), (sequence "X%u", 18, 27), (sequence "X%u", 28, 31), - (sequence "F%u_64", 0, 7), - (sequence "F%u_64", 10, 11), - (sequence "F%u_64", 12, 17), - (sequence "F%u_64", 28, 31), - (sequence "F%u_64", 8, 9), - (sequence "F%u_64", 18, 27))>; + (sequence "F%u_D", 0, 7), + (sequence "F%u_D", 10, 11), + (sequence "F%u_D", 12, 17), + (sequence "F%u_D", 28, 31), + (sequence "F%u_D", 8, 9), + (sequence "F%u_D", 18, 27))>; diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 1c5171a7b7a..da5cd16e750 100644 --- a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -235,10 +235,10 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned IncrReg = MI.getOperand(3).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(4).getImm()); @@ -271,9 +271,9 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, } static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL, - MachineBasicBlock *MBB, unsigned DestReg, - unsigned OldValReg, unsigned NewValReg, - unsigned MaskReg, unsigned ScratchReg) { + MachineBasicBlock *MBB, Register DestReg, + Register OldValReg, Register NewValReg, + Register MaskReg, 
Register ScratchReg) { assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique"); assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique"); assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique"); @@ -297,11 +297,11 @@ static void doMaskedAtomicBinOpExpansion( MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB, MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) { assert(Width == 32 && "Should never need to expand masked 64-bit operations"); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned IncrReg = MI.getOperand(3).getReg(); - unsigned MaskReg = MI.getOperand(4).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register IncrReg = MI.getOperand(3).getReg(); + Register MaskReg = MI.getOperand(4).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(5).getImm()); @@ -394,8 +394,8 @@ bool RISCVExpandPseudo::expandAtomicBinOp( } static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL, - MachineBasicBlock *MBB, unsigned ValReg, - unsigned ShamtReg) { + MachineBasicBlock *MBB, Register ValReg, + Register ShamtReg) { BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg) .addReg(ValReg) .addReg(ShamtReg); @@ -436,12 +436,12 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp( DoneMBB->transferSuccessors(&MBB); MBB.addSuccessor(LoopHeadMBB); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Scratch1Reg = MI.getOperand(1).getReg(); - unsigned Scratch2Reg = MI.getOperand(2).getReg(); - unsigned AddrReg = MI.getOperand(3).getReg(); - unsigned IncrReg = MI.getOperand(4).getReg(); - unsigned MaskReg = MI.getOperand(5).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register Scratch1Reg = MI.getOperand(1).getReg(); + Register Scratch2Reg = MI.getOperand(2).getReg(); + 
Register AddrReg = MI.getOperand(3).getReg(); + Register IncrReg = MI.getOperand(4).getReg(); + Register MaskReg = MI.getOperand(5).getReg(); bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max; AtomicOrdering Ordering = static_cast(MI.getOperand(IsSigned ? 7 : 6).getImm()); @@ -549,11 +549,11 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg( DoneMBB->transferSuccessors(&MBB); MBB.addSuccessor(LoopHeadMBB); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned ScratchReg = MI.getOperand(1).getReg(); - unsigned AddrReg = MI.getOperand(2).getReg(); - unsigned CmpValReg = MI.getOperand(3).getReg(); - unsigned NewValReg = MI.getOperand(4).getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register ScratchReg = MI.getOperand(1).getReg(); + Register AddrReg = MI.getOperand(2).getReg(); + Register CmpValReg = MI.getOperand(3).getReg(); + Register NewValReg = MI.getOperand(4).getReg(); AtomicOrdering Ordering = static_cast(MI.getOperand(IsMasked ? 6 : 5).getImm()); @@ -582,7 +582,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg( // lr.w dest, (addr) // and scratch, dest, mask // bne scratch, cmpval, done - unsigned MaskReg = MI.getOperand(5).getReg(); + Register MaskReg = MI.getOperand(5).getReg(); BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg) .addReg(AddrReg); BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg) @@ -629,7 +629,7 @@ bool RISCVExpandPseudo::expandAuipcInstPair( MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); const MachineOperand &Symbol = MI.getOperand(1); MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp index 32c3b9684d2..6b6f62e18ce 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ void 
RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. FrameSize = alignTo(FrameSize, StackAlign); @@ -52,8 +60,8 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, - unsigned SrcReg, int64_t Val, + const DebugLoc &DL, Register DestReg, + Register SrcReg, int64_t Val, MachineInstr::MIFlag Flag) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const RISCVInstrInfo *TII = STI.getInstrInfo(); @@ -66,7 +74,7 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, .addReg(SrcReg) .addImm(Val) .setMIFlag(Flag); - } else if (isInt<32>(Val)) { + } else { unsigned Opc = RISCV::ADD; bool isSub = Val < 0; if (isSub) { @@ -74,22 +82,20 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, Opc = RISCV::SUB; } - unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm32(MBB, MBBI, DL, ScratchReg, Val, Flag); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag); BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) .addReg(SrcReg) .addReg(ScratchReg, RegState::Kill) .setMIFlag(Flag); - } else { - report_fatal_error("adjustReg cannot yet handle adjustments >32 bits"); } } // Returns the register used to hold the frame 
pointer. -static unsigned getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } +static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } // Returns the register used to hold the stack pointer. -static unsigned getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } +static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { @@ -101,8 +107,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); - unsigned FPReg = getFPReg(STI); - unsigned SPReg = getSPReg(STI); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + + Register FPReg = getFPReg(STI); + Register SPReg = getSPReg(STI); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -119,6 +131,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize == 0 && !MFI.adjustsStack()) return; + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + // Split the SP adjustment to reduce the offsets of callee saved spill. + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Allocate space on the stack if necessary. adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); @@ -141,7 +158,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, // directives. 
for (const auto &Entry : CSI) { int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); - unsigned Reg = Entry.getReg(); + Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, RI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -159,6 +176,45 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } + + // Emit the second SP adjustment after saving callee saved registers. + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount, + MachineInstr::FrameSetup); + // Emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize())); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + + if (hasFP(MF)) { + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + Register VR = + MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } + } } void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, @@ -169,8 +225,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, auto *RVFI = 
MF.getInfo(); DebugLoc DL = MBBI->getDebugLoc(); const RISCVInstrInfo *TII = STI.getInstrInfo(); - unsigned FPReg = getFPReg(STI); - unsigned SPReg = getSPReg(STI); + Register FPReg = getFPReg(STI); + Register SPReg = getSPReg(STI); // Skip to before the restores of callee-saved registers // FIXME: assumes exactly one instruction is used to restore each @@ -189,11 +245,29 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); } + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + + adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount, + MachineInstr::FrameDestroy); + + // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount" + unsigned CFIIndex = + MF.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, + -FirstSPAdjustAmount)); + BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + if (hasFP(MF)) { // To find the instruction restoring FP from stack. for (auto &I = LastFrameDestroy; I != MBBI; ++I) { if (I->mayLoad() && I->getOperand(0).isReg()) { - unsigned DestReg = I->getOperand(0).getReg(); + Register DestReg = I->getOperand(0).getReg(); if (DestReg == FPReg) { // If there is frame pointer, after restoring $fp registers, we // need adjust CFA to ($sp - FPOffset). @@ -214,13 +288,16 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, // Iterate over list of callee-saved registers and emit .cfi_restore // directives. 
for (const auto &Entry : CSI) { - unsigned Reg = Entry.getReg(); + Register Reg = Entry.getReg(); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore( nullptr, RI->getDwarfRegNum(Reg, true))); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Deallocate stack adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); @@ -249,6 +326,8 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + MFI.getOffsetAdjustment(); + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); @@ -256,6 +335,17 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; + + if (FirstSPAdjustAmount) + Offset += FirstSPAdjustAmount; + else + Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still access stack objects using SP. 
+ FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); @@ -338,7 +428,7 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - unsigned SPReg = RISCV::X2; + Register SPReg = RISCV::X2; DebugLoc DL = MI->getDebugLoc(); if (!hasReservedCallFrame(MF)) { @@ -362,3 +452,39 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(MI); } + +// We would like to split the SP adjustment to reduce prologue/epilogue +// as in the following instructions. In this way, the offset of the callee saved +// register could fit in a single store. +// add sp,sp,-2032 +// sw ra,2028(sp) +// sw s0,2024(sp) +// sw s1,2020(sp) +// sw s3,2012(sp) +// sw s4,2008(sp) +// add sp,sp,-64 +uint64_t +RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + uint64_t StackSize = MFI.getStackSize(); + uint64_t StackAlign = getStackAlignment(); + + // FIXME: Disable SplitSPAdjust if save-restore libcalls are enabled, once that + // patch lands. The callee saved registers will be pushed by the + // save-restore libcalls, so we don't have to split the SP adjustment + // in this case. + // + // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed + // 12-bit value and there exists a callee saved register that needs to be pushed. + if (!isInt<12>(StackSize) && (CSI.size() > 0)) { + // FirstSPAdjustAmount is chosen as (2048 - StackAlign) + // because 2048 would cause sp = sp + 2048 in the epilogue to be split into + // multiple instructions. An offset smaller than 2048 can fit in a single + // load/store instruction, and we have to stick with the stack alignment. + // 2048 is 16-byte aligned.
The stack alignment for RV32 and RV64 is 16, + // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment. + return 2048 - StackAlign; + } + return 0; +} diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h index 0e045c3ff85..f4a5949773d 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.h +++ b/lib/Target/RISCV/RISCVFrameLowering.h @@ -22,7 +22,7 @@ class RISCVFrameLowering : public TargetFrameLowering { public: explicit RISCVFrameLowering(const RISCVSubtarget &STI) : TargetFrameLowering(StackGrowsDown, - /*StackAlignment=*/16, + /*StackAlignment=*/Align(16), /*LocalAreaOffset=*/0), STI(STI) {} @@ -45,13 +45,18 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + // Get the first stack adjustment amount for SplitSPAdjust. + // Return 0 if we don't want to split the SP adjustment in prologue and + // epilogue. + uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const; + protected: const RISCVSubtarget &STI; private: void determineFrameLayout(MachineFunction &MF) const; void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, Register DestReg, Register SrcReg, int64_t Val, MachineInstr::MIFlag Flag) const; }; } diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index d0a3af375a6..1a12d9177d2 100644 --- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -68,7 +68,7 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm, RISCVMatInt::InstSeq Seq; RISCVMatInt::generateInstSeq(Imm, XLenVT == MVT::i64, Seq); - SDNode *Result; + SDNode *Result = nullptr; SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); for (RISCVMatInt::Inst &Inst : Seq) { SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); @@ -179,6 +179,9 @@
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( // operand and need no special handling. OutOps.push_back(Op); return false; + case InlineAsm::Constraint_A: + OutOps.push_back(Op); + return false; default: break; } diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index ce7b85911ab..dc829fce901 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -100,6 +100,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (Subtarget.is64Bit()) { + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); setOperationAction(ISD::SHL, MVT::i32, Custom); setOperationAction(ISD::SRA, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i32, Custom); @@ -116,6 +118,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { + setOperationAction(ISD::MUL, MVT::i32, Custom); setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Custom); @@ -194,8 +197,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); - // Function alignments (log2). - unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2; + // Function alignments. + const Align FunctionAlignment(Subtarget.hasStdExtC() ? 
2 : 4); setMinFunctionAlignment(FunctionAlignment); setPrefFunctionAlignment(FunctionAlignment); @@ -231,7 +234,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; @@ -660,7 +663,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); - unsigned FrameReg = RI.getFrameRegister(MF); + Register FrameReg = RI.getFrameRegister(MF); int XLenInBytes = Subtarget.getXLen() / 8; EVT VT = Op.getValueType(); @@ -703,7 +706,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, // Return the value of the return address register, marking it an implicit // live-in. - unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); + Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } @@ -834,6 +837,18 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); } +// Converts the given 32-bit operation to a i64 operation with signed extension +// semantic to reduce the signed extension instructions. 
+static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1); + SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp, + DAG.getValueType(MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); +} + void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { @@ -854,6 +869,15 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(RCW.getValue(2)); break; } + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOpWithSExt(N, DAG)); + break; case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1007,12 +1031,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( // We can materialise `c1 << c2` into an add immediate, so it's "free", // and the combine should happen, to potentially allow further combines // later. - if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + if (ShiftedC1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(ShiftedC1Int.getSExtValue())) return true; // We can materialise `c1` in an add immediate, so it's "free", and the // combine should be prevented. 
- if (isLegalAddImmediate(C1Int.getSExtValue())) + if (C1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(C1Int.getSExtValue())) return false; // Neither constant will fit into an immediate, so find materialisation @@ -1052,8 +1078,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( return 1; } -MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, - MachineBasicBlock *BB) { +static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, + MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. @@ -1085,9 +1111,9 @@ MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, BB->addSuccessor(LoopMBB); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); + Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); @@ -1122,9 +1148,9 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); - unsigned LoReg = MI.getOperand(0).getReg(); - unsigned HiReg = MI.getOperand(1).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); + Register LoReg = MI.getOperand(0).getReg(); + Register HiReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); @@ -1154,9 +1180,9 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); 
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned LoReg = MI.getOperand(1).getReg(); - unsigned HiReg = MI.getOperand(2).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register LoReg = MI.getOperand(1).getReg(); + Register HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass; int FI = MF.getInfo()->getMoveF64FrameIndex(); @@ -1215,12 +1241,12 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, // previous selects in the sequence. // These conditions could be further relaxed. See the X86 target for a // related approach and more information. - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); auto CC = static_cast(MI.getOperand(3).getImm()); SmallVector SelectDebugValues; - SmallSet SelectDests; + SmallSet SelectDests; SelectDests.insert(MI.getOperand(0).getReg()); MachineInstr *LastSelectPseudo = &MI; @@ -1363,12 +1389,12 @@ static const MCPhysReg ArgGPRs[] = { RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; static const MCPhysReg ArgFPR32s[] = { - RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32, - RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32 + RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, + RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F }; static const MCPhysReg ArgFPR64s[] = { - RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64, - RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64 + RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, + RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D }; // Pass a 2*XLEN argument that has been split into two XLEN values through @@ -1378,7 +1404,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, MVT ValVT2, 
MVT LocVT2, ISD::ArgFlagsTy ArgFlags2) { unsigned XLenInBytes = XLen / 8; - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); @@ -1395,7 +1421,7 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, return false; } - if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + if (Register Reg = State.AllocateReg(ArgGPRs)) { // The second half can also be passed via register. State.addLoc( CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); @@ -1495,7 +1521,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // GPRs, split between a GPR and the stack, or passed completely on the // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these // cases. - unsigned Reg = State.AllocateReg(ArgGPRs); + Register Reg = State.AllocateReg(ArgGPRs); LocVT = MVT::i32; if (!Reg) { unsigned StackOffset = State.AllocateStack(8, 8); @@ -1537,7 +1563,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, } // Allocate to a register if possible, or else a stack slot. 
- unsigned Reg; + Register Reg; if (ValVT == MVT::f32 && !UseGPRForF32) Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); else if (ValVT == MVT::f64 && !UseGPRForF64) @@ -1673,7 +1699,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, break; } - unsigned VReg = RegInfo.createVirtualRegister(RC); + Register VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); @@ -1751,7 +1777,7 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, assert(VA.isRegLoc() && "Expected register VA assignment"); - unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg(), LoVReg); SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32); SDValue Hi; @@ -1763,13 +1789,70 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, MachinePointerInfo::getFixedStack(MF, FI)); } else { // Second half of f64 is passed in another GPR. - unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg); Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32); } return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); } +// FastCC has less than 1% performance improvement for some particular +// benchmark. But theoretically, it may has benenfit for some cases. +static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32 || LocVT == MVT::i64) { + // X5 and X6 might be used for save-restore libcall. 
+ static const MCPhysReg GPRList[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + if (unsigned Reg = State.AllocateReg(GPRList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f32) { + static const MCPhysReg FPR32List[] = { + RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, + RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F, + RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F, + RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F}; + if (unsigned Reg = State.AllocateReg(FPR32List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::f64) { + static const MCPhysReg FPR64List[] = { + RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D, + RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D, + RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D, + RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D}; + if (unsigned Reg = State.AllocateReg(FPR64List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + if (LocVT == MVT::i32 || LocVT == MVT::f32) { + unsigned Offset4 = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i64 || LocVT == MVT::f64) { + unsigned Offset5 = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo)); + return false; + } + + return true; // CC didn't match. +} + // Transform physical registers into virtual registers. 
SDValue RISCVTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1809,7 +1892,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); + + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); + else + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1877,8 +1964,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures // offsets to even-numbered registered remain 2*XLEN-aligned. if (Idx % 2) { - FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, - true); + MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true); VarArgsSaveSize += XLenInBytes; } @@ -1886,7 +1972,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( // to the vararg save area. for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += XLenInBytes) { - const unsigned Reg = RegInfo.createVirtualRegister(RC); + const Register Reg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(ArgRegs[I], Reg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); @@ -1920,7 +2006,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( auto &Callee = CLI.Callee; auto CalleeCC = CLI.CallConv; - auto IsVarArg = CLI.IsVarArg; auto &Outs = CLI.Outs; auto &Caller = MF.getFunction(); auto CallerCC = Caller.getCallingConv(); @@ -1937,10 +2022,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization( if (Caller.hasFnAttribute("interrupt")) return false; - // Do not tail call opt functions with varargs. 
- if (IsVarArg) - return false; - // Do not tail call opt if the stack is used to pass parameters. if (CCInfo.getNextStackOffset() != 0) return false; @@ -2015,7 +2096,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); + + if (CallConv == CallingConv::Fast) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); + else + analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); // Check if it's really possible to do a tail call. if (IsTailCall) @@ -2057,7 +2142,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. - SmallVector, 8> RegsToPass; + SmallVector, 8> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2074,7 +2159,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); - unsigned RegLo = VA.getLocReg(); + Register RegLo = VA.getLocReg(); RegsToPass.push_back(std::make_pair(RegLo, Lo)); if (RegLo == RISCV::X17) { @@ -2087,7 +2172,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo())); } else { // Second half of f64 is passed in another GPR. 
- unsigned RegHigh = RegLo + 1; + assert(RegLo < RISCV::X31 && "Invalid register pair"); + Register RegHigh = RegLo + 1; RegsToPass.push_back(std::make_pair(RegHigh, Hi)); } continue; @@ -2302,8 +2388,9 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, DAG.getVTList(MVT::i32, MVT::i32), Val); SDValue Lo = SplitF64.getValue(0); SDValue Hi = SplitF64.getValue(1); - unsigned RegLo = VA.getLocReg(); - unsigned RegHi = RegLo + 1; + Register RegLo = VA.getLocReg(); + assert(RegLo < RISCV::X31 && "Invalid register pair"); + Register RegHi = RegLo + 1; Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue); Glue = Chain.getValue(1); RetOps.push_back(DAG.getRegister(RegLo, MVT::i32)); @@ -2397,6 +2484,27 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +RISCVTargetLowering::ConstraintType +RISCVTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + return C_RegisterClass; + case 'I': + case 'J': + case 'K': + return C_Immediate; + case 'A': + return C_Memory; + } + } + return TargetLowering::getConstraintType(Constraint); +} + std::pair RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, @@ -2407,14 +2515,125 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, switch (Constraint[0]) { case 'r': return std::make_pair(0U, &RISCV::GPRRegClass); + case 'f': + if (Subtarget.hasStdExtF() && VT == MVT::f32) + return std::make_pair(0U, &RISCV::FPR32RegClass); + if (Subtarget.hasStdExtD() && VT == MVT::f64) + return std::make_pair(0U, &RISCV::FPR64RegClass); + break; default: break; } } + // Clang will correctly decode the usage of register name aliases into their + // official names. 
However, other frontends like `rustc` do not. This allows + // users of these frontends to use the ABI names for registers in LLVM-style + // register constraints. + Register XRegFromAlias = StringSwitch(Constraint.lower()) + .Case("{zero}", RISCV::X0) + .Case("{ra}", RISCV::X1) + .Case("{sp}", RISCV::X2) + .Case("{gp}", RISCV::X3) + .Case("{tp}", RISCV::X4) + .Case("{t0}", RISCV::X5) + .Case("{t1}", RISCV::X6) + .Case("{t2}", RISCV::X7) + .Cases("{s0}", "{fp}", RISCV::X8) + .Case("{s1}", RISCV::X9) + .Case("{a0}", RISCV::X10) + .Case("{a1}", RISCV::X11) + .Case("{a2}", RISCV::X12) + .Case("{a3}", RISCV::X13) + .Case("{a4}", RISCV::X14) + .Case("{a5}", RISCV::X15) + .Case("{a6}", RISCV::X16) + .Case("{a7}", RISCV::X17) + .Case("{s2}", RISCV::X18) + .Case("{s3}", RISCV::X19) + .Case("{s4}", RISCV::X20) + .Case("{s5}", RISCV::X21) + .Case("{s6}", RISCV::X22) + .Case("{s7}", RISCV::X23) + .Case("{s8}", RISCV::X24) + .Case("{s9}", RISCV::X25) + .Case("{s10}", RISCV::X26) + .Case("{s11}", RISCV::X27) + .Case("{t3}", RISCV::X28) + .Case("{t4}", RISCV::X29) + .Case("{t5}", RISCV::X30) + .Case("{t6}", RISCV::X31) + .Default(RISCV::NoRegister); + if (XRegFromAlias != RISCV::NoRegister) + return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass); + + // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the + // TableGen record rather than the AsmName to choose registers for InlineAsm + // constraints, plus we want to match those names to the widest floating point + // register type available, manually select floating point registers here. + // + // The second case is the ABI name of the register, so that frontends can also + // use the ABI names in register constraint lists. 
+ if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) { + std::pair FReg = + StringSwitch>(Constraint.lower()) + .Cases("{f0}", "{ft0}", {RISCV::F0_F, RISCV::F0_D}) + .Cases("{f1}", "{ft1}", {RISCV::F1_F, RISCV::F1_D}) + .Cases("{f2}", "{ft2}", {RISCV::F2_F, RISCV::F2_D}) + .Cases("{f3}", "{ft3}", {RISCV::F3_F, RISCV::F3_D}) + .Cases("{f4}", "{ft4}", {RISCV::F4_F, RISCV::F4_D}) + .Cases("{f5}", "{ft5}", {RISCV::F5_F, RISCV::F5_D}) + .Cases("{f6}", "{ft6}", {RISCV::F6_F, RISCV::F6_D}) + .Cases("{f7}", "{ft7}", {RISCV::F7_F, RISCV::F7_D}) + .Cases("{f8}", "{fs0}", {RISCV::F8_F, RISCV::F8_D}) + .Cases("{f9}", "{fs1}", {RISCV::F9_F, RISCV::F9_D}) + .Cases("{f10}", "{fa0}", {RISCV::F10_F, RISCV::F10_D}) + .Cases("{f11}", "{fa1}", {RISCV::F11_F, RISCV::F11_D}) + .Cases("{f12}", "{fa2}", {RISCV::F12_F, RISCV::F12_D}) + .Cases("{f13}", "{fa3}", {RISCV::F13_F, RISCV::F13_D}) + .Cases("{f14}", "{fa4}", {RISCV::F14_F, RISCV::F14_D}) + .Cases("{f15}", "{fa5}", {RISCV::F15_F, RISCV::F15_D}) + .Cases("{f16}", "{fa6}", {RISCV::F16_F, RISCV::F16_D}) + .Cases("{f17}", "{fa7}", {RISCV::F17_F, RISCV::F17_D}) + .Cases("{f18}", "{fs2}", {RISCV::F18_F, RISCV::F18_D}) + .Cases("{f19}", "{fs3}", {RISCV::F19_F, RISCV::F19_D}) + .Cases("{f20}", "{fs4}", {RISCV::F20_F, RISCV::F20_D}) + .Cases("{f21}", "{fs5}", {RISCV::F21_F, RISCV::F21_D}) + .Cases("{f22}", "{fs6}", {RISCV::F22_F, RISCV::F22_D}) + .Cases("{f23}", "{fs7}", {RISCV::F23_F, RISCV::F23_D}) + .Cases("{f24}", "{fs8}", {RISCV::F24_F, RISCV::F24_D}) + .Cases("{f25}", "{fs9}", {RISCV::F25_F, RISCV::F25_D}) + .Cases("{f26}", "{fs10}", {RISCV::F26_F, RISCV::F26_D}) + .Cases("{f27}", "{fs11}", {RISCV::F27_F, RISCV::F27_D}) + .Cases("{f28}", "{ft8}", {RISCV::F28_F, RISCV::F28_D}) + .Cases("{f29}", "{ft9}", {RISCV::F29_F, RISCV::F29_D}) + .Cases("{f30}", "{ft10}", {RISCV::F30_F, RISCV::F30_D}) + .Cases("{f31}", "{ft11}", {RISCV::F31_F, RISCV::F31_D}) + .Default({RISCV::NoRegister, RISCV::NoRegister}); + if (FReg.first != 
RISCV::NoRegister) + return Subtarget.hasStdExtD() + ? std::make_pair(FReg.second, &RISCV::FPR64RegClass) + : std::make_pair(FReg.first, &RISCV::FPR32RegClass); + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } +unsigned +RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { + // Currently only support length 1 constraints. + if (ConstraintCode.size() == 1) { + switch (ConstraintCode[0]) { + case 'A': + return InlineAsm::Constraint_A; + default: + break; + } + } + + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); +} + void RISCVTargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { @@ -2619,3 +2838,13 @@ unsigned RISCVTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return RISCV::X11; } + +bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { + // Return false to suppress the unnecessary extensions if the LibCall + // arguments or return value is f32 type for LP64 ABI. + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) + return false; + + return true; +} diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index 17db03bbb69..18fc7350bbb 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -92,6 +92,10 @@ public: // This method returns the name of a target specific DAG node. 
const char *getTargetNodeName(unsigned Opcode) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override; + std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; @@ -141,6 +145,8 @@ public: unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + bool shouldExtendTypeInLibCall(EVT Type) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Ins, diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp index 99c8d2ef73d..08483929953 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -14,6 +14,7 @@ #include "RISCV.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "Utils/RISCVMatInt.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,8 +29,9 @@ using namespace llvm; -RISCVInstrInfo::RISCVInstrInfo() - : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP) {} +RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI) + : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), + STI(STI) {} unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { @@ -156,24 +158,43 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0); } -void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DstReg, uint64_t Val, - MachineInstr::MIFlag Flag) const { - assert(isInt<32>(Val) && "Can only materialize 32-bit constants"); +void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, uint64_t Val, + 
MachineInstr::MIFlag Flag) const { + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + bool IsRV64 = MF->getSubtarget().is64Bit(); + Register SrcReg = RISCV::X0; + Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass); + unsigned Num = 0; - // TODO: If the value can be materialized using only one instruction, only - // insert a single instruction. + if (!IsRV64 && !isInt<32>(Val)) + report_fatal_error("Should only materialize 32-bit constants for RV32"); - uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff; - uint64_t Lo12 = SignExtend64<12>(Val); - BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg) - .addImm(Hi20) - .setMIFlag(Flag); - BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) - .addReg(DstReg, RegState::Kill) - .addImm(Lo12) - .setMIFlag(Flag); + RISCVMatInt::InstSeq Seq; + RISCVMatInt::generateInstSeq(Val, IsRV64, Seq); + assert(Seq.size() > 0); + + for (RISCVMatInt::Inst &Inst : Seq) { + // Write the final result to DstReg if it's the last instruction in the Seq. + // Otherwise, write the result to the temp register. + if (++Num == Seq.size()) + Result = DstReg; + + if (Inst.Opc == RISCV::LUI) { + BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result) + .addImm(Inst.Imm) + .setMIFlag(Flag); + } else { + BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result) + .addReg(SrcReg, RegState::Kill) + .addImm(Inst.Imm) + .setMIFlag(Flag); + } + // Only the first instruction has X0 as its source. + SrcReg = Result; + } } // The contents of values added to Cond are not examined outside of @@ -372,7 +393,7 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // FIXME: A virtual register must be used initially, as the register // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch // uses the same workaround). 
- unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); auto II = MBB.end(); MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg) @@ -466,3 +487,58 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { } return MI.isAsCheapAsAMove(); } + +bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + const MCInstrInfo *MCII = STI.getInstrInfo(); + MCInstrDesc const &Desc = MCII->get(MI.getOpcode()); + + for (auto &OI : enumerate(Desc.operands())) { + unsigned OpType = OI.value().OperandType; + if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && + OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { + const MachineOperand &MO = MI.getOperand(OI.index()); + if (MO.isImm()) { + int64_t Imm = MO.getImm(); + bool Ok; + switch (OpType) { + default: + llvm_unreachable("Unexpected operand type"); + case RISCVOp::OPERAND_UIMM4: + Ok = isUInt<4>(Imm); + break; + case RISCVOp::OPERAND_UIMM5: + Ok = isUInt<5>(Imm); + break; + case RISCVOp::OPERAND_UIMM12: + Ok = isUInt<12>(Imm); + break; + case RISCVOp::OPERAND_SIMM12: + Ok = isInt<12>(Imm); + break; + case RISCVOp::OPERAND_SIMM13_LSB0: + Ok = isShiftedInt<12, 1>(Imm); + break; + case RISCVOp::OPERAND_UIMM20: + Ok = isUInt<20>(Imm); + break; + case RISCVOp::OPERAND_SIMM21_LSB0: + Ok = isShiftedInt<20, 1>(Imm); + break; + case RISCVOp::OPERAND_UIMMLOG2XLEN: + if (STI.getTargetTriple().isArch64Bit()) + Ok = isUInt<6>(Imm); + else + Ok = isUInt<5>(Imm); + break; + } + if (!Ok) { + ErrInfo = "Invalid immediate"; + return false; + } + } + } + } + + return true; +} diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h index ff098e660d1..d3ae04aefe0 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.h +++ b/lib/Target/RISCV/RISCVInstrInfo.h @@ -21,10 +21,12 @@ namespace llvm { +class RISCVSubtarget; + class RISCVInstrInfo : public RISCVGenInstrInfo { public: - 
RISCVInstrInfo(); + explicit RISCVInstrInfo(RISCVSubtarget &STI); unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; @@ -46,10 +48,10 @@ public: int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - // Materializes the given int32 Val into DstReg. - void movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, unsigned DstReg, uint64_t Val, - MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + // Materializes the given integer Val into DstReg. + void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DstReg, uint64_t Val, + MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; @@ -80,6 +82,12 @@ public: int64_t BrOffset) const override; bool isAsCheapAsAMove(const MachineInstr &MI) const override; + + bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const override; + +protected: + const RISCVSubtarget &STI; }; } #endif diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td index 69bde15f121..db2ecc49d14 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.td +++ b/lib/Target/RISCV/RISCVInstrInfo.td @@ -69,6 +69,12 @@ class ImmAsmOperand : AsmOperandClass { let DiagnosticType = !strconcat("Invalid", Name); } +def ImmZeroAsmOperand : AsmOperandClass { + let Name = "ImmZero"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = !strconcat("Invalid", Name); +} + class SImmAsmOperand : ImmAsmOperand<"S", width, suffix> { } @@ -87,6 +93,8 @@ def fencearg : Operand { let ParserMatchClass = FenceArg; let PrintMethod = "printFenceArg"; let DecoderMethod = "decodeUImmOperand<4>"; + let OperandType = "OPERAND_UIMM4"; + let OperandNamespace = "RISCVOp"; } def UImmLog2XLenAsmOperand : AsmOperandClass { @@ -111,11 +119,15 @@ def uimmlog2xlen : Operand, ImmLeaf(Imm); return isUInt<5>(Imm); }]; + let 
OperandType = "OPERAND_UIMMLOG2XLEN"; + let OperandNamespace = "RISCVOp"; } def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; let DecoderMethod = "decodeUImmOperand<5>"; + let OperandType = "OPERAND_UIMM5"; + let OperandNamespace = "RISCVOp"; } def simm12 : Operand, ImmLeaf(Imm);}]> { @@ -128,6 +140,8 @@ def simm12 : Operand, ImmLeaf(Imm);}]> { return isInt<12>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM12"; + let OperandNamespace = "RISCVOp"; } // A 13-bit signed immediate where the least significant bit is zero. @@ -141,6 +155,8 @@ def simm13_lsb0 : Operand { return isShiftedInt<12, 1>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM13_LSB0"; + let OperandNamespace = "RISCVOp"; } class UImm20Operand : Operand { @@ -152,6 +168,8 @@ class UImm20Operand : Operand { return isUInt<20>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_UIMM20"; + let OperandNamespace = "RISCVOp"; } def uimm20_lui : UImm20Operand { @@ -176,6 +194,8 @@ def simm21_lsb0_jal : Operand { return isShiftedInt<20, 1>(Imm); return MCOp.isBareSymbolRef(); }]; + let OperandType = "OPERAND_SIMM21_LSB0"; + let OperandNamespace = "RISCVOp"; } def BareSymbol : AsmOperandClass { @@ -224,6 +244,8 @@ def csr_sysreg : Operand { let ParserMatchClass = CSRSystemRegister; let PrintMethod = "printCSRSystemRegister"; let DecoderMethod = "decodeUImmOperand<12>"; + let OperandType = "OPERAND_UIMM12"; + let OperandNamespace = "RISCVOp"; } // A parameterized register class alternative to i32imm/i64imm from Target.td. 
diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td index b768c9347b3..38ba3f9fb24 100644 --- a/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/lib/Target/RISCV/RISCVInstrInfoA.td @@ -11,6 +11,24 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +// A parse method for (${gpr}) or 0(${gpr}), where the 0 is silently ignored. +// Used for GNU as compatibility. +def AtomicMemOpOperand : AsmOperandClass { + let Name = "AtomicMemOpOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isReg"; + let ParserMethod = "parseAtomicMemOp"; +} + +def GPRMemAtomic : RegisterOperand { + let ParserMatchClass = AtomicMemOpOperand; + let PrintMethod = "printAtomicMemOp"; +} + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -18,8 +36,8 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class LR_r funct3, string opcodestr> : RVInstRAtomic<0b00010, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPR:$rs1), - opcodestr, "$rd, (${rs1})"> { + (outs GPR:$rd), (ins GPRMemAtomic:$rs1), + opcodestr, "$rd, $rs1"> { let rs2 = 0; } @@ -33,8 +51,8 @@ multiclass LR_r_aq_rl funct3, string opcodestr> { let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class AMO_rr funct5, bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic; + (outs GPR:$rd), (ins GPRMemAtomic:$rs1, GPR:$rs2), + opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_rr_aq_rl funct5, bits<3> funct3, string opcodestr> { def "" : AMO_rr; @@ -196,12 +214,12 @@ class PseudoMaskedAMOUMinUMax } class PseudoMaskedAMOPat - : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering), + 
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; class PseudoMaskedAMOMinMaxPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - imm:$ordering), + timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, imm:$ordering)>; @@ -270,7 +288,7 @@ def PseudoMaskedCmpXchg32 } def : Pat<(int_riscv_masked_cmpxchg_i32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; @@ -347,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; def : Pat<(int_riscv_masked_cmpxchg_i64 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering), + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td index 94477341eea..fa0050f107b 100644 --- a/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/lib/Target/RISCV/RISCVInstrInfoC.td @@ -61,6 +61,11 @@ def simm6nonzero : Operand, }]; } +def immzero : Operand, + ImmLeaf { + let ParserMatchClass = ImmZeroAsmOperand; +} + def CLUIImmAsmOperand : AsmOperandClass { let Name = "CLUIImm"; let RenderMethod = "addImmOperands"; @@ -132,7 +137,8 @@ def uimm8_lsb000 : Operand, } // A 9-bit signed immediate where the least significant bit is zero. -def simm9_lsb0 : Operand { +def simm9_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; @@ -191,7 +197,8 @@ def simm10_lsb0000nonzero : Operand, } // A 12-bit signed immediate where the least significant bit is zero. 
-def simm12_lsb0 : Operand { +def simm12_lsb0 : Operand, + ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; @@ -344,7 +351,10 @@ def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { } let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">; +def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> +{ + let Inst{6-2} = 0; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), @@ -354,6 +364,15 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), let Inst{6-2} = imm{4-0}; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, immzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let isAsmParserOnly = 1; +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32Only_", Defs = [X1], Predicates = [HasStdExtC, IsRV32] in @@ -522,6 +541,105 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> { } // Predicates = [HasStdExtC] +//===----------------------------------------------------------------------===// +// HINT Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, + mayStore = 0 in +{ + +let rd = 0 in +def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), + "c.nop", "$imm"> { + let Inst{6-2} = imm{4-0}; + let DecoderMethod = "decodeRVCInstrSImm"; +} + +// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. 
+def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, simm6nonzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; + let isAsmParserOnly = 1; +} + +def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, immzero:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let isAsmParserOnly = 1; +} + +def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), + "c.li", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdSImm"; +} + +def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), + (ins c_lui_imm:$imm), + "c.lui", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdSImm"; +} + +def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), + "c.mv", "$rs1, $rs2"> +{ + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs2"; +} + +def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), + (ins GPRX0:$rs1, GPRNoX0:$rs2), + "c.add", "$rs1, $rs2"> { + let Constraints = "$rs1 = $rs1_wb"; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; +} + +def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), + (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), + "c.slli" ,"$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; + let Inst{11-7} = 0; + let DecoderMethod = "decodeRVCInstrRdRs1UImm"; +} + +def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), + "c.slli64" ,"$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{12} = 0; +} + +def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), + (ins GPRC:$rd), + "c.srli64", "$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{11-10} = 0; + let Inst{12} = 0; +} + +def C_SRAI64_HINT : RVInst16CI<0b100, 
0b01, (outs GPRC:$rd_wb), + (ins GPRC:$rd), + "c.srai64", "$rd"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = 0; + let Inst{11-10} = 1; + let Inst{12} = 0; +} + +} // Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, + // mayStore = 0 + //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td index 032642942f2..3b73c865ea1 100644 --- a/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/lib/Target/RISCV/RISCVInstrInfoF.td @@ -227,6 +227,12 @@ def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 2>; def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs)>; def : InstAlias<"fscsr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 2>; +// frsr, fssr are obsolete aliases replaced by frcsr, fscsr, so give them +// zero weight. +def : InstAlias<"frsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 0>; +def : InstAlias<"fssr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs), 0>; +def : InstAlias<"fssr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 0>; + def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, FRM.Encoding, X0), 2>; def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, FRM.Encoding, GPR:$rs)>; def : InstAlias<"fsrm $rs", (CSRRW X0, FRM.Encoding, GPR:$rs), 2>; diff --git a/lib/Target/RISCV/RISCVInstructionSelector.cpp b/lib/Target/RISCV/RISCVInstructionSelector.cpp new file mode 100644 index 00000000000..5bd09a54611 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -0,0 +1,103 @@ +//===-- RISCVInstructionSelector.cpp -----------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the InstructionSelector class for +/// RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "RISCVRegisterBankInfo.h" +#include "RISCVSubtarget.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "riscv-isel" + +using namespace llvm; + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +namespace { + +class RISCVInstructionSelector : public InstructionSelector { +public: + RISCVInstructionSelector(const RISCVTargetMachine &TM, + const RISCVSubtarget &STI, + const RISCVRegisterBankInfo &RBI); + + bool select(MachineInstr &I) override; + static const char *getName() { return DEBUG_TYPE; } + +private: + bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; + + const RISCVSubtarget &STI; + const RISCVInstrInfo &TII; + const RISCVRegisterInfo &TRI; + const RISCVRegisterBankInfo &RBI; + + // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel + // uses "STI." in the code generated by TableGen. We need to unify the name of + // Subtarget variable. 
+ const RISCVSubtarget *Subtarget = &STI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; + +} // end anonymous namespace + +#define GET_GLOBALISEL_IMPL +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +RISCVInstructionSelector::RISCVInstructionSelector( + const RISCVTargetMachine &TM, const RISCVSubtarget &STI, + const RISCVRegisterBankInfo &RBI) + : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), + +#define GET_GLOBALISEL_PREDICATES_INIT +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "RISCVGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} + +bool RISCVInstructionSelector::select(MachineInstr &I) { + + if (!isPreISelGenericOpcode(I.getOpcode())) { + // Certain non-generic instructions also need some special handling. + return true; + } + + if (selectImpl(I, *CoverageInfo)) + return true; + + return false; +} + +namespace llvm { +InstructionSelector * +createRISCVInstructionSelector(const RISCVTargetMachine &TM, + RISCVSubtarget &Subtarget, + RISCVRegisterBankInfo &RBI) { + return new RISCVInstructionSelector(TM, Subtarget, RBI); +} +} // end namespace llvm diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.cpp b/lib/Target/RISCV/RISCVLegalizerInfo.cpp new file mode 100644 index 00000000000..c92f4a3ee17 --- /dev/null +++ b/lib/Target/RISCV/RISCVLegalizerInfo.cpp @@ -0,0 +1,23 @@ +//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the LegalizerInfo class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#include "RISCVLegalizerInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" + +using namespace llvm; + +RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) { + computeTables(); +} diff --git a/lib/Target/RISCV/RISCVLegalizerInfo.h b/lib/Target/RISCV/RISCVLegalizerInfo.h new file mode 100644 index 00000000000..f2c2b9a3fd4 --- /dev/null +++ b/lib/Target/RISCV/RISCVLegalizerInfo.h @@ -0,0 +1,28 @@ +//===-- RISCVLegalizerInfo.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the LegalizerInfo class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H +#define LLVM_LIB_TARGET_RISCV_RISCVMACHINELEGALIZER_H + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +namespace llvm { + +class RISCVSubtarget; + +/// This class provides the legalizer information for the RISCV target. 
+class RISCVLegalizerInfo : public LegalizerInfo { +public: + RISCVLegalizerInfo(const RISCVSubtarget &ST); +}; +} // end namespace llvm +#endif diff --git a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 82b1209cb8e..4c9013aa1e2 100644 --- a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -45,7 +45,7 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI); void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, int64_t Offset); - bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset); + bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset); RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { @@ -85,7 +85,7 @@ bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, HiLUI.getOperand(1).getOffset() != 0 || !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) return false; - unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg(); + Register HiLuiDestReg = HiLUI.getOperand(0).getReg(); LoADDI = MRI->use_begin(HiLuiDestReg)->getParent(); if (LoADDI->getOpcode() != RISCV::ADDI || LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || @@ -132,12 +132,12 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, // \ / // TailAdd: add vreg4, vreg2, voff bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, - unsigned GAReg, + Register GAReg, int64_t &Offset) { assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!"); - unsigned Rs = TailAdd.getOperand(1).getReg(); - unsigned Rt = TailAdd.getOperand(2).getReg(); - unsigned Reg = Rs == GAReg ? Rt : Rs; + Register Rs = TailAdd.getOperand(1).getReg(); + Register Rt = TailAdd.getOperand(2).getReg(); + Register Reg = Rs == GAReg ? 
Rt : Rs; // Can't fold if the register has more than one use. if (!MRI->hasOneUse(Reg)) @@ -178,7 +178,7 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI) { - unsigned DestReg = LoADDI.getOperand(0).getReg(); + Register DestReg = LoADDI.getOperand(0).getReg(); assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI"); // LoADDI has only one use. MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent(); @@ -232,7 +232,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, return false; // Register defined by LoADDI should be used in the base part of the // load\store instruction. Otherwise, no folding possible. - unsigned BaseAddrReg = Tail.getOperand(1).getReg(); + Register BaseAddrReg = Tail.getOperand(1).getReg(); if (DestReg != BaseAddrReg) return false; MachineOperand &TailImmOp = Tail.getOperand(2); diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.cpp b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp new file mode 100644 index 00000000000..bd3b95a98b9 --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBankInfo.cpp @@ -0,0 +1,26 @@ +//===-- RISCVRegisterBankInfo.cpp -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the targeting of the RegisterBankInfo class for RISCV. +/// \todo This should be generated by TableGen. 
+//===----------------------------------------------------------------------===// + +#include "RISCVRegisterBankInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_TARGET_REGBANK_IMPL +#include "RISCVGenRegisterBank.inc" + +using namespace llvm; + +RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) + : RISCVGenRegisterBankInfo() {} diff --git a/lib/Target/RISCV/RISCVRegisterBankInfo.h b/lib/Target/RISCV/RISCVRegisterBankInfo.h new file mode 100644 index 00000000000..05fac992734 --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBankInfo.h @@ -0,0 +1,37 @@ +//===-- RISCVRegisterBankInfo.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares the targeting of the RegisterBankInfo class for RISCV. +/// \todo This should be generated by TableGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H +#define LLVM_LIB_TARGET_RISCV_RISCVREGISTERBANKINFO_H + +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" + +#define GET_REGBANK_DECLARATIONS +#include "RISCVGenRegisterBank.inc" + +namespace llvm { + +class TargetRegisterInfo; + +class RISCVGenRegisterBankInfo : public RegisterBankInfo { +protected: +#define GET_TARGET_REGBANK_CLASS +#include "RISCVGenRegisterBank.inc" +}; + +/// This class provides the information for the target register banks. 
+class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo { +public: + RISCVRegisterBankInfo(const TargetRegisterInfo &TRI); +}; +} // end namespace llvm +#endif diff --git a/lib/Target/RISCV/RISCVRegisterBanks.td b/lib/Target/RISCV/RISCVRegisterBanks.td new file mode 100644 index 00000000000..400b65a1bf9 --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterBanks.td @@ -0,0 +1,13 @@ +//=-- RISCVRegisterBank.td - Describe the RISCV Banks --------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/// General Purpose Registers: X. +def GPRRegBank : RegisterBank<"GPRB", [GPR]>; diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp index e6a126e3e51..66557687c0b 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -26,6 +26,15 @@ using namespace llvm; +static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive"); +static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive"); +static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive"); +static_assert(RISCV::F31_F == RISCV::F0_F + 31, + "Register list not consecutive"); +static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive"); +static_assert(RISCV::F31_D == RISCV::F0_D + 31, + "Register list not consecutive"); + RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode) : RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/0, /*EHFlavor*/0, /*PC*/0, HwMode) {} @@ -109,8 +118,8 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(isInt<32>(Offset) && "Int32 expected"); // The offset 
won't fit in an immediate, so use a scratch register instead // Modify Offset and FrameReg appropriately - unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm32(MBB, II, DL, ScratchReg, Offset); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, II, DL, ScratchReg, Offset); BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg) .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h index 4f339475508..56a50fe6ddc 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/lib/Target/RISCV/RISCVRegisterInfo.h @@ -52,6 +52,12 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { return true; } + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override { + return &RISCV::GPRRegClass; + } }; } diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td index 79f8ab12f6c..82b37afd080 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/lib/Target/RISCV/RISCVRegisterInfo.td @@ -101,6 +101,12 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } +def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + // The order of registers represents the preferred allocation sequence. // Registers are listed in the order caller-save, callee-save, specials. 
def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add @@ -159,41 +165,41 @@ def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> { // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { - def F0_32 : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; - def F1_32 : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; - def F2_32 : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; - def F3_32 : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; - def F4_32 : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; - def F5_32 : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; - def F6_32 : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; - def F7_32 : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; - def F8_32 : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; - def F9_32 : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; - def F10_32 : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; - def F11_32 : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; - def F12_32 : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; - def F13_32 : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; - def F14_32 : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; - def F15_32 : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; - def F16_32 : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; - def F17_32 : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; - def F18_32 : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; - def F19_32 : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; - def F20_32 : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; - def F21_32 : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; - def F22_32 : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; - def F23_32 : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; - def F24_32 : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; - def F25_32 : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; - def F26_32 : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; - def F27_32 : RISCVReg32<27,"f27", ["fs11"]>, 
DwarfRegNum<[59]>; - def F28_32 : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; - def F29_32 : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; - def F30_32 : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; - def F31_32 : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; + def F0_F : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; + def F1_F : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; + def F2_F : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; + def F3_F : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; + def F4_F : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; + def F5_F : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; + def F6_F : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; + def F7_F : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; + def F8_F : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; + def F9_F : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; + def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; + def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; + def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; + def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; + def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; + def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; + def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; + def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; + def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; + def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; + def F20_F : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; + def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; + def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; + def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; + def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; + def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; + def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; + def 
F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; + def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; + def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; + def F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; + def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; foreach Index = 0-31 in { - def F#Index#_64 : RISCVReg64("F"#Index#"_32")>, + def F#Index#_D : RISCVReg64("F"#Index#"_F")>, DwarfRegNum<[!add(Index, 32)]>; } } @@ -201,29 +207,29 @@ let RegAltNameIndices = [ABIRegAltName] in { // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. def FPR32 : RegisterClass<"RISCV", [f32], 32, (add - (sequence "F%u_32", 0, 7), - (sequence "F%u_32", 10, 17), - (sequence "F%u_32", 28, 31), - (sequence "F%u_32", 8, 9), - (sequence "F%u_32", 18, 27) + (sequence "F%u_F", 0, 7), + (sequence "F%u_F", 10, 17), + (sequence "F%u_F", 28, 31), + (sequence "F%u_F", 8, 9), + (sequence "F%u_F", 18, 27) )>; def FPR32C : RegisterClass<"RISCV", [f32], 32, (add - (sequence "F%u_32", 10, 15), - (sequence "F%u_32", 8, 9) + (sequence "F%u_F", 10, 15), + (sequence "F%u_F", 8, 9) )>; // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. 
def FPR64 : RegisterClass<"RISCV", [f64], 64, (add - (sequence "F%u_64", 0, 7), - (sequence "F%u_64", 10, 17), - (sequence "F%u_64", 28, 31), - (sequence "F%u_64", 8, 9), - (sequence "F%u_64", 18, 27) + (sequence "F%u_D", 0, 7), + (sequence "F%u_D", 10, 17), + (sequence "F%u_D", 28, 31), + (sequence "F%u_D", 8, 9), + (sequence "F%u_D", 18, 27) )>; def FPR64C : RegisterClass<"RISCV", [f64], 64, (add - (sequence "F%u_64", 10, 15), - (sequence "F%u_64", 8, 9) + (sequence "F%u_D", 10, 15), + (sequence "F%u_D", 8, 9) )>; diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp index 6902ed75d85..f114c6ac192 100644 --- a/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/lib/Target/RISCV/RISCVSubtarget.cpp @@ -12,7 +12,11 @@ #include "RISCVSubtarget.h" #include "RISCV.h" +#include "RISCVCallLowering.h" #include "RISCVFrameLowering.h" +#include "RISCVLegalizerInfo.h" +#include "RISCVRegisterBankInfo.h" +#include "RISCVTargetMachine.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -47,4 +51,28 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName, const TargetMachine &TM) : RISCVGenSubtargetInfo(TT, CPU, FS), FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)), - InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {} + InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { + CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering())); + Legalizer.reset(new RISCVLegalizerInfo(*this)); + + auto *RBI = new RISCVRegisterBankInfo(*getRegisterInfo()); + RegBankInfo.reset(RBI); + InstSelector.reset(createRISCVInstructionSelector( + *static_cast(&TM), *this, *RBI)); +} + +const CallLowering *RISCVSubtarget::getCallLowering() const { + return CallLoweringInfo.get(); +} + +InstructionSelector *RISCVSubtarget::getInstructionSelector() const { + return InstSelector.get(); +} + +const LegalizerInfo *RISCVSubtarget::getLegalizerInfo() const { + return Legalizer.get(); +} 
+ +const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const { + return RegBankInfo.get(); +} diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h index 106ff49f021..7d0373a5253 100644 --- a/lib/Target/RISCV/RISCVSubtarget.h +++ b/lib/Target/RISCV/RISCVSubtarget.h @@ -17,6 +17,10 @@ #include "RISCVISelLowering.h" #include "RISCVInstrInfo.h" #include "Utils/RISCVBaseInfo.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -38,6 +42,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; + bool EnableRVCHintInstrs = false; unsigned XLen = 32; MVT XLenVT = MVT::i32; RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; @@ -75,6 +80,7 @@ public: const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { return &TSInfo; } + bool enableMachineScheduler() const override { return true; } bool hasStdExtM() const { return HasStdExtM; } bool hasStdExtA() const { return HasStdExtA; } bool hasStdExtF() const { return HasStdExtF; } @@ -83,9 +89,23 @@ public: bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } + bool enableRVCHintInstrs() const { return EnableRVCHintInstrs; } MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } RISCVABI::ABI getTargetABI() const { return TargetABI; } + +protected: + // GlobalISel related APIs. 
+ std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + +public: + const CallLowering *getCallLowering() const override; + InstructionSelector *getInstructionSelector() const override; + const LegalizerInfo *getLegalizerInfo() const override; + const RegisterBankInfo *getRegBankInfo() const override; }; } // End llvm namespace diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index f4e6ed9f628..5ffc6eda6bd 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -17,6 +17,10 @@ #include "TargetInfo/RISCVTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -30,6 +34,7 @@ extern "C" void LLVMInitializeRISCVTarget() { RegisterTargetMachine X(getTheRISCV32Target()); RegisterTargetMachine Y(getTheRISCV64Target()); auto PR = PassRegistry::getPassRegistry(); + initializeGlobalISel(*PR); initializeRISCVExpandPseudoPass(*PR); } @@ -58,7 +63,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) { initAsmInfo(); } @@ -80,6 +85,10 @@ public: void addIRPasses() override; bool addInstSelector() override; + bool addIRTranslator() override; + bool addLegalizeMachineIR() override; + bool addRegBankSelect() override; + bool addGlobalInstructionSelect() override; 
void addPreEmitPass() override; void addPreEmitPass2() override; void addPreRegAlloc() override; @@ -101,6 +110,26 @@ bool RISCVPassConfig::addInstSelector() { return false; } +bool RISCVPassConfig::addIRTranslator() { + addPass(new IRTranslator()); + return false; +} + +bool RISCVPassConfig::addLegalizeMachineIR() { + addPass(new Legalizer()); + return false; +} + +bool RISCVPassConfig::addRegBankSelect() { + addPass(new RegBankSelect()); + return false; +} + +bool RISCVPassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} + void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } void RISCVPassConfig::addPreEmitPass2() { diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/lib/Target/RISCV/Utils/RISCVBaseInfo.h index c33c72f2431..30e475e80a0 100644 --- a/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.h @@ -16,6 +16,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/SubtargetFeature.h" namespace llvm { @@ -63,6 +64,21 @@ enum { }; } // namespace RISCVII +namespace RISCVOp { +enum OperandType : unsigned { + OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET, + OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM, + OPERAND_UIMM5, + OPERAND_UIMM12, + OPERAND_SIMM12, + OPERAND_SIMM13_LSB0, + OPERAND_UIMM20, + OPERAND_SIMM21_LSB0, + OPERAND_UIMMLOG2XLEN, + OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN +}; +} // namespace RISCVOp + // Describes the predecessor/successor bits used in the FENCE instruction. 
namespace RISCVFenceField { enum FenceField { diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 15453ae59a4..f6be9dd0124 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -376,7 +376,7 @@ public: } static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - auto Op = make_unique(k_Token); + auto Op = std::make_unique(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -386,7 +386,7 @@ public: static std::unique_ptr CreateReg(unsigned RegNum, unsigned Kind, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Register); + auto Op = std::make_unique(k_Register); Op->Reg.RegNum = RegNum; Op->Reg.Kind = (SparcOperand::RegisterKind)Kind; Op->StartLoc = S; @@ -396,7 +396,7 @@ public: static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - auto Op = make_unique(k_Immediate); + auto Op = std::make_unique(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -481,7 +481,7 @@ public: static std::unique_ptr CreateMEMr(unsigned Base, SMLoc S, SMLoc E) { - auto Op = make_unique(k_MemoryReg); + auto Op = std::make_unique(k_MemoryReg); Op->Mem.Base = Base; Op->Mem.OffsetReg = Sparc::G0; // always 0 Op->Mem.Off = nullptr; diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index f1ca8e18c22..db8e7850300 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -253,7 +253,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (!MO.isReg()) continue; // skip - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MO.isDef()) { // check whether Reg is defined or used before delay slot. 
@@ -324,7 +324,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg == 0) continue; if (MO.isDef()) @@ -380,7 +380,7 @@ static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, // // After : restore , , %o[0-7] - unsigned reg = AddMI->getOperand(0).getReg(); + Register reg = AddMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; @@ -408,7 +408,7 @@ static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, // // After : restore , , %o[0-7] - unsigned reg = OrMI->getOperand(0).getReg(); + Register reg = OrMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; @@ -446,7 +446,7 @@ static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, // // After : restore %g0, (imm3<<10), %o[0-7] - unsigned reg = SetHiMI->getOperand(0).getReg(); + Register reg = SetHiMI->getOperand(0).getReg(); if (reg < SP::I0 || reg > SP::I7) return false; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 88547075c5a..c97a30e634c 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -49,7 +49,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, } if (IsPCRel) { - switch((unsigned)Fixup.getKind()) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented fixup -> relocation"); case FK_Data_1: return ELF::R_SPARC_DISP8; @@ -65,7 +65,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx, } } - switch((unsigned)Fixup.getKind()) { + switch(Fixup.getTargetKind()) { default: llvm_unreachable("Unimplemented fixup -> relocation"); case FK_Data_1: return ELF::R_SPARC_8; @@ -135,5 +135,5 @@ bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, std::unique_ptr llvm::createSparcELFObjectWriter(bool Is64Bit, 
uint8_t OSABI) { - return llvm::make_unique(Is64Bit, OSABI); + return std::make_unique(Is64Bit, OSABI); } diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 1834a6fd861..0f74f2bb344 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -34,7 +34,8 @@ DisableLeafProc("disable-sparc-leaf-proc", SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8) {} + ST.is64Bit() ? Align(16) : Align(8), 0, + ST.is64Bit() ? Align(16) : Align(8)) {} void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 8cff50d19ed..4e61c341b70 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -231,7 +231,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to // the original GPRs. - unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); SDValue Chain = SDValue(N,0); @@ -278,7 +278,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two // i32 VRs of inline asm with it. 
- unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + Register GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a6d440fa8aa..4a2ba00ac6c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -417,7 +417,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( if (VA.needsCustom()) { assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); - unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); @@ -445,7 +445,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( InVals.push_back(WholeValue); continue; } - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg); SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (VA.getLocVT() == MVT::f32) @@ -552,7 +552,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( std::vector OutChains; for (; CurArgReg != ArgRegEnd; ++CurArgReg) { - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg, VReg); SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); @@ -1016,9 +1016,9 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { - unsigned Reg = StringSwitch(RegName) +Register SparcTargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { + Register Reg = StringSwitch(RegName) .Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3) .Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7) .Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3) @@ -1438,7 +1438,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::v2i32, Expand); } // Truncating/extending stores/loads are also not supported. - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); @@ -1805,7 +1805,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(4)); computeRegisterProperties(Subtarget->getRegisterInfo()); } @@ -2244,7 +2244,7 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_UL : { - SDValue Mask = DAG.getTargetConstant(1, DL, Result.getValueType()); + SDValue Mask = DAG.getConstant(1, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_NE; @@ -2277,14 +2277,14 @@ SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_LG : { - SDValue Mask = DAG.getTargetConstant(3, DL, 
Result.getValueType()); + SDValue Mask = DAG.getConstant(3, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_NE; return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS); } case SPCC::FCC_UE : { - SDValue Mask = DAG.getTargetConstant(3, DL, Result.getValueType()); + SDValue Mask = DAG.getConstant(3, DL, Result.getValueType()); Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask); SDValue RHS = DAG.getTargetConstant(0, DL, Result.getValueType()); SPCC = SPCC::ICC_E; @@ -2951,9 +2951,11 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt); SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); SDValue MulResult = TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, - Args, isSigned, dl).first; + Args, CallOptions, dl).first; SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, DAG.getIntPtrConstant(0, dl)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, @@ -3183,7 +3185,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { case 'e': return C_RegisterClass; case 'I': // SIMM13 - return C_Other; + return C_Immediate; } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 8d557a4225e..3d798cec0c1 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -98,8 +98,8 @@ namespace llvm { return MVT::i32; } - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. 
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index 2d4f687f72d..d18ab3b1370 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -177,7 +177,7 @@ def LEAX_ADDri : F3_2<2, 0b000000, def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; -def : Pat<(ctpop i64:$src), (POPCrr $src)>; +def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>; } // Predicates = [Is64Bit] diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index ad343fe6f80..3d3d314a26b 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -375,8 +375,8 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineInstr *MovMI = nullptr; for (unsigned i = 0; i != numSubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); - unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); + Register Dst = TRI->getSubReg(DestReg, subRegIdx[i]); + Register Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 8474c7abffb..73dbdc4f443 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -516,9 +516,9 @@ let DecoderMethod = "DecodeLoadQFP" in defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; -let DecoderMethod = "DecodeLoadCP" in - defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; -let DecoderMethod = "DecodeLoadCPPair" in +let DecoderMethod = "DecodeLoadCP" in + defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; +let DecoderMethod = "DecodeLoadCPPair" in defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>; let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in { @@ -1508,7 +1508,7 @@ 
let rs1 = 0 in def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$rd), (ins IntRegs:$rs2), "popc $rs2, $rd", []>, Requires<[HasV9]>; -def : Pat<(ctpop i32:$src), +def : Pat<(i32 (ctpop i32:$src)), (POPCrr (SRLri $src, 0))>; let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index ce11a423d10..19a90e98db7 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -182,9 +182,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); - unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); + Register SrcReg = MI.getOperand(2).getReg(); + Register SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); + Register SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); @@ -194,9 +194,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += 8; } else if (MI.getOpcode() == SP::LDQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64); - unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64); + Register DestReg = MI.getOperand(0).getReg(); + Register DestEvenReg = getSubReg(DestReg, SP::sub_even64); + Register DestOddReg = getSubReg(DestReg, SP::sub_odd64); MachineInstr *LdMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) .addReg(FrameReg).addImm(0); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 195cff79de0..c1e3f8c3698 100644 --- 
a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -98,7 +98,7 @@ SparcTargetMachine::SparcTargetMachine( getEffectiveSparcCodeModel( CM, getEffectiveRelocModel(RM), is64bit, JIT), OL), - TLOF(make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this, is64bit), is64Bit(is64bit) { initAsmInfo(); } @@ -133,7 +133,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, + I = std::make_unique(TargetTriple, CPU, FS, *this, this->is64Bit); } return I.get(); diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index a259ba3433d..93c4ce4b5cc 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -155,11 +155,11 @@ public: // Create particular kinds of operand. 
static std::unique_ptr createInvalid(SMLoc StartLoc, SMLoc EndLoc) { - return make_unique(KindInvalid, StartLoc, EndLoc); + return std::make_unique(KindInvalid, StartLoc, EndLoc); } static std::unique_ptr createToken(StringRef Str, SMLoc Loc) { - auto Op = make_unique(KindToken, Loc, Loc); + auto Op = std::make_unique(KindToken, Loc, Loc); Op->Token.Data = Str.data(); Op->Token.Length = Str.size(); return Op; @@ -167,7 +167,7 @@ public: static std::unique_ptr createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindReg, StartLoc, EndLoc); + auto Op = std::make_unique(KindReg, StartLoc, EndLoc); Op->Reg.Kind = Kind; Op->Reg.Num = Num; return Op; @@ -175,7 +175,7 @@ public: static std::unique_ptr createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindImm, StartLoc, EndLoc); + auto Op = std::make_unique(KindImm, StartLoc, EndLoc); Op->Imm = Expr; return Op; } @@ -184,7 +184,7 @@ public: createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm, unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindMem, StartLoc, EndLoc); + auto Op = std::make_unique(KindMem, StartLoc, EndLoc); Op->Mem.MemKind = MemKind; Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; @@ -200,7 +200,7 @@ public: static std::unique_ptr createImmTLS(const MCExpr *Imm, const MCExpr *Sym, SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindImmTLS, StartLoc, EndLoc); + auto Op = std::make_unique(KindImmTLS, StartLoc, EndLoc); Op->ImmTLS.Imm = Imm; Op->ImmTLS.Sym = Sym; return Op; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 8d8ba5644e1..49b6fc49033 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -162,5 +162,5 @@ unsigned 
SystemZObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr llvm::createSystemZObjectWriter(uint8_t OSABI) { - return llvm::make_unique(OSABI); + return std::make_unique(OSABI); } diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 2b0f90182d7..88cf589a3f1 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -190,7 +190,6 @@ static inline bool isImmHF(uint64_t Val) { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); -FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index ef378e4ade7..10023e9e169 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -501,6 +501,10 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { } break; + case TargetOpcode::FENTRY_CALL: + LowerFENTRY_CALL(*MI, Lower); + return; + case TargetOpcode::STACKMAP: LowerSTACKMAP(*MI); return; @@ -546,6 +550,22 @@ static unsigned EmitNop(MCContext &OutContext, MCStreamer &OutStreamer, } } +void SystemZAsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, + SystemZMCInstLower &Lower) { + MCContext &Ctx = MF->getContext(); + if (MF->getFunction().getFnAttribute("mnop-mcount") + .getValueAsString() == "true") { + EmitNop(Ctx, *OutStreamer, 6, getSubtargetInfo()); + return; + } + + MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); + const MCSymbolRefExpr *Op = + MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_PLT, Ctx); + OutStreamer->EmitInstruction(MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R0D).addExpr(Op), getSubtargetInfo()); +} + void 
SystemZAsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { const SystemZInstrInfo *TII = static_cast(MF->getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index aa5d3ca78e6..d01a17c2ebe 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -46,6 +46,7 @@ public: } private: + void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL); void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower); }; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 9cbf6b32050..946eb2ba7c7 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -152,7 +152,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { - if (unsigned MOReg = MO.getReg()) { + if (Register MOReg = MO.getReg()) { if (TRI->regsOverlap(MOReg, Reg)) { if (MO.isUse()) Ref.Use = true; @@ -378,11 +378,8 @@ bool SystemZElimCompare::adjustCCMasksForInstr( } // CC is now live after MI. - if (!ConvOpc) { - int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); - assert(CCDef >= 0 && "Couldn't find CC set"); - MI.getOperand(CCDef).setIsDead(false); - } + if (!ConvOpc) + MI.clearRegisterDeads(SystemZ::CC); // Check if MI lies before Compare. bool BeforeCmp = false; diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp deleted file mode 100644 index 09708fb4241..00000000000 --- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. -// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -using namespace llvm; - -#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" - -namespace llvm { - void initializeSystemZExpandPseudoPass(PassRegistry&); -} - -namespace { -class SystemZExpandPseudo : public MachineFunctionPass { -public: - static char ID; - SystemZExpandPseudo() : MachineFunctionPass(ID) { - initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); - } - - const SystemZInstrInfo *TII; - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); - bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); -}; -char SystemZExpandPseudo::ID = 0; -} - -INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", - SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) - -/// Returns an instance of the pseudo instruction expansion pass. 
-FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { - return new SystemZExpandPseudo(); -} - -// MI is a load-register-on-condition pseudo instruction that could not be -// handled as a single hardware instruction. Replace it by a branch sequence. -bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineFunction &MF = *MBB.getParent(); - const BasicBlock *BB = MBB.getBasicBlock(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned CCValid = MI.getOperand(3).getImm(); - unsigned CCMask = MI.getOperand(4).getImm(); - - LivePhysRegs LiveRegs(TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); - - // Splice MBB at MI, moving the rest of the block into RestMBB. - MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); - RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); - RestMBB->transferSuccessors(&MBB); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - RestMBB->addLiveIn(*I); - - // Create a new block MoveMBB to hold the move instruction. - MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); - MoveMBB->addLiveIn(SrcReg); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MoveMBB->addLiveIn(*I); - - // At the end of MBB, create a conditional branch to RestMBB if the - // condition is false, otherwise fall through to MoveMBB. 
- BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) - .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); - MBB.addSuccessor(RestMBB); - MBB.addSuccessor(MoveMBB); - - // In MoveMBB, emit an instruction to move SrcReg into DestReg, - // then fall through to RestMBB. - TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, - MI.getOperand(2).isKill()); - MoveMBB->addSuccessor(RestMBB); - - NextMBBI = MBB.end(); - MI.eraseFromParent(); - return true; -} - -/// If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. -bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - case SystemZ::LOCRMux: - return expandLOCRMux(MBB, MBBI, NextMBBI); - default: - break; - } - return false; -} - -/// Iterate over the instructions in basic block MBB and expand any -/// pseudo instructions. Return true if anything was modified. 
-bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI, NMBBI); - MBBI = NMBBI; - } - - return Modified; -} - -bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getSubtarget().getInstrInfo()); - - bool Modified = false; - for (auto &MBB : MF) - Modified |= expandMBB(MBB); - return Modified; -} - diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index da28faebb32..0b8b6880acc 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -46,8 +46,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { } // end anonymous namespace SystemZFrameLowering::SystemZFrameLowering() - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize, 8, + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), + -SystemZMC::CallFrameSize, Align(8), false /* StackRealignable */) { // Create a mapping from register number to save slot offset. 
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); @@ -118,7 +118,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, unsigned GPR64, bool IsImplicit) { const TargetRegisterInfo *RI = MBB.getParent()->getSubtarget().getRegisterInfo(); - unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); + Register GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); if (!IsLive || !IsImplicit) { MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 9dc4512255c..751034c2d41 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -346,6 +346,11 @@ public: : SelectionDAGISel(TM, OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override { + const Function &F = MF.getFunction(); + if (F.getFnAttribute("mnop-mcount").getValueAsString() == "true" && + F.getFnAttribute("fentry-call").getValueAsString() != "true") + report_fatal_error("mnop-mcount only supported with fentry-call"); + Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -1146,7 +1151,7 @@ void SystemZDAGToDAGISel::loadVectorConstant( SDLoc DL(Node); SmallVector Ops; for (unsigned OpVal : VCI.OpVals) - Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32)); SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); if (VCI.VecVT == VT.getSimpleVT()) @@ -1550,8 +1555,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { uint64_t ConstCCMask = cast(CCMask.getNode())->getZExtValue(); // Invert the condition. 
- CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node), - CCMask.getValueType()); + CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask, + SDLoc(Node), CCMask.getValueType()); SDValue Op4 = Node->getOperand(4); SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 78820f511ab..e0ca9da9356 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -120,9 +120,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Instructions are strings of 2-byte aligned 2-byte values. - setMinFunctionAlignment(2); + setMinFunctionAlignment(Align(2)); // For performance reasons we prefer 16-byte alignment. - setPrefFunctionAlignment(4); + setPrefFunctionAlignment(Align(16)); // Handle operations that are handled in a similar way for all types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; @@ -206,6 +206,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // the default expansion. if (!Subtarget.hasFPExtension()) setOperationAction(ISD::FP_TO_UINT, VT, Expand); + + // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all + // default to Expand, so need to be modified to Legal where appropriate. + setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); + if (Subtarget.hasFPExtension()) + setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); } } @@ -252,7 +258,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); - // On arch13 we have native support for a 64-bit CTPOP. + // On z15 we have native support for a 64-bit CTPOP. 
if (Subtarget.hasMiscellaneousExtensions3()) { setOperationAction(ISD::CTPOP, MVT::i32, Promote); setOperationAction(ISD::CTPOP, MVT::i64, Legal); @@ -294,14 +300,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Handle prefetches with PFD or PFDRL. setOperationAction(ISD::PREFETCH, MVT::Other, Custom); - for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // Assume by default that all vector operations need to be expanded. for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) if (getOperationAction(Opcode, VT) == Legal) setOperationAction(Opcode, VT, Expand); // Likewise all truncating stores and extending loads. - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); @@ -327,7 +333,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, } // Handle integer vector types. - for (MVT VT : MVT::integer_vector_valuetypes()) { + for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { if (isTypeLegal(VT)) { // These operations have direct equivalents. 
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); @@ -381,6 +387,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); } if (Subtarget.hasVectorEnhancements2()) { @@ -392,6 +403,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); } // Handle floating-point types. 
@@ -831,7 +847,7 @@ supportedAddressingMode(Instruction *I, bool HasVector) { } if (isa(I) && I->hasOneUse()) { - auto *SingleUser = dyn_cast(*I->user_begin()); + auto *SingleUser = cast(*I->user_begin()); if (SingleUser->getParent() == I->getParent()) { if (isa(SingleUser)) { if (auto *C = dyn_cast(SingleUser->getOperand(1))) @@ -956,7 +972,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { case 'K': // Signed 16-bit constant case 'L': // Signed 20-bit displacement (on all targets we support) case 'M': // 0x7fffffff - return C_Other; + return C_Immediate; default: break; @@ -1335,7 +1351,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments( break; } - unsigned VReg = MRI.createVirtualRegister(RC); + Register VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(VA.getLocReg(), VReg); ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); } else { @@ -1430,7 +1446,7 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo, return false; if (!VA.isRegLoc()) return false; - unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) @@ -1674,7 +1690,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); // Chain and glue the copies together. 
- unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); Glue = Chain.getValue(1); RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); @@ -2533,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, DL, MVT::i32)); + DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, - DAG.getConstant(RegisterOnly, DL, MVT::i32)); + DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } @@ -2576,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // in CCValid, so other values can be ignored. 
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { - SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(CCValid, DL, MVT::i32), - DAG.getConstant(CCMask, DL, MVT::i32), CCReg }; + SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getTargetConstant(CCValid, DL, MVT::i32), + DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } @@ -2741,9 +2757,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); - return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); + return DAG.getNode( + SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, @@ -2794,8 +2811,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, } SDValue CCReg = emitCmp(DAG, DL, C); - SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), - DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg}; + SDValue Ops[] = {TrueOp, FalseOp, + DAG.getTargetConstant(C.CCValid, DL, MVT::i32), + DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } @@ -3882,11 +3900,8 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); unsigned Code = IsWrite ? 
SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast(Op.getNode()); - SDValue Ops[] = { - Op.getOperand(0), - DAG.getConstant(Code, DL, MVT::i32), - Op.getOperand(1) - }; + SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), + Op.getOperand(1)}; return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); @@ -4228,7 +4243,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); SDValue Op; if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { - SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32); + SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); } else if (P.Opcode == SystemZISD::PACK) { MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), @@ -4253,7 +4268,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], - Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32)); + Ops[OpNo1], + DAG.getTargetConstant(StartIndex, DL, MVT::i32)); // Fall back on VPERM. Construct an SDNode for the permute vector. SDValue IndexNodes[SystemZ::VectorBytes]; @@ -4751,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); // Otherwise keep it as a vector-to-vector operation. 
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), - DAG.getConstant(Index, DL, MVT::i32)); + DAG.getTargetConstant(Index, DL, MVT::i32)); } GeneralShuffle GS(VT); @@ -6041,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), N->getOperand(3), CCReg); return SDValue(); } @@ -6063,10 +6079,9 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), - N->getOperand(0), - N->getOperand(1), - DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), - DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(0), N->getOperand(1), + DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); return SDValue(); } @@ -6548,19 +6563,17 @@ static bool isSelectPseudo(MachineInstr &MI) { // Helper function, which inserts PHI functions into SinkMBB: // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], -// where %FalseValue(i) and %TrueValue(i) are taken from the consequent Selects -// in [MIItBegin, MIItEnd) range. -static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, - MachineBasicBlock::iterator MIItEnd, +// where %FalseValue(i) and %TrueValue(i) are taken from Selects. 
+static void createPHIsForSelects(SmallVector &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB) { MachineFunction *MF = TrueMBB->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - unsigned CCValid = MIItBegin->getOperand(3).getImm(); - unsigned CCMask = MIItBegin->getOperand(4).getImm(); - DebugLoc DL = MIItBegin->getDebugLoc(); + MachineInstr *FirstMI = Selects.front(); + unsigned CCValid = FirstMI->getOperand(3).getImm(); + unsigned CCMask = FirstMI->getOperand(4).getImm(); MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); @@ -6572,16 +6585,15 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, // destination registers, and the registers that went into the PHI. DenseMap> RegRewriteTable; - for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; - MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned TrueReg = MIIt->getOperand(1).getReg(); - unsigned FalseReg = MIIt->getOperand(2).getReg(); + for (auto MI : Selects) { + Register DestReg = MI->getOperand(0).getReg(); + Register TrueReg = MI->getOperand(1).getReg(); + Register FalseReg = MI->getOperand(2).getReg(); // If this Select we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the // PHI that is going to be generated. 
- if (MIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) + if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) std::swap(TrueReg, FalseReg); if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end()) @@ -6590,6 +6602,7 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end()) FalseReg = RegRewriteTable[FalseReg].second; + DebugLoc DL = MI->getDebugLoc(); BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(TrueMBB) .addReg(FalseReg).addMBB(FalseMBB); @@ -6605,36 +6618,61 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { + assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); - DebugLoc DL = MI.getDebugLoc(); // If we have a sequence of Select* pseudo instructions using the // same condition code value, we want to expand all of them into // a single pair of basic blocks using the same condition. 
- MachineInstr *LastMI = &MI; - MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward( - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - - if (isSelectPseudo(MI)) - while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) && - NextMIIt->getOperand(3).getImm() == CCValid && - (NextMIIt->getOperand(4).getImm() == CCMask || - NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) { - LastMI = &*NextMIIt; - NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end()); + SmallVector Selects; + SmallVector DbgValues; + Selects.push_back(&MI); + unsigned Count = 0; + for (MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + NextMIIt != MBB->end(); ++NextMIIt) { + if (NextMIIt->definesRegister(SystemZ::CC)) + break; + if (isSelectPseudo(*NextMIIt)) { + assert(NextMIIt->getOperand(3).getImm() == CCValid && + "Bad CCValid operands since CC was not redefined."); + if (NextMIIt->getOperand(4).getImm() == CCMask || + NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) { + Selects.push_back(&*NextMIIt); + continue; + } + break; } + bool User = false; + for (auto SelMI : Selects) + if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { + User = true; + break; + } + if (NextMIIt->isDebugInstr()) { + if (User) { + assert(NextMIIt->isDebugValue() && "Unhandled debug opcode."); + DbgValues.push_back(&*NextMIIt); + } + } + else if (User || ++Count > 20) + break; + } + MachineInstr *LastMI = Selects.back(); + bool CCKilled = + (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; - MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB); + MachineBasicBlock *JoinMBB = splitBlockAfter(LastMI, MBB); MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB. 
- if (!LastMI->killsRegister(SystemZ::CC) && !checkCCKill(*LastMI, JoinMBB)) { + if (!CCKilled) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } @@ -6643,7 +6681,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; - BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); @@ -6657,12 +6695,14 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... MBB = JoinMBB; - MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); - MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward( - std::next(MachineBasicBlock::iterator(LastMI)), MBB->end()); - createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB); + createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); + for (auto SelMI : Selects) + SelMI->eraseFromParent(); + + MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); + for (auto DbgMI : DbgValues) + MBB->splice(InsertPos, StartMBB, DbgMI); - StartMBB->erase(MIItBegin, MIItEnd); return JoinMBB; } @@ -6678,10 +6718,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); MachineOperand Base = MI.getOperand(1); int64_t Disp = MI.getOperand(2).getImm(); - unsigned IndexReg = MI.getOperand(3).getReg(); + Register IndexReg = MI.getOperand(3).getReg(); unsigned CCValid = MI.getOperand(4).getImm(); unsigned CCMask = MI.getOperand(5).getImm(); DebugLoc DL = MI.getDebugLoc(); @@ -6773,7 +6813,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( // Extract the operands. Base can be a register or a frame index. 
// Src2 can be a register or immediate. - unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); @@ -6833,7 +6873,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( .addReg(OldVal).addReg(BitShift).addImm(0); if (Invert) { // Perform the operation normally and then invert every bit of the field. - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); if (BitSize <= 32) // XILF with the upper BitSize bits set. @@ -6842,7 +6882,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( else { // Use LCGR and add -1 to the result, which is more compact than // an XILF, XILH pair. - unsigned Tmp2 = MRI.createVirtualRegister(RC); + Register Tmp2 = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) .addReg(Tmp2).addImm(-1); @@ -6891,7 +6931,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. - unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register Src2 = MI.getOperand(3).getReg(); @@ -7005,13 +7045,13 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. 
- unsigned Dest = MI.getOperand(0).getReg(); + Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); - unsigned OrigCmpVal = MI.getOperand(3).getReg(); - unsigned OrigSwapVal = MI.getOperand(4).getReg(); - unsigned BitShift = MI.getOperand(5).getReg(); - unsigned NegBitShift = MI.getOperand(6).getReg(); + Register OrigCmpVal = MI.getOperand(3).getReg(); + Register OrigSwapVal = MI.getOperand(4).getReg(); + Register BitShift = MI.getOperand(5).getReg(); + Register NegBitShift = MI.getOperand(6).getReg(); int64_t BitSize = MI.getOperand(7).getImm(); DebugLoc DL = MI.getDebugLoc(); @@ -7023,14 +7063,14 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - unsigned OrigOldVal = MRI.createVirtualRegister(RC); - unsigned OldVal = MRI.createVirtualRegister(RC); - unsigned CmpVal = MRI.createVirtualRegister(RC); - unsigned SwapVal = MRI.createVirtualRegister(RC); - unsigned StoreVal = MRI.createVirtualRegister(RC); - unsigned RetryOldVal = MRI.createVirtualRegister(RC); - unsigned RetryCmpVal = MRI.createVirtualRegister(RC); - unsigned RetrySwapVal = MRI.createVirtualRegister(RC); + Register OrigOldVal = MRI.createVirtualRegister(RC); + Register OldVal = MRI.createVirtualRegister(RC); + Register CmpVal = MRI.createVirtualRegister(RC); + Register SwapVal = MRI.createVirtualRegister(RC); + Register StoreVal = MRI.createVirtualRegister(RC); + Register RetryOldVal = MRI.createVirtualRegister(RC); + Register RetryCmpVal = MRI.createVirtualRegister(RC); + Register RetrySwapVal = MRI.createVirtualRegister(RC); // Insert 2 basic blocks for the loop. 
MachineBasicBlock *StartMBB = MBB; @@ -7129,11 +7169,11 @@ SystemZTargetLowering::emitPair128(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Hi = MI.getOperand(1).getReg(); - unsigned Lo = MI.getOperand(2).getReg(); - unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); - unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Dest = MI.getOperand(0).getReg(); + Register Hi = MI.getOperand(1).getReg(); + Register Lo = MI.getOperand(2).getReg(); + Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) @@ -7157,14 +7197,14 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); - unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); if (ClearEven) { - unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); - unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) .addImm(0); @@ -7308,7 +7348,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( // The previous iteration might have created out-of-range displacements. 
// Apply them using LAY if so. if (!isUInt<12>(DestDisp)) { - unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(DestBase) .addImm(DestDisp) @@ -7317,7 +7357,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( DestDisp = 0; } if (!isUInt<12>(SrcDisp)) { - unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(SrcBase) .addImm(SrcDisp) @@ -7474,11 +7514,11 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( static_cast(Subtarget.getInstrInfo()); DebugLoc DL = MI.getDebugLoc(); - unsigned SrcReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(0).getReg(); // Create new virtual register of the same class as source. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - unsigned DstReg = MRI->createVirtualRegister(RC); + Register DstReg = MRI->createVirtualRegister(RC); // Replace pseudo with a normal load-and-test that models the def as // well. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 19c7ec58ed3..9c95e8aec94 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -25,10 +25,10 @@ let Predicates = [FeatureNoVectorEnhancements1] in let Predicates = [FeatureVectorEnhancements1] in def SelectVR128 : SelectWrapper; -defm CondStoreF32 : CondStores; -defm CondStoreF64 : CondStores; +defm CondStoreF32 : CondStores; +defm CondStoreF64 : CondStores; //===----------------------------------------------------------------------===// // Move instructions @@ -276,13 +276,13 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in { } // fp_to_sint always rounds towards zero, which is modifier value 5. 
-def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; -def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; -def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; +def : Pat<(i32 (any_fp_to_sint FP32:$src)), (CFEBR 5, FP32:$src)>; +def : Pat<(i32 (any_fp_to_sint FP64:$src)), (CFDBR 5, FP64:$src)>; +def : Pat<(i32 (any_fp_to_sint FP128:$src)), (CFXBR 5, FP128:$src)>; -def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; -def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; -def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; +def : Pat<(i64 (any_fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>; +def : Pat<(i64 (any_fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>; +def : Pat<(i64 (any_fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>; // The FP extension feature provides versions of the above that allow // also specifying the inexact-exception suppression flag. @@ -309,13 +309,13 @@ let Predicates = [FeatureFPExtension] in { def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>; } - def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; - def : Pat<(i32 (fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP64:$src)), (CLFDBR 5, FP64:$src, 0)>; + def : Pat<(i32 (any_fp_to_uint FP128:$src)), (CLFXBR 5, FP128:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; - def : Pat<(i64 (fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP32:$src)), (CLGEBR 5, FP32:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP64:$src)), (CLGDBR 5, FP64:$src, 0)>; + def : Pat<(i64 (any_fp_to_uint FP128:$src)), (CLGXBR 5, FP128:$src, 0)>; } diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td 
b/lib/Target/SystemZ/SystemZInstrFormats.td index 2a1d14de3dd..c9dbe3da686 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2141,17 +2141,17 @@ class FixedCondBranchRXY opcode, } class CmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class AsmCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa; class FixedCmpBranchRIEa opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEa { let isAsmParserOnly = V.alternate; @@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa opcode, } multiclass CmpBranchRIEaPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEa; def Asm : AsmCmpBranchRIEa; @@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair opcode, } class CmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class AsmCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc; class FixedCmpBranchRIEc opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEc { let isAsmParserOnly = V.alternate; @@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc opcode, } multiclass CmpBranchRIEcPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIEc; def Asm : AsmCmpBranchRIEc; @@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair opcode, } class CmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class AsmCmpBranchRIS opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS; class FixedCmpBranchRIS opcode, - RegisterOperand 
cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIS { let isAsmParserOnly = V.alternate; @@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS opcode, } multiclass CmpBranchRISPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CmpBranchRIS; def Asm : AsmCmpBranchRIS; @@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign opcode> { // We therefore match the address in the same way as a normal store and // only use the StoreSI* instruction if the matched address is suitable. class StoreSI opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSI { @@ -2593,7 +2593,7 @@ class StoreSI opcode, SDPatternOperator operator, } class StoreSIY opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIY { @@ -2601,7 +2601,7 @@ class StoreSIY opcode, SDPatternOperator operator, } class StoreSIL opcode, SDPatternOperator operator, - Immediate imm> + ImmOpWithPattern imm> : InstSIL { @@ -2609,7 +2609,7 @@ class StoreSIL opcode, SDPatternOperator operator, } multiclass StoreSIPair siOpcode, bits<16> siyOpcode, - SDPatternOperator operator, Immediate imm> { + SDPatternOperator operator, ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : StoreSI; @@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair opcode, def Asm : AsmCondStoreRSY; } -class SideEffectUnaryI opcode, Immediate imm> +class SideEffectUnaryI opcode, ImmOpWithPattern imm> : InstI; @@ -2761,13 +2761,13 @@ class UnaryMemRRFc opcode, } class UnaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa; class UnaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa; @@ -2885,14 +2885,14 @@ multiclass UnaryRXPair rxOpcode, bits<16> rxyOpcode, } class UnaryVRIa opcode, SDPatternOperator 
operator, - TypedReg tr, Immediate imm, bits<4> type = 0> + TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0> : InstVRIa { + [(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> { let M3 = type; } -class UnaryVRIaGeneric opcode, Immediate imm> +class UnaryVRIaGeneric opcode, ImmOpWithPattern imm> : InstVRIa; @@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc opcode, } class SideEffectBinaryIE opcode, - Immediate imm1, Immediate imm2> + ImmOpWithPattern imm1, ImmOpWithPattern imm2> : InstIE; @@ -3030,7 +3030,7 @@ class SideEffectBinarySI opcode, Operand imm> mnemonic#"\t$BD1, $I2", []>; class SideEffectBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -3165,7 +3165,7 @@ class BinaryRRFc opcode, mnemonic#"\t$R1, $R2, $M3", []>; class BinaryMemRRFc opcode, - RegisterOperand cls1, RegisterOperand cls2, Immediate imm> + RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm> : InstRRFc { let Constraints = "$R1 = $R1src"; @@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair opcode, } class BinaryRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3276,14 +3276,14 @@ class BinaryRI opcode, SDPatternOperator operator, } class BinaryRIE opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEd; multiclass BinaryRIAndK opcode1, bits<16> opcode2, SDPatternOperator operator, RegisterOperand cls, - Immediate imm> { + ImmOpWithPattern imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in def K : BinaryRIE, @@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK opcode1, bits<16> opcode2, } class CondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg opcode, RegisterOperand cls, // Like CondBinaryRIE, but used for the raw assembly form. 
The condition-code // mask is the third operand rather than being part of the mnemonic. class AsmCondBinaryRIE opcode, RegisterOperand cls, - Immediate imm> + ImmOpWithPattern imm> : InstRIEg { @@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE opcode, RegisterOperand cls, // Like CondBinaryRIE, but with a fixed CC mask. class FixedCondBinaryRIE opcode, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIEg { let Constraints = "$R1 = $R1src"; @@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE opcode, } multiclass CondBinaryRIEPair opcode, - RegisterOperand cls, Immediate imm> { + RegisterOperand cls, ImmOpWithPattern imm> { let isCodeGenOnly = 1 in def "" : CondBinaryRIE; def Asm : AsmCondBinaryRIE; } class BinaryRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3484,7 +3484,7 @@ class BinaryVRIb opcode, SDPatternOperator operator, TypedReg tr, bits<4> type> : InstVRIb { + [(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> { let M4 = type; } @@ -3498,7 +3498,7 @@ class BinaryVRIc opcode, SDPatternOperator operator, : InstVRIc { + imm32zx16_timm:$I2))]> { let M4 = type; } @@ -3512,7 +3512,7 @@ class BinaryVRIe opcode, SDPatternOperator operator, : InstVRIe { + imm32zx12_timm:$I3))]> { let M4 = type; let M5 = m5; } @@ -3715,7 +3715,7 @@ class BinaryVRX opcode, SDPatternOperator operator, : InstVRX { + imm32zx4_timm:$M3))]> { let mayLoad = 1; let AccessBytes = bytes; } @@ -3765,7 +3765,7 @@ class BinaryVSI opcode, SDPatternOperator operator, } class StoreBinaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { let mayStore = 1; @@ -3774,7 +3774,7 @@ class StoreBinaryVRV opcode, bits<5> bytes, class StoreBinaryVRX opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRX { @@ -3809,7 +3809,7 @@ class CompareRRE opcode, 
SDPatternOperator operator, } class CompareRI opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRIa { @@ -3817,7 +3817,7 @@ class CompareRI opcode, SDPatternOperator operator, } class CompareRIL opcode, SDPatternOperator operator, - RegisterOperand cls, Immediate imm> + RegisterOperand cls, ImmOpWithPattern imm> : InstRILa { @@ -3924,7 +3924,7 @@ class CompareSSb opcode> } class CompareSI opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr12only> : InstSI opcode, SDPatternOperator operator, } class CompareSIL opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm> + SDPatternOperator load, ImmOpWithPattern imm> : InstSIL { @@ -3943,7 +3943,7 @@ class CompareSIL opcode, SDPatternOperator operator, } class CompareSIY opcode, SDPatternOperator operator, - SDPatternOperator load, Immediate imm, + SDPatternOperator load, ImmOpWithPattern imm, AddressingMode mode = bdaddr20only> : InstSIY opcode, SDPatternOperator operator, multiclass CompareSIPair siOpcode, bits<16> siyOpcode, SDPatternOperator operator, SDPatternOperator load, - Immediate imm> { + ImmOpWithPattern imm> { let DispKey = mnemonic in { let DispSize = "12" in def "" : CompareSI; @@ -4012,7 +4012,7 @@ class TestRXE opcode, SDPatternOperator operator, } class TestBinarySIL opcode, - SDPatternOperator operator, Immediate imm> + SDPatternOperator operator, ImmOpWithPattern imm> : InstSIL; @@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb opcode, class SideEffectTernaryRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc; @@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt opcode, class SideEffectTernaryMemMemRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, - Immediate imm> + ImmOpWithPattern imm> : InstRRFc { @@ -4221,7 +4221,7 @@ class 
TernaryRXF opcode, SDPatternOperator operator, } class TernaryVRIa opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> + TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index> : InstVRIa opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $I4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let M5 = type; } @@ -4252,8 +4252,8 @@ class TernaryVRRa opcode, SDPatternOperator operator, (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5), mnemonic#"\t$V1, $V2, $M4, $M5", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), - imm32zx4:$M4, - imm32zx4:$M5))], + imm32zx4_timm:$M4, + imm32zx4_timm:$M5))], m4or> { let M3 = type; } @@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair opcode, TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> modifier = 0> { def "" : TernaryVRRb; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; let Defs = [CC] in def S : TernaryVRRb; + imm32zx4even_timm, !add(!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, 0)>; @@ -4314,7 +4314,7 @@ class TernaryVRRc opcode, SDPatternOperator operator, mnemonic#"\t$V1, $V2, $V3, $M4", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M4))]> { + imm32zx4_timm:$M4))]> { let M5 = 0; let M6 = 0; } @@ -4327,7 +4327,7 @@ class TernaryVRRcFloat opcode, mnemonic#"\t$V1, $V2, $V3, $M6", [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx4:$M6))]> { + imm32zx4_timm:$M6))]> { let M4 = type; let M5 = m5; } @@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric opcode> } class TernaryVRV opcode, bits<5> bytes, - Immediate index> + ImmOpWithPattern index> : InstVRV { @@ -4440,7 +4440,7 @@ class TernaryVRV opcode, bits<5> bytes, } class TernaryVRX opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg 
tr2, bits<5> bytes, Immediate index> + TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index> : InstVRX opcode, SDPatternOperator operato [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src), (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), - imm32zx8:$I4))]> { + imm32zx8_timm:$I4))]> { let Constraints = "$V1 = $V1src"; let DisableEncoding = "$V1src"; let M5 = type; @@ -4480,7 +4480,7 @@ class QuaternaryVRIf opcode> : InstVRIf; + mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>; class QuaternaryVRIg opcode> : InstVRIg opcode> class QuaternaryVRRd opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, TypedReg tr3, TypedReg tr4, bits<4> type, - SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0> + SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0> : InstVRRd opcode, bits<4> modifier = 0> { def "" : QuaternaryVRRd; + imm32zx4even_timm, !and (modifier, 14)>; def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; let Defs = [CC] in def S : QuaternaryVRRd; + imm32zx4even_timm, !add (!and (modifier, 14), 1)>; def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, 0)>; @@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric opcode> { def "" : QuaternaryVRRdGeneric; def : InstAlias(NAME) VR128:$V1, VR128:$V2, VR128:$V3, - VR128:$V4, imm32zx4:$M5, 0)>; + VR128:$V4, imm32zx4_timm:$M5, 0)>; } class SideEffectQuaternaryRRFa opcode, @@ -4638,13 +4638,13 @@ class RotateSelectRIEf opcode, RegisterOperand cls1, class PrefetchRXY opcode, SDPatternOperator operator> : InstRXYb; + [(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC opcode, SDPatternOperator operator> - : InstRILc { + [(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. 
@@ -4691,7 +4691,7 @@ class Pseudo pattern> // Like UnaryRI, but expanded after RA depending on the choice of register. class UnaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins imm:$I2), [(set cls:$R1, (operator imm:$I2))]>; @@ -4720,7 +4720,7 @@ class UnaryRRPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4728,13 +4728,13 @@ class BinaryRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2), [(set cls:$R1, (operator cls:$R3, imm:$I2))]>; // Like BinaryRIAndK, but expanded after RA depending on the choice of register. multiclass BinaryRIAndKPseudo { + RegisterOperand cls, ImmOpWithPattern imm> { let NumOpsKey = key in { let NumOpsValue = "3" in def K : BinaryRIEPseudo, @@ -4764,7 +4764,7 @@ class MemFoldPseudo bytes, // Like CompareRI, but expanded after RA depending on the choice of register. class CompareRIPseudo + ImmOpWithPattern imm> : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; @@ -4783,7 +4783,7 @@ class CompareRXYPseudo +class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), [(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>; @@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo +class CondBinaryRIEPseudo : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3), [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src, @@ -4876,7 +4876,7 @@ class SelectWrapper : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), [(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2, - imm32zx4:$valid, imm32zx4:$cc))]> { + imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> { let usesCustomInserter = 1; let hasNoSchedulingInfo = 1; let Uses = [CC]; @@ -4890,12 +4890,12 @@ multiclass CondStores; def Inv : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store 
(z_select_ccmask (load mode:$addr), cls:$new, - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr)]>; } } @@ -4917,11 +4917,11 @@ class AtomicLoadBinary : AtomicLoadBinary; -class AtomicLoadBinaryImm32 +class AtomicLoadBinaryImm32 : AtomicLoadBinary; class AtomicLoadBinaryReg64 : AtomicLoadBinary; -class AtomicLoadBinaryImm64 +class AtomicLoadBinaryImm64 : AtomicLoadBinary; // OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND @@ -4944,7 +4944,7 @@ class AtomicLoadWBinary : AtomicLoadWBinary; -class AtomicLoadWBinaryImm +class AtomicLoadWBinaryImm : AtomicLoadWBinary; // A pseudo instruction that is a direct alias of a real instruction. @@ -4979,7 +4979,7 @@ class StoreAliasVRX + ImmOpWithPattern imm> : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4987,7 +4987,7 @@ class BinaryAliasRI + ImmOpWithPattern imm> : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2), [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> { let Constraints = "$R1 = $R1src"; @@ -4999,7 +4999,7 @@ class BinaryAliasVRRf // An alias of a CompareRI, but with different register sizes. class CompareAliasRI + ImmOpWithPattern imm> : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(set CC, (operator cls:$R1, imm:$I2))]> { let isCompare = 1; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 57c1cf4ec70..bc783608d45 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,22 +46,12 @@ using namespace llvm; #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; } -// Reg should be a 32-bit GPR. 
Return true if it is a high register rather -// than a low register. -static bool isHighReg(unsigned int Reg) { - if (SystemZ::GRH32BitRegClass.contains(Reg)) - return true; - assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); - return false; -} - // Pin the vtable to this file. void SystemZInstrInfo::anchor() {} @@ -85,7 +75,7 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, // Set up the two 64-bit registers and remember super reg and its flags. MachineOperand &HighRegOp = EarlierMI->getOperand(0); MachineOperand &LowRegOp = MI->getOperand(0); - unsigned Reg128 = LowRegOp.getReg(); + Register Reg128 = LowRegOp.getReg(); unsigned Reg128Killed = getKillRegState(LowRegOp.isKill()); unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef()); HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64)); @@ -147,8 +137,8 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode, bool ConvertHigh) const { - unsigned Reg = MI.getOperand(0).getReg(); - bool IsHigh = isHighReg(Reg); + Register Reg = MI.getOperand(0).getReg(); + bool IsHigh = SystemZ::isHighReg(Reg); MI.setDesc(get(IsHigh ? 
HighOpcode : LowOpcode)); if (IsHigh && ConvertHigh) MI.getOperand(1).setImm(uint32_t(MI.getOperand(1).getImm())); @@ -161,10 +151,10 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned LowOpcodeK, unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); if (!DestIsHigh && !SrcIsHigh) MI.setDesc(get(LowOpcodeK)); else { @@ -184,9 +174,10 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode, // is a high GR32. void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const { - unsigned Reg = MI.getOperand(0).getReg(); - unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, - MI.getOperand(2).getImm()); + Register Reg = MI.getOperand(0).getReg(); + unsigned Opcode = getOpcodeForOffset( + SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode, + MI.getOperand(2).getImm()); MI.setDesc(get(Opcode)); } @@ -195,93 +186,11 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode, // register is a low GR32 and HighOpcode if the register is a high GR32. void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const { - unsigned Reg = MI.getOperand(0).getReg(); - unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode; + Register Reg = MI.getOperand(0).getReg(); + unsigned Opcode = SystemZ::isHighReg(Reg) ? HighOpcode : LowOpcode; MI.setDesc(get(Opcode)); } -// MI is a load-register-on-condition pseudo instruction. 
Replace it with -// LowOpcode if source and destination are both low GR32s and HighOpcode if -// source and destination are both high GR32s. -void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); - - if (!DestIsHigh && !SrcIsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && SrcIsHigh) - MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - -// MI is a select pseudo instruction. Replace it with LowOpcode if source -// and destination are all low GR32s and HighOpcode if source and destination -// are all high GR32s. Otherwise, use the two-operand MixedOpcode. -void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode, - unsigned MixedOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Src1Reg = MI.getOperand(1).getReg(); - unsigned Src2Reg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool Src1IsHigh = isHighReg(Src1Reg); - bool Src2IsHigh = isHighReg(Src2Reg); - - // If sources and destination aren't all high or all low, we may be able to - // simplify the operation by moving one of the sources to the destination - // first. But only if this doesn't clobber the other source. 
- if (DestReg != Src1Reg && DestReg != Src2Reg) { - if (DestIsHigh != Src1IsHigh) { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg, - SystemZ::LR, 32, MI.getOperand(1).isKill(), - MI.getOperand(1).isUndef()); - MI.getOperand(1).setReg(DestReg); - Src1Reg = DestReg; - Src1IsHigh = DestIsHigh; - } else if (DestIsHigh != Src2IsHigh) { - emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg, - SystemZ::LR, 32, MI.getOperand(2).isKill(), - MI.getOperand(2).isUndef()); - MI.getOperand(2).setReg(DestReg); - Src2Reg = DestReg; - Src2IsHigh = DestIsHigh; - } - } - - // If the destination (now) matches one source, prefer this to be first. - if (DestReg != Src1Reg && DestReg == Src2Reg) { - commuteInstruction(MI, false, 1, 2); - std::swap(Src1Reg, Src2Reg); - std::swap(Src1IsHigh, Src2IsHigh); - } - - if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && Src1IsHigh && Src2IsHigh) - MI.setDesc(get(HighOpcode)); - else { - // Given the simplifcation above, we must already have a two-operand case. - assert (DestReg == Src1Reg); - MI.setDesc(get(MixedOpcode)); - MI.tieOperands(0, 1); - LOCRMuxJumps++; - } - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. 
@@ -302,8 +211,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction &MF = *MBB->getParent(); - const unsigned Reg64 = MI->getOperand(0).getReg(); - const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); + const Register Reg64 = MI->getOperand(0).getReg(); + const Register Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); // EAR can only load the low subregister so us a shift for %a0 to produce // the GR containing %a0 and %a1. @@ -341,8 +250,8 @@ SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, unsigned Size, bool KillSrc, bool UndefSrc) const { unsigned Opcode; - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); if (DestIsHigh && SrcIsHigh) Opcode = SystemZ::RISBHH; else if (DestIsHigh && !SrcIsHigh) @@ -468,7 +377,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Can't handle indirect branches. SystemZII::Branch Branch(getBranchInfo(*I)); - if (!Branch.Target->isMBB()) + if (!Branch.hasMBBTarget()) return true; // Punt on compound branches. @@ -478,7 +387,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (Branch.CCMask == SystemZ::CCMASK_ANY) { // Handle unconditional branches. if (!AllowModify) { - TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); continue; } @@ -490,7 +399,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. - if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) { + if (MBB.isLayoutSuccessor(Branch.getMBBTarget())) { TBB = nullptr; I->eraseFromParent(); I = MBB.end(); @@ -498,7 +407,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, } // TBB is used to indicate the unconditinal destination. 
- TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); continue; } @@ -506,7 +415,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, if (Cond.empty()) { // FIXME: add X86-style branch swap FBB = TBB; - TBB = Branch.Target->getMBB(); + TBB = Branch.getMBBTarget(); Cond.push_back(MachineOperand::CreateImm(Branch.CCValid)); Cond.push_back(MachineOperand::CreateImm(Branch.CCMask)); continue; @@ -517,7 +426,7 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Only handle the case where all conditional branches branch to the same // destination. - if (TBB != Branch.Target->getMBB()) + if (TBB != Branch.getMBBTarget()) return true; // If the conditions are the same, we can leave them alone. @@ -547,7 +456,7 @@ unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB, continue; if (!I->isBranch()) break; - if (!getBranchInfo(*I).Target->isMBB()) + if (!getBranchInfo(*I).hasMBBTarget()) break; // Remove the branch. I->eraseFromParent(); @@ -676,8 +585,8 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, else { Opc = SystemZ::LOCR; MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass); - unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); - unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg); BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg); TrueReg = TReg; @@ -1258,13 +1167,14 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( assert(NumOps == 3 && "Expected two source registers."); Register DstReg = MI.getOperand(0).getReg(); Register DstPhys = - (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); + (Register::isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); Register SrcReg = (OpNum == 2 ? 
MI.getOperand(1).getReg() : ((OpNum == 1 && MI.isCommutable()) ? MI.getOperand(2).getReg() : Register())); if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && - TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) + Register::isVirtualRegister(SrcReg) && + DstPhys == VRM->getPhys(SrcReg)) NeedsCommute = (OpNum == 1); else MemOpcode = -1; @@ -1358,15 +1268,6 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; - case SystemZ::LOCRMux: - expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); - return true; - - case SystemZ::SELRMux: - expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR, - SystemZ::LOCRMux); - return true; - case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1468,8 +1369,8 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return true; case SystemZ::RISBMux: { - bool DestIsHigh = isHighReg(MI.getOperand(0).getReg()); - bool SrcIsHigh = isHighReg(MI.getOperand(2).getReg()); + bool DestIsHigh = SystemZ::isHighReg(MI.getOperand(0).getReg()); + bool SrcIsHigh = SystemZ::isHighReg(MI.getOperand(2).getReg()); if (SrcIsHigh == DestIsHigh) MI.setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL)); else { @@ -1545,6 +1446,10 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const { return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP, MI.getOperand(2).getImm(), &MI.getOperand(3)); + case SystemZ::INLINEASM_BR: + // Don't try to analyze asm goto, so pass nullptr as branch target argument. 
+ return SystemZII::Branch(SystemZII::AsmGoto, 0, 0, nullptr); + default: llvm_unreachable("Unrecognized branch opcode"); } @@ -1845,8 +1750,7 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, bool SystemZInstrInfo:: areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA) const { + const MachineInstr &MIb) const { if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) return false; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 2edde175542..6dc6e72aa52 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -100,11 +100,18 @@ enum BranchType { // An instruction that decrements a 64-bit register and branches if // the result is nonzero. - BranchCTG + BranchCTG, + + // An instruction representing an asm goto statement. + AsmGoto }; // Information about a branch instruction. -struct Branch { +class Branch { + // The target of the branch. In case of INLINEASM_BR, this is nullptr. + const MachineOperand *Target; + +public: // The type of the branch. BranchType Type; @@ -114,12 +121,15 @@ struct Branch { // CCMASK_ is set if the branch should be taken when CC == N. unsigned CCMask; - // The target of the branch. - const MachineOperand *Target; - Branch(BranchType type, unsigned ccValid, unsigned ccMask, const MachineOperand *target) - : Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {} + : Target(target), Type(type), CCValid(ccValid), CCMask(ccMask) {} + + bool isIndirect() { return Target != nullptr && Target->isReg(); } + bool hasMBBTarget() { return Target != nullptr && Target->isMBB(); } + MachineBasicBlock *getMBBTarget() { + return hasMBBTarget() ? Target->getMBB() : nullptr; + } }; // Kinds of fused compares in compare-and-* instructions. 
Together with type @@ -160,10 +170,6 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned HighOpcode) const; void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const; - void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode, unsigned MixedOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -322,8 +328,7 @@ public: // memory addresses and false otherwise. bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const override; + const MachineInstr &MIb) const override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 91856893e3b..8b334756611 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -337,15 +337,15 @@ defm CondStore8Mux : CondStores, Requires<[FeatureHighWord]>; -defm CondStore32Mux : CondStores, +defm CondStore32Mux : CondStores, Requires<[FeatureLoadStoreOnCond2]>; defm CondStore8 : CondStores; defm CondStore16 : CondStores; -defm CondStore32 : CondStores; +defm CondStore32 : CondStores; defm : CondStores64; @@ -353,8 +353,8 @@ defm : CondStores64; defm : CondStores64; -defm CondStore64 : CondStores; +defm CondStore64 : CondStores; //===----------------------------------------------------------------------===// // Move instructions @@ -531,8 +531,8 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load on condition. Matched via DAG pattern. // Expands to LOC or LOCFH, depending on the choice of register. 
- def LOCMux : CondUnaryRSYPseudo; - defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>; + def LOCMux : CondUnaryRSYPseudo; + defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>; // Store on condition. Expanded from CondStore* pseudos. // Expands to STOC or STOCFH, depending on the choice of register. @@ -563,8 +563,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { } // Load on condition. Matched via DAG pattern. - defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>; - defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>; + defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>; + defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>; // Store on condition. Expanded from CondStore* pseudos. defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>; @@ -2082,7 +2082,7 @@ let Predicates = [FeatureProcessorAssist] in { // cleared. We only use the first result here. let Defs = [CC] in def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; -def : Pat<(ctlz GR64:$src), +def : Pat<(i64 (ctlz GR64:$src)), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; // Population count. Counts bits set per byte or doubleword. diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index 261727f8905..02364bbda5c 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in { // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; @@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in { // Replicate immediate. 
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>; - def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; - def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; - def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; - def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; + def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>; + def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; + def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; + def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; } // Load element immediate. @@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in { (ins bdxaddr12only:$XBD2, imm32zx4:$M3), "lcbb\t$R1, $XBD2, $M3", [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, - imm32zx4:$M3))]>; + imm32zx4_timm:$M3))]>; // Load with length. The number of loaded bytes is only known at run time. def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; @@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in { def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; - def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPF VR128:$vec, imm32zx16:$index)>; - def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), + def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)), (VREPG VR128:$vec, imm32zx16:$index)>; // Select. @@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in { // Shift left double by byte. 
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; - def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), + def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z), (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; // Shift left double by bit. @@ -1069,7 +1069,7 @@ let Predicates = [FeatureVector] in { def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1078,7 +1078,7 @@ let Predicates = [FeatureVector] in { def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; } // Convert to logical. @@ -1088,7 +1088,7 @@ let Predicates = [FeatureVector] in { def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; let Predicates = [FeatureVectorEnhancements2] in { let Uses = [FPC], mayRaiseFPException = 1 in { let isAsmParserOnly = 1 in @@ -1097,7 +1097,7 @@ let Predicates = [FeatureVector] in { def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; } // Rounding mode should agree with SystemZInstrFP.td. - def : FPConversion; + def : FPConversion; } // Divide. diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 95d7e22dec3..72411122956 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -85,9 +85,9 @@ struct MBBInfo { // This value never changes. uint64_t Size = 0; - // The minimum alignment of the block, as a log2 value. + // The minimum alignment of the block. // This value never changes. 
- unsigned Alignment = 0; + Align Alignment; // The number of terminators in this block. This value never changes. unsigned NumTerminators = 0; @@ -127,7 +127,8 @@ struct BlockPosition { // as the runtime address. unsigned KnownBits; - BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {} + BlockPosition(unsigned InitialLogAlignment) + : KnownBits(InitialLogAlignment) {} }; class SystemZLongBranch : public MachineFunctionPass { @@ -178,17 +179,16 @@ const uint64_t MaxForwardRange = 0xfffe; // instructions. void SystemZLongBranch::skipNonTerminators(BlockPosition &Position, MBBInfo &Block) { - if (Block.Alignment > Position.KnownBits) { + if (Log2(Block.Alignment) > Position.KnownBits) { // When calculating the address of Block, we need to conservatively // assume that Block had the worst possible misalignment. - Position.Address += ((uint64_t(1) << Block.Alignment) - - (uint64_t(1) << Position.KnownBits)); - Position.KnownBits = Block.Alignment; + Position.Address += + (Block.Alignment.value() - (uint64_t(1) << Position.KnownBits)); + Position.KnownBits = Log2(Block.Alignment); } // Align the addresses. - uint64_t AlignMask = (uint64_t(1) << Block.Alignment) - 1; - Position.Address = (Position.Address + AlignMask) & ~AlignMask; + Position.Address = alignTo(Position.Address, Block.Alignment); // Record the block's position. 
Block.Address = Position.Address; @@ -257,7 +257,7 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) { } Terminator.Branch = &MI; Terminator.TargetBlock = - TII->getBranchInfo(MI).Target->getMBB()->getNumber(); + TII->getBranchInfo(MI).getMBBTarget()->getNumber(); } return Terminator; } @@ -275,7 +275,7 @@ uint64_t SystemZLongBranch::initMBBInfo() { Terminators.clear(); Terminators.reserve(NumBlocks); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (unsigned I = 0; I < NumBlocks; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(I); MBBInfo &Block = MBBs[I]; @@ -339,7 +339,7 @@ bool SystemZLongBranch::mustRelaxABranch() { // must be long. void SystemZLongBranch::setWorstCaseAddresses() { SmallVector::iterator TI = Terminators.begin(); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (auto &Block : MBBs) { skipNonTerminators(Position, Block); for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { @@ -440,7 +440,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { // Run a shortening pass and relax any branches that need to be relaxed. 
void SystemZLongBranch::relaxBranches() { SmallVector::iterator TI = Terminators.begin(); - BlockPosition Position(MF->getAlignment()); + BlockPosition Position(Log2(MF->getAlignment())); for (auto &Block : MBBs) { skipNonTerminators(Position, Block); for (unsigned BTI = 0, BTE = Block.NumTerminators; BTI != BTE; ++BTI) { diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 0becfaa1d49..3fc25034dde 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMachineScheduler.h" +#include "llvm/CodeGen/MachineLoopInfo.h" using namespace llvm; @@ -108,8 +109,8 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { I != SinglePredMBB->end(); I++) { LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump();); bool TakenBranch = (I->isBranch() && - (TII->getBranchInfo(*I).Target->isReg() || // Relative branch - TII->getBranchInfo(*I).Target->getMBB() == MBB)); + (TII->getBranchInfo(*I).isIndirect() || + TII->getBranchInfo(*I).getMBBTarget() == MBB)); HazardRec->emitInstruction(&*I, TakenBranch); if (TakenBranch) break; diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 56632e1529a..b2bab68a627 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -21,15 +21,32 @@ class ImmediateTLSAsmOperand let RenderMethod = "addImmTLSOperands"; } +class ImmediateOp : Operand { + let PrintMethod = "print"##asmop##"Operand"; + let DecoderMethod = "decode"##asmop##"Operand"; + let ParserMatchClass = !cast(asmop); +} + +class ImmOpWithPattern : + ImmediateOp, PatLeaf<(vt ImmNode), pred, xform>; + +// class ImmediatePatLeaf +// : PatLeaf<(vt ImmNode), pred, xform>; + + // Constructs both a DAG pattern and instruction operand for an immediate // of 
type VT. PRED returns true if a node is acceptable and XFORM returns // the operand value associated with the node. ASMOP is the name of the // associated asm operand, and also forms the basis of the asm print method. -class Immediate - : PatLeaf<(vt imm), pred, xform>, Operand { - let PrintMethod = "print"##asmop##"Operand"; - let DecoderMethod = "decode"##asmop##"Operand"; - let ParserMatchClass = !cast(asmop); +multiclass Immediate { + // def "" : ImmediateOp, + // PatLeaf<(vt imm), pred, xform>; + def "" : ImmOpWithPattern; + +// def _timm : PatLeaf<(vt timm), pred, xform>; + def _timm : ImmOpWithPattern; } // Constructs an asm operand for a PC-relative address. SIZE says how @@ -295,87 +312,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being zero. -def imm32ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm32lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; // Immediates for the lower and upper 16 bits of an i32, with the other // bits of the i32 being one. -def imm32ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm32lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; // Short immediates -def imm32zx1 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U1Imm">; -def imm32zx2 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U2Imm">; -def imm32zx3 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U3Imm">; -def imm32zx4 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U4Imm">; // Note: this enforces an even value during code generation only. // When used from the assembler, any 4-bit value is allowed. 
-def imm32zx4even : Immediate(N->getZExtValue()); }], UIMM8EVEN, "U4Imm">; -def imm32zx6 : Immediate(N->getZExtValue()); }], NOOP_SDNodeXForm, "U6Imm">; -def imm32sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm32zx8 : Immediate(N->getZExtValue()); }], UIMM8, "U8Imm">; -def imm32zx8trunc : Immediate; +defm imm32zx8trunc : Immediate; -def imm32zx12 : Immediate(N->getZExtValue()); }], UIMM12, "U12Imm">; -def imm32sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm32sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm32zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm32sx16trunc : Immediate; -def imm32zx16trunc : Immediate; +defm imm32sx16trunc : Immediate; +defm imm32zx16trunc : Immediate; // Full 32-bit immediates. we need both signed and unsigned versions // because the assembler is picky. E.g. AFI requires signed operands // while NILF requires unsigned ones. -def simm32 : Immediate; -def uimm32 : Immediate; +defm simm32 : Immediate; +defm uimm32 : Immediate; -def simm32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; @@ -387,107 +404,107 @@ def imm32 : ImmLeaf; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being zero. -def imm64ll16 : ImmediategetZExtValue()); }], LL16, "U16Imm">; -def imm64lh16 : ImmediategetZExtValue()); }], LH16, "U16Imm">; -def imm64hl16 : ImmediategetZExtValue()); }], HL16, "U16Imm">; -def imm64hh16 : ImmediategetZExtValue()); }], HH16, "U16Imm">; // Immediates for 16-bit chunks of an i64, with the other bits of the // i32 being one. -def imm64ll16c : ImmediategetZExtValue())); }], LL16, "U16Imm">; -def imm64lh16c : ImmediategetZExtValue())); }], LH16, "U16Imm">; -def imm64hl16c : ImmediategetZExtValue())); }], HL16, "U16Imm">; -def imm64hh16c : ImmediategetZExtValue())); }], HH16, "U16Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being zero. 
-def imm64lf32 : ImmediategetZExtValue()); }], LF32, "U32Imm">; -def imm64hf32 : ImmediategetZExtValue()); }], HF32, "U32Imm">; // Immediates for the lower and upper 32 bits of an i64, with the other // bits of the i32 being one. -def imm64lf32c : ImmediategetZExtValue())); }], LF32, "U32Imm">; -def imm64hf32c : ImmediategetZExtValue())); }], HF32, "U32Imm">; // Negated immediates that fit LF32 or LH16. -def imm64lh16n : ImmediategetZExtValue())); }], NEGLH16, "U16Imm">; -def imm64lf32n : ImmediategetZExtValue())); }], NEGLF32, "U32Imm">; // Short immediates. -def imm64sx8 : Immediate(N->getSExtValue()); }], SIMM8, "S8Imm">; -def imm64zx8 : Immediate(N->getSExtValue()); }], UIMM8, "U8Imm">; -def imm64sx16 : Immediate(N->getSExtValue()); }], SIMM16, "S16Imm">; -def imm64sx16n : Immediate(-N->getSExtValue()); }], NEGSIMM16, "S16Imm">; -def imm64zx16 : Immediate(N->getZExtValue()); }], UIMM16, "U16Imm">; -def imm64sx32 : Immediate(N->getSExtValue()); }], SIMM32, "S32Imm">; -def imm64sx32n : Immediate(-N->getSExtValue()); }], NEGSIMM32, "S32Imm">; -def imm64zx32 : Immediate(N->getZExtValue()); }], UIMM32, "U32Imm">; -def imm64zx32n : Immediate(-N->getSExtValue()); }], NEGUIMM32, "U32Imm">; -def imm64zx48 : Immediate(N->getZExtValue()); }], UIMM48, "U48Imm">; @@ -637,7 +654,7 @@ def bdvaddr12only : BDVMode< "64", "12">; //===----------------------------------------------------------------------===// // A 4-bit condition-code mask. 
-def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, +def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>, Operand { let PrintMethod = "printCond4Operand"; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 15bd12bc98a..6fe383e64b7 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), (z_subcarry_1 node:$lhs, node:$rhs, CC)>; // Signed and unsigned comparisons. -def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ +def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ unsigned Type = cast(N->getOperand(2))->getZExtValue(); return Type != SystemZICMP::UnsignedOnly; }]>; -def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{ +def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ unsigned Type = cast(N->getOperand(2))->getZExtValue(); return Type != SystemZICMP::SignedOnly; }]>; // Register- and memory-based TEST UNDER MASK. -def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>; +def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>; def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>; // Register sign-extend operations. Sub-32-bit values are represented as i32s. 
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index beaf4de285a..65300fb4762 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -100,12 +100,12 @@ multiclass CondStores64 { def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr), (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, imm32zx4:$valid, imm32zx4:$cc)>; def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, - imm32zx4:$valid, imm32zx4:$cc), + imm32zx4_timm:$valid, imm32zx4_timm:$cc), mode:$addr), (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, imm32zx4:$valid, imm32zx4:$cc)>; diff --git a/lib/Target/SystemZ/SystemZPostRewrite.cpp b/lib/Target/SystemZ/SystemZPostRewrite.cpp index 8e4060eac74..aaa7f8fc88f 100644 --- a/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -25,6 +25,7 @@ using namespace llvm; #define DEBUG_TYPE "systemz-postrewrite" STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); namespace llvm { void initializeSystemZPostRewritePass(PassRegistry&); @@ -45,12 +46,20 @@ public: StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - private: + void selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + void selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode); + bool expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator 
&NextMBBI); bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool selectMBB(MachineBasicBlock &MBB); @@ -68,11 +77,141 @@ FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { return new SystemZPostRewrite(); } +// MI is a load-register-on-condition pseudo instruction. Replace it with +// LowOpcode if source and destination are both low GR32s and HighOpcode if +// source and destination are both high GR32s. Otherwise, a branch sequence +// is created. +void SystemZPostRewrite::selectLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register SrcReg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool SrcIsHigh = SystemZ::isHighReg(SrcReg); + + if (!DestIsHigh && !SrcIsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && SrcIsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + expandCondMove(MBB, MBBI, NextMBBI); +} + +// MI is a select pseudo instruction. Replace it with LowOpcode if source +// and destination are all low GR32s and HighOpcode if source and destination +// are all high GR32s. Otherwise, a branch sequence is created. 
+void SystemZPostRewrite::selectSELRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LowOpcode, + unsigned HighOpcode) { + Register DestReg = MBBI->getOperand(0).getReg(); + Register Src1Reg = MBBI->getOperand(1).getReg(); + Register Src2Reg = MBBI->getOperand(2).getReg(); + bool DestIsHigh = SystemZ::isHighReg(DestReg); + bool Src1IsHigh = SystemZ::isHighReg(Src1Reg); + bool Src2IsHigh = SystemZ::isHighReg(Src2Reg); + + // If sources and destination aren't all high or all low, we may be able to + // simplify the operation by moving one of the sources to the destination + // first. But only if this doesn't clobber the other source. + if (DestReg != Src1Reg && DestReg != Src2Reg) { + if (DestIsHigh != Src1IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(1).getReg(), getRegState(MBBI->getOperand(1))); + MBBI->getOperand(1).setReg(DestReg); + Src1Reg = DestReg; + Src1IsHigh = DestIsHigh; + } else if (DestIsHigh != Src2IsHigh) { + BuildMI(*MBBI->getParent(), MBBI, MBBI->getDebugLoc(), + TII->get(SystemZ::COPY), DestReg) + .addReg(MBBI->getOperand(2).getReg(), getRegState(MBBI->getOperand(2))); + MBBI->getOperand(2).setReg(DestReg); + Src2Reg = DestReg; + Src2IsHigh = DestIsHigh; + } + } + + // If the destination (now) matches one source, prefer this to be first. + if (DestReg != Src1Reg && DestReg == Src2Reg) { + TII->commuteInstruction(*MBBI, false, 1, 2); + std::swap(Src1Reg, Src2Reg); + std::swap(Src1IsHigh, Src2IsHigh); + } + + if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) + MBBI->setDesc(TII->get(LowOpcode)); + else if (DestIsHigh && Src1IsHigh && Src2IsHigh) + MBBI->setDesc(TII->get(HighOpcode)); + else + // Given the simplification above, we must already have a two-operand case. 
+ expandCondMove(MBB, MBBI, NextMBBI); +} + +// Replace MBBI by a branch sequence that performs a conditional move of +// operand 2 to the destination register. Operand 1 is expected to be the +// same register as the destination. +bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register DestReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + assert(DestReg == MI.getOperand(1).getReg() && + "Expected destination and first source operand to be the same."); + + LivePhysRegs LiveRegs(TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. + MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. 
+ BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. + BuildMI(*MoveMBB, MoveMBB->end(), DL, TII->get(SystemZ::COPY), DestReg) + .addReg(MI.getOperand(2).getReg(), getRegState(MI.getOperand(2))); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + LOCRMuxJumps++; + return true; +} + /// If MBBI references a pseudo instruction that should be selected here, /// do it and return true. Otherwise return false. bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -83,7 +222,7 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, if (TargetMemOpcode != -1) { MI.setDesc(TII->get(TargetMemOpcode)); MI.tieOperands(0, 1); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); MachineOperand &SrcMO = MI.getOperand(1); if (DstReg != SrcMO.getReg()) { BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) @@ -94,6 +233,15 @@ bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, return true; } + switch (Opcode) { + case SystemZ::LOCRMux: + selectLOCRMux(MBB, MBBI, NextMBBI, SystemZ::LOCR, SystemZ::LOCFHR); + return true; + case SystemZ::SELRMux: + selectSELRMux(MBB, MBBI, NextMBBI, SystemZ::SELR, SystemZ::SELFHR); + return true; + } + return false; } diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index b27c25beb58..af33a030055 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -35,5 +35,6 @@ def : ProcessorModel<"z13", Z13Model, 
Arch11SupportedFeatures.List>; def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; -def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>; +def : ProcessorModel<"arch13", Z15Model, Arch13SupportedFeatures.List>; +def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index e7cd6871dbb..39ace5594b7 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -41,7 +41,7 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO, return &SystemZ::GRH32BitRegClass; if (VRM && VRM->hasPhys(MO.getReg())) { - unsigned PhysReg = VRM->getPhys(MO.getReg()); + Register PhysReg = VRM->getPhys(MO.getReg()); if (SystemZ::GR32BitRegClass.contains(PhysReg)) return &SystemZ::GR32BitRegClass; assert (SystemZ::GRH32BitRegClass.contains(PhysReg) && @@ -120,8 +120,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, } // Add the other operand of the LOCRMux to the worklist. - unsigned OtherReg = - (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); + Register OtherReg = + (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) Worklist.push_back(OtherReg); } // end LOCRMux @@ -169,7 +169,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, auto tryAddHint = [&](const MachineOperand *MO) -> void { Register Reg = MO->getReg(); - Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + Register PhysReg = + Register::isPhysicalRegister(Reg) ? 
Reg : VRM->getPhys(Reg); if (PhysReg) { if (MO->getSubReg()) PhysReg = getSubReg(PhysReg, MO->getSubReg()); @@ -297,8 +298,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(Mask && "One offset must be OK"); } while (!OpcodeForOffset); - unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + Register ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); int64_t HighOffset = OldOffset - Offset; if (MI->getDesc().TSFlags & SystemZII::HasIndex @@ -351,8 +352,8 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, // regalloc may run out of registers. unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0); - unsigned GR128Reg = MI->getOperand(WideOpNo).getReg(); - unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); + Register GR128Reg = MI->getOperand(WideOpNo).getReg(); + Register GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); LiveInterval &IntGR128 = LIS.getInterval(GR128Reg); LiveInterval &IntGRNar = LIS.getInterval(GRNarReg); @@ -385,7 +386,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, MEE++; for (; MII != MEE; ++MII) { for (const MachineOperand &MO : MII->operands()) - if (MO.isReg() && isPhysicalRegister(MO.getReg())) { + if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/); SI.isValid(); ++SI) if (NewRC->contains(*SI)) { diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 4f721ec23e5..7044efef1ac 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -28,6 +28,15 @@ inline unsigned even128(bool Is32bit) { inline unsigned odd128(bool Is32bit) { return Is32bit ? subreg_l32 : subreg_l64; } + +// Reg should be a 32-bit GPR. Return true if it is a high register rather +// than a low register. 
+inline bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} } // end namespace SystemZ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td index 98eca280224..119e3ee7c22 100644 --- a/lib/Target/SystemZ/SystemZSchedule.td +++ b/lib/Target/SystemZ/SystemZSchedule.td @@ -59,7 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode -include "SystemZScheduleArch13.td" +include "SystemZScheduleZ15.td" include "SystemZScheduleZ14.td" include "SystemZScheduleZ13.td" include "SystemZScheduleZEC12.td" diff --git a/lib/Target/SystemZ/SystemZScheduleArch13.td b/lib/Target/SystemZ/SystemZScheduleZ15.td similarity index 97% rename from lib/Target/SystemZ/SystemZScheduleArch13.td rename to lib/Target/SystemZ/SystemZScheduleZ15.td index 9f82f24d0e8..56ceb88f35d 100644 --- a/lib/Target/SystemZ/SystemZScheduleArch13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -1,4 +1,4 @@ -//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +//-- SystemZScheduleZ15.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// // -// This file defines the machine model for Arch13 to support instruction +// This file defines the machine model for Z15 to support instruction // scheduling and other instruction cost heuristics. // // Pseudos expanded right after isel do not need to be modelled here. 
// //===----------------------------------------------------------------------===// -def Arch13Model : SchedMachineModel { +def Z15Model : SchedMachineModel { let UnsupportedFeatures = Arch13UnsupportedFeatures.List; @@ -27,7 +27,7 @@ def Arch13Model : SchedMachineModel { let MispredictPenalty = 20; } -let SchedModel = Arch13Model in { +let SchedModel = Z15Model in { // These definitions need the SchedModel value. They could be put in a // subtarget common include file, but it seems the include system in Tablegen // currently (2016) rejects multiple includes of same file. @@ -73,43 +73,43 @@ let NumMicroOps = 0 in { } // Execution units. -def Arch13_FXaUnit : ProcResource<2>; -def Arch13_FXbUnit : ProcResource<2>; -def Arch13_LSUnit : ProcResource<2>; -def Arch13_VecUnit : ProcResource<2>; -def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } -def Arch13_VBUnit : ProcResource<2>; -def Arch13_MCD : ProcResource<1>; +def Z15_FXaUnit : ProcResource<2>; +def Z15_FXbUnit : ProcResource<2>; +def Z15_LSUnit : ProcResource<2>; +def Z15_VecUnit : ProcResource<2>; +def Z15_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ } +def Z15_VBUnit : ProcResource<2>; +def Z15_MCD : ProcResource<1>; // Subtarget specific definitions of scheduling resources. 
let NumMicroOps = 0 in { - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; - def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; foreach Num = 2-5 in { let ResourceCycles = [Num] in { - def : WriteRes("FXa"#Num), [Arch13_FXaUnit]>; - def : WriteRes("FXb"#Num), [Arch13_FXbUnit]>; - def : WriteRes("LSU"#Num), [Arch13_LSUnit]>; - def : WriteRes("VecBF"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecDF"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecDFX"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecMul"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecStr"#Num), [Arch13_VecUnit]>; - def : WriteRes("VecXsPm"#Num), [Arch13_VecUnit]>; + def : WriteRes("FXa"#Num), [Z15_FXaUnit]>; + def : WriteRes("FXb"#Num), [Z15_FXbUnit]>; + def : WriteRes("LSU"#Num), [Z15_LSUnit]>; + def : WriteRes("VecBF"#Num), [Z15_VecUnit]>; + def : WriteRes("VecDF"#Num), [Z15_VecUnit]>; + def : WriteRes("VecDFX"#Num), [Z15_VecUnit]>; + def : WriteRes("VecMul"#Num), [Z15_VecUnit]>; + def : WriteRes("VecStr"#Num), [Z15_VecUnit]>; + def : WriteRes("VecXsPm"#Num), [Z15_VecUnit]>; }} - def : WriteRes { let ResourceCycles = [30]; } + def : WriteRes { let ResourceCycles = [30]; } - def : WriteRes; // Virtual Branching Unit + def : WriteRes; // Virtual Branching Unit } -def : WriteRes { let NumMicroOps = 3; +def : WriteRes { let NumMicroOps = 3; let BeginGroup = 1; let EndGroup = 1; } diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index a50e6aa5971..47c925dcf73 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -209,10 +209,10 @@ std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( // Now select between End and null, depending on whether the character // was found. 
- SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), - DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), - DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), - CCReg}; + SDValue Ops[] = { + End, DAG.getConstant(0, DL, PtrVT), + DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), + DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg}; End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops); return std::make_pair(End, Chain); } diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index e79dfc5b4b9..2aca22c9082 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -75,7 +75,7 @@ static void tieOpsIfNeeded(MachineInstr &MI) { // instead of IIxF. bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH) { - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // The new opcode will clear the other half of the GR64 reg, so // cancel if that is live. 
unsigned thisSubRegIdx = @@ -86,7 +86,7 @@ bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned LLIxL, : SystemZ::subreg_l32); unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, &SystemZ::GR64BitRegClass); - unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + Register OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); if (LiveRegs.contains(OtherReg)) return false; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 5c49e6eff0b..20865037fe3 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -154,7 +154,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, getEffectiveRelocModel(RM), getEffectiveSystemZCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), - TLOF(llvm::make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -176,7 +176,7 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { return new ScheduleDAGMI(C, - llvm::make_unique(C), + std::make_unique(C), /*RemoveKillFlags=*/true); } @@ -184,6 +184,7 @@ public: bool addInstSelector() override; bool addILPOpts() override; void addPostRewrite() override; + void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -217,14 +218,14 @@ void SystemZPassConfig::addPostRewrite() { addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); } -void SystemZPassConfig::addPreSched2() { +void SystemZPassConfig::addPostRegAlloc() { // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() // is not called). 
if (getOptLevel() == CodeGenOpt::None) addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} - addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); - +void SystemZPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 145cf87ef9f..11c99aa1117 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -304,7 +304,8 @@ bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, C2.ScaleCost, C2.SetupCost); } -unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { +unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (!Vector) // Discount the stack pointer. Also leave out %r0, since it can't // be used in an address. @@ -707,7 +708,7 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // TODO: Fix base implementation which could simplify things a bit here // (seems to miss on differentiating on scalar/vector types). - // Only 64 bit vector conversions are natively supported before arch13. + // Only 64 bit vector conversions are natively supported before z15. 
if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) { if (SrcScalarBits == DstScalarBits) return NumDstVectors; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 16ce2ef1d7a..3ba80b31439 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -56,12 +56,12 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; - unsigned getCacheLineSize() { return 256; } - unsigned getPrefetchDistance() { return 2000; } - unsigned getMinPrefetchStride() { return 2048; } + unsigned getCacheLineSize() const override { return 256; } + unsigned getPrefetchDistance() const override { return 2000; } + unsigned getMinPrefetchStride() const override { return 2048; } bool hasDivRemOp(Type *DataType, bool IsSigned); bool prefersVectorizedAddressing() { return false; } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 17274e1c2c6..dcd3934de0f 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -253,6 +253,7 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( auto Attrs = GVar->getAttributes(); if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) || (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { return getExplicitSectionGlobal(GO, Kind, TM); } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 634866d9357..4c98e140f44 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -63,18 +63,6 @@ void TargetMachine::resetTargetOptions(const Function &F) const { RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); 
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math"); - RESET_OPTION(NoTrappingFPMath, "no-trapping-math"); - - StringRef Denormal = - F.getFnAttribute("denormal-fp-math").getValueAsString(); - if (Denormal == "ieee") - Options.FPDenormalMode = FPDenormal::IEEE; - else if (Denormal == "preserve-sign") - Options.FPDenormalMode = FPDenormal::PreserveSign; - else if (Denormal == "positive-zero") - Options.FPDenormalMode = FPDenormal::PositiveZero; - else - Options.FPDenormalMode = DefaultOptions.FPDenormalMode; } /// Returns the code generation relocation model. The choices are static, PIC, @@ -140,8 +128,8 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // don't assume the variables to be DSO local unless we actually know // that for sure. This only has to be done for variables; for functions // the linker can insert thunks for calling functions from another DLL. - if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() && - isa(GV)) + if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV && + GV->isDeclarationForLinker() && isa(GV)) return false; // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols @@ -154,7 +142,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) produced windows relocations // without GOT tables in older clang versions; Keep this behaviour. - if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) + // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables + // either. 
+ if (TT.isOSBinFormatCOFF() || TT.isOSWindows()) return true; // Most PIC code sequences that assume that a symbol is local cannot diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 5d9029682fd..3ac9c38dfc0 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -219,7 +219,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { std::error_code EC; - raw_fd_ostream dest(Filename, EC, sys::fs::F_None); + raw_fd_ostream dest(Filename, EC, sys::fs::OF_None); if (EC) { *ErrorMessage = strdup(EC.message().c_str()); return true; diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 09628e872dd..53a96fd6a97 100644 --- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -313,16 +313,17 @@ public: return Optional(); } - WebAssembly::ExprType parseBlockType(StringRef ID) { - return StringSwitch(ID) - .Case("i32", WebAssembly::ExprType::I32) - .Case("i64", WebAssembly::ExprType::I64) - .Case("f32", WebAssembly::ExprType::F32) - .Case("f64", WebAssembly::ExprType::F64) - .Case("v128", WebAssembly::ExprType::V128) - .Case("exnref", WebAssembly::ExprType::Exnref) - .Case("void", WebAssembly::ExprType::Void) - .Default(WebAssembly::ExprType::Invalid); + WebAssembly::BlockType parseBlockType(StringRef ID) { + // Multivalue block types are handled separately in parseSignature + return StringSwitch(ID) + .Case("i32", WebAssembly::BlockType::I32) + .Case("i64", WebAssembly::BlockType::I64) + .Case("f32", WebAssembly::BlockType::F32) + .Case("f64", WebAssembly::BlockType::F64) + .Case("v128", WebAssembly::BlockType::V128) + .Case("exnref", WebAssembly::BlockType::Exnref) + .Case("void", 
WebAssembly::BlockType::Void) + .Default(WebAssembly::BlockType::Invalid); } bool parseRegTypeList(SmallVectorImpl &Types) { @@ -343,7 +344,7 @@ public: int64_t Val = Int.getIntVal(); if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), WebAssemblyOperand::IntOp{Val})); Parser.Lex(); @@ -356,7 +357,7 @@ public: return error("Cannot parse real: ", Flt); if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val})); Parser.Lex(); @@ -378,7 +379,7 @@ public: } if (IsNegative) Val = -Val; - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), WebAssemblyOperand::FltOp{Val})); Parser.Lex(); @@ -407,7 +408,7 @@ public: // an opcode until after the assembly matcher, so set a default to fix // up later. auto Tok = Lexer.getTok(); - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(), WebAssemblyOperand::IntOp{-1})); } @@ -416,8 +417,8 @@ public: } void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc, - WebAssembly::ExprType BT) { - Operands.push_back(make_unique( + WebAssembly::BlockType BT) { + Operands.push_back(std::make_unique( WebAssemblyOperand::Integer, NameLoc, NameLoc, WebAssemblyOperand::IntOp{static_cast(BT)})); } @@ -449,13 +450,14 @@ public: } // Now construct the name as first operand. - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), WebAssemblyOperand::TokOp{Name})); // If this instruction is part of a control flow structure, ensure // proper nesting. 
bool ExpectBlockType = false; + bool ExpectFuncType = false; if (Name == "block") { push(Block); ExpectBlockType = true; @@ -489,9 +491,37 @@ public: if (pop(Name, Block)) return true; } else if (Name == "end_function") { + ensureLocals(getStreamer()); CurrentState = EndFunction; if (pop(Name, Function) || ensureEmptyNestingStack()) return true; + } else if (Name == "call_indirect" || Name == "return_call_indirect") { + ExpectFuncType = true; + } + + if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) { + // This has a special TYPEINDEX operand which in text we + // represent as a signature, such that we can re-build this signature, + // attach it to an anonymous symbol, which is what WasmObjectWriter + // expects to be able to recreate the actual unique-ified type indices. + auto Loc = Parser.getTok(); + auto Signature = std::make_unique(); + if (parseSignature(Signature.get())) + return true; + // Got signature as block type, don't need more + ExpectBlockType = false; + auto &Ctx = getStreamer().getContext(); + // The "true" here will cause this to be a nameless symbol. + MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true); + auto *WasmSym = cast(Sym); + WasmSym->setSignature(Signature.get()); + addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); + Operands.push_back(std::make_unique( + WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(), + WebAssemblyOperand::SymOp{Expr})); } while (Lexer.isNot(AsmToken::EndOfStatement)) { @@ -504,7 +534,7 @@ public: if (ExpectBlockType) { // Assume this identifier is a block_type. 
auto BT = parseBlockType(Id.getString()); - if (BT == WebAssembly::ExprType::Invalid) + if (BT == WebAssembly::BlockType::Invalid) return error("Unknown block type: ", Id); addBlockTypeOperand(Operands, NameLoc, BT); Parser.Lex(); @@ -514,7 +544,7 @@ public: SMLoc End; if (Parser.parseExpression(Val, End)) return error("Cannot parse symbol: ", Lexer.getTok()); - Operands.push_back(make_unique( + Operands.push_back(std::make_unique( WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), WebAssemblyOperand::SymOp{Val})); if (checkForP2AlignIfLoadStore(Operands, Name)) @@ -549,7 +579,7 @@ public: } case AsmToken::LCurly: { Parser.Lex(); - auto Op = make_unique( + auto Op = std::make_unique( WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc()); if (!Lexer.is(AsmToken::RCurly)) for (;;) { @@ -572,7 +602,7 @@ public: } if (ExpectBlockType && Operands.size() == 1) { // Support blocks with no operands as default to void. - addBlockTypeOperand(Operands, NameLoc, WebAssembly::ExprType::Void); + addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void); } Parser.Lex(); return false; @@ -671,7 +701,7 @@ public: LastFunctionLabel = LastLabel; push(Function); } - auto Signature = make_unique(); + auto Signature = std::make_unique(); if (parseSignature(Signature.get())) return true; WasmSym->setSignature(Signature.get()); @@ -687,7 +717,7 @@ public: if (SymName.empty()) return true; auto WasmSym = cast(Ctx.getOrCreateSymbol(SymName)); - auto Signature = make_unique(); + auto Signature = std::make_unique(); if (parseRegTypeList(Signature->Params)) return true; WasmSym->setSignature(Signature.get()); @@ -737,24 +767,30 @@ public: return true; // We didn't process this directive. } + // Called either when the first instruction is parsed of the function ends. + void ensureLocals(MCStreamer &Out) { + if (CurrentState == FunctionStart) { + // We haven't seen a .local directive yet. 
The streamer requires locals to + // be encoded as a prelude to the instructions, so emit an empty list of + // locals here. + auto &TOut = reinterpret_cast( + *Out.getTargetStreamer()); + TOut.emitLocal(SmallVector()); + CurrentState = FunctionLocals; + } + } + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) override { MCInst Inst; + Inst.setLoc(IDLoc); unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { case Match_Success: { - if (CurrentState == FunctionStart) { - // This is the first instruction in a function, but we haven't seen - // a .local directive yet. The streamer requires locals to be encoded - // as a prelude to the instructions, so emit an empty list of locals - // here. - auto &TOut = reinterpret_cast( - *Out.getTargetStreamer()); - TOut.emitLocal(SmallVector()); - } + ensureLocals(Out); // Fix unknown p2align operands. auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode()); if (Align != -1U) { diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index f9bf3f85d30..9a9c31cff2d 100644 --- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" @@ -213,10 +214,29 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( return MCDisassembler::Fail; break; } - // block_type operands (uint8_t). 
+ // block_type operands: case WebAssembly::OPERAND_SIGNATURE: { - if (!parseImmediate(MI, Size, Bytes)) + int64_t Val; + uint64_t PrevSize = Size; + if (!nextLEB(Val, Bytes, Size, true)) return MCDisassembler::Fail; + if (Val < 0) { + // Negative values are single septet value types or empty types + if (Size != PrevSize + 1) { + MI.addOperand( + MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); + } else { + MI.addOperand(MCOperand::createImm(Val & 0x7f)); + } + } else { + // We don't have access to the signature, so create a symbol without one + MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); + auto *WasmSym = cast(Sym); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = MCSymbolRefExpr::create( + WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); + MI.addOperand(MCOperand::createExpr(Expr)); + } break; } // FP operands. diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 70b409cf4a9..8314de41021 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -31,10 +31,12 @@ namespace { class WebAssemblyAsmBackend final : public MCAsmBackend { bool Is64Bit; + bool IsEmscripten; public: - explicit WebAssemblyAsmBackend(bool Is64Bit) - : MCAsmBackend(support::little), Is64Bit(Is64Bit) {} + explicit WebAssemblyAsmBackend(bool Is64Bit, bool IsEmscripten) + : MCAsmBackend(support::little), Is64Bit(Is64Bit), + IsEmscripten(IsEmscripten) {} unsigned getNumFixupKinds() const override { return WebAssembly::NumTargetFixupKinds; @@ -123,11 +125,11 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, std::unique_ptr WebAssemblyAsmBackend::createObjectTargetWriter() const { - return createWebAssemblyWasmObjectWriter(Is64Bit); + return createWebAssemblyWasmObjectWriter(Is64Bit, IsEmscripten); } } // end anonymous namespace 
MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) { - return new WebAssemblyAsmBackend(TT.isArch64Bit()); + return new WebAssemblyAsmBackend(TT.isArch64Bit(), TT.isOSEmscripten()); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index b5d4d369b72..221ac17b833 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyUtilities.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -51,7 +52,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // Print any additional variadic operands. const MCInstrDesc &Desc = MII.get(MI->getOpcode()); - if (Desc.isVariadic()) + if (Desc.isVariadic()) { + if (Desc.getNumOperands() == 0 && MI->getNumOperands() > 0) + OS << "\t"; for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) { // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because // we have an extra flags operand which is not currently printed, for @@ -62,6 +65,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, OS << ", "; printOperand(MI, I, OS); } + } // Print any added annotation. printAnnotation(OS, Annot); @@ -232,7 +236,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - Op.getExpr()->print(O, &MAI); + // call_indirect instructions have a TYPEINDEX operand that we print + // as a signature here, such that the assembler can recover this + // information. 
+ auto SRE = static_cast(Op.getExpr()); + if (SRE->getKind() == MCSymbolRefExpr::VK_WASM_TYPEINDEX) { + auto &Sym = static_cast(SRE->getSymbol()); + O << WebAssembly::signatureToString(Sym.getSignature()); + } else { + Op.getExpr()->print(O, &MAI); + } } } @@ -259,14 +272,26 @@ void WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, void WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - auto Imm = static_cast(MI->getOperand(OpNo).getImm()); - if (Imm != wasm::WASM_TYPE_NORESULT) - O << WebAssembly::anyTypeToString(Imm); + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + auto Imm = static_cast(Op.getImm()); + if (Imm != wasm::WASM_TYPE_NORESULT) + O << WebAssembly::anyTypeToString(Imm); + } else { + auto Expr = cast(Op.getExpr()); + auto *Sym = cast(&Expr->getSymbol()); + if (Sym->getSignature()) { + O << WebAssembly::signatureToString(Sym->getSignature()); + } else { + // Disassembler does not currently produce a signature + O << "unknown_type"; + } + } } // We have various enums representing a subset of these types, use this // function to convert any of them to text. 
-const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) { +const char *WebAssembly::anyTypeToString(unsigned Ty) { switch (Ty) { case wasm::WASM_TYPE_I32: return "i32"; @@ -291,6 +316,24 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) { } } -const char *llvm::WebAssembly::typeToString(wasm::ValType Ty) { +const char *WebAssembly::typeToString(wasm::ValType Ty) { return anyTypeToString(static_cast(Ty)); } + +std::string WebAssembly::typeListToString(ArrayRef List) { + std::string S; + for (auto &Ty : List) { + if (&Ty != &List[0]) S += ", "; + S += WebAssembly::typeToString(Ty); + } + return S; +} + +std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) { + std::string S("("); + S += typeListToString(Sig->Params); + S += ") -> ("; + S += typeListToString(Sig->Returns); + S += ")"; + return S; +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h index b979de5028b..cf37778099a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -58,6 +58,9 @@ namespace WebAssembly { const char *typeToString(wasm::ValType Ty); const char *anyTypeToString(unsigned Ty); +std::string typeListToString(ArrayRef List); +std::string signatureToString(const wasm::WasmSignature *Sig); + } // end namespace WebAssembly } // end namespace llvm diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 44b6d6a968a..1a4c57e66d2 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -152,6 +152,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( break; case WebAssembly::OPERAND_FUNCTION32: case WebAssembly::OPERAND_OFFSET32: + case WebAssembly::OPERAND_SIGNATURE: case 
WebAssembly::OPERAND_TYPEINDEX: case WebAssembly::OPERAND_GLOBAL: case WebAssembly::OPERAND_EVENT: diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 7a9f59b1a4f..b339860a381 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -38,7 +38,7 @@ MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); std::unique_ptr -createWebAssemblyWasmObjectWriter(bool Is64Bit); +createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); namespace WebAssembly { enum OperandType { @@ -122,16 +122,22 @@ enum TOF { namespace llvm { namespace WebAssembly { -/// This is used to indicate block signatures. -enum class ExprType : unsigned { +/// Used as immediate MachineOperands for block signatures +enum class BlockType : unsigned { + Invalid = 0x00, Void = 0x40, - I32 = 0x7F, - I64 = 0x7E, - F32 = 0x7D, - F64 = 0x7C, - V128 = 0x7B, - Exnref = 0x68, - Invalid = 0x00 + I32 = unsigned(wasm::ValType::I32), + I64 = unsigned(wasm::ValType::I64), + F32 = unsigned(wasm::ValType::F32), + F64 = unsigned(wasm::ValType::F64), + V128 = unsigned(wasm::ValType::V128), + Exnref = unsigned(wasm::ValType::EXNREF), + // Multivalue blocks (and other non-void blocks) are only emitted when the + // blocks will never be exited and are at the ends of functions (see + // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made + // to pop values off the stack, so the exact multivalue signature can always + // be inferred from the return type of the parent function in MCInstLower. + Multivalue = 0xffff, }; /// Instruction opcodes emitted via means other than CodeGen. 
@@ -191,6 +197,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S: case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v8x16: + case WebAssembly::LOAD_SPLAT_v8x16_S: return 0; case WebAssembly::LOAD16_S_I32: case WebAssembly::LOAD16_S_I32_S: @@ -240,6 +248,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S: case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S: + case WebAssembly::LOAD_SPLAT_v16x8: + case WebAssembly::LOAD_SPLAT_v16x8_S: return 1; case WebAssembly::LOAD_I32: case WebAssembly::LOAD_I32_S: @@ -295,6 +305,8 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_NOTIFY_S: case WebAssembly::ATOMIC_WAIT_I32: case WebAssembly::ATOMIC_WAIT_I32_S: + case WebAssembly::LOAD_SPLAT_v32x4: + case WebAssembly::LOAD_SPLAT_v32x4_S: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_I64_S: @@ -324,31 +336,25 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S: case WebAssembly::ATOMIC_WAIT_I64: case WebAssembly::ATOMIC_WAIT_I64_S: + case WebAssembly::LOAD_SPLAT_v64x2: + case WebAssembly::LOAD_SPLAT_v64x2_S: + case WebAssembly::LOAD_EXTEND_S_v8i16: + case WebAssembly::LOAD_EXTEND_S_v8i16_S: + case WebAssembly::LOAD_EXTEND_U_v8i16: + case WebAssembly::LOAD_EXTEND_U_v8i16_S: + case WebAssembly::LOAD_EXTEND_S_v4i32: + case WebAssembly::LOAD_EXTEND_S_v4i32_S: + case WebAssembly::LOAD_EXTEND_U_v4i32: + case WebAssembly::LOAD_EXTEND_U_v4i32_S: + case WebAssembly::LOAD_EXTEND_S_v2i64: + case WebAssembly::LOAD_EXTEND_S_v2i64_S: + case WebAssembly::LOAD_EXTEND_U_v2i64: + case WebAssembly::LOAD_EXTEND_U_v2i64_S: return 3; - case WebAssembly::LOAD_v16i8: - case WebAssembly::LOAD_v16i8_S: - case WebAssembly::LOAD_v8i16: - case WebAssembly::LOAD_v8i16_S: - case WebAssembly::LOAD_v4i32: 
- case WebAssembly::LOAD_v4i32_S: - case WebAssembly::LOAD_v2i64: - case WebAssembly::LOAD_v2i64_S: - case WebAssembly::LOAD_v4f32: - case WebAssembly::LOAD_v4f32_S: - case WebAssembly::LOAD_v2f64: - case WebAssembly::LOAD_v2f64_S: - case WebAssembly::STORE_v16i8: - case WebAssembly::STORE_v16i8_S: - case WebAssembly::STORE_v8i16: - case WebAssembly::STORE_v8i16_S: - case WebAssembly::STORE_v4i32: - case WebAssembly::STORE_v4i32_S: - case WebAssembly::STORE_v2i64: - case WebAssembly::STORE_v2i64_S: - case WebAssembly::STORE_v4f32: - case WebAssembly::STORE_v4f32_S: - case WebAssembly::STORE_v2f64: - case WebAssembly::STORE_v2f64_S: + case WebAssembly::LOAD_V128: + case WebAssembly::LOAD_V128_S: + case WebAssembly::STORE_V128: + case WebAssembly::STORE_V128_S: return 4; default: return -1; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index e05efef7201..40926201931 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -60,39 +60,10 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef Types) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } -void WebAssemblyTargetAsmStreamer::emitSignature( - const wasm::WasmSignature *Sig) { - OS << "("; - emitParamList(Sig); - OS << ") -> ("; - emitReturnList(Sig); - OS << ")"; -} - -void WebAssemblyTargetAsmStreamer::emitParamList( - const wasm::WasmSignature *Sig) { - auto &Params = Sig->Params; - for (auto &Ty : Params) { - if (&Ty != &Params[0]) - OS << ", "; - OS << WebAssembly::typeToString(Ty); - } -} - -void WebAssemblyTargetAsmStreamer::emitReturnList( - const wasm::WasmSignature *Sig) { - auto &Returns = Sig->Returns; - for (auto &Ty : Returns) { - if (&Ty != &Returns[0]) - OS << ", "; - OS << WebAssembly::typeToString(Ty); - } -} - void WebAssemblyTargetAsmStreamer::emitFunctionType(const 
MCSymbolWasm *Sym) { assert(Sym->isFunction()); OS << "\t.functype\t" << Sym->getName() << " "; - emitSignature(Sym->getSignature()); + OS << WebAssembly::signatureToString(Sym->getSignature()); OS << "\n"; } @@ -107,7 +78,7 @@ void WebAssemblyTargetAsmStreamer::emitGlobalType(const MCSymbolWasm *Sym) { void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) { assert(Sym->isEvent()); OS << "\t.eventtype\t" << Sym->getName() << " "; - emitParamList(Sym->getSignature()); + OS << WebAssembly::typeListToString(Sym->getSignature()->Params); OS << "\n"; } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 5ea62b179d2..0164f8e572e 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -56,9 +56,6 @@ protected: /// This part is for ascii assembly output class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { formatted_raw_ostream &OS; - void emitSignature(const wasm::WasmSignature *Sig); - void emitParamList(const wasm::WasmSignature *Sig); - void emitReturnList(const wasm::WasmSignature *Sig); public: WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index a1cc3e268e8..e7a599e3e17 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -31,7 +31,7 @@ using namespace llvm; namespace { class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter { public: - explicit WebAssemblyWasmObjectWriter(bool Is64Bit); + explicit WebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten); private: unsigned getRelocType(const MCValue &Target, @@ -39,8 +39,9 @@ private: }; 
} // end anonymous namespace -WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit) - : MCWasmObjectTargetWriter(Is64Bit) {} +WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit, + bool IsEmscripten) + : MCWasmObjectTargetWriter(Is64Bit, IsEmscripten) {} static const MCSection *getFixupSection(const MCExpr *Expr) { if (auto SyExp = dyn_cast(Expr)) { @@ -116,6 +117,6 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } std::unique_ptr -llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); +llvm::createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten) { + return std::make_unique(Is64Bit, IsEmscripten); } diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 7f9d41da397..5d8b873ce23 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -67,8 +67,8 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { } std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(RegNo) && + Register RegNo = MO.getReg(); + assert(Register::isVirtualRegister(RegNo) && "Unlowered physical register encountered during assembly printing"); assert(!MFI->isVRegStackified(RegNo)); unsigned WAReg = MFI->getWAReg(RegNo); @@ -332,43 +332,15 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. 
break; - case WebAssembly::FALLTHROUGH_RETURN_I32: - case WebAssembly::FALLTHROUGH_RETURN_I32_S: - case WebAssembly::FALLTHROUGH_RETURN_I64: - case WebAssembly::FALLTHROUGH_RETURN_I64_S: - case WebAssembly::FALLTHROUGH_RETURN_F32: - case WebAssembly::FALLTHROUGH_RETURN_F32_S: - case WebAssembly::FALLTHROUGH_RETURN_F64: - case WebAssembly::FALLTHROUGH_RETURN_F64_S: - case WebAssembly::FALLTHROUGH_RETURN_v16i8: - case WebAssembly::FALLTHROUGH_RETURN_v16i8_S: - case WebAssembly::FALLTHROUGH_RETURN_v8i16: - case WebAssembly::FALLTHROUGH_RETURN_v8i16_S: - case WebAssembly::FALLTHROUGH_RETURN_v4i32: - case WebAssembly::FALLTHROUGH_RETURN_v4i32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2i64: - case WebAssembly::FALLTHROUGH_RETURN_v2i64_S: - case WebAssembly::FALLTHROUGH_RETURN_v4f32: - case WebAssembly::FALLTHROUGH_RETURN_v4f32_S: - case WebAssembly::FALLTHROUGH_RETURN_v2f64: - case WebAssembly::FALLTHROUGH_RETURN_v2f64_S: { + case WebAssembly::FALLTHROUGH_RETURN: { // These instructions represent the implicit return at the end of a - // function body. Always pops one value off the stack. + // function body. if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-value"); + OutStreamer->AddComment("fallthrough-return"); OutStreamer->AddBlankLine(); } break; } - case WebAssembly::FALLTHROUGH_RETURN_VOID: - case WebAssembly::FALLTHROUGH_RETURN_VOID_S: - // This instruction represents the implicit return at the end of a - // function body with no return value. - if (isVerbose()) { - OutStreamer->AddComment("fallthrough-return-void"); - OutStreamer->AddBlankLine(); - } - break; case WebAssembly::COMPILER_FENCE: // This is a compiler barrier that prevents instruction reordering during // backend compilation, and should not be emitted. 
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 4c5d0192fc2..c069af9eed6 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -97,14 +97,14 @@ public: // If the smallest region containing MBB is a loop if (LoopMap.count(ML)) return LoopMap[ML].get(); - LoopMap[ML] = llvm::make_unique>(ML); + LoopMap[ML] = std::make_unique>(ML); return LoopMap[ML].get(); } else { // If the smallest region containing MBB is an exception if (ExceptionMap.count(WE)) return ExceptionMap[WE].get(); ExceptionMap[WE] = - llvm::make_unique>(WE); + std::make_unique>(WE); return ExceptionMap[WE].get(); } } @@ -317,6 +317,7 @@ static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, // If Next was originally ordered before MBB, and it isn't because it was // loop-rotated above the header, it's not preferred. if (Next->getNumber() < MBB->getNumber() && + (WasmDisableEHPadSort || !Next->isEHPad()) && (!R || !R->contains(Next) || R->getHeader()->getNumber() < Next->getNumber())) { Ready.push(Next); diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index e6bfc5226e2..7e867edaaa2 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCAsmInfo.h" using namespace llvm; @@ -315,12 +316,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { // br_on_exn 0, $__cpp_exception // rethrow // end_block - WebAssembly::ExprType ReturnType = WebAssembly::ExprType::Void; + WebAssembly::BlockType ReturnType = WebAssembly::BlockType::Void; if (IsBrOnExn) { const char *TagName = BrOnExn->getOperand(1).getSymbolName(); if 
(std::strcmp(TagName, "__cpp_exception") != 0) llvm_unreachable("Only C++ exception is supported"); - ReturnType = WebAssembly::ExprType::I32; + ReturnType = WebAssembly::BlockType::I32; } auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); @@ -406,7 +407,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) { auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos), TII.get(WebAssembly::LOOP)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_LOOP. BeforeSet.clear(); @@ -526,9 +527,42 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { AfterSet.insert(&MI); } + // If Header unwinds to MBB (= Header contains 'invoke'), the try block should + // contain the call within it. So the call should go after the TRY. The + // exception is when the header's terminator is a rethrow instruction, in + // which case that instruction, not a call instruction before it, is gonna + // throw. + MachineInstr *ThrowingCall = nullptr; + if (MBB.isPredecessor(Header)) { + auto TermPos = Header->getFirstTerminator(); + if (TermPos == Header->end() || + TermPos->getOpcode() != WebAssembly::RETHROW) { + for (auto &MI : reverse(*Header)) { + if (MI.isCall()) { + AfterSet.insert(&MI); + ThrowingCall = &MI; + // Possibly throwing calls are usually wrapped by EH_LABEL + // instructions. We don't want to split them and the call. + if (MI.getIterator() != Header->begin() && + std::prev(MI.getIterator())->isEHLabel()) { + AfterSet.insert(&*std::prev(MI.getIterator())); + ThrowingCall = &*std::prev(MI.getIterator()); + } + break; + } + } + } + } + // Local expression tree should go after the TRY. 
- for (auto I = Header->getFirstTerminator(), E = Header->begin(); I != E; - --I) { + // For BLOCK placement, we start the search from the previous instruction of a + // BB's terminator, but in TRY's case, we should start from the previous + // instruction of a call that can throw, or a EH_LABEL that precedes the call, + // because the return values of the call's previous instructions can be + // stackified and consumed by the throwing call. + auto SearchStartPt = ThrowingCall ? MachineBasicBlock::iterator(ThrowingCall) + : Header->getFirstTerminator(); + for (auto I = SearchStartPt, E = Header->begin(); I != E; --I) { if (std::prev(I)->isDebugInstr() || std::prev(I)->isPosition()) continue; if (WebAssembly::isChild(*std::prev(I), MFI)) @@ -537,35 +571,12 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) { break; } - // If Header unwinds to MBB (= Header contains 'invoke'), the try block should - // contain the call within it. So the call should go after the TRY. The - // exception is when the header's terminator is a rethrow instruction, in - // which case that instruction, not a call instruction before it, is gonna - // throw. - if (MBB.isPredecessor(Header)) { - auto TermPos = Header->getFirstTerminator(); - if (TermPos == Header->end() || - TermPos->getOpcode() != WebAssembly::RETHROW) { - for (const auto &MI : reverse(*Header)) { - if (MI.isCall()) { - AfterSet.insert(&MI); - // Possibly throwing calls are usually wrapped by EH_LABEL - // instructions. We don't want to split them and the call. - if (MI.getIterator() != Header->begin() && - std::prev(MI.getIterator())->isEHLabel()) - AfterSet.insert(&*std::prev(MI.getIterator())); - break; - } - } - } - } - // Add the TRY. 
auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet); MachineInstr *Begin = BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos), TII.get(WebAssembly::TRY)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Decide where in Header to put the END_TRY. BeforeSet.clear(); @@ -694,8 +705,26 @@ void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) { } } +// When MBB is split into MBB and Split, we should unstackify defs in MBB that +// have their uses in Split. +static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB, + MachineBasicBlock &Split, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { + for (auto &MI : Split) { + for (auto &MO : MI.explicit_uses()) { + if (!MO.isReg() || Register::isPhysicalRegister(MO.getReg())) + continue; + if (MachineInstr *Def = MRI.getUniqueVRegDef(MO.getReg())) + if (Def->getParent() == &MBB) + MFI.unstackifyVReg(MO.getReg()); + } + } +} + bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MFI = *MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // Linearizing the control flow by placing TRY / END_TRY markers can create @@ -830,7 +859,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { SmallVector EHPadStack; // Range of intructions to be wrapped in a new nested try/catch using TryRange = std::pair; - // In original CFG, + // In original CFG, DenseMap> UnwindDestToTryRanges; // In new CFG, DenseMap> BrDestToTryRanges; @@ -936,7 +965,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // of the function with a local.get and a rethrow instruction. 
if (NeedAppendixBlock) { auto *AppendixBB = getAppendixBlock(MF); - unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); + Register ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW)) .addReg(ExnReg); // These instruction ranges should branch to this appendix BB. @@ -967,7 +996,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // ... // cont: for (auto &P : UnwindDestToTryRanges) { - NumUnwindMismatches++; + NumUnwindMismatches += P.second.size(); // This means the destination is the appendix BB, which was separately // handled above. @@ -1007,6 +1036,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { BrDest->insert(BrDest->end(), EndTry->removeFromParent()); // Take out the handler body from EH pad to the new branch destination BB. BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end()); + unstackifyVRegsUsedInSplitBB(*EHPad, *BrDest, MFI, MRI); // Fix predecessor-successor relationship. BrDest->transferSuccessors(EHPad); EHPad->addSuccessor(BrDest); @@ -1100,7 +1130,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { MachineInstr *NestedTry = BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(), TII.get(WebAssembly::TRY)) - .addImm(int64_t(WebAssembly::ExprType::Void)); + .addImm(int64_t(WebAssembly::BlockType::Void)); // Create the nested EH pad and fill instructions in. MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock(); @@ -1122,6 +1152,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // new nested continuation BB. NestedCont->splice(NestedCont->end(), MBB, std::next(RangeEnd->getIterator()), MBB->end()); + unstackifyVRegsUsedInSplitBB(*MBB, *NestedCont, MFI, MRI); registerTryScope(NestedTry, NestedEndTry, NestedEHPad); // Fix predecessor-successor relationship. 
@@ -1197,54 +1228,32 @@ getDepth(const SmallVectorImpl &Stack, /// checks for such cases and fixes up the signatures. void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { const auto &MFI = *MF.getInfo(); - assert(MFI.getResults().size() <= 1); if (MFI.getResults().empty()) return; - WebAssembly::ExprType RetType; - switch (MFI.getResults().front().SimpleTy) { - case MVT::i32: - RetType = WebAssembly::ExprType::I32; - break; - case MVT::i64: - RetType = WebAssembly::ExprType::I64; - break; - case MVT::f32: - RetType = WebAssembly::ExprType::F32; - break; - case MVT::f64: - RetType = WebAssembly::ExprType::F64; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - RetType = WebAssembly::ExprType::V128; - break; - case MVT::exnref: - RetType = WebAssembly::ExprType::Exnref; - break; - default: - llvm_unreachable("unexpected return type"); - } + // MCInstLower will add the proper types to multivalue signatures based on the + // function return type + WebAssembly::BlockType RetType = + MFI.getResults().size() > 1 + ? WebAssembly::BlockType::Multivalue + : WebAssembly::BlockType( + WebAssembly::toValType(MFI.getResults().front())); for (MachineBasicBlock &MBB : reverse(MF)) { for (MachineInstr &MI : reverse(MBB)) { if (MI.isPosition() || MI.isDebugInstr()) continue; - if (MI.getOpcode() == WebAssembly::END_BLOCK) { + switch (MI.getOpcode()) { + case WebAssembly::END_BLOCK: + case WebAssembly::END_LOOP: + case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; + default: + // Something other than an `end`. We're done. + return; } - if (MI.getOpcode() == WebAssembly::END_LOOP) { - EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); - continue; - } - // Something other than an `end`. We're done. 
- return; } } } @@ -1280,7 +1289,9 @@ void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) { } } // Fix mismatches in unwind destinations induced by linearizing the code. - fixUnwindMismatches(MF); + if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm && + MF.getFunction().hasPersonalityFn()) + fixUnwindMismatches(MF); } void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index dbd62179f05..ef75bb21531 100644 --- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -168,7 +168,7 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { static MachineInstr *findStartOfTree(MachineOperand &MO, MachineRegisterInfo &MRI, WebAssemblyFunctionInfo &MFI) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(MFI.isVRegStackified(Reg)); MachineInstr *Def = MRI.getVRegDef(Reg); @@ -207,7 +207,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I++; if (!WebAssembly::isArgument(MI.getOpcode())) break; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(!MFI.isVRegStackified(Reg)); Reg2Local[Reg] = static_cast(MI.getOperand(1).getImm()); MI.eraseFromParent(); @@ -221,7 +221,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // drops to their defs. BitVector UseEmpty(MRI.getNumVirtRegs()); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) - UseEmpty[I] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(I)); + UseEmpty[I] = MRI.use_empty(Register::index2VirtReg(I)); // Visit each instruction in the function. 
for (MachineBasicBlock &MBB : MF) { @@ -238,13 +238,13 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { if (WebAssembly::isTee(MI.getOpcode())) { assert(MFI.isVRegStackified(MI.getOperand(0).getReg())); assert(!MFI.isVRegStackified(MI.getOperand(1).getReg())); - unsigned OldReg = MI.getOperand(2).getReg(); + Register OldReg = MI.getOperand(2).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); // Stackify the input if it isn't stackified yet. if (!MFI.isVRegStackified(OldReg)) { unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg) .addImm(LocalId); @@ -270,17 +270,17 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // we handle at most one def. assert(MI.getDesc().getNumDefs() <= 1); if (MI.getDesc().getNumDefs() == 1) { - unsigned OldReg = MI.getOperand(0).getReg(); + Register OldReg = MI.getOperand(0).getReg(); if (!MFI.isVRegStackified(OldReg)) { const TargetRegisterClass *RC = MRI.getRegClass(OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); auto InsertPt = std::next(MI.getIterator()); if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) { MI.eraseFromParent(); Changed = true; continue; } - if (UseEmpty[TargetRegisterInfo::virtReg2Index(OldReg)]) { + if (UseEmpty[Register::virtReg2Index(OldReg)]) { unsigned Opc = getDropOpcode(RC); MachineInstr *Drop = BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc)) @@ -310,7 +310,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { if (!MO.isReg()) continue; - unsigned OldReg = MO.getReg(); + Register OldReg = MO.getReg(); // Inline asm may have a def in the middle of the operands. 
Our contract // with inline asm register operands is to provide local indices as @@ -345,7 +345,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // Insert a local.get. unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg); const TargetRegisterClass *RC = MRI.getRegClass(OldReg); - unsigned NewReg = MRI.createVirtualRegister(RC); + Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); InsertPt = BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) @@ -369,7 +369,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { // TODO: Sort the locals for better compression. MFI.setNumLocals(CurLocal - MFI.getParams().size()); for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); auto RL = Reg2Local.find(Reg); if (RL == Reg2Local.end() || RL->second < MFI.getParams().size()) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 2552e915083..c932f985489 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -1141,14 +1141,14 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { return true; } - unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), + Register Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), In, I->getOperand(0)->hasOneUse()); if (!Reg) return false; MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; --Iter; assert(Iter->isBitcast()); - Iter->setPhysRegsDeadExcept(ArrayRef(), TRI); + Iter->setPhysRegsDeadExcept(ArrayRef(), TRI); updateValueMap(I, Reg); return true; } @@ -1302,51 +1302,33 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() == 0) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - 
TII.get(WebAssembly::RETURN_VOID)); + TII.get(WebAssembly::RETURN)); return true; } + // TODO: support multiple return in FastISel + if (Ret->getNumOperands() > 1) + return false; + Value *RV = Ret->getOperand(0); if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) return false; - unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - Opc = WebAssembly::RETURN_I32; - break; case MVT::i64: - Opc = WebAssembly::RETURN_I64; - break; case MVT::f32: - Opc = WebAssembly::RETURN_F32; - break; case MVT::f64: - Opc = WebAssembly::RETURN_F64; - break; case MVT::v16i8: - Opc = WebAssembly::RETURN_v16i8; - break; case MVT::v8i16: - Opc = WebAssembly::RETURN_v8i16; - break; case MVT::v4i32: - Opc = WebAssembly::RETURN_v4i32; - break; case MVT::v2i64: - Opc = WebAssembly::RETURN_v2i64; - break; case MVT::v4f32: - Opc = WebAssembly::RETURN_v4f32; - break; case MVT::v2f64: - Opc = WebAssembly::RETURN_v2f64; - break; case MVT::exnref: - Opc = WebAssembly::RETURN_EXNREF; break; default: return false; @@ -1363,7 +1345,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { if (Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::RETURN)) + .addReg(Reg); return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index b7fc65401fc..6b1bbd7a2b0 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -70,6 +70,8 @@ static void findUses(Value *V, Function &F, for (Use &U : V->uses()) { if (auto *BC = dyn_cast(U.getUser())) findUses(BC, F, Uses, ConstantBCs); + else if (auto *A = dyn_cast(U.getUser())) + findUses(A, F, Uses, ConstantBCs); else if (U.get()->getType() != F.getType()) { CallSite CS(U.getUser()); if (!CS) diff 
--git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 7d8e86d9b2c..157ea9d525c 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -56,6 +56,7 @@ #include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" @@ -358,7 +359,7 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop( // Add the register which will be used to tell the jump table which block to // jump to. MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); // Compute the indices in the superheader, one for each bad block, and diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 5299068efdd..71eeebfada4 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -183,14 +183,14 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, bool HasBP = hasBP(MF); if (HasBP) { auto FI = MF.getInfo(); - unsigned BasePtr = MRI.createVirtualRegister(PtrRC); + Register BasePtr = MRI.createVirtualRegister(PtrRC); FI->setBasePointerVreg(BasePtr); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr) .addReg(SPReg); } if (StackSize) { // Subtract the frame size - unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + Register OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), @@ -199,7 +199,7 @@ void 
WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(OffsetReg); } if (HasBP) { - unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC); + Register BitmaskReg = MRI.createVirtualRegister(PtrRC); unsigned Alignment = MFI.getMaxAlignment(); assert((1u << countTrailingZeros(Alignment)) == Alignment && "Alignment must be a power of 2"); @@ -244,7 +244,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, } else if (StackSize) { const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + Register OffsetReg = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) .addImm(StackSize); // In the epilog we don't need to write the result back to the SP32 physreg diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index daddd4ca16f..fdc0f561dcd 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -29,9 +29,9 @@ public: static const size_t RedZoneSize = 128; WebAssemblyFrameLowering() - : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16, + : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/Align(16), /*LocalAreaOffset=*/0, - /*TransientStackAlignment=*/16, + /*TransientStackAlignment=*/Align(16), /*StackRealignable=*/true) {} MachineBasicBlock::iterator diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def index 77217f16a72..13f0476eb4a 100644 --- a/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/lib/Target/WebAssembly/WebAssemblyISD.def @@ -26,9 +26,11 @@ HANDLE_NODETYPE(WrapperPIC) HANDLE_NODETYPE(BR_IF) HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(SHUFFLE) +HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) HANDLE_NODETYPE(VEC_SHR_S) HANDLE_NODETYPE(VEC_SHR_U) +HANDLE_NODETYPE(LOAD_SPLAT) HANDLE_NODETYPE(THROW) 
HANDLE_NODETYPE(MEMORY_COPY) HANDLE_NODETYPE(MEMORY_FILL) diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 26339eaef37..f83a8a984ae 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -54,6 +54,12 @@ public: ForCodeSize = MF.getFunction().hasOptSize(); Subtarget = &MF.getSubtarget(); + + // Wasm64 is not fully supported right now (and is not specified) + if (Subtarget->hasAddr64()) + report_fatal_error( + "64-bit WebAssembly (wasm64) is not currently supported"); + return SelectionDAGISel::runOnMachineFunction(MF); } @@ -88,88 +94,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { uint64_t SyncScopeID = cast(Node->getOperand(2).getNode())->getZExtValue(); + MachineSDNode *Fence = nullptr; switch (SyncScopeID) { - case SyncScope::SingleThread: { + case SyncScope::SingleThread: // We lower a single-thread fence to a pseudo compiler barrier instruction // preventing instruction reordering. This will not be emitted in final // binary. - MachineSDNode *Fence = - CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE, - DL, // debug loc - MVT::Other, // outchain type - Node->getOperand(0) // inchain - ); - ReplaceNode(Node, Fence); - CurDAG->RemoveDeadNode(Node); - return; - } - - case SyncScope::System: { - // For non-emscripten systems, we have not decided on what we should - // traslate fences to yet. - if (!Subtarget->getTargetTriple().isOSEmscripten()) - report_fatal_error( - "ATOMIC_FENCE is not yet supported in non-emscripten OSes"); - - // Wasm does not have a fence instruction, but because all atomic - // instructions in wasm are sequentially consistent, we translate a - // fence to an idempotent atomic RMW instruction to a linear memory - // address. All atomic instructions in wasm are sequentially consistent, - // but this is to ensure a fence also prevents reordering of non-atomic - // instructions in the VM. 
Even though LLVM IR's fence instruction does - // not say anything about its relationship with non-atomic instructions, - // we think this is more user-friendly. - // - // While any address can work, here we use a value stored in - // __stack_pointer wasm global because there's high chance that area is - // in cache. - // - // So the selected instructions will be in the form of: - // %addr = get_global $__stack_pointer - // %0 = i32.const 0 - // i32.atomic.rmw.or %addr, %0 - SDValue StackPtrSym = CurDAG->getTargetExternalSymbol( - "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout())); - MachineSDNode *GetGlobal = - CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode - DL, // debug loc - MVT::i32, // result type - StackPtrSym // __stack_pointer symbol - ); - - SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); - auto *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getUnknownStack(MF), - // FIXME Volatile isn't really correct, but currently all LLVM - // atomic instructions are treated as volatiles in the backend, so - // we should be consistent. 
- MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | - MachineMemOperand::MOStore, - 4, 4, AAMDNodes(), nullptr, SyncScope::System, - AtomicOrdering::SequentiallyConsistent); - MachineSDNode *Const0 = - CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero); - MachineSDNode *AtomicRMW = CurDAG->getMachineNode( - WebAssembly::ATOMIC_RMW_OR_I32, // opcode - DL, // debug loc - MVT::i32, // result type - MVT::Other, // outchain type - { - Zero, // alignment - Zero, // offset - SDValue(GetGlobal, 0), // __stack_pointer - SDValue(Const0, 0), // OR with 0 to make it idempotent - Node->getOperand(0) // inchain - }); - - CurDAG->setNodeMemRefs(AtomicRMW, {MMO}); - ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1)); - CurDAG->RemoveDeadNode(Node); - return; - } + Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE, + DL, // debug loc + MVT::Other, // outchain type + Node->getOperand(0) // inchain + ); + break; + case SyncScope::System: + // Currently wasm only supports sequentially consistent atomics, so we + // always set the order to 0 (sequentially consistent). 
+ Fence = CurDAG->getMachineNode( + WebAssembly::ATOMIC_FENCE, + DL, // debug loc + MVT::Other, // outchain type + CurDAG->getTargetConstant(0, DL, MVT::i32), // order + Node->getOperand(0) // inchain + ); + break; default: llvm_unreachable("Unknown scope!"); } + + ReplaceNode(Node, Fence); + CurDAG->RemoveDeadNode(Node); + return; } case ISD::GlobalTLSAddress: { @@ -224,6 +178,33 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, TLSSize); return; } + case Intrinsic::wasm_tls_align: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSAlign = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_align", MVT::i32)); + ReplaceNode(Node, TLSAlign); + return; + } + } + break; + } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + case Intrinsic::wasm_tls_base: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSBase = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, MVT::i32, MVT::Other, + CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), + Node->getOperand(0)); + ReplaceNode(Node, TLSBase); + return; + } } break; } diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4064a983099..f06afdbcea9 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -205,7 +205,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( for (auto T : {MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); } - for (auto T : MVT::integer_vector_valuetypes()) + for (auto T : MVT::integer_fixedlen_vector_valuetypes()) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); // Dynamic 
stack allocation: use the default expansion. @@ -228,7 +228,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // - Floating-point extending loads. // - Floating-point truncating stores. // - i1 extending loads. - // - extending/truncating SIMD loads/stores + // - truncating SIMD stores and most extending loads setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); for (auto T : MVT::integer_valuetypes()) @@ -237,7 +237,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}) { - for (auto MemT : MVT::vector_valuetypes()) { + for (auto MemT : MVT::fixedlen_vector_valuetypes()) { if (MVT(T) != MemT) { setTruncStoreAction(T, MemT, Expand); for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) @@ -245,6 +245,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( } } } + // But some vector extending loads are legal + if (Subtarget->hasUnimplementedSIMD128()) { + for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { + setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal); + } + } } // Don't do anything clever with build_pairs @@ -259,16 +267,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setMaxAtomicSizeInBitsSupported(64); - if (Subtarget->hasBulkMemory()) { - // Use memory.copy and friends over multiple loads and stores - MaxStoresPerMemcpy = 1; - MaxStoresPerMemcpyOptSize = 1; - MaxStoresPerMemmove = 1; - MaxStoresPerMemmoveOptSize = 1; - MaxStoresPerMemset = 1; - MaxStoresPerMemsetOptSize = 1; - } - // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is // consistent with the f64 and f128 names. 
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); @@ -337,8 +335,8 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, bool Float64, unsigned LoweredOpcode) { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned OutReg = MI.getOperand(0).getReg(); - unsigned InReg = MI.getOperand(1).getReg(); + Register OutReg = MI.getOperand(0).getReg(); + Register InReg = MI.getOperand(1).getReg(); unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; @@ -396,9 +394,9 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, // For unsigned numbers, we have to do a separate comparison with zero. if (IsUnsigned) { Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); - unsigned SecondCmpReg = + Register SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, 0.0))); BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); @@ -550,6 +548,16 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, return true; } +bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + if (!Subtarget->hasUnimplementedSIMD128()) + return false; + MVT ExtT = ExtVal.getSimpleValueType(); + MVT MemT = cast(ExtVal->getOperand(0))->getSimpleValueType(0); + return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) || + (ExtT == MVT::v4i32 && MemT == MVT::v4i16) || + (ExtT == MVT::v2i64 && MemT == MVT::v2i32); +} + EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { @@ -569,7 +577,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 
4; + Info.align = Align(4); // atomic.notify instruction does not really load the memory specified with // this argument, but MachineMemOperand should either be load or store, so // we set this to a load. @@ -583,7 +591,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 4; + Info.align = Align(4); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i64: @@ -591,7 +599,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = 8; + Info.align = Align(8); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; default: @@ -623,7 +631,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) { CallConv == CallingConv::Cold || CallConv == CallingConv::PreserveMost || CallConv == CallingConv::PreserveAll || - CallConv == CallingConv::CXX_FAST_TLS; + CallConv == CallingConv::CXX_FAST_TLS || + CallConv == CallingConv::WASM_EmscriptenInvoke; } SDValue @@ -644,13 +653,36 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CLI.IsPatchPoint) fail(DL, DAG, "WebAssembly doesn't support patch point yet"); - // Fail if tail calls are required but not enabled - if (!Subtarget->hasTailCall()) { - if ((CallConv == CallingConv::Fast && CLI.IsTailCall && - MF.getTarget().Options.GuaranteedTailCallOpt) || - (CLI.CS && CLI.CS.isMustTailCall())) - fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); - CLI.IsTailCall = false; + if (CLI.IsTailCall) { + bool MustTail = CLI.CS && CLI.CS.isMustTailCall(); + if (Subtarget->hasTailCall() && !CLI.IsVarArg) { + // Do not tail call unless caller and callee return types match + const Function &F = MF.getFunction(); + const TargetMachine &TM = getTargetMachine(); + Type *RetTy = F.getReturnType(); + 
SmallVector CallerRetTys; + SmallVector CalleeRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys); + bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() && + std::equal(CallerRetTys.begin(), CallerRetTys.end(), + CalleeRetTys.begin()); + if (!TypesMatch) { + // musttail in this case would be an LLVM IR validation failure + assert(!MustTail); + CLI.IsTailCall = false; + } + } else { + CLI.IsTailCall = false; + if (MustTail) { + if (CLI.IsVarArg) { + // The return would pop the argument buffer + fail(DL, DAG, "WebAssembly does not support varargs tail calls"); + } else { + fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled"); + } + } + } } SmallVectorImpl &Ins = CLI.Ins; @@ -659,6 +691,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; + + // The generic code may have added an sret argument. If we're lowering an + // invoke function, the ABI requires that the function pointer be the first + // argument, so we may have to swap the arguments. + if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 && + Outs[0].Flags.isSRet()) { + std::swap(Outs[0], Outs[1]); + std::swap(OutVals[0], OutVals[1]); + } + unsigned NumFixedArgs = 0; for (unsigned I = 0; I < Outs.size(); ++I) { const ISD::OutputArg &Out = Outs[I]; @@ -810,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn( CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, const SmallVectorImpl &Outs, LLVMContext & /*Context*/) const { - // WebAssembly can't currently handle returning tuples. 
- return Outs.size() <= 1; + // WebAssembly can only handle returning tuples with multivalue enabled + return Subtarget->hasMultivalue() || Outs.size() <= 1; } SDValue WebAssemblyTargetLowering::LowerReturn( @@ -819,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn( const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); + assert((Subtarget->hasMultivalue() || Outs.size() <= 1) && + "MVP WebAssembly can only return up to one value"); if (!callingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); @@ -881,7 +924,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // the buffer is passed as an argument. if (IsVarArg) { MVT PtrVT = getPointerTy(MF.getDataLayout()); - unsigned VarargVreg = + Register VarargVreg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); MFI->setVarargBufferVreg(VarargVreg); Chain = DAG.getCopyToReg( @@ -1022,8 +1065,9 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op, return SDValue(); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + MakeLibCallOptions CallOptions; return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(), - {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL) + {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL) .first; } @@ -1037,7 +1081,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - unsigned FP = + Register FP = Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); } @@ -1249,68 +1293,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, const EVT VecT = Op.getValueType(); const EVT LaneT = Op.getOperand(0).getValueType(); const size_t Lanes = Op.getNumOperands(); + bool 
CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8; + + // BUILD_VECTORs are lowered to the instruction that initializes the highest + // possible number of lanes at once followed by a sequence of replace_lane + // instructions to individually initialize any remaining lanes. + + // TODO: Tune this. For example, lanewise swizzling is very expensive, so + // swizzled lanes should be given greater weight. + + // TODO: Investigate building vectors by shuffling together vectors built by + // separately specialized means. + auto IsConstant = [](const SDValue &V) { return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; }; - // Find the most common operand, which is approximately the best to splat - using Entry = std::pair; - SmallVector ValueCounts; - size_t NumConst = 0, NumDynamic = 0; - for (const SDValue &Lane : Op->op_values()) { - if (Lane.isUndef()) { - continue; - } else if (IsConstant(Lane)) { - NumConst++; - } else { - NumDynamic++; - } - auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(), - [&Lane](Entry A) { return A.first == Lane; }); - if (CountIt == ValueCounts.end()) { - ValueCounts.emplace_back(Lane, 1); + // Returns the source vector and index vector pair if they exist. 
Checks for: + // (extract_vector_elt + // $src, + // (sign_extend_inreg (extract_vector_elt $indices, $i)) + // ) + auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { + auto Bail = std::make_pair(SDValue(), SDValue()); + if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleSrc = Lane->getOperand(0); + const SDValue &IndexExt = Lane->getOperand(1); + if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) + return Bail; + const SDValue &Index = IndexExt->getOperand(0); + if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SwizzleIndices = Index->getOperand(0); + if (SwizzleSrc.getValueType() != MVT::v16i8 || + SwizzleIndices.getValueType() != MVT::v16i8 || + Index->getOperand(1)->getOpcode() != ISD::Constant || + Index->getConstantOperandVal(1) != I) + return Bail; + return std::make_pair(SwizzleSrc, SwizzleIndices); + }; + + using ValueEntry = std::pair; + SmallVector SplatValueCounts; + + using SwizzleEntry = std::pair, size_t>; + SmallVector SwizzleCounts; + + auto AddCount = [](auto &Counts, const auto &Val) { + auto CountIt = std::find_if(Counts.begin(), Counts.end(), + [&Val](auto E) { return E.first == Val; }); + if (CountIt == Counts.end()) { + Counts.emplace_back(Val, 1); } else { CountIt->second++; } - } - auto CommonIt = - std::max_element(ValueCounts.begin(), ValueCounts.end(), - [](Entry A, Entry B) { return A.second < B.second; }); - assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector"); - SDValue SplatValue = CommonIt->first; - size_t NumCommon = CommonIt->second; + }; - // If v128.const is available, consider using it instead of a splat + auto GetMostCommon = [](auto &Counts) { + auto CommonIt = + std::max_element(Counts.begin(), Counts.end(), + [](auto A, auto B) { return A.second < B.second; }); + assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); + return *CommonIt; + }; + + size_t NumConstantLanes = 0; + + // Count eligible lanes 
for each type of vector creation op + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op->getOperand(I); + if (Lane.isUndef()) + continue; + + AddCount(SplatValueCounts, Lane); + + if (IsConstant(Lane)) { + NumConstantLanes++; + } else if (CanSwizzle) { + auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); + if (SwizzleSrcs.first) + AddCount(SwizzleCounts, SwizzleSrcs); + } + } + + SDValue SplatValue; + size_t NumSplatLanes; + std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); + + SDValue SwizzleSrc; + SDValue SwizzleIndices; + size_t NumSwizzleLanes = 0; + if (SwizzleCounts.size()) + std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), + NumSwizzleLanes) = GetMostCommon(SwizzleCounts); + + // Predicate returning true if the lane is properly initialized by the + // original instruction + std::function IsLaneConstructed; + SDValue Result; if (Subtarget->hasUnimplementedSIMD128()) { - // {i32,i64,f32,f64}.const opcode, and value - const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes); - // SIMD prefix and opcode - const size_t SplatBytes = 2; - const size_t SplatConstBytes = SplatBytes + ConstBytes; - // SIMD prefix, opcode, and lane index - const size_t ReplaceBytes = 3; - const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes; - // SIMD prefix, v128.const opcode, and 128-bit value - const size_t VecConstBytes = 18; - // Initial v128.const and a replace_lane for each non-const operand - const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes; - // Initial splat and all necessary replace_lanes - const size_t SplatInitBytes = - IsConstant(SplatValue) - // Initial constant splat - ? 
(SplatConstBytes + - // Constant replace_lanes - (NumConst - NumCommon) * ReplaceConstBytes + - // Dynamic replace_lanes - (NumDynamic * ReplaceBytes)) - // Initial dynamic splat - : (SplatBytes + - // Constant replace_lanes - (NumConst * ReplaceConstBytes) + - // Dynamic replace_lanes - (NumDynamic - NumCommon) * ReplaceBytes); - if (ConstInitBytes < SplatInitBytes) { - // Create build_vector that will lower to initial v128.const + // Prefer swizzles over vector consts over splats + if (NumSwizzleLanes >= NumSplatLanes && + NumSwizzleLanes >= NumConstantLanes) { + Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, + SwizzleIndices); + auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); + IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { + return Swizzled == GetSwizzleSrcs(I, Lane); + }; + } else if (NumConstantLanes >= NumSplatLanes) { SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1321,26 +1413,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); } } - SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes); - // Add replace_lane instructions for non-const lanes - for (size_t I = 0; I < Lanes; ++I) { - const SDValue &Lane = Op->getOperand(I); - if (!Lane.isUndef() && !IsConstant(Lane)) - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, - DAG.getConstant(I, DL, MVT::i32)); - } - return Result; + Result = DAG.getBuildVector(VecT, DL, ConstLanes); + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; } } - // Use a splat for the initial vector - SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); - // Add replace_lane instructions for other values + if (!Result) { + // Use a splat, but possibly a load_splat + LoadSDNode *SplattedLoad; + if (Subtarget->hasUnimplementedSIMD128() && + (SplattedLoad = dyn_cast(SplatValue)) && + 
SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { + Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue); + } else { + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); + } + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return Lane == SplatValue; + }; + } + + // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); - if (Lane != SplatValue) + if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, DAG.getConstant(I, DL, MVT::i32)); } + return Result; } @@ -1415,11 +1516,6 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, // Only manually lower vector shifts assert(Op.getSimpleValueType().isVector()); - // Expand all vector shifts until V8 fixes its implementation - // TODO: remove this once V8 is fixed - if (!Subtarget->hasUnimplementedSIMD128()) - return unrollVectorShift(Op, DAG); - // Unroll non-splat vector shifts BuildVectorSDNode *ShiftVec; SDValue SplatVal; diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b3c7f3defd5..a53e24a0554 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -63,7 +63,7 @@ private: MachineMemOperand::Flags Flags, bool *Fast) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; - + bool isVectorLoadExtDesirable(SDValue ExtVal) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index e85aa57efc4..a9a99d38f9f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -71,12 +71,6 @@ 
class NotifyPatImmOff : def : NotifyPatImmOff; def : NotifyPatImmOff; -def NotifyPatGlobalAddr : - Pat<(i32 (int_wasm_atomic_notify (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off)), - I32:$count)), - (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>; - // Select notifys with just a constant offset. def NotifyPatOffsetOnly : Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)), @@ -105,13 +99,6 @@ def : WaitPatImmOff; def : WaitPatImmOff; def : WaitPatImmOff; -class WaitPatGlobalAddr : - Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$exp, I64:$timeout)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>; -def : WaitPatGlobalAddr; -def : WaitPatGlobalAddr; - // Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset. class WaitPatOffsetOnly : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), @@ -126,6 +113,19 @@ def : WaitPatGlobalAddrOffOnly; def : WaitPatGlobalAddrOffOnly; } // Predicates = [HasAtomics] +//===----------------------------------------------------------------------===// +// Atomic fences +//===----------------------------------------------------------------------===// + +// A compiler fence instruction that prevents reordering of instructions. +let Defs = [ARGUMENTS] in { +let isPseudo = 1, hasSideEffects = 1 in +defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">; +let hasSideEffects = 1 in +defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence", + 0x03>; +} // Defs = [ARGUMENTS] + //===----------------------------------------------------------------------===// // Atomic loads //===----------------------------------------------------------------------===// @@ -151,9 +151,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select loads with just a constant offset. 
def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -244,16 +241,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; // No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Extending loads with just a constant offset def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -313,13 +300,6 @@ def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; -class AStorePatGlobalAddr : - Pat<(kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$val), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; - // Select stores with just a constant offset. class AStorePatOffsetOnly : Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; @@ -374,12 +354,6 @@ def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; -def : AStorePatGlobalAddr; - // Truncating stores with just a constant offset def : AStorePatOffsetOnly; def : AStorePatOffsetOnly; @@ -500,11 +474,6 @@ class BinRMWPatImmOff : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)), (inst 0, imm:$off, I32:$addr, ty:$val)>; -class BinRMWPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$val)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; - // Select binary RMWs with just a constant offset. 
class BinRMWPatOffsetOnly : Pat<(ty (kind imm:$off, ty:$val)), @@ -525,9 +494,6 @@ multiclass BinRMWPattern; def : BinRMWPatImmOff; - def : BinRMWPatGlobalAddr; - def : BinRMWPatGlobalAddr; - def : BinRMWPatOffsetOnly; def : BinRMWPatOffsetOnly; @@ -622,17 +588,6 @@ multiclass BinRMWTruncExtPattern< def : BinRMWPatImmOff, or_is_add, inst8_64>; def : BinRMWPatImmOff, or_is_add, inst16_64>; - def : BinRMWPatGlobalAddr, inst8_32>; - def : BinRMWPatGlobalAddr, inst16_32>; - def : BinRMWPatGlobalAddr, inst8_64>; - def : BinRMWPatGlobalAddr, inst16_64>; - def : BinRMWPatGlobalAddr, inst32_64>; - - def : BinRMWPatGlobalAddr, inst8_32>; - def : BinRMWPatGlobalAddr, inst16_32>; - def : BinRMWPatGlobalAddr, inst8_64>; - def : BinRMWPatGlobalAddr, inst16_64>; - // Truncating-extending binary RMWs with just a constant offset def : BinRMWPatOffsetOnly, inst8_32>; def : BinRMWPatOffsetOnly, inst16_32>; @@ -732,11 +687,6 @@ class TerRMWPatImmOff : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>; -class TerRMWPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$exp, ty:$new)), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>; - // Select ternary RMWs with just a constant offset. 
class TerRMWPatOffsetOnly : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), @@ -757,9 +707,6 @@ multiclass TerRMWPattern; def : TerRMWPatImmOff; - def : TerRMWPatGlobalAddr; - def : TerRMWPatGlobalAddr; - def : TerRMWPatOffsetOnly; def : TerRMWPatOffsetOnly; @@ -846,17 +793,6 @@ multiclass TerRMWTruncExtPattern< def : TerRMWPatImmOff, or_is_add, inst8_64>; def : TerRMWPatImmOff, or_is_add, inst16_64>; - def : TerRMWPatGlobalAddr, inst8_32>; - def : TerRMWPatGlobalAddr, inst16_32>; - def : TerRMWPatGlobalAddr, inst8_64>; - def : TerRMWPatGlobalAddr, inst16_64>; - def : TerRMWPatGlobalAddr, inst32_64>; - - def : TerRMWPatGlobalAddr, inst8_32>; - def : TerRMWPatGlobalAddr, inst16_32>; - def : TerRMWPatGlobalAddr, inst8_64>; - def : TerRMWPatGlobalAddr, inst16_64>; - // Truncating-extending ternary RMWs with just a constant offset def : TerRMWPatOffsetOnly, inst8_32>; def : TerRMWPatOffsetOnly, inst16_32>; @@ -887,13 +823,3 @@ defm : TerRMWTruncExtPattern< ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32, ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64, ATOMIC_RMW32_U_CMPXCHG_I64>; - -//===----------------------------------------------------------------------===// -// Atomic fences -//===----------------------------------------------------------------------===// - -// A compiler fence instruction that prevents reordering of instructions. 
-let Defs = [ARGUMENTS] in { -let isPseudo = 1, hasSideEffects = 1 in -defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">; -} // Defs = [ARGUMENTS] diff --git a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td index f4352e3d12e..05735cf6d31 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -39,7 +39,7 @@ defm MEMORY_INIT : (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest, I32:$offset, I32:$size), (outs), (ins i32imm_op:$seg, i32imm_op:$idx), - [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest, + [(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest, I32:$offset, I32:$size )], "memory.init\t$seg, $idx, $dest, $offset, $size", @@ -48,7 +48,7 @@ defm MEMORY_INIT : let hasSideEffects = 1 in defm DATA_DROP : BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg), - [(int_wasm_data_drop (i32 imm:$seg))], + [(int_wasm_data_drop (i32 timm:$seg))], "data.drop\t$seg", "data.drop\t$seg", 0x09>; let mayLoad = 1, mayStore = 1 in diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 1870c5bc34b..1afc9a8790d 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -84,49 +84,19 @@ let isTerminator = 1, isBarrier = 1 in defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] -multiclass RETURN { - defm RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), - [(WebAssemblyreturn vt:$val)], - "return \t$val", "return", 0x0f>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. 
- let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), (outs), (ins), []>; -} - -multiclass SIMD_RETURN { - defm RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - [(WebAssemblyreturn (vt V128:$val))], - "return \t$val", "return", 0x0f>, - Requires<[HasSIMD128]>; - // Equivalent to RETURN_#vt, for use at the end of a function when wasm - // semantics return by falling off the end of the block. - let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), - []>, - Requires<[HasSIMD128]>; -} let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { let isReturn = 1 in { - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm "": SIMD_RETURN; - defm RETURN_VOID : NRI<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>; +defm RETURN : I<(outs), (ins variable_ops), (outs), (ins), + [(WebAssemblyreturn)], + "return", "return", 0x0f>; +// Equivalent to RETURN, for use at the end of a function when wasm +// semantics return by falling off the end of the block. +let isCodeGenOnly = 1 in +defm FALLTHROUGH_RETURN : I<(outs), (ins variable_ops), (outs), (ins), []>; - // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. 
- let isCodeGenOnly = 1 in - defm FALLTHROUGH_RETURN_VOID : NRI<(outs), (ins), []>; } // isReturn = 1 defm UNREACHABLE : NRI<(outs), (ins), [(trap)], "unreachable", 0x00>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 661fee2715b..f3d9c5d5032 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -171,6 +171,23 @@ defm I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), 0xb1>; } // hasSideEffects = 1 +def : Pat<(int_wasm_trunc_signed F32:$src), + (I32_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I32_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I32_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I32_TRUNC_U_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_signed F32:$src), + (I64_TRUNC_S_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_unsigned F32:$src), + (I64_TRUNC_U_F32 F32:$src)>; +def : Pat<(int_wasm_trunc_signed F64:$src), + (I64_TRUNC_S_F64 F64:$src)>; +def : Pat<(int_wasm_trunc_unsigned F64:$src), + (I64_TRUNC_U_F64 F64:$src)>; + defm F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src), (outs), (ins), [(set F32:$dst, (sint_to_fp I32:$src))], "f32.convert_i32_s\t$dst, $src", "f32.convert_i32_s", diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index a86c9af28f0..8e8126c90e7 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -38,7 +38,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineInstr &MI, AAResults *AA) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: @@ -60,7 +60,7 @@ void 
WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // exist. However we need to handle both here. auto &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(DestReg) + Register::isVirtualRegister(DestReg) ? MRI.getRegClass(DestReg) : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index df1051b4f42..fe6211663c3 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,7 +43,7 @@ public: const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 73ddbe85d55..04490148138 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -106,7 +106,8 @@ def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", SDT_WebAssemblyArgument>; def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", - SDT_WebAssemblyReturn, [SDNPHasChain]>; + SDT_WebAssemblyReturn, + [SDNPHasChain, SDNPVariadic]>; def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", SDT_WebAssemblyWrapper>; def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC", diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 6916b165f97..eba9b80d328 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -37,16 +37,6 @@ def or_is_add : 
PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~Known0.Zero & ~Known1.Zero) == 0; }]>; -// GlobalAddresses are conceptually unsigned values, so we can also fold them -// into immediate values as long as the add is 'nuw'. -// TODO: We'd like to also match GA offsets but there are cases where the -// register can have a negative value. Find out what more we can do. -def regPlusGA : PatFrag<(ops node:$addr, node:$off), - (add node:$addr, node:$off), - [{ - return N->getFlags().hasNoUnsignedWrap(); -}]>; - // We don't need a regPlusES because external symbols never have constant // offsets folded into them, so we can just use add. @@ -93,15 +83,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -class LoadPatGlobalAddr : - Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (inst 0, tglobaladdr:$off, I32:$addr)>, Requires<[IsNotPIC]>; - -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select loads with just a constant offset. class LoadPatOffsetOnly : Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; @@ -167,18 +148,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; - // Select extending loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -224,11 +193,6 @@ def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; // Select "don't care" extending loads with just a constant offset. 
def : LoadPatOffsetOnly; @@ -282,15 +246,6 @@ def : StorePatImmOff; def : StorePatImmOff; def : StorePatImmOff; -class StorePatGlobalAddr : - Pat<(kind ty:$val, - (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>, Requires<[IsNotPIC]>; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; - // Select stores with just a constant offset. class StorePatOffsetOnly : Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; @@ -333,12 +288,6 @@ def : StorePatImmOff; def : StorePatImmOff; def : StorePatImmOff; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; -def : StorePatGlobalAddr; - // Select truncating stores with just a constant offset. def : StorePatOffsetOnly; def : StorePatOffsetOnly; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index dd8930f079b..fc5d73dac52 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -40,47 +40,124 @@ def LaneIdx#SIZE : ImmLeaf; //===----------------------------------------------------------------------===// // Load: v128.load -multiclass SIMDLoad { - let mayLoad = 1, UseNamedOperandTable = 1 in - defm LOAD_#vec_t : +let mayLoad = 1, UseNamedOperandTable = 1 in +defm LOAD_V128 : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.load\t$dst, ${off}(${addr})$p2align", + "v128.load\t$off$p2align", 0>; + +// Def load and store patterns from WebAssemblyInstrMemory.td for vector types +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +def : LoadPatNoOffset; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatOffsetOnly; +def : LoadPatGlobalAddrOffOnly; +} + +// vNxM.load_splat +multiclass SIMDLoadSplat simdop> { + 
let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in + defm LOAD_SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "v128.load\t$dst, ${off}(${addr})$p2align", - "v128.load\t$off$p2align", 0>; + vec#".load_splat\t$dst, ${off}(${addr})$p2align", + vec#".load_splat\t$off$p2align", simdop>; } -foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm "" : SIMDLoad; +defm "" : SIMDLoadSplat<"v8x16", 194>; +defm "" : SIMDLoadSplat<"v16x8", 195>; +defm "" : SIMDLoadSplat<"v32x4", 196>; +defm "" : SIMDLoadSplat<"v64x2", 197>; -// Def load and store patterns from WebAssemblyInstrMemory.td for vector types -def : LoadPatNoOffset("LOAD_"#vec_t)>; -def : LoadPatImmOff("LOAD_"#vec_t)>; -def : LoadPatImmOff("LOAD_"#vec_t)>; -def : LoadPatGlobalAddr("LOAD_"#vec_t)>; -def : LoadPatOffsetOnly("LOAD_"#vec_t)>; -def : LoadPatGlobalAddrOffOnly("LOAD_"#vec_t)>; +def wasm_load_splat_t : SDTypeProfile<1, 1, []>; +def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t>; + +foreach args = [["v16i8", "i32", "extloadi8"], ["v8i16", "i32", "extloadi16"], + ["v4i32", "i32", "load"], ["v2i64", "i64", "load"], + ["v4f32", "f32", "load"], ["v2f64", "f64", "load"]] in +def load_splat_#args[0] : + PatFrag<(ops node:$addr), (wasm_load_splat + (!cast(args[1]) (!cast(args[2]) node:$addr)))>; + +let Predicates = [HasUnimplementedSIMD128] in +foreach args = [["v16i8", "v8x16"], ["v8i16", "v16x8"], ["v4i32", "v32x4"], + ["v2i64", "v64x2"], ["v4f32", "v32x4"], ["v2f64", "v64x2"]] in { +def : LoadPatNoOffset(args[0]), + !cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff(args[0]), + !cast("load_splat_"#args[0]), + regPlusImm, + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatImmOff(args[0]), + !cast("load_splat_"#args[0]), + or_is_add, + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatOffsetOnly(args[0]), + 
!cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; +def : LoadPatGlobalAddrOffOnly(args[0]), + !cast("load_splat_"#args[0]), + !cast("LOAD_SPLAT_"#args[1])>; } +// Load and extend +multiclass SIMDLoadExtend simdop> { + let mayLoad = 1, UseNamedOperandTable = 1, + Predicates = [HasUnimplementedSIMD128] in { + defm LOAD_EXTEND_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"_s\t$dst, ${off}(${addr})$p2align", + name#"_s\t$off$p2align", simdop>; + defm LOAD_EXTEND_U_#vec_t : + SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"_u\t$dst, ${off}(${addr})$p2align", + name#"_u\t$off$p2align", !add(simdop, 1)>; + } +} + +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; +defm "" : SIMDLoadExtend; + +let Predicates = [HasUnimplementedSIMD128] in +foreach types = [[v8i16, i8], [v4i32, i16], [v2i64, i32]] in +foreach exts = [["sextloadv", "_S"], + ["zextloadv", "_U"], + ["extloadv", "_U"]] in { +def : LoadPatNoOffset(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff(exts[0]#types[1]), regPlusImm, + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatImmOff(exts[0]#types[1]), or_is_add, + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatOffsetOnly(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +def : LoadPatGlobalAddrOffOnly(exts[0]#types[1]), + !cast("LOAD_EXTEND"#exts[1]#"_"#types[0])>; +} + + // Store: v128.store -multiclass SIMDStore { - let mayStore = 1, UseNamedOperandTable = 1 in - defm STORE_#vec_t : - SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), - (outs), (ins P2Align:$p2align, offset32_op:$off), [], - "v128.store\t${off}(${addr})$p2align, $vec", - "v128.store\t$off$p2align", 1>; -} +let mayStore = 1, UseNamedOperandTable = 1 in +defm STORE_V128 : + 
SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + "v128.store\t${off}(${addr})$p2align, $vec", + "v128.store\t$off$p2align", 1>; foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { -defm "" : SIMDStore; - // Def load and store patterns from WebAssemblyInstrMemory.td for vector types -def : StorePatNoOffset("STORE_"#vec_t)>; -def : StorePatImmOff("STORE_"#vec_t)>; -def : StorePatImmOff("STORE_"#vec_t)>; -def : StorePatGlobalAddr("STORE_"#vec_t)>; -def : StorePatOffsetOnly("STORE_"#vec_t)>; -def : StorePatGlobalAddrOffOnly("STORE_"#vec_t)>; +def : StorePatNoOffset; +def : StorePatImmOff; +def : StorePatImmOff; +def : StorePatOffsetOnly; +def : StorePatGlobalAddrOffOnly; } //===----------------------------------------------------------------------===// @@ -90,7 +167,7 @@ def : StorePatGlobalAddrOffOnly("STORE_"#vec_t)>; // Constant: v128.const multiclass ConstVec { let isMoveImm = 1, isReMaterializable = 1, - Predicates = [HasSIMD128, HasUnimplementedSIMD128] in + Predicates = [HasUnimplementedSIMD128] in defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, [(set V128:$dst, (vec_t pat))], "v128.const\t$dst, "#args, @@ -198,6 +275,19 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y), (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } +// Swizzle lanes: v8x16.swizzle +def wasm_swizzle_t : SDTypeProfile<1, 2, []>; +def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; +let Predicates = [HasUnimplementedSIMD128] in +defm SWIZZLE : + SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + +def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), + (SWIZZLE V128:$src, V128:$mask)>; + // Create vector with identical lanes: splat def splat2 : PatFrag<(ops node:$x), 
(build_vector node:$x, node:$x)>; def splat4 : PatFrag<(ops node:$x), (build_vector @@ -286,7 +376,7 @@ multiclass ExtractLaneExtended baseInst> { } defm "" : ExtractLaneExtended<"_s", 5>; -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm "" : ExtractLaneExtended<"_u", 6>; defm "" : ExtractLane; defm "" : ExtractLane; @@ -472,6 +562,11 @@ defm OR : SIMDBitwise; defm XOR : SIMDBitwise; } // isCommutable = 1 +// Bitwise logic: v128.andnot +def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; +let Predicates = [HasUnimplementedSIMD128] in +defm ANDNOT : SIMDBitwise; + // Bitwise select: v128.bitselect foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in defm BITSELECT_#vec_t : @@ -655,7 +750,7 @@ defm ABS : SIMDUnaryFP; defm NEG : SIMDUnaryFP; // Square root: sqrt -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm SQRT : SIMDUnaryFP; //===----------------------------------------------------------------------===// @@ -679,7 +774,7 @@ let isCommutable = 1 in defm MUL : SIMDBinaryFP; // Division: div -let Predicates = [HasSIMD128, HasUnimplementedSIMD128] in +let Predicates = [HasUnimplementedSIMD128] in defm DIV : SIMDBinaryFP; // NaN-propagating minimum: min @@ -712,6 +807,42 @@ defm "" : SIMDConvert; defm "" : SIMDConvert; defm "" : SIMDConvert; +// Widening operations +multiclass SIMDWiden baseInst> { + defm "" : SIMDConvert; + defm "" : SIMDConvert; + defm "" : SIMDConvert; + defm "" : SIMDConvert; +} + +defm "" : SIMDWiden; +defm "" : SIMDWiden; + +// Narrowing operations +multiclass SIMDNarrow baseInst> { + defm NARROW_S_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_signed + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_s\t$dst, $low, $high", vec#".narrow_"#arg#"_s", + baseInst>; + defm NARROW_U_#vec_t : + 
SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), + [(set (vec_t V128:$dst), (vec_t (int_wasm_narrow_unsigned + (arg_t V128:$low), (arg_t V128:$high))))], + vec#".narrow_"#arg#"_u\t$dst, $low, $high", vec#".narrow_"#arg#"_u", + !add(baseInst, 1)>; +} + +defm "" : SIMDNarrow; +defm "" : SIMDNarrow; + // Lower llvm.wasm.trunc.saturate.* to saturating instructions def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), (fp_to_sint_v4i32_v4f32 (v4f32 V128:$src))>; @@ -732,3 +863,25 @@ foreach t2 = !foldl( ) ) in def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; + +//===----------------------------------------------------------------------===// +// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) +//===----------------------------------------------------------------------===// + +multiclass SIMDQFM baseInst> { + defm QFMA_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; + defm QFMS_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; +} + +defm "" : SIMDQFM; +defm "" : SIMDQFM; diff --git a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index e92b3443027..75d04252cbe 100644 --- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "wasm-late-eh-prepare" @@ -131,7 +132,7 @@ bool 
WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) { auto InsertPos = MBB.begin(); if (InsertPos->isEHLabel()) // EH pad starts with an EH label ++InsertPos; - unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); + Register DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(), TII.get(WebAssembly::CATCH), DstReg); } @@ -168,7 +169,7 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) { if (CatchPos->isEHLabel()) // EH pad starts with an EH label ++CatchPos; MachineInstr *Catch = &*CatchPos; - unsigned ExnReg = Catch->getOperand(0).getReg(); + Register ExnReg = Catch->getOperand(0).getReg(); BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW)) .addReg(ExnReg); TI->eraseFromParent(); @@ -233,6 +234,7 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( // it. The pseudo instruction will be deleted later. bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); auto *EHInfo = MF.getWasmEHFuncInfo(); SmallVector ExtractInstrs; SmallVector ToDelete; @@ -292,7 +294,7 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { // thenbb: // %exn:i32 = extract_exception // ... use exn ... 
- unsigned ExnReg = Catch->getOperand(0).getReg(); + Register ExnReg = Catch->getOperand(0).getReg(); auto *ThenMBB = MF.CreateMachineBasicBlock(); auto *ElseMBB = MF.CreateMachineBasicBlock(); MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB); @@ -339,9 +341,11 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { WebAssembly::ClangCallTerminateFn); assert(ClangCallTerminateFn && "There is no __clang_call_terminate() function"); + Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(ElseMBB, DL, TII.get(WebAssembly::CONST_I32), Reg).addImm(0); BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID)) .addGlobalAddress(ClangCallTerminateFn) - .addImm(0); + .addReg(Reg); BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE)); } else { diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 34a8195ac4b..4314aa61154 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -68,7 +68,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { if (MI->getOpcode() != WebAssembly::BR_UNLESS) continue; - unsigned Cond = MI->getOperand(1).getReg(); + Register Cond = MI->getOperand(1).getReg(); bool Inverted = false; // Attempt to invert the condition in place. @@ -188,7 +188,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { // If we weren't able to invert the condition in place. Insert an // instruction to invert it. 
if (!Inverted) { - unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) .addReg(Cond); MFI.stackifyVReg(Tmp); diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 960d5134f6e..1cf397dd060 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -227,15 +227,6 @@ static cl::list namespace { class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { - static const char *ResumeFName; - static const char *EHTypeIDFName; - static const char *EmLongjmpFName; - static const char *EmLongjmpJmpbufFName; - static const char *SaveSetjmpFName; - static const char *TestSetjmpFName; - static const char *FindMatchingCatchPrefix; - static const char *InvokePrefix; - bool EnableEH; // Enable exception handling bool EnableSjLj; // Enable setjmp/longjmp handling @@ -274,6 +265,7 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); } bool canLongjmp(Module &M, const Value *Callee) const; + bool isEmAsmCall(Module &M, const Value *Callee) const; void rebuildSSA(Function &F); @@ -292,19 +284,6 @@ public: }; } // End anonymous namespace -const char *WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName = - "llvm_eh_typeid_for"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName = - "emscripten_longjmp"; -const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName = - "emscripten_longjmp_jmpbuf"; -const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp"; -const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = "testSetjmp"; -const char 
*WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix = - "__cxa_find_matching_catch_"; -const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_"; - char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp", @@ -335,7 +314,8 @@ static bool canThrow(const Value *V) { static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB, const char *Name) { - auto* GV = dyn_cast(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); + auto *GV = + dyn_cast(M.getOrInsertGlobal(Name, IRB.getInt32Ty())); if (!GV) report_fatal_error(Twine("unable to create global: ") + Name); @@ -376,9 +356,9 @@ WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); SmallVector Args(NumClauses, Int8PtrTy); FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); - Function *F = - Function::Create(FTy, GlobalValue::ExternalLinkage, - FindMatchingCatchPrefix + Twine(NumClauses + 2), &M); + Function *F = Function::Create( + FTy, GlobalValue::ExternalLinkage, + "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); FindMatchingCatches[NumClauses] = F; return F; } @@ -418,7 +398,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { Args.append(CI->arg_begin(), CI->arg_end()); CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); NewCall->takeName(CI); - NewCall->setCallingConv(CI->getCallingConv()); + NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); NewCall->setDebugLoc(CI->getDebugLoc()); // Because we added the pointer to the callee as first argument, all @@ -432,9 +412,22 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I) ArgAttributes.push_back(InvokeAL.getParamAttributes(I)); + AttrBuilder FnAttrs(InvokeAL.getFnAttributes()); + if 
(FnAttrs.contains(Attribute::AllocSize)) { + // The allocsize attribute (if any) referes to parameters by index and needs + // to be adjusted. + unsigned SizeArg; + Optional NEltArg; + std::tie(SizeArg, NEltArg) = FnAttrs.getAllocSizeArgs(); + SizeArg += 1; + if (NEltArg.hasValue()) + NEltArg = NEltArg.getValue() + 1; + FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); + } + // Reconstruct the AttributesList based on the vector we constructed. AttributeList NewCallAL = - AttributeList::get(C, InvokeAL.getFnAttributes(), + AttributeList::get(C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttributes(), ArgAttributes); NewCall->setAttributes(NewCallAL); @@ -473,8 +466,8 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) { FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, CalleeFTy->isVarArg()); - Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, - InvokePrefix + Sig, M); + Function *F = + Function::Create(FTy, GlobalValue::ExternalLinkage, "__invoke_" + Sig, M); InvokeWrappers[Sig] = F; return F; } @@ -491,39 +484,44 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M, // and can't be passed by pointer. The result is a crash with illegal IR. if (isa(Callee)) return false; + StringRef CalleeName = Callee->getName(); // The reason we include malloc/free here is to exclude the malloc/free // calls generated in setjmp prep / cleanup routines. 
- Function *SetjmpF = M.getFunction("setjmp"); - Function *MallocF = M.getFunction("malloc"); - Function *FreeF = M.getFunction("free"); - if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF) + if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free") return false; // There are functions in JS glue code - if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF || - Callee == TestSetjmpF) + if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" || + CalleeName == "saveSetjmp" || CalleeName == "testSetjmp" || + CalleeName == "getTempRet0" || CalleeName == "setTempRet0") return false; // __cxa_find_matching_catch_N functions cannot longjmp - if (Callee->getName().startswith(FindMatchingCatchPrefix)) + if (Callee->getName().startswith("__cxa_find_matching_catch_")) return false; // Exception-catching related functions - Function *BeginCatchF = M.getFunction("__cxa_begin_catch"); - Function *EndCatchF = M.getFunction("__cxa_end_catch"); - Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception"); - Function *ThrowF = M.getFunction("__cxa_throw"); - Function *TerminateF = M.getFunction("__clang_call_terminate"); - if (Callee == BeginCatchF || Callee == EndCatchF || - Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF || - Callee == GetTempRet0Func || Callee == SetTempRet0Func) + if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_end_catch" || + CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || + CalleeName == "__clang_call_terminate") return false; // Otherwise we don't know return true; } +bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M, + const Value *Callee) const { + StringRef CalleeName = Callee->getName(); + // This is an exhaustive list from Emscripten's . 
+ return CalleeName == "emscripten_asm_const_int" || + CalleeName == "emscripten_asm_const_double" || + CalleeName == "emscripten_asm_const_int_sync_on_main_thread" || + CalleeName == "emscripten_asm_const_double_sync_on_main_thread" || + CalleeName == "emscripten_asm_const_async_on_main_thread"; +} + // Generate testSetjmp function call seqence with preamble and postamble. // The code this generates is equivalent to the following JavaScript code: // if (%__THREW__.val != 0 & threwValue != 0) { @@ -605,15 +603,12 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { SSAUpdater SSA; for (BasicBlock &BB : F) { for (Instruction &I : BB) { + SSA.Initialize(I.getType(), I.getName()); + SSA.AddAvailableValue(&BB, &I); for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) { Use &U = *UI; ++UI; - SSA.Initialize(I.getType(), I.getName()); - SSA.AddAvailableValue(&BB, &I); auto *User = cast(U.getUser()); - if (User->getParent() == &BB) - continue; - if (auto *UserPN = dyn_cast(User)) if (UserPN->getIncomingBlock(U) == &BB) continue; @@ -660,13 +655,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { FunctionType *ResumeFTy = FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false); ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage, - ResumeFName, &M); + "__resumeException", &M); // Register llvm_eh_typeid_for function FunctionType *EHTypeIDTy = FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false); EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage, - EHTypeIDFName, &M); + "llvm_eh_typeid_for", &M); for (Function &F : M) { if (F.isDeclaration()) @@ -684,7 +679,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { // defined in JS code EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(), GlobalValue::ExternalLinkage, - EmLongjmpJmpbufFName, &M); + "emscripten_longjmp_jmpbuf", &M); LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF); } @@ -697,19 +692,19 @@ bool 
WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { IRB.getInt32Ty()}; FunctionType *FTy = FunctionType::get(Type::getInt32PtrTy(C), Params, false); - SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - SaveSetjmpFName, &M); + SaveSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "saveSetjmp", &M); // Register testSetjmp function Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()}; FTy = FunctionType::get(IRB.getInt32Ty(), Params, false); - TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - TestSetjmpFName, &M); + TestSetjmpF = + Function::Create(FTy, GlobalValue::ExternalLinkage, "testSetjmp", &M); FTy = FunctionType::get(IRB.getVoidTy(), {IRB.getInt32Ty(), IRB.getInt32Ty()}, false); EmLongjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage, - EmLongjmpFName, &M); + "emscripten_longjmp", &M); // Only traverse functions that uses setjmp in order not to insert // unnecessary prep / cleanup code in every function @@ -970,10 +965,16 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { const Value *Callee = CI->getCalledValue(); if (!canLongjmp(M, Callee)) continue; + if (isEmAsmCall(M, Callee)) + report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " + + F.getName() + + ". 
Please consider using EM_JS, or move the " + "EM_ASM into another function.", + false); Value *Threw = nullptr; BasicBlock *Tail; - if (Callee->getName().startswith(InvokePrefix)) { + if (Callee->getName().startswith("__invoke_")) { // If invoke wrapper has already been generated for this call in // previous EH phase, search for the load instruction // %__THREW__.val = __THREW__; diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp index 494d3fadbc8..750b2233e67 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp @@ -94,7 +94,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) { break; // Found a null terminator, skip the rest. Constant *Associated = CS->getOperand(2); - Associated = cast(Associated->stripPointerCastsNoFollowAliases()); + Associated = cast(Associated->stripPointerCasts()); DtorFuncs[PriorityValue][Associated].push_back(DtorFunc); } diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 288b991ae2c..59c10243c54 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -79,7 +79,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( // Clang-provided symbols. 
if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || - strcmp(Name, "__tls_size") == 0) { + strcmp(Name, "__tls_size") == 0 || strcmp(Name, "__tls_align") == 0) { bool Mutable = strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); @@ -115,7 +115,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( getLibcallSignature(Subtarget, Name, Returns, Params); } auto Signature = - make_unique(std::move(Returns), std::move(Params)); + std::make_unique(std::move(Returns), std::move(Params)); WasmSym->setSignature(Signature.get()); Printer.addSignature(std::move(Signature)); @@ -163,6 +163,21 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO, return MCOperand::createExpr(Expr); } +MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand( + SmallVector &&Returns, + SmallVector &&Params) const { + auto Signature = std::make_unique(std::move(Returns), + std::move(Params)); + MCSymbol *Sym = Printer.createTempSymbol("typeindex"); + auto *WasmSym = cast(Sym); + WasmSym->setSignature(Signature.get()); + Printer.addSignature(std::move(Signature)); + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + const MCExpr *Expr = + MCSymbolRefExpr::create(WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); + return MCOperand::createExpr(Expr); +} + // Return the WebAssembly type associated with the given register class. 
static wasm::ValType getType(const TargetRegisterClass *RC) { if (RC == &WebAssembly::I32RegClass) @@ -178,6 +193,16 @@ static wasm::ValType getType(const TargetRegisterClass *RC) { llvm_unreachable("Unexpected register class"); } +static void getFunctionReturns(const MachineInstr *MI, + SmallVectorImpl &Returns) { + const Function &F = MI->getMF()->getFunction(); + const TargetMachine &TM = MI->getMF()->getTarget(); + Type *RetTy = F.getReturnType(); + SmallVector CallerRetTys; + computeLegalValueVTs(F, TM, RetTy, CallerRetTys); + valTypesFromMVTs(CallerRetTys, Returns); +} + void WebAssemblyMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -208,8 +233,6 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, if (I < Desc.NumOperands) { const MCOperandInfo &Info = Desc.OpInfo[I]; if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { - MCSymbol *Sym = Printer.createTempSymbol("typeindex"); - SmallVector Returns; SmallVector Params; @@ -226,17 +249,23 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, if (WebAssembly::isCallIndirect(MI->getOpcode())) Params.pop_back(); - auto *WasmSym = cast(Sym); - auto Signature = make_unique(std::move(Returns), - std::move(Params)); - WasmSym->setSignature(Signature.get()); - Printer.addSignature(std::move(Signature)); - WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + // return_call_indirect instructions have the return type of the + // caller + if (MI->getOpcode() == WebAssembly::RET_CALL_INDIRECT) + getFunctionReturns(MI, Returns); - const MCExpr *Expr = MCSymbolRefExpr::create( - WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx); - MCOp = MCOperand::createExpr(Expr); + MCOp = lowerTypeIndexOperand(std::move(Returns), std::move(Params)); break; + } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { + auto BT = static_cast(MO.getImm()); + assert(BT != WebAssembly::BlockType::Invalid); + if (BT == WebAssembly::BlockType::Multivalue) { + 
SmallVector Returns; + getFunctionReturns(MI, Returns); + MCOp = lowerTypeIndexOperand(std::move(Returns), + SmallVector()); + break; + } } } MCOp = MCOperand::createImm(MO.getImm()); diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h index 2c375a01a7f..d79c54097eb 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Compiler.h" @@ -33,6 +34,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + MCOperand lowerTypeIndexOperand(SmallVector &&, + SmallVector &&) const; public: WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer) diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index d31c1226bfd..e4cc2389147 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -49,10 +49,12 @@ void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F, computeLegalValueVTs(F, TM, Ty->getReturnType(), Results); MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()); - if (Results.size() > 1) { - // WebAssembly currently can't lower returns of multiple values without - // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So - // replace multiple return values with a pointer parameter. 
+ if (Results.size() > 1 && + !TM.getSubtarget(F).hasMultivalue()) { + // WebAssembly can't lower returns of multiple values without demoting to + // sret unless multivalue is enabled (see + // WebAssemblyTargetLowering::CanLowerReturn). So replace multiple return + // values with a pointer parameter. Results.clear(); Params.push_back(PtrVT); } @@ -72,7 +74,7 @@ void llvm::valTypesFromMVTs(const ArrayRef &In, std::unique_ptr llvm::signatureFromMVTs(const SmallVectorImpl &Results, const SmallVectorImpl &Params) { - auto Sig = make_unique(); + auto Sig = std::make_unique(); valTypesFromMVTs(Results, Sig->Returns); valTypesFromMVTs(Params, Sig->Params); return Sig; diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 4b9ba491dee..16e2f439298 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -96,13 +96,18 @@ public: void stackifyVReg(unsigned VReg) { assert(MF.getRegInfo().getUniqueVRegDef(VReg)); - auto I = TargetRegisterInfo::virtReg2Index(VReg); + auto I = Register::virtReg2Index(VReg); if (I >= VRegStackified.size()) VRegStackified.resize(I + 1); VRegStackified.set(I); } + void unstackifyVReg(unsigned VReg) { + auto I = Register::virtReg2Index(VReg); + if (I < VRegStackified.size()) + VRegStackified.reset(I); + } bool isVRegStackified(unsigned VReg) const { - auto I = TargetRegisterInfo::virtReg2Index(VReg); + auto I = Register::virtReg2Index(VReg); if (I >= VRegStackified.size()) return false; return VRegStackified.test(I); @@ -111,12 +116,12 @@ public: void initWARegs(); void setWAReg(unsigned VReg, unsigned WAReg) { assert(WAReg != UnusedReg); - auto I = TargetRegisterInfo::virtReg2Index(VReg); + auto I = Register::virtReg2Index(VReg); assert(I < WARegs.size()); WARegs[I] = WAReg; } unsigned getWAReg(unsigned VReg) const { - auto I = TargetRegisterInfo::virtReg2Index(VReg); + auto I = 
Register::virtReg2Index(VReg); assert(I < WARegs.size()); return WARegs[I]; } diff --git a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp index 7ac0511c28b..ac428fcc826 100644 --- a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp @@ -166,8 +166,8 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, if (!LibInfo.getLibFunc(Name, Func)) return false; - unsigned FromReg = MI.getOperand(2).getReg(); - unsigned ToReg = MI.getOperand(0).getReg(); + Register FromReg = MI.getOperand(2).getReg(); + Register ToReg = MI.getOperand(0).getReg(); if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) report_fatal_error("Memory Intrinsic results: call to builtin function " "with wrong signature, from/to mismatch"); @@ -184,7 +184,8 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) { auto &MDT = getAnalysis(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - const auto &LibInfo = getAnalysis().getTLI(); + const auto &LibInfo = + getAnalysis().getTLI(MF.getFunction()); auto &LIS = getAnalysis(); bool Changed = false; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 8c7c3305c20..0bd30791e57 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -81,7 +81,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction( // Split multiple-VN LiveIntervals into multiple LiveIntervals. 
SmallVector SplitLIs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(Reg)) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index d20352259e0..9b60596e42b 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -64,11 +64,8 @@ void OptimizeReturned::visitCallSite(CallSite CS) { if (isa(Arg)) continue; // Like replaceDominatedUsesWith but using Instruction/Use dominance. - for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { - Use &U = *UI++; - if (DT->dominates(Inst, U)) - U.set(Inst); - } + Arg->replaceUsesWithIf(Inst, + [&](Use &U) { return DT->dominates(Inst, U); }); } } diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index e11cdeaa0e7..ea6cd09a604 100644 --- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -63,7 +63,7 @@ static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg, bool Changed = false; if (OldReg == NewReg) { Changed = true; - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); MFI.stackifyVReg(NewReg); @@ -75,9 +75,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, const MachineFunction &MF, WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, - const WebAssemblyInstrInfo &TII, - unsigned FallthroughOpc, - unsigned CopyLocalOpc) { + const WebAssemblyInstrInfo &TII) { if (DisableWebAssemblyFallthroughReturnOpt) return false; if (&MBB != &MF.back()) @@ -90,13 +88,36 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, if (&MI != 
&*End) return false; - if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) { - // If the operand isn't stackified, insert a COPY to read the operand and - // stackify it. - MachineOperand &MO = MI.getOperand(0); - unsigned Reg = MO.getReg(); + for (auto &MO : MI.explicit_operands()) { + // If the operand isn't stackified, insert a COPY to read the operands and + // stackify them. + Register Reg = MO.getReg(); if (!MFI.isVRegStackified(Reg)) { - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + unsigned CopyLocalOpc; + const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); + switch (RegClass->getID()) { + case WebAssembly::I32RegClassID: + CopyLocalOpc = WebAssembly::COPY_I32; + break; + case WebAssembly::I64RegClassID: + CopyLocalOpc = WebAssembly::COPY_I64; + break; + case WebAssembly::F32RegClassID: + CopyLocalOpc = WebAssembly::COPY_F32; + break; + case WebAssembly::F64RegClassID: + CopyLocalOpc = WebAssembly::COPY_F64; + break; + case WebAssembly::V128RegClassID: + CopyLocalOpc = WebAssembly::COPY_V128; + break; + case WebAssembly::EXNREFRegClassID: + CopyLocalOpc = WebAssembly::COPY_EXNREF; + break; + default: + llvm_unreachable("Unexpected register class for return operand"); + } + Register NewReg = MRI.createVirtualRegister(RegClass); BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) .addReg(Reg); MO.setReg(NewReg); @@ -104,8 +125,7 @@ static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, } } - // Rewrite the return. 
- MI.setDesc(TII.get(FallthroughOpc)); + MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN)); return true; } @@ -120,7 +140,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { const auto &TII = *MF.getSubtarget().getInstrInfo(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); - auto &LibInfo = getAnalysis().getTLI(); + auto &LibInfo = + getAnalysis().getTLI(MF.getFunction()); bool Changed = false; for (auto &MBB : MF) @@ -143,8 +164,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { report_fatal_error("Peephole: call to builtin function with " "wrong signature, not consuming reg"); MachineOperand &MO = MI.getOperand(0); - unsigned OldReg = MO.getReg(); - unsigned NewReg = Op2.getReg(); + Register OldReg = MO.getReg(); + Register NewReg = Op2.getReg(); if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg)) report_fatal_error("Peephole: call to builtin function with " @@ -156,60 +177,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { break; } // Optimize away an explicit void return at the end of the function. 
- case WebAssembly::RETURN_I32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32, - WebAssembly::COPY_I32); - break; - case WebAssembly::RETURN_I64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64, - WebAssembly::COPY_I64); - break; - case WebAssembly::RETURN_F32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32, - WebAssembly::COPY_F32); - break; - case WebAssembly::RETURN_F64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, - WebAssembly::COPY_F64); - break; - case WebAssembly::RETURN_v16i8: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v8i16: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4i32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2i64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v4f32: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_v2f64: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64, - WebAssembly::COPY_V128); - break; - case WebAssembly::RETURN_VOID: - Changed |= maybeRewriteToFallthrough( - MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID, - WebAssembly::INSTRUCTION_LIST_END); + case WebAssembly::RETURN: + Changed |= 
maybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII); break; } diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 3bfbf607344..799b9388097 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -95,7 +95,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( // TODO: This is fairly heavy-handed; find a better approach. // for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + unsigned Reg = Register::index2VirtReg(I); // Skip unused registers. if (MRI.use_nodbg_empty(Reg)) diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 6f09c45b664..043b6f1b7d1 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -98,7 +98,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Interesting register intervals:\n"); for (unsigned I = 0; I < NumVRegs; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); if (MFI.isVRegStackified(VReg)) continue; // Skip unused registers, which can use $drop. 
@@ -157,9 +157,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { Changed |= Old != New; UsedColors.set(Color); Assignments[Color].push_back(LI); - LLVM_DEBUG( - dbgs() << "Assigning vreg" << TargetRegisterInfo::virtReg2Index(LI->reg) - << " to vreg" << TargetRegisterInfo::virtReg2Index(New) << "\n"); + LLVM_DEBUG(dbgs() << "Assigning vreg" << Register::virtReg2Index(LI->reg) + << " to vreg" << Register::virtReg2Index(New) << "\n"); } if (!Changed) return false; diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index cdca23f55b2..72e7a7cf504 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -89,7 +89,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { // Start the numbering for locals after the arg regs unsigned CurReg = MFI.getParams().size(); for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx); + unsigned VReg = Register::index2VirtReg(VRegIdx); // Skip unused registers. 
if (MRI.use_empty(VReg)) continue; diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index a120a647101..421d353a89e 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -120,7 +120,7 @@ static void convertImplicitDefToConstZero(MachineInstr *MI, Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::V128RegClass) { - unsigned TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MI->setDesc(TII->get(WebAssembly::SPLAT_v4i32)); MI->addOperand(MachineOperand::CreateReg(TempReg, false)); MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), @@ -334,14 +334,14 @@ static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, for (const MachineOperand &MO : Def->operands()) { if (!MO.isReg() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // If the register is dead here and at Insert, ignore it. if (MO.isDead() && Insert->definesRegister(Reg) && !Insert->readsRegister(Reg)) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions // from moving down, and we've already checked for that. 
if (Reg == WebAssembly::ARGUMENTS) @@ -436,8 +436,8 @@ static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, const MachineOperand &MO = UseInst->getOperand(0); if (!MO.isReg()) return false; - unsigned DefReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg) || + Register DefReg = MO.getReg(); + if (!Register::isVirtualRegister(DefReg) || !MFI.isVRegStackified(DefReg)) return false; assert(MRI.hasOneNonDBGUse(DefReg)); @@ -499,7 +499,7 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op, } else { // The register may have unrelated uses or defs; create a new register for // just our one def and use so that we can stackify it. - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); Def->getOperand(0).setReg(NewReg); Op.setReg(NewReg); @@ -535,7 +535,7 @@ static MachineInstr *rematerializeCheapDef( WebAssemblyDebugValueManager DefDIs(&Def); - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI); Op.setReg(NewReg); MachineInstr *Clone = &*std::prev(Insert); @@ -607,8 +607,8 @@ static MachineInstr *moveAndTeeForMultiUse( // Create the Tee and attach the registers. 
const auto *RegClass = MRI.getRegClass(Reg); - unsigned TeeReg = MRI.createVirtualRegister(RegClass); - unsigned DefReg = MRI.createVirtualRegister(RegClass); + Register TeeReg = MRI.createVirtualRegister(RegClass); + Register DefReg = MRI.createVirtualRegister(RegClass); MachineOperand &DefMO = Def->getOperand(0); MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), TII->get(getTeeOpcode(RegClass)), TeeReg) @@ -807,11 +807,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (!Op.isReg()) continue; - unsigned Reg = Op.getReg(); + Register Reg = Op.getReg(); assert(Op.isUse() && "explicit_uses() should only iterate over uses"); assert(!Op.isImplicit() && "explicit_uses() should only iterate over explicit operands"); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Identify the definition for this register at this point. @@ -915,7 +915,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { for (MachineOperand &MO : reverse(MI.explicit_operands())) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (MFI.isVRegStackified(Reg)) { if (MO.isDef()) diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index ea9cfc00adf..789a025794e 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -91,8 +91,8 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( if (MI.getOpcode() == WebAssembly::ADD_I32) { MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); if (OtherMO.isReg()) { - unsigned OtherMOReg = OtherMO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) { + Register OtherMOReg = OtherMO.getReg(); + if (Register::isVirtualRegister(OtherMOReg)) { MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); // TODO: For now we just opportunistically do this in the 
case where // the CONST_I32 happens to have exactly one def and one use. We @@ -117,7 +117,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( // Create i32.add SP, offset and make it the operand. const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); - unsigned OffsetOp = MRI.createVirtualRegister(PtrRC); + Register OffsetOp = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetOp) .addImm(FrameOffset); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 7e65368e671..bdf5fe2620a 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -140,7 +140,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, std::string FS) const { auto &I = SubtargetMap[CPU + FS]; if (!I) { - I = llvm::make_unique(TargetTriple, CPU, FS, *this); + I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 46ef765ce0f..1c53e90daea 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,10 +25,11 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { return TargetTransformInfo::PSK_FastHardware; } -unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { - unsigned Result = BaseT::getNumberOfRegisters(Vector); +unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { + unsigned Result = BaseT::getNumberOfRegisters(ClassID); // For SIMD, use at least 16 registers, as a rough guess. 
+ bool Vector = (ClassID == 1); if (Vector) Result = std::max(Result, 16u); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 1b11b4b631e..f0ecc73e91d 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -53,7 +53,7 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index e9d88d4818a..a237da8154a 100644 --- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -32,9 +32,8 @@ bool WebAssembly::isChild(const MachineInstr &MI, const MachineOperand &MO = MI.getOperand(0); if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) return false; - unsigned Reg = MO.getReg(); - return TargetRegisterInfo::isVirtualRegister(Reg) && - MFI.isVRegStackified(Reg); + Register Reg = MO.getReg(); + return Register::isVirtualRegister(Reg) && MFI.isVRegStackified(Reg); } bool WebAssembly::mayThrow(const MachineInstr &MI) { @@ -51,7 +50,21 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { return false; const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode())); - assert(MO.isGlobal()); + assert(MO.isGlobal() || MO.isSymbol()); + + if (MO.isSymbol()) { + // Some intrinsics are lowered to calls to external symbols, which are then + // lowered to calls to library functions. Most of libcalls don't throw, but + // we only list some of them here now. + // TODO Consider adding 'nounwind' info in TargetLowering::CallLoweringInfo + // instead for more accurate info. 
+ const char *Name = MO.getSymbolName(); + if (strcmp(Name, "memcpy") == 0 || strcmp(Name, "memmove") == 0 || + strcmp(Name, "memset") == 0) + return false; + return true; + } + const auto *F = dyn_cast(MO.getGlobal()); if (!F) return true; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 95cbf46d37e..25be79ec2b1 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -870,6 +870,14 @@ private: bool parseDirectiveFPOEndProc(SMLoc L); bool parseDirectiveFPOData(SMLoc L); + /// SEH directives. + bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo); + bool parseDirectiveSEHPushReg(SMLoc); + bool parseDirectiveSEHSetFrame(SMLoc); + bool parseDirectiveSEHSaveReg(SMLoc); + bool parseDirectiveSEHSaveXMM(SMLoc); + bool parseDirectiveSEHPushFrame(SMLoc); + unsigned checkTargetMatchPredicate(MCInst &Inst) override; bool validateInstruction(MCInst &Inst, const OperandVector &Ops); @@ -955,6 +963,8 @@ private: public: enum X86MatchResultTy { Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "X86GenAsmMatcher.inc" }; X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, @@ -3173,6 +3183,13 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, EmitInstruction(Inst, Operands, Out); Opcode = Inst.getOpcode(); return false; + case Match_InvalidImmUnsignedi4: { + SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", + EmptyRange, MatchingInlineAsm); + } case Match_MissingFeature: return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm); case Match_InvalidOperand: @@ -3520,6 +3537,15 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MatchingInlineAsm); } + if (std::count(std::begin(Match), std::end(Match), + 
Match_InvalidImmUnsignedi4) == 1) { + SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", + EmptyRange, MatchingInlineAsm); + } + // If all of these were an outright failure, report it in a useless way. return Error(IDLoc, "unknown instruction mnemonic", EmptyRange, MatchingInlineAsm); @@ -3572,6 +3598,16 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); else if (IDVal == ".cv_fpo_endproc") return parseDirectiveFPOEndProc(DirectiveID.getLoc()); + else if (IDVal == ".seh_pushreg") + return parseDirectiveSEHPushReg(DirectiveID.getLoc()); + else if (IDVal == ".seh_setframe") + return parseDirectiveSEHSetFrame(DirectiveID.getLoc()); + else if (IDVal == ".seh_savereg") + return parseDirectiveSEHSaveReg(DirectiveID.getLoc()); + else if (IDVal == ".seh_savexmm") + return parseDirectiveSEHSaveXMM(DirectiveID.getLoc()); + else if (IDVal == ".seh_pushframe") + return parseDirectiveSEHPushFrame(DirectiveID.getLoc()); return true; } @@ -3708,6 +3744,140 @@ bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { return getTargetStreamer().emitFPOEndProc(L); } +bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID, + unsigned &RegNo) { + SMLoc startLoc = getLexer().getLoc(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + + // Try parsing the argument as a register first. + if (getLexer().getTok().isNot(AsmToken::Integer)) { + SMLoc endLoc; + if (ParseRegister(RegNo, startLoc, endLoc)) + return true; + + if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) { + return Error(startLoc, + "register is not supported for use with this directive"); + } + } else { + // Otherwise, an integer number matching the encoding of the desired + // register may appear. 
+ int64_t EncodedReg; + if (getParser().parseAbsoluteExpression(EncodedReg)) + return true; + + // The SEH register number is the same as the encoding register number. Map + // from the encoding back to the LLVM register number. + RegNo = 0; + for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { + if (MRI->getEncodingValue(Reg) == EncodedReg) { + RegNo = Reg; + break; + } + } + if (RegNo == 0) { + return Error(startLoc, + "incorrect register number for use with this directive"); + } + } + + return false; +} + +bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { + unsigned Reg = 0; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFIPushReg(Reg, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify a stack pointer offset"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify an offset on the stack"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); + return false; +} + +bool 
X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { + unsigned Reg = 0; + int64_t Off; + if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("you must specify an offset on the stack"); + + getParser().Lex(); + if (getParser().parseAbsoluteExpression(Off)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); + return false; +} + +bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { + bool Code = false; + StringRef CodeID; + if (getLexer().is(AsmToken::At)) { + SMLoc startLoc = getLexer().getLoc(); + getParser().Lex(); + if (!getParser().parseIdentifier(CodeID)) { + if (CodeID != "code") + return Error(startLoc, "expected @code"); + Code = true; + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getParser().Lex(); + getStreamer().EmitWinCFIPushFrame(Code, Loc); + return false; +} + // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser X(getTheX86_32Target()); diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h index 5bc979d1f18..e9be28ca77b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h +++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h @@ -35,6 +35,10 @@ inline bool isImmUnsignedi8Value(uint64_t Value) { return isUInt<8>(Value) || isInt<8>(Value); } +inline bool isImmUnsignedi4Value(uint64_t Value) { + return isUInt<4>(Value); +} + } // End of namespace llvm #endif diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index a771ba36631..3a76d023e64 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -260,6 +260,15 @@ struct X86Operand final : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } + bool isImmUnsignedi4() const { + if (!isImm()) return false; + // If this isn't a constant expr, reject it. The immediate byte is shared + // with a register encoding. We can't have it affected by a relocation. 
+ const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + return isImmUnsignedi4Value(CE->getValue()); + } + bool isImmUnsignedi8() const { if (!isImm()) return false; // If this isn't a constant expr, just assume it fits and let relaxation @@ -491,7 +500,7 @@ struct X86Operand final : public MCParsedAsmOperand { void addGR32orGR64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - unsigned RegNo = getReg(); + MCRegister RegNo = getReg(); if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo)) RegNo = getX86SubSuperRegister(RegNo, 32); Inst.addOperand(MCOperand::createReg(RegNo)); @@ -572,7 +581,7 @@ struct X86Operand final : public MCParsedAsmOperand { static std::unique_ptr CreateToken(StringRef Str, SMLoc Loc) { SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); - auto Res = llvm::make_unique(Token, Loc, EndLoc); + auto Res = std::make_unique(Token, Loc, EndLoc); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); return Res; @@ -582,7 +591,7 @@ struct X86Operand final : public MCParsedAsmOperand { CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(), StringRef SymName = StringRef(), void *OpDecl = nullptr) { - auto Res = llvm::make_unique(Register, StartLoc, EndLoc); + auto Res = std::make_unique(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; @@ -593,19 +602,19 @@ struct X86Operand final : public MCParsedAsmOperand { static std::unique_ptr CreateDXReg(SMLoc StartLoc, SMLoc EndLoc) { - return llvm::make_unique(DXRegister, StartLoc, EndLoc); + return std::make_unique(DXRegister, StartLoc, EndLoc); } static std::unique_ptr CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) { - auto Res = llvm::make_unique(Prefix, StartLoc, EndLoc); + auto Res = std::make_unique(Prefix, StartLoc, EndLoc); Res->Pref.Prefixes = Prefixes; return Res; } static 
std::unique_ptr CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc) { - auto Res = llvm::make_unique(Immediate, StartLoc, EndLoc); + auto Res = std::make_unique(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; return Res; } @@ -615,7 +624,7 @@ struct X86Operand final : public MCParsedAsmOperand { CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), void *OpDecl = nullptr, unsigned FrontendSize = 0) { - auto Res = llvm::make_unique(Memory, StartLoc, EndLoc); + auto Res = std::make_unique(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; Res->Mem.BaseReg = 0; @@ -643,7 +652,7 @@ struct X86Operand final : public MCParsedAsmOperand { // The scale should always be one of {1,2,4,8}. assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && "Invalid scale!"); - auto Res = llvm::make_unique(Memory, StartLoc, EndLoc); + auto Res = std::make_unique(Memory, StartLoc, EndLoc); Res->Mem.SegReg = SegReg; Res->Mem.Disp = Disp; Res->Mem.BaseReg = BaseReg; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index a241362a271..e287f662511 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "X86DisassemblerDecoder.h" +#include "llvm/ADT/StringRef.h" + #include /* for va_*() */ #include /* for vsnprintf() */ #include /* for exit() */ #include /* for memset() */ -#include "X86DisassemblerDecoder.h" - using namespace llvm::X86Disassembler; /// Specifies whether a ModR/M byte is needed and (if so) which diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 54413fa1a02..f08fcb575bf 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ 
b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -287,7 +287,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // Relax if the value is too big for a (signed) i8. - return int64_t(Value) != int64_t(int8_t(Value)); + return !isInt<8>(Value); } // FIXME: Can tblgen help at all here to verify there aren't other instructions @@ -557,7 +557,7 @@ protected: // If the frame pointer is other than esp/rsp, we do not have a way to // generate a compact unwinding representation, so bail out. - if (MRI.getLLVMRegNum(Inst.getRegister(), true) != + if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != (Is64Bit ? X86::RBP : X86::EBP)) return 0; @@ -605,7 +605,7 @@ protected: // unwind encoding. return CU::UNWIND_MODE_DWARF; - unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); SavedRegs[SavedRegIdx++] = Reg; StackAdjust += OffsetSize; InstrOffset += PushInstrSize(Reg); diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 232a0659323..bd009da6085 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -46,10 +46,10 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, enum X86_64RelType { RT64_NONE, RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 }; -static X86_64RelType getType64(unsigned Kind, +static X86_64RelType getType64(MCFixupKind Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel) { - switch (Kind) { + switch (unsigned(Kind)) { default: llvm_unreachable("Unimplemented"); case FK_NONE: @@ -97,7 +97,7 @@ static void checkIs32(MCContext &Ctx, SMLoc Loc, X86_64RelType Type) { static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, MCSymbolRefExpr::VariantKind Modifier, X86_64RelType Type, bool IsPCRel, - unsigned Kind) { + MCFixupKind Kind) { switch (Modifier) { 
default: llvm_unreachable("Unimplemented"); @@ -202,7 +202,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc, // and we want to keep back-compatibility. if (!Ctx.getAsmInfo()->canRelaxRelocations()) return ELF::R_X86_64_GOTPCREL; - switch (Kind) { + switch (unsigned(Kind)) { default: return ELF::R_X86_64_GOTPCREL; case X86::reloc_riprel_4byte_relax: @@ -237,7 +237,7 @@ static X86_32RelType getType32(X86_64RelType T) { static unsigned getRelocType32(MCContext &Ctx, MCSymbolRefExpr::VariantKind Modifier, X86_32RelType Type, bool IsPCRel, - unsigned Kind) { + MCFixupKind Kind) { switch (Modifier) { default: llvm_unreachable("Unimplemented"); @@ -265,8 +265,9 @@ static unsigned getRelocType32(MCContext &Ctx, if (!Ctx.getAsmInfo()->canRelaxRelocations()) return ELF::R_386_GOT32; - return Kind == X86::reloc_signed_4byte_relax ? ELF::R_386_GOT32X - : ELF::R_386_GOT32; + return Kind == MCFixupKind(X86::reloc_signed_4byte_relax) + ? ELF::R_386_GOT32X + : ELF::R_386_GOT32; case MCSymbolRefExpr::VK_GOTOFF: assert(Type == RT32_32); assert(!IsPCRel); @@ -317,7 +318,7 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); - unsigned Kind = Fixup.getKind(); + MCFixupKind Kind = Fixup.getKind(); X86_64RelType Type = getType64(Kind, Modifier, IsPCRel); if (getEMachine() == ELF::EM_X86_64) return getRelocType64(Ctx, Fixup.getLoc(), Modifier, Type, IsPCRel, Kind); @@ -329,5 +330,5 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, std::unique_ptr llvm::createX86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) { - return llvm::make_unique(IsELF64, OSABI, EMachine); + return std::make_unique(IsELF64, OSABI, EMachine); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index e1125c176b2..d986c829d98 100644 --- 
a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -163,5 +163,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { TextAlignFillValue = 0x90; + AllowAtInName = true; + UseIntegratedAssembler = true; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 31d26d08a63..ac36bf3a12f 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -862,6 +862,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_B = ~(BaseRegEnc >> 3) & 1; unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg); VEX_X = ~(IndexRegEnc >> 3) & 1; + if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV. + EVEX_V2 = ~(IndexRegEnc >> 4) & 1; + break; } case X86II::MRMSrcReg: { diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index ce05ad97450..ced9eacc8b9 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -70,6 +70,10 @@ unsigned X86_MC::getDwarfRegFlavour(const Triple &TT, bool isEH) { return DWARFFlavour::X86_32_Generic; } +bool X86_MC::hasLockPrefix(const MCInst &MI) { + return MI.getFlags() & X86::IP_HAS_LOCK; +} + void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) { // FIXME: TableGen these. 
for (unsigned Reg = X86::NoRegister + 1; Reg < X86::NUM_TARGET_REGS; ++Reg) { @@ -399,6 +403,9 @@ public: findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, uint64_t GotSectionVA, const Triple &TargetTriple) const override; + Optional evaluateMemoryOperandAddress(const MCInst &Inst, + uint64_t Addr, + uint64_t Size) const override; }; #define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS @@ -511,7 +518,31 @@ std::vector> X86MCInstrAnalysis::findPltEntries( return findX86_64PltEntries(PltSectionVA, PltContents); default: return {}; - } + } +} + +Optional X86MCInstrAnalysis::evaluateMemoryOperandAddress( + const MCInst &Inst, uint64_t Addr, uint64_t Size) const { + const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); + if (MemOpStart == -1) + return None; + MemOpStart += X86II::getOperandBias(MCID); + + const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); + const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); + const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg); + const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt); + const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); + if (SegReg.getReg() != 0 || IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || + !Disp.isImm()) + return None; + + // RIP-relative addressing. 
+ if (BaseReg.getReg() == X86::RIP) + return Addr + Size + Disp.getImm(); + + return None; } } // end of namespace X86_MC @@ -567,13 +598,13 @@ extern "C" void LLVMInitializeX86TargetMC() { createX86_64AsmBackend); } -unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, - bool High) { +MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size, + bool High) { switch (Size) { - default: return 0; + default: return X86::NoRegister; case 8: if (High) { - switch (Reg) { + switch (Reg.id()) { default: return getX86SubSuperRegisterOrZero(Reg, 64); case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: return X86::SI; @@ -593,8 +624,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::BH; } } else { - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AL; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -630,8 +661,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, } } case 16: - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -666,8 +697,8 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::R15W; } case 32: - switch (Reg) { - default: return 0; + switch (Reg.id()) { + default: return X86::NoRegister; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::EAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -702,7 +733,7 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, return X86::R15D; } case 64: - switch (Reg) { + switch (Reg.id()) { default: return 0; case X86::AH: case X86::AL: case X86::AX: 
case X86::EAX: case X86::RAX: return X86::RAX; @@ -740,9 +771,9 @@ unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, } } -unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, bool High) { - unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High); - assert(Res != 0 && "Unexpected register or VT"); +MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High) { + MCRegister Res = getX86SubSuperRegisterOrZero(Reg, Size, High); + assert(Res != X86::NoRegister && "Unexpected register or VT"); return Res; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 00dd5908cbf..0c789061f0e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H #define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCTARGETDESC_H +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/DataTypes.h" #include @@ -57,6 +58,10 @@ unsigned getDwarfRegFlavour(const Triple &TT, bool isEH); void initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI); + +/// Returns true if this instruction has a LOCK prefix. +bool hasLockPrefix(const MCInst &MI); + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU, @@ -111,12 +116,12 @@ createX86WinCOFFObjectWriter(bool Is64Bit); /// Returns the sub or super register of a specific X86 register. /// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX. /// Aborts on error. -unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false); +MCRegister getX86SubSuperRegister(MCRegister, unsigned, bool High=false); /// Returns the sub or super register of a specific X86 register. /// Like getX86SubSuperRegister() but returns 0 on error. 
-unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned, - bool High = false); +MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned, + bool High = false); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index fc7e99f61e5..b67a7508fe7 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -276,7 +276,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( // x86_64 distinguishes movq foo@GOTPCREL so that the linker can // rewrite the movq to an leaq at link time if the symbol ends up in // the same linkage unit. - if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + if (Fixup.getTargetKind() == X86::reloc_riprel_4byte_movq_load) Type = MachO::X86_64_RELOC_GOT_LOAD; else Type = MachO::X86_64_RELOC_GOT; @@ -339,8 +339,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( return; } else { Type = MachO::X86_64_RELOC_UNSIGNED; - unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) { + if (Fixup.getTargetKind() == X86::reloc_signed_4byte) { Asm.getContext().reportError( Fixup.getLoc(), "32-bit absolute addressing is not supported in 64-bit mode"); @@ -600,5 +599,5 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, std::unique_ptr llvm::createX86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { - return llvm::make_unique(Is64Bit, CPUType, CPUSubtype); + return std::make_unique(Is64Bit, CPUType, CPUSubtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 3baab9da1c4..760239f7650 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -109,5 +109,5 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, std::unique_ptr 
llvm::createX86WinCOFFObjectWriter(bool Is64Bit) { - return llvm::make_unique(Is64Bit); + return std::make_unique(Is64Bit); } diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 796a27a1725..db624378d51 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -35,8 +35,9 @@ void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { MCStreamer::EmitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive - // actually switches to the .xdata section! - EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo()); + // actually switches to the .xdata section. + if (WinEH::FrameInfo *CurFrame = getCurrentWinFrameInfo()) + EHStreamer.EmitUnwindInfo(*this, CurFrame); } void X86WinCOFFStreamer::EmitWindowsUnwindTables() { diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp index e9987d1f62b..d5494ef1237 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -170,7 +170,7 @@ bool X86WinCOFFTargetStreamer::emitFPOProc(const MCSymbol *ProcSym, L, "opening new .cv_fpo_proc before closing previous frame"); return true; } - CurFPOData = llvm::make_unique(); + CurFPOData = std::make_unique(); CurFPOData->Function = ProcSym; CurFPOData->Begin = emitFPOLabel(); CurFPOData->ParamsSize = ParamsSize; diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index a95f68434d1..6840fc12751 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -81,6 +81,12 @@ FunctionPass *createX86FlagsCopyLoweringPass(); /// Return a pass that expands WinAlloca pseudo-instructions. FunctionPass *createX86WinAllocaExpander(); +/// Return a pass that inserts int3 at the end of the function if it ends with a +/// CALL instruction. The pass does the same for each funclet as well. 
This +/// ensures that the open interval of function start and end PCs contains all +/// return addresses for the benefit of the Windows x64 unwinder. +FunctionPass *createX86AvoidTrailingCallPass(); + /// Return a pass that optimizes the code-size of x86 call sequences. This is /// done by replacing esp-relative movs with pushes. FunctionPass *createX86CallFrameOptimization(); @@ -137,13 +143,13 @@ void initializeWinEHStatePassPass(PassRegistry &); void initializeX86AvoidSFBPassPass(PassRegistry &); void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); -void initializeX86ExpandPseudoPass(PassRegistry&); void initializeX86CondBrFoldingPassPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); +void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3112f00c91f..d8631aca273 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b">; + "64-bit with cmpxchg16b", + [FeatureCMPXCHG8B]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", @@ -240,8 +241,11 @@ def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true", "Enable Cache Demote">; def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true", 
"Support ptwrite instruction">; -def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", - "Support MPX instructions">; +// FIXME: This feature is deprecated in 10.0 and should not be used for +// anything, but removing it would break IR files that may contain it in a +// target-feature attribute. +def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false", + "Deprecated. Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", @@ -374,6 +378,10 @@ def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast">; +def FeaturePrefer128Bit + : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", + "Prefer 128-bit AVX instructions">; + def FeaturePrefer256Bit : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", "Prefer 256-bit AVX instructions">; @@ -449,6 +457,10 @@ def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch", "Merge branches to a three-way " "conditional branch">; +// Enable use of alias analysis during code generation. 
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true", + "Use alias analysis during codegen">; + // Bonnell def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">; // Silvermont @@ -579,7 +591,6 @@ def ProcessorFeatures { // Skylake list SKLAdditionalFeatures = [FeatureAES, - FeatureMPX, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, @@ -594,6 +605,7 @@ def ProcessorFeatures { // Skylake-AVX512 list SKXAdditionalFeatures = [FeatureAVX512, + FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI, @@ -627,6 +639,7 @@ def ProcessorFeatures { // Cannonlake list CNLAdditionalFeatures = [FeatureAVX512, + FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI, @@ -665,6 +678,17 @@ def ProcessorFeatures { list ICXFeatures = !listconcat(ICLInheritableFeatures, ICXSpecificFeatures); + //Tigerlake + list TGLAdditionalFeatures = [FeatureVP2INTERSECT, + FeatureMOVDIRI, + FeatureMOVDIR64B, + FeatureSHSTK]; + list TGLSpecificFeatures = [FeatureHasFastGather]; + list TGLInheritableFeatures = + !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures); + list TGLFeatures = + !listconcat(ICLFeatures, TGLInheritableFeatures ); + // Atom list AtomInheritableFeatures = [FeatureX87, FeatureCMPXCHG8B, @@ -707,7 +731,6 @@ def ProcessorFeatures { // Goldmont list GLMAdditionalFeatures = [FeatureAES, - FeatureMPX, FeatureSHA, FeatureRDSEED, FeatureXSAVE, @@ -786,6 +809,22 @@ def ProcessorFeatures { list KNMFeatures = !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); + // Barcelona + list BarcelonaInheritableFeatures = [FeatureX87, + FeatureCMPXCHG8B, + FeatureSSE4A, + Feature3DNowA, + FeatureFXSR, + FeatureNOPL, + FeatureCMPXCHG16B, + FeatureLZCNT, + FeaturePOPCNT, + FeatureSlowSHLD, + FeatureLAHFSAHF, + FeatureCMOV, + Feature64Bit, + FeatureFastScalarShiftMasks]; + list BarcelonaFeatures = BarcelonaInheritableFeatures; // Bobcat list BtVer1InheritableFeatures = [FeatureX87, @@ -1093,6 +1132,8 @@ def : ProcessorModel<"icelake-client", SkylakeServerModel, 
ProcessorFeatures.ICLFeatures>; def : ProcessorModel<"icelake-server", SkylakeServerModel, ProcessorFeatures.ICXFeatures>; +def : ProcessorModel<"tigerlake", SkylakeServerModel, + ProcessorFeatures.TGLFeatures>; // AMD CPUs. @@ -1129,10 +1170,7 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { } foreach P = ["amdfam10", "barcelona"] in { - def : Proc; + def : Proc; } // Bobcat diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 80120722e0e..8d27be30a27 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -242,7 +242,7 @@ void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo, return PrintOperand(MI, OpNo, O); if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) O << '%'; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (strncmp(Modifier, "subreg", strlen("subreg")) == 0) { unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 : (strcmp(Modifier+6,"32") == 0) ? 32 : @@ -388,7 +388,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI, static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); bool EmitPercent = true; if (!X86::GR8RegClass.contains(Reg) && @@ -575,7 +575,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // Emitting note header. int WordSize = TT.isArch64Bit() ? 8 : 4; - EmitAlignment(WordSize == 4 ? 2 : 3); + EmitAlignment(WordSize == 4 ? 
Align(4) : Align(8)); OutStreamer->EmitIntValue(4, 4 /*size*/); // data size for "GNU\0" OutStreamer->EmitIntValue(8 + WordSize, 4 /*size*/); // Elf_Prop size OutStreamer->EmitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4 /*size*/); @@ -585,7 +585,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer->EmitIntValue(ELF::GNU_PROPERTY_X86_FEATURE_1_AND, 4); OutStreamer->EmitIntValue(4, 4); // data size OutStreamer->EmitIntValue(FeatureFlagsAnd, 4); // data - EmitAlignment(WordSize == 4 ? 2 : 3); // padding + EmitAlignment(WordSize == 4 ? Align(4) : Align(8)); // padding OutStreamer->endSection(Nt); OutStreamer->SwitchSection(Cur); diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 3dcc1015dc7..69c6b3356cb 100644 --- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -35,6 +35,7 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -390,7 +391,7 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, MachineMemOperand *LMMO = *LoadInst->memoperands_begin(); MachineMemOperand *SMMO = *StoreInst->memoperands_begin(); - unsigned Reg1 = MRI->createVirtualRegister( + Register Reg1 = MRI->createVirtualRegister( TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent()))); MachineInstr *NewLoad = BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), diff --git a/lib/Target/X86/X86AvoidTrailingCall.cpp b/lib/Target/X86/X86AvoidTrailingCall.cpp new file mode 100644 index 00000000000..fb4f9e2901d --- /dev/null +++ b/lib/Target/X86/X86AvoidTrailingCall.cpp @@ -0,0 +1,108 @@ +//===----- X86AvoidTrailingCall.cpp - Insert int3 after trailing calls ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The Windows x64 unwinder has trouble unwinding the stack when a return +// address points to the end of the function. This pass maintains the invariant +// that every return address is inside the bounds of its parent function or +// funclet by inserting int3 if the last instruction would otherwise be a call. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +#define DEBUG_TYPE "x86-avoid-trailing-call" + +using namespace llvm; + +namespace { + +class X86AvoidTrailingCallPass : public MachineFunctionPass { +public: + X86AvoidTrailingCallPass() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + StringRef getPassName() const override { + return "X86 avoid trailing call pass"; + } + static char ID; +}; + +char X86AvoidTrailingCallPass::ID = 0; + +} // end anonymous namespace + +FunctionPass *llvm::createX86AvoidTrailingCallPass() { + return new X86AvoidTrailingCallPass(); +} + +// A real instruction is a non-meta, non-pseudo instruction. Some pseudos +// expand to nothing, and some expand to code. This logic conservatively assumes +// they might expand to nothing. +static bool isRealInstruction(MachineInstr &MI) { + return !MI.isPseudo() && !MI.isMetaInstruction(); +} + +// Return true if this is a call instruction, but not a tail call. 
+static bool isCallInstruction(const MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); +} + +bool X86AvoidTrailingCallPass::runOnMachineFunction(MachineFunction &MF) { + const X86Subtarget &STI = MF.getSubtarget(); + const X86InstrInfo &TII = *STI.getInstrInfo(); + assert(STI.isTargetWin64() && "pass only runs on Win64"); + + // FIXME: Perhaps this pass should also replace SEH_Epilogue by inserting nops + // before epilogues. + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + // Look for basic blocks that precede funclet entries or are at the end of + // the function. + MachineBasicBlock *NextMBB = MBB.getNextNode(); + if (NextMBB && !NextMBB->isEHFuncletEntry()) + continue; + + // Find the last real instruction in this block, or previous blocks if this + // block is empty. + MachineBasicBlock::reverse_iterator LastRealInstr; + for (MachineBasicBlock &RMBB : + make_range(MBB.getReverseIterator(), MF.rend())) { + LastRealInstr = llvm::find_if(reverse(RMBB), isRealInstruction); + if (LastRealInstr != RMBB.rend()) + break; + } + + // Do nothing if this function or funclet has no instructions. + if (LastRealInstr == MF.begin()->rend()) + continue; + + // If this is a call instruction, insert int3 right after it with the same + // DebugLoc. Convert back to a forward iterator and advance the insertion + // position once. 
+ if (isCallInstruction(*LastRealInstr)) { + LLVM_DEBUG({ + dbgs() << "inserting int3 after trailing call instruction:\n"; + LastRealInstr->dump(); + dbgs() << '\n'; + }); + + MachineBasicBlock::iterator MBBI = std::next(LastRealInstr.getReverse()); + BuildMI(*LastRealInstr->getParent(), MBBI, LastRealInstr->getDebugLoc(), + TII.get(X86::INT3)); + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 4df849a2e14..ad7e32b4efc 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // This is bad, and breaks SP adjustment. // So, check that all of the frames in the function are closed inside // the same block, and, for good measure, that there are no nested frames. + // + // If any call allocates more argument stack memory than the stack + // probe size, don't do this optimization. Otherwise, this pass + // would need to synthesize additional stack probe calls to allocate + // memory for arguments. 
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + bool UseStackProbe = + !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty(); + unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); for (MachineBasicBlock &BB : MF) { bool InsideFrameSequence = false; for (MachineInstr &MI : BB) { if (MI.getOpcode() == FrameSetupOpcode) { + if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe) + return false; if (InsideFrameSequence) return false; InsideFrameSequence = true; @@ -325,8 +335,8 @@ X86CallFrameOptimization::classifyInstruction( for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; - unsigned int Reg = MO.getReg(); - if (!RegInfo.isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister())) return Exit; @@ -370,7 +380,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, while (I->getOpcode() == X86::LEA32r || I->isDebugInstr()) ++I; - unsigned StackPtr = RegInfo.getStackRegister(); + Register StackPtr = RegInfo.getStackRegister(); auto StackPtrCopyInst = MBB.end(); // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual // register. 
If it's there, use that virtual register as stack pointer @@ -443,8 +453,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) continue; - unsigned int Reg = MO.getReg(); - if (RegInfo.isPhysicalRegister(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } } @@ -524,12 +534,12 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, break; case X86::MOV32mr: case X86::MOV64mr: { - unsigned int Reg = PushOp.getReg(); + Register Reg = PushOp.getReg(); // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg // in preparation for the PUSH64. The upper 32 bits can be undef. if (Is64Bit && Store->getOpcode() == X86::MOV32mr) { - unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); Reg = MRI->createVirtualRegister(&X86::GR64RegClass); BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg); BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg) @@ -598,7 +608,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( // movl %eax, (%esp) // call // Get rid of those with prejudice. - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return nullptr; // Make sure this is the only use of Reg. 
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index b16b3839c85..7ee637cfd52 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -102,6 +102,8 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { DL(MIRBuilder.getMF().getDataLayout()), STI(MIRBuilder.getMF().getSubtarget()) {} + bool isIncomingArgumentHandler() const override { return false; } + Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0)); @@ -155,8 +157,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - const CallLowering::ArgInfo &Info, CCState &State) override { - bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, + CCState &State) override { + bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); StackSize = State.getNextStackOffset(); static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2, @@ -229,7 +232,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { : ValueHandler(MIRBuilder, MRI, AssignFn), DL(MIRBuilder.getMF().getDataLayout()) {} - bool isArgumentHandler() const override { return true; } + bool isIncomingArgumentHandler() const override { return true; } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -237,7 +240,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { int FI = MFI.CreateFixedObject(Size, Offset, true); MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); - unsigned AddrReg = MRI.createGenericVirtualRegister( + Register AddrReg = MRI.createGenericVirtualRegister( LLT::pointer(0, DL.getPointerSizeInBits(0))); MIRBuilder.buildFrameIndex(AddrReg, FI); return AddrReg; @@ -301,6 +304,7 @@ struct 
FormalArgHandler : public IncomingValueHandler { : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } }; @@ -372,10 +376,7 @@ bool X86CallLowering::lowerFormalArguments( } bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, - const MachineOperand &Callee, - const ArgInfo &OrigRet, - ArrayRef OrigArgs) const { + CallLoweringInfo &Info) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -385,8 +386,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, auto TRI = STI.getRegisterInfo(); // Handle only Linux C, X86_64_SysV calling conventions for now. - if (!STI.isTargetLinux() || - !(CallConv == CallingConv::C || CallConv == CallingConv::X86_64_SysV)) + if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C || + Info.CallConv == CallingConv::X86_64_SysV)) return false; unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -395,18 +396,19 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. bool Is64Bit = STI.is64Bit(); - unsigned CallOpc = Callee.isReg() + unsigned CallOpc = Info.Callee.isReg() ? (Is64Bit ? X86::CALL64r : X86::CALL32r) : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); - auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc).add(Callee).addRegMask( - TRI->getCallPreservedMask(MF, CallConv)); + auto MIB = MIRBuilder.buildInstrNoInsert(CallOpc) + .add(Info.Callee) + .addRegMask(TRI->getCallPreservedMask(MF, Info.CallConv)); SmallVector SplitArgs; - for (const auto &OrigArg : OrigArgs) { + for (const auto &OrigArg : Info.OrigArgs) { // TODO: handle not simple cases. 
- if (OrigArg.Flags.isByVal()) + if (OrigArg.Flags[0].isByVal()) return false; if (OrigArg.Regs.size() > 1) @@ -423,8 +425,8 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) return false; - bool IsFixed = OrigArgs.empty() ? true : OrigArgs.back().IsFixed; - if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(CallConv)) { + bool IsFixed = Info.OrigArgs.empty() ? true : Info.OrigArgs.back().IsFixed; + if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(Info.CallConv)) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -445,23 +447,24 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // If Callee is a reg, since it is used by a target specific // instruction, it must have a register class matching the // constraint of that instruction. - if (Callee.isReg()) + if (Info.Callee.isReg()) MIB->getOperand(0).setReg(constrainOperandRegClass( MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Callee, 0)); + *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), Info.Callee, + 0)); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an // implicit-define of the call instruction. 
- if (!OrigRet.Ty->isVoidTy()) { - if (OrigRet.Regs.size() > 1) + if (!Info.OrigRet.Ty->isVoidTy()) { + if (Info.OrigRet.Regs.size() > 1) return false; SplitArgs.clear(); SmallVector NewRegs; - if (!splitToValueTypes(OrigRet, SplitArgs, DL, MRI, + if (!splitToValueTypes(Info.OrigRet, SplitArgs, DL, MRI, [&](ArrayRef Regs) { NewRegs.assign(Regs.begin(), Regs.end()); })) @@ -472,7 +475,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; if (!NewRegs.empty()) - MIRBuilder.buildMerge(OrigRet.Regs[0], NewRegs); + MIRBuilder.buildMerge(Info.OrigRet.Regs[0], NewRegs); } CallSeqStart.addImm(Handler.getStackSize()) diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 0445331bc3f..444a0c7d012 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -34,9 +34,8 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs) const override; - bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef OrigArgs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const override; private: /// A function of this type is used to perform value split action. diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 1c3034a5116..4c49d68bec9 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -433,6 +433,7 @@ defm X86_SysV64_RegCall : def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, // If HiPE, use RetCC_X86_32_HiPE. 
CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, @@ -1000,6 +1001,7 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::Tail", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index a61fa3246f0..5123853f545 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -436,8 +436,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( // Checks for "isUse()" as "uses()" returns also implicit definitions. if (!MO.isReg() || !MO.isUse()) continue; - unsigned Reg = MO.getReg(); - auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)]; + Register Reg = MO.getReg(); + auto &RDM = RegDefMaps[Register::isVirtualRegister(Reg)]; if (MachineInstr *DefMI = RDM.lookup(Reg)) { OperandToDefMap[&MO] = DefMI; DepthInfo Info = DepthMap.lookup(DefMI); @@ -456,8 +456,8 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI; + Register Reg = MO.getReg(); + RegDefMaps[Register::isVirtualRegister(Reg)][Reg] = &MI; } unsigned Latency = TSchedModel.computeInstrLatency(&MI); @@ -710,7 +710,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Skip any CMOVs in this group which don't load from memory. if (!MI.mayLoad()) { // Remember the false-side register input. - unsigned FalseReg = + Register FalseReg = MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg(); // Walk back through any intermediate cmovs referenced. 
while (true) { @@ -753,7 +753,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( // Get a fresh register to use as the destination of the MOV. const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg()); - unsigned TmpReg = MRI->createVirtualRegister(RC); + Register TmpReg = MRI->createVirtualRegister(RC); SmallVector NewMIs; bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg, @@ -810,9 +810,9 @@ void X86CmovConverterPass::convertCmovInstsToBranches( DenseMap> RegRewriteTable; for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned Op1Reg = MIIt->getOperand(1).getReg(); - unsigned Op2Reg = MIIt->getOperand(2).getReg(); + Register DestReg = MIIt->getOperand(0).getReg(); + Register Op1Reg = MIIt->getOperand(1).getReg(); + Register Op2Reg = MIIt->getOperand(2).getReg(); // If this CMOV we are processing is the opposite condition from the jump we // generated, then we have to swap the operands for the PHI that is going to diff --git a/lib/Target/X86/X86CondBrFolding.cpp b/lib/Target/X86/X86CondBrFolding.cpp index 9dea94f1368..1bf2d5ba7b8 100644 --- a/lib/Target/X86/X86CondBrFolding.cpp +++ b/lib/Target/X86/X86CondBrFolding.cpp @@ -564,7 +564,7 @@ X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) { Modified = false; break; } - return llvm::make_unique(TargetMBBInfo{ + return std::make_unique(TargetMBBInfo{ TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly}); } diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp index 18bbfa32e11..b4cf5cafbc6 100644 --- a/lib/Target/X86/X86DomainReassignment.cpp +++ b/lib/Target/X86/X86DomainReassignment.cpp @@ -182,7 +182,7 @@ public: MachineBasicBlock *MBB = MI->getParent(); auto &DL = MI->getDebugLoc(); - unsigned Reg = MRI->createVirtualRegister( + Register Reg = MRI->createVirtualRegister( TII->getRegClass(TII->get(DstOpcode), 0, 
MRI->getTargetRegisterInfo(), *MBB->getParent())); MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); @@ -219,13 +219,13 @@ public: // Don't allow copies to/flow GR8/GR16 physical registers. // FIXME: Is there some better way to support this? - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && + Register DstReg = MI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg) && (X86::GR8RegClass.contains(DstReg) || X86::GR16RegClass.contains(DstReg))) return false; - unsigned SrcReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + Register SrcReg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(SrcReg) && (X86::GR8RegClass.contains(SrcReg) || X86::GR16RegClass.contains(SrcReg))) return false; @@ -241,7 +241,7 @@ public: // Physical registers will not be converted. Assume that converting the // COPY to the destination domain will eventually result in a actual // instruction. - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + if (Register::isPhysicalRegister(MO.getReg())) return 1; RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()), @@ -436,7 +436,7 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg, if (EnclosedEdges.count(Reg)) return; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!Register::isVirtualRegister(Reg)) return; if (!MRI->hasOneDef(Reg)) @@ -593,8 +593,8 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) { if (!DefOp.isReg()) continue; - unsigned DefReg = DefOp.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DefReg)) { + Register DefReg = DefOp.getReg(); + if (!Register::isVirtualRegister(DefReg)) { C.setAllIllegal(); continue; } @@ -751,7 +751,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { // Go over all virtual registers and calculate a closure. 
unsigned ClosureID = 0; for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx); + unsigned Reg = Register::index2VirtReg(Idx); // GPR only current source domain supported. if (!isGPR(MRI->getRegClass(Reg))) diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp index 58680f1815b..24c8e6d6f6e 100755 --- a/lib/Target/X86/X86EvexToVex.cpp +++ b/lib/Target/X86/X86EvexToVex.cpp @@ -131,7 +131,7 @@ static bool usesExtendedRegister(const MachineInstr &MI) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) && "ZMM instructions should not be in the EVEX->VEX tables"); diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp index b8624b40f2f..9126a1fbea5 100644 --- a/lib/Target/X86/X86ExpandPseudo.cpp +++ b/lib/Target/X86/X86ExpandPseudo.cpp @@ -194,7 +194,8 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); + MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands + : 1); assert(StackAdjust.isImm() && "Expecting immediate value."); // Adjust stack pointer. @@ -259,7 +260,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ? X86::TAILJMPm : (IsWin64 ? 
X86::TAILJMPm64_REX : X86::TAILJMPm64); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); - for (unsigned i = 0; i != 5; ++i) + for (unsigned i = 0; i != X86::AddrNumOperands; ++i) MIB.add(MBBI->getOperand(i)); } else if (Opcode == X86::TCRETURNri64) { JumpTarget.setIsKill(); @@ -274,7 +275,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineInstr &NewMI = *std::prev(MBBI); NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI); - MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI); + MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); @@ -287,7 +288,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, assert(DestAddr.isReg() && "Offset should be in register!"); const bool Uses64BitFramePtr = STI->isTarget64BitLP64() || STI->isTargetNaCl64(); - unsigned StackPtr = TRI->getStackRegister(); + Register StackPtr = TRI->getStackRegister(); BuildMI(MBB, MBBI, DL, TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr) .addReg(DestAddr.getReg()); @@ -347,7 +348,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // actualcmpxchg Addr // [E|R]BX = SaveRbx const MachineOperand &InArg = MBBI->getOperand(6); - unsigned SaveRbx = MBBI->getOperand(7).getReg(); + Register SaveRbx = MBBI->getOperand(7).getReg(); unsigned ActualInArg = Opcode == X86::LCMPXCHG8B_SAVE_EBX ? 
X86::EBX : X86::RBX; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7b9ce027120..e5e089d07d5 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1160,6 +1160,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && + CC != CallingConv::Tail && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && @@ -1173,7 +1174,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Let SDISel handle vararg functions. @@ -1241,7 +1243,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { } // Make the copy. - unsigned DstReg = VA.getLocReg(); + Register DstReg = VA.getLocReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) @@ -3157,7 +3159,7 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE) + CC == CallingConv::HiPE || CC == CallingConv::Tail) return 0; if (CS) @@ -3208,6 +3210,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { default: return false; case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: @@ -3224,7 +3227,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. 
Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) + if ((CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) || + CC == CallingConv::Tail) return false; // Don't know how to handle Win64 varargs yet. Nothing special needed for @@ -3387,6 +3391,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: case CCValAssign::FPExt: + case CCValAssign::Trunc: llvm_unreachable("Unexpected loc info!"); case CCValAssign::Indirect: // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully @@ -3547,7 +3552,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getValVT(); unsigned CopyReg = ResultReg + i; - unsigned SrcReg = VA.getLocReg(); + Register SrcReg = VA.getLocReg(); // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp index bf541d93379..9f7c4afde76 100644 --- a/lib/Target/X86/X86FixupBWInsts.cpp +++ b/lib/Target/X86/X86FixupBWInsts.cpp @@ -80,7 +80,7 @@ class FixupBWInstPass : public MachineFunctionPass { /// destination register of the MachineInstr passed in. It returns true if /// that super register is dead just prior to \p OrigMI, and false if not. bool getSuperRegDestIfDead(MachineInstr *OrigMI, - unsigned &SuperDestReg) const; + Register &SuperDestReg) const; /// Change the MachineInstr \p MI into the equivalent extending load to 32 bit /// register if it is safe to do so. Return the replacement instruction if @@ -92,6 +92,12 @@ class FixupBWInstPass : public MachineFunctionPass { /// nullptr. MachineInstr *tryReplaceCopy(MachineInstr *MI) const; + /// Change the MachineInstr \p MI into the equivalent extend to 32 bit + /// register if it is safe to do so. 
Return the replacement instruction if + /// OK, otherwise return nullptr. + MachineInstr *tryReplaceExtend(unsigned New32BitOpcode, + MachineInstr *MI) const; + // Change the MachineInstr \p MI into an eqivalent 32 bit instruction if // possible. Return the replacement instruction if OK, return nullptr // otherwise. @@ -169,10 +175,10 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { /// /// If so, return that super register in \p SuperDestReg. bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, - unsigned &SuperDestReg) const { + Register &SuperDestReg) const { auto *TRI = &TII->getRegisterInfo(); - unsigned OrigDestReg = OrigMI->getOperand(0).getReg(); + Register OrigDestReg = OrigMI->getOperand(0).getReg(); SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg); @@ -232,12 +238,12 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, // %ax = KILL %ax, implicit killed %eax // RET 0, %ax unsigned Opc = OrigMI->getOpcode(); (void)Opc; - // These are the opcodes currently handled by the pass, if something - // else will be added we need to ensure that new opcode has the same - // properties. - assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr || - Opc == X86::MOV16rr) && - "Unexpected opcode."); + // These are the opcodes currently known to work with the code below, if + // something else will be added we need to ensure that new opcode has the + // same properties.
+ if (Opc != X86::MOV8rm && Opc != X86::MOV16rm && Opc != X86::MOV8rr && + Opc != X86::MOV16rr) + return false; bool IsDefined = false; for (auto &MO: OrigMI->implicit_operands()) { @@ -247,7 +253,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!"); if (MO.isDef() && TRI->isSuperRegisterEq(OrigDestReg, MO.getReg())) - IsDefined = true; + IsDefined = true; // If MO is a use of any part of the destination register but is not equal // to OrigDestReg or one of its subregisters, we cannot use SuperDestReg. @@ -268,7 +274,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const { - unsigned NewDestReg; + Register NewDestReg; // We are going to try to rewrite this load to a larger zero-extending // load. This is safe if all portions of the 32 bit super-register @@ -295,11 +301,11 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { auto &OldDest = MI->getOperand(0); auto &OldSrc = MI->getOperand(1); - unsigned NewDestReg; + Register NewDestReg; if (!getSuperRegDestIfDead(MI, NewDestReg)) return nullptr; - unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32); + Register NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32); // This is only correct if we access the same subregister index: otherwise, // we could try to replace "movb %ah, %al" with "movl %eax, %eax". @@ -326,6 +332,33 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const { return MIB; } +MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode, + MachineInstr *MI) const { + Register NewDestReg; + if (!getSuperRegDestIfDead(MI, NewDestReg)) + return nullptr; + + // Don't interfere with formation of CBW instructions which should be a + // shorter encoding than even the MOVSX32rr8. It's also immune to partial + // merge issues on Intel CPUs.
+ if (MI->getOpcode() == X86::MOVSX16rr8 && + MI->getOperand(0).getReg() == X86::AX && + MI->getOperand(1).getReg() == X86::AL) + return nullptr; + + // Safe to change the instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) + MIB.add(MI->getOperand(i)); + + MIB.setMemRefs(MI->memoperands()); + + return MIB; +} + MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB) const { // See if this is an instruction of the type we are currently looking for. @@ -355,6 +388,15 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, // of the register. return tryReplaceCopy(MI); + case X86::MOVSX16rr8: + return tryReplaceExtend(X86::MOVSX32rr8, MI); + case X86::MOVSX16rm8: + return tryReplaceExtend(X86::MOVSX32rm8, MI); + case X86::MOVZX16rr8: + return tryReplaceExtend(X86::MOVZX32rr8, MI); + case X86::MOVZX16rm8: + return tryReplaceExtend(X86::MOVZX32rm8, MI); + default: // nothing to do here. break; diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 041529a0be6..543dc8b00fa 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass { /// - LEA that uses RIP relative addressing mode /// - LEA that uses 16-bit addressing mode " /// This function currently handles the first 2 cases only. - MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB); + void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, bool OptIncDec); /// Look for LEAs that are really two address LEAs that we might be able to /// turn into regular ADD instructions. 
@@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) { if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP)) continue; - if (IsSlowLEA) { + if (IsSlowLEA) processInstructionForSlowLEA(I, MBB); - } else if (IsSlow3OpsLEA) { - if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) { - MBB.erase(I); - I = NewMI; - } - } + else if (IsSlow3OpsLEA) + processInstrForSlow3OpLEA(I, MBB, OptIncDec); } // Second pass for creating LEAs. This may reverse some of the @@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) { Reg == X86::R13D || Reg == X86::R13; } -static inline bool isRegOperand(const MachineOperand &Op) { - return Op.isReg() && Op.getReg() != X86::NoRegister; -} - /// Returns true if this LEA uses base an index registers, and the base register /// is known to be inefficient for the subtarget. // TODO: use a variant scheduling class to model the latency profile // of LEA instructions, and implement this logic as a scheduling predicate. static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, const MachineOperand &Index) { - return Base.isReg() && isInefficientLEAReg(Base.getReg()) && - isRegOperand(Index); + return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() && + Index.getReg() != X86::NoRegister; } static inline bool hasLEAOffset(const MachineOperand &Offset) { @@ -372,9 +364,9 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I, !TII->isSafeToClobberEFLAGS(MBB, I)) return false; - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned BaseReg = Base.getReg(); - unsigned IndexReg = Index.getReg(); + Register DestReg = MI.getOperand(0).getReg(); + Register BaseReg = Base.getReg(); + Register IndexReg = Index.getReg(); // Don't change stack adjustment LEAs. 
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP)) @@ -500,9 +492,9 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, if (Segment.getReg() != 0 || !Offset.isImm() || !TII->isSafeToClobberEFLAGS(MBB, I)) return; - const unsigned DstR = Dst.getReg(); - const unsigned SrcR1 = Base.getReg(); - const unsigned SrcR2 = Index.getReg(); + const Register DstR = Dst.getReg(); + const Register SrcR1 = Base.getReg(); + const Register SrcR2 = Index.getReg(); if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) return; if (Scale.getImm() > 1) @@ -534,111 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I, } } -MachineInstr * -FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, - MachineBasicBlock &MBB) { +void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I, + MachineBasicBlock &MBB, + bool OptIncDec) { + MachineInstr &MI = *I; const unsigned LEAOpcode = MI.getOpcode(); - const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg); const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt); const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg); const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp); const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg); - if (!(TII->isThreeOperandsLEA(MI) || - hasInefficientLEABaseReg(Base, Index)) || + if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) || !TII->isSafeToClobberEFLAGS(MBB, MI) || Segment.getReg() != X86::NoRegister) - return nullptr; + return; + + Register DestReg = Dest.getReg(); + Register BaseReg = Base.getReg(); + Register IndexReg = Index.getReg(); + + if (MI.getOpcode() == X86::LEA64_32r) { + if (BaseReg != 0) + BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit); + if (IndexReg != 0) + IndexReg = TRI->getSubReg(IndexReg, 
X86::sub_32bit); + } - unsigned DstR = Dst.getReg(); - unsigned BaseR = Base.getReg(); - unsigned IndexR = Index.getReg(); - unsigned SSDstR = - (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR; bool IsScale1 = Scale.getImm() == 1; - bool IsInefficientBase = isInefficientLEAReg(BaseR); - bool IsInefficientIndex = isInefficientLEAReg(IndexR); + bool IsInefficientBase = isInefficientLEAReg(BaseReg); + bool IsInefficientIndex = isInefficientLEAReg(IndexReg); // Skip these cases since it takes more than 2 instructions // to replace the LEA instruction. - if (IsInefficientBase && SSDstR == BaseR && !IsScale1) - return nullptr; - if (LEAOpcode == X86::LEA64_32r && IsInefficientBase && - (IsInefficientIndex || !IsScale1)) - return nullptr; - - const DebugLoc DL = MI.getDebugLoc(); - const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); - const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); + if (IsInefficientBase && DestReg == BaseReg && !IsScale1) + return; LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";); + MachineInstr *NewMI = nullptr; + // First try to replace LEA with one or two (for the 3-op LEA case) // add instructions: // 1.lea (%base,%index,1), %base => add %index,%base // 2.lea (%base,%index,1), %index => add %base,%index - if (IsScale1 && (DstR == BaseR || DstR == IndexR)) { - const MachineOperand &Src = DstR == BaseR ? Index : Base; - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); + if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) { + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + if (DestReg != BaseReg) + std::swap(BaseReg, IndexReg); + + if (MI.getOpcode() == X86::LEA64_32r) { + // TODO: Do we need the super register implicit use? 
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg) + .addReg(Base.getReg(), RegState::Implicit) + .addReg(Index.getReg(), RegState::Implicit); + } else { + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(BaseReg) + .addReg(IndexReg); + } + } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { + // If the base is inefficient try switching the index and base operands, + // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: + // lea offset(%base,%index,scale),%dst => + // lea (%base,%index,scale); add offset,%dst + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .add(IsInefficientBase ? Index : Base) + .add(Scale) + .add(IsInefficientBase ? Base : Index) + .addImm(0) + .add(Segment); LLVM_DEBUG(NewMI->dump();); + } + + // If either replacement succeeded above, add the offset if needed, then + // replace the instruction. + if (NewMI) { // Create ADD instruction for the Offset in case of 3-Ops LEA. 
if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); + if (OptIncDec && Offset.isImm() && + (Offset.getImm() == 1 || Offset.getImm() == -1)) { + unsigned NewOpc = + getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg); + LLVM_DEBUG(NewMI->dump();); + } else { + unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset); + NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Offset); + LLVM_DEBUG(NewMI->dump();); + } } - return NewMI; - } - // If the base is inefficient try switching the index and base operands, - // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: - // lea offset(%base,%index,scale),%dst => - // lea (%base,%index,scale); add offset,%dst - if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .add(IsInefficientBase ? Index : Base) - .add(Scale) - .add(IsInefficientBase ? Base : Index) - .addImm(0) - .add(Segment); - LLVM_DEBUG(NewMI->dump();); - // Create ADD instruction for the Offset in case of 3-Ops LEA. - if (hasLEAOffset(Offset)) { - NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); - LLVM_DEBUG(NewMI->dump();); - } - return NewMI; + + MBB.erase(I); + I = NewMI; + return; } + // Handle the rest of the cases with inefficient base register: - assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); + assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!"); assert(IsInefficientBase && "efficient base should be handled already!"); + // FIXME: Handle LEA64_32r. 
+ if (LEAOpcode == X86::LEA64_32r) + return; + // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst if (IsScale1 && !hasLEAOffset(Offset)) { - bool BIK = Base.isKill() && BaseR != IndexR; - TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK); + bool BIK = Base.isKill() && BaseReg != IndexReg; + TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK); LLVM_DEBUG(MI.getPrevNode()->dump();); - MachineInstr *NewMI = - BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Index); LLVM_DEBUG(NewMI->dump();); - return NewMI; + return; } + // lea offset(%base,%index,scale), %dst => // lea offset( ,%index,scale), %dst; add %base,%dst - MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode)) - .add(Dst) - .addReg(0) - .add(Scale) - .add(Index) - .add(Offset) - .add(Segment); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode)) + .add(Dest) + .addReg(0) + .add(Scale) + .add(Index) + .add(Offset) + .add(Segment); LLVM_DEBUG(NewMI->dump();); - NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); + unsigned NewOpc = getADDrrFromLEA(MI.getOpcode()); + NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg) + .addReg(DestReg) + .add(Base); LLVM_DEBUG(NewMI->dump();); - return NewMI; + + MBB.erase(I); + I = NewMI; } diff --git a/lib/Target/X86/X86FixupSetCC.cpp b/lib/Target/X86/X86FixupSetCC.cpp index e2d4d1ede6f..cbde280aa28 100644 --- a/lib/Target/X86/X86FixupSetCC.cpp +++ b/lib/Target/X86/X86FixupSetCC.cpp @@ -136,8 +136,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MF.getSubtarget().is64Bit() ? 
&X86::GR32RegClass : &X86::GR32_ABCDRegClass; - unsigned ZeroReg = MRI->createVirtualRegister(RC); - unsigned InsertReg = MRI->createVirtualRegister(RC); + Register ZeroReg = MRI->createVirtualRegister(RC); + Register InsertReg = MRI->createVirtualRegister(RC); // Initialize a register with 0. This must go before the eflags def BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp index 5ce3255ea96..cfba06fb653 100644 --- a/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -721,8 +721,9 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) { X86::CondCode Cond = X86::getCondFromSETCC(MI); - if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && - TRI->isVirtualRegister(MI.getOperand(0).getReg())) { + if (Cond != X86::COND_INVALID && !MI.mayStore() && + MI.getOperand(0).isReg() && + Register::isVirtualRegister(MI.getOperand(0).getReg())) { assert(MI.getOperand(0).isDef() && "A non-storing SETcc should always define a register!"); CondRegs[Cond] = MI.getOperand(0).getReg(); @@ -739,7 +740,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs( unsigned X86FlagsCopyLoweringPass::promoteCondToReg( MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, X86::CondCode Cond) { - unsigned Reg = MRI->createVirtualRegister(PromoteRC); + Register Reg = MRI->createVirtualRegister(PromoteRC); auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg).addImm(Cond); (void)SetI; @@ -813,7 +814,7 @@ void X86FlagsCopyLoweringPass::rewriteArithmetic( MachineBasicBlock &MBB = *MI.getParent(); // Insert an instruction that will set the flag back to the desired value. 
- unsigned TmpReg = MRI->createVirtualRegister(PromoteRC); + Register TmpReg = MRI->createVirtualRegister(PromoteRC); auto AddI = BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) .addDef(TmpReg, RegState::Dead) @@ -974,7 +975,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( // Now we need to turn this into a bitmask. We do this by subtracting it from // zero. - unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); ZeroReg = AdjustReg(ZeroReg); @@ -999,7 +1000,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( default: llvm_unreachable("Invalid SETB_C* opcode!"); } - unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); + Register ResultReg = MRI->createVirtualRegister(&SetBRC); BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) .addReg(ZeroReg) .addReg(ExtCondReg); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 074cf21d03f..fcfb5bc9131 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -288,8 +288,8 @@ namespace { // Check if a COPY instruction is using FP registers. static bool isFPCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); return X86::RFP80RegClass.contains(DstReg) || X86::RFP80RegClass.contains(SrcReg); @@ -313,7 +313,7 @@ FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } /// For example, this returns 3 for X86::FP3. 
static unsigned getFPReg(const MachineOperand &MO) { assert(MO.isReg() && "Expected an FP register!"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); return Reg - X86::FP0; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index e310fe06911..1b469a814ad 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -35,8 +35,8 @@ using namespace llvm; X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, - unsigned StackAlignOverride) - : TargetFrameLowering(StackGrowsDown, StackAlignOverride, + MaybeAlign StackAlignOverride) + : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), STI.is64Bit() ? -8 : -4), STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { // Cache a bunch of frame-related predicates for this subtarget. @@ -176,7 +176,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineOperand &MO = MBBI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!Reg) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) @@ -216,7 +216,7 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg != X86::EFLAGS) continue; @@ -995,11 +995,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; bool NeedsDwarfCFI = !IsWin64Prologue && (MMI.hasDebugInfo() || Fn.needsUnwindTableEntry()); - unsigned FramePtr = TRI->getFrameRegister(MF); - const unsigned MachineFramePtr = + Register FramePtr = TRI->getFrameRegister(MF); + const Register MachineFramePtr = STI.isTarget64BitILP32() - ? 
getX86SubSuperRegister(FramePtr, 64) : FramePtr; - unsigned BasePtr = TRI->getBaseRegister(); + ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; + Register BasePtr = TRI->getBaseRegister(); bool HasWinCFI = false; // Debug location must be unknown since the first debug location is used @@ -1016,14 +1016,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); - - // The default stack probe size is 4096 if the function has no stackprobesize - // attribute. - unsigned StackProbeSize = 4096; - if (Fn.hasFnAttribute("stack-probe-size")) - Fn.getFnAttribute("stack-probe-size") - .getValueAsString() - .getAsInteger(0, StackProbeSize); + unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); // Re-align the stack on 64-bit if the x86-interrupt calling convention is // used and an error code was pushed, since the x86-64 ABI requires a 16-byte @@ -1081,7 +1074,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int stackGrowth = -SlotSize; // Find the funclet establisher parameter - unsigned Establisher = X86::NoRegister; + Register Establisher = X86::NoRegister; if (IsClrFunclet) Establisher = Uses64BitFramePtr ? 
X86::RCX : X86::ECX; else if (IsFunclet) @@ -1192,7 +1185,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - unsigned Reg = MBBI->getOperand(0).getReg(); + Register Reg = MBBI->getOperand(0).getReg(); ++MBBI; if (!HasFP && NeedsDwarfCFI) { @@ -1396,9 +1389,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int FI; if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { if (X86::FR64RegClass.contains(Reg)) { + int Offset; unsigned IgnoredFrameReg; - int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); - Offset += SEHFrameOffset; + if (IsWin64Prologue && IsFunclet) + Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); + else + Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) + + SEHFrameOffset; HasWinCFI = true; assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); @@ -1554,9 +1551,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { unsigned X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + const X86MachineFunctionInfo *X86FI = MF.getInfo(); // This is the size of the pushed CSRs. - unsigned CSSize = - MF.getInfo()->getCalleeSavedFrameSize(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + // This is the size of callee saved XMMs. + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + unsigned XMMSize = WinEHXMMSlotInfo.size() * + TRI->getSpillSize(X86::VR128RegClass); // This is the amount of stack a funclet needs to allocate. unsigned UsedSize; EHPersonality Personality = @@ -1576,7 +1577,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment()); // Subtract out the size of the callee saved registers. This is how much stack // each funclet will allocate. 
- return FrameSizeMinusRBP - CSSize; + return FrameSizeMinusRBP + XMMSize - CSSize; } static bool isTailCallOpcode(unsigned Opc) { @@ -1597,9 +1598,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, DL = MBBI->getDebugLoc(); // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. const bool Is64BitILP32 = STI.isTarget64BitILP32(); - unsigned FramePtr = TRI->getFrameRegister(MF); + Register FramePtr = TRI->getFrameRegister(MF); unsigned MachineFramePtr = - Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; + Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWin64CFI = @@ -1850,6 +1851,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const X86MachineFunctionInfo *X86FI = MF.getInfo(); + const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); + const auto it = WinEHXMMSlotInfo.find(FI); + + if (it == WinEHXMMSlotInfo.end()) + return getFrameIndexReference(MF, FI, FrameReg); + + FrameReg = TRI->getStackRegister(); + return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second; +} + int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &FrameReg, int Adjustment) const { @@ -1948,6 +1963,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CalleeSavedFrameSize = 0; + unsigned XMMCalleeSavedFrameSize = 0; + auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1984,7 +2001,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( // Since 
emitPrologue and emitEpilogue will handle spilling and restoring of // the frame register, we can delete it from CSI list and not have to worry // about avoiding it later. - unsigned FPReg = TRI->getFrameRegister(MF); + Register FPReg = TRI->getFrameRegister(MF); for (unsigned i = 0; i < CSI.size(); ++i) { if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { CSI.erase(CSI.begin() + i); @@ -2025,12 +2042,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; + assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86"); + SpillSlotOffset = -alignTo(-SpillSlotOffset, Align); + // spill into slot SpillSlotOffset -= Size; int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); MFI.ensureMaxAlignment(Align); + + // Save the start offset and size of XMM in stack frame for funclets. + if (X86::VR128RegClass.contains(Reg)) { + WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; + XMMCalleeSavedFrameSize += Size; + } } return true; @@ -2200,7 +2225,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, // Spill the BasePtr if it's used. 
if (TRI->hasBasePointer(MF)){ - unsigned BasePtr = TRI->getBaseRegister(); + Register BasePtr = TRI->getBaseRegister(); if (STI.isTarget64BitILP32()) BasePtr = getX86SubSuperRegister(BasePtr, 64); SavedRegs.set(BasePtr); @@ -2212,7 +2237,7 @@ HasNestArgument(const MachineFunction *MF) { const Function &F = MF->getFunction(); for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; I++) { - if (I->hasNestAttr()) + if (I->hasNestAttr() && !I->use_empty()) return true; } return false; @@ -2244,7 +2269,8 @@ GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Pr bool IsNested = HasNestArgument(&MF); if (CallingConvention == CallingConv::X86_FastCall || - CallingConvention == CallingConv::Fast) { + CallingConvention == CallingConv::Fast || + CallingConvention == CallingConv::Tail) { if (IsNested) report_fatal_error("Segmented stacks does not support fastcall with " "nested function."); @@ -2525,6 +2551,18 @@ static unsigned getHiPELiteral( + " required but not provided"); } +// Return true if there are no non-ehpad successors to MBB and there are no +// non-meta instructions between MBBI and MBB.end(). +static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator MBBI) { + return std::all_of( + MBB.succ_begin(), MBB.succ_end(), + [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && + std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); +} + /// Erlang programs may need a special prologue to handle the stack size they /// might need at runtime. That is because Erlang/OTP does not implement a C /// stack but uses a custom implementation of hybrid stack/heap architecture. @@ -2758,7 +2796,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reserveCallFrame ? 
TII.getFrameSize(*I) : 0; + uint64_t Amount = TII.getFrameSize(*I); uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0; I = MBB.erase(I); auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); @@ -2847,7 +2885,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, return I; } - if (isDestroy && InternalAmt) { + if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. @@ -2912,8 +2950,8 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( "restoring EBP/ESI on non-32-bit target"); MachineFunction &MF = *MBB.getParent(); - unsigned FramePtr = TRI->getFrameRegister(MF); - unsigned BasePtr = TRI->getBaseRegister(); + Register FramePtr = TRI->getFrameRegister(MF); + Register BasePtr = TRI->getBaseRegister(); WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d32746e3a36..2103d6471ea 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -25,7 +25,7 @@ class X86RegisterInfo; class X86FrameLowering : public TargetFrameLowering { public: - X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride); + X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride); // Cached subtarget predicates. 
@@ -99,6 +99,8 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getWin64EHFrameIndexRef(const MachineFunction &MF, + int FI, unsigned &SPReg) const; int getFrameIndexReferenceSP(const MachineFunction &MF, int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62caf..5b546d42d98 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -253,6 +253,11 @@ namespace { return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); } + bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment); + /// Implement addressing mode selection for inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, @@ -362,6 +367,11 @@ namespace { if (User->getNumOperands() != 2) continue; + // If this can match to INC/DEC, don't count it as a use. + if (User->getOpcode() == ISD::ADD && + (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + continue; + // Immediates that are used for offsets as part of stack // manipulation should be left alone. 
These are typically // used to indicate SP offsets for argument passing and @@ -502,8 +512,10 @@ namespace { bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; bool tryShiftAmountMod(SDNode *N); + bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); + bool tryMatchBitSelect(SDNode *N); MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, const SDLoc &dl, MVT VT, SDNode *Node); @@ -746,7 +758,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { return false; LoadSDNode *LD = dyn_cast(Callee.getNode()); if (!LD || - LD->isVolatile() || + !LD->isSimple() || LD->getAddressingMode() != ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD) return false; @@ -873,10 +885,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(N); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - N->getValueType(0), - N->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + SDValue Res = CurDAG->getNode( + X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0), + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); ++I; @@ -2305,10 +2316,10 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent, return false; // We can allow a full vector load here since narrowing a load is ok unless - // it's volatile. + // it's volatile or atomic. if (ISD::isNON_EXTLoad(N.getNode())) { LoadSDNode *LD = cast(N); - if (!LD->isVolatile() && + if (LD->isSimple() && IsProfitableToFold(N, LD, Root) && IsLegalToFold(N, Parent, Root, OptLevel)) { PatternNodeWithChain = N; @@ -2464,6 +2475,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. 
+ // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. + if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; @@ -2544,6 +2586,7 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { + assert(Root && P && "Unknown root/parent nodes"); if (!ISD::isNON_EXTLoad(N.getNode()) || !IsProfitableToFold(N, P, Root) || !IsLegalToFold(N, P, Root, OptLevel)) @@ -2553,6 +2596,20 @@ bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, N.getOperand(1), Base, Scale, Index, Disp, Segment); } +bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, + SDValue &Base, SDValue &Scale, + SDValue &Index, SDValue &Disp, + SDValue &Segment) { + assert(Root && P && "Unknown root/parent nodes"); + if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || + !IsProfitableToFold(N, P, Root) || + !IsLegalToFold(N, P, Root, OptLevel)) + return false; + + return selectAddr(N.getNode(), + N.getOperand(1), Base, Scale, Index, Disp, 
Segment); +} + /// Return an SDNode that returns the value of the global base register. /// Output instructions required to initialize the global base register, /// if necessary. @@ -3302,8 +3359,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { SDValue ImplDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); - NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef, - NBits); + + SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); + insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); + NBits = SDValue( + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, + NBits, SRIdxVal), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); if (Subtarget->hasBMI2()) { @@ -3400,8 +3461,9 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM // hoisting the move immediate would make it worthwhile with a less optimal // BEXTR? - if (!Subtarget->hasTBM() && - !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR())) + bool PreferBEXTR = + Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); + if (!PreferBEXTR && !Subtarget->hasBMI2()) return nullptr; // Must have a shift right. @@ -3440,23 +3502,50 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { if (Shift + MaskSize > NVT.getSizeInBits()) return nullptr; - SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); - unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; - unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; + // BZHI, if available, is always fast, unlike BEXTR. But even if we decide + // that we can't use BEXTR, it is only worthwhile using BZHI if the mask + // does not fit into 32 bits. Load folding is not a sufficient reason. 
+ if (!PreferBEXTR && MaskSize <= 32) + return nullptr; - // BMI requires the immediate to placed in a register. - if (!Subtarget->hasTBM()) { - ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; - MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; + SDValue Control; + unsigned ROpc, MOpc; + + if (!PreferBEXTR) { + assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); + // If we can't make use of BEXTR then we can't fuse shift+mask stages. + // Let's perform the mask first, and apply shift later. Note that we need to + // widen the mask to account for the fact that we'll apply shift afterwards! + Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT); + ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr; + MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm; unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; - New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0); + Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); + } else { + // The 'control' of BEXTR has the pattern of: + // [15...8 bit][ 7...0 bit] location + // [ bit count][ shift] name + // I.e. 0b000000011'00000001 means (x >> 0b1) & 0b11 + Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); + if (Subtarget->hasTBM()) { + ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; + MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; + } else { + assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); + // BMI requires the immediate to placed in a register. + ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; + MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; + unsigned NewOpc = NVT == MVT::i64 ? 
X86::MOV32ri64 : X86::MOV32ri; + Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); + } } MachineSDNode *NewNode; SDValue Input = N0->getOperand(0); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) }; + SDValue Ops[] = { + Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)}; SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); // Update the chain. @@ -3464,7 +3553,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { // Record the mem-refs CurDAG->setNodeMemRefs(NewNode, {cast(Input)->getMemOperand()}); } else { - NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New); + NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control); + } + + if (!PreferBEXTR) { + // We still need to apply the shift. + SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT); + unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri; + NewNode = + CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt); } return NewNode; @@ -3735,6 +3832,52 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { return true; } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. 
+bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) { + assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue OneVec = Node->getOperand(1); + + APInt SplatVal; + if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue()) + return false; + + SDLoc DL(Node); + SDValue OneConstant, AllOnesVec; + + APInt Ones = APInt::getAllOnesValue(32); + assert(VT.getSizeInBits() % 32 == 0 && + "Expected bit count to be a multiple of 32"); + OneConstant = CurDAG->getConstant(Ones, DL, MVT::i32); + insertDAGNode(*CurDAG, X, OneConstant); + + unsigned NumElts = VT.getSizeInBits() / 32; + assert(NumElts > 0 && "Expected to get non-empty vector."); + AllOnesVec = CurDAG->getSplatBuildVector(MVT::getVectorVT(MVT::i32, NumElts), + DL, OneConstant); + insertDAGNode(*CurDAG, X, AllOnesVec); + + AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec); + insertDAGNode(*CurDAG, X, AllOnesVec); + + unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; + SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// If the high bits of an 'and' operand are known zero, try setting the /// high bits of an 'and' constant operand to produce a smaller encoding by /// creating a small, sign-extended negative immediate rather than a large @@ -3975,12 +4118,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, if (CC != ISD::SETEQ && CC != ISD::SETNE) return false; - // See if we're comparing against zero. This should have been canonicalized - // to RHS during lowering. 
- if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode())) + SDValue SetccOp0 = Setcc.getOperand(0); + SDValue SetccOp1 = Setcc.getOperand(1); + + // Canonicalize the all zero vector to the RHS. + if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) + std::swap(SetccOp0, SetccOp1); + + // See if we're comparing against zero. + if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) return false; - SDValue N0 = Setcc.getOperand(0); + SDValue N0 = SetccOp0; MVT CmpVT = N0.getSimpleValueType(); MVT CmpSVT = CmpVT.getVectorElementType(); @@ -4027,13 +4176,14 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) { // Look through single use bitcasts. - if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) - Src = Src.getOperand(0); - - if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) { + if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) { Parent = Src.getNode(); Src = Src.getOperand(0); - if (Src.getSimpleValueType() == CmpSVT) + } + + if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) { + auto *MemIntr = cast(Src); + if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits()) return Src; } @@ -4045,17 +4195,18 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, bool FoldedBCast = false; if (!FoldedLoad && CanFoldLoads && (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) { - SDNode *ParentNode = nullptr; + SDNode *ParentNode = N0.getNode(); if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) { - FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0, - Tmp1, Tmp2, Tmp3, Tmp4); + FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, + Tmp1, Tmp2, Tmp3, Tmp4); } // Try the other operand. 
if (!FoldedBCast) { + SDNode *ParentNode = N0.getNode(); if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) { - FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0, - Tmp1, Tmp2, Tmp3, Tmp4); + FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0, + Tmp1, Tmp2, Tmp3, Tmp4); if (FoldedBCast) std::swap(Src0, Src1); } @@ -4125,7 +4276,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, // Update the chain. ReplaceUses(Load.getValue(1), SDValue(CNode, 1)); // Record the mem-refs - CurDAG->setNodeMemRefs(CNode, {cast(Load)->getMemOperand()}); + CurDAG->setNodeMemRefs(CNode, {cast(Load)->getMemOperand()}); } else { if (IsMasked) CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); @@ -4146,6 +4297,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, return true; } +// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it +// into vpternlog. +bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); + + MVT NVT = N->getSimpleValueType(0); + + // Make sure we support VPTERNLOG. + if (!NVT.isVector() || !Subtarget->hasAVX512()) + return false; + + // We need VLX for 128/256-bit. + if (!(Subtarget->hasVLX() || NVT.is512BitVector())) + return false; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Canonicalize AND to LHS. + if (N1.getOpcode() == ISD::AND) + std::swap(N0, N1); + + if (N0.getOpcode() != ISD::AND || + N1.getOpcode() != X86ISD::ANDNP || + !N0.hasOneUse() || !N1.hasOneUse()) + return false; + + // ANDN is not commutable, use it to pick down A and C. + SDValue A = N1.getOperand(0); + SDValue C = N1.getOperand(1); + + // AND is commutable, if one operand matches A, the other operand is B. + // Otherwise this isn't a match. 
+ SDValue B; + if (N0.getOperand(0) == A) + B = N0.getOperand(1); + else if (N0.getOperand(1) == A) + B = N0.getOperand(0); + else + return false; + + SDLoc dl(N); + SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); + SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); + ReplaceNode(N, Ternlog.getNode()); + SelectCode(Ternlog.getNode()); + return true; +} + void X86DAGToDAGISel::Select(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); unsigned Opcode = Node->getOpcode(); @@ -4170,6 +4370,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc = 0; switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::x86_sse3_monitor: if (!Subtarget->hasSSE3()) break; @@ -4303,9 +4504,16 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (tryShrinkShlLogicImm(Node)) return; + if (Opcode == ISD::OR && tryMatchBitSelect(Node)) + return; + LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() && + combineIncDecVector(Node)) + return; + // Try to avoid folding immediates with multiple uses for optsize. // This code tries to select to register form directly to avoid going // through the isel table which might fold the immediate. We can't change @@ -4333,6 +4541,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { if (!isInt<8>(Val) && !isInt<32>(Val)) break; + // If this can match to INC/DEC, let it go. + if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) + break; + // Check if we should avoid folding this immediate. if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) break; @@ -4610,7 +4822,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: LoReg = X86::AL; ClrReg = HiReg = X86::AH; - SExtOpcode = X86::CBW; + SExtOpcode = 0; // Not used. 
break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; @@ -4632,24 +4844,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) { bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; - if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { + if (NVT == MVT::i8) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; MachineSDNode *Move; if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; - Move = CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops); + unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 + : X86::MOVZX16rm8; + Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); Chain = SDValue(Move, 1); ReplaceUses(N0.getValue(1), Chain); // Record the mem-refs CurDAG->setNodeMemRefs(Move, {cast(N0)->getMemOperand()}); } else { - Move = CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0); + unsigned Opc = (isSigned && !signBitIsZero) ? 
X86::MOVSX16rr8 + : X86::MOVZX16rr8; + Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); Chain = CurDAG->getEntryNode(); } - Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, SDValue(Move, 0), + Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), SDValue()); InFlag = Chain.getValue(1); } else { @@ -4996,10 +5211,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(Node); - SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, - Node->getValueType(0), + SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0), Node->getOperand(0), - CurDAG->getConstant(Imm, dl, MVT::i8)); + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); ReplaceNode(Node, Res.getNode()); SelectCode(Res.getNode()); return; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0b4bf687e6c..ed975e9248a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -65,17 +65,19 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt ExperimentalVectorWideningLegalization( - "x86-experimental-vector-widening-legalization", cl::init(false), - cl::desc("Enable an experimental vector type legalization through widening " - "rather than promotion."), - cl::Hidden); - static cl::opt ExperimentalPrefLoopAlignment( "x86-experimental-pref-loop-alignment", cl::init(4), - cl::desc("Sets the preferable loop alignment for experiments " - "(the last x86-experimental-pref-loop-alignment bits" - " of the loop header PC will be 0)."), + cl::desc( + "Sets the preferable loop alignment for experiments (as log2 bytes)" + "(the last x86-experimental-pref-loop-alignment bits" + " of the loop header PC will be 0)."), + cl::Hidden); + +// Added in 10.0. 
+static cl::opt EnableOldKNLABI( + "x86-enable-old-knl-abi", cl::init(false), + cl::desc("Enables passing v32i16 and v64i8 in 2 YMM registers instead of " + "one ZMM register on AVX512F, but not AVX512BW targets."), cl::Hidden); static cl::opt MulConstantOptimization( @@ -84,6 +86,13 @@ static cl::opt MulConstantOptimization( "SHIFT, LEA, etc."), cl::Hidden); +static cl::opt ExperimentalUnorderedISEL( + "x86-experimental-unordered-atomic-isel", cl::init(false), + cl::desc("Use LoadSDNode and StoreSDNode instead of " + "AtomicSDNode for unordered atomic loads and " + "stores respectively."), + cl::Hidden); + /// Call this when the user attempts to do something unsupported, like /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike /// report_fatal_error, so calling code should attempt to recover without @@ -196,7 +205,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Integer absolute. if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); @@ -214,14 +223,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); - if (Subtarget.is64Bit()) { - if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) - // f32/f64 are legal, f80 is custom. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); - else - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } else if (!Subtarget.useSoftFloat()) { + if (!Subtarget.useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. 
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -277,29 +279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote); setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); - if (Subtarget.is64Bit()) { - if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { - // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); - } else { - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); - } - } else if (!Subtarget.useSoftFloat()) { - // Since AVX is a superset of SSE3, only check for SSE here. - if (Subtarget.hasSSE1() && !Subtarget.hasSSE3()) - // Expand FP_TO_UINT into a select. - // FIXME: We would like to use a Custom expander here eventually to do - // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); - else - // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. - // With SSE3 we can use fisttpll to convert to a signed i64; without - // SSE, we're stuck with a fistpll. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + if (!Subtarget.useSoftFloat()) { + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. 
@@ -345,11 +327,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); + setOperationAction(ISD::FREM , MVT::f128 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); // Promote the i8 variants and force them on up to i32 which has a shorter @@ -396,15 +378,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // There's never any support for operations beyond MVT::f32. setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f80, MVT::f16, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); @@ -638,17 +624,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87, except f128 in MMX. + // f80 always uses X87. 
if (UseX87) { - if (Subtarget.is64Bit() && Subtarget.hasMMX()) { - addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass - : &X86::VR128RegClass); - ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); - setOperationAction(ISD::FABS , MVT::f128, Custom); - setOperationAction(ISD::FNEG , MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); - } - addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -684,10 +661,60 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f80, Expand); } + // f128 uses xmm registers, but most operations require libcalls. + if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { + addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); + + addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps + + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + setOperationAction(ISD::FABS, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + // We need to custom handle any FP_ROUND with an f128 input, but + // LegalizeDAG uses the result type to know when to run a custom handler. + // So we have to list all legal floating point result types here. 
+ if (isTypeLegal(MVT::f32)) { + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); + } + if (isTypeLegal(MVT::f64)) { + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); + } + if (isTypeLegal(MVT::f80)) { + setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); + } + + setOperationAction(ISD::SETCC, MVT::f128, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f80, Expand); + } + // Always use a library call for pow. setOperationAction(ISD::FPOW , MVT::f32 , Expand); setOperationAction(ISD::FPOW , MVT::f64 , Expand); setOperationAction(ISD::FPOW , MVT::f80 , Expand); + setOperationAction(ISD::FPOW , MVT::f128 , Expand); setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); @@ -716,7 +743,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // First set operation action for all vector types to either promote // (for widening) or expand (for scalarization). Then we will selectively // turn on ones that can be effectively codegen'd. 
- for (MVT VT : MVT::vector_valuetypes()) { + for (MVT VT : MVT::fixedlen_vector_valuetypes()) { setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); @@ -754,7 +781,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); - for (MVT InnerVT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(InnerVT, VT, Expand); setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); @@ -797,6 +824,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v2f32, Custom); setOperationAction(ISD::STORE, MVT::v2f32, Custom); + + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -823,10 +852,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::MUL, MVT::v2i8, Custom); - setOperationAction(ISD::MUL, MVT::v2i16, Custom); - setOperationAction(ISD::MUL, MVT::v2i32, Custom); setOperationAction(ISD::MUL, MVT::v4i8, Custom); - setOperationAction(ISD::MUL, MVT::v4i16, Custom); setOperationAction(ISD::MUL, MVT::v8i8, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -863,28 +889,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); - if (!ExperimentalVectorWideningLegalization) { - // Use widening instead of promotion. 
- for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, - MVT::v4i16, MVT::v2i16 }) { - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::SADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); - setOperationAction(ISD::SSUBSAT, VT, Custom); - } - } - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - // Provide custom widening for v2f32 setcc. This is really for VLX when - // setcc result type returns v2i1/v4i1 vector for v2f32/v4f32 leading to - // type legalization changing the result type to v4i1 during widening. - // It works fine for SSE2 and is probably faster so no need to qualify with - // VLX support. - setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); @@ -904,19 +912,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } - // We support custom legalizing of sext and anyext loads for specific - // memory vector types which we can load as a scalar (or sequence of - // scalars) and extend in-register to a legal 128-bit vector type. For sext - // loads these must work with a single scalar load. 
- for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom); - } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); @@ -938,7 +933,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); // Custom legalize these to avoid over promotion or custom promotion. setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); @@ -991,18 +985,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); - if (ExperimentalVectorWideningLegalization) { - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); - } else { - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom); - } + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + 
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); // In the customized shift lowering, the legal v4i32/v2i64 cases // in AVX2 will be recognized. @@ -1069,22 +1059,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); } - if (!ExperimentalVectorWideningLegalization) { - // Avoid narrow result types when widening. The legal types are listed - // in the next loop. - for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom); - } - } - // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); - if (!ExperimentalVectorWideningLegalization) - setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); @@ -1145,6 +1123,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom); + if (!Subtarget.hasAVX512()) setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); @@ -1292,10 +1272,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STORE, VT, Custom); } - if (HasInt256) - setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - if (HasInt256) { + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + // Custom legalize 2x32 to get a little better code. 
setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); @@ -1407,6 +1386,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); @@ -1433,12 +1414,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - if (ExperimentalVectorWideningLegalization) { - // Need to custom widen this if we don't have AVX512BW. - setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); - } + // Need to custom widen this if we don't have AVX512BW. + setOperationAction(ISD::ANY_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i8, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); for (auto VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); @@ -1529,10 +1508,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } - // Need to custom split v32i16/v64i8 bitcasts. if (!Subtarget.hasBWI()) { + // Need to custom split v32i16/v64i8 bitcasts. setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); + + // Better to split these into two 256-bit ops. 
+ setOperationAction(ISD::BITREVERSE, MVT::v8i64, Custom); + setOperationAction(ISD::BITREVERSE, MVT::v16i32, Custom); } if (Subtarget.hasVBMI2()) { @@ -1777,6 +1760,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHR, VT, Custom); } } + + setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); } // We want to custom lower some of our intrinsics. @@ -1905,13 +1892,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, MaxLoadsPerMemcmpOptSize = 2; // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4). - setPrefLoopAlignment(ExperimentalPrefLoopAlignment); + setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); // An out-of-order CPU can speculatively execute past a predictable branch, // but a conditional move could be stalled by an expensive earlier operation. PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); EnableExtLdPromotion = true; - setPrefFunctionAlignment(4); // 2^4 bytes. + setPrefFunctionAlignment(Align(16)); verifyIntrinsicTables(); } @@ -1939,8 +1926,7 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return TypeSplitVector; - if (ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() != 1 && + if (VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; @@ -1950,19 +1936,62 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { + // v32i1 vectors should be promoted to v32i8 to match avx2. if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return MVT::v32i8; + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. 
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) + return MVT::i8; + // FIXME: Should we just make these types legal and custom split operations? + if ((VT == MVT::v32i16 || VT == MVT::v64i8) && + Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) + return MVT::v16i32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { + // v32i1 vectors should be promoted to v32i8 to match avx2. if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) return 1; + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. + if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) + return VT.getVectorNumElements(); + // FIXME: Should we just make these types legal and custom split operations? + if ((VT == MVT::v32i16 || VT == MVT::v64i8) && + Subtarget.hasAVX512() && !Subtarget.hasBWI() && !EnableOldKNLABI) + return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } +unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. 
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && + Subtarget.hasAVX512() && + (!isPowerOf2_32(VT.getVectorNumElements()) || + (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || + (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { + RegisterVT = MVT::i8; + IntermediateVT = MVT::i1; + NumIntermediates = VT.getVectorNumElements(); + return NumIntermediates; + } + + return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT, + NumIntermediates, RegisterVT); +} + EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { @@ -2060,6 +2089,11 @@ EVT X86TargetLowering::getOptimalMemOpType( if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { + // FIXME: Check if unaligned 64-byte accesses are slow. + if (Size >= 64 && Subtarget.hasAVX512() && + (Subtarget.getPreferVectorWidth() >= 512)) { + return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; + } // FIXME: Check if unaligned 32-byte accesses are slow. 
if (Size >= 32 && Subtarget.hasAVX() && (Subtarget.getPreferVectorWidth() >= 256)) { @@ -2403,8 +2437,8 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, /// Breaks v64i1 value into two registers and adds the new node to the DAG static void Passv64i1ArgInRegs( - const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, - SmallVector, 8> &RegsToPass, CCValAssign &VA, + const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg, + SmallVectorImpl> &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget) { assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); assert(Subtarget.is32Bit() && "Expecting 32 bit target"); @@ -2537,7 +2571,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); - Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I], + Passv64i1ArgInRegs(dl, DAG, ValToCopy, RegsToPass, VA, RVLocs[++I], Subtarget); assert(2 == RegsToPass.size() && @@ -2816,6 +2850,10 @@ SDValue X86TargetLowering::LowerCallResult( ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. + } else if (CopyVT == MVT::f64 && + (Is64Bit && !Subtarget.hasSSE2())) { + errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); + VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. 
} // If we prefer to use the value in xmm registers, copy it out as f80 and @@ -2925,7 +2963,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || - CC == CallingConv::HHVM); + CC == CallingConv::HHVM || CC == CallingConv::Tail); } /// Return true if we might ever do TCO for calls with this calling convention. @@ -2951,7 +2989,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { /// Return true if the function is being made into a tailcall target by /// changing its ABI. static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { - return GuaranteedTailCallOpt && canGuaranteeTCO(CC); + return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || CC == CallingConv::Tail; } bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { @@ -3405,7 +3443,7 @@ SDValue X86TargetLowering::LowerFormalArguments( // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. 
- if (Subtarget.hasAVX512() && + if (Subtarget.useAVX512Regs() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; @@ -3577,6 +3615,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; + bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || + CallConv == CallingConv::Tail; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); const auto *CI = dyn_cast_or_null(CLI.CS.getInstruction()); @@ -3597,8 +3637,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Attr.getValueAsString() == "true") isTailCall = false; - if (Subtarget.isPICStyleGOT() && - !MF.getTarget().Options.GuaranteedTailCallOpt) { + if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO) { // If we are using a GOT, disable tail calls to external symbols with // default visibility. Tail calling such a symbol requires using a GOT // relocation, which forces early binding of the symbol. This breaks code @@ -3625,7 +3664,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Sibcalls are automatically detected tailcalls which do not require // ABI changes. - if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) + if (!IsGuaranteeTCO && isTailCall) IsSibcall = true; if (isTailCall) @@ -3657,8 +3696,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This is a sibcall. The memory operands are available in caller's // own caller's stack. 
NumBytes = 0; - else if (MF.getTarget().Options.GuaranteedTailCallOpt && - canGuaranteeTCO(CallConv)) + else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -3782,8 +3820,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"); // Split v64i1 value into two registers - Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I], - Subtarget); + Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); const TargetOptions &Options = DAG.getTarget().Options; @@ -4069,6 +4106,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); + // Save heapallocsite metadata. + if (CLI.CS) + if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite")) + DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); + // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, @@ -4190,7 +4232,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast(Arg.getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VR)) + if (!Register::isVirtualRegister(VR)) return false; MachineInstr *Def = MRI->getVRegDef(VR); if (!Def) @@ -4279,6 +4321,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); + bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || + CalleeCC == CallingConv::Tail; // Win64 functions have extra shadow space for argument homing. Don't do the // sibcall if the caller and callee have mismatched expectations for this @@ -4286,7 +4330,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( if (IsCalleeWin64 != IsCallerWin64) return false; - if (DAG.getTarget().Options.GuaranteedTailCallOpt) { + if (IsGuaranteeTCO) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; @@ -4413,7 +4457,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( CCValAssign &VA = ArgLocs[i]; if (!VA.isRegLoc()) continue; - unsigned Reg = VA.getLocReg(); + Register Reg = VA.getLocReg(); switch (Reg) { default: break; case X86::EAX: case X86::EDX: case X86::ECX: @@ -4652,7 +4696,11 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, // X < 0 -> X == 0, jump on sign. return X86::COND_S; } - if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { + if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) { + // X >= 0 -> X == 0, jump on !sign. 
+ return X86::COND_NS; + } + if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_LE; @@ -4760,7 +4808,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, ScalarVT = MVT::i32; Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOStore; break; } @@ -4773,7 +4821,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned NumElts = std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOLoad; break; } @@ -4785,7 +4833,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned NumElts = std::min(DataVT.getVectorNumElements(), IndexVT.getVectorNumElements()); Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); - Info.align = 1; + Info.align = Align::None(); Info.flags |= MachineMemOperand::MOStore; break; } @@ -4811,6 +4859,8 @@ bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const { + assert(cast(Load)->isSimple() && "illegal to narrow"); + // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF // relocation target a movq or addq instruction: don't let the load shrink. 
SDValue BasePtr = cast(Load)->getBasePtr(); @@ -4852,11 +4902,12 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { +bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { // If we are using XMM registers in the ABI and the condition of the select is // a floating-point compare and we have blendv or conditional move, then it is // cheaper to select instead of doing a cross-register move and creating a // load that depends on the compare result. + bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); } @@ -4869,15 +4920,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { return true; } -bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const { +bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const { // TODO: We handle scalars using custom code, but generic combining could make // that unnecessary. APInt MulC; if (!ISD::isConstantSplatVector(C.getNode(), MulC)) return false; + // Find the type this will be legalized too. Otherwise we might prematurely + // convert this to shl+add/sub and then still have to type legalize those ops. + // Another choice would be to defer the decision for illegal types until + // after type legalization. But constant splat vectors of i64 can't make it + // through type legalization on 32-bit targets so we would need to special + // case vXi64. + while (getTypeAction(Context, VT) != TypeLegal) + VT = getTypeToTransformTo(Context, VT); + // If vector multiply is legal, assume that's faster than shl + add/sub. 
- // TODO: Multiply is a complex op with higher latency and lower througput in + // TODO: Multiply is a complex op with higher latency and lower throughput in // most implementations, so this check could be loosened based on type // and/or a CPU attribute. if (isOperationLegal(ISD::MUL, VT)) @@ -5022,6 +5083,33 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const { return Subtarget.hasSSE2(); } +bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'bt' +} + +bool X86TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does baseline recommend not to perform the fold by default? + if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // For scalars this transform is always beneficial. + if (X.getValueType().isScalarInteger()) + return true; + // If all the shift amounts are identical, then transform is beneficial even + // with rudimentary SSE2 shifts. + if (DAG.isSplatValue(Y, /*AllowUndefs=*/true)) + return true; + // If we have AVX2 with it's powerful shift operations, then it's also good. + if (Subtarget.hasAVX2()) + return true; + // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'. 
+ return NewShiftOpcode == ISD::SHL; +} + bool X86TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && @@ -5054,6 +5142,14 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { return true; } +bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, + SDNode *N) const { + if (DAG.getMachineFunction().getFunction().hasMinSize() && + !Subtarget.isOSWindows()) + return false; + return true; +} + bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { // Any legal vector type can be splatted more efficiently than // loading/spilling from memory. @@ -5093,10 +5189,8 @@ static bool isUndefOrZero(int Val) { /// Return true if every element in Mask, beginning from position Pos and ending /// in Pos+Size is the undef sentinel value. static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i) - if (Mask[i] != SM_SentinelUndef) - return false; - return true; + return llvm::all_of(Mask.slice(Pos, Size), + [](int M) { return M == SM_SentinelUndef; }); } /// Return true if the mask creates a vector whose lower half is undefined. @@ -5119,10 +5213,7 @@ static bool isInRange(int Val, int Low, int Hi) { /// Return true if the value of any element in Mask falls within the specified /// range (L, H]. static bool isAnyInRange(ArrayRef Mask, int Low, int Hi) { - for (int M : Mask) - if (isInRange(M, Low, Hi)) - return true; - return false; + return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); } /// Return true if Val is undef or if its value falls within the @@ -5133,12 +5224,9 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { /// Return true if every element in Mask is undef or if its value /// falls within the specified range (L, H]. 
-static bool isUndefOrInRange(ArrayRef Mask, - int Low, int Hi) { - for (int M : Mask) - if (!isUndefOrInRange(M, Low, Hi)) - return false; - return true; +static bool isUndefOrInRange(ArrayRef Mask, int Low, int Hi) { + return llvm::all_of( + Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); } /// Return true if Val is undef, zero or if its value falls within the @@ -5150,10 +5238,8 @@ static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { /// Return true if every element in Mask is undef, zero or if its value /// falls within the specified range (L, H]. static bool isUndefOrZeroOrInRange(ArrayRef Mask, int Low, int Hi) { - for (int M : Mask) - if (!isUndefOrZeroOrInRange(M, Low, Hi)) - return false; - return true; + return llvm::all_of( + Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); } /// Return true if every element in Mask, beginning @@ -5171,8 +5257,9 @@ static bool isSequentialOrUndefInRange(ArrayRef Mask, unsigned Pos, /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size], or is undef or is zero. static bool isSequentialOrUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, - unsigned Size, int Low) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low) + unsigned Size, int Low, + int Step = 1) { + for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) return false; return true; @@ -5182,10 +5269,8 @@ static bool isSequentialOrUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, /// from position Pos and ending in Pos+Size is undef or is zero. 
static bool isUndefOrZeroInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { - for (unsigned i = Pos, e = Pos + Size; i != e; ++i) - if (!isUndefOrZero(Mask[i])) - return false; - return true; + return llvm::all_of(Mask.slice(Pos, Size), + [](int M) { return isUndefOrZero(M); }); } /// Helper function to test whether a shuffle mask could be @@ -5357,6 +5442,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SDValue Vec; if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); + } else if (VT.isFloatingPoint()) { + Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); @@ -5500,6 +5587,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && Idx == (VT.getVectorNumElements() / 2) && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && isNullConstant(Src.getOperand(2))) { Ops.push_back(Src.getOperand(1)); Ops.push_back(Sub); @@ -5593,7 +5681,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { // May need to promote to a legal type. 
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), SubVec, Idx); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } @@ -5609,14 +5697,14 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (IdxVal == 0) { // Zero lower bits of the Vec - SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); // Merge them together, SubVec should be zero extended. SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), SubVec, ZeroIdx); Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); @@ -5628,7 +5716,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (Vec.isUndef()) { assert(IdxVal != 0 && "Unexpected index"); SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } @@ -5638,30 +5726,30 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); if (ShiftRight != 0) SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, - DAG.getConstant(ShiftRight, dl, MVT::i8)); + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, 
ZeroIdx); } // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); if (SubVecNumElems * 2 == NumElems) { // Special case, use legal zero extending insert_subvector. This allows // isel to opimitize when bits are known zero. Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), + DAG.getConstant(0, dl, WideOpVT), Vec, ZeroIdx); } else { // Otherwise use explicit shifts to zero the bits. Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); NumElems = WideOpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); + SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); } @@ -5675,30 +5763,47 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // Widen the vector if needed. Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - // Move the current value of the bit to be replace to the lsbs. - Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); - // Xor with the new bit. - Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec); - // Shift to MSB, filling bottom bits with 0. + + // Clear the upper bits of the subvector and move it to its insert position. unsigned ShiftLeft = NumElems - SubVecNumElems; - Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); - // Shift to the final position, filling upper bits with 0. 
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; - Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, - DAG.getConstant(ShiftRight, dl, MVT::i8)); - // Xor with original vector leaving the new value. - Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); + SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, + DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); + + // Isolate the bits below the insertion point. + unsigned LowShift = NumElems - IdxVal; + SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, + DAG.getTargetConstant(LowShift, dl, MVT::i8)); + Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low, + DAG.getTargetConstant(LowShift, dl, MVT::i8)); + + // Isolate the bits after the last inserted bit. + unsigned HighShift = IdxVal + SubVecNumElems; + SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, + DAG.getTargetConstant(HighShift, dl, MVT::i8)); + High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High, + DAG.getTargetConstant(HighShift, dl, MVT::i8)); + + // Now OR all 3 pieces together. + Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High); + SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec); + // Reduce to original width if needed. 
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } -static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - const SDLoc &dl, unsigned VectorWidth) { - SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth); - return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth); +static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, + const SDLoc &dl) { + assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch"); + EVT SubVT = V1.getValueType(); + EVT SubSVT = SubVT.getScalarType(); + unsigned SubNumElts = SubVT.getVectorNumElements(); + unsigned SubVectorWidth = SubVT.getSizeInBits(); + EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); + SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); + return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); } /// Returns a vector of specified type with all bits set. @@ -5755,6 +5860,34 @@ static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT, return DAG.getNode(Opcode, DL, VT, In); } +// Match (xor X, -1) -> X. +// Match extract_subvector(xor X, -1) -> extract_subvector(X). +// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). 
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { + V = peekThroughBitcasts(V); + if (V.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) + return V.getOperand(0); + if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && + (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { + if (SDValue Not = IsNOT(V.getOperand(0), DAG)) { + Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(), + Not, V.getOperand(1)); + } + } + SmallVector CatOps; + if (collectConcatOps(V.getNode(), CatOps)) { + for (SDValue &CatOp : CatOps) { + SDValue NotCat = IsNOT(CatOp, DAG); + if (!NotCat) return SDValue(); + CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps); + } + return SDValue(); +} + /// Returns a vector_shuffle node for an unpackl operation. static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, SDValue V2) { @@ -6003,6 +6136,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, } } + if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD && + EltSizeInBits <= VT.getScalarSizeInBits()) { + auto *MemIntr = cast(Op); + if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits()) + return false; + + SDValue Ptr = MemIntr->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *CNode = dyn_cast(Ptr); + if (!CNode || CNode->isMachineConstantPoolEntry() || + CNode->getOffset() != 0) + return false; + + if (const Constant *C = CNode->getConstVal()) { + unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) { + if 
(UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); + } + } + } + // Extract constant bits from a subvector broadcast. if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) { SmallVector SubEltBits; @@ -6123,7 +6287,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; } -static bool isConstantSplat(SDValue Op, APInt &SplatVal) { +namespace llvm { +namespace X86 { +bool isConstantSplat(SDValue Op, APInt &SplatVal) { APInt UndefElts; SmallVector EltBits; if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), @@ -6146,6 +6312,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) { return false; } +} // namespace X86 +} // namespace llvm static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, @@ -6551,13 +6719,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return true; } -/// Check a target shuffle mask's inputs to see if we can set any values to -/// SM_SentinelZero - this is for elements that are known to be zero -/// (not just zeroable) from their inputs. +/// Decode a target shuffle mask and inputs and see if any values are +/// known to be undef or zero from their inputs. /// Returns true if the target shuffle mask was decoded. -static bool setTargetShuffleZeroElements(SDValue N, - SmallVectorImpl &Mask, - SmallVectorImpl &Ops) { +static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl &Mask, + SmallVectorImpl &Ops, + APInt &KnownUndef, APInt &KnownZero) { bool IsUnary; if (!isTargetShuffle(N.getOpcode())) return false; @@ -6566,15 +6733,17 @@ static bool setTargetShuffleZeroElements(SDValue N, if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) return false; + int Size = Mask.size(); SDValue V1 = Ops[0]; SDValue V2 = IsUnary ? 
V1 : Ops[1]; + KnownUndef = KnownZero = APInt::getNullValue(Size); V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); assert((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"); - unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size(); + unsigned EltSizeInBits = VT.getSizeInBits() / Size; // Extract known constant input data. APInt UndefSrcElts[2]; @@ -6585,12 +6754,18 @@ static bool setTargetShuffleZeroElements(SDValue N, getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], SrcEltBits[1], true, false)}; - for (int i = 0, Size = Mask.size(); i < Size; ++i) { + for (int i = 0; i < Size; ++i) { int M = Mask[i]; // Already decoded as SM_SentinelZero / SM_SentinelUndef. - if (M < 0) + if (M < 0) { + assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!"); + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); continue; + } // Determine shuffle input and normalize the mask. unsigned SrcIdx = M / Size; @@ -6599,7 +6774,7 @@ static bool setTargetShuffleZeroElements(SDValue N, // We are referencing an UNDEF input. if (V.isUndef()) { - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); continue; } @@ -6612,31 +6787,64 @@ static bool setTargetShuffleZeroElements(SDValue N, int Scale = Size / V.getValueType().getVectorNumElements(); int Idx = M / Scale; if (Idx != 0 && !VT.isFloatingPoint()) - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) - Mask[i] = SM_SentinelZero; + KnownZero.setBit(i); continue; } // Attempt to extract from the source's constant bits. 
if (IsSrcConstant[SrcIdx]) { if (UndefSrcElts[SrcIdx][M]) - Mask[i] = SM_SentinelUndef; + KnownUndef.setBit(i); else if (SrcEltBits[SrcIdx][M] == 0) - Mask[i] = SM_SentinelZero; + KnownZero.setBit(i); } } - assert(VT.getVectorNumElements() == Mask.size() && + assert(VT.getVectorNumElements() == (unsigned)Size && "Different mask size from vector size!"); return true; } +// Replace target shuffle mask elements with known undef/zero sentinels. +static void resolveTargetShuffleFromZeroables(SmallVectorImpl &Mask, + const APInt &KnownUndef, + const APInt &KnownZero) { + unsigned NumElts = Mask.size(); + assert(KnownUndef.getBitWidth() == NumElts && + KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); + + for (unsigned i = 0; i != NumElts; ++i) { + if (KnownUndef[i]) + Mask[i] = SM_SentinelUndef; + else if (KnownZero[i]) + Mask[i] = SM_SentinelZero; + } +} + +// Extract target shuffle mask sentinel elements to known undef/zero bitmasks. +static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl &Mask, + APInt &KnownUndef, + APInt &KnownZero) { + unsigned NumElts = Mask.size(); + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + int M = Mask[i]; + if (SM_SentinelUndef == M) + KnownUndef.setBit(i); + if (SM_SentinelZero == M) + KnownZero.setBit(i); + } +} + // Forward declaration (for getFauxShuffleMask recursive check). -static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG); +// TODO: Use DemandedElts variant. +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts); // Attempt to decode ops that could be represented as a shuffle mask. 
// The decoded shuffle mask may contain a different number of elements to the @@ -6644,7 +6852,8 @@ static bool resolveTargetShuffleInputs(SDValue Op, static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, SmallVectorImpl &Mask, SmallVectorImpl &Ops, - SelectionDAG &DAG) { + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts) { Mask.clear(); Ops.clear(); @@ -6685,7 +6894,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, Mask.push_back(SM_SentinelUndef); continue; } - uint64_t ByteBits = EltBits[i].getZExtValue(); + const APInt &ByteBits = EltBits[i]; if (ByteBits != 0 && ByteBits != 255) return false; Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i); @@ -6696,8 +6905,10 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, case ISD::OR: { // Inspect each operand at the byte level. We can merge these into a // blend shuffle mask if for each byte at least one is masked out (zero). - KnownBits Known0 = DAG.computeKnownBits(N.getOperand(0), DemandedElts); - KnownBits Known1 = DAG.computeKnownBits(N.getOperand(1), DemandedElts); + KnownBits Known0 = + DAG.computeKnownBits(N.getOperand(0), DemandedElts, Depth + 1); + KnownBits Known1 = + DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1); if (Known0.One.isNullValue() && Known1.One.isNullValue()) { bool IsByteMask = true; unsigned NumSizeInBytes = NumSizeInBits / 8; @@ -6736,14 +6947,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, return false; SmallVector SrcMask0, SrcMask1; SmallVector SrcInputs0, SrcInputs1; - if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) || - !resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG)) + if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1, + true) || + !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1, + true)) return false; - int MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); + size_t MaskSize = 
std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; scaleShuffleMask(MaskSize / SrcMask0.size(), SrcMask0, Mask0); scaleShuffleMask(MaskSize / SrcMask1.size(), SrcMask1, Mask1); - for (int i = 0; i != MaskSize; ++i) { + for (size_t i = 0; i != MaskSize; ++i) { if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef) Mask.push_back(SM_SentinelUndef); else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero) @@ -6751,14 +6964,12 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, else if (Mask1[i] == SM_SentinelZero) Mask.push_back(Mask0[i]); else if (Mask0[i] == SM_SentinelZero) - Mask.push_back(Mask1[i] + (MaskSize * SrcInputs0.size())); + Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size())); else return false; } - for (SDValue &Op : SrcInputs0) - Ops.push_back(Op); - for (SDValue &Op : SrcInputs1) - Ops.push_back(Op); + Ops.append(SrcInputs0.begin(), SrcInputs0.end()); + Ops.append(SrcInputs1.begin(), SrcInputs1.end()); return true; } case ISD::INSERT_SUBVECTOR: { @@ -6786,8 +6997,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)). SmallVector SubMask; SmallVector SubInputs; - if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, - SubMask, DAG)) + if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, + SubMask, DAG, Depth + 1, ResolveKnownElts)) return false; if (SubMask.size() != NumSubElts) { assert(((SubMask.size() % NumSubElts) == 0 || @@ -6911,14 +7122,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // as a truncation shuffle. 
if (Opcode == X86ISD::PACKSS) { if ((!N0.isUndef() && - DAG.ComputeNumSignBits(N0, EltsLHS) <= NumBitsPerElt) || + DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) || (!N1.isUndef() && - DAG.ComputeNumSignBits(N1, EltsRHS) <= NumBitsPerElt)) + DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt)) return false; } else { APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); - if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS)) || - (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS))) + if ((!N0.isUndef() && + !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) || + (!N1.isUndef() && + !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1))) return false; } @@ -7061,23 +7274,45 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, Inputs = UsedInputs; } -/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs -/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the -/// remaining input indices in case we now have a unary shuffle and adjust the -/// inputs accordingly. +/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs +/// and then sets the SM_SentinelUndef and SM_SentinelZero values. /// Returns true if the target shuffle mask was decoded. 
-static bool resolveTargetShuffleInputs(SDValue Op, - SmallVectorImpl &Inputs, - SmallVectorImpl &Mask, - SelectionDAG &DAG) { +static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, + SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + APInt &KnownUndef, APInt &KnownZero, + SelectionDAG &DAG, unsigned Depth, + bool ResolveKnownElts) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return false; + + if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { + if (ResolveKnownElts) + resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero); + return true; + } + if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, + ResolveKnownElts)) { + resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); + return true; + } + return false; +} + +static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, + SmallVectorImpl &Mask, + SelectionDAG &DAG, unsigned Depth = 0, + bool ResolveKnownElts = true) { + EVT VT = Op.getValueType(); + if (!VT.isSimple() || !VT.isVector()) + return false; + + APInt KnownUndef, KnownZero; unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); - if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG)) - return false; - - resolveTargetShuffleInputsAndMask(Inputs, Mask); - return true; + return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef, + KnownZero, DAG, Depth, ResolveKnownElts); } /// Returns the scalar element that will make up the ith @@ -7414,7 +7649,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); SDLoc DL(Op); SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getIntPtrConstant(InsertPSMask, DL)); + DAG.getIntPtrConstant(InsertPSMask, DL, true)); return DAG.getBitcast(VT, Result); } @@ -7427,7 
+7662,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8); + SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -7439,7 +7674,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, // the shuffle mask. if (LoadSDNode *LD = dyn_cast(SrcOp)) { SDValue Ptr = LD->getBasePtr(); - if (!ISD::isNormalLoad(LD) || LD->isVolatile()) + if (!ISD::isNormalLoad(LD) || !LD->isSimple()) return SDValue(); EVT PVT = LD->getValueType(0); if (PVT != MVT::i32 && PVT != MVT::f32) @@ -7504,6 +7739,49 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, return SDValue(); } +// Recurse to find a LoadSDNode source and the accumulated ByteOffest. 
+static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) { + if (ISD::isNON_EXTLoad(Elt.getNode())) { + auto *BaseLd = cast(Elt); + if (!BaseLd->isSimple()) + return false; + Ld = BaseLd; + ByteOffset = 0; + return true; + } + + switch (Elt.getOpcode()) { + case ISD::BITCAST: + case ISD::TRUNCATE: + case ISD::SCALAR_TO_VECTOR: + return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset); + case ISD::SRL: + if (isa(Elt.getOperand(1))) { + uint64_t Idx = Elt.getConstantOperandVal(1); + if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) { + ByteOffset += Idx / 8; + return true; + } + } + break; + case ISD::EXTRACT_VECTOR_ELT: + if (isa(Elt.getOperand(1))) { + SDValue Src = Elt.getOperand(0); + unsigned SrcSizeInBits = Src.getScalarValueSizeInBits(); + unsigned DstSizeInBits = Elt.getScalarValueSizeInBits(); + if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 && + findEltLoadSrc(Src, Ld, ByteOffset)) { + uint64_t Idx = Elt.getConstantOperandVal(1); + ByteOffset += Idx * (SrcSizeInBits / 8); + return true; + } + } + break; + } + + return false; +} + /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the /// elements can be replaced by a single large load which has the same value as /// a build_vector or insert_subvector whose loaded operands are 'Elts'. @@ -7513,6 +7791,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, bool isAfterLegalize) { + if ((VT.getScalarSizeInBits() % 8) != 0) + return SDValue(); + unsigned NumElems = Elts.size(); int LastLoadedElt = -1; @@ -7521,6 +7802,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, APInt UndefMask = APInt::getNullValue(NumElems); SmallVector Loads(NumElems, nullptr); + SmallVector ByteOffsets(NumElems, 0); // For each element in the initializer, see if we've found a load, zero or an // undef. 
@@ -7539,13 +7821,16 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, // Each loaded element must be the correct fractional portion of the // requested vector load. - if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits()) + unsigned EltSizeInBits = Elt.getValueSizeInBits(); + if ((NumElems * EltSizeInBits) != VT.getSizeInBits()) return SDValue(); - if (!ISD::isNON_EXTLoad(Elt.getNode())) + if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0) + return SDValue(); + unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0); + if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits) return SDValue(); - Loads[i] = cast(Elt); LoadMask.setBit(i); LastLoadedElt = i; } @@ -7575,6 +7860,24 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits; assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected"); + // TODO: Support offsetting the base load. + if (ByteOffsets[FirstLoadedElt] != 0) + return SDValue(); + + // Check to see if the element's load is consecutive to the base load + // or offset from a previous (already checked) load. + auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) { + LoadSDNode *Ld = Loads[EltIdx]; + int64_t ByteOffset = ByteOffsets[EltIdx]; + if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) { + int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); + return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] && + Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0); + } + return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes, + EltIdx - FirstLoadedElt); + }; + // Consecutive loads can contain UNDEFS but not ZERO elements. // Consecutive loads with UNDEFs and ZEROs elements require a // an additional shuffle stage to clear the ZERO elements. 
@@ -7582,8 +7885,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, bool IsConsecutiveLoadWithZeros = true; for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) { if (LoadMask[i]) { - if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes, - i - FirstLoadedElt)) { + if (!CheckConsecutiveLoad(LDBase, i)) { IsConsecutiveLoad = false; IsConsecutiveLoadWithZeros = false; break; @@ -7595,8 +7897,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); - assert(!(MMOFlags & MachineMemOperand::MOVolatile) && - "Cannot merge volatile loads."); + assert(LDBase->isSimple() && + "Cannot merge volatile or atomic loads."); SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); @@ -7636,17 +7938,22 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. if (!isAfterLegalize && VT.isVector()) { - SmallVector ClearMask(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - if (ZeroMask[i]) - ClearMask[i] = i + NumElems; - else if (LoadMask[i]) - ClearMask[i] = i; + unsigned NumMaskElts = VT.getVectorNumElements(); + if ((NumMaskElts % NumElems) == 0) { + unsigned Scale = NumMaskElts / NumElems; + SmallVector ClearMask(NumMaskElts, -1); + for (unsigned i = 0; i < NumElems; ++i) { + if (UndefMask[i]) + continue; + int Offset = ZeroMask[i] ? NumMaskElts : 0; + for (unsigned j = 0; j != Scale; ++j) + ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset; + } + SDValue V = CreateLoad(VT, LDBase); + SDValue Z = VT.isInteger() ? 
DAG.getConstant(0, DL, VT) + : DAG.getConstantFP(0.0, DL, VT); + return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } - SDValue V = CreateLoad(VT, LDBase); - SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) - : DAG.getConstantFP(0.0, DL, VT); - return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); } } @@ -8194,34 +8501,10 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, "Unexpected type in LowerBUILD_VECTORvXi1!"); SDLoc dl(Op); - if (ISD::isBuildVectorAllZeros(Op.getNode())) + if (ISD::isBuildVectorAllZeros(Op.getNode()) || + ISD::isBuildVectorAllOnes(Op.getNode())) return Op; - if (ISD::isBuildVectorAllOnes(Op.getNode())) - return Op; - - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { - // Split the pieces. - SDValue Lower = - DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32)); - SDValue Upper = - DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); - // We have to manually lower both halves so getNode doesn't try to - // reassemble the build_vector. 
- Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); - } - SDValue Imm = ConvertI1VectorToInteger(Op, DAG); - if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getBitcast(VT, Imm); - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); - } - - // Vector has one or more non-const elements uint64_t Immediate = 0; SmallVector NonConstIdx; bool IsSplat = true; @@ -8244,29 +8527,40 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, } // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" - if (IsSplat) - return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), + if (IsSplat) { + // The build_vector allows the scalar element to be larger than the vector + // element type. We need to mask it to use as a condition unless we know + // the upper bits are zero. + // FIXME: Use computeKnownBits instead of checking specific opcode? 
+ SDValue Cond = Op.getOperand(SplatIdx); + assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!"); + if (Cond.getOpcode() != ISD::SETCC) + Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond, + DAG.getConstant(1, dl, MVT::i8)); + return DAG.getSelect(dl, VT, Cond, DAG.getConstant(1, dl, VT), DAG.getConstant(0, dl, VT)); + } // insert elements one by one SDValue DstVec; - SDValue Imm; - if (Immediate) { - MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8)); - Imm = DAG.getConstant(Immediate, dl, ImmVT); - } - else if (HasConstElts) - Imm = DAG.getConstant(0, dl, VT); - else - Imm = DAG.getUNDEF(VT); - if (Imm.getValueSizeInBits() == VT.getSizeInBits()) - DstVec = DAG.getBitcast(VT, Imm); - else { - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); - DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec, - DAG.getIntPtrConstant(0, dl)); - } + if (HasConstElts) { + if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32); + SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32); + ImmL = DAG.getBitcast(MVT::v32i1, ImmL); + ImmH = DAG.getBitcast(MVT::v32i1, ImmH); + DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH); + } else { + MVT ImmVT = MVT::getIntegerVT(std::max(VT.getSizeInBits(), 8U)); + SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT); + MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; + DstVec = DAG.getBitcast(VecVT, Imm); + DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec, + DAG.getIntPtrConstant(0, dl)); + } + } else + DstVec = DAG.getUNDEF(VT); for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) { unsigned InsertIdx = NonConstIdx[i]; @@ -8757,7 +9051,7 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, // If we don't need the upper xmm, then perform as a xmm hop. 
unsigned HalfNumElts = NumElts / 2; if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { - MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); @@ -8965,21 +9259,14 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); // Vectors containing all zeros can be matched by pxor and xorps. - if (ISD::isBuildVectorAllZeros(Op.getNode())) { - // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd - // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) - return Op; - - return getZeroVector(VT, Subtarget, DAG, DL); - } + if (ISD::isBuildVectorAllZeros(Op.getNode())) + return Op; // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { - if (VT == MVT::v4i32 || VT == MVT::v16i32 || - (VT == MVT::v8i32 && Subtarget.hasInt256())) + if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getOnesVector(VT, DAG, DL); @@ -9150,9 +9437,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, {4, 5, 6, 7, 4, 5, 6, 7}); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. 
// This works as VPERMILPS only uses index bits[0:1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -9186,9 +9473,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, // VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec. IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); if (Subtarget.hasXOP()) - return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, - LoLo, HiHi, IndicesVec, - DAG.getConstant(0, DL, MVT::i8))); + return DAG.getBitcast( + VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, + IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); // Permute Lo and Hi and then select based on index range. // This works as VPERMILPD only uses index bit[1] to permute elements. SDValue Res = DAG.getSelectCC( @@ -9283,7 +9570,7 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, return SDValue(); auto *PermIdx = dyn_cast(ExtractedIndex.getOperand(1)); - if (!PermIdx || PermIdx->getZExtValue() != Idx) + if (!PermIdx || PermIdx->getAPIntValue() != Idx) return SDValue(); } @@ -9434,23 +9721,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // it to i32 first. if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - if (VT.getSizeInBits() >= 256) { - MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); - if (Subtarget.hasAVX()) { - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); - } else { - // Without AVX, we need to extend to a 128-bit vector and then - // insert into the 256-bit vector. 
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl); - Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl); - } - } else { - assert(VT.is128BitVector() && "Expected an SSE value type!"); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); - } + MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); return DAG.getBitcast(VT, Item); } } @@ -9549,8 +9822,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); // Recreate the wider vector with the lower and upper part. - return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, - VT.getSizeInBits() / 2); + return concatSubVectors(Lower, Upper, DAG, dl); } // Let legalizer expand 2-wide build_vectors. @@ -9703,8 +9975,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, // If we have more than 2 non-zeros, build each half separately. if (NumNonZero > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); @@ -9745,30 +10016,47 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && "Unexpected number of operands in CONCAT_VECTORS"); - unsigned NumZero = 0; - unsigned NumNonZero = 0; + uint64_t Zeros = 0; uint64_t NonZeros = 0; for (unsigned i = 0; i != NumOperands; ++i) { SDValue SubVec = Op.getOperand(i); if (SubVec.isUndef()) continue; + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. 
if (ISD::isBuildVectorAllZeros(SubVec.getNode())) - ++NumZero; - else { - assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + Zeros |= (uint64_t)1 << i; + else NonZeros |= (uint64_t)1 << i; - ++NumNonZero; - } } + unsigned NumElems = ResVT.getVectorNumElements(); + + // If we are inserting non-zero vector and there are zeros in LSBs and undef + // in the MSBs we need to emit a KSHIFTL. The generic lowering to + // insert_subvector will give us two kshifts. + if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros && + Log2_64(NonZeros) != NumOperands - 1) { + MVT ShiftVT = ResVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) + ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + unsigned Idx = Log2_64(NonZeros); + SDValue SubVec = Op.getOperand(Idx); + unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); + SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT, + DAG.getUNDEF(ShiftVT), SubVec, + DAG.getIntPtrConstant(0, dl)); + Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec, + DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op, + DAG.getIntPtrConstant(0, dl)); + } // If there are zero or one non-zeros we can handle this very simply. - if (NumNonZero <= 1) { - SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) - : DAG.getUNDEF(ResVT); - if (!NumNonZero) + if (NonZeros == 0 || isPowerOf2_64(NonZeros)) { + SDValue Vec = Zeros ? 
DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT); + if (!NonZeros) return Vec; - unsigned Idx = countTrailingZeros(NonZeros); + unsigned Idx = Log2_64(NonZeros); SDValue SubVec = Op.getOperand(Idx); unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, @@ -9776,8 +10064,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, } if (NumOperands > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); ArrayRef Ops = Op->ops(); SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops.slice(0, NumOperands/2)); @@ -9786,7 +10073,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } - assert(NumNonZero == 2 && "Simple cases not handled?"); + assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?"); if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK @@ -9794,7 +10081,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, DAG.getUNDEF(ResVT), Op.getOperand(0), DAG.getIntPtrConstant(0, dl)); - unsigned NumElems = ResVT.getVectorNumElements(); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), DAG.getIntPtrConstant(NumElems/2, dl)); } @@ -9997,42 +10283,44 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef Mask, /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// -/// SM_SentinelZero is accepted as a valid negative index but must match in both. +/// SM_SentinelZero is accepted as a valid negative index but must match in +/// both. 
static bool isTargetShuffleEquivalent(ArrayRef Mask, - ArrayRef ExpectedMask) { + ArrayRef ExpectedMask, + SDValue V1 = SDValue(), + SDValue V2 = SDValue()) { int Size = Mask.size(); if (Size != (int)ExpectedMask.size()) return false; assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) && "Illegal target shuffle mask"); - for (int i = 0; i < Size; ++i) - if (Mask[i] == SM_SentinelUndef) + // Check for out-of-range target shuffle mask indices. + if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size)) + return false; + + // If the values are build vectors, we can look through them to find + // equivalent inputs that make the shuffles equivalent. + auto *BV1 = dyn_cast_or_null(V1); + auto *BV2 = dyn_cast_or_null(V2); + BV1 = ((BV1 && Size != (int)BV1->getNumOperands()) ? nullptr : BV1); + BV2 = ((BV2 && Size != (int)BV2->getNumOperands()) ? nullptr : BV2); + + for (int i = 0; i < Size; ++i) { + if (Mask[i] == SM_SentinelUndef || Mask[i] == ExpectedMask[i]) continue; - else if (Mask[i] < 0 && Mask[i] != SM_SentinelZero) - return false; - else if (Mask[i] != ExpectedMask[i]) - return false; - - return true; -} - -// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle -// mask. -static SmallVector createTargetShuffleMask(ArrayRef Mask, - const APInt &Zeroable) { - int NumElts = Mask.size(); - assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes"); - - SmallVector TargetMask(NumElts, SM_SentinelUndef); - for (int i = 0; i != NumElts; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index"); - TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M); + if (0 <= Mask[i] && 0 <= ExpectedMask[i]) { + auto *MaskBV = Mask[i] < Size ? BV1 : BV2; + auto *ExpectedBV = ExpectedMask[i] < Size ? 
BV1 : BV2; + if (MaskBV && ExpectedBV && + MaskBV->getOperand(Mask[i] % Size) == + ExpectedBV->getOperand(ExpectedMask[i] % Size)) + continue; + } + // TODO - handle SM_Sentinel equivalences. + return false; } - return TargetMask; + return true; } // Attempt to create a shuffle mask from a VSELECT condition mask. @@ -10133,7 +10421,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef Mask) { static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, const SDLoc &DL, SelectionDAG &DAG) { - return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); + return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); } /// Compute whether each element of a shuffle is zeroable. @@ -10573,14 +10861,14 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, // Try binary shuffle. SmallVector BinaryMask; createPackShuffleMask(VT, BinaryMask, false); - if (isTargetShuffleEquivalent(TargetMask, BinaryMask)) + if (isTargetShuffleEquivalent(TargetMask, BinaryMask, V1, V2)) if (MatchPACK(V1, V2)) return true; // Try unary shuffle. 
SmallVector UnaryMask; createPackShuffleMask(VT, UnaryMask, true); - if (isTargetShuffleEquivalent(TargetMask, UnaryMask)) + if (isTargetShuffleEquivalent(TargetMask, UnaryMask, V1)) if (MatchPACK(V1, V1)) return true; @@ -10685,9 +10973,9 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SelectionDAG &DAG); static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, - MutableArrayRef TargetMask, - bool &ForceV1Zero, bool &ForceV2Zero, - uint64_t &BlendMask) { + MutableArrayRef Mask, + const APInt &Zeroable, bool &ForceV1Zero, + bool &ForceV2Zero, uint64_t &BlendMask) { bool V1IsZeroOrUndef = V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZeroOrUndef = @@ -10695,13 +10983,12 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask = 0; ForceV1Zero = false, ForceV2Zero = false; - assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask"); + assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); // Attempt to generate the binary blend mask. If an input is zero then // we can use any lane. - // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. 
- for (int i = 0, Size = TargetMask.size(); i < Size; ++i) { - int M = TargetMask[i]; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + int M = Mask[i]; if (M == SM_SentinelUndef) continue; if (M == i) @@ -10710,16 +10997,16 @@ static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2, BlendMask |= 1ull << i; continue; } - if (M == SM_SentinelZero) { + if (Zeroable[i]) { if (V1IsZeroOrUndef) { ForceV1Zero = true; - TargetMask[i] = i; + Mask[i] = i; continue; } if (V2IsZeroOrUndef) { ForceV2Zero = true; BlendMask |= 1ull << i; - TargetMask[i] = i + Size; + Mask[i] = i + Size; continue; } } @@ -10748,11 +11035,10 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SmallVector Mask = createTargetShuffleMask(Original, Zeroable); - uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; - if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero, + SmallVector Mask(Original.begin(), Original.end()); + if (!matchVectorShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero, BlendMask)) return SDValue(); @@ -10778,7 +11064,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v8i16: assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!"); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); case MVT::v16i16: { assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!"); SmallVector RepeatedMask; @@ -10790,7 +11076,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, if (RepeatedMask[i] >= 8) BlendMask |= 1ull << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(BlendMask, DL, MVT::i8)); + DAG.getTargetConstant(BlendMask, DL, MVT::i8)); } // Use PBLENDW for lower/upper lanes and then blend lanes. 
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to @@ -10799,9 +11085,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, uint64_t HiMask = (BlendMask >> 8) & 0xFF; if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(LoMask, DL, MVT::i8)); + DAG.getTargetConstant(LoMask, DL, MVT::i8)); SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(HiMask, DL, MVT::i8)); + DAG.getTargetConstant(HiMask, DL, MVT::i8)); return DAG.getVectorShuffle( MVT::v16i16, DL, Lo, Hi, {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); @@ -11061,7 +11347,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute( SDValue Rotate = DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), DAG.getBitcast(ByteVT, Lo), - DAG.getConstant(Scale * RotAmt, DL, MVT::i8))); + DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8))); SmallVector PermMask(NumElts, SM_SentinelUndef); for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { @@ -11268,7 +11554,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, "512-bit PALIGNR requires BWI instructions"); return DAG.getBitcast( VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi, - DAG.getConstant(ByteRotation, DL, MVT::i8))); + DAG.getTargetConstant(ByteRotation, DL, MVT::i8))); } assert(VT.is128BitVector() && @@ -11282,10 +11568,12 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, int LoByteShift = 16 - ByteRotation; int HiByteShift = ByteRotation; - SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, - DAG.getConstant(LoByteShift, DL, MVT::i8)); - SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, - DAG.getConstant(HiByteShift, DL, MVT::i8)); + SDValue LoShift = + DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, 
Lo, + DAG.getTargetConstant(LoByteShift, DL, MVT::i8)); + SDValue HiShift = + DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, + DAG.getTargetConstant(HiByteShift, DL, MVT::i8)); return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); } @@ -11317,7 +11605,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1, return SDValue(); return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, - DAG.getConstant(Rotation, DL, MVT::i8)); + DAG.getTargetConstant(Rotation, DL, MVT::i8)); } /// Try to lower a vector shuffle as a byte shift sequence. @@ -11356,27 +11644,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask( if (ZeroLo == 0) { unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroHi, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8)); } else if (ZeroHi == 0) { unsigned Shift = Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else if (!Subtarget.hasSSSE3()) { // If we don't have PSHUFB then its worth avoiding an AND constant mask // by performing 3 byte shifts. Shuffle combining can kick in above that. // TODO: There may be some cases where VSH{LR}DQ+PAND is still better. 
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Shift += Mask[ZeroLo] % NumElts; Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * Shift, DL, MVT::i8)); + DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, - DAG.getConstant(Scale * ZeroLo, DL, MVT::i8)); + DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); } else return SDValue(); @@ -11498,7 +11786,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, "Illegal integer vector type"); V = DAG.getBitcast(ShiftVT, V); V = DAG.getNode(Opcode, DL, ShiftVT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); return DAG.getBitcast(VT, V); } @@ -11632,14 +11920,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), V2 ? V2 : DAG.getUNDEF(VT), - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -11686,9 +11974,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); }; - // Found a valid zext mask! Try various lowering strategies based on the + // Found a valid a/zext mask! 
Try various lowering strategies based on the // input type and available ISA extensions. - // TODO: Add AnyExt support. if (Subtarget.hasSSE41()) { // Not worth offsetting 128-bit vectors if scale == 2, a pattern using // PUNPCK will catch this in a later shuffle match. @@ -11697,7 +11984,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), NumElements / Scale); InputV = ShuffleOffset(InputV); - InputV = getExtendInVec(ISD::ZERO_EXTEND, DL, ExtVT, InputV, DAG); + InputV = getExtendInVec(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND, DL, + ExtVT, InputV, DAG); return DAG.getBitcast(VT, InputV); } @@ -11736,8 +12024,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int LoIdx = Offset * EltBits; SDValue Lo = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(LoIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(LoIdx, DL, MVT::i8))); if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1)) return DAG.getBitcast(VT, Lo); @@ -11745,8 +12033,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( int HiIdx = (Offset + 1) * EltBits; SDValue Hi = DAG.getBitcast( MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(HiIdx, DL, MVT::i8))); + DAG.getTargetConstant(EltBits, DL, MVT::i8), + DAG.getTargetConstant(HiIdx, DL, MVT::i8))); return DAG.getBitcast(VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } @@ -11759,8 +12047,12 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) { int Idx = Offset + (i / Scale); - PSHUFBMask[i] = DAG.getConstant( - (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); + if ((i % Scale == 0 && SafeOffset(Idx))) { + PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8); + continue; + } + PSHUFBMask[i] = + AnyExt ? 
DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8); } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast( @@ -12052,9 +12344,9 @@ static SDValue lowerShuffleAsElementInsertion( V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); } else { V2 = DAG.getBitcast(MVT::v16i8, V2); - V2 = DAG.getNode( - X86ISD::VSHLDQ, DL, MVT::v16i8, V2, - DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); + V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2, + DAG.getTargetConstant( + V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); V2 = DAG.getBitcast(VT, V2); } } @@ -12294,7 +12586,7 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, // If we can't broadcast from a register, check that the input is a load. if (!BroadcastFromReg && !isShuffleFoldableLoad(V)) return SDValue(); - } else if (MayFoldLoad(V) && !cast(V)->isVolatile()) { + } else if (MayFoldLoad(V) && cast(V)->isSimple()) { // 32-bit targets need to load i64 as a f64 and then bitcast the result. if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) { BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); @@ -12486,7 +12778,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, // Insert the V2 element into the desired position. return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } /// Try to lower a shuffle as a permute of the inputs followed by an @@ -12635,14 +12927,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } return DAG.getNode( X86ISD::SHUFP, DL, MVT::v2f64, Mask[0] == SM_SentinelUndef ? 
DAG.getUNDEF(MVT::v2f64) : V1, Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); @@ -12688,7 +12980,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, - DAG.getConstant(SHUFPDMask, DL, MVT::i8)); + DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); } /// Handle lowering of 2-lane 64-bit integer shuffles. @@ -12996,10 +13288,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef Mask, int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); if (NumV2Elements == 0) { - // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, - Mask, Subtarget, DAG)) - return Broadcast; + // Try to use broadcast unless the mask only has one non-undef element. + if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) { + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, + Mask, Subtarget, DAG)) + return Broadcast; + } // Straight shuffle of a single input vector. For everything from SSE2 // onward this has a single fast instruction with no scary immediates. @@ -13680,16 +13974,16 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; }); if (NumV2Inputs == 0) { - // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, - Mask, Subtarget, DAG)) - return Broadcast; - // Try to use shift instructions. 
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, Zeroable, Subtarget, DAG)) return Shift; + // Check for being able to broadcast a single element. + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, + Mask, Subtarget, DAG)) + return Broadcast; + // Use dedicated unpack instructions for masks that match their pattern. if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; @@ -13984,8 +14278,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); // Unpack the bytes to form the i16s that will be shuffled into place. + bool EvenInUse = false, OddInUse = false; + for (int i = 0; i < 16; i += 2) { + EvenInUse |= (Mask[i + 0] >= 0); + OddInUse |= (Mask[i + 1] >= 0); + if (EvenInUse && OddInUse) + break; + } V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, - MVT::v16i8, V1, V1); + MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8), + OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8)); int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; for (int i = 0; i < 16; ++i) @@ -14100,11 +14402,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, // First we need to zero all the dropped bytes. assert(NumEvenDrops <= 3 && "No support for dropping even elements more than 3 times."); - // We use the mask type to pick which bytes are preserved based on how many - // elements are dropped. 
- MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; - SDValue ByteClearMask = DAG.getBitcast( - MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); + SmallVector ByteClearOps(16, DAG.getConstant(0, DL, MVT::i8)); + for (unsigned i = 0; i != 16; i += 1 << NumEvenDrops) + ByteClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i8); + SDValue ByteClearMask = DAG.getBuildVector(MVT::v16i8, DL, ByteClearOps); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); @@ -14448,16 +14749,14 @@ static SDValue lowerShuffleAsLanePermuteAndPermute( return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask); } -/// Lower a vector shuffle crossing multiple 128-bit lanes as -/// a permutation and blend of those lanes. +/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one +/// source with a lane permutation. /// -/// This essentially blends the out-of-lane inputs to each lane into the lane -/// from a permuted copy of the vector. This lowering strategy results in four -/// instructions in the worst case for a single-input cross lane shuffle which -/// is lower than any other fully general cross-lane shuffle strategy I'm aware -/// of. Special cases for each particular shuffle pattern should be handled -/// prior to trying this lowering. -static SDValue lowerShuffleAsLanePermuteAndBlend( +/// This lowering strategy results in four instructions in the worst case for a +/// single-input cross lane shuffle which is lower than any other fully general +/// cross-lane shuffle strategy I'm aware of. Special cases for each particular +/// shuffle pattern should be handled prior to trying this lowering. +static SDValue lowerShuffleAsLanePermuteAndShuffle( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FIXME: This should probably be generalized for 512-bit vectors as well. 
@@ -14484,24 +14783,28 @@ static SDValue lowerShuffleAsLanePermuteAndBlend( return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); } + // TODO - we could support shuffling V2 in the Flipped input. assert(V2.isUndef() && "This last part of this routine only works on single input shuffles"); - SmallVector FlippedBlendMask(Size); - for (int i = 0; i < Size; ++i) - FlippedBlendMask[i] = - Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) - ? Mask[i] - : Mask[i] % LaneSize + - (i / LaneSize) * LaneSize + Size); + SmallVector InLaneMask(Mask.begin(), Mask.end()); + for (int i = 0; i < Size; ++i) { + int &M = InLaneMask[i]; + if (M < 0) + continue; + if (((M % Size) / LaneSize) != (i / LaneSize)) + M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size; + } + assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) && + "In-lane shuffle mask expected"); - // Flip the vector, and blend the results which should now be in-lane. + // Flip the lanes, and shuffle the results which should now be in-lane. MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; SDValue Flipped = DAG.getBitcast(PVT, V1); - Flipped = DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), - { 2, 3, 0, 1 }); + Flipped = + DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1}); Flipped = DAG.getBitcast(VT, Flipped); - return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); + return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask); } /// Handle lowering 2-lane 128-bit shuffles. 
@@ -14565,8 +14868,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { unsigned PermMask = ((WidenedMask[0] % 2) << 0) | ((WidenedMask[1] % 2) << 1); - return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } } } @@ -14598,7 +14901,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, V2 = DAG.getUNDEF(VT); return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Lower a vector shuffle by first fixing the 128-bit lanes and then @@ -14616,26 +14919,26 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (is128BitLaneRepeatedShuffleMask(VT, Mask)) return SDValue(); - int Size = Mask.size(); + int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; - int LaneSize = 128 / VT.getScalarSizeInBits(); - SmallVector RepeatMask(LaneSize, -1); + int NumLaneElts = 128 / VT.getScalarSizeInBits(); + SmallVector RepeatMask(NumLaneElts, -1); SmallVector, 2> LaneSrcs(NumLanes, {{-1, -1}}); // First pass will try to fill in the RepeatMask from lanes that need two // sources. for (int Lane = 0; Lane != NumLanes; ++Lane) { - int Srcs[2] = { -1, -1 }; - SmallVector InLaneMask(LaneSize, -1); - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + int Srcs[2] = {-1, -1}; + SmallVector InLaneMask(NumLaneElts, -1); + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // Determine which of the possible input lanes (NumLanes from each source) // this element comes from. Assign that as one of the sources for this // lane. We can assign up to 2 sources for this lane. If we run out // sources we can't do anything. 
- int LaneSrc = M / LaneSize; + int LaneSrc = M / NumLaneElts; int Src; if (Srcs[0] < 0 || Srcs[0] == LaneSrc) Src = 0; @@ -14645,7 +14948,7 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); Srcs[Src] = LaneSrc; - InLaneMask[i] = (M % LaneSize) + Src * Size; + InLaneMask[i] = (M % NumLaneElts) + Src * NumElts; } // If this lane has two sources, see if it fits with the repeat mask so far. @@ -14701,23 +15004,23 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( if (LaneSrcs[Lane][0] >= 0) continue; - for (int i = 0; i != LaneSize; ++i) { - int M = Mask[(Lane * LaneSize) + i]; + for (int i = 0; i != NumLaneElts; ++i) { + int M = Mask[(Lane * NumLaneElts) + i]; if (M < 0) continue; // If RepeatMask isn't defined yet we can define it ourself. if (RepeatMask[i] < 0) - RepeatMask[i] = M % LaneSize; + RepeatMask[i] = M % NumLaneElts; - if (RepeatMask[i] < Size) { - if (RepeatMask[i] != M % LaneSize) + if (RepeatMask[i] < NumElts) { + if (RepeatMask[i] != M % NumLaneElts) return SDValue(); - LaneSrcs[Lane][0] = M / LaneSize; + LaneSrcs[Lane][0] = M / NumLaneElts; } else { - if (RepeatMask[i] != ((M % LaneSize) + Size)) + if (RepeatMask[i] != ((M % NumLaneElts) + NumElts)) return SDValue(); - LaneSrcs[Lane][1] = M / LaneSize; + LaneSrcs[Lane][1] = M / NumLaneElts; } } @@ -14725,14 +15028,14 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( return SDValue(); } - SmallVector NewMask(Size, -1); + SmallVector NewMask(NumElts, -1); for (int Lane = 0; Lane != NumLanes; ++Lane) { int Src = LaneSrcs[Lane][0]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -14745,11 +15048,11 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( for (int Lane = 0; Lane != NumLanes; ++Lane) 
{ int Src = LaneSrcs[Lane][1]; - for (int i = 0; i != LaneSize; ++i) { + for (int i = 0; i != NumLaneElts; ++i) { int M = -1; if (Src >= 0) - M = Src * LaneSize + i; - NewMask[Lane * LaneSize + i] = M; + M = Src * NumLaneElts + i; + NewMask[Lane * NumLaneElts + i] = M; } } SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); @@ -14760,12 +15063,12 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( cast(NewV2)->getMask() == Mask) return SDValue(); - for (int i = 0; i != Size; ++i) { - NewMask[i] = RepeatMask[i % LaneSize]; + for (int i = 0; i != NumElts; ++i) { + NewMask[i] = RepeatMask[i % NumLaneElts]; if (NewMask[i] < 0) continue; - NewMask[i] += (i / LaneSize) * LaneSize; + NewMask[i] += (i / NumLaneElts) * NumLaneElts; } return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask); } @@ -14831,14 +15134,13 @@ getHalfShuffleMask(ArrayRef Mask, MutableArrayRef HalfMask, static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef HalfMask, int HalfIdx1, int HalfIdx2, bool UndefLower, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool UseConcat = false) { assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); assert(V1.getValueType().isSimple() && "Expecting only simple types"); MVT VT = V1.getSimpleValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + unsigned HalfNumElts = HalfVT.getVectorNumElements(); auto getHalfVector = [&](int HalfIdx) { if (HalfIdx < 0) @@ -14853,6 +15155,14 @@ static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, SDValue Half1 = getHalfVector(HalfIdx1); SDValue Half2 = getHalfVector(HalfIdx2); SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); + if (UseConcat) { + SDValue Op0 = V; + SDValue Op1 = DAG.getUNDEF(HalfVT); + if (UndefLower) + std::swap(Op0, Op1); + return 
DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); + } + unsigned Offset = UndefLower ? HalfNumElts : 0; return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, DAG.getIntPtrConstant(Offset, DL)); @@ -14877,9 +15187,8 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, // Upper half is undef and lower half is whole upper subvector. // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> - unsigned NumElts = VT.getVectorNumElements(); - unsigned HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + unsigned HalfNumElts = HalfVT.getVectorNumElements(); if (!UndefLower && isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, @@ -15155,11 +15464,19 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( } static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, - unsigned &ShuffleImm, ArrayRef Mask) { + bool &ForceV1Zero, bool &ForceV2Zero, + unsigned &ShuffleImm, ArrayRef Mask, + const APInt &Zeroable) { int NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() == 64 && (NumElts == 2 || NumElts == 4 || NumElts == 8) && "Unexpected data type for VSHUFPD"); + assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) && + "Illegal shuffle mask"); + + bool ZeroLane[2] = { true, true }; + for (int i = 0; i < NumElts; ++i) + ZeroLane[i & 1] &= Zeroable[i]; // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, .. // Mask for V4F64; 0/1, 4/5, 2/3, 6/7.. 
@@ -15167,7 +15484,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, bool ShufpdMask = true; bool CommutableMask = true; for (int i = 0; i < NumElts; ++i) { - if (Mask[i] == SM_SentinelUndef) + if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1]) continue; if (Mask[i] < 0) return false; @@ -15180,30 +15497,77 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, ShuffleImm |= (Mask[i] % 2) << i; } - if (ShufpdMask) - return true; - if (CommutableMask) { - std::swap(V1, V2); - return true; - } + if (!ShufpdMask && !CommutableMask) + return false; - return false; + if (!ShufpdMask && CommutableMask) + std::swap(V1, V2); + + ForceV1Zero = ZeroLane[0]; + ForceV2Zero = ZeroLane[1]; + return true; } -static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& +static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; - if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) + bool ForceV1Zero = false, ForceV2Zero = false; + if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, + Mask, Zeroable)) return SDValue(); + // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. + if (ForceV1Zero) + V1 = getZeroVector(VT, Subtarget, DAG, DL); + if (ForceV2Zero) + V2 = getZeroVector(VT, Subtarget, DAG, DL); + return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, - DAG.getConstant(Immediate, DL, MVT::i8)); + DAG.getTargetConstant(Immediate, DL, MVT::i8)); } +// Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed +// by zeroable elements in the remaining 24 elements. 
Turn this into two +// vmovqb instructions shuffled together. +static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG) { + assert(VT == MVT::v32i8 && "Unexpected type!"); + + // The first 8 indices should be every 8th element. + if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8)) + return SDValue(); + + // Remaining elements need to be zeroable. + if (Zeroable.countLeadingOnes() < (Mask.size() - 8)) + return SDValue(); + + V1 = DAG.getBitcast(MVT::v4i64, V1); + V2 = DAG.getBitcast(MVT::v4i64, V2); + + V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1); + V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2); + + // The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in + // the upper bits of the result using an unpckldq. + SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, + { 0, 1, 2, 3, 16, 17, 18, 19, + 4, 5, 6, 7, 20, 21, 22, 23 }); + // Insert the unpckldq into a zero vector to widen to v32i8. + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8, + DAG.getConstant(0, DL, MVT::v32i8), Unpack, + DAG.getIntPtrConstant(0, DL)); +} + + /// Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 @@ -15236,7 +15600,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. @@ -15256,8 +15620,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, return V; // Otherwise, fall back. 
- return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG, - Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask, + DAG, Subtarget); } // Use dedicated unpack instructions for masks that match their pattern. @@ -15269,7 +15633,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, return Blend; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Op; // If we have one input in place, then we can permute the other input and @@ -15473,8 +15838,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask, + DAG, Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -15681,8 +16046,8 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask, + DAG, Subtarget); } SmallVector RepeatedMask; @@ -15780,8 +16145,8 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, - Subtarget); + return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask, + DAG, Subtarget); } if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2, @@ -15803,6 +16168,14 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, 
MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; + // Look for {0, 8, 16, 24, 32, 40, 48, 56 } in the first 8 elements. Followed + // by zeroable elements in the remaining 24 elements. Turn this into two + // vmovqb instructions shuffled together. + if (Subtarget.hasVLX()) + if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2, + Mask, Zeroable, DAG)) + return V; + // Otherwise fall back on generic lowering. return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG); @@ -15974,7 +16347,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); } /// Handle lowering of 8-lane 64-bit floating point shuffles. @@ -15999,7 +16372,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) | ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1, - DAG.getConstant(VPERMILPMask, DL, MVT::i8)); + DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); } SmallVector RepeatedMask; @@ -16016,7 +16389,8 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, return Unpck; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Op; if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, @@ -16389,6 +16763,49 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, } } +static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + // Shuffle should be unary. 
+ if (!V2.isUndef()) + return SDValue(); + + int ShiftAmt = -1; + int NumElts = Mask.size(); + for (int i = 0; i != NumElts; ++i) { + int M = Mask[i]; + assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) && + "Unexpected mask index."); + if (M < 0) + continue; + + // The first non-undef element determines our shift amount. + if (ShiftAmt < 0) { + ShiftAmt = M - i; + // Need to be shifting right. + if (ShiftAmt <= 0) + return SDValue(); + } + // All non-undef elements must shift by the same amount. + if (ShiftAmt != M - i) + return SDValue(); + } + assert(ShiftAmt >= 0 && "All undef?"); + + // Great we found a shift right. + MVT WideVT = VT; + if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) + WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, + DAG.getUNDEF(WideVT), V1, + DAG.getIntPtrConstant(0, DL)); + Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res, + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); +} + // Determine if this shuffle can be implemented with a KSHIFT instruction. // Returns the shift amount if possible or -1 if not. This is a simplified // version of matchShuffleAsShift. @@ -16434,13 +16851,20 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); - unsigned NumElts = Mask.size(); + int NumElts = Mask.size(); // Try to recognize shuffles that are just padding a subvector with zeros. - unsigned SubvecElts = 0; - for (int i = 0; i != (int)NumElts; ++i) { - if (Mask[i] >= 0 && Mask[i] != i) - break; + int SubvecElts = 0; + int Src = -1; + for (int i = 0; i != NumElts; ++i) { + if (Mask[i] >= 0) { + // Grab the source from the first valid mask. All subsequent elements need + // to use this same source. 
+ if (Src < 0) + Src = Mask[i] / NumElts; + if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i) + break; + } ++SubvecElts; } @@ -16451,30 +16875,54 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, // Make sure the number of zeroable bits in the top at least covers the bits // not covered by the subvector. - if (Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { + if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { + assert(Src >= 0 && "Expected a source!"); MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, - V1, DAG.getIntPtrConstant(0, DL)); + Src == 0 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), + DAG.getConstant(0, DL, VT), Extract, DAG.getIntPtrConstant(0, DL)); } + // Try a simple shift right with undef elements. Later we'll try with zeros. + if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget, + DAG)) + return Shift; + // Try to match KSHIFTs. - // TODO: Support narrower than legal shifts by widening and extracting. - if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) { - unsigned Offset = 0; - for (SDValue V : { V1, V2 }) { - unsigned Opcode; - int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); - if (ShiftAmt >= 0) - return DAG.getNode(Opcode, DL, VT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); - Offset += NumElts; // Increment for next iteration. + unsigned Offset = 0; + for (SDValue V : { V1, V2 }) { + unsigned Opcode; + int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); + if (ShiftAmt >= 0) { + MVT WideVT = VT; + if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) + WideVT = Subtarget.hasDQI() ? 
MVT::v8i1 : MVT::v16i1; + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, + DAG.getUNDEF(WideVT), V, + DAG.getIntPtrConstant(0, DL)); + // Widened right shifts need two shifts to ensure we shift in zeroes. + if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { + int WideElts = WideVT.getVectorNumElements(); + // Shift left to put the original vector in the MSBs of the new size. + Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, + DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); + // Increase the shift amount to account for the left shift. + ShiftAmt += WideElts - NumElts; + } + + Res = DAG.getNode(Opcode, DL, WideVT, Res, + DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); } + Offset += NumElts; // Increment for next iteration. } + MVT ExtVT; switch (VT.SimpleTy) { default: @@ -16594,7 +17042,7 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef Mask) { static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); - ArrayRef Mask = SVOp->getMask(); + ArrayRef OrigMask = SVOp->getMask(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); MVT VT = Op.getSimpleValueType(); @@ -16620,8 +17068,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // undef as well. This makes it easier to match the shuffle based solely on // the mask. if (V2IsUndef && - any_of(Mask, [NumElements](int M) { return M >= NumElements; })) { - SmallVector NewMask(Mask.begin(), Mask.end()); + any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) { + SmallVector NewMask(OrigMask.begin(), OrigMask.end()); for (int &M : NewMask) if (M >= NumElements) M = -1; @@ -16629,15 +17077,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, } // Check for illegal shuffle mask element index values. 
- int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit; - assert(llvm::all_of(Mask, + int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); + (void)MaskUpperLimit; + assert(llvm::all_of(OrigMask, [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && "Out of bounds shuffle index"); // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. - APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2); if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); @@ -16645,11 +17094,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // Create an alternative mask with info about zeroable elements. // Here we do not set undef elements as zeroable. - SmallVector ZeroableMask(Mask.begin(), Mask.end()); + SmallVector ZeroableMask(OrigMask.begin(), OrigMask.end()); if (V2IsZero) { assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); for (int i = 0; i != NumElements; ++i) - if (Mask[i] != SM_SentinelUndef && Zeroable[i]) + if (OrigMask[i] != SM_SentinelUndef && Zeroable[i]) ZeroableMask[i] = SM_SentinelZero; } @@ -16664,7 +17113,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. - if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, Subtarget, DAG)) return Broadcast; @@ -16700,8 +17149,11 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, } // Commute the shuffle if it will improve canonicalization. 
- if (canonicalizeShuffleMaskWithCommute(Mask)) - return DAG.getCommutedVectorShuffle(*SVOp); + SmallVector Mask(OrigMask.begin(), OrigMask.end()); + if (canonicalizeShuffleMaskWithCommute(Mask)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget)) return V; @@ -16910,7 +17362,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, // Use kshiftr instruction to move to the lower element. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -17137,8 +17589,8 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || (Subtarget.hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); - N2 = DAG.getIntPtrConstant(1, dl); - return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, + DAG.getTargetConstant(1, dl, MVT::i8)); } } @@ -17207,14 +17659,14 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // But if optimizing for size and there's a load folding opportunity, // generate insertps because blendps does not have a 32-bit memory // operand form. - N2 = DAG.getIntPtrConstant(1, dl); N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); - return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, + DAG.getTargetConstant(1, dl, MVT::i8)); } - N2 = DAG.getIntPtrConstant(IdxVal << 4, dl); // Create this as a scalar to vector.. 
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); - return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, + DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8)); } // PINSR* works with constant index. @@ -17300,7 +17752,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // Shift to the LSB. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); @@ -17841,10 +18293,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, std::swap(Op0, Op1); APInt APIntShiftAmt; - if (isConstantSplat(Amt, APIntShiftAmt)) { + if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); - return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, - Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, + Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); } return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, @@ -17970,6 +18422,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); + if (VT == MVT::f128) + return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT)); + if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) return Extract; @@ -18072,6 +18527,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, return Result; } +/// Horizontal vector math instructions may be slower than normal math with +/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch +/// implementation, and likely shuffle complexity of the alternate sequence. 
+static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool HasFastHOps = Subtarget.hasFastHorizontalOps(); + return !IsSingleSource || IsOptimizingSize || HasFastHOps; +} + /// 64-bit unsigned integer to double expansion. static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -18126,8 +18591,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; - if (Subtarget.hasSSE3()) { - // FIXME: The 'haddpd' instruction may be slower than 'shuffle + addsd'. + if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) { Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); } else { SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); @@ -18273,7 +18737,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, - VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); + VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); @@ -18281,7 +18745,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // High will be bitcasted right away, so do not bother bitcasting back to // its original type. 
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, - VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); + VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); } else { SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; @@ -18329,16 +18793,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue N0 = Op.getOperand(0); SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); + MVT SrcVT = N0.getSimpleValueType(); + MVT DstVT = Op.getSimpleValueType(); - if (Op.getSimpleValueType().isVector()) + if (DstVT == MVT::f128) + return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT)); + + if (DstVT.isVector()) return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) return Extract; - MVT SrcVT = N0.getSimpleValueType(); - MVT DstVT = Op.getSimpleValueType(); - if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { // Conversions from unsigned i32 to f32/f64 are legal, @@ -18346,6 +18812,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return Op; } + // Promote i32 to i64 and use a signed conversion on 64-bit targets. + if (SrcVT == MVT::i32 && Subtarget.is64Bit()) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0); + return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0); + } + if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) return V; @@ -18579,7 +19051,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, // Custom legalize v8i8->v8i64 on CPUs without avx512bw. if (InVT == MVT::v8i8) { - if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) + if (VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), @@ -18602,10 +19074,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. 
// Concat upper and lower parts. // - - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - + MVT HalfVT = VT.getHalfNumVectorElementsVT(); SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In); // Short-circuit if we can determine that each 128-bit half is the same value. @@ -18903,9 +19372,29 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); - // If called by the legalizer just return. - if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT)) + // If we're called by the type legalizer, handle a few cases. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(InVT)) { + if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && + VT.is128BitVector()) { + assert(Subtarget.hasVLX() && "Unexpected subtarget!"); + // The default behavior is to truncate one step, concatenate, and then + // truncate the remainder. We'd rather produce two 64-bit results and + // concatenate those. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(In, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo); + Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + + // Otherwise let default legalization handle it. return SDValue(); + } if (VT.getVectorElementType() == MVT::i1) return LowerTruncateVecI1(Op, DAG, Subtarget); @@ -18940,6 +19429,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget)) return V; + // Handle truncation of V256 to V128 using shuffles. + assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); + if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. 
if (Subtarget.hasInt256()) { @@ -19016,22 +19508,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi); } - // Handle truncation of V256 to V128 using shuffles. - assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); - - assert(Subtarget.hasAVX() && "256-bit vector without AVX!"); - - unsigned NumElems = VT.getVectorNumElements(); - MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2); - - SmallVector MaskVec(NumElems * 2, -1); - // Prepare truncation shuffle mask - for (unsigned i = 0; i != NumElems; ++i) - MaskVec[i] = i * 2; - In = DAG.getBitcast(NVT, In); - SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, - DAG.getIntPtrConstant(0, DL)); + llvm_unreachable("All 256->128 cases should have been handled above!"); } SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { @@ -19041,6 +19518,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { MVT SrcVT = Src.getSimpleValueType(); SDLoc dl(Op); + if (SrcVT == MVT::f128) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(SrcVT, VT); + else + LC = RTLIB::getFPTOUINT(SrcVT, VT); + + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first; + } + if (VT.isVector()) { if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) { MVT ResVT = MVT::v4i32; @@ -19075,14 +19563,27 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT); - if (!IsSigned && Subtarget.hasAVX512()) { - // Conversions from f32/f64 should be legal. - if (UseSSEReg) + if (!IsSigned && UseSSEReg) { + // Conversions from f32/f64 with AVX512 should be legal. + if (Subtarget.hasAVX512()) return Op; - // Use default expansion. + // Use default expansion for i64. 
if (VT == MVT::i64) return SDValue(); + + assert(VT == MVT::i32 && "Unexpected VT!"); + + // Promote i32 to i64 and use a signed operation on 64-bit targets. + if (Subtarget.is64Bit()) { + SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + } + + // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can + // use fisttp which will be handled later. + if (!Subtarget.hasSSE3()) + return SDValue(); } // Promote i16 to i32 if we can use a SSE operation. @@ -19103,12 +19604,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases."); } -static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); + if (VT == MVT::f128) { + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT); + return LowerF128Call(Op, DAG, LC); + } + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); return DAG.getNode(X86ISD::VFPEXT, DL, VT, @@ -19116,14 +19622,31 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { In, DAG.getUNDEF(SVT))); } -/// Horizontal vector math instructions may be slower than normal math with -/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch -/// implementation, and likely shuffle complexity of the alternate sequence. 
-static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); - bool HasFastHOps = Subtarget.hasFastHorizontalOps(); - return !IsSingleSource || IsOptimizingSize || HasFastHOps; +SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + SDValue In = Op.getOperand(0); + MVT SVT = In.getSimpleValueType(); + + // It's legal except when f128 is involved + if (SVT != MVT::f128) + return Op; + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT); + + // FP_ROUND node has a second operand indicating whether it is known to be + // precise. That doesn't take part in the LibCall so we can't directly use + // LowerF128Call. + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first; +} + +// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking +// the default expansion of STRICT_FP_ROUND. +static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) { + // FIXME: Need to form a libcall with an input chain for f128. + assert(Op.getOperand(0).getValueType() != MVT::f128 && + "Don't know how to handle f128 yet!"); + return Op; } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -19200,8 +19723,13 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, /// Depending on uarch and/or optimizing for size, we might prefer to use a /// vector operation in place of the typical scalar operation. -static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::f128) { + RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? 
RTLIB::ADD_F128 + : RTLIB::SUB_F128; + return LowerF128Call(Op, DAG, LC); + } + assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Only expecting float/double"); return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); @@ -19358,13 +19886,13 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, SelectionDAG &DAG) { return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, dl, MVT::i8), EFLAGS); + DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS); } /// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...)) /// style scalarized (associative) reduction patterns. -static bool matchBitOpReduction(SDValue Op, ISD::NodeType BinOp, - SmallVectorImpl &SrcOps) { +static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp, + SmallVectorImpl &SrcOps) { SmallVector Opnds; DenseMap SrcOpMap; EVT VT = MVT::Other; @@ -19437,7 +19965,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC, return SDValue(); SmallVector VecIns; - if (!matchBitOpReduction(Op, ISD::OR, VecIns)) + if (!matchScalarReduction(Op, ISD::OR, VecIns)) return SDValue(); // Quit if not 128/256-bit vector. @@ -19461,8 +19989,8 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC, VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS)); } - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, DL, - MVT::i8); + X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, + DL, MVT::i8); return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back()); } @@ -19576,6 +20104,13 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case X86ISD::XOR: case X86ISD::AND: return SDValue(Op.getNode(), 1); + case ISD::SSUBO: + case ISD::USUBO: { + // /USUBO/SSUBO will become a X86ISD::SUB and we can use its Z flag. 
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), + Op->getOperand(1)).getValue(1); + } default: default_case: break; @@ -19766,6 +20301,63 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; } +SDValue +X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + if (isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + + assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) && + "Unexpected divisor!"); + + // Only perform this transform if CMOV is supported otherwise the select + // below will become a branch. + if (!Subtarget.hasCMov()) + return SDValue(); + + // fold (sdiv X, pow2) + EVT VT = N->getValueType(0); + // FIXME: Support i8. + if (VT != MVT::i16 && VT != MVT::i32 && + !(Subtarget.is64Bit() && VT == MVT::i64)) + return SDValue(); + + unsigned Lg2 = Divisor.countTrailingZeros(); + + // If the divisor is 2 or -2, the default expansion is better. + if (Lg2 == 1) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + SDValue Zero = DAG.getConstant(0, DL, VT); + APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2); + SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT); + + // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right. + SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); + SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0); + + Created.push_back(Cmp.getNode()); + Created.push_back(Add.getNode()); + Created.push_back(CMov.getNode()); + + // Divide by pow2. + SDValue SRA = + DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64)); + + // If we're dividing by a positive value, we're done. 
Otherwise, we must + // negate the result. + if (Divisor.isNonNegative()) + return SRA; + + Created.push_back(SRA.getNode()); + return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA); +} + /// Result of 'and' is compared against zero. Change to a BT node if possible. /// Returns the BT node and the condition code needed to use it. static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, @@ -19842,8 +20434,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, if (Src.getValueType() != BitNo.getValueType()) BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); - X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, - dl, MVT::i8); + X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, + dl, MVT::i8); return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); } @@ -19935,13 +20527,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast(CC)->get(); - // If this is a seteq make sure any build vectors of all zeros are on the RHS. - // This helps with vptestm matching. - // TODO: Should we just canonicalize the setcc during DAG combine? - if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) && - ISD::isBuildVectorAllZeros(Op0.getNode())) - std::swap(Op0, Op1); - // Prefer SETGT over SETLT. if (SetCCOpcode == ISD::SETLT) { SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode); @@ -20007,7 +20592,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, // Only do this pre-AVX since vpcmp* is no longer destructive. 
if (Subtarget.hasAVX()) return SDValue(); - SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false); + SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false); if (!ULEOp1) return SDValue(); Op1 = ULEOp1; @@ -20018,7 +20603,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, // This is beneficial because materializing a constant 0 for the PCMPEQ is // probably cheaper than XOR+PCMPGT using 2 different vector constants: // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0 - SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true); + SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true); if (!UGEOp1) return SDValue(); Op1 = Op0; @@ -20086,14 +20671,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, dl, MVT::i8)); + DAG.getTargetConstant(CC0, dl, MVT::i8)); SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, dl, MVT::i8)); + DAG.getTargetConstant(CC1, dl, MVT::i8)); Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } else { // Handle all other FP comparisons here. Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(SSECC, dl, MVT::i8)); + DAG.getTargetConstant(SSECC, dl, MVT::i8)); } // If this is SSE/AVX CMPP, bitcast the result back to integer to match the @@ -20106,16 +20691,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } MVT VTOp0 = Op0.getSimpleValueType(); + (void)VTOp0; assert(VTOp0 == Op1.getSimpleValueType() && "Expected operands with same type!"); assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && "Invalid number of packed elements for source and destination!"); - // This is being called by type legalization because v2i32 is marked custom - // for result type legalization for v2f32. - if (VTOp0 == MVT::v2i32) - return SDValue(); - // The non-AVX512 code below works under the assumption that source and // destination types are the same. 
assert((Subtarget.hasAVX512() || (VT == VTOp0)) && @@ -20153,7 +20734,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CmpMode, dl, MVT::i8)); + DAG.getTargetConstant(CmpMode, dl, MVT::i8)); } // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2. @@ -20222,21 +20803,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, TLI.isOperationLegal(ISD::UMIN, VT)) { // If we have a constant operand, increment/decrement it and change the // condition to avoid an invert. - if (Cond == ISD::SETUGT && - ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) { - return !C->getAPIntValue().isMaxValue(); - })) { + if (Cond == ISD::SETUGT) { // X > C --> X >= (C+1) --> X == umax(X, C+1) - Op1 = DAG.getNode(ISD::ADD, dl, VT, Op1, DAG.getConstant(1, dl, VT)); - Cond = ISD::SETUGE; + if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) { + Op1 = UGTOp1; + Cond = ISD::SETUGE; + } } - if (Cond == ISD::SETULT && - ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) { - return !C->getAPIntValue().isNullValue(); - })) { + if (Cond == ISD::SETULT) { // X < C --> X <= (C-1) --> X == umin(X, C-1) - Op1 = DAG.getNode(ISD::SUB, dl, VT, Op1, DAG.getConstant(1, dl, VT)); - Cond = ISD::SETULE; + if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) { + Op1 = ULTOp1; + Cond = ISD::SETULE; + } } bool Invert = false; unsigned Opc; @@ -20360,11 +20939,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, return Result; } -// Try to select this as a KORTEST+SETCC if possible. -static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, - const SDLoc &dl, SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue &X86CC) { +// Try to select this as a KORTEST+SETCC or KTEST+SETCC if possible. 
+static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + SDValue &X86CC) { // Only support equality comparisons. if (CC != ISD::SETEQ && CC != ISD::SETNE) return SDValue(); @@ -20389,6 +20968,21 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, } else return SDValue(); + // If the input is an AND, we can combine it's operands into the KTEST. + bool KTestable = false; + if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) + KTestable = true; + if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)) + KTestable = true; + if (!isNullConstant(Op1)) + KTestable = false; + if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) { + SDValue LHS = Op0.getOperand(0); + SDValue RHS = Op0.getOperand(1); + X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); + return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS); + } + // If the input is an OR, we can combine it's operands into the KORTEST. SDValue LHS = Op0; SDValue RHS = Op0; @@ -20397,7 +20991,7 @@ static SDValue EmitKORTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, RHS = Op0.getOperand(1); } - X86CC = DAG.getConstant(X86Cond, dl, MVT::i8); + X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); } @@ -20425,9 +21019,9 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, return PTEST; } - // Try to lower using KORTEST. - if (SDValue KORTEST = EmitKORTEST(Op0, Op1, CC, dl, DAG, Subtarget, X86CC)) - return KORTEST; + // Try to lower using KORTEST or KTEST. + if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC)) + return Test; // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. 
@@ -20442,7 +21036,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, if (Invert) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - X86CC = DAG.getConstant(CCode, dl, MVT::i8); + X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8); } return Op0.getOperand(1); @@ -20456,7 +21050,7 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - X86CC = DAG.getConstant(CondCode, dl, MVT::i8); + X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); return EFLAGS; } @@ -20472,6 +21066,19 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); ISD::CondCode CC = cast(Op.getOperand(2))->get(); + // Handle f128 first, since one possible outcome is a normal integer + // comparison which gets handled by emitFlagsForSetcc. + if (Op0.getValueType() == MVT::f128) { + softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1); + + // If softenSetCCOperands returned a scalar, use it. 
+ if (!Op1.getNode()) { + assert(Op0.getValueType() == Op.getValueType() && + "Unexpected setcc expansion!"); + return Op0; + } + } + SDValue X86CC; SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC); if (!EFLAGS) @@ -20612,15 +21219,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, - CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); + SDValue Cmp = + DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, + DAG.getTargetConstant(SSECC, DL, MVT::i8)); assert(!VT.isVector() && "Not a scalar type?"); return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } if (SSECC < 8 || Subtarget.hasAVX()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, - DAG.getConstant(SSECC, DL, MVT::i8)); + DAG.getTargetConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. @@ -20718,8 +21326,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond.getOperand(1).getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); - unsigned CondCode = - cast(Cond.getOperand(0))->getZExtValue(); + unsigned CondCode = Cond.getConstantOperandVal(0); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { @@ -20807,8 +21414,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); - MVT VT = Op.getSimpleValueType(); - bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT)) // FPStack? 
@@ -20826,7 +21431,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { X86::CondCode X86Cond; std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); - CC = DAG.getConstant(X86Cond, DL, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8); AddTest = false; } @@ -20848,7 +21453,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (AddTest) { - CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, DL, Cond.getValueType()), X86::COND_NE, DL, DAG); } @@ -20864,9 +21469,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (isNullConstant(Op1) || isNullConstant(Op2))) { - SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, DL, MVT::i8), - Cond); + SDValue Res = + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond); if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; @@ -21037,8 +21642,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, // pre-AVX2 256-bit extensions need to be split into 128-bit instructions. 
if (Subtarget.hasAVX()) { assert(VT.is256BitVector() && "256-bit vector expected"); - int HalfNumElts = NumElts / 2; - MVT HalfVT = MVT::getVectorVT(SVT, HalfNumElts); + MVT HalfVT = VT.getHalfNumVectorElementsVT(); + int HalfNumElts = HalfVT.getVectorNumElements(); unsigned NumSrcElts = InVT.getVectorNumElements(); SmallVector HiMask(NumSrcElts, SM_SentinelUndef); @@ -21081,7 +21686,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, unsigned SignExtShift = DestWidth - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr, - DAG.getConstant(SignExtShift, dl, MVT::i8)); + DAG.getTargetConstant(SignExtShift, dl, MVT::i8)); } if (VT == MVT::v2i64) { @@ -21119,7 +21724,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, // Custom legalize v8i8->v8i64 on CPUs without avx512bw. if (InVT == MVT::v8i8) { - if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64) + if (VT != MVT::v8i64) return SDValue(); In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), @@ -21138,10 +21743,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, // for v4i32 the high shuffle mask will be {2, 3, -1, -1} // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 // concat the vectors to original VT - - MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - + MVT HalfVT = VT.getHalfNumVectorElementsVT(); SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In); unsigned NumElems = InVT.getVectorNumElements(); @@ -21165,7 +21767,7 @@ static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) { // Splitting volatile memory ops is not allowed unless the operation was not // legal to begin with. We are assuming the input op is legal (this transform // is only used for targets with AVX). 
- if (Store->isVolatile()) + if (!Store->isSimple()) return SDValue(); MVT StoreVT = StoredVal.getSimpleValueType(); @@ -21201,7 +21803,7 @@ static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, // Splitting volatile memory ops is not allowed unless the operation was not // legal to begin with. We are assuming the input op is legal (this transform // is only used for targets with AVX). - if (Store->isVolatile()) + if (!Store->isSimple()) return SDValue(); MVT StoreSVT = StoreVT.getScalarType(); @@ -21266,14 +21868,13 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, return SDValue(); } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 && "Unexpected VT"); - if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) != - TargetLowering::TypeWidenVector) - return SDValue(); + assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) == + TargetLowering::TypeWidenVector && "Unexpected type action!"); - MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(), - StoreVT.getVectorNumElements() * 2); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT); StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal, DAG.getUNDEF(StoreVT)); @@ -21313,11 +21914,10 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, LoadSDNode *Ld = cast(Op.getNode()); SDLoc dl(Ld); - EVT MemVT = Ld->getMemoryVT(); // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads. 
if (RegVT.getVectorElementType() == MVT::i1) { - assert(EVT(RegVT) == MemVT && "Expected non-extending load"); + assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load"); assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT"); assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && "Expected AVX512F without AVX512DQI"); @@ -21336,176 +21936,7 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget, return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl); } - // Nothing useful we can do without SSE2 shuffles. - assert(Subtarget.hasSSE2() && "We only custom lower sext loads with SSE2."); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned RegSz = RegVT.getSizeInBits(); - - ISD::LoadExtType Ext = Ld->getExtensionType(); - - assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD) - && "Only anyext and sext are currently implemented."); - assert(MemVT != RegVT && "Cannot extend to the same type"); - assert(MemVT.isVector() && "Must load a vector from memory"); - - unsigned NumElems = RegVT.getVectorNumElements(); - unsigned MemSz = MemVT.getSizeInBits(); - assert(RegSz > MemSz && "Register size must be greater than the mem size"); - - if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget.hasInt256()) { - // The only way in which we have a legal 256-bit vector result but not the - // integer 256-bit operations needed to directly lower a sextload is if we - // have AVX1 but not AVX2. In that case, we can always emit a sextload to - // a 128-bit vector and a normal sign_extend to 256-bits that should get - // correctly legalized. We do this late to allow the canonical form of - // sextload to persist throughout the rest of the DAG combiner -- it wants - // to fold together any extensions it can, and so will fuse a sign_extend - // of an sextload into a sextload targeting a wider value. - SDValue Load; - if (MemSz == 128) { - // Just switch this to a normal load. 
- assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, " - "it must be a legal 128-bit vector " - "type!"); - Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); - } else { - assert(MemSz < 128 && - "Can't extend a type wider than 128 bits to a 256 bit vector!"); - // Do an sext load to a 128-bit vector type. We want to use the same - // number of elements, but elements half as wide. This will end up being - // recursively lowered by this routine, but will succeed as we definitely - // have all the necessary features if we're using AVX1. - EVT HalfEltVT = - EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2); - EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems); - Load = - DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), MemVT, Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); - } - - // Replace chain users with the new chain. - assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); - - // Finally, do a normal sign-extend to the desired register. - SDValue SExt = DAG.getSExtOrTrunc(Load, dl, RegVT); - return DAG.getMergeValues({SExt, Load.getValue(1)}, dl); - } - - // All sizes must be a power of two. - assert(isPowerOf2_32(RegSz * MemSz * NumElems) && - "Non-power-of-two elements are not custom lowered!"); - - // Attempt to load the original value using scalar loads. - // Find the largest scalar type that divides the total loaded size. - MVT SclrLoadTy = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) { - SclrLoadTy = Tp; - } - } - - // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. 
- if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 && - (64 <= MemSz)) - SclrLoadTy = MVT::f64; - - // Calculate the number of scalar loads that we need to perform - // in order to load our vector from memory. - unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); - - assert((Ext != ISD::SEXTLOAD || NumLoads == 1) && - "Can only lower sext loads with a single scalar load!"); - - unsigned loadRegSize = RegSz; - if (Ext == ISD::SEXTLOAD && RegSz >= 256) - loadRegSize = 128; - - // If we don't have BWI we won't be able to create the shuffle needed for - // v8i8->v8i64. - if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && - MemVT == MVT::v8i8) - loadRegSize = 128; - - // Represent our vector as a sequence of elements which are the - // largest scalar that we can load. - EVT LoadUnitVecVT = EVT::getVectorVT( - *DAG.getContext(), SclrLoadTy, loadRegSize / SclrLoadTy.getSizeInBits()); - - // Represent the data using the same element type that is stored in - // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegSize / MemVT.getScalarSizeInBits()); - - assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && - "Invalid vector type"); - - // We can't shuffle using an illegal type. - assert(TLI.isTypeLegal(WideVecVT) && - "We only lower types that form legal widened vector types"); - - SmallVector Chains; - SDValue Ptr = Ld->getBasePtr(); - unsigned OffsetInc = SclrLoadTy.getSizeInBits() / 8; - SDValue Increment = DAG.getConstant(OffsetInc, dl, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue Res = DAG.getUNDEF(LoadUnitVecVT); - - unsigned Offset = 0; - for (unsigned i = 0; i < NumLoads; ++i) { - unsigned NewAlign = MinAlign(Ld->getAlignment(), Offset); - - // Perform a single load. 
- SDValue ScalarLoad = - DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, - Ld->getPointerInfo().getWithOffset(Offset), - NewAlign, Ld->getMemOperand()->getFlags()); - Chains.push_back(ScalarLoad.getValue(1)); - // Create the first element type using SCALAR_TO_VECTOR in order to avoid - // another round of DAGCombining. - if (i == 0) - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); - else - Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, - ScalarLoad, DAG.getIntPtrConstant(i, dl)); - - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - Offset += OffsetInc; - } - - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); - - // Bitcast the loaded value to a vector of the original element type, in - // the size of the target vector type. - SDValue SlicedVec = DAG.getBitcast(WideVecVT, Res); - unsigned SizeRatio = RegSz / MemSz; - - if (Ext == ISD::SEXTLOAD) { - SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG); - return DAG.getMergeValues({Sext, TF}, dl); - } - - if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && - MemVT == MVT::v8i8) { - SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG); - return DAG.getMergeValues({Sext, TF}, dl); - } - - // Redistribute the loaded elements into the different locations. - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio] = i; - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, - DAG.getUNDEF(WideVecVT), ShuffleVec); - - // Bitcast to the requested type. 
- Shuff = DAG.getBitcast(RegVT, Shuff); - return DAG.getMergeValues({Shuff, TF}, dl); + return SDValue(); } /// Return true if node is an ISD::AND or ISD::OR of two X86ISD::SETCC nodes @@ -21610,7 +22041,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Inverted) X86Cond = X86::GetOppositeBranchCondition(X86Cond); - CC = DAG.getConstant(X86Cond, dl, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); addTest = false; } else { unsigned CondOpc; @@ -21638,10 +22069,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cmp == Cond.getOperand(1).getOperand(1) && isX86LogicalCmp(Cmp) && Op.getNode()->hasOneUse()) { - X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); - CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + X86::CondCode CCode0 = + (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); + CCode0 = X86::GetOppositeBranchCondition(CCode0); + CC = DAG.getTargetConstant(CCode0, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. 
// We need this because we need to reverse the successors in order @@ -21654,12 +22085,12 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (void)NewBR; Dest = FalseBB; - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - X86::CondCode CCode = - (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); - CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, + Dest, CC, Cmp); + X86::CondCode CCode1 = + (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); + CCode1 = X86::GetOppositeBranchCondition(CCode1); + CC = DAG.getTargetConstant(CCode1, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21672,7 +22103,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, dl, MVT::i8); + CC = DAG.getTargetConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; } else if (Cond.getOpcode() == ISD::SETCC && @@ -21698,10 +22129,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); - CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21714,10 +22145,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, 
DAG); - CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); + CC = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -21742,7 +22173,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; - CC = DAG.getConstant(X86Cond, dl, MVT::i8); + CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); Cond = EmitCmp(Cond, DAG.getConstant(0, dl, Cond.getValueType()), X86Cond, dl, DAG); } @@ -21770,7 +22201,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + unsigned Align = Op.getConstantOperandVal(2); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack @@ -21811,7 +22242,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); - unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); + Register Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); @@ -21821,7 +22252,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, MF.getInfo()->setHasWinAlloca(true); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned SPReg = RegInfo->getStackRegister(); + Register SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); @@ -22076,7 +22507,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, } return DAG.getNode(Opc, dl, VT, SrcOp, - 
DAG.getConstant(ShiftAmt, dl, MVT::i8)); + DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); } /// Handle vector element shifts where the shift amount may or may not be a @@ -22121,7 +22552,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt), MVT::v2i64, ShAmt); else { - SDValue ByteShift = DAG.getConstant( + SDValue ByteShift = DAG.getTargetConstant( (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt); ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt, @@ -22308,13 +22739,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Helper to detect if the operand is CUR_DIRECTION rounding mode. auto isRoundModeCurDirection = [](SDValue Rnd) { if (auto *C = dyn_cast(Rnd)) - return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; + return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; return false; }; auto isRoundModeSAE = [](SDValue Rnd) { - if (auto *C = dyn_cast(Rnd)) - return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC; + if (auto *C = dyn_cast(Rnd)) { + unsigned RC = C->getZExtValue(); + if (RC & X86::STATIC_ROUNDING::NO_EXC) { + // Clear the NO_EXC bit and check remaining bits. + RC ^= X86::STATIC_ROUNDING::NO_EXC; + // As a convenience we allow no other bits or explicitly + // current direction. 
+ return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; + } + } return false; }; @@ -22335,7 +22774,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, }; SDLoc dl(Op); - unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { @@ -22411,9 +22850,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); - if (IntrData->Type == INTR_TYPE_3OP_IMM8) - Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); - // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -22666,7 +23102,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_CC: { MVT MaskVT = Op.getSimpleValueType(); SDValue CC = Op.getOperand(3); - CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. 
@@ -22685,7 +23120,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case CMP_MASK_SCALAR_CC: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); - SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); + SDValue CC = Op.getOperand(3); SDValue Mask = Op.getOperand(4); SDValue Cmp; @@ -22750,16 +23185,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case COMI_RM: { // Comparison intrinsics with Sae SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - unsigned CondVal = cast(Op.getOperand(3))->getZExtValue(); + unsigned CondVal = Op.getConstantOperandVal(3); SDValue Sae = Op.getOperand(4); SDValue FCmp; if (isRoundModeCurDirection(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8)); + DAG.getTargetConstant(CondVal, dl, MVT::i8)); else if (isRoundModeSAE(Sae)) FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8), Sae); + DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); else return SDValue(); // Need to fill with zeros to ensure the bitcast will produce zeroes @@ -22819,9 +23254,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. 
- SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(2), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast(Op.getOperand(2)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), RoundingMode); } @@ -22829,12 +23264,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy // intrinsic can't trigger the scaling behavior of VRNDSCALE. - SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32, - Op.getOperand(3), - DAG.getConstant(0xf, dl, MVT::i32)); + auto Round = cast(Op.getOperand(3)); + SDValue RoundingMode = + DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), RoundingMode); } + case BEXTRI: { + assert(IntrData->Opc0 == X86ISD::BEXTR && "Unexpected opcode"); + + // The control is a TargetConstant, but we need to convert it to a + // ConstantSDNode. 
+ uint64_t Imm = Op.getConstantOperandVal(2); + SDValue Control = DAG.getConstant(Imm, dl, Op.getValueType()); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(1), Control); + } // ADC/ADCX/SBB case ADX: { SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); @@ -23165,6 +23610,61 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, MaskVT, Operation); return DAG.getMergeValues({Result0, Result1}, DL); } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDLoc DL(Op); + SDValue ShAmt = Op.getOperand(2); + // If the argument is a constant, convert it to a target constant. + if (auto *C = dyn_cast(ShAmt)) { + ShAmt = DAG.getTargetConstant(C->getZExtValue(), DL, MVT::i32); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1), ShAmt); + } + + unsigned NewIntrinsic; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Copy the 32 bits to an + // MMX register. + ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + DAG.getConstant(NewIntrinsic, DL, MVT::i32), + Op.getOperand(1), ShAmt); + + } } } @@ -23177,7 +23677,9 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger(); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); // If source is undef or we know it won't be used, use a zero vector @@ -23204,7 +23706,9 @@ static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, // Scale must be constant. 
if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), VT.getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); @@ -23238,7 +23742,9 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), Src.getSimpleValueType().getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); @@ -23266,7 +23772,9 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, // Scale must be constant. 
if (!C) return SDValue(); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); MVT MaskVT = @@ -23435,8 +23943,7 @@ EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - + unsigned IntNo = Op.getConstantOperandVal(1); const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { switch (IntNo) { @@ -23538,10 +24045,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. - SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), - DAG.getConstant(1, dl, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, dl, MVT::i8), - SDValue(Result.getNode(), 1) }; + SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), + DAG.getConstant(1, dl, Op->getValueType(1)), + DAG.getTargetConstant(X86::COND_B, dl, MVT::i8), + SDValue(Result.getNode(), 1)}; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. @@ -23581,8 +24088,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, Scale, Chain, Subtarget); } case PREFETCH: { - SDValue Hint = Op.getOperand(6); - unsigned HintVal = cast(Hint)->getZExtValue(); + const APInt &HintVal = Op.getConstantOperandAPInt(6); assert((HintVal == 2 || HintVal == 3) && "Wrong prefetch hint in intrinsic: should be 2 or 3"); unsigned Opcode = (HintVal == 2 ? 
IntrData->Opc1 : IntrData->Opc0); @@ -23678,7 +24184,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -23730,7 +24236,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); SDLoc dl(Op); // FIXME probably not meaningful - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && "Invalid Frame Register!"); @@ -23743,12 +24249,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { +Register X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); - const MachineFunction &MF = DAG.getMachineFunction(); - unsigned Reg = StringSwitch(RegName) + Register Reg = StringSwitch(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) .Case("ebp", X86::EBP) @@ -23762,8 +24267,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, #ifndef NDEBUG else { const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned FrameReg = - RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); + Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF); assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && "Invalid Frame Register!"); } @@ -23809,7 +24313,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && "Invalid Frame Register!"); @@ -23967,6 +24471,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: case CallingConv::Fast: + case CallingConv::Tail: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td NestReg = X86::EAX; @@ -24279,12 +24784,9 @@ static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, if (Opc == ISD::CTLZ) { // If src is zero (i.e. bsr sets ZF), returns NumBits. 
- SDValue Ops[] = { - Op, - DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), + DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), + Op.getValue(1)}; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); } @@ -24312,12 +24814,9 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); // If src is zero (i.e. bsf sets ZF), returns NumBits. - SDValue Ops[] = { - Op, - DAG.getConstant(NumBits, dl, VT), - DAG.getConstant(X86::COND_E, dl, MVT::i8), - Op.getValue(1) - }; + SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), + DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), + Op.getValue(1)}; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } @@ -24453,7 +24952,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, SDValue N0 = Op.getOperand(0); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), DAG.getConstant(0, DL, VT), N0); - SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), + SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8), SDValue(Neg.getNode(), 1)}; return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); } @@ -25033,7 +25532,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Optimize shl/srl/sra with constant shift amount. APInt APIntShiftAmt; - if (!isConstantSplat(Amt, APIntShiftAmt)) + if (!X86::isConstantSplat(Amt, APIntShiftAmt)) return SDValue(); // If the shift amount is out of range, return undef. 
@@ -25220,7 +25719,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, } ConstantSDNode *ND = cast(Op); - APInt C(SVTBits, ND->getAPIntValue().getZExtValue()); + APInt C(SVTBits, ND->getZExtValue()); uint64_t ShAmt = C.getZExtValue(); if (ShAmt >= SVTBits) { Elts.push_back(DAG.getUNDEF(SVT)); @@ -25502,7 +26001,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, (VT == MVT::v32i8 && Subtarget.hasInt256())) && !Subtarget.hasXOP()) { int NumElts = VT.getVectorNumElements(); - SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8); + SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); // Extend constant shift amount to vXi16 (it doesn't matter if the type // isn't legal). @@ -25774,7 +26273,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(Op, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Else, fall-back on VPROLV/VPRORV. @@ -25795,7 +26294,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (0 <= CstSplatIndex) { uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits); return DAG.getNode(X86ISD::VROTLI, DL, VT, R, - DAG.getConstant(RotateAmt, DL, MVT::i8)); + DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); } // Use general rotate by variable (per-element). @@ -26032,7 +26531,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // If this is a canonical idempotent atomicrmw w/no uses, we have a better // lowering available in lowerAtomicArith. - // TODO: push more cases through this path. + // TODO: push more cases through this path. 
if (auto *C = dyn_cast(AI->getValOperand())) if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && AI->use_empty()) @@ -26087,10 +26586,22 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { return Loaded; } +bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { + if (!SI.isUnordered()) + return false; + return ExperimentalUnorderedISEL; +} +bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { + if (!LI.isUnordered()) + return false; + return ExperimentalUnorderedISEL; +} + + /// Emit a locked operation on a stack location which does not change any /// memory location, but does involve a lock prefix. Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, -/// and b) definitely dereferenceable. Returns the new Chain result. +/// and b) definitely dereferenceable. Returns the new Chain result. static SDValue emitLockedStackOp(SelectionDAG &DAG, const X86Subtarget &Subtarget, SDValue Chain, SDLoc DL) { @@ -26099,22 +26610,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG, // operations issued by the current processor. As such, the location // referenced is not relevant for the ordering properties of the instruction. // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual, - // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions + // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions // 2) Using an immediate operand appears to be the best encoding choice // here since it doesn't require an extra register. // 3) OR appears to be very slightly faster than ADD. (Though, the difference // is small enough it might just be measurement noise.) // 4) When choosing offsets, there are several contributing factors: // a) If there's no redzone, we default to TOS. (We could allocate a cache - // line aligned stack object to improve this case.) 
+ // line aligned stack object to improve this case.) // b) To minimize our chances of introducing a false dependence, we prefer - // to offset the stack usage from TOS slightly. + // to offset the stack usage from TOS slightly. // c) To minimize concerns about cross thread stack usage - in particular, // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which // captures state in the TOS frame and accesses it from many threads - // we want to use an offset such that the offset is in a distinct cache // line from the TOS frame. - // + // // For a general discussion of the tradeoffs and benchmark results, see: // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ @@ -26155,10 +26666,10 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); - AtomicOrdering FenceOrdering = static_cast( - cast(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast( - cast(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast(Op.getConstantOperandVal(2)); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
@@ -26167,7 +26678,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); return emitLockedStackOp(DAG, Subtarget, Chain, dl); } @@ -26218,6 +26729,17 @@ static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT InVT = V.getSimpleValueType(); + if (InVT == MVT::v64i8) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(V, DL); + Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget); + Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); + Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, + DAG.getConstant(32, DL, MVT::i8)); + return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); + } if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(V, DL); @@ -26258,8 +26780,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); - EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(), - DstVT.getVectorNumElements() / 2); + MVT CastVT = DstVT.getHalfNumVectorElementsVT(); Lo = DAG.getBitcast(CastVT, Lo); Hi = DAG.getBitcast(CastVT, Hi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi); @@ -26275,53 +26796,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, return DAG.getZExtOrTrunc(V, DL, DstVT); } - if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || - SrcVT == MVT::i64) { - assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - if (DstVT != MVT::f64 && DstVT != MVT::i64 && - !(DstVT == MVT::x86mmx && SrcVT.isVector())) - // This conversion needs to be expanded. 
- return SDValue(); + assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || + SrcVT == MVT::i64) && "Unexpected VT!"); - SDLoc dl(Op); - if (SrcVT.isVector()) { - // Widen the vector in input in the case of MVT::v2i32. - // Example: from MVT::v2i32 to MVT::v4i32. - MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), - SrcVT.getVectorNumElements() * 2); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, - DAG.getUNDEF(SrcVT)); - } else { - assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && - "Unexpected source type in LowerBITCAST"); - Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src); - } + assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); + if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) && + !(DstVT == MVT::x86mmx && SrcVT.isVector())) + // This conversion needs to be expanded. + return SDValue(); - MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64; - Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src); - - if (DstVT == MVT::x86mmx) - return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src); - - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src, - DAG.getIntPtrConstant(0, dl)); + SDLoc dl(Op); + if (SrcVT.isVector()) { + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), + SrcVT.getVectorNumElements() * 2); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, + DAG.getUNDEF(SrcVT)); + } else { + assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && + "Unexpected source type in LowerBITCAST"); + Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src); } - assert(Subtarget.is64Bit() && !Subtarget.hasSSE2() && - Subtarget.hasMMX() && "Unexpected custom BITCAST"); - assert((DstVT == MVT::i64 || - (DstVT.isVector() && DstVT.getSizeInBits()==64)) && - "Unexpected custom BITCAST"); - // i64 <=> MMX conversions are Legal. 
- if (SrcVT==MVT::i64 && DstVT.isVector()) - return Op; - if (DstVT==MVT::i64 && SrcVT.isVector()) - return Op; - // MMX <=> MMX conversions are Legal. - if (SrcVT.isVector() && DstVT.isVector()) - return Op; - // All other conversions need to be expanded. - return SDValue(); + MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64; + Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src); + + if (DstVT == MVT::x86mmx) + return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src, + DAG.getIntPtrConstant(0, dl)); } /// Compute the horizontal sum of bytes in V for the elements of VT. @@ -26549,6 +27054,13 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, SDValue In = Op.getOperand(0); SDLoc DL(Op); + // Split v8i64/v16i32 without BWI so that we can still use the PSHUFB + // lowering. + if (VT == MVT::v8i64 || VT == MVT::v16i32) { + assert(!Subtarget.hasBWI() && "BWI should Expand BITREVERSE"); + return Lower512IntUnary(Op, DAG); + } + unsigned NumElts = VT.getVectorNumElements(); assert(VT.getScalarType() == MVT::i8 && "Only byte vector BITREVERSE supported"); @@ -26656,12 +27168,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG, // seq_cst which isn't SingleThread, everything just needs to be preserved // during codegen and then dropped. Note that we expect (but don't assume), // that orderings other than seq_cst and acq_rel have been canonicalized to - // a store or load. + // a store or load. if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent && AN->getSyncScopeID() == SyncScope::System) { // Prefer a locked operation against a stack location to minimize cache // traffic. This assumes that stack locations are very likely to be - // accessed only by the owning thread. + // accessed only by the owning thread. 
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); assert(!N->hasAnyUseOfValue(0)); // NOTE: The getUNDEF is needed to give something for the unused result 0. @@ -26886,12 +27398,13 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, SDValue Chain = N->getChain(); SDValue BasePtr = N->getBasePtr(); - if (VT == MVT::v2f32) { + if (VT == MVT::v2f32 || VT == MVT::v2i32) { assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); // If the index is v2i64 and we have VLX we can use xmm for data and index. if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, - DAG.getUNDEF(MVT::v2f32)); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT)); SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; SDValue NewScatter = DAG.getTargetMemSDNode( @@ -26901,30 +27414,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, return SDValue(); } - if (VT == MVT::v2i32) { - assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, - DAG.getUNDEF(MVT::v2i32)); - // If the index is v2i64 and we have VLX we can use xmm for data and index. - if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { - SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); - SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; - SDValue NewScatter = DAG.getTargetMemSDNode( - VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); - return SDValue(NewScatter.getNode(), 1); - } - // Custom widen all the operands to avoid promotion. 
- EVT NewIndexVT = EVT::getVectorVT( - *DAG.getContext(), Index.getValueType().getVectorElementType(), 4); - Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, - DAG.getUNDEF(Index.getValueType())); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl, - Ops, N->getMemOperand()); - } - MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); @@ -27160,6 +27649,13 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, return NOOP; } +SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + SmallVector Ops(Op->op_begin(), Op->op_end()); + MakeLibCallOptions CallOptions; + return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; +} + /// Provide custom lowering hooks for some operations. SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -27206,10 +27702,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG); case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: - case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget); + case ISD::FSUB: return lowerFaddFsub(Op, DAG); + case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128); case ISD::FABS: case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -27347,37 +27847,22 @@ void 
X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::MUL: { EVT VT = N->getValueType(0); - assert(VT.isVector() && "Unexpected VT"); - if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger && - VT.getVectorNumElements() == 2) { - // Promote to a pattern that will be turned into PMULUDQ. - SDValue N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(0)); - SDValue N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v2i64, - N->getOperand(1)); - SDValue Mul = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, N0, N1); - Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, VT, Mul)); - } else if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && - VT.getVectorElementType() == MVT::i8) { - // Pre-promote these to vXi16 to avoid op legalization thinking all 16 - // elements are needed. - MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); - SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); - Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); - unsigned NumConcats = 16 / VT.getVectorNumElements(); - SmallVector ConcatOps(NumConcats, DAG.getUNDEF(VT)); - ConcatOps[0] = Res; - Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); - Results.push_back(Res); - } + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); + // Pre-promote these to vXi16 to avoid op legalization thinking all 16 + // elements are needed. 
+ MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); + SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + unsigned NumConcats = 16 / VT.getVectorNumElements(); + SmallVector ConcatOps(NumConcats, DAG.getUNDEF(VT)); + ConcatOps[0] = Res; + Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); + Results.push_back(Res); return; } - case ISD::UADDSAT: - case ISD::SADDSAT: - case ISD::USUBSAT: - case ISD::SSUBSAT: case X86ISD::VPMADDWD: case X86ISD::AVG: { // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and @@ -27388,6 +27873,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT InVT = N->getOperand(0).getValueType(); assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && "Expected a VT that divides into 128 bits."); + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); unsigned NumConcat = 128 / InVT.getSizeInBits(); EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), @@ -27404,9 +27891,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, - DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); return; } @@ -27435,26 +27919,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Hi); return; } - case ISD::SETCC: { - // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when - // setCC result type is v2i1 because type legalzation will end up with - // a v4i1 setcc plus an extend. 
- assert(N->getValueType(0) == MVT::v2i32 && "Unexpected type"); - if (N->getOperand(0).getValueType() != MVT::v2f32 || - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector) - return; - SDValue UNDEF = DAG.getUNDEF(MVT::v2f32); - SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - N->getOperand(0), UNDEF); - SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - N->getOperand(1), UNDEF); - SDValue Res = DAG.getNode(ISD::SETCC, dl, MVT::v4i32, LHS, RHS, - N->getOperand(2)); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - return; - } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. case X86ISD::FMINC: case X86ISD::FMIN: @@ -27475,7 +27939,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SREM: case ISD::UREM: { EVT VT = N->getValueType(0); - if (getTypeAction(*DAG.getContext(), VT) == TypeWidenVector) { + if (VT.isVector()) { + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); // If this RHS is a constant splat vector we can widen this and let // division/remainder by constant optimize it. // TODO: Can we do something for non-splat? @@ -27493,17 +27959,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } - if (VT == MVT::v2i32) { - // Legalize v2i32 div/rem by unrolling. Otherwise we promote to the - // v2i64 and unroll later. But then we create i64 scalar ops which - // might be slow in 64-bit mode or require a libcall in 32-bit mode. 
- Results.push_back(DAG.UnrollVectorOp(N)); - return; - } - - if (VT.isVector()) - return; - LLVM_FALLTHROUGH; } case ISD::SDIVREM: @@ -27561,58 +28016,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } } - return; - } - case ISD::SIGN_EXTEND_VECTOR_INREG: { - if (ExperimentalVectorWideningLegalization) - return; + if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && + getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && + isTypeLegal(MVT::v4i64)) { + // Input needs to be split and output needs to widened. Let's use two + // VTRUNCs, and shuffle their results together into the wider type. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(In, dl); - EVT VT = N->getValueType(0); - SDValue In = N->getOperand(0); - EVT InVT = In.getValueType(); - if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && - (InVT == MVT::v16i16 || InVT == MVT::v32i8)) { - // Custom split this so we can extend i8/i16->i32 invec. This is better - // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using - // sra. Then extending from i32 to i64 using pcmpgt. By custom splitting - // we allow the sra from the extend to i32 to be shared by the split. - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - InVT.getVectorNumElements() / 2); - MVT ExtendVT = MVT::getVectorVT(MVT::i32, - VT.getVectorNumElements()); - In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExtractVT, - In, DAG.getIntPtrConstant(0, dl)); - In = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, MVT::v4i32, In); - - // Fill a vector with sign bits for each element. - SDValue Zero = DAG.getConstant(0, dl, ExtendVT); - SDValue SignBits = DAG.getSetCC(dl, ExtendVT, Zero, In, ISD::SETGT); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Create an unpackl and unpackh to interleave the sign bits then bitcast - // to vXi64. 
- SDValue Lo = getUnpackl(DAG, dl, ExtendVT, In, SignBits); - Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); - SDValue Hi = getUnpackh(DAG, dl, ExtendVT, In, SignBits); - Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); - - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); + Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); + Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); + SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, + { 0, 1, 2, 3, 16, 17, 18, 19, + -1, -1, -1, -1, -1, -1, -1, -1 }); Results.push_back(Res); return; } + return; } + case ISD::ANY_EXTEND: + // Right now, only MVT::v8i8 has Custom action for an illegal type. + // It's intended to custom handle the input type. + assert(N->getValueType(0) == MVT::v8i8 && + "Do not know how to legalize this Node"); + return; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: { EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && - (InVT == MVT::v4i16 || InVT == MVT::v4i8) && - getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) { + (InVT == MVT::v4i16 || InVT == MVT::v4i8)){ + assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector && + "Unexpected type action!"); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); // Custom split this so we can extend i8/i16->i32 invec. This is better // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using @@ -27683,27 +28120,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); - // Promote these manually to avoid over promotion to v2i64. Type - // legalization will revisit the v2i32 operation for more cleanup. - if ((VT == MVT::v2i8 || VT == MVT::v2i16) && - getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) { - // AVX512DQ provides instructions that produce a v2i64 result. 
- if (Subtarget.hasDQI()) - return; - - SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v2i32, Src); - Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext - : ISD::AssertSext, - dl, MVT::v2i32, Res, - DAG.getValueType(VT.getVectorElementType())); - Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); - Results.push_back(Res); - return; - } - if (VT.isVector() && VT.getScalarSizeInBits() < 32) { - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); // Try to create a 128 bit vector, but don't exceed a 32 bit element. unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); @@ -27738,35 +28157,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - bool Widenv2i32 = - getTypeAction(*DAG.getContext(), MVT::v2i32) == TypeWidenVector; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); if (Src.getValueType() == MVT::v2f64) { - unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; if (!IsSigned && !Subtarget.hasVLX()) { - // If v2i32 is widened, we can defer to the generic legalizer. - if (Widenv2i32) - return; - // Custom widen by doubling to a legal vector with. Isel will - // further widen to v8f64. - Opc = ISD::FP_TO_UINT; - Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, - Src, DAG.getUNDEF(MVT::v2f64)); + // If we have VLX we can emit a target specific FP_TO_UINT node, + // otherwise we can defer to the generic legalizer which will widen + // the input as well. This will be further widened during op + // legalization to v8i32<-v8f64. + return; } + unsigned Opc = IsSigned ? 
X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; SDValue Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); - if (!Widenv2i32) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - return; - } - if (SrcVT == MVT::v2f32 && - getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { - SDValue Idx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, - DAG.getUNDEF(MVT::v2f32)); - Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT - : ISD::FP_TO_UINT, dl, MVT::v4i32, Res); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; } @@ -27776,6 +28178,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } + assert(!VT.isVector() && "Vectors should have been handled above!"); + if (Subtarget.hasDQI() && VT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { assert(!Subtarget.is64Bit() && "i64 should be legal"); @@ -27847,7 +28251,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default : llvm_unreachable("Do not know how to custom type " "legalize this intrinsic operation!"); @@ -27905,7 +28309,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); SDValue Result; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - unsigned BasePtr = TRI->getBaseRegister(); + Register BasePtr = TRI->getBaseRegister(); MachineMemOperand *MMO = cast(N)->getMemOperand(); if (TRI->hasBasePointer(DAG.getMachineFunction()) && (BasePtr == X86::RBX || BasePtr == X86::EBX)) { @@ -28060,34 +28464,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } - if (SrcVT != MVT::f64 || - (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8) || - getTypeAction(*DAG.getContext(), 
DstVT) == TypeWidenVector) + if (DstVT.isVector() && SrcVT == MVT::x86mmx) { + assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && + "Unexpected type action!"); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); + SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, WideVT, N->getOperand(0)); + Results.push_back(Res); return; + } - unsigned NumElts = DstVT.getVectorNumElements(); - EVT SVT = DstVT.getVectorElementType(); - EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); - SDValue Res; - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, N->getOperand(0)); - Res = DAG.getBitcast(WiderVT, Res); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); return; } case ISD::MGATHER: { EVT VT = N->getValueType(0); - if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { + if ((VT == MVT::v2f32 || VT == MVT::v2i32) && + (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { auto *Gather = cast(N); SDValue Index = Gather->getIndex(); if (Index.getValueType() != MVT::v2i64) return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); SDValue Mask = Gather->getMask(); assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Gather->getPassThru(), - DAG.getUNDEF(MVT::v2f32)); + DAG.getUNDEF(VT)); if (!Subtarget.hasVLX()) { // We need to widen the mask, but the instruction will only use 2 // of its elements. So we can use undef. 
@@ -28098,66 +28501,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Ops[] = { Gather->getChain(), PassThru, Mask, Gather->getBasePtr(), Index, Gather->getScale() }; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, + DAG.getVTList(WideVT, Mask.getValueType(), MVT::Other), Ops, dl, Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); return; } - if (VT == MVT::v2i32) { - auto *Gather = cast(N); - SDValue Index = Gather->getIndex(); - SDValue Mask = Gather->getMask(); - assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); - SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, - Gather->getPassThru(), - DAG.getUNDEF(MVT::v2i32)); - // If the index is v2i64 we can use it directly. - if (Index.getValueType() == MVT::v2i64 && - (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { - if (!Subtarget.hasVLX()) { - // We need to widen the mask, but the instruction will only use 2 - // of its elements. So we can use undef. 
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getUNDEF(MVT::v2i1)); - Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); - } - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; - SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, - Gather->getMemoryVT(), Gather->getMemOperand()); - SDValue Chain = Res.getValue(2); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - Results.push_back(Chain); - return; - } - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { - EVT IndexVT = Index.getValueType(); - EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(), - IndexVT.getScalarType(), 4); - // Otherwise we need to custom widen everything to avoid promotion. - Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, - DAG.getUNDEF(IndexVT)); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, - DAG.getConstant(0, dl, MVT::v2i1)); - SDValue Ops[] = { Gather->getChain(), PassThru, Mask, - Gather->getBasePtr(), Index, Gather->getScale() }; - SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other), - Gather->getMemoryVT(), dl, Ops, - Gather->getMemOperand()); - SDValue Chain = Res.getValue(1); - if (getTypeAction(*DAG.getContext(), MVT::v2i32) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - Results.push_back(Chain); - return; - } - } return; } case ISD::LOAD: { @@ -28166,8 +28515,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, // cast since type legalization will try to use an i64 load. 
MVT VT = N->getSimpleValueType(0); assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); - if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) - return; + assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && + "Unexpected type action!"); if (!ISD::isNON_EXTLoad(N)) return; auto *Ld = cast(N); @@ -28177,11 +28526,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); SDValue Chain = Res.getValue(1); - MVT WideVT = MVT::getVectorVT(LdVT, 2); - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res); - MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - Res = DAG.getBitcast(CastVT, Res); + MVT VecVT = MVT::getVectorVT(LdVT, 2); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res); + EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); + Res = DAG.getBitcast(WideVT, Res); Results.push_back(Res); Results.push_back(Chain); return; @@ -28236,6 +28584,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; + case X86ISD::MOVQ2DQ: return "X86ISD::MOVQ2DQ"; case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; @@ -28373,6 +28722,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCAST_LOAD: return "X86ISD::VBROADCAST_LOAD"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; @@ -28737,6 +29087,9 @@ bool 
X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { } bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { + if (isa(ExtVal.getOperand(0))) + return false; + EVT SrcVT = ExtVal.getOperand(0).getValueType(); // There is no extending load for vXi1. @@ -28856,10 +29209,10 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned fallDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register fallDstReg = MRI.createVirtualRegister(RC); // thisMBB: // xbegin fallMBB @@ -28913,7 +29266,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, static_assert(X86::AddrNumOperands == 5, "VAARG_64 assumes 5 address operands"); - unsigned DestReg = MI.getOperand(0).getReg(); + Register DestReg = MI.getOperand(0).getReg(); MachineOperand &Base = MI.getOperand(1); MachineOperand &Scale = MI.getOperand(2); MachineOperand &Index = MI.getOperand(3); @@ -29049,7 +29402,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, assert(OffsetReg != 0); // Read the reg_save_area address. 
- unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); + Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) .add(Base) .add(Scale) @@ -29059,8 +29412,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, .setMemRefs(LoadOnlyMMO); // Zero-extend the offset - unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) .addImm(0) .addReg(OffsetReg) .addImm(X86::sub_32bit); @@ -29071,7 +29424,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, .addReg(RegSaveReg); // Compute the offset for the next argument - unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); + Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) .addReg(OffsetReg) .addImm(UseFPOffset ? 16 : 8); @@ -29096,7 +29449,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // // Load the overflow_area address into a register. 
- unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); + Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) .add(Base) .add(Scale) @@ -29110,7 +29463,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, if (NeedsAlign) { // Align the overflow address assert(isPowerOf2_32(Align) && "Alignment must be a power of 2"); - unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); + Register TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) @@ -29127,7 +29480,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI, // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) - unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); + Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass); BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); @@ -29191,7 +29544,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned CountReg = MI.getOperand(0).getReg(); + Register CountReg = MI.getOperand(0).getReg(); int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); @@ -29273,7 +29626,9 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, static bool isCMOVPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { case X86::CMOV_FR32: + case X86::CMOV_FR32X: case X86::CMOV_FR64: + case X86::CMOV_FR64X: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: @@ -29326,9 +29681,9 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB( MachineInstrBuilder MIB; for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != 
MIItEnd; ++MIIt) { - unsigned DestReg = MIIt->getOperand(0).getReg(); - unsigned Op1Reg = MIIt->getOperand(1).getReg(); - unsigned Op2Reg = MIIt->getOperand(2).getReg(); + Register DestReg = MIIt->getOperand(0).getReg(); + Register Op1Reg = MIIt->getOperand(1).getReg(); + Register Op2Reg = MIIt->getOperand(2).getReg(); // If this CMOV we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the @@ -29486,9 +29841,9 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, // SinkMBB: // %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] - unsigned DestReg = FirstCMOV.getOperand(0).getReg(); - unsigned Op1Reg = FirstCMOV.getOperand(1).getReg(); - unsigned Op2Reg = FirstCMOV.getOperand(2).getReg(); + Register DestReg = FirstCMOV.getOperand(0).getReg(); + Register Op1Reg = FirstCMOV.getOperand(1).getReg(); + Register Op2Reg = FirstCMOV.getOperand(2).getReg(); MachineInstrBuilder MIB = BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg) .addReg(Op1Reg) @@ -30006,7 +30361,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, // call the retpoline thunk. DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); - unsigned CalleeVReg = MI.getOperand(0).getReg(); + Register CalleeVReg = MI.getOperand(0).getReg(); unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can @@ -30079,7 +30434,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, // Initialize a register with zero. MVT PVT = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *PtrRC = getRegClassFor(PVT); - unsigned ZReg = MRI.createVirtualRegister(PtrRC); + Register ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? 
X86::XOR64rr : X86::XOR32rr; BuildMI(*MBB, MI, DL, TII->get(XorRROpc)) .addDef(ZReg) @@ -30087,7 +30442,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. - unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); @@ -30131,8 +30486,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); (void)TRI; - unsigned mainDstReg = MRI.createVirtualRegister(RC); - unsigned restoreDstReg = MRI.createVirtualRegister(RC); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); MemOpndSlot = CurOp; @@ -30246,8 +30601,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); X86MachineFunctionInfo *X86FI = MF->getInfo(); X86FI->setRestoreBasePointer(MF); - unsigned FramePtr = RegInfo->getFrameRegister(*MF); - unsigned BasePtr = RegInfo->getBaseRegister(); + Register FramePtr = RegInfo->getFrameRegister(*MF); + Register BasePtr = RegInfo->getBaseRegister(); unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), FramePtr, true, X86FI->getRestoreBasePointerOffset()) @@ -30329,7 +30684,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MBB->addSuccessor(checkSspMBB); // Initialize a register with zero. - unsigned ZReg = MRI.createVirtualRegister(PtrRC); + Register ZReg = MRI.createVirtualRegister(PtrRC); unsigned XorRROpc = (PVT == MVT::i64) ? 
X86::XOR64rr : X86::XOR32rr; BuildMI(checkSspMBB, DL, TII->get(XorRROpc)) .addDef(ZReg) @@ -30337,7 +30692,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, .addReg(ZReg, RegState::Undef); // Read the current SSP Register value to the zeroed register. - unsigned SSPCopyReg = MRI.createVirtualRegister(PtrRC); + Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); @@ -30352,7 +30707,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, checkSspMBB->addSuccessor(fallMBB); // Reload the previously saved SSP register value. - unsigned PrevSSPReg = MRI.createVirtualRegister(PtrRC); + Register PrevSSPReg = MRI.createVirtualRegister(PtrRC); unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; const int64_t SPPOffset = 3 * PVT.getStoreSize(); MachineInstrBuilder MIB = @@ -30370,7 +30725,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, MIB.setMemRefs(MMOs); // Subtract the current SSP from the previous SSP. - unsigned SspSubReg = MRI.createVirtualRegister(PtrRC); + Register SspSubReg = MRI.createVirtualRegister(PtrRC); unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) .addReg(PrevSSPReg) @@ -30384,7 +30739,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8. unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; unsigned Offset = (PVT == MVT::i64) ? 
3 : 2; - unsigned SspFirstShrReg = MRI.createVirtualRegister(PtrRC); + Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) .addReg(SspSubReg) .addImm(Offset); @@ -30394,7 +30749,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); // Reset the lower 8 bits. - unsigned SspSecondShrReg = MRI.createVirtualRegister(PtrRC); + Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) .addReg(SspFirstShrReg) .addImm(8); @@ -30406,12 +30761,12 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Do a single shift left. unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; - unsigned SspAfterShlReg = MRI.createVirtualRegister(PtrRC); + Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) .addReg(SspSecondShrReg); // Save the value 128 to a register (will be used next with incssp). - unsigned Value128InReg = MRI.createVirtualRegister(PtrRC); + Register Value128InReg = MRI.createVirtualRegister(PtrRC); unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri; BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) .addImm(128); @@ -30419,8 +30774,8 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, // Since incssp only looks at the lower 8 bits, we might need to do several // iterations of incssp until we finish fixing the shadow stack. 
- unsigned DecReg = MRI.createVirtualRegister(PtrRC); - unsigned CounterReg = MRI.createVirtualRegister(PtrRC); + Register DecReg = MRI.createVirtualRegister(PtrRC); + Register CounterReg = MRI.createVirtualRegister(PtrRC); BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) .addReg(SspAfterShlReg) .addMBB(fixShadowLoopPrepareMBB) @@ -30460,11 +30815,11 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, const TargetRegisterClass *RC = (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; - unsigned Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; - unsigned SP = RegInfo->getStackRegister(); + Register SP = RegInfo->getStackRegister(); MachineInstrBuilder MIB; @@ -30662,8 +31017,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, X86MachineFunctionInfo *MFI = MF->getInfo(); MFI->setRestoreBasePointer(MF); - unsigned FP = RI.getFrameRegister(*MF); - unsigned BP = RI.getBaseRegister(); + Register FP = RI.getFrameRegister(*MF); + Register BP = RI.getBaseRegister(); unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm; addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true, MFI->getRestoreBasePointerOffset()) @@ -30674,7 +31029,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, } // IReg is used as an index in a memory operand and therefore can't be SP - unsigned IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); + Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, Subtarget.is64Bit() ? 
8 : 4); BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) @@ -30683,8 +31038,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); if (Subtarget.is64Bit()) { - unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); - unsigned IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); // leaq .LJTI0_0(%rip), BReg BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) @@ -30710,9 +31065,9 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(0); break; case MachineJumpTableInfo::EK_LabelDifference32: { - unsigned OReg = MRI->createVirtualRegister(&X86::GR32RegClass); - unsigned OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); - unsigned TReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); + Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass); // movl (BReg,IReg64,4), OReg BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) @@ -30783,8 +31138,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, DefRegs[MOp.getReg()] = true; MachineInstrBuilder MIB(*MF, &II); - for (unsigned RI = 0; SavedRegs[RI]; ++RI) { - unsigned Reg = SavedRegs[RI]; + for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) { + unsigned Reg = SavedRegs[RegIdx]; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } @@ -30906,20 +31261,18 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, TII->get(X86::FNSTCW16m)), OrigCWFrameIdx); // Load the old value of the control word... 
- unsigned OldCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW), OrigCWFrameIdx); // OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero. - unsigned NewCW = - MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); + Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW) .addReg(OldCW, RegState::Kill).addImm(0xC00); // Extract to 16 bits. - unsigned NewCW16 = - MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); + Register NewCW16 = + MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16) .addReg(NewCW, RegState::Kill, X86::sub_16bit); @@ -31023,7 +31376,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineRegisterInfo &MRI = MF->getRegInfo(); MVT SPTy = getPointerTy(MF->getDataLayout()); const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); - unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); + Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); X86AddressMode AM = getAddressFromInstr(&MI, 0); // Regalloc does not need any help when the memory operand of CMPXCHG8B @@ -31034,10 +31387,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its // four operand definitions that are E[ABCD] registers. We skip them and // then insert the LEA. 
- MachineBasicBlock::iterator MBBI(MI); - while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) || - MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX)) - --MBBI; + MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator()); + while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) || + RMBBI->definesRegister(X86::EBX) || + RMBBI->definesRegister(X86::ECX) || + RMBBI->definesRegister(X86::EDX))) { + ++RMBBI; + } + MachineBasicBlock::iterator MBBI(RMBBI); addFullAddress( BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM); @@ -31232,12 +31589,21 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.One |= Known2.One; break; } + case X86ISD::PSADBW: { + assert(VT.getScalarType() == MVT::i64 && + Op.getOperand(0).getValueType().getScalarType() == MVT::i8 && + "Unexpected PSADBW types"); + + // PSADBW - fills low 16 bits and zeros upper 48 bits of each i64 result. + Known.Zero.setBitsFrom(16); + break; + } case X86ISD::CMOV: { - Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1); + Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); // If we don't know any bits, early out. if (Known.isUnknown()) break; - KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); // Only known if known in both the LHS and RHS. Known.One &= Known2.One; @@ -31650,8 +32016,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef Mask, if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { - ArrayRef LoMask(Mask.data() + 0, 4); - ArrayRef HiMask(Mask.data() + 4, 4); + ArrayRef LoMask(RepeatedMask.data() + 0, 4); + ArrayRef HiMask(RepeatedMask.data() + 4, 4); // PSHUFLW: permute lower 4 elements only. 
if (isUndefOrInRange(LoMask, 0, 4) && @@ -31789,8 +32155,8 @@ static bool matchBinaryPermuteShuffle( uint64_t BlendMask = 0; bool ForceV1Zero = false, ForceV2Zero = false; SmallVector TargetMask(Mask.begin(), Mask.end()); - if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero, - BlendMask)) { + if (matchVectorShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero, + ForceV2Zero, BlendMask)) { if (MaskVT == MVT::v16i16) { // We can only use v16i16 PBLENDW if the lanes are repeated. SmallVector RepeatedMask; @@ -31819,15 +32185,15 @@ static bool matchBinaryPermuteShuffle( } } - // Attempt to combine to INSERTPS. + // Attempt to combine to INSERTPS, but only if it has elements that need to + // be set to zero. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && - MaskVT.is128BitVector()) { - if (Zeroable.getBoolValue() && - matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { - Shuffle = X86ISD::INSERTPS; - ShuffleVT = MVT::v4f32; - return true; - } + MaskVT.is128BitVector() && + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }) && + matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { + Shuffle = X86ISD::INSERTPS; + ShuffleVT = MVT::v4f32; + return true; } // Attempt to combine to SHUFPD. @@ -31835,7 +32201,11 @@ static bool matchBinaryPermuteShuffle( ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || (MaskVT.is256BitVector() && Subtarget.hasAVX()) || (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { - if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) { + bool ForceV1Zero = false, ForceV2Zero = false; + if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero, + PermuteImm, Mask, Zeroable)) { + V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; + V2 = ForceV2Zero ? 
getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; Shuffle = X86ISD::SHUFP; ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64); return true; @@ -31889,6 +32259,15 @@ static bool matchBinaryPermuteShuffle( } } + // Attempt to combine to INSERTPS more generally if X86ISD::SHUFP failed. + if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && + MaskVT.is128BitVector() && + matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { + Shuffle = X86ISD::INSERTPS; + ShuffleVT = MVT::v4f32; + return true; + } + return false; } @@ -31942,7 +32321,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || - (RootVT.isFloatingPoint() && Depth >= 2) || + (RootVT.isFloatingPoint() && Depth >= 1) || (RootVT.is256BitVector() && !Subtarget.hasAVX2()); // Don't combine if we are a AVX512/EVEX target and the mask element size @@ -31981,7 +32360,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) + if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? 
MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; @@ -31991,7 +32370,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res, DAG.getUNDEF(ShuffleVT), - DAG.getConstant(PermMask, DL, MVT::i8)); + DAG.getTargetConstant(PermMask, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32026,8 +32405,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. // TODO: Should we indicate which domain is preferred if both are allowed? - bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() && + bool AllowFloatDomain = FloatDomain || (Depth >= 3); + bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); // Determine zeroable mask elements. @@ -32062,14 +32441,14 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (V1.getValueType() == MaskVT && V1.getOpcode() == ISD::SCALAR_TO_VECTOR && MayFoldLoad(V1.getOperand(0))) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = V1.getOperand(0); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); return DAG.getBitcast(RootVT, Res); } if (Subtarget.hasAVX2()) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! 
Res = DAG.getBitcast(MaskVT, V1); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); @@ -32083,7 +32462,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleSrcVT, NewV1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); @@ -32094,11 +32473,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } @@ -32109,7 +32488,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); @@ -32123,12 +32502,12 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV2 = DAG.getBitcast(ShuffleVT, NewV2); Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, - DAG.getConstant(PermuteImm, DL, MVT::i8)); + DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32141,34 +32520,34 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) return SDValue(); // Nothing to do! 
V1 = DAG.getBitcast(IntMaskVT, V1); V2 = DAG.getBitcast(IntMaskVT, V2); Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); + DAG.getTargetConstant(BitLen, DL, MVT::i8), + DAG.getTargetConstant(BitIdx, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } } // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. - if (Depth < 2) + if (Depth < 1) return SDValue(); // Depth threshold above which we can efficiently use variable mask shuffles. - int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; + int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = @@ -32321,7 +32700,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, V2 = DAG.getBitcast(MaskVT, V2); SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, - DAG.getConstant(M2ZImm, DL, MVT::i8)); + DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); return DAG.getBitcast(RootVT, Res); } @@ -32650,7 +33029,7 @@ static SDValue combineX86ShufflesRecursively( // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. const unsigned MaxRecursionDepth = 8; - if (Depth > MaxRecursionDepth) + if (Depth >= MaxRecursionDepth) return SDValue(); // Directly rip through bitcasts to find the underlying operand. @@ -32667,11 +33046,18 @@ static SDValue combineX86ShufflesRecursively( "Can only combine shuffles of the same vector register size."); // Extract target shuffle mask and resolve sentinels and inputs. + // TODO - determine Op's demanded elts from RootMask. 
SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) + APInt OpUndef, OpZero; + APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode()); + if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, + OpZero, DAG, Depth, false)) return SDValue(); + resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero); + // Add the inputs to the Ops list, avoiding duplicates. SmallVector Ops(SrcOps.begin(), SrcOps.end()); @@ -32772,6 +33158,9 @@ static SDValue combineX86ShufflesRecursively( Mask[i] = OpMaskedIdx; } + // Remove unused/repeated shuffle source ops. + resolveTargetShuffleInputsAndMask(Ops, Mask); + // Handle the all undef/zero cases early. if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) return DAG.getUNDEF(Root.getValueType()); @@ -32783,11 +33172,8 @@ static SDValue combineX86ShufflesRecursively( return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, SDLoc(Root)); - // Remove unused/repeated shuffle source ops. - resolveTargetShuffleInputsAndMask(Ops, Mask); assert(!Ops.empty() && "Shuffle with no inputs detected"); - - HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); + HasVariableMask |= IsOpVariableMask; // Update the list of shuffle nodes that have been combined so far. SmallVector CombinedNodes(SrcNodes.begin(), @@ -32853,7 +33239,7 @@ static SDValue combineX86ShufflesRecursively( /// Helper entry wrapper to combineX86ShufflesRecursively. 
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, + return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget); } @@ -33088,7 +33474,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( - {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, + {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); @@ -33120,6 +33506,30 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, VT.getSizeInBits()); } + // vbroadcast(scalarload X) -> vbroadcast_load X + // For float loads, extract other uses of the scalar from the broadcast. + if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && + ISD::isNormalLoad(Src.getNode())) { + LoadSDNode *LN = cast(Src); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, + LN->getMemoryVT(), LN->getMemOperand()); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceExtract = Src.hasOneUse(); + DCI.CombineTo(N.getNode(), BcastLd); + if (NoReplaceExtract) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(LN); + } else { + SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd, + DAG.getIntPtrConstant(0, DL)); + DCI.CombineTo(LN, Scl, BcastLd.getValue(1)); + } + return N; // Return N so it doesn't get rechecked! 
+ } + return SDValue(); } case X86ISD::BLENDI: { @@ -33133,14 +33543,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, MVT SrcVT = N0.getOperand(0).getSimpleValueType(); if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && SrcVT.getScalarSizeInBits() >= 32) { - unsigned Mask = N.getConstantOperandVal(2); + unsigned BlendMask = N.getConstantOperandVal(2); unsigned Size = VT.getVectorNumElements(); unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); - unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale); + BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale); return DAG.getBitcast( VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), N1.getOperand(0), - DAG.getConstant(ScaleMask, DL, MVT::i8))); + DAG.getTargetConstant(BlendMask, DL, MVT::i8))); } } return SDValue(); @@ -33208,76 +33618,97 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, // If we zero out all elements from Op0 then we don't need to reference it. if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // If we zero out the element from Op1 then we don't need to reference it. if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); // Attempt to merge insertps Op1 with an inner target shuffle node. 
SmallVector TargetMask1; SmallVector Ops1; - if (setTargetShuffleZeroElements(Op1, TargetMask1, Ops1)) { - int M = TargetMask1[SrcIdx]; - if (isUndefOrZero(M)) { + APInt KnownUndef1, KnownZero1; + if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1, + KnownZero1)) { + if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) { // Zero/UNDEF insertion - zero out element and remove dependency. InsertPSMask |= (1u << DstIdx); return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Update insertps mask srcidx and reference the source input directly. + int M = TargetMask1[SrcIdx]; assert(0 <= M && M < 8 && "Shuffle index out of range"); InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); Op1 = Ops1[M < 4 ? 0 : 1]; return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } // Attempt to merge insertps Op0 with an inner target shuffle node. SmallVector TargetMask0; SmallVector Ops0; - if (!setTargetShuffleZeroElements(Op0, TargetMask0, Ops0)) - return SDValue(); + APInt KnownUndef0, KnownZero0; + if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0, + KnownZero0)) { + bool Updated = false; + bool UseInput00 = false; + bool UseInput01 = false; + for (int i = 0; i != 4; ++i) { + if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { + // No change if element is already zero or the inserted element. + continue; + } else if (KnownUndef0[i] || KnownZero0[i]) { + // If the target mask is undef/zero then we must zero the element. 
+ InsertPSMask |= (1u << i); + Updated = true; + continue; + } - bool Updated = false; - bool UseInput00 = false; - bool UseInput01 = false; - for (int i = 0; i != 4; ++i) { - int M = TargetMask0[i]; - if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { - // No change if element is already zero or the inserted element. - continue; - } else if (isUndefOrZero(M)) { - // If the target mask is undef/zero then we must zero the element. - InsertPSMask |= (1u << i); - Updated = true; - continue; + // The input vector element must be inline. + int M = TargetMask0[i]; + if (M != i && M != (i + 4)) + return SDValue(); + + // Determine which inputs of the target shuffle we're using. + UseInput00 |= (0 <= M && M < 4); + UseInput01 |= (4 <= M); } - // The input vector element must be inline. - if (M != i && M != (i + 4)) - return SDValue(); + // If we're not using both inputs of the target shuffle then use the + // referenced input directly. + if (UseInput00 && !UseInput01) { + Updated = true; + Op0 = Ops0[0]; + } else if (!UseInput00 && UseInput01) { + Updated = true; + Op0 = Ops0[1]; + } - // Determine which inputs of the target shuffle we're using. - UseInput00 |= (0 <= M && M < 4); - UseInput01 |= (4 <= M); + if (Updated) + return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, + DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); } - // If we're not using both inputs of the target shuffle then use the - // referenced input directly. - if (UseInput00 && !UseInput01) { - Updated = true; - Op0 = Ops0[0]; - } else if (!UseInput00 && UseInput01) { - Updated = true; - Op0 = Ops0[1]; + // If we're inserting an element from a vbroadcast load, fold the + // load into the X86insertps instruction. We need to convert the scalar + // load to a vector and clear the source lane of the INSERTPS control. 
+ if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) { + auto *MemIntr = cast(Op1); + if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { + SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getMemOperand()); + SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + Load), + DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Insert; + } } - if (Updated) - return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, - DAG.getConstant(InsertPSMask, DL, MVT::i8)); - return SDValue(); } default: @@ -33580,7 +34011,7 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, } /// Eliminate a redundant shuffle of a horizontal math op. -static SDValue foldShuffleOfHorizOp(SDNode *N) { +static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST) if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef()) @@ -33611,17 +34042,36 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { HOp.getOperand(0) != HOp.getOperand(1)) return SDValue(); + // The shuffle that we are eliminating may have allowed the horizontal op to + // have an undemanded (undefined) operand. Duplicate the other (defined) + // operand to ensure that the results are defined across all lanes without the + // shuffle. 
+ auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) { + SDValue X; + if (HorizOp.getOperand(0).isUndef()) { + assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op"); + X = HorizOp.getOperand(1); + } else if (HorizOp.getOperand(1).isUndef()) { + assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op"); + X = HorizOp.getOperand(0); + } else { + return HorizOp; + } + return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp), + HorizOp.getValueType(), X, X); + }; + // When the operands of a horizontal math op are identical, the low half of // the result is the same as the high half. If a target shuffle is also - // replicating low and high halves, we don't need the shuffle. + // replicating low and high halves (and without changing the type/length of + // the vector), we don't need the shuffle. if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { - if (HOp.getScalarValueSizeInBits() == 64) { + if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && - "Unexpected type for h-op"); - return HOp; + HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op"); + return updateHOp(HOp, DAG); } return SDValue(); } @@ -33635,14 +34085,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { (isTargetShuffleEquivalent(Mask, {0, 0}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) || isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}))) - return HOp; + return updateHOp(HOp, DAG); if (HOp.getValueSizeInBits() == 256 && (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) || isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) || isTargetShuffleEquivalent( Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11}))) - return HOp; + return updateHOp(HOp, DAG); return SDValue(); } @@ -33677,7 +34127,7 @@ 
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { // the wide shuffle that we started with. return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), Shuf->getOperand(1), HalfMask, HalfIdx1, - HalfIdx2, false, DAG); + HalfIdx2, false, DAG, /*UseConcat*/true); } static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, @@ -33696,70 +34146,10 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; - if (SDValue HAddSub = foldShuffleOfHorizOp(N)) + if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG)) return HAddSub; } - // During Type Legalization, when promoting illegal vector types, - // the backend might introduce new shuffle dag nodes and bitcasts. - // - // This code performs the following transformation: - // fold: (shuffle (bitcast (BINOP A, B)), Undef, ) -> - // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, ) - // - // We do this only if both the bitcast and the BINOP dag nodes have - // one use. Also, perform this transformation only if the new binary - // operation is legal. This is to avoid introducing dag nodes that - // potentially need to be further expanded (or custom lowered) into a - // less optimal sequence of dag nodes. 
- if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() && - N->getOpcode() == ISD::VECTOR_SHUFFLE && - N->getOperand(0).getOpcode() == ISD::BITCAST && - N->getOperand(1).isUndef() && N->getOperand(0).hasOneUse()) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - SDValue BC0 = N0.getOperand(0); - EVT SVT = BC0.getValueType(); - unsigned Opcode = BC0.getOpcode(); - unsigned NumElts = VT.getVectorNumElements(); - - if (BC0.hasOneUse() && SVT.isVector() && - SVT.getVectorNumElements() * 2 == NumElts && - TLI.isOperationLegal(Opcode, VT)) { - bool CanFold = false; - switch (Opcode) { - default : break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - // isOperationLegal lies for integer ops on floating point types. - CanFold = VT.isInteger(); - break; - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - // isOperationLegal lies for floating point ops on integer types. - CanFold = VT.isFloatingPoint(); - break; - } - - unsigned SVTNumElts = SVT.getVectorNumElements(); - ShuffleVectorSDNode *SVOp = cast(N); - for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i) - CanFold = SVOp->getMaskElt(i) == (int)(i * 2); - for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i) - CanFold = SVOp->getMaskElt(i) < 0; - - if (CanFold) { - SDValue BC00 = DAG.getBitcast(VT, BC0.getOperand(0)); - SDValue BC01 = DAG.getBitcast(VT, BC0.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); - return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, SVOp->getMask()); - } - } - } - // Attempt to combine into a vector load/broadcast. 
if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true)) return LD; @@ -33841,7 +34231,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() && ISD::isNormalLoad(N->getOperand(0).getNode())) { LoadSDNode *LN = cast(N->getOperand(0)); - if (!LN->isVolatile()) { + if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; SDValue VZLoad = @@ -33855,53 +34245,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } } - - // Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the - // operands is an extend from v2i32 to v2i64. Turn it into a pmulld. - // FIXME: This can probably go away once we default to widening legalization. - if (Subtarget.hasSSE41() && VT == MVT::v4i32 && - N->getOpcode() == ISD::VECTOR_SHUFFLE && - N->getOperand(0).getOpcode() == ISD::BITCAST && - N->getOperand(0).getOperand(0).getOpcode() == X86ISD::PMULUDQ) { - SDValue BC = N->getOperand(0); - SDValue MULUDQ = BC.getOperand(0); - ShuffleVectorSDNode *SVOp = cast(N); - ArrayRef Mask = SVOp->getMask(); - if (BC.hasOneUse() && MULUDQ.hasOneUse() && - Mask[0] == 0 && Mask[1] == 2 && Mask[2] == -1 && Mask[3] == -1) { - SDValue Op0 = MULUDQ.getOperand(0); - SDValue Op1 = MULUDQ.getOperand(1); - if (Op0.getOpcode() == ISD::BITCAST && - Op0.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && - Op0.getOperand(0).getValueType() == MVT::v4i32) { - ShuffleVectorSDNode *SVOp0 = - cast(Op0.getOperand(0)); - ArrayRef Mask2 = SVOp0->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { - Op0 = SVOp0->getOperand(0); - Op1 = DAG.getBitcast(MVT::v4i32, Op1); - Op1 = DAG.getVectorShuffle(MVT::v4i32, dl, Op1, Op1, Mask); - return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); - } - } - if (Op1.getOpcode() == ISD::BITCAST && - Op1.getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE && - 
Op1.getOperand(0).getValueType() == MVT::v4i32) { - ShuffleVectorSDNode *SVOp1 = - cast(Op1.getOperand(0)); - ArrayRef Mask2 = SVOp1->getMask(); - if (Mask2[0] == 0 && Mask2[1] == -1 && - Mask2[2] == 1 && Mask2[3] == -1) { - Op0 = DAG.getBitcast(MVT::v4i32, Op0); - Op0 = DAG.getVectorShuffle(MVT::v4i32, dl, Op0, Op0, Mask); - Op1 = SVOp1->getOperand(0); - return DAG.getNode(ISD::MUL, dl, MVT::v4i32, Op0, Op1); - } - } - } - } - return SDValue(); } @@ -33966,6 +34309,84 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( // TODO convert SrcUndef to KnownUndef. break; } + case X86ISD::KSHIFTL: { + SDValue Src = Op.getOperand(0); + auto *Amt = cast(Op.getOperand(1)); + assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); + unsigned ShiftAmt = Amt->getZExtValue(); + + if (ShiftAmt == 0) + return TLO.CombineTo(Op, Src); + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. 
+ if (Src.getOpcode() == X86ISD::KSHIFTR) { + if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) { + unsigned C1 = Src.getConstantOperandVal(1); + unsigned NewOpc = X86ISD::KSHIFTL; + int Diff = ShiftAmt - C1; + if (Diff < 0) { + Diff = -Diff; + NewOpc = X86ISD::KSHIFTR; + } + + SDLoc dl(Op); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); + return TLO.CombineTo( + Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); + } + } + + APInt DemandedSrc = DemandedElts.lshr(ShiftAmt); + if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + + KnownUndef <<= ShiftAmt; + KnownZero <<= ShiftAmt; + KnownZero.setLowBits(ShiftAmt); + break; + } + case X86ISD::KSHIFTR: { + SDValue Src = Op.getOperand(0); + auto *Amt = cast(Op.getOperand(1)); + assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); + unsigned ShiftAmt = Amt->getZExtValue(); + + if (ShiftAmt == 0) + return TLO.CombineTo(Op, Src); + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted + // out) are never demanded. 
+ if (Src.getOpcode() == X86ISD::KSHIFTL) { + if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) { + unsigned C1 = Src.getConstantOperandVal(1); + unsigned NewOpc = X86ISD::KSHIFTR; + int Diff = ShiftAmt - C1; + if (Diff < 0) { + Diff = -Diff; + NewOpc = X86ISD::KSHIFTL; + } + + SDLoc dl(Op); + SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); + return TLO.CombineTo( + Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); + } + } + + APInt DemandedSrc = DemandedElts.shl(ShiftAmt); + if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + + KnownUndef.lshrInPlace(ShiftAmt); + KnownZero.lshrInPlace(ShiftAmt); + KnownZero.setHighBits(ShiftAmt); + break; + } case X86ISD::CVTSI2P: case X86ISD::CVTUI2P: { SDValue Src = Op.getOperand(0); @@ -33979,16 +34400,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } case X86ISD::PACKSS: case X86ISD::PACKUS: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + APInt DemandedLHS, DemandedRHS; getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; + + // Aggressively peek through ops to get at the demanded elts. + // TODO - we should do this for all target/faux shuffles ops. 
+ if (!DemandedElts.isAllOnesValue()) { + APInt DemandedSrcBits = + APInt::getAllOnesValue(N0.getScalarValueSizeInBits()); + SDValue NewN0 = SimplifyMultipleUseDemandedBits( + N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue NewN1 = SimplifyMultipleUseDemandedBits( + N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1); + if (NewN0 || NewN1) { + NewN0 = NewN0 ? NewN0 : N0; + NewN1 = NewN1 ? NewN1 : N1; + return TLO.CombineTo(Op, + TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); + } + } break; } case X86ISD::HADD: @@ -34062,25 +34503,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; break; } - case X86ISD::SUBV_BROADCAST: { - // Reduce size of broadcast if we don't need the upper half. - unsigned HalfElts = NumElts / 2; - if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) { - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - SDValue Half = Src; - if (SrcVT.getVectorNumElements() != HalfElts) { - MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts); - Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src); - } - - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0, - TLO.DAG, SDLoc(Op), - Half.getValueSizeInBits())); - } - break; - } case X86ISD::VPERMV: { SDValue Mask = Op.getOperand(0); APInt MaskUndef, MaskZero; @@ -34134,6 +34556,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); + } + // Subvector broadcast. 
+ case X86ISD::SUBV_BROADCAST: { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + if (Src.getValueSizeInBits() > ExtSizeInBits) + Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); + else if (Src.getValueSizeInBits() < ExtSizeInBits) { + MVT SrcSVT = Src.getSimpleValueType().getScalarType(); + MVT SrcVT = + MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits()); + Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src); + } + return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, + TLO.DAG, DL, ExtSizeInBits)); } // Byte shifts by immediate. case X86ISD::VSHLDQ: @@ -34201,36 +34638,30 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } } - // Simplify target shuffles. - if (!isTargetShuffle(Opc) || !VT.isSimple()) - return false; - - // Get target shuffle mask. - bool IsUnary; + // Get target/faux shuffle mask. + APInt OpUndef, OpZero; SmallVector OpMask; SmallVector OpInputs; - if (!getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, OpInputs, - OpMask, IsUnary)) + if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef, + OpZero, TLO.DAG, Depth, false)) return false; - // Shuffle inputs must be the same type as the result. - if (llvm::any_of(OpInputs, - [VT](SDValue V) { return VT != V.getValueType(); })) + // Shuffle inputs must be the same size as the result. + if (OpMask.size() != (unsigned)NumElts || + llvm::any_of(OpInputs, [VT](SDValue V) { + return VT.getSizeInBits() != V.getValueSizeInBits() || + !V.getValueType().isVector(); + })) return false; - // Clear known elts that might have been set above. - KnownZero.clearAllBits(); - KnownUndef.clearAllBits(); + KnownZero = OpZero; + KnownUndef = OpUndef; // Check if shuffle mask can be simplified to undef/zero/identity. 
int NumSrcs = OpInputs.size(); - for (int i = 0; i != NumElts; ++i) { - int &M = OpMask[i]; + for (int i = 0; i != NumElts; ++i) if (!DemandedElts[i]) - M = SM_SentinelUndef; - else if (0 <= M && OpInputs[M / NumElts].isUndef()) - M = SM_SentinelUndef; - } + OpMask[i] = SM_SentinelUndef; if (isUndefInRange(OpMask, 0, NumElts)) { KnownUndef.setAllBits(); @@ -34243,10 +34674,14 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( } for (int Src = 0; Src != NumSrcs; ++Src) if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) - return TLO.CombineTo(Op, OpInputs[Src]); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src])); // Attempt to simplify inputs. for (int Src = 0; Src != NumSrcs; ++Src) { + // TODO: Support inputs of different types. + if (OpInputs[Src].getValueType() != VT) + continue; + int Lo = Src * NumElts; APInt SrcElts = APInt::getNullValue(NumElts); for (int i = 0; i != NumElts; ++i) @@ -34256,21 +34691,13 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SrcElts.setBit(M); } + // TODO - Propagate input undef/zero elts. APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; } - // Extract known zero/undef elements. - // TODO - Propagate input undef/zero elts. - for (int i = 0; i != NumElts; ++i) { - if (OpMask[i] == SM_SentinelUndef) - KnownUndef.setBit(i); - if (OpMask[i] == SM_SentinelZero) - KnownZero.setBit(i); - } - return false; } @@ -34296,6 +34723,18 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, TLO, Depth + 1)) return true; + + // Aggressively peek through ops to get at the demanded low bits. 
+ SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( + LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( + RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); + if (DemandedLHS || DemandedRHS) { + DemandedLHS = DemandedLHS ? DemandedLHS : LHS; + DemandedRHS = DemandedRHS ? DemandedRHS : RHS; + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); + } break; } case X86ISD::VSHLI: { @@ -34323,7 +34762,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; SDValue NewShift = TLO.DAG.getNode( NewOpc, SDLoc(Op), VT, Op0.getOperand(0), - TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); + TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); return TLO.CombineTo(Op, NewShift); } } @@ -34441,6 +34880,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( KnownVec, TLO, Depth + 1)) return true; + if (SDValue V = SimplifyMultipleUseDemandedBits( + Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); + Known = KnownVec.zext(BitWidth, true); return false; } @@ -34542,12 +34986,80 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } +SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + int NumElts = DemandedElts.getBitWidth(); + unsigned Opc = Op.getOpcode(); + EVT VT = Op.getValueType(); + + switch (Opc) { + case X86ISD::PINSRB: + case X86ISD::PINSRW: { + // If we don't demand the inserted element, return the base vector. 
+ SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast(Op.getOperand(2)); + MVT VecVT = Vec.getSimpleValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + } + + APInt ShuffleUndef, ShuffleZero; + SmallVector ShuffleMask; + SmallVector ShuffleOps; + if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask, + ShuffleUndef, ShuffleZero, DAG, Depth, false)) { + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + int NumOps = ShuffleOps.size(); + if (ShuffleMask.size() == (unsigned)NumElts && + llvm::all_of(ShuffleOps, [VT](SDValue V) { + return VT.getSizeInBits() == V.getValueSizeInBits(); + })) { + + if (DemandedElts.isSubsetOf(ShuffleUndef)) + return DAG.getUNDEF(VT); + if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero)) + return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op)); + + // Bitmask that indicates which ops have only been accessed 'inline'. + APInt IdentityOp = APInt::getAllOnesValue(NumOps); + for (int i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (!DemandedElts[i] || ShuffleUndef[i]) + continue; + int Op = M / NumElts; + int Index = M % NumElts; + if (M < 0 || Index != i) { + IdentityOp.clearAllBits(); + break; + } + IdentityOp &= APInt::getOneBitSet(NumOps, Op); + if (IdentityOp == 0) + break; + } + assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) && + "Multiple identity shuffles detected"); + + if (IdentityOp != 0) + return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]); + } + } + + return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth); +} + /// Check if a vector extract from a target-specific shuffle of a load can be /// folded into a single element load. 
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but /// shuffles have been custom lowered so we need to handle those here. -static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { +static SDValue +XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -34559,13 +35071,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT OriginalVT = InVec.getValueType(); + unsigned NumOriginalElts = OriginalVT.getVectorNumElements(); // Peek through bitcasts, don't duplicate a load with other uses. InVec = peekThroughOneUseBitcasts(InVec); EVT CurrentVT = InVec.getValueType(); - if (!CurrentVT.isVector() || - CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) + if (!CurrentVT.isVector()) + return SDValue(); + + unsigned NumCurrentElts = CurrentVT.getVectorNumElements(); + if ((NumOriginalElts % NumCurrentElts) != 0) return SDValue(); if (!isTargetShuffle(InVec.getOpcode())) @@ -34582,10 +35098,17 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, ShuffleOps, ShuffleMask, UnaryShuffle)) return SDValue(); + unsigned Scale = NumOriginalElts / NumCurrentElts; + if (Scale > 1) { + SmallVector ScaledMask; + scaleShuffleMask(Scale, ShuffleMask, ScaledMask); + ShuffleMask = std::move(ScaledMask); + } + assert(ShuffleMask.size() == NumOriginalElts && "Shuffle mask size mismatch"); + // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast(EltNo)->getZExtValue(); - int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt]; + int Idx = (Elt > (int)NumOriginalElts) ? SM_SentinelUndef : ShuffleMask[Elt]; if (Idx == SM_SentinelZero) return EltVT.isInteger() ? 
DAG.getConstant(0, SDLoc(N), EltVT) @@ -34598,8 +35121,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (llvm::any_of(ShuffleMask, [](int M) { return M == SM_SentinelZero; })) return SDValue(); - assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range"); - SDValue LdNode = (Idx < (int)NumElems) ? ShuffleOps[0] : ShuffleOps[1]; + assert(0 <= Idx && Idx < (int)(2 * NumOriginalElts) && + "Shuffle index out of range"); + SDValue LdNode = (Idx < (int)NumOriginalElts) ? ShuffleOps[0] : ShuffleOps[1]; // If inputs to shuffle are the same for both ops, then allow 2 uses unsigned AllowedUses = @@ -34619,7 +35143,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, LoadSDNode *LN0 = cast(LdNode); - if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || !LN0->isSimple()) return SDValue(); // If there's a bitcast before the shuffle, check if the load type and @@ -34637,10 +35161,11 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); // Create shuffle node taking into account the case that its a unary shuffle - SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) : ShuffleOps[1]; - Shuffle = DAG.getVectorShuffle(CurrentVT, dl, ShuffleOps[0], Shuffle, - ShuffleMask); - Shuffle = DAG.getBitcast(OriginalVT, Shuffle); + SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(OriginalVT) + : DAG.getBitcast(OriginalVT, ShuffleOps[1]); + Shuffle = DAG.getVectorShuffle(OriginalVT, dl, + DAG.getBitcast(OriginalVT, ShuffleOps[0]), + Shuffle, ShuffleMask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); } @@ -34660,6 +35185,23 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) { return false; } +// Helper to push sign extension of vXi1 SETCC result through bitops. 
+static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, + SDValue Src, const SDLoc &DL) { + switch (Src.getOpcode()) { + case ISD::SETCC: + return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + case ISD::AND: + case ISD::XOR: + case ISD::OR: + return DAG.getNode( + Src.getOpcode(), DL, SExtVT, + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), + signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); + } + llvm_unreachable("Unexpected node type for vXi1 sign extension"); +} + // Try to match patterns such as // (i16 bitcast (v16i1 x)) // -> @@ -34698,6 +35240,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) MVT SExtVT; + bool PropagateSExt = false; switch (SrcVT.getSimpleVT().SimpleTy) { default: return SDValue(); @@ -34708,8 +35251,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, SExtVT = MVT::v4i32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. - if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) + if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) { SExtVT = MVT::v4i64; + PropagateSExt = true; + } break; case MVT::v8i1: SExtVT = MVT::v8i16; @@ -34718,11 +35263,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. 
- // TODO : use checkBitcastSrcVectorSize - if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - (Src.getOperand(0).getValueType().is256BitVector() || - Src.getOperand(0).getValueType().is512BitVector())) { + if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) || + checkBitcastSrcVectorSize(Src, 512))) { SExtVT = MVT::v8i32; + PropagateSExt = true; } break; case MVT::v16i1: @@ -34745,19 +35289,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, return SDValue(); }; - SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); + SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) + : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); - if (SExtVT == MVT::v64i8) { - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(V, DL); - Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); - Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); - Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); - Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); - Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, - DAG.getConstant(32, DL, MVT::i8)); - V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); - } else if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8) { + if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) { V = getPMOVMSKB(DL, V, DAG, Subtarget); } else { if (SExtVT == MVT::v8i16) @@ -34891,8 +35426,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, unsigned ShufMask = (NumElts > 2 ? 
0 : 0x44); return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, - DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat, - DAG.getConstant(ShufMask, DL, MVT::i8)); + DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), + Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8)); } Ops.append(NumElts, Splat); } else { @@ -34935,6 +35470,24 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) return V; + // Recognize the IR pattern for the movmsk intrinsic under SSE1 befoer type + // legalization destroys the v4i32 type. + if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && SrcVT == MVT::v4i1 && + VT.isScalarInteger() && N0.getOpcode() == ISD::SETCC && + N0.getOperand(0).getValueType() == MVT::v4i32 && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()) && + cast(N0.getOperand(2))->get() == ISD::SETLT) { + SDValue N00 = N0.getOperand(0); + // Only do this if we can avoid scalarizing the input. + if (ISD::isNormalLoad(N00.getNode()) || + (N00.getOpcode() == ISD::BITCAST && + N00.getOperand(0).getValueType() == MVT::v4f32)) { + SDValue V = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, + DAG.getBitcast(MVT::v4f32, N00)); + return DAG.getZExtOrTrunc(V, dl, VT); + } + } + // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && @@ -34949,6 +35502,26 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && Subtarget.hasAVX512()) { + // Use zeros for the widening if we already have some zeroes. This can + // allow SimplifyDemandedBits to remove scalar ANDs that may be down + // stream of this. 
+ // FIXME: It might make sense to detect a concat_vectors with a mix of + // zeroes and undef and turn it into insert_subvector for i1 vectors as + // a separate combine. What we can't do is canonicalize the operands of + // such a concat or we'll get into a loop with SimplifyDemandedBits. + if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); + if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { + SrcVT = LastOp.getValueType(); + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); + SmallVector Ops(N0->op_begin(), N0->op_end()); + Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + N0 = DAG.getBitcast(MVT::i8, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + } + } + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); Ops[0] = N0; @@ -34958,6 +35531,33 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, } } + // Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and + // replace with (i8 (trunc (i16 (bitcast (v16i1 X))))). This can occur + // due to insert_subvector legalization on KNL. By promoting the copy to i16 + // we can help with known bits propagation from the vXi1 domain to the + // scalar domain. + if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && + !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N0.getOperand(0).getValueType() == MVT::v16i1 && + isNullConstant(N0.getOperand(1))) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, + DAG.getBitcast(MVT::i16, N0.getOperand(0))); + + // Combine (bitcast (vbroadcast_load)) -> (vbroadcast_load). The memory VT + // determines // the number of bits loaded. Remaining bits are zero. 
+ if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() && + VT.getScalarSizeInBits() == SrcVT.getScalarSizeInBits()) { + auto *BCast = cast(N0); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, + VT.getVectorElementType(), + BCast->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1)); + return ResNode; + } + // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. @@ -35152,7 +35752,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns. ISD::NodeType BinOp; SDValue Src = DAG.matchBinOpReduction( - Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}); + Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true); if (!Src) return SDValue(); @@ -35246,29 +35846,31 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, SDLoc DL(Extract); EVT MatchVT = Match.getValueType(); unsigned NumElts = MatchVT.getVectorNumElements(); + unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ExtractVT == MVT::i1) { // Special case for (pre-legalization) vXi1 reductions. - if (NumElts > 32) + if (NumElts > 64 || !isPowerOf2_32(NumElts)) return SDValue(); - if (DAG.getTargetLoweringInfo().isTypeLegal(MatchVT)) { + if (TLI.isTypeLegal(MatchVT)) { // If this is a legal AVX512 predicate type then we can just bitcast. EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = DAG.getBitcast(MovmskVT, Match); } else { // Use combineBitcastvxi1 to create the MOVMSK. 
- if (NumElts == 32 && !Subtarget.hasInt256()) { + while (NumElts > MaxElts) { SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); - NumElts = 16; + NumElts /= 2; } EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget); } if (!Movmsk) return SDValue(); - Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, MVT::i32); + Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32); } else { // Bail with AVX512VL (which uses predicate registers). if (Subtarget.hasVLX()) @@ -35309,13 +35911,15 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget); NumElts = MaskSrcVT.getVectorNumElements(); } - assert(NumElts <= 32 && "Not expecting more than 32 elements"); + assert((NumElts <= 32 || NumElts == 64) && + "Not expecting more than 64 elements"); + MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32; if (BinOp == ISD::XOR) { // parity -> (AND (CTPOP(MOVMSK X)), 1) - SDValue Mask = DAG.getConstant(1, DL, MVT::i32); - SDValue Result = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Movmsk); - Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, Mask); + SDValue Mask = DAG.getConstant(1, DL, CmpVT); + SDValue Result = DAG.getNode(ISD::CTPOP, DL, CmpVT, Movmsk); + Result = DAG.getNode(ISD::AND, DL, CmpVT, Result, Mask); return DAG.getZExtOrTrunc(Result, DL, ExtractVT); } @@ -35323,19 +35927,19 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, ISD::CondCode CondCode; if (BinOp == ISD::OR) { // any_of -> MOVMSK != 0 - CmpC = DAG.getConstant(0, DL, MVT::i32); + CmpC = DAG.getConstant(0, DL, CmpVT); CondCode = ISD::CondCode::SETNE; } else { // all_of -> MOVMSK == ((1 << NumElts) - 1) - CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32); + CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts), + DL, CmpVT); CondCode = 
ISD::CondCode::SETEQ; } // The setcc produces an i8 of 0/1, so extend that to the result width and // negate to get the final 0/-1 mask value. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT SetccVT = - TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode); SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT); SDValue Zero = DAG.getConstant(0, DL, ExtractVT); @@ -35431,6 +36035,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + SDLoc dl(N); SDValue Src = N->getOperand(0); SDValue Idx = N->getOperand(1); @@ -35452,10 +36057,37 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(VT, SrcOp); } + // If we're extracting a single element from a broadcast load and there are + // no other users, just create a single load. + if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) { + auto *MemIntr = cast(SrcBC); + unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits(); + if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth && + VT.getSizeInBits() == SrcBCWidth) { + SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(), + MemIntr->getBasePtr(), + MemIntr->getPointerInfo(), + MemIntr->getAlignment(), + MemIntr->getMemOperand()->getFlags()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); + return Load; + } + } + + // Handle extract(truncate(x)) for 0'th index. + // TODO: Treat this as a faux shuffle? + // TODO: When can we use this for general indices? 
+ if (ISD::TRUNCATE == Src.getOpcode() && SrcVT.is128BitVector() && + isNullConstant(Idx)) { + Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl); + Src = DAG.getBitcast(SrcVT, Src); + return DAG.getNode(N->getOpcode(), dl, VT, Src, Idx); + } + // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) + if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -35489,7 +36121,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return SDValue(); int SrcIdx = Mask[N->getConstantOperandVal(1)]; - SDLoc dl(N); // If the shuffle source element is undef/zero then we can just accept it. if (SrcIdx == SM_SentinelUndef) @@ -35584,7 +36215,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { } // TODO: This switch could include FNEG and the x86-specific FP logic ops - // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid + // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid // missed load folding and fma+fneg combining. switch (Vec.getOpcode()) { case ISD::FMA: // Begin 3 operands @@ -35631,27 +36262,84 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller"); - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); - if (!Subtarget.hasFastHorizontalOps() && !OptForSize) - return SDValue(); - SDValue Index = ExtElt->getOperand(1); - if (!isNullConstant(Index)) - return SDValue(); - // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros. 
ISD::NodeType Opc; - SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD}); + SDValue Rdx = + DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true); if (!Rdx) return SDValue(); + SDValue Index = ExtElt->getOperand(1); + assert(isNullConstant(Index) && + "Reduction doesn't end in an extract from index 0"); + EVT VT = ExtElt->getValueType(0); - EVT VecVT = ExtElt->getOperand(0).getValueType(); + EVT VecVT = Rdx.getValueType(); if (VecVT.getScalarType() != VT) return SDValue(); - unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD; SDLoc DL(ExtElt); + // vXi8 reduction - sub 128-bit vector. + if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) { + if (VecVT == MVT::v4i8) { + // Pad with zero. + if (Subtarget.hasSSE41()) { + Rdx = DAG.getBitcast(MVT::i32, Rdx); + Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, + DAG.getConstant(0, DL, MVT::v4i32), Rdx, + DAG.getIntPtrConstant(0, DL)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + } else { + Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx, + DAG.getConstant(0, DL, VecVT)); + } + } + if (Rdx.getValueType() == MVT::v8i8) { + // Pad with undef. + Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx, + DAG.getUNDEF(MVT::v8i8)); + } + Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx, + DAG.getConstant(0, DL, MVT::v16i8)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); + } + + // Must be a >=128-bit vector with pow2 elements. + if ((VecVT.getSizeInBits() % 128) != 0 || + !isPowerOf2_32(VecVT.getVectorNumElements())) + return SDValue(); + + // vXi8 reduction - sum lo/hi halves then use PSADBW. 
+ if (VT == MVT::i8) { + while (Rdx.getValueSizeInBits() > 128) { + unsigned HalfSize = VecVT.getSizeInBits() / 2; + unsigned HalfElts = VecVT.getVectorNumElements() / 2; + SDValue Lo = extractSubVector(Rdx, 0, DAG, DL, HalfSize); + SDValue Hi = extractSubVector(Rdx, HalfElts, DAG, DL, HalfSize); + Rdx = DAG.getNode(ISD::ADD, DL, Lo.getValueType(), Lo, Hi); + VecVT = Rdx.getValueType(); + } + assert(VecVT == MVT::v16i8 && "v16i8 reduction expected"); + + SDValue Hi = DAG.getVectorShuffle( + MVT::v16i8, DL, Rdx, Rdx, + {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); + Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi); + Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx, + getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); + Rdx = DAG.getBitcast(MVT::v16i8, Rdx); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); + } + + // Only use (F)HADD opcodes if they aren't microcoded or minimizes codesize. + bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + if (!Subtarget.hasFastHorizontalOps() && !OptForSize) + return SDValue(); + + unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD; + // 256-bit horizontal instructions operate on 128-bit chunks rather than // across the whole vector, so we need an extract + hop preliminary stage. // This is the only step where the operands of the hop are not the same value. 
@@ -35661,15 +36349,14 @@ static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, unsigned NumElts = VecVT.getVectorNumElements(); SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL); SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL); - VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2); - Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo); + Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo); + VecVT = Rdx.getValueType(); } if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) && !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3())) return SDValue(); // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0 - assert(Rdx.getValueType() == VecVT && "Unexpected reduction match"); unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements()); for (unsigned i = 0; i != ReductionSteps; ++i) Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx); @@ -35714,15 +36401,26 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } - // TODO - Remove this once we can handle the implicit zero-extension of - // X86ISD::PEXTRW/X86ISD::PEXTRB in: - // XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and - // combineBasicSADPattern. if (IsPextr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits( SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI)) return SDValue(N, 0); + + // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling). 
+ if ((InputVector.getOpcode() == X86ISD::PINSRB || + InputVector.getOpcode() == X86ISD::PINSRW) && + InputVector.getOperand(2) == EltIdx) { + assert(SrcVT == InputVector.getOperand(0).getValueType() && + "Vector type mismatch"); + SDValue Scl = InputVector.getOperand(1); + Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl); + return DAG.getZExtOrTrunc(Scl, dl, VT); + } + + // TODO - Remove this once we can handle the implicit zero-extension of + // X86ISD::PEXTRW/X86ISD::PEXTRB in XFormVExtractWithShuffleIntoLoad, + // combineHorizontalPredicateResult and combineBasicSADPattern. return SDValue(); } @@ -35832,6 +36530,15 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, // get simplified at node creation time)? bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + // If both inputs are 0/undef, create a complete zero vector. + // FIXME: As noted above this should be handled by DAGCombiner/getNode. + if (TValIsAllZeros && FValIsAllZeros) { + if (VT.isFloatingPoint()) + return DAG.getConstantFP(0.0, DL, VT); + return DAG.getConstant(0, DL, VT); + } + if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) { // Invert the cond to not(cond) : xor(op,allones)=not(op) @@ -36295,8 +37002,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Since SKX these selects have a proper lowering. 
if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1 && - (ExperimentalVectorWideningLegalization || - VT.getVectorNumElements() > 4) && (VT.getVectorElementType() == MVT::i8 || VT.getVectorElementType() == MVT::i16)) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -36358,6 +37063,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // subl %esi, $edi // cmovsl %eax, %edi if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && + Cond.hasOneUse() && DAG.isEqualTo(LHS, Cond.getOperand(0)) && DAG.isEqualTo(RHS, Cond.getOperand(1))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -36508,6 +37214,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowVectorSelect(N, DAG, Subtarget)) return V; + // select(~Cond, X, Y) -> select(Cond, Y, X) + if (CondVT.getScalarType() != MVT::i1) + if (SDValue CondNot = IsNOT(Cond, DAG)) + return DAG.getNode(N->getOpcode(), DL, VT, + DAG.getBitcast(CondVT, CondNot), RHS, LHS); + // Custom action for SELECT MMX if (VT == MVT::x86mmx) { LHS = DAG.getBitcast(MVT::i64, LHS); @@ -36873,8 +37585,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, // We can't always do this as FCMOV only supports a subset of X86 cond. if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { - SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), - Flags}; + SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), + Flags}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -36923,12 +37635,13 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). 
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; + APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); + assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && + "Implicit constant truncation"); bool isFastMultiplier = false; - if (Diff < 10) { - switch ((unsigned char)Diff) { + if (Diff.ult(10)) { + switch (Diff.getZExtValue()) { default: break; case 1: // result = add base, cond case 2: // result = lea base( , cond*2) @@ -36943,7 +37656,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, } if (isFastMultiplier) { - APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); Cond = getSETCC(CC, Cond, DL ,DAG); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), @@ -36994,8 +37706,8 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { - SDValue Ops[] = { FalseOp, Cond.getOperand(0), - DAG.getConstant(CC, DL, MVT::i8), Cond }; + SDValue Ops[] = {FalseOp, Cond.getOperand(0), + DAG.getTargetConstant(CC, DL, MVT::i8), Cond}; return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -37029,10 +37741,11 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, CC1 = X86::GetOppositeBranchCondition(CC1); } - SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), - Flags}; + SDValue LOps[] = {FalseOp, TrueOp, + DAG.getTargetConstant(CC0, DL, MVT::i8), Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); - SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; + SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8), + Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); return CMOV; } @@ -37064,9 +37777,9 @@ static SDValue combineCMov(SDNode *N, SelectionDAG 
&DAG, EVT VT = N->getValueType(0); // This should constant fold. SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); - SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), - DAG.getConstant(X86::COND_NE, DL, MVT::i8), - Cond); + SDValue CMov = + DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), + DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond); return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); } } @@ -37166,98 +37879,45 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); - unsigned RegSize = 128; - MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); // Shrink the operands of mul. SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0); SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1); - if (ExperimentalVectorWideningLegalization || - NumElts >= OpsVT.getVectorNumElements()) { - // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the - // lower part is needed. - SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); - if (Mode == MULU8 || Mode == MULS8) - return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, - DL, VT, MulLo); + // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the + // lower part is needed. + SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); + if (Mode == MULU8 || Mode == MULS8) + return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, + DL, VT, MulLo); - MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2); - // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, - // the higher part is also needed. - SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, - ReducedVT, NewN0, NewN1); - - // Repack the lower part and higher part result of mul into a wider - // result. 
- // Generate shuffle functioning as punpcklwd. - SmallVector ShuffleMask(NumElts); - for (unsigned i = 0, e = NumElts / 2; i < e; i++) { - ShuffleMask[2 * i] = i; - ShuffleMask[2 * i + 1] = i + NumElts; - } - SDValue ResLo = - DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResLo = DAG.getBitcast(ResVT, ResLo); - // Generate shuffle functioning as punpckhwd. - for (unsigned i = 0, e = NumElts / 2; i < e; i++) { - ShuffleMask[2 * i] = i + NumElts / 2; - ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; - } - SDValue ResHi = - DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResHi = DAG.getBitcast(ResVT, ResHi); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); - } - - // When VT.getVectorNumElements() < OpsVT.getVectorNumElements(), we want - // to legalize the mul explicitly because implicit legalization for type - // <4 x i16> to <4 x i32> sometimes involves unnecessary unpack - // instructions which will not exist when we explicitly legalize it by - // extending <4 x i16> to <8 x i16> (concatenating the <4 x i16> val with - // <4 x i16> undef). - // - // Legalize the operands of mul. - // FIXME: We may be able to handle non-concatenated vectors by insertion. - unsigned ReducedSizeInBits = ReducedVT.getSizeInBits(); - if ((RegSize % ReducedSizeInBits) != 0) - return SDValue(); - - SmallVector Ops(RegSize / ReducedSizeInBits, - DAG.getUNDEF(ReducedVT)); - Ops[0] = NewN0; - NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); - Ops[0] = NewN1; - NewN1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops); - - if (Mode == MULU8 || Mode == MULS8) { - // Generate lower part of mul: pmullw. For MULU8/MULS8, only the lower - // part is needed. - SDValue Mul = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); - - // convert the type of mul result to VT. - MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); - SDValue Res = DAG.getNode(Mode == MULU8 ? 
ISD::ZERO_EXTEND_VECTOR_INREG - : ISD::SIGN_EXTEND_VECTOR_INREG, - DL, ResVT, Mul); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, - DAG.getIntPtrConstant(0, DL)); - } - - // Generate the lower and higher part of mul: pmulhw/pmulhuw. For - // MULU16/MULS16, both parts are needed. - SDValue MulLo = DAG.getNode(ISD::MUL, DL, OpsVT, NewN0, NewN1); + MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2); + // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, + // the higher part is also needed. SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, - OpsVT, NewN0, NewN1); + ReducedVT, NewN0, NewN1); // Repack the lower part and higher part result of mul into a wider - // result. Make sure the type of mul result is VT. - MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); - SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi); - Res = DAG.getBitcast(ResVT, Res); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, - DAG.getIntPtrConstant(0, DL)); + // result. + // Generate shuffle functioning as punpcklwd. + SmallVector ShuffleMask(NumElts); + for (unsigned i = 0, e = NumElts / 2; i < e; i++) { + ShuffleMask[2 * i] = i; + ShuffleMask[2 * i + 1] = i + NumElts; + } + SDValue ResLo = + DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); + ResLo = DAG.getBitcast(ResVT, ResLo); + // Generate shuffle functioning as punpckhwd. + for (unsigned i = 0, e = NumElts / 2; i < e; i++) { + ShuffleMask[2 * i] = i + NumElts / 2; + ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; + } + SDValue ResHi = + DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); + ResHi = DAG.getBitcast(ResVT, ResHi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); } static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, @@ -37365,8 +38025,7 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case. 
// Also allow v2i32 if it will be widened. MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements()); - if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) || - DAG.getTargetLoweringInfo().isTypeLegal(WVT))) + if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(WVT)) return SDValue(); SDValue N0 = N->getOperand(0); @@ -37919,7 +38578,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, if (NewShiftVal >= NumBitsPerElt) NewShiftVal = NumBitsPerElt - 1; return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8)); + DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); } // We can decode 'whole byte' logical bit shifts as shuffles. @@ -38039,7 +38698,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, - DAG.getConstant(x86cc, DL, MVT::i8)); + DAG.getTargetConstant(x86cc, DL, MVT::i8)); // Need to fill with zeros to ensure the bitcast will produce zeroes // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, @@ -38048,10 +38707,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, N->getSimpleValueType(0)); } - SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, - CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, DL, - MVT::i8)); + SDValue OnesOrZeroesF = + DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, + CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -38083,34 +38741,6 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Match (xor X, -1) -> X. -// Match extract_subvector(xor X, -1) -> extract_subvector(X). 
-// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). -static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { - V = peekThroughBitcasts(V); - if (V.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) - return V.getOperand(0); - if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && - (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { - if (SDValue Not = IsNOT(V.getOperand(0), DAG)) { - Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(), - Not, V.getOperand(1)); - } - } - SmallVector CatOps; - if (collectConcatOps(V.getNode(), CatOps)) { - for (SDValue &CatOp : CatOps) { - SDValue NotCat = IsNOT(CatOp, DAG); - if (!NotCat) return SDValue(); - CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat); - } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps); - } - return SDValue(); -} - /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y). static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::AND); @@ -38273,7 +38903,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); unsigned ShiftVal = SplatVal.countTrailingOnes(); - SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8); + SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); return DAG.getBitcast(N->getValueType(0), Shift); } @@ -38499,7 +39129,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, // TODO: Support multiple SrcOps. 
if (VT == MVT::i1) { SmallVector SrcOps; - if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) && + if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps) && SrcOps.size() == 1) { SDLoc dl(N); unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); @@ -38570,7 +39200,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } if (SDValue Shuffle = combineX86ShufflesRecursively( - {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, + {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); @@ -38585,7 +39215,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); - EVT VT = N->getValueType(0); + MVT VT = N->getSimpleValueType(0); if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) return SDValue(); @@ -38594,10 +39224,12 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) return SDValue(); - // On XOP we'll lower to PCMOV so accept one use, otherwise only - // do this if either mask has multiple uses already. - if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() || - !N1.getOperand(1).hasOneUse())) + // On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use + // VPTERNLOG. Otherwise only do this if either mask has multiple uses already. + bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) || + Subtarget.hasVLX(); + if (!(Subtarget.hasXOP() || UseVPTERNLOG || + !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse())) return SDValue(); // Attempt to extract constant byte masks. 
@@ -38895,6 +39527,24 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, DAG.getBitcast(MVT::v4f32, N1))); } + // Match any-of bool scalar reductions into a bitcast/movmsk + cmp. + // TODO: Support multiple SrcOps. + if (VT == MVT::i1) { + SmallVector SrcOps; + if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) && + SrcOps.size() == 1) { + SDLoc dl(N); + unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); + EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); + SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); + if (Mask) { + APInt AllBits = APInt::getNullValue(NumElts); + return DAG.getSetCC(dl, MVT::i1, Mask, + DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE); + } + } + } + if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -39136,26 +39786,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. - if (!SrcVT.isVector()) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - /// Detect patterns of truncation with unsigned saturation: /// /// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). @@ -39253,64 +39883,61 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { return SDValue(); } -/// Detect a pattern of truncation with signed saturation. 
-/// The types should allow to use VPMOVSS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectAVX512SSatPattern(SDValue In, EVT VT, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { - if (!TLI.isTypeLegal(In.getValueType())) - return SDValue(); - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectSSatPattern(In, VT); -} - -/// Detect a pattern of truncation with saturation: -/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). -/// The types should allow to use VPMOVUS* instruction on AVX512. -/// Return the source value to be truncated or SDValue() if the pattern was not -/// matched. -static SDValue detectAVX512USatPattern(SDValue In, EVT VT, SelectionDAG &DAG, - const SDLoc &DL, - const X86Subtarget &Subtarget, - const TargetLowering &TLI) { - if (!TLI.isTypeLegal(In.getValueType())) - return SDValue(); - if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) - return SDValue(); - return detectUSatPattern(In, VT, DAG, DL); -} - static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - EVT SVT = VT.getScalarType(); + if (!Subtarget.hasSSE2() || !VT.isVector()) + return SDValue(); + + EVT SVT = VT.getVectorElementType(); EVT InVT = In.getValueType(); - EVT InSVT = InVT.getScalarType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) && - isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) { - if (auto SSatVal = detectSSatPattern(In, VT)) - return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); - if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + EVT InSVT = InVT.getVectorElementType(); + + // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is + // split across two 
registers. We can use a packusdw+perm to clamp to 0-65535 + // and concatenate at the same time. Then we can use a final vpmovuswb to + // clip to 0-255. + if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && + InVT == MVT::v16i32 && VT == MVT::v16i8) { + if (auto USatVal = detectSSatPattern(In, VT, true)) { + // Emit a VPACKUSDW+VPERMQ followed by a VPMOVUSWB. + SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal, + DL, DAG, Subtarget); + assert(Mid && "Failed to pack!"); + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid); + } } - if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !Subtarget.hasAVX512() && + + // vXi32 truncate instructions are available with AVX512F. + // vXi16 truncate instructions are only available with AVX512BW. + // For 256-bit or smaller vectors, we require VLX. + // FIXME: We could widen truncates to 512 to remove the VLX restriction. + // If the result type is 256-bits or larger and we have disable 512-bit + // registers, we should go ahead and use the pack instructions if possible. + bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || + (Subtarget.hasBWI() && InSVT == MVT::i16)) && + (InVT.getSizeInBits() > 128) && + (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && + !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); + + if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 && + VT.getSizeInBits() >= 64 && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). + // Only do this when the result is at least 64 bits or we'll leaving + // dangling PACKSSDW nodes. 
if (SVT == MVT::i8 && InSVT == MVT::i32) { EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL, DAG, Subtarget); - if (Mid) - return truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, - Subtarget); + assert(Mid && "Failed to pack!"); + SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, + Subtarget); + assert(V && "Failed to pack!"); + return V; } else if (SVT == MVT::i8 || Subtarget.hasSSE41()) return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, Subtarget); @@ -39319,6 +39946,42 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, Subtarget); } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 && + Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) { + unsigned TruncOpc; + SDValue SatVal; + if (auto SSatVal = detectSSatPattern(In, VT)) { + SatVal = SSatVal; + TruncOpc = X86ISD::VTRUNCS; + } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) { + SatVal = USatVal; + TruncOpc = X86ISD::VTRUNCUS; + } + if (SatVal) { + unsigned ResElts = VT.getVectorNumElements(); + // If the input type is less than 512 bits and we don't have VLX, we need + // to widen to 512 bits. + if (!Subtarget.hasVLX() && !InVT.is512BitVector()) { + unsigned NumConcats = 512 / InVT.getSizeInBits(); + ResElts *= NumConcats; + SmallVector ConcatOps(NumConcats, DAG.getUNDEF(InVT)); + ConcatOps[0] = SatVal; + InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, + NumConcats * InVT.getVectorNumElements()); + SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps); + } + // Widen the result if its narrower than 128 bits. 
+ if (ResElts * SVT.getSizeInBits() < 128) + ResElts = 128 / SVT.getSizeInBits(); + EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts); + SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); + } + } + return SDValue(); } @@ -39377,7 +40040,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, return true; }; - // Check if each element of the vector is left-shifted by one. + // Check if each element of the vector is right-shifted by one. auto LHS = In.getOperand(0); auto RHS = In.getOperand(1); if (!IsConstVectorInRange(RHS, 1, 1)) @@ -39679,90 +40342,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, return Blend; } - if (Mld->getExtensionType() != ISD::EXTLOAD) - return SDValue(); - - // Resolve extending loads. - EVT VT = Mld->getValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - EVT LdVT = Mld->getMemoryVT(); - SDLoc dl(Mld); - - assert(LdVT != VT && "Cannot extend to the same type"); - unsigned ToSz = VT.getScalarSizeInBits(); - unsigned FromSz = LdVT.getScalarSizeInBits(); - // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for extending masked load"); - - unsigned SizeRatio = ToSz / FromSz; - assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - LdVT.getScalarType(), NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - // Convert PassThru value. - SDValue WidePassThru = DAG.getBitcast(WideVecVT, Mld->getPassThru()); - if (!Mld->getPassThru().isUndef()) { - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - - // Can't shuffle using an illegal type. 
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - WidePassThru = DAG.getVectorShuffle(WideVecVT, dl, WidePassThru, - DAG.getUNDEF(WideVecVT), ShuffleVec); - } - - // Prepare the new mask. - SDValue NewMask; - SDValue Mask = Mld->getMask(); - if (Mask.getValueType() == VT) { - // Mask and original value have the same type. - NewMask = DAG.getBitcast(WideVecVT, Mask); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) - ShuffleVec[i] = NumElems * SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); - } else { - assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; - unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); - SmallVector Ops(NumConcat, ZeroVal); - Ops[0] = Mask; - NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); - } - - SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(), - Mld->getBasePtr(), NewMask, WidePassThru, - Mld->getMemoryVT(), Mld->getMemOperand(), - ISD::NON_EXTLOAD); - - SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio] = i; - - // Can't shuffle using an illegal type. 
- assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, - DAG.getUNDEF(WideVecVT), ShuffleVec); - SlicedVec = DAG.getBitcast(VT, SlicedVec); - - return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true); + return SDValue(); } /// If exactly one element of the mask is set for a non-truncating masked store, @@ -39800,123 +40380,45 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = Mst->getValue().getValueType(); - EVT StVT = Mst->getMemoryVT(); SDLoc dl(Mst); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!Mst->isTruncatingStore()) { - if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG)) - return ScalarStore; - - // If the mask value has been legalized to a non-boolean vector, try to - // simplify ops leading up to it. We only demand the MSB of each lane. - SDValue Mask = Mst->getMask(); - if (Mask.getScalarValueSizeInBits() != 1) { - APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits())); - if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) - return SDValue(N, 0); - } - - // TODO: AVX512 targets should also be able to simplify something like the - // pattern above, but that pattern will be different. It will either need to - // match setcc more generally or match PCMPGTM later (in tablegen?). - - SDValue Value = Mst->getValue(); - if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && - TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), - Mst->getMemoryVT())) { - return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); - } - - return SDValue(); - } - - // Resolve truncating stores. 
- unsigned NumElems = VT.getVectorNumElements(); - - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromSz = VT.getScalarSizeInBits(); - unsigned ToSz = StVT.getScalarSizeInBits(); - - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegal(VT, StVT)) + if (Mst->isTruncatingStore()) return SDValue(); - // From/To sizes and ElemCount must be pow of two. - assert (isPowerOf2_32(NumElems * FromSz * ToSz) && - "Unexpected size for truncating masked store"); - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - assert (((NumElems * FromSz) % ToSz) == 0 && - "Unexpected ratio for truncating masked store"); + if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG)) + return ScalarStore; - unsigned SizeRatio = FromSz / ToSz; - assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); - - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDValue WideVec = DAG.getBitcast(WideVecVT, Mst->getValue()); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - - // Can't shuffle using an illegal type. - assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && - "WideVecVT should be legal"); - - SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); - - SDValue NewMask; + // If the mask value has been legalized to a non-boolean vector, try to + // simplify ops leading up to it. We only demand the MSB of each lane. 
SDValue Mask = Mst->getMask(); - if (Mask.getValueType() == VT) { - // Mask and original value have the same type. - NewMask = DAG.getBitcast(WideVecVT, Mask); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; - NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, dl, WideVecVT), - ShuffleVec); - } else { - assert(Mask.getValueType().getVectorElementType() == MVT::i1); - unsigned WidenNumElts = NumElems*SizeRatio; - unsigned MaskNumElts = VT.getVectorNumElements(); - EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - WidenNumElts); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); - SmallVector Ops(NumConcat, ZeroVal); - Ops[0] = Mask; - NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); + if (Mask.getScalarValueSizeInBits() != 1) { + APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits())); + if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) + return SDValue(N, 0); } - return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, - Mst->getBasePtr(), NewMask, StVT, - Mst->getMemOperand(), false); + SDValue Value = Mst->getValue(); + if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + Mst->getMemoryVT())) { + return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), + Mst->getBasePtr(), Mask, + Mst->getMemoryVT(), Mst->getMemOperand(), true); + } + + return SDValue(); } static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { StoreSDNode *St = cast(N); - EVT VT = St->getValue().getValueType(); EVT StVT = St->getMemoryVT(); SDLoc dl(St); unsigned Alignment = St->getAlignment(); - SDValue StoredVal = St->getOperand(1); + SDValue StoredVal = 
St->getValue(); + EVT VT = StoredVal.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Convert a store of vXi1 into a store of iX and a bitcast. @@ -39986,8 +40488,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getMemOperand()->getFlags()); } - // If we are saving a concatenation of two XMM registers and 32-byte stores - // are slow, such as on Sandy Bridge, perform two 16-byte stores. + // If we are saving a 32-byte vector and 32-byte stores are slow, such as on + // Sandy Bridge, perform two 16-byte stores. bool Fast; if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, @@ -40026,13 +40528,24 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() && St->getValue().getOpcode() == ISD::TRUNCATE && St->getValue().getOperand(0).getValueType() == MVT::v16i16 && - TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) && - !DCI.isBeforeLegalizeOps()) { + TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) && + St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) { SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue()); return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), MVT::v16i8, St->getMemOperand()); } + // Try to fold a VTRUNCUS or VTRUNCS into a truncating store. + if (!St->isTruncatingStore() && StoredVal.hasOneUse() && + (StoredVal.getOpcode() == X86ISD::VTRUNCUS || + StoredVal.getOpcode() == X86ISD::VTRUNCS) && + TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) { + bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS; + return EmitTruncSStore(IsSigned, St->getChain(), + dl, StoredVal.getOperand(0), St->getBasePtr(), + VT, St->getMemOperand(), DAG); + } + // Optimize trunc store (of multiple scalars) to shuffle and store. // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. 
@@ -40040,100 +40553,26 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, // Check if we can detect an AVG pattern from the truncation. If yes, // replace the trunc store by a normal store with the result of X86ISD::AVG // instruction. - if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, - Subtarget, dl)) - return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); + if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT())) + if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, + Subtarget, dl)) + return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); - if (SDValue Val = - detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget, - TLI)) - return EmitTruncSStore(true /* Signed saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - if (SDValue Val = detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), - DAG, dl, Subtarget, TLI)) - return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), - dl, Val, St->getBasePtr(), - St->getMemoryVT(), St->getMemOperand(), DAG); - - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromSz = VT.getScalarSizeInBits(); - unsigned ToSz = StVT.getScalarSizeInBits(); - - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegalOrCustom(VT, StVT)) - return SDValue(); - - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); - // We are going to use the original vector elt for storing. 
- // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromSz) % ToSz) return SDValue(); - - unsigned SizeRatio = FromSz / ToSz; - - assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), - StVT.getScalarType(), NumElems*SizeRatio); - - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDValue WideVec = DAG.getBitcast(WideVecVT, St->getValue()); - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i] = i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) - return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - ShuffleVec); - // At this point all of the data is stored at the bottom of the - // register. We now need to save it to mem. - - // Find the largest store unit - MVT StoreType = MVT::i8; - for (MVT Tp : MVT::integer_valuetypes()) { - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz) - StoreType = Tp; + if (TLI.isTruncStoreLegal(VT, StVT)) { + if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT())) + return EmitTruncSStore(true /* Signed saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(), + DAG, dl)) + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); } - // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. 
- if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 && - (64 <= NumElems * ToSz)) - StoreType = MVT::f64; - - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); - SmallVector Chains; - SDValue Ptr = St->getBasePtr(); - - // Perform one or more big stores into memory. - for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - StoreType, ShuffWide, - DAG.getIntPtrConstant(i, dl)); - SDValue Ch = - DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), - St->getAlignment(), St->getMemOperand()->getFlags()); - Ptr = DAG.getMemBasePlusOffset(Ptr, StoreType.getStoreSize(), dl); - Chains.push_back(Ch); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + return SDValue(); } // Turn load->store of MMX types into GPR load/stores. 
This avoids clobbering @@ -40149,11 +40588,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); - if (((VT.isVector() && !VT.isFloatingPoint()) || - (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) && + if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) && isa(St->getValue()) && - !cast(St->getValue())->isVolatile() && - St->getChain().hasOneUse() && !St->isVolatile()) { + cast(St->getValue())->isSimple() && + St->getChain().hasOneUse() && St->isSimple()) { LoadSDNode *Ld = cast(St->getValue().getNode()); SmallVector Ops; @@ -40595,8 +41033,8 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // Requires SSE2 but AVX512 has fast truncate. - if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) + // Requires SSE2. + if (!Subtarget.hasSSE2()) return SDValue(); if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple()) @@ -40620,6 +41058,13 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) return SDValue(); + // AVX512 has fast truncate, but if the input is already going to be split, + // there's no harm in trying pack. + if (Subtarget.hasAVX512() && + !(!Subtarget.useAVX512Regs() && VT.is256BitVector() && + InVT.is512BitVector())) + return SDValue(); + unsigned NumPackedSignBits = std::min(SVT.getSizeInBits(), 16); unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; @@ -40658,9 +41103,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, // Only handle vXi16 types that are at least 128-bits unless they will be // widened. 
- if (!VT.isVector() || VT.getVectorElementType() != MVT::i16 || - (!ExperimentalVectorWideningLegalization && - VT.getVectorNumElements() < 8)) + if (!VT.isVector() || VT.getVectorElementType() != MVT::i16) return SDValue(); // Input type should be vXi32. @@ -40874,6 +41317,19 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, return combineVectorTruncation(N, DAG, Subtarget); } +static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + SDValue In = N->getOperand(0); + SDLoc DL(N); + + if (auto SSatVal = detectSSatPattern(In, VT)) + return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); + if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + + return SDValue(); +} + /// Returns the negated value if the node \p N flips sign of FP value. /// /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000) @@ -40883,10 +41339,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, /// In this case we go though all bitcasts. /// This also recognizes splat of a negated value and returns the splat of that /// value. -static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { +static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) { if (N->getOpcode() == ISD::FNEG) return N->getOperand(0); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); SDValue Op = peekThroughBitcasts(SDValue(N, 0)); @@ -40900,7 +41360,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. if (!SVOp->getOperand(1).isUndef()) return SDValue(); - if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode())) + if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode(), Depth + 1)) if (NegOp0.getValueType() == VT) // FIXME: Can we do better? 
return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT), SVOp->getMask()); @@ -40914,7 +41374,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { SDValue InsVal = Op.getOperand(1); if (!InsVector.isUndef()) return SDValue(); - if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode())) + if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1)) if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, NegInsVal, Op.getOperand(2)); @@ -40951,6 +41411,57 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) { return SDValue(); } +static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc, + bool NegRes) { + if (NegMul) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FNMADD; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FNMADD: Opcode = ISD::FMA; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; + } + } + + if (NegAcc) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FMSUB; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FMSUB: Opcode = ISD::FMA; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; + case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break; + case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break; + case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; 
break; + case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break; + } + } + + if (NegRes) { + switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::FMA: Opcode = X86ISD::FNMSUB; break; + case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; + case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break; + case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; + case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break; + case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break; + case X86ISD::FNMSUB: Opcode = ISD::FMA; break; + case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break; + } + } + + return Opcode; +} + /// Do target-specific dag combines on floating point negations. static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -40980,29 +41491,123 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, // If we're negating an FMA node, then we can adjust the // instruction to include the extra negation. - unsigned NewOpcode = 0; if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) { switch (Arg.getOpcode()) { - case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; - case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; - case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; - case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; - case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break; - // We can't handle scalar intrinsic node here because it would only - // invert one element and not the whole vector. But we could try to handle - // a negation of the lower element only. 
+ case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + // We can't handle scalar intrinsic node here because it would only + // invert one element and not the whole vector. But we could try to handle + // a negation of the lower element only. + unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true); + return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops())); + } } } - if (NewOpcode) - return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, - Arg.getNode()->ops())); return SDValue(); } +char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (isFNEG(DAG, Op.getNode(), Depth)) + return 2; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + switch (Op.getOpcode()) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. 
+ for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + return V; + } + return 1; + } + } + + return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + +SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg patterns are removable even if they have multiple uses. + if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) + return DAG.getBitcast(Op.getValueType(), Arg); + + EVT VT = Op.getValueType(); + EVT SVT = VT.getScalarType(); + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FMA: + case X86ISD::FMSUB: + case X86ISD::FNMADD: + case X86ISD::FNMSUB: + case X86ISD::FMADD_RND: + case X86ISD::FMSUB_RND: + case X86ISD::FNMADD_RND: + case X86ISD::FNMSUB_RND: { + if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || + !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations) + break; + + // This is always negatible for free but we might be able to remove some + // extra operand negations as well. + SmallVector NewOps(Op.getNumOperands(), SDValue()); + for (int i = 0; i != 3; ++i) { + char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V == 2) + NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + bool NegA = !!NewOps[0]; + bool NegB = !!NewOps[1]; + bool NegC = !!NewOps[2]; + unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true); + + // Fill in the non-negated ops with the original values. 
+ for (int i = 0, e = Op.getNumOperands(); i != e; ++i) + if (!NewOps[i]) + NewOps[i] = Op.getOperand(i); + return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); + } + } + + return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, + ForCodeSize, Depth); +} + static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); @@ -41312,8 +41917,8 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { assert(InVT.is128BitVector() && "Expected 128-bit input vector"); LoadSDNode *LN = cast(N->getOperand(0)); - // Unless the load is volatile. - if (!LN->isVolatile()) { + // Unless the load is volatile or atomic. + if (LN->isSimple()) { SDLoc dl(N); unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); MVT MemVT = MVT::getIntegerVT(NumBits); @@ -41347,8 +41952,8 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { assert(InVT.is128BitVector() && "Expected 128-bit input vector"); LoadSDNode *LN = cast(N->getOperand(0)); - // Unless the load is volatile. - if (!LN->isVolatile()) { + // Unless the load is volatile or atomic. + if (LN->isSimple()) { SDLoc dl(N); unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); MVT MemVT = MVT::getFloatingPointVT(NumBits); @@ -41724,127 +42329,6 @@ combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, DAG.getConstant(EltSizeInBits - 1, DL, VT)); } -/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or -/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating -/// with UNDEFs) of the input to vectors of the same size as the target type -/// which then extends the lowest elements. 
-static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { - if (ExperimentalVectorWideningLegalization) - return SDValue(); - - unsigned Opcode = N->getOpcode(); - // TODO - add ANY_EXTEND support. - if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND) - return SDValue(); - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - if (!Subtarget.hasSSE2()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); - EVT InVT = N0.getValueType(); - EVT InSVT = InVT.getScalarType(); - - // FIXME: Generic DAGCombiner previously had a bug that would cause a - // sign_extend of setcc to sometimes return the original node and tricked it - // into thinking CombineTo was used which prevented the target combines from - // running. - // Earlying out here to avoid regressions like this - // (v4i32 (sext (v4i1 (setcc (v4i16))))) - // Becomes - // (v4i32 (sext_invec (v8i16 (concat (v4i16 (setcc (v4i16))), undef)))) - // Type legalized to - // (v4i32 (sext_invec (v8i16 (trunc_invec (v4i32 (setcc (v4i32))))))) - // Leading to a packssdw+pmovsxwd - // We could write a DAG combine to fix this, but really we shouldn't be - // creating sext_invec that's forcing v8i16 into the DAG. - if (N0.getOpcode() == ISD::SETCC) - return SDValue(); - - // Input type must be a vector and we must be extending legal integer types. - if (!VT.isVector() || VT.getVectorNumElements() < 2) - return SDValue(); - if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16) - return SDValue(); - if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) - return SDValue(); - - // If the input/output types are both legal then we have at least AVX1 and - // we will be able to use SIGN_EXTEND/ZERO_EXTEND directly. 
- if (DAG.getTargetLoweringInfo().isTypeLegal(VT) && - DAG.getTargetLoweringInfo().isTypeLegal(InVT)) - return SDValue(); - - SDLoc DL(N); - - auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) { - EVT SrcVT = N.getValueType(); - EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - Size / SrcVT.getScalarSizeInBits()); - SmallVector Opnds(Size / SrcVT.getSizeInBits(), - DAG.getUNDEF(SrcVT)); - Opnds[0] = N; - return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds); - }; - - // If target-size is less than 128-bits, extend to a type that would extend - // to 128 bits, extend that and extract the original target vector. - if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits())) { - unsigned Scale = 128 / VT.getSizeInBits(); - EVT ExVT = - EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); - SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); - SDValue SExt = DAG.getNode(Opcode, DL, ExVT, Ex); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, - DAG.getIntPtrConstant(0, DL)); - } - - // If target-size is 128-bits (or 256-bits on AVX target), then convert to - // ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT. - // Also use this if we don't have SSE41 to allow the legalizer do its job. 
- if (!Subtarget.hasSSE41() || VT.is128BitVector() || - (VT.is256BitVector() && Subtarget.hasAVX()) || - (VT.is512BitVector() && Subtarget.useAVX512Regs())) { - SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits()); - Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode); - return DAG.getNode(Opcode, DL, VT, ExOp); - } - - auto SplitAndExtendInReg = [&](unsigned SplitSize) { - unsigned NumVecs = VT.getSizeInBits() / SplitSize; - unsigned NumSubElts = SplitSize / SVT.getSizeInBits(); - EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts); - EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts); - - unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode); - SmallVector Opnds; - for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) { - SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, - DAG.getIntPtrConstant(Offset, DL)); - SrcVec = ExtendVecSize(DL, SrcVec, SplitSize); - SrcVec = DAG.getNode(IROpc, DL, SubVT, SrcVec); - Opnds.push_back(SrcVec); - } - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds); - }; - - // On pre-AVX targets, split into 128-bit nodes of - // ISD::*_EXTEND_VECTOR_INREG. - if (!Subtarget.hasAVX() && !(VT.getSizeInBits() % 128)) - return SplitAndExtendInReg(128); - - // On pre-AVX512 targets, split into 256-bit nodes of - // ISD::*_EXTEND_VECTOR_INREG. - if (!Subtarget.useAVX512Regs() && !(VT.getSizeInBits() % 256)) - return SplitAndExtendInReg(256); - - return SDValue(); -} - // Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm // result type. 
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, @@ -41915,9 +42399,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); } - if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) - return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; @@ -41931,45 +42412,15 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) { - if (NegMul) { - switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FNMADD; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; - case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMADD: Opcode = ISD::FMA; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; - } - } - - if (NegAcc) { - switch (Opcode) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::FMA: Opcode = X86ISD::FMSUB; break; - case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; - case X86ISD::FMSUB: Opcode = ISD::FMA; break; - case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; - case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; - case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; - case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; - case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; - } - } - - return Opcode; -} - static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc dl(N); EVT VT = N->getValueType(0); // Let legalize expand this if it isn't a legal type yet. 
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT)) return SDValue(); EVT ScalarVT = VT.getScalarType(); @@ -41980,17 +42431,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, SDValue B = N->getOperand(1); SDValue C = N->getOperand(2); - auto invertIfNegative = [&DAG](SDValue &V) { - if (SDValue NegVal = isFNEG(DAG, V.getNode())) { - V = DAG.getBitcast(V.getValueType(), NegVal); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); + if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) { + V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize); return true; } // Look through extract_vector_elts. If it comes from an FNEG, create a // new extract from the FNEG input. if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(V.getOperand(1))) { - if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) { - NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal); + SDValue Vec = V.getOperand(0); + if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) { + SDValue NegVal = + TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize); V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), NegVal, V.getOperand(1)); return true; @@ -42009,7 +42464,8 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, if (!NegA && !NegB && !NegC) return SDValue(); - unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC); + unsigned NewOpcode = + negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); @@ -42017,33 +42473,27 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, } // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) +// Combine FMSUBADD(A, B, FNEG(C)) 
-> FMADDSUB(A, B, C) static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { SDLoc dl(N); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool LegalOperations = !DCI.isBeforeLegalizeOps(); - SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode()); - if (!NegVal) + SDValue N2 = N->getOperand(2); + if (TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize) != 2) return SDValue(); - // FIXME: Should we bitcast instead? - if (NegVal.getValueType() != VT) - return SDValue(); - - unsigned NewOpcode; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode!"); - case X86ISD::FMADDSUB: NewOpcode = X86ISD::FMSUBADD; break; - case X86ISD::FMADDSUB_RND: NewOpcode = X86ISD::FMSUBADD_RND; break; - case X86ISD::FMSUBADD: NewOpcode = X86ISD::FMADDSUB; break; - case X86ISD::FMSUBADD_RND: NewOpcode = X86ISD::FMADDSUB_RND; break; - } + SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize); + unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); if (N->getNumOperands() == 4) return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal, N->getOperand(3)); + NegN2, N->getOperand(3)); return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), - NegVal); + NegN2); } static SDValue combineZext(SDNode *N, SelectionDAG &DAG, @@ -42090,9 +42540,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) - return V; - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; @@ -42111,12 +42558,11 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { SDValue N00 = 
N0.getOperand(0); SDValue N01 = N0.getOperand(1); - unsigned NumSrcElts = N00.getValueType().getVectorNumElements(); unsigned NumSrcEltBits = N00.getScalarValueSizeInBits(); APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2); if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) && (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) { - return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128); + return concatSubVectors(N00, N01, DAG, dl); } } @@ -42159,16 +42605,30 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, !IsOrXorXorCCZero) return SDValue(); - // TODO: Use PXOR + PTEST for SSE4.1 or later? EVT VT = SetCC->getValueType(0); SDLoc DL(SetCC); + bool HasAVX = Subtarget.hasAVX(); + + // Use XOR (plus OR) and PTEST after SSE4.1 and before AVX512. + // Otherwise use PCMPEQ (plus AND) and mask testing. if ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX2()) || + (OpSize == 256 && HasAVX) || (OpSize == 512 && Subtarget.useAVX512Regs())) { - EVT VecVT = OpSize == 512 ? MVT::v16i32 : - OpSize == 256 ? MVT::v32i8 : - MVT::v16i8; - EVT CmpVT = OpSize == 512 ? 
MVT::v16i1 : VecVT; + bool HasPT = Subtarget.hasSSE41(); + EVT VecVT = MVT::v16i8; + EVT CmpVT = MVT::v16i8; + if (OpSize == 256) + VecVT = CmpVT = MVT::v32i8; + if (OpSize == 512) { + if (Subtarget.hasBWI()) { + VecVT = MVT::v64i8; + CmpVT = MVT::v64i1; + } else { + VecVT = MVT::v16i32; + CmpVT = MVT::v16i1; + } + } + SDValue Cmp; if (IsOrXorXorCCZero) { // This is a bitwise-combined equality comparison of 2 pairs of vectors: @@ -42179,18 +42639,38 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1)); SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0)); SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1)); - SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); - SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ); - Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2); + if (VecVT == CmpVT && HasPT) { + SDValue Cmp1 = DAG.getNode(ISD::XOR, DL, VecVT, A, B); + SDValue Cmp2 = DAG.getNode(ISD::XOR, DL, VecVT, C, D); + Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp1, Cmp2); + } else { + SDValue Cmp1 = DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); + SDValue Cmp2 = DAG.getSetCC(DL, CmpVT, C, D, ISD::SETEQ); + Cmp = DAG.getNode(ISD::AND, DL, CmpVT, Cmp1, Cmp2); + } } else { SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); - Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); + if (VecVT == CmpVT && HasPT) { + Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY); + } else { + Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); + } } // For 512-bits we want to emit a setcc that will lower to kortest. - if (OpSize == 512) - return DAG.getSetCC(DL, VT, DAG.getBitcast(MVT::i16, Cmp), - DAG.getConstant(0xFFFF, DL, MVT::i16), CC); + if (VecVT != CmpVT) { + EVT KRegVT = CmpVT == MVT::v64i1 ? 
MVT::i64 : MVT::i16; + SDValue Mask = DAG.getAllOnesConstant(DL, KRegVT); + return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), Mask, CC); + } + if (HasPT) { + SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, + Cmp); + SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp); + X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; + SDValue SetCC = getSETCC(X86CC, PT, DL, DAG); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SetCC.getValue(0)); + } // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality. // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne @@ -42270,8 +42750,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, // go through type promotion to a 128-bit vector. if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && VT.getVectorElementType() == MVT::i1 && - (ExperimentalVectorWideningLegalization || - VT.getVectorNumElements() > 4) && (OpVT.getVectorElementType() == MVT::i8 || OpVT.getVectorElementType() == MVT::i16)) { SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, @@ -42289,7 +42767,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, } static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue Src = N->getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = N->getSimpleValueType(0); @@ -42310,7 +42789,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, // Look through int->fp bitcasts that don't change the element width. 
unsigned EltWidth = SrcVT.getScalarSizeInBits(); - if (Src.getOpcode() == ISD::BITCAST && + if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST && Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); @@ -42334,71 +42813,123 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // With vector masks we only demand the upper bit of the mask. + SDValue Mask = cast(N)->getMask(); + if (Mask.getScalarValueSizeInBits() != 1) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); + if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) + return SDValue(N, 0); + } + + return SDValue(); +} + static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { + TargetLowering::DAGCombinerInfo &DCI) { SDLoc DL(N); + auto *GorS = cast(N); + SDValue Chain = GorS->getChain(); + SDValue Index = GorS->getIndex(); + SDValue Mask = GorS->getMask(); + SDValue Base = GorS->getBasePtr(); + SDValue Scale = GorS->getScale(); - if (DCI.isBeforeLegalizeOps()) { - SDValue Index = N->getOperand(4); - // Remove any sign extends from 32 or smaller to larger than 32. - // Only do this before LegalizeOps in case we need the sign extend for - // legalization. 
- if (Index.getOpcode() == ISD::SIGN_EXTEND) { - if (Index.getScalarValueSizeInBits() > 32 && - Index.getOperand(0).getScalarValueSizeInBits() <= 32) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index.getOperand(0); - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) { - // The original sign extend has less users, add back to worklist in - // case it needs to be removed - DCI.AddToWorklist(Index.getNode()); - DCI.AddToWorklist(N); + if (DCI.isBeforeLegalize()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + + // Shrink constant indices if they are larger than 32-bits. + // Only do this before legalize types since v2i64 could become v2i32. + // FIXME: We could check that the type is legal if we're after legalize + // types, but then we would need to construct test cases where that happens. + // FIXME: We could support more than just constant vectors, but we need to + // careful with costing. A truncate that can be optimized out would be fine. + // Otherwise we might only want to create a truncate if it avoids a split. 
+ if (auto *BV = dyn_cast(Index)) { + if (BV->isConstant() && IndexWidth > 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); } - return SDValue(Res, 0); + auto *Scatter = cast(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); } } - // Make sure the index is either i32 or i64 - unsigned ScalarSize = Index.getScalarValueSizeInBits(); - if (ScalarSize != 32 && ScalarSize != 64) { - MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; - EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - Index.getValueType().getVectorNumElements()); - Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index; - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) - DCI.AddToWorklist(N); - return SDValue(Res, 0); - } - - // Try to remove zero extends from 32->64 if we know the sign bit of - // the input is zero. 
- if (Index.getOpcode() == ISD::ZERO_EXTEND && - Index.getScalarValueSizeInBits() == 64 && - Index.getOperand(0).getScalarValueSizeInBits() == 32) { - if (DAG.SignBitIsZero(Index.getOperand(0))) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index.getOperand(0); - SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); - if (Res == N) { - // The original sign extend has less users, add back to worklist in - // case it needs to be removed - DCI.AddToWorklist(Index.getNode()); - DCI.AddToWorklist(N); - } - return SDValue(Res, 0); + // Shrink any sign/zero extends from 32 or smaller to larger than 32 if + // there are sufficient sign bits. Only do this before legalize types to + // avoid creating illegal types in truncate. + if ((Index.getOpcode() == ISD::SIGN_EXTEND || + Index.getOpcode() == ISD::ZERO_EXTEND) && + IndexWidth > 32 && + Index.getOperand(0).getScalarValueSizeInBits() <= 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); } + auto *Scatter = cast(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); } } - // With AVX2 we only demand the upper bit of the mask. - if (!Subtarget.hasAVX512()) { + if (DCI.isBeforeLegalizeOps()) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + + // Make sure the index is either i32 or i64 + if (IndexWidth != 32 && IndexWidth != 64) { + MVT EltVT = IndexWidth > 32 ? 
MVT::i64 : MVT::i32; + EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + Index.getValueType().getVectorNumElements()); + Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); + if (auto *Gather = dyn_cast(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); + } + auto *Scatter = cast(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); + } + } + + // With vector masks we only demand the upper bit of the mask. + if (Mask.getScalarValueSizeInBits() != 1) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Mask = N->getOperand(2); APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) return SDValue(N, 0); @@ -42432,7 +42963,7 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, // Make sure to not keep references to operands, as combineSetCCEFLAGS can // RAUW them under us. if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { - SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); + SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), N->getOperand(1), Cond, Flags); } @@ -42549,6 +43080,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, } static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. 
@@ -42578,13 +43110,22 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, unsigned BitWidth = InVT.getScalarSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); if (NumSignBits >= (BitWidth - 31)) { - EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32); + EVT TruncVT = MVT::i32; if (InVT.isVector()) TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, InVT.getVectorNumElements()); SDLoc dl(N); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); + } + // If we're after legalize and the type is v2i32 we need to shuffle and + // use CVTSI2P. + assert(InVT == MVT::v2i64 && "Unexpected VT!"); + SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, + { 0, 2, -1, -1 }); + return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); } } @@ -42604,7 +43145,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasDQI() && VT != MVT::f80) return SDValue(); - if (!Ld->isVolatile() && !VT.isVector() && + if (Ld->isSimple() && !VT.isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget.is64Bit() && LdVT == MVT::i64) { SDValue FILDChain = Subtarget.getTargetLowering()->BuildFILD( @@ -42841,12 +43382,12 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); EVT VT = N->getValueType(0); SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); - SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, - DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, - MVT::i8), - N->getOperand(2)), - DAG.getConstant(1, DL, VT)); + SDValue Res1 = + DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), + N->getOperand(2)), + 
DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } @@ -42906,7 +43447,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Y.getOperand(1)); } @@ -42924,7 +43465,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), NewEFLAGS); } } @@ -42984,7 +43525,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), SDValue(Neg.getNode(), 1)); } @@ -42997,7 +43538,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { SDValue One = DAG.getConstant(1, DL, ZVT); SDValue Cmp1 = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z, One); return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp1); + DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cmp1); } } @@ -43025,9 +43566,6 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, if (!Subtarget.hasSSE2()) return SDValue(); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - EVT VT = N->getValueType(0); // If the vector size is less than 128, or greater than the supported RegSize, @@ -43035,14 +43573,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, if 
(!VT.isVector() || VT.getVectorNumElements() < 8) return SDValue(); - if (Op0.getOpcode() != ISD::MUL) - std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::MUL) - return SDValue(); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); - ShrinkMode Mode; - if (!canReduceVMulWidth(Op0.getNode(), DAG, Mode) || Mode == MULU16) - return SDValue(); + auto UsePMADDWD = [&](SDValue Op) { + ShrinkMode Mode; + return Op.getOpcode() == ISD::MUL && + canReduceVMulWidth(Op.getNode(), DAG, Mode) && Mode != MULU16 && + (!Subtarget.hasSSE41() || + (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && + Op->isOnlyUserOf(Op.getOperand(1).getNode()))); + }; + + SDValue MulOp, OtherOp; + if (UsePMADDWD(Op0)) { + MulOp = Op0; + OtherOp = Op1; + } else if (UsePMADDWD(Op1)) { + MulOp = Op1; + OtherOp = Op0; + } else + return SDValue(); SDLoc DL(N); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, @@ -43050,34 +43601,27 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, VT.getVectorNumElements() / 2); + // Shrink the operands of mul. + SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0)); + SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1)); + // Madd vector size is half of the original vector size auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, ArrayRef Ops) { MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); }; + SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, + PMADDWDBuilder); + // Fill the rest of the output with 0 + SDValue Zero = DAG.getConstant(0, DL, Madd.getSimpleValueType()); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero); - auto BuildPMADDWD = [&](SDValue Mul) { - // Shrink the operands of mul. 
- SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(0)); - SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, Mul.getOperand(1)); - - SDValue Madd = SplitOpsAndApply(DAG, Subtarget, DL, MAddVT, { N0, N1 }, - PMADDWDBuilder); - // Fill the rest of the output with 0 - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, - DAG.getConstant(0, DL, MAddVT)); - }; - - Op0 = BuildPMADDWD(Op0); - - // It's possible that Op1 is also a mul we can reduce. - if (Op1.getOpcode() == ISD::MUL && - canReduceVMulWidth(Op1.getNode(), DAG, Mode) && Mode != MULU16) { - Op1 = BuildPMADDWD(Op1); - } - - return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); + // Preserve the reduction flag on the ADD. We may need to revisit for the + // other operand. + SDNodeFlags Flags; + Flags.setVectorReduction(true); + return DAG.getNode(ISD::ADD, DL, VT, Concat, OtherOp, Flags); } static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, @@ -43087,8 +43631,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); // TODO: There's nothing special about i32, any integer type above i16 should // work just as well. @@ -43108,80 +43650,53 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, if (VT.getSizeInBits() / 4 > RegSize) return SDValue(); - // We know N is a reduction add, which means one of its operands is a phi. - // To match SAD, we need the other operand to be a ABS. - if (Op0.getOpcode() != ISD::ABS) - std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::ABS) + // We know N is a reduction add. To match SAD, we need one of the operands to + // be an ABS. + SDValue AbsOp = N->getOperand(0); + SDValue OtherOp = N->getOperand(1); + if (AbsOp.getOpcode() != ISD::ABS) + std::swap(AbsOp, OtherOp); + if (AbsOp.getOpcode() != ISD::ABS) return SDValue(); - auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) { - // SAD pattern detected. 
Now build a SAD instruction and an addition for - // reduction. Note that the number of elements of the result of SAD is less - // than the number of elements of its input. Therefore, we could only update - // part of elements in the reduction vector. - SDValue Sad = createPSADBW(DAG, Op0, Op1, DL, Subtarget); - - // The output of PSADBW is a vector of i64. - // We need to turn the vector of i64 into a vector of i32. - // If the reduction vector is at least as wide as the psadbw result, just - // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero - // anyway. - MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32); - if (VT.getSizeInBits() >= ResVT.getSizeInBits()) - Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad); - else - Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad); - - if (VT.getSizeInBits() > ResVT.getSizeInBits()) { - // Fill the upper elements with zero to match the add width. - SDValue Zero = DAG.getConstant(0, DL, VT); - Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad, - DAG.getIntPtrConstant(0, DL)); - } - - return Sad; - }; - // Check whether we have an abs-diff pattern feeding into the select. SDValue SadOp0, SadOp1; - if (!detectZextAbsDiff(Op0, SadOp0, SadOp1)) + if(!detectZextAbsDiff(AbsOp, SadOp0, SadOp1)) return SDValue(); - Op0 = BuildPSADBW(SadOp0, SadOp1); + // SAD pattern detected. Now build a SAD instruction and an addition for + // reduction. Note that the number of elements of the result of SAD is less + // than the number of elements of its input. Therefore, we could only update + // part of elements in the reduction vector. + SDValue Sad = createPSADBW(DAG, SadOp0, SadOp1, DL, Subtarget); - // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::ABS && - detectZextAbsDiff(Op1, SadOp0, SadOp1)) { - Op1 = BuildPSADBW(SadOp0, SadOp1); + // The output of PSADBW is a vector of i64. + // We need to turn the vector of i64 into a vector of i32. 
+ // If the reduction vector is at least as wide as the psadbw result, just + // bitcast. If it's narrower which can only occur for v2i32, bits 127:16 of + // the PSADBW will be zero. If we promote/ narrow vectors, truncate the v2i64 + // result to v2i32 which will be removed by type legalization. If we/ widen + // narrow vectors then we bitcast to v4i32 and extract v2i32. + MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32); + Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad); + + if (VT.getSizeInBits() > ResVT.getSizeInBits()) { + // Fill the upper elements with zero to match the add width. + assert(VT.getSizeInBits() % ResVT.getSizeInBits() == 0 && "Unexpected VTs"); + unsigned NumConcats = VT.getSizeInBits() / ResVT.getSizeInBits(); + SmallVector Ops(NumConcats, DAG.getConstant(0, DL, ResVT)); + Ops[0] = Sad; + Sad = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops); + } else if (VT.getSizeInBits() < ResVT.getSizeInBits()) { + Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad, + DAG.getIntPtrConstant(0, DL)); } - return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); -} - -/// Convert vector increment or decrement to sub/add with an all-ones constant: -/// add X, <1, 1...> --> sub X, <-1, -1...> -/// sub X, <1, 1...> --> add X, <-1, -1...> -/// The all-ones vector constant can be materialized using a pcmpeq instruction -/// that is commonly recognized as an idiom (has no register dependency), so -/// that's better/smaller than loading a splat 1 constant. -static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { - assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && - "Unexpected opcode for increment/decrement transform"); - - // Pseudo-legality check: getOnesVector() expects one of these types, so bail - // out and wait for legalization if we have an unsupported vector length. 
- EVT VT = N->getValueType(0); - if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) - return SDValue(); - - APInt SplatVal; - if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue()) - return SDValue(); - - SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); - unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; - return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); + // Preserve the reduction flag on the ADD. We may need to revisit for the + // other operand. + SDNodeFlags Flags; + Flags.setVectorReduction(true); + return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags); } static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, @@ -43294,8 +43809,8 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, } // Attempt to turn this pattern into PMADDWD. -// (mul (add (zext (build_vector)), (zext (build_vector))), -// (add (zext (build_vector)), (zext (build_vector))) +// (mul (add (sext (build_vector)), (sext (build_vector))), +// (add (sext (build_vector)), (sext (build_vector))) static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { @@ -43415,6 +43930,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); if (Flags.hasVectorReduction()) { @@ -43445,8 +43961,29 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, HADDBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) - return V; + // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into + // (sub Y, (sext (vXi1 X))). + // FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in + // generic DAG combine without a legal type check, but adding this there + // caused regressions. 
+ if (VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (Op0.getOpcode() == ISD::ZERO_EXTEND && + Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && + TLI.isTypeLegal(Op0.getOperand(0).getValueType())) { + SDLoc DL(N); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt); + } + + if (Op1.getOpcode() == ISD::ZERO_EXTEND && + Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && + TLI.isTypeLegal(Op1.getOperand(0).getValueType())) { + SDLoc DL(N); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt); + } + } return combineAddOrSubToADCOrSBB(N, DAG); } @@ -43457,13 +43994,15 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); + if (!VT.isVector()) + return SDValue(); + // PSUBUS is supported, starting from SSE2, but truncation for v8i32 // is only worth it with SSSE3 (PSHUFB). - if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) && + EVT EltVT = VT.getVectorElementType(); + if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) && !(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) && - !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) && - !(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 || - VT == MVT::v16i32 || VT == MVT::v8i64))) + !(Subtarget.useBWIRegs() && (VT == MVT::v16i32))) return SDValue(); SDValue SubusLHS, SubusRHS; @@ -43493,16 +44032,13 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, } else return SDValue(); - auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef Ops) { - return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); - }; - // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. 
- if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) - return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, USUBSATBuilder); + if (EltVT == MVT::i8 || EltVT == MVT::i16) + return DAG.getNode(ISD::USUBSAT, SDLoc(N), VT, SubusLHS, SubusRHS); + + assert((VT == MVT::v8i32 || VT == MVT::v16i32 || VT == MVT::v8i64) && + "Unexpected VT!"); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -43531,15 +44067,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue NewSubusLHS = DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType); SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); - SDValue Psubus = - SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); + SDValue Psubus = DAG.getNode(ISD::USUBSAT, SDLoc(N), ShrinkedType, + NewSubusLHS, NewSubusRHS); + // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); } static SDValue combineSub(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -43576,9 +44113,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, HSUBBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) - return V; - // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) return V; @@ -43712,14 +44246,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, } } - // If we're inserting all zeros into the upper half, change this to - // an insert into an all zeros vector. We will match this to a move - // with implicit upper bit zeroing during isel. 
- if (Ops.size() == 2 && ISD::isBuildVectorAllZeros(Ops[1].getNode())) - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - getZeroVector(VT, Subtarget, DAG, DL), Ops[0], - DAG.getIntPtrConstant(0, DL)); - return SDValue(); } @@ -43786,10 +44312,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // least as large as the original insertion. Just insert the original // subvector into a zero vector. if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 && - SubVec.getConstantOperandAPInt(1) == 0 && + isNullConstant(SubVec.getOperand(1)) && SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue Ins = SubVec.getOperand(0); - if (Ins.getConstantOperandAPInt(2) == 0 && + if (isNullConstant(Ins.getOperand(2)) && ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) && Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits()) return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, @@ -43825,31 +44351,42 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // Match concat_vector style patterns. SmallVector SubVectorOps; - if (collectConcatOps(N, SubVectorOps)) + if (collectConcatOps(N, SubVectorOps)) { if (SDValue Fold = combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget)) return Fold; - // If we are inserting into both halves of the vector, the starting vector - // should be undef. If it isn't, make it so. Only do this if the early insert - // has no other uses. - // TODO: Should this be a generic DAG combine? - // TODO: Why doesn't SimplifyDemandedVectorElts catch this? 
- if ((IdxVal == OpVT.getVectorNumElements() / 2) && - Vec.getOpcode() == ISD::INSERT_SUBVECTOR && - OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 && - isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() && - Vec.hasOneUse()) { - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), - Vec.getOperand(1), Vec.getOperand(2)); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, - N->getOperand(2)); + // If we're inserting all zeros into the upper half, change this to + // a concat with zero. We will match this to a move + // with implicit upper bit zeroing during isel. + // We do this here because we don't want combineConcatVectorOps to + // create INSERT_SUBVECTOR from CONCAT_VECTORS. + if (SubVectorOps.size() == 2 && + ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode())) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, + getZeroVector(OpVT, Subtarget, DAG, dl), + SubVectorOps[0], DAG.getIntPtrConstant(0, dl)); } // If this is a broadcast insert into an upper undef, use a larger broadcast. if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST) return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); + // If this is a broadcast load inserted into an upper undef, use a larger + // broadcast load. 
+ if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() && + SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) { + auto *MemIntr = cast(SubVec); + SDVTList Tys = DAG.getVTList(OpVT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + return SDValue(); } @@ -43928,12 +44465,15 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, return SDValue(); MVT VT = N->getSimpleValueType(0); - EVT WideVecVT = N->getOperand(0).getValueType(); - SDValue WideVec = peekThroughBitcasts(N->getOperand(0)); + SDValue InVec = N->getOperand(0); + SDValue InVecBC = peekThroughBitcasts(InVec); + EVT InVecVT = InVec.getValueType(); + EVT InVecBCVT = InVecBC.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && - TLI.isTypeLegal(WideVecVT) && - WideVecVT.getSizeInBits() == 256 && WideVec.getOpcode() == ISD::AND) { + TLI.isTypeLegal(InVecVT) && + InVecVT.getSizeInBits() == 256 && InVecBC.getOpcode() == ISD::AND) { auto isConcatenatedNot = [] (SDValue V) { V = peekThroughBitcasts(V); if (!isBitwiseNot(V)) @@ -43941,12 +44481,12 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, SDValue NotOp = V->getOperand(0); return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; }; - if (isConcatenatedNot(WideVec.getOperand(0)) || - isConcatenatedNot(WideVec.getOperand(1))) { + if (isConcatenatedNot(InVecBC.getOperand(0)) || + isConcatenatedNot(InVecBC.getOperand(1))) { // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1 - SDValue Concat = split256IntArith(WideVec, DAG); + SDValue Concat = split256IntArith(InVecBC, DAG); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, - 
DAG.getBitcast(WideVecVT, Concat), N->getOperand(1)); + DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); } } @@ -43956,7 +44496,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowExtractedVectorSelect(N, DAG)) return V; - SDValue InVec = N->getOperand(0); unsigned IdxVal = cast(N->getOperand(1))->getZExtValue(); if (ISD::isBuildVectorAllZeros(InVec.getNode())) @@ -43976,31 +44515,42 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR - if (InVec.getOpcode() == ISD::BITCAST && - InVec.getOperand(0).getValueType().isVector()) { - SDValue SrcOp = InVec.getOperand(0); - EVT SrcVT = SrcOp.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - unsigned DestNumElts = InVec.getValueType().getVectorNumElements(); + if (InVec != InVecBC && InVecBCVT.isVector()) { + unsigned SrcNumElts = InVecBCVT.getVectorNumElements(); + unsigned DestNumElts = InVecVT.getVectorNumElements(); if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; if ((VT.getVectorNumElements() % DestSrcRatio) == 0) { unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NewExtNumElts); + InVecBCVT.getScalarType(), NewExtNumElts); if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; SDLoc DL(N); SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, - SrcOp, NewIndex); + InVecBC, NewIndex); return DAG.getBitcast(VT, NewExtract); } } } } + // If we are extracting from an insert into a 
zero vector, replace with a + // smaller insert into zero if we don't access less than the original + // subvector. Don't do this for i1 vectors. + if (VT.getVectorElementType() != MVT::i1 && + InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 && + InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) && + ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) && + InVec.getOperand(1).getValueSizeInBits() <= VT.getSizeInBits()) { + SDLoc DL(N); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + getZeroVector(VT, Subtarget, DAG, DL), + InVec.getOperand(1), InVec.getOperand(2)); + } + // If we're extracting from a broadcast then we're better off just // broadcasting to the smaller type directly, assuming this is the only use. // As its a broadcast we don't care about the extraction index. @@ -44008,11 +44558,25 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits()) return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0)); + if (InVec.getOpcode() == X86ISD::VBROADCAST_LOAD && InVec.hasOneUse()) { + auto *MemIntr = cast(InVec); + if (MemIntr->getMemoryVT().getSizeInBits() <= VT.getSizeInBits()) { + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; + SDValue BcastLd = + DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, + MemIntr->getMemoryVT(), + MemIntr->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); + return BcastLd; + } + } + // If we're extracting the lowest subvector and we're the only user, // we may be able to perform this with a smaller vector width. if (IdxVal == 0 && InVec.hasOneUse()) { unsigned InOpcode = InVec.getOpcode(); - if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) { + if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { // v2f64 CVTDQ2PD(v4i32). 
if (InOpcode == ISD::SINT_TO_FP && InVec.getOperand(0).getValueType() == MVT::v4i32) { @@ -44093,7 +44657,8 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { // Simplify PMULDQ and PMULUDQ operations. static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -44103,23 +44668,43 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS); // Multiply by zero. + // Don't return RHS as it may contain UNDEFs. if (ISD::isBuildVectorAllZeros(RHS.getNode())) - return RHS; - - // Aggressively peek through ops to get at the demanded low bits. - APInt DemandedMask = APInt::getLowBitsSet(64, 32); - SDValue DemandedLHS = DAG.GetDemandedBits(LHS, DemandedMask); - SDValue DemandedRHS = DAG.GetDemandedBits(RHS, DemandedMask); - if (DemandedLHS || DemandedRHS) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - DemandedLHS ? DemandedLHS : LHS, - DemandedRHS ? DemandedRHS : RHS); + return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI)) return SDValue(N, 0); + // If the input is an extend_invec and the SimplifyDemandedBits call didn't + // convert it to any_extend_invec, due to the LegalOperations check, do the + // conversion directly to a vector shuffle manually. This exposes combine + // opportunities missed by combineExtInVec not calling + // combineX86ShufflesRecursively on SSE4.1 targets. + // FIXME: This is basically a hack around several other issues related to + // ANY_EXTEND_VECTOR_INREG. 
+ if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || + LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && + LHS.getOperand(0).getValueType() == MVT::v4i32) { + SDLoc dl(N); + LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0), + LHS.getOperand(0), { 0, -1, 1, -1 }); + LHS = DAG.getBitcast(MVT::v2i64, LHS); + return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); + } + if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() && + (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || + RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && + RHS.getOperand(0).getValueType() == MVT::v4i32) { + SDLoc dl(N); + RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0), + RHS.getOperand(0), { 0, -1, 1, -1 }); + RHS = DAG.getBitcast(MVT::v2i64, RHS); + return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); + } + return SDValue(); } @@ -44134,7 +44719,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { auto *Ld = cast(In); - if (!Ld->isVolatile()) { + if (Ld->isSimple()) { MVT SVT = In.getSimpleValueType().getVectorElementType(); ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SEXTLOAD : ISD::ZEXTLOAD; EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT, @@ -44150,17 +44735,6 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, } } - // Disabling for widening legalization for now. We can enable if we find a - // case that needs it. Otherwise it can be deleted when we switch to - // widening legalization. - if (ExperimentalVectorWideningLegalization) - return SDValue(); - - // Combine (ext_invec (ext_invec X)) -> (ext_invec X) - if (In.getOpcode() == N->getOpcode() && - TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType())) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0)); - // Attempt to combine as a shuffle. 
// TODO: SSE41 support if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) { @@ -44173,6 +44747,20 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + + APInt KnownUndef, KnownZero; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, + KnownZero, DCI)) + return SDValue(N, 0); + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -44196,8 +44784,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case X86ISD::CMP: return combineCMP(N, DAG); - case ISD::ADD: return combineAdd(N, DAG, Subtarget); - case ISD::SUB: return combineSub(N, DAG, Subtarget); + case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget); + case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget); case X86ISD::ADD: case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); case X86ISD::SBB: return combineSBB(N, DAG); @@ -44214,12 +44802,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget); + case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget); case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case 
ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); + case X86ISD::VTRUNC: return combineVTRUNC(N, DAG); case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); @@ -44299,20 +44888,22 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FNMADD_RND: case X86ISD::FNMSUB: case X86ISD::FNMSUB_RND: - case ISD::FMA: return combineFMA(N, DAG, Subtarget); + case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget); case X86ISD::FMADDSUB_RND: case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: - case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); - case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI); + case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI); + case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget); case X86ISD::MGATHER: - case X86ISD::MSCATTER: + case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI); case ISD::MGATHER: - case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); + case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI); case X86ISD::PCMPEQ: case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); case X86ISD::PMULDQ: - case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI); + case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget); + case X86ISD::KSHIFTL: + case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI); } return SDValue(); @@ -44660,10 +45251,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const { case 'I': case 'J': case 'K': - case 'L': - case 'M': case 'N': case 'G': + case 'L': + case 'M': + return C_Immediate; case 'C': case 'e': case 'Z': @@ -45175,8 +45767,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VConstraint && Subtarget.hasVLX()) return std::make_pair(0U, &X86::FR64XRegClass); return std::make_pair(0U, &X86::FR64RegClass); - // 
TODO: Handle f128 and i128 in FR128RegClass after it is tested well. - // Vector types. + // TODO: Handle i128 in FR128RegClass after it is tested well. + // Vector types and fp128. + case MVT::f128: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: @@ -45469,7 +46062,7 @@ void X86TargetLowering::insertCopiesSplitCSR( else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - unsigned NewVR = MRI->createVirtualRegister(RC); + Register NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be @@ -45514,3 +46107,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; } + +unsigned +X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackProbeSize; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e0be03bc3f9..6f7e90008de 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -17,7 +17,6 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" namespace llvm { class X86Subtarget; @@ -144,6 +143,10 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. 
+ MOVQ2DQ, + /// Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. MOVDQ2Q, @@ -422,7 +425,8 @@ namespace llvm { // Tests Types Of a FP Values for scalar types. VFPCLASSS, - // Broadcast scalar to vector. + // Broadcast (splat) scalar or element 0 of a vector. If the operand is + // a vector, this node may change the vector length as part of the splat. VBROADCAST, // Broadcast mask to vector. VBROADCASTM, @@ -611,6 +615,9 @@ namespace llvm { // extract_vector_elt, store. VEXTRACT_STORE, + // scalar broadcast from memory + VBROADCAST_LOAD, + // Store FP control world into i16 memory. FNSTCW16m, @@ -680,6 +687,9 @@ namespace llvm { bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO); + /// If Op is a constant whose elements are all the same constant or + /// undefined, return true and return the constant value in \p SplatVal. + bool isConstantSplat(SDValue Op, APInt &SplatVal); } // end namespace X86 //===--------------------------------------------------------------------===// @@ -792,6 +802,17 @@ namespace llvm { /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. Else return 0. + char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations, + bool ForCodeSize, unsigned Depth) const override; + + /// If isNegatibleForFree returns true, return the newly negated expression. 
+ SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -840,6 +861,13 @@ namespace llvm { bool hasAndNot(SDValue Y) const override; + bool hasBitTest(SDValue X, SDValue Y) const override; + + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; @@ -863,11 +891,7 @@ namespace llvm { return VTIsOk(XVT) && VTIsOk(KeptBitsVT); } - bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { - if (DAG.getMachineFunction().getFunction().hasMinSize()) - return false; - return true; - } + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override; bool shouldSplatInsEltVarIndex(EVT VT) const override; @@ -913,6 +937,10 @@ namespace llvm { TargetLoweringOpt &TLO, unsigned Depth) const override; + SDValue SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; @@ -1090,11 +1118,12 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; - bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override; + bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; bool convertSelectOfConstantsToMath(EVT VT) const override; - bool decomposeMulByConstant(EVT VT, SDValue C) const override; + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool 
IsSigned) const override; @@ -1136,8 +1165,8 @@ namespace llvm { return nullptr; // nothing to do, move along. } - unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const override; + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. @@ -1189,12 +1218,18 @@ namespace llvm { CallingConv::ID CC, EVT VT) const override; + unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + bool isIntDivCheap(EVT VT, AttributeList Attr) const override; bool supportSwiftError() const override; StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getStackProbeSize(MachineFunction &MF) const; + bool hasVectorBlend() const override { return true; } unsigned getMaxSupportedInterleaveFactor() const override { return 4; } @@ -1326,6 +1361,12 @@ namespace llvm { SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1372,6 +1413,9 @@ namespace llvm { LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; + bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override; + bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override; + bool needsCmpXchgNb(Type *MemType) const; void 
SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, @@ -1462,6 +1506,9 @@ namespace llvm { /// Reassociate floating point divisions into multiply by reciprocal. unsigned combineRepeatedFPDivisors() const override; + + SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + SmallVectorImpl &Created) const override; }; namespace X86 { @@ -1625,24 +1672,24 @@ namespace llvm { /// mask. This is the reverse process to canWidenShuffleElements, but can /// always succeed. template - void scaleShuffleMask(int Scale, ArrayRef Mask, + void scaleShuffleMask(size_t Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(0 < Scale && "Unexpected scaling factor"); size_t NumElts = Mask.size(); ScaledMask.assign(NumElts * Scale, -1); - for (int i = 0; i != (int)NumElts; ++i) { + for (size_t i = 0; i != NumElts; ++i) { int M = Mask[i]; // Repeat sentinel values in every mask element. if (M < 0) { - for (int s = 0; s != Scale; ++s) + for (size_t s = 0; s != Scale; ++s) ScaledMask[(Scale * i) + s] = M; continue; } // Scale mask element and increment across each mask element. 
- for (int s = 0; s != Scale; ++s) + for (size_t s = 0; s != Scale; ++s) ScaledMask[(Scale * i) + s] = (Scale * M) + s; } } diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp index 04e8b2231fe..cc0f59ab329 100644 --- a/lib/Target/X86/X86IndirectBranchTracking.cpp +++ b/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -84,7 +84,7 @@ bool X86IndirectBranchTrackingPass::addENDBR( return false; } -bool IsCallReturnTwice(llvm::MachineOperand &MOp) { +static bool IsCallReturnTwice(llvm::MachineOperand &MOp) { if (!MOp.isGlobal()) return false; auto *CalleeFn = dyn_cast(MOp.getGlobal()); diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp index 02ae73706a3..2b1e3f23efd 100644 --- a/lib/Target/X86/X86InsertPrefetch.cpp +++ b/lib/Target/X86/X86InsertPrefetch.cpp @@ -79,8 +79,8 @@ ErrorOr getPrefetchHints(const FunctionSamples *TopSamples, // The prefetch instruction can't take memory operands involving vector // registers. 
bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { - unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); - unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); + Register BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); + Register IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); return (BaseReg == 0 || X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && @@ -108,7 +108,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, Prefetches &Prefetches) const { assert(Prefetches.empty() && "Expected caller passed empty PrefetchInfo vector."); - static const std::pair HintTypes[] = { + static constexpr std::pair HintTypes[] = { {"_nta_", X86::PREFETCHNTA}, {"_t0_", X86::PREFETCHT0}, {"_t1_", X86::PREFETCHT1}, @@ -173,7 +173,7 @@ bool X86InsertPrefetch::doInitialization(Module &M) { void X86InsertPrefetch::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } bool X86InsertPrefetch::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 54eddeacaa1..9b5de59430a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -74,6 +74,7 @@ class X86VectorVTInfo("alignedload" # VTName); PatFrag ScalarLdFrag = !cast("load" # EltVT); + PatFrag BroadcastLdFrag = !cast("X86VBroadcastld" # EltSizeName); ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"), !cast("sse_load_f32"), @@ -412,6 +413,14 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; 
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +} + // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. @@ -436,6 +445,19 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; +} + // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -443,7 +465,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "", [(set FR32X:$dst, fp32imm0)]>; def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "", - [(set FR64X:$dst, fpimm0)]>; + [(set FR64X:$dst, fp64imm0)]>; + def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "", + [(set VR128X:$dst, fp128imm0)]>; } //===----------------------------------------------------------------------===// @@ -730,14 +754,14 @@ let isCommutable = 1 in def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -1100,75 +1124,104 @@ multiclass avx512_broadcast_rm_split opc, string OpcodeStr, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, - SDPatternOperator UnmaskedOp = X86VBroadcast> { - let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { - defm r : AVX512_maskable_split, - T8PD, EVEX, Sched<[SchedRR]>; - let mayLoad = 1 in - defm m : AVX512_maskable_split, - T8PD, EVEX, EVEX_CD8, - Sched<[SchedRM]>; - } + bit IsConvertibleToThreeAddress, + SDPatternOperator UnmaskedOp = X86VBroadcast, + SDPatternOperator UnmaskedBcastOp = 
SrcInfo.BroadcastLdFrag> { + let hasSideEffects = 0 in + def r : AVX512PI, T8PD, EVEX, Sched<[SchedRR]>; + def rkz : AVX512PI, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>; + let Constraints = "$src0 = $dst" in + def rk : AVX512PI, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>; - def : Pat<(MaskInfo.VT - (bitconvert - (DestInfo.VT (UnmaskedOp - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src))))))), - (!cast(Name#MaskInfo.ZSuffix#m) addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, + let hasSideEffects = 0, mayLoad = 1 in + def m : AVX512PI, T8PD, EVEX, + EVEX_CD8, Sched<[SchedRM]>; + + def mkz : AVX512PI(Name#DestInfo.ZSuffix#mk) - MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>; - def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask, - (bitconvert - (DestInfo.VT - (X86VBroadcast - (SrcInfo.VT (scalar_to_vector - (SrcInfo.ScalarLdFrag addr:$src)))))), - MaskInfo.ImmAllZerosV)), - (!cast(Name#MaskInfo.ZSuffix#mkz) - MaskInfo.KRCWM:$mask, addr:$src)>; + (SrcInfo.BroadcastLdFrag addr:$src)))), + MaskInfo.ImmAllZerosV))], + DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, + EVEX_CD8, Sched<[SchedRM]>; + + let Constraints = "$src0 = $dst", + isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def mk : AVX512PI, T8PD, EVEX, EVEX_K, + EVEX_CD8, Sched<[SchedRM]>; } // Helper class to force mask and broadcast result to same type. 
multiclass avx512_broadcast_rm opc, string OpcodeStr, string Name, SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo> : + X86VectorVTInfo SrcInfo, + bit IsConvertibleToThreeAddress> : avx512_broadcast_rm_split; + DestInfo, DestInfo, SrcInfo, + IsConvertibleToThreeAddress>; multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1176,7 +1229,7 @@ multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; @@ -1187,7 +1240,7 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { defm Z : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info512, _.info128, 1>, avx512_broadcast_scalar, EVEX_V512; @@ -1195,12 +1248,12 @@ multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, let Predicates = [HasVLX] in { defm Z256 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info256, _.info128, 1>, avx512_broadcast_scalar, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteFShuffle256Ld, _.info128, _.info128, 1>, avx512_broadcast_scalar, EVEX_V128; @@ -1284,46 +1337,35 @@ defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; -// Provide aliases for broadcast from the same register class that -// automatically does the extract. 
-multiclass avx512_int_broadcast_rm_lowering { - def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), - (!cast(Name#DestInfo.ZSuffix#"r") - (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>; -} - multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + bit IsConvertibleToThreeAddress> { let Predicates = [prd] in { defm Z : avx512_broadcast_rm, - avx512_int_broadcast_rm_lowering, + WriteShuffle256Ld, _.info512, _.info128, + IsConvertibleToThreeAddress>, EVEX_V512; - // Defined separately to avoid redefinition. - defm Z_Alt : avx512_int_broadcast_rm_lowering; } let Predicates = [prd, HasVLX] in { defm Z256 : avx512_broadcast_rm, - avx512_int_broadcast_rm_lowering, + WriteShuffle256Ld, _.info256, _.info128, + IsConvertibleToThreeAddress>, EVEX_V256; defm Z128 : avx512_broadcast_rm, + WriteShuffleXLd, _.info128, _.info128, + IsConvertibleToThreeAddress>, EVEX_V128; } } defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", - avx512vl_i8_info, HasBWI>; + avx512vl_i8_info, HasBWI, 0>; defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", - avx512vl_i16_info, HasBWI>; + avx512vl_i16_info, HasBWI, 0>; defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", - avx512vl_i32_info, HasAVX512>; + avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { @@ -1354,6 +1396,10 @@ let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. 
+ def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZm addr:$src)>; } let Predicates = [HasVLX] in { @@ -1362,6 +1408,12 @@ let Predicates = [HasVLX] in { (VPBROADCASTQZ128m addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ128m addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1382,6 +1434,12 @@ let Predicates = [HasVLX, HasBWI] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZ256m addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. + def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ128m addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZ256m addr:$src)>; } let Predicates = [HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -1394,6 +1452,10 @@ let Predicates = [HasBWI] in { def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWZm addr:$src)>; + + // FIXME this is to handle aligned extloads from i8. 
+ def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWZm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -1629,12 +1691,12 @@ multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split, + _Src.info512, _Src.info128, 0, null_frag, null_frag>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split, + _Src.info256, _Src.info128, 0, null_frag, null_frag>, EVEX_V256; } @@ -1645,7 +1707,7 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, + _Src.info128, _Src.info128, 0, null_frag, null_frag>, EVEX_V128; } @@ -1654,23 +1716,6 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", avx512vl_f32_info, avx512vl_f64_info>; -let Predicates = [HasVLX] in { -def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; -def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; -} - -def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>; -def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), - (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>; - -def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>; -def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), - (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>; - //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- @@ -1730,7 +1775,7 @@ multiclass avx512_perm_i_mb 
opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src2, - IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1807,7 +1852,7 @@ multiclass avx512_perm_i_lowering(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; @@ -1846,7 +1891,7 @@ multiclass avx512_perm_t_mb opc, string OpcodeStr, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src1, - IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1947,7 +1992,7 @@ multiclass WriteFVarBlendask opc, string OpcodeStr, } multiclass WriteFVarBlendask_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { - let mayLoad = 1, hasSideEffects = 0 in { + let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I, EVEX_4V, VEX_LIG, Sched<[sched]>; + timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2041,9 +2086,9 @@ multiclass avx512_cmp_scalar, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -2052,9 +2097,9 @@ multiclass avx512_cmp_scalar, + timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { @@ -2065,7 +2110,7 @@ multiclass avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), @@ -2074,7 +2119,7 @@ multiclass 
avx512_cmp_scalar, + timm:$cc))]>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2100,94 +2145,82 @@ let Predicates = [HasAVX512] in { SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; } -multiclass avx512_icmp_packed opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86FoldableSchedWrite sched, +multiclass avx512_icmp_packed opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> { - let isCommutable = IsCommutable in + let isCommutable = IsCommutable, hasSideEffects = 0 in def rr : AVX512BI, - EVEX_4V, Sched<[sched]>; + []>, EVEX_4V, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rm : AVX512BI, - EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; - let isCommutable = IsCommutable in + []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; + let isCommutable = IsCommutable, hasSideEffects = 0 in def rrk : AVX512BI, - EVEX_4V, EVEX_K, Sched<[sched]>; + []>, EVEX_4V, EVEX_K, Sched<[sched]>; + let mayLoad = 1, hasSideEffects = 0 in def rmk : AVX512BI, - EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } -multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, +multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed { + avx512_icmp_packed { + let mayLoad = 1, hasSideEffects = 0 in { def rmb : AVX512BI, - EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512BI, - EVEX_4V, EVEX_K, EVEX_B, + []>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; + } } -multiclass avx512_icmp_packed_vl opc, string OpcodeStr, PatFrag OpNode, - PatFrag OpNode_su, X86SchedWriteWidths sched, +multiclass avx512_icmp_packed_vl opc, string OpcodeStr, + X86SchedWriteWidths 
sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed, EVEX_V256; - defm Z128 : avx512_icmp_packed, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, - PatFrag OpNode, PatFrag OpNode_su, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed_rmb, EVEX_V256; - defm Z128 : avx512_icmp_packed_rmb, EVEX_V128; } } @@ -2195,53 +2228,42 @@ multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, // This fragment treats X86cmpm as commutable to help match loads in both // operands for PCMPEQ. def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>; -def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), - (X86setcc_commute node:$src1, node:$src2, SETEQ)>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (setcc node:$src1, node:$src2, SETGT)>; -def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpeqm_c node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; -def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2), - (X86pcmpgtm node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't // increase the pattern complexity the way an immediate would. let AddedComplexity = 2 in { // FIXME: Is there a better scheduler class for VPCMP? 
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; -defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su, +defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", SchedWriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; -defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", SchedWriteVecALU, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; -defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su, +defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -2322,8 +2344,7 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, PatFrag Frag, "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), - 
(X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond)))]>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmibk : AVX512AIi8 opc, string Suffix, PatFrag Frag, [(set _.KRC:$dst, (and _.KRCWM:$mask, (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), - (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.BroadcastLdFrag addr:$src2), cond))))]>, EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; - def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond)), (!cast(Name#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>; def : Pat<(and _.KRCWM:$mask, - (_.KVT (CommFrag_su:$cc (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), cond))), (!cast(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl opc, string Suffix, PatFrag Frag, @@ -2496,14 +2515,19 @@ def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), return N->hasOneUse(); }]>; +def X86cmpm_imm_commute : SDNodeXFormgetZExtValue() & 0x1f); + return getI8Imm(Imm, SDLoc(N)); +}]>; + multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc", - (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), - (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1>, Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, @@ -2511,9 +2535,9 @@ multiclass avx512_vcmp_common, + timm:$cc)>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, 
MRMSrcMem, _, @@ -2523,38 +2547,37 @@ multiclass avx512_vcmp_common, + (_.VT (_.BroadcastLdFrag addr:$src2)), + timm:$cc)>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Patterns for selecting with loads in other operand. def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc), + timm:$cc), (!cast(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (_.VT _.RC:$src1), CommutableCMPCC:$cc), + def : Pat<(X86cmpm (_.BroadcastLdFrag addr:$src2), + (_.VT _.RC:$src1), timm:$cc), (!cast(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; - def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast - (_.ScalarLdFrag addr:$src2)), + def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), (_.VT _.RC:$src1), - CommutableCMPCC:$cc)), + timm:$cc)), (!cast(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - imm:$cc)>; + (X86cmpm_imm_commute timm:$cc))>; } multiclass avx512_vcmp_sae { @@ -2564,9 +2587,9 @@ multiclass avx512_vcmp_sae { "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), + (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, + timm:$cc)>, EVEX_B, Sched<[sched]>; } @@ -2590,12 +2613,12 @@ defm VCMPPS : avx512_vcmp, // Patterns to select fp compares with load as first operand. 
let Predicates = [HasAVX512] in { def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, - CommutableCMPCC:$cc)), - (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, - CommutableCMPCC:$cc)), - (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; + timm:$cc)), + (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; } // ---------------------------------------------------------------- @@ -2621,7 +2644,7 @@ multiclass avx512_scalar_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst, (X86Vfpclasss _.ScalarIntMemCPat:$src1, - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclasss_su _.ScalarIntMemCPat:$src1, - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2661,7 +2684,7 @@ multiclass avx512_vector_fpclass opc, string OpcodeStr, (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched]>; def rrk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, 
$src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su (_.VT _.RC:$src1), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched]>; def rm : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(X86Vfpclass (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2)))]>, + (i32 timm:$src2)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512 opc, string OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su (_.VT (_.LdFrag addr:$src1)), - (i32 imm:$src2))))]>, + (i32 timm:$src2))))]>, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512 opc, string OpcodeStr, _.BroadcastStr##", $dst|$dst, ${src1}" ##_.BroadcastStr##", $src2}", [(set _.KRC:$dst,(X86Vfpclass - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2)))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2)))]>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512 opc, string OpcodeStr, _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"## _.BroadcastStr##", $src2}", [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su - (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))))]>, + (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))))]>, EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -2836,13 +2857,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; +def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), (COPY_TO_REGCLASS VK16:$src, GR32)>; +def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk 
VK8:$src)>, Requires<[HasDQI]>; +def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), + (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), (COPY_TO_REGCLASS VK8:$src, GR32)>; +def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), + (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (COPY_TO_REGCLASS GR32:$src, VK32)>; @@ -3075,7 +3104,7 @@ multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, def ri : Ii8, + [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, Sched<[sched]>; } @@ -3097,30 +3126,6 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; -// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. -multiclass axv512_icmp_packed_no_vlx_lowering { - def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2))), - (COPY_TO_REGCLASS - (!cast(InstStr#"Zrr") - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; - - def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (Frag_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2)))), - (COPY_TO_REGCLASS - (!cast(InstStr#"Zrrk") - (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), - (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), - Narrow.KRC)>; -} - // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
multiclass axv512_icmp_packed_cc_no_vlx_lowering(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3138,53 +3143,111 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering { +// Broadcast load. +def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. 
+def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. -multiclass axv512_cmp_packed_cc_no_vlx_lowering { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; + timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - imm:$cc), Narrow.KRC)>; + timm:$cc), Narrow.KRC)>; + +// Broadcast load. 
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. 
- let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - } - defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3197,29 +3260,25 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } let Predicates = [HasBWI, NoVLX] in { - // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't - // increase the pattern complexity the way an immediate would. 
- let AddedComplexity = 2 in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; - } - defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -4186,16 +4245,32 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), + (COPY_TO_REGCLASS + (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR32X)>; +def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), + (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; + def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), + (COPY_TO_REGCLASS + (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), + VK1WM:$mask, addr:$src)), + FR64X)>; +def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), + (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; + let 
hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), @@ -4537,8 +4612,7 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (_.BroadcastLdFrag addr:$src2)))>, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4664,8 +4738,7 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, "${src2}"##_Brdct.BroadcastStr##", $src1", "$src1, ${src2}"##_Brdct.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Brdct.VT (X86VBroadcast - (_Brdct.ScalarLdFrag addr:$src2))))))>, + (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, AVX512BIBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4737,8 +4810,7 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, "${src2}"##_Src.BroadcastStr##", $src1", "$src1, ${src2}"##_Src.BroadcastStr, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert - (_Src.VT (X86VBroadcast - (_Src.ScalarLdFrag addr:$src2))))))>, + (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -4874,22 +4946,11 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; - - def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), (EXTRACT_SUBREG - (VPMULLQZrr - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), - sub_xmm)>; -} - -// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
-let Predicates = [HasDQI, NoVLX] in { - def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), - (EXTRACT_SUBREG - (VPMULLQZrr + (VPMULLQZrmb (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + addr:$src2), sub_ymm)>; def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), @@ -4898,29 +4959,47 @@ let Predicates = [HasDQI, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (VPMULLQZrmb + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } -multiclass avx512_min_max_lowering { +multiclass avx512_min_max_lowering { def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; + def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + addr:$src2), + sub_ymm)>; def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), (EXTRACT_SUBREG - (Instr + (!cast(Instr#"rr") (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; + def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), + (EXTRACT_SUBREG + (!cast(Instr#"rmb") + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + addr:$src2), + sub_xmm)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; - defm : avx512_min_max_lowering; + defm : 
avx512_min_max_lowering<"VPMAXUQZ", umax>; + defm : avx512_min_max_lowering<"VPMINUQZ", umin>; + defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; + defm : avx512_min_max_lowering<"VPMINSQZ", smin>; } //===----------------------------------------------------------------------===// @@ -4977,32 +5056,6 @@ let Predicates = [HasVLX] in { def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; - def : Pat<(and VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>; - - def : Pat<(and VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(or VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(xor VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR128X:$src1, - (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>; - def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), @@ -5042,32 +5095,6 @@ let Predicates = [HasVLX] in { (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZ256rmb 
VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>; - - def : Pat<(and VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(or VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(xor VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZ256rmb VR256X:$src1, addr:$src2)>; - def : Pat<(X86andnp VR256X:$src1, - (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>; } let Predicates = [HasAVX512] in { @@ -5110,32 +5137,6 @@ let Predicates = [HasAVX512] in { (VPANDNQZrm VR512:$src1, addr:$src2)>; def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), (VPANDNQZrm VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(xor VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPXORDZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))), - (VPANDNDZrmb VR512:$src1, addr:$src2)>; - - def : Pat<(and VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(or VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPORQZrmb 
VR512:$src1, addr:$src2)>; - def : Pat<(xor VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPXORQZrmb VR512:$src1, addr:$src2)>; - def : Pat<(X86andnp VR512:$src1, - (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))), - (VPANDNQZrmb VR512:$src1, addr:$src2)>; } // Patterns to catch vselect with different type than logic op. @@ -5174,25 +5175,17 @@ multiclass avx512_logical_lowering_bcast { // Register-broadcast logical operations. - def : Pat<(IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))), - (!cast(InstrStr#rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.RC:$src0)), (!cast(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (bitconvert (IntInfo.VT (OpNode _.RC:$src1, - (bitconvert (_.VT - (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))))), + (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), _.ImmAllZerosV)), (!cast(InstrStr#rmbkz) _.KRCWM:$mask, _.RC:$src1, addr:$src2)>; @@ -5329,7 +5322,8 @@ multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable> { + X86FoldableSchedWrite sched, bit IsCommutable, + string EVEX2VexOvrd> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, + EVEX2VEXOverride { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5357,7 +5352,8 @@ 
multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride; } defm rrb_Int : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae, + VecNode, SaeNode, sched.PS.Scl, IsCommutable, + NAME#"SS">, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae, + VecNode, SaeNode, sched.PD.Scl, IsCommutable, + NAME#"SD">, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds, @@ -5410,13 +5408,14 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86FoldableSchedWrite sched, + string EVEX2VEXOvrd> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]> { + Sched<[sched]>, EVEX2VEXOverride { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5424,24 +5423,27 @@ multiclass avx512_comutable_binop_s opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>, + EVEX2VEXOverride; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl>, XS, 
EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMINCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XS, EVEX_4V, - VEX_LIG, EVEX_CD8<32, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSS">, XS, + EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V, - VEX_LIG, EVEX_CD8<64, CD8VT1>; + SchedWriteFCmp.Scl, "VMAXCSD">, XD, + VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, X86FoldableSchedWrite sched, @@ -5464,8 +5466,7 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpN (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5595,8 +5596,7 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2))))>, + (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5751,13 +5751,13 @@ multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, defm ri : AVX512_maskable, + (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 
Sched<[sched]>; defm mi : AVX512_maskable, + (i8 timm:$src2)))>, Sched<[sched.Folded]>; } } @@ -5769,7 +5769,7 @@ multiclass avx512_shift_rmbi opc, Format ImmFormM, defm mbi : AVX512_maskable, + (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, EVEX_B, Sched<[sched.Folded]>; } @@ -5911,17 +5911,17 @@ let Predicates = [HasAVX512, NoVLX] in { (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), VR128X:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))), + def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; + timm:$src2)), sub_xmm)>; } //===-------------------------------------------------------------------===// @@ -5953,8 +5953,7 @@ multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))))>, + (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6062,27 +6061,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : 
Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPROLQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPROLDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. @@ -6113,27 +6112,27 @@ let Predicates = [HasAVX512, NoVLX] in { (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), sub_ymm)>; - def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v8i64 (VPRORQZri (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; - def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))), + def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), - imm:$src2)), sub_xmm)>; - def : Pat<(v8i32 
(X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))), + timm:$src2)), sub_xmm)>; + def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), (EXTRACT_SUBREG (v16i32 (VPRORDZri (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - imm:$src2)), sub_ymm)>; + timm:$src2)), sub_ymm)>; } //===-------------------------------------------------------------------===// @@ -6228,8 +6227,7 @@ multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, - (Ctrl.VT (X86VBroadcast - (Ctrl.ScalarLdFrag addr:$src2)))))>, + (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6419,7 +6417,7 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>, + _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6493,7 +6491,7 @@ multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src2, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -6571,7 +6569,7 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, - (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ 
-6964,7 +6962,7 @@ multiclass avx512_pmadd52_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, - (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1)>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -7504,14 +7502,13 @@ multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, OpcodeStr, "${src}"##Broadcast, "${src}"##Broadcast, (_.VT (OpNode (_Src.VT - (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) + (_Src.BroadcastLdFrag addr:$src)) )), (vselect MaskRC:$mask, (_.VT (OpNode (_Src.VT - (X86VBroadcast - (_Src.ScalarLdFrag addr:$src))))), + (_Src.BroadcastLdFrag addr:$src)))), _.RC:$src0), vselect, "$src0 = $dst">, EVEX, EVEX_B, Sched<[sched.Folded]>; @@ -7646,14 +7643,14 @@ let Predicates = [HasAVX512] in { v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>; - def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZrmb addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), (v8f32 VR256X:$src0)), (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, - (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))), + (fpround (v8f64 (X86VBroadcastld64 addr:$src))), v8f32x_info.ImmAllZerosV), (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>; } @@ -7677,14 +7674,14 @@ let Predicates = [HasVLX] in { v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>; - def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2PSZ256rmb addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + 
(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), VR128X:$src0), (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, - (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))), + (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))), v4f32x_info.ImmAllZerosV), (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>; @@ -7708,12 +7705,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))), + def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))), (VCVTPD2PSZ128rmb addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8194,12 +8191,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8223,12 +8220,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2si (v2f64 
(X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2DQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8252,12 +8249,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8281,12 +8278,12 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))), + def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), (VCVTTPD2UDQZ128rmb addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), (v4i32 VR128X:$src0), VK2WM:$mask), (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(X86mcvttp2ui (v2f64 
(X86VBroadcastld64 addr:$src)), v4i32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8419,12 +8416,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; @@ -8448,12 +8445,12 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))), + def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTUQQ2PSZ128rmb addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; - def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))), + def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), v4f32x_info.ImmAllZerosV, VK2WM:$mask), (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>; } @@ -8576,21 +8573,21 @@ let ExeDomain = GenericDomain in { (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _dest.RC:$dst, - (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>, + (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, Sched<[RR]>; let Constraints = "$src0 = $dst" in 
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.RC:$src0, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_K; def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", [(set _dest.RC:$dst, - (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2), + (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, Sched<[RR]>, EVEX_KZ; let hasSideEffects = 0, mayStore = 1 in { @@ -8631,17 +8628,17 @@ let Predicates = [HasAVX512] in { } def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))), + (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>; - def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>; + (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; + def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHZmr 
addr:$dst, VR512:$src1, timm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. @@ -8765,7 +8762,7 @@ multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8859,7 +8856,7 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8940,7 +8937,7 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (fsqrt (_.VT - (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, + (_.BroadcastLdFrag addr:$src)))>, EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9049,14 +9046,14 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3)))>, + (i32 timm:$src3)))>, Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B, + (i32 timm:$src3)))>, EVEX_B, Sched<[sched]>; defm m_Int : AVX512_maskable_scalar opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, - _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, + _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -9082,15 +9079,15 @@ multiclass avx512_rndscale_scalar opc, string OpcodeStr, } let Predicates = [HasAVX512] in { - def : 
Pat<(X86VRndScale _.FRC:$src1, imm:$src2), + def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), (_.EltVT (!cast(NAME##r) (_.EltVT (IMPLICIT_DEF)), - _.FRC:$src1, imm:$src2))>; + _.FRC:$src1, timm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2), + def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), (_.EltVT (!cast(NAME##m) (_.EltVT (IMPLICIT_DEF)), - addr:$src1, imm:$src2))>; + addr:$src1, timm:$src2))>; } } @@ -10109,19 +10106,19 @@ multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNo (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, Sched<[sched]>; + (i32 timm:$src2))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src2))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, + (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), + (i32 timm:$src2))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10136,7 +10133,7 @@ multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, OpcodeStr##_.Suffix, "$src2, {sae}, $src1", "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, + (i32 timm:$src2))>, EVEX_B, Sched<[sched]>; } @@ -10169,22 +10166,22 @@ multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable, + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i32 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10200,7 +10197,7 @@ multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (DestInfo.VT (OpNode (SrcInfo.VT 
SrcInfo.RC:$src1), (SrcInfo.VT SrcInfo.RC:$src2), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched]>; defm rmi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), - (i8 imm:$src3)))>, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10226,8 +10223,8 @@ multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -10241,15 +10238,14 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, Sched<[sched]>; defm rmi : AVX512_maskable_scalar, + (_.VT _.ScalarIntMemCPat:$src2), + (i32 timm:$src3))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10265,7 +10261,7 @@ multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10279,7 +10275,7 @@ multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode "$src1, $src2, {sae}, $src3", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 imm:$src3))>, + (i32 timm:$src3))>, EVEX_B, Sched<[sched]>; } @@ -10401,7 +10397,7 @@ multiclass avx512_shuff_packed_128_common opc, string OpcodeStr, OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched]>, EVEX2VEXOverride; defm rmi : AVX512_maskable opc, string OpcodeStr, 
(bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), - (i8 imm:$src3)))))>, + (i8 timm:$src3)))))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride; defm rmbi : AVX512_maskable opc, string OpcodeStr, (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2)), - (i8 imm:$src3)))))>, EVEX_B, + (_.BroadcastLdFrag addr:$src2), + (i8 timm:$src3)))))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10491,14 +10487,14 @@ multiclass avx512_valign opc, string OpcodeStr, defm rri : AVX512_maskable, + (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; defm rmi : AVX512_maskable, + (i8 timm:$src3)))>, Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<"VPALIGNRrmi">; @@ -10507,8 +10503,8 @@ multiclass avx512_valign opc, string OpcodeStr, OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $src3", (X86VAlign _.RC:$src1, - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src2)), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10541,13 +10537,13 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. 
-def ValignqImm32XForm : SDNodeXFormgetZExtValue() * 2, SDLoc(N)); }]>; -def ValignqImm8XForm : SDNodeXFormgetZExtValue() * 8, SDLoc(N)); }]>; -def ValigndImm8XForm : SDNodeXFormgetZExtValue() * 4, SDLoc(N)); }]>; @@ -10557,40 +10553,40 @@ multiclass avx512_vpalign_mask_lowering(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, From.RC:$src2, - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rrikz") To.KRCWM:$mask, To.RC:$src1, To.RC:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (From.LdFrag addr:$src2), - imm:$src3))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } multiclass avx512_vpalign_mask_lowering_mb : avx512_vpalign_mask_lowering { def : Pat<(From.VT (OpNode From.RC:$src1, - (bitconvert (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3)), + (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3)), (!cast(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.RC:$src0)), (!cast(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + 
(ImmXForm timm:$src3))>; def : Pat<(To.VT (vselect To.KRCWM:$mask, (bitconvert (From.VT (OpNode From.RC:$src1, (bitconvert - (To.VT (X86VBroadcast - (To.ScalarLdFrag addr:$src2)))), - imm:$src3))), + (To.VT (To.BroadcastLdFrag addr:$src2))), + timm:$src3))), To.ImmAllZerosV)), (!cast(OpcodeStr#"rmbikz") To.KRCWM:$mask, To.RC:$src1, addr:$src2, - (ImmXForm imm:$src3))>; + (ImmXForm timm:$src3))>; } let Predicates = [HasAVX512] in { @@ -10666,13 +10659,13 @@ multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, defm rr : AVX512_maskable, EVEX, AVX5128IBase, + (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10685,8 +10678,7 @@ multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, (ins _.ScalarMemOp:$src1), OpcodeStr, "${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr, - (_.VT (OpNode (X86VBroadcast - (_.ScalarLdFrag addr:$src1))))>, + (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10770,7 +10762,7 @@ let Predicates = [HasAVX512, NoVLX] in { multiclass avx512_unary_lowering { let Predicates = [prd, NoVLX] in { - def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10778,7 +10770,7 @@ multiclass avx512_unary_lowering; - def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10829,17 +10821,16 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, // AVX-512 - MOVDDUP 
//===----------------------------------------------------------------------===// -multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_movddup_128 opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, + (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX, Sched<[sched]>; defm rm : AVX512_maskable, + (_.VT (_.BroadcastLdFrag addr:$src))>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, Sched<[sched.Folded]>; } @@ -10853,7 +10844,7 @@ multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX512, HasVLX] in { defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_movddup_128, EVEX_V128; } } @@ -10867,11 +10858,9 @@ multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode, defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; let Predicates = [HasVLX] in { -def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), - (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; @@ -10884,17 +10873,17 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), immAllZerosV), (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 
addr:$src)), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -11070,14 +11059,14 @@ multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, def rr : AVX512, + [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; def rm : AVX512, + (i8 timm:$src2))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11104,6 +11093,7 @@ defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", multiclass avx512_psadbw_packed opc, SDNode OpNode, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _dst, X86VectorVTInfo _src> { + let isCommutable = 1 in def rr : AVX512BIgetZExtValue(); // Swap bits 1/4 and 3/6. @@ -11151,7 +11141,7 @@ def VPTERNLOG321_imm8 : SDNodeXForm; -def VPTERNLOG213_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 2/4 and 3/5. @@ -11162,7 +11152,7 @@ def VPTERNLOG213_imm8 : SDNodeXForm; -def VPTERNLOG132_imm8 : SDNodeXFormgetZExtValue(); // Swap bits 1/2 and 5/6. 
@@ -11173,7 +11163,7 @@ def VPTERNLOG132_imm8 : SDNodeXForm; -def VPTERNLOG231_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 @@ -11186,7 +11176,7 @@ def VPTERNLOG231_imm8 : SDNodeXForm; -def VPTERNLOG312_imm8 : SDNodeXFormgetZExtValue(); // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 @@ -11210,7 +11200,7 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), 1, 1>, + (i8 timm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (bitconvert (_.LdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, + (i8 timm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr##", $src4", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, EVEX_B, + (_.VT (_.BroadcastLdFrag addr:$src3)), + (i8 timm:$src4)), 1, 0>, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)), + (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; // Additional patterns for matching loads in other positions. def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with loads in other // positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked loads with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)), + (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, (i8 
imm:$src4)), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; // Additional patterns for matching broadcasts in other positions. - def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4))), + def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4))), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4))), (!cast(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2, - addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching zero masking with broadcasts in other // positions. 
def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG321_imm8 imm:$src4))>; + (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.ImmAllZerosV)), (!cast(Name#_.ZSuffix#rmbikz) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, - (VPTERNLOG132_imm8 imm:$src4))>; + (VPTERNLOG132_imm8 timm:$src4))>; // Additional patterns for matching masked broadcasts with different // operand orders. def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, (i8 imm:$src4)), + (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - (i8 imm:$src4)), _.RC:$src1)), + (_.BroadcastLdFrag addr:$src3), + (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, 
(VPTERNLOG213_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, - (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, (i8 imm:$src4)), + (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), - _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), + (OpNode (_.BroadcastLdFrag addr:$src3), + _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), _.RC:$src1)), (!cast(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, - _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; + _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; } multiclass avx512_common_ternlog, VEX_W; +// Patterns to use VPTERNLOG for vXi16/vXi8 vectors. +let Predicates = [HasVLX] in { + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv16i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, + timm:$src4)>; + def : Pat<(v8i16 
(X86vpternlog VR128X:$src1, VR128X:$src2, + (loadv8i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2, + VR128X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3), + VR128X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv32i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3), + VR256X:$src2, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, + (loadv16i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2, + VR256X:$src1, (i8 timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3), + VR256X:$src2, (i8 
timm:$src4))), + (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + +let Predicates = [HasAVX512] in { + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv64i8 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; + + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3, + (i8 timm:$src4))), + (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, + (loadv32i16 addr:$src3), (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + timm:$src4)>; + def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2, + VR512:$src1, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG321_imm8 timm:$src4))>; + def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3), + VR512:$src2, (i8 timm:$src4))), + (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3, + (VPTERNLOG132_imm8 timm:$src4))>; +} + // Patterns to implement vnot using vpternlog instead of creating all ones // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen // so that the result is only dependent on src0. 
But we use the same source @@ -11498,14 +11594,14 @@ multiclass avx512_fixupimm_packed opc, string OpcodeStr, (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, "$src2, ${src3}"##_.BroadcastStr##", $src4", (X86VFixupimm (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Constraints = "$src1 = $dst" } @@ -11531,7 +11627,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (X86VFixupimmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), (TblVT.VT _.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched]>; } } @@ -11547,7 +11643,7 @@ multiclass avx512_fixupimm_scalar opc, string OpcodeStr, (X86VFixupimms (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, Sched<[sched]>; + (i32 timm:$src4))>, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar opc, string OpcodeStr, (X86VFixupimmSAEs (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), - (i32 imm:$src4))>, + (i32 timm:$src4))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar opc, string OpcodeStr, (_.VT _.RC:$src2), (_src3VT.VT (scalar_to_vector (_src3VT.ScalarLdFrag addr:$src3))), - (i32 imm:$src4))>, + (i32 timm:$src4))>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fixupimm_packed_all { let Predicates = [HasAVX512] in defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, @@ -11804,7 +11900,7 @@ multiclass VBMI2_shift_var_rmb Op, string OpStr, SDNode OpNode, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, 
${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -11880,12 +11976,14 @@ defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, let Constraints = "$src1 = $dst" in multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, - X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI, + bit IsCommutable> { defm r : AVX512_maskable_3src, + VTI.RC:$src2, VTI.RC:$src3)), + IsCommutable, IsCommutable>, EVEX_4V, T8PD, Sched<[sched]>; defm m : AVX512_maskable_3src Op, string OpStr, SDNode OpNode, OpStr, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast - (VTI.ScalarLdFrag addr:$src3))))>, + (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VNNI_common Op, string OpStr, SDNode OpNode, - X86SchedWriteWidths sched> { + X86SchedWriteWidths sched, bit IsCommutable> { let Predicates = [HasVNNI] in - defm Z : VNNI_rmb, EVEX_V512; + defm Z : VNNI_rmb, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { - defm Z256 : VNNI_rmb, EVEX_V256; - defm Z128 : VNNI_rmb, EVEX_V128; + defm Z256 : VNNI_rmb, EVEX_V256; + defm Z128 : VNNI_rmb, EVEX_V128; } } // FIXME: Is there a better scheduler class for VPDP? 
-defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>; -defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>; -defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>; -defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>; +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; + +def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), + (X86vpmaddwd node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// Patterns to match VPDPWSSD from existing instructions/intrinsics. +let Predicates = [HasVNNI] in { + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, VR512:$src3))), + (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; + def : Pat<(v16i32 (add VR512:$src1, + (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), + (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; +} +let Predicates = [HasVNNI,HasVLX] in { + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), + (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; + def : Pat<(v8i32 (add VR256X:$src1, + (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), + (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), + (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; + def : Pat<(v4i32 (add VR128X:$src1, + (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), + (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; +} //===----------------------------------------------------------------------===// // Bit Algorithms @@ -12004,8 +12133,8 @@ multiclass GF2P8AFFINE_avx512_rmb_imm Op, 
string OpStr, SDNode OpNode, OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), - (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B, + (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), + (i8 timm:$src3))>, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -12116,7 +12245,7 @@ multiclass avx512_vp2intersect_modes { !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), [(set _.KRPC:$dst, (X86vp2intersect - _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>, + _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; } @@ -12217,12 +12346,12 @@ let Predicates = [HasBF16, HasVLX] in { (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 - (X86VBroadcast (loadf32 addr:$src))))), + (X86VBroadcastld32 addr:$src)))), (VCVTNEPS2BF16Z128rmb addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), (v8i16 VR128X:$src0), VK4WM:$mask), (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; - def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))), + def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), v8i16x_info.ImmAllZerosV, VK4WM:$mask), (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; } @@ -12249,7 +12378,7 @@ multiclass avx512_dpbf16ps_rm opc, string OpcodeStr, SDNode OpNode, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), (_.VT (OpNode _.RC:$src1, _.RC:$src2, - (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>, + (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, EVEX_B, EVEX_4V; } diff --git a/lib/Target/X86/X86InstrArithmetic.td 
b/lib/Target/X86/X86InstrArithmetic.td index e52635f8d48..1e399a89449 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1271,22 +1271,22 @@ let isCompare = 1 in { // ANDN Instruction // multiclass bmi_andn { + PatFrag ld_frag, X86FoldableSchedWrite sched> { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - Sched<[WriteALU]>; + Sched<[sched]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // Complexity is reduced to give and with immediate a chance to match first. let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W; + defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V; + defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, VEX_W; } let Predicates = [HasBMI], AddedComplexity = -6 in { diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 50aed98112c..aa45e9b191c 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -131,11 +131,11 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { /// reference. static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand, unsigned Reg) { - // Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg. - MI->getOperand(Operand).setReg(Reg); + // Direct memory address is in a form of: Reg/FI, 1 (Scale), NoReg, 0, NoReg. 
+ MI->getOperand(Operand).ChangeToRegister(Reg, /*isDef=*/false); MI->getOperand(Operand + 1).setImm(1); MI->getOperand(Operand + 2).setReg(0); - MI->getOperand(Operand + 3).setImm(0); + MI->getOperand(Operand + 3).ChangeToImmediate(0); MI->getOperand(Operand + 4).setReg(0); } diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 099f6aa8d8b..330b8c7a8a4 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -20,19 +20,19 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond), "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>, + (X86cmov GR16:$src1, GR16:$src2, timm:$cond, EFLAGS))]>, TB, OpSize16; def CMOV32rr : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond), "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>, + (X86cmov GR32:$src1, GR32:$src2, timm:$cond, EFLAGS))]>, TB, OpSize32; def CMOV64rr :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond), "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, - (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB; + (X86cmov GR64:$src1, GR64:$src2, timm:$cond, EFLAGS))]>, TB; } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", @@ -41,29 +41,46 @@ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond), "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - imm:$cond, EFLAGS))]>, TB, OpSize16; + timm:$cond, EFLAGS))]>, TB, OpSize16; def CMOV32rm : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond), "cmov${cond}{l}\t{$src2, 
$dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - imm:$cond, EFLAGS))]>, TB, OpSize32; + timm:$cond, EFLAGS))]>, TB, OpSize32; def CMOV64rm :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond), "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - imm:$cond, EFLAGS))]>, TB; + timm:$cond, EFLAGS))]>, TB; } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" } // isCodeGenOnly = 1, ForceDisassemble = 1 +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), + SDLoc(N), MVT::i8); +}]>; + +// Conditional moves with folded loads with operands swapped and conditions +// inverted. +let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, timm:$cond, EFLAGS), + (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, timm:$cond, EFLAGS), + (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, timm:$cond, EFLAGS), + (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>; +} + // SetCC instructions. 
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in { def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond), "set${cond}\t$dst", - [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>, + [(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>, TB, Sched<[WriteSETCC]>; def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond), "set${cond}\t$dst", - [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>, + [(store (X86setcc timm:$cond, EFLAGS), addr:$dst)]>, TB, Sched<[WriteSETCCStore]>; } // Uses = [EFLAGS] diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index efaccdc9ee9..78d8dd3c0d0 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -542,7 +542,7 @@ multiclass CMOVrr_PSEUDO { def CMOV#NAME : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond), "#CMOV_"#NAME#" PSEUDO!", - [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, imm:$cond, + [(set RC:$dst, (VT (X86cmov RC:$t, RC:$f, timm:$cond, EFLAGS)))]>; } @@ -593,66 +593,66 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { defm _VK64 : CMOVrr_PSEUDO; } // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] -def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; +def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; let Predicates = [NoVLX] in { - def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - (CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; - def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, EFLAGS)), - 
(CMOV_VR128 VR128:$t, VR128:$f, imm:$cond)>; + def : Pat<(v16i8 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v8i16 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v4i32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v4f32 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; + def : Pat<(v2f64 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)), + (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>; - def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; - def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)), - (CMOV_VR256 VR256:$t, VR256:$f, imm:$cond)>; + def : Pat<(v32i8 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v16i16 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v8i32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v8f32 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; + def : Pat<(v4f64 (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)), + (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>; } let Predicates = [HasVLX] in { - def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v8i16 (X86cmov VR128X:$t, 
VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; - def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, imm:$cond, EFLAGS)), - (CMOV_VR128X VR128X:$t, VR128X:$f, imm:$cond)>; + def : Pat<(v16i8 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; + def : Pat<(v2f64 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)), + (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>; - def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; - def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, imm:$cond, EFLAGS)), - (CMOV_VR256X VR256X:$t, VR256X:$f, imm:$cond)>; + def : Pat<(v32i8 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X 
VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; + def : Pat<(v4f64 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)), + (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>; } -def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; -def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, EFLAGS)), - (CMOV_VR512 VR512:$t, VR512:$f, imm:$cond)>; +def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; +def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)), + (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>; //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions @@ -1126,12 +1126,12 @@ def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))), // binary size compared to a regular MOV, but it introduces an unnecessary // load, so is not suitable for regular or optsize functions. 
let Predicates = [OptForMinSize] in { -def : Pat<(nonvolatile_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; -def : Pat<(nonvolatile_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; -def : Pat<(nonvolatile_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; -def : Pat<(nonvolatile_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>; +def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>; +def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>; } // In kernel code model, we can get the address of a label @@ -1276,23 +1276,6 @@ def : Pat<(X86cmp GR32:$src1, 0), def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; -def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC), - SDLoc(N), MVT::i8); -}]>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. -let Predicates = [HasCMov] in { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS), - (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS), - (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS), - (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>; -} - // zextload bool -> zextload byte // i1 stored in one byte in zero-extended form. // Upper bits cleanup should be executed before Store. 
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index f82e80965b7..e1e6eea5988 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -75,7 +75,7 @@ let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump], def JCC_1 : Ii8PCRel <0x70, AddCCFrm, (outs), (ins brtarget8:$dst, ccode:$cond), "j${cond}\t$dst", - [(X86brcond bb:$dst, imm:$cond, EFLAGS)]>; + [(X86brcond bb:$dst, timm:$cond, EFLAGS)]>; let hasSideEffects = 0 in { def JCC_2 : Ii16PCRel<0x80, AddCCFrm, (outs), (ins brtarget16:$dst, ccode:$cond), @@ -145,6 +145,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>, Sched<[WriteJumpLd]>; + // Win64 wants indirect jumps leaving the function to have a REX_W prefix. + // These are switched from TAILJMPr/m64_REX in MCInstLower. + let isCodeGenOnly = 1, hasREX_WPrefix = 1 in { + def JMP64r_REX : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJump]>; + let mayLoad = 1 in + def JMP64m_REX : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), + "rex64 jmp{q}\t{*}$dst", []>, Sched<[WriteJumpLd]>; + + } + // Non-tracking jumps for IBT, use with caution. let isCodeGenOnly = 1 in { def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst", @@ -273,39 +284,35 @@ let isCall = 1 in // Tail call stuff. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in - let Uses = [ESP, SSP] in { - def TCRETURNdi : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable; - def TCRETURNri : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; + isCodeGenOnly = 1, Uses = [ESP, SSP] in { + def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; + def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; let mayLoad = 1 in - def TCRETURNmi : PseudoI<(outs), - (ins i32mem_TC:$dst, i32imm:$offset), []>; + def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset), + []>, Sched<[WriteJumpLd]>; - // FIXME: The should be pseudo instructions that are lowered when going to - // mcinst. - def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i32imm_pcrel:$dst), "jmp\t$dst", []>; + def TAILJMPd : PseudoI<(outs), (ins i32imm_pcrel:$dst), + []>, Sched<[WriteJump]>; - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "", []>; // FIXME: Remove encoding when JIT is dead. + def TAILJMPr : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), - "jmp{l}\t{*}$dst", []>; + def TAILJMPm : PseudoI<(outs), (ins i32mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; } // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in + isCodeGenOnly = 1, SchedRW = [WriteJump] in let Uses = [ESP, EFLAGS, SSP] in { def TCRETURNdicc : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. 
- def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i32imm_pcrel:$dst, i32imm:$cond), "", []>; + def TAILJMPd_CC : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$cond), []>; } @@ -348,34 +355,36 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { + isCodeGenOnly = 1, Uses = [RSP, SSP] in { def TCRETURNdi64 : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset), - []>; + (ins i64i32imm_pcrel:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>; def TCRETURNri64 : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), - (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable; + (ins i64mem_TC:$dst, i32imm:$offset), + []>, Sched<[WriteJumpLd]>, NotMemoryFoldable; - def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), - "jmp\t$dst", []>; + def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst), + []>, Sched<[WriteJump]>; - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "jmp{q}\t{*}$dst", []>; + def TAILJMPr64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "jmp{q}\t{*}$dst", []>; + def TAILJMPm64 : PseudoI<(outs), (ins i64mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; // Win64 wants indirect jumps leaving the function to have a REX_W prefix. 
let hasREX_WPrefix = 1 in { - def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), - "rex64 jmp{q}\t{*}$dst", []>; + def TAILJMPr64_REX : PseudoI<(outs), (ins ptr_rc_tailcall:$dst), + []>, Sched<[WriteJump]>; let mayLoad = 1 in - def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "rex64 jmp{q}\t{*}$dst", []>; + def TAILJMPm64_REX : PseudoI<(outs), (ins i64mem_TC:$dst), + []>, Sched<[WriteJumpLd]>; } } @@ -403,13 +412,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, - isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in + isCodeGenOnly = 1, SchedRW = [WriteJump] in let Uses = [RSP, EFLAGS, SSP] in { def TCRETURNdi64cc : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>; // This gets substituted to a conditional jump instruction in MC lowering. - def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>; + def TAILJMPd64_CC : PseudoI<(outs), + (ins i64i32imm_pcrel:$dst, i32imm:$cond), []>; } diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 06e605fe5db..7a4eb138ec3 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -17,19 +17,18 @@ let hasSideEffects = 0 in { let Defs = [EAX], Uses = [AX] in // EAX = signext(AX) def CWDE : I<0x98, RawFrm, (outs), (ins), "{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>; + let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) + def CDQE : RI<0x98, RawFrm, (outs), (ins), + "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; + // FIXME: CWD/CDQ/CQO shouldn't Def the A register, but the fast register + // allocator crashes if you remove it. 
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX) def CWD : I<0x99, RawFrm, (outs), (ins), "{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>; let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX) def CDQ : I<0x99, RawFrm, (outs), (ins), "{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>; - - - let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) - def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; - let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX) def CQO : RI<0x99, RawFrm, (outs), (ins), "{cqto|cqo}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp index d42fec3770c..f3b286e0375 100644 --- a/lib/Target/X86/X86InstrFoldTables.cpp +++ b/lib/Target/X86/X86InstrFoldTables.cpp @@ -292,6 +292,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { { X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD }, { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, { X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD }, + { X86::MMX_MOVD64from64rr, X86::MMX_MOVD64from64rm, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, @@ -5245,6 +5247,270 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 }, }; +static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = { + { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD }, + { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD }, + { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD }, + { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS }, + { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS }, + { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS }, + { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD }, + { 
X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD }, + { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD }, + { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS }, + { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS }, + { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS }, + { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD }, + { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD }, + { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD }, + { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS }, + { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS }, + { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS }, + { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD }, + { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD }, + { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD }, + { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS }, + { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS }, + { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS }, + { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD }, + { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD }, + { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD }, + { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS }, + { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS }, + { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS }, + { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD }, + { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD }, + { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD }, + { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS }, + { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS }, + { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS }, + { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD }, + { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD }, + { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD }, + { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS }, + { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS }, + { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS }, + { 
X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD }, + { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD }, + { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD }, + { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS }, + { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS }, + { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS }, + { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D }, + { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D }, + { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D }, + { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q }, + { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q }, + { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q }, + { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, + { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, + { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D }, + { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, + { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, + { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D }, + { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, + { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, + { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q }, + { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, + { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, + { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q }, + { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D }, + { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D }, + { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D }, + { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D }, + { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D }, + { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D }, + { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q }, + { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q }, + { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q }, + { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D }, + { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D }, + { 
X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D }, + { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q }, + { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q }, + { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q }, + { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q }, + { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q }, + { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q }, + { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D }, + { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D }, + { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D }, + { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q }, + { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q }, + { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q }, + { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D }, + { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D }, + { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D }, + { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q }, + { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q }, + { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q }, + { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D }, + { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D }, + { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D }, + { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q }, + { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q }, + { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q }, + { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D }, + { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D }, + { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D }, + { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q }, + { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q }, + { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q }, + { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D }, + { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D }, + { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D }, + { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, 
TB_BCAST_Q }, + { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q }, + { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q }, + { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D }, + { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D }, + { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D }, + { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q }, + { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q }, + { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q }, + { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, + { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, + { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D }, + { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, + { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, + { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q }, + { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D }, + { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D }, + { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D }, + { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q }, + { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q }, + { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q }, + { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D }, + { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D }, + { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D }, + { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q }, + { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q }, + { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q }, + { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, + { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, + { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D }, + { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, + { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, + { X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q }, + { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD }, + { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD }, + { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD }, + { 
X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS }, + { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS }, + { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS }, +}; + +static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = { + { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD }, + { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS }, + { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD }, + { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS }, + { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD }, + { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB213PDZr, 
X86::VFMADDSUB213PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS }, + { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD }, + { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS }, + { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD }, + { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS }, + { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD }, + { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS }, + { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD }, + { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD132PDZr, 
X86::VFMSUBADD132PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS }, + { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD }, + { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS }, + { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD }, + { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS }, + { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD }, + { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS }, + { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD }, + { X86::VFNMADD231PDZ256r, 
X86::VFNMADD231PDZ256mb, TB_BCAST_SD }, + { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD }, + { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS }, + { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS }, + { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS }, + { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD }, + { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD }, + { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD }, + { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS }, + { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS }, + { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS }, +}; + static const X86MemoryFoldTableEntry * lookupFoldTableImpl(ArrayRef Table, unsigned RegOp) { #ifndef NDEBUG @@ -5287,6 +5553,18 @@ lookupFoldTableImpl(ArrayRef Table, unsigned RegOp) { std::end(MemoryFoldTable4)) == std::end(MemoryFoldTable4) && "MemoryFoldTable4 is not sorted and unique!"); + assert(std::is_sorted(std::begin(BroadcastFoldTable2), + std::end(BroadcastFoldTable2)) && + std::adjacent_find(std::begin(BroadcastFoldTable2), + std::end(BroadcastFoldTable2)) == + 
std::end(BroadcastFoldTable2) && + "BroadcastFoldTable2 is not sorted and unique!"); + assert(std::is_sorted(std::begin(BroadcastFoldTable3), + std::end(BroadcastFoldTable3)) && + std::adjacent_find(std::begin(BroadcastFoldTable3), + std::end(BroadcastFoldTable3)) == + std::end(BroadcastFoldTable3) && + "BroadcastFoldTable3 is not sorted and unique!"); FoldTablesChecked.store(true, std::memory_order_relaxed); } #endif @@ -5355,6 +5633,15 @@ struct X86MemUnfoldTable { // Index 4, folded load addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); + // Broadcast tables. + for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable2) + // Index 2, folded broadcast + addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); + + for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3) + // Index 3, folded broadcast + addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST); + // Sort the memory->reg unfold table. array_pod_sort(Table.begin(), Table.end()); diff --git a/lib/Target/X86/X86InstrFoldTables.h b/lib/Target/X86/X86InstrFoldTables.h index 419baf98f61..7dc236a0d7e 100644 --- a/lib/Target/X86/X86InstrFoldTables.h +++ b/lib/Target/X86/X86InstrFoldTables.h @@ -19,35 +19,48 @@ namespace llvm { enum { // Select which memory operand is being unfolded. - // (stored in bits 0 - 3) + // (stored in bits 0 - 2) TB_INDEX_0 = 0, TB_INDEX_1 = 1, TB_INDEX_2 = 2, TB_INDEX_3 = 3, TB_INDEX_4 = 4, - TB_INDEX_MASK = 0xf, + TB_INDEX_MASK = 0x7, // Do not insert the reverse map (MemOp -> RegOp) into the table. // This may be needed because there is a many -> one mapping. - TB_NO_REVERSE = 1 << 4, + TB_NO_REVERSE = 1 << 3, // Do not insert the forward map (RegOp -> MemOp) into the table. // This is needed for Native Client, which prohibits branch // instructions from using a memory operand. 
- TB_NO_FORWARD = 1 << 5, + TB_NO_FORWARD = 1 << 4, - TB_FOLDED_LOAD = 1 << 6, - TB_FOLDED_STORE = 1 << 7, + TB_FOLDED_LOAD = 1 << 5, + TB_FOLDED_STORE = 1 << 6, + TB_FOLDED_BCAST = 1 << 7, // Minimum alignment required for load/store. - // Used for RegOp->MemOp conversion. - // (stored in bits 8 - 15) + // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0 + // to mean align of 0. + // (stored in bits 8 - 11) TB_ALIGN_SHIFT = 8, - TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, - TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, - TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, - TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT, - TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 5 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 6 << TB_ALIGN_SHIFT, + TB_ALIGN_64 = 7 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xf << TB_ALIGN_SHIFT, + + // Broadcast type. + // (stored in bits 12 - 13) + TB_BCAST_TYPE_SHIFT = 12, + TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT, + + // Unused bits 14-15 }; // This struct is used for both the folding and unfold tables. 
They KeyOp diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 096cc27861c..de6f8a81dff 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -103,6 +103,8 @@ def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>, @@ -954,6 +956,26 @@ def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT().getStoreSize() == 8; }]>; +def X86VBroadcastld8 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 1; +}]>; + +def X86VBroadcastld16 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 2; +}]>; + +def X86VBroadcastld32 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4; +}]>; + +def X86VBroadcastld64 : PatFrag<(ops node:$src), + (X86VBroadcastld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; + def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); @@ -963,6 +985,10 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{ return N->isExactlyValue(+0.0); }]>; +def fp128imm0 : PatLeaf<(f128 fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index // to VEXTRACTF128/VEXTRACTI128 imm. def EXTRACT_get_vextract128_imm : SDNodeXFormgetParent(); @@ -675,7 +712,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, RC = Opc != X86::LEA32r ? 
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass; } - unsigned SrcReg = Src.getReg(); + Register SrcReg = Src.getReg(); // For both LEA64 and LEA32 the register already has essentially the right // type (32-bit or 64-bit) we may just need to forbid SP. @@ -684,7 +721,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, isKill = Src.isKill(); assert(!Src.isUndef() && "Undef op doesn't need optimization"); - if (TargetRegisterInfo::isVirtualRegister(NewSrc) && + if (Register::isVirtualRegister(NewSrc) && !MF.getRegInfo().constrainRegClass(NewSrc, RC)) return false; @@ -693,7 +730,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, // This is for an LEA64_32r and incoming registers are 32-bit. One way or // another we need to add 64-bit registers to the final MI. - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (Register::isPhysicalRegister(SrcReg)) { ImplicitOp = Src; ImplicitOp.setImplicit(); @@ -740,8 +777,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( return nullptr; unsigned Opcode = X86::LEA64_32r; - unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); - unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); + Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); + Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass); // Build and insert into an implicit UNDEF value. This is OK because // we will be shifting and then extracting the lower 8/16-bits. @@ -751,8 +788,8 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( // But testing has shown this *does* help performance in 64-bit mode (at // least on modern x86 machines). 
MachineBasicBlock::iterator MBBI = MI.getIterator(); - unsigned Dest = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); + Register Dest = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit; @@ -794,7 +831,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( case X86::ADD8rr_DB: case X86::ADD16rr: case X86::ADD16rr_DB: { - unsigned Src2 = MI.getOperand(2).getReg(); + Register Src2 = MI.getOperand(2).getReg(); bool IsKill2 = MI.getOperand(2).isKill(); assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization"); unsigned InRegLEA2 = 0; @@ -888,7 +925,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + if (Register::isVirtualRegister(Src.getReg()) && !MF.getRegInfo().constrainRegClass(Src.getReg(), &X86::GR64_NOSPRegClass)) return nullptr; @@ -911,7 +948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // LEA can't handle ESP. bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -947,7 +984,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (Is64Bit ? X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -970,7 +1007,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, : (Is64Bit ? 
X86::LEA64_32r : X86::LEA32r); bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, ImplicitOp, LV)) @@ -1005,7 +1042,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1013,7 +1050,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, const MachineOperand &Src2 = MI.getOperand(2); bool isKill2; - unsigned SrcReg2; + Register SrcReg2; MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, SrcReg2, isKill2, ImplicitOp2, LV)) @@ -1054,7 +1091,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1085,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; case X86::SUB32ri8: case X86::SUB32ri: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1093,7 +1132,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Opc = Is64Bit ? 
X86::LEA64_32r : X86::LEA32r; bool isKill; - unsigned SrcReg; + Register SrcReg; MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, ImplicitOp, LV)) @@ -1111,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SUB64ri8: case X86::SUB64ri32: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1140,40 +1181,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk: case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk: case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk: - case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: { + case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: + case X86::VBROADCASTSDZ256mk: + case X86::VBROADCASTSDZmk: + case X86::VBROADCASTSSZ128mk: + case X86::VBROADCASTSSZ256mk: + case X86::VBROADCASTSSZmk: + case X86::VPBROADCASTDZ128mk: + case X86::VPBROADCASTDZ256mk: + case X86::VPBROADCASTDZmk: + case X86::VPBROADCASTQZ128mk: + case X86::VPBROADCASTQZ256mk: + case X86::VPBROADCASTQZmk: { unsigned Opc; switch (MIOpc) { default: llvm_unreachable("Unreachable!"); - case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; - case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; - case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; - case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; - case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; - case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; - case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQU64Zrmk: Opc = 
X86::VPBLENDMQZrmk; break; - case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; - case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; - case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; - case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; - case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; - case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; - case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; - case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; - case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; - case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; - case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; - case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; - case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; - case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break; + case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break; + case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break; + case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break; + case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break; + case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break; + case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case 
X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break; + case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break; + case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break; + case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break; + case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break; + case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break; + case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break; + case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break; + case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break; + case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break; + case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break; + case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break; + case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break; + case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break; + case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break; + case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break; + case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break; + case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break; + case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break; + case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break; + case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break; + case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break; + case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break; } NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc)) @@ -1187,6 +1250,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .add(MI.getOperand(7)); break; } + case X86::VMOVDQU8Z128rrk: case 
X86::VMOVDQU8Z256rrk: case X86::VMOVDQU8Zrrk: @@ -1683,6 +1747,27 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } + case X86::VCMPSDZrr: + case X86::VCMPSSZrr: + case X86::VCMPPDZrri: + case X86::VCMPPSZrri: + case X86::VCMPPDZ128rri: + case X86::VCMPPSZ128rri: + case X86::VCMPPDZ256rri: + case X86::VCMPPSZ256rri: + case X86::VCMPPDZrrik: + case X86::VCMPPSZrrik: + case X86::VCMPPDZ128rrik: + case X86::VCMPPSZ128rrik: + case X86::VCMPPDZ256rrik: + case X86::VCMPPSZ256rrik: { + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x1f; + Imm = X86::getSwappedVCMPImm(Imm); + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } case X86::VPERM2F128rr: case X86::VPERM2I128rr: { // Flip permute source immediate. @@ -1859,7 +1944,7 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, // CommutableOpIdx2 is well defined now. Let's choose another commutable // operand and assign its index to CommutableOpIdx1. 
- unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); + Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg(); unsigned CommutableOpIdx1; for (CommutableOpIdx1 = LastCommutableVecOp; @@ -1889,7 +1974,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI, return true; } -bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, +bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { const MCInstrDesc &Desc = MI.getDesc(); if (!Desc.isCommutable()) @@ -1926,17 +2012,23 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, // Ordered/Unordered/Equal/NotEqual tests unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7; switch (Imm) { + default: + // EVEX versions can be commuted. + if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX) + break; + return false; case 0x00: // EQUAL case 0x03: // UNORDERED case 0x04: // NOT EQUAL case 0x07: // ORDERED - // The indices of the commutable operands are 1 and 2 (or 2 and 3 - // when masked). - // Assign them to the returned operand indices here. - return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, - 2 + OpOffset); + break; } - return false; + + // The indices of the commutable operands are 1 and 2 (or 2 and 3 + // when masked). + // Assign them to the returned operand indices here. + return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset, + 2 + OpOffset); } case X86::MOVSSrr: // X86::MOVSDrr is always commutable. 
MOVSS is only commutable if we can @@ -1990,6 +2082,24 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, case X86::VPTERNLOGQZ256rmbikz: case X86::VPTERNLOGQZrmbikz: return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); + case X86::VPDPWSSDZ128r: + case X86::VPDPWSSDZ128rk: + case X86::VPDPWSSDZ128rkz: + case X86::VPDPWSSDZ256r: + case X86::VPDPWSSDZ256rk: + case X86::VPDPWSSDZ256rkz: + case X86::VPDPWSSDZr: + case X86::VPDPWSSDZrk: + case X86::VPDPWSSDZrkz: + case X86::VPDPWSSDSZ128r: + case X86::VPDPWSSDSZ128rk: + case X86::VPDPWSSDSZ128rkz: + case X86::VPDPWSSDSZ256r: + case X86::VPDPWSSDSZ256rk: + case X86::VPDPWSSDSZ256rkz: + case X86::VPDPWSSDSZr: + case X86::VPDPWSSDSZrk: + case X86::VPDPWSSDSZrkz: case X86::VPMADD52HUQZ128r: case X86::VPMADD52HUQZ128rk: case X86::VPMADD52HUQZ128rkz: @@ -2215,7 +2325,7 @@ unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { } } -/// Get the VPCMP immediate if the opcodes are swapped. +/// Get the VPCMP immediate if the operands are swapped. unsigned X86::getSwappedVPCMPImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2233,7 +2343,7 @@ unsigned X86::getSwappedVPCMPImm(unsigned Imm) { return Imm; } -/// Get the VPCOM immediate if the opcodes are swapped. +/// Get the VPCOM immediate if the operands are swapped. unsigned X86::getSwappedVPCOMImm(unsigned Imm) { switch (Imm) { default: llvm_unreachable("Unreachable!"); @@ -2251,6 +2361,23 @@ unsigned X86::getSwappedVPCOMImm(unsigned Imm) { return Imm; } +/// Get the VCMP immediate if the operands are swapped. +unsigned X86::getSwappedVCMPImm(unsigned Imm) { + // Only need the lower 2 bits to distinquish. + switch (Imm & 0x3) { + default: llvm_unreachable("Unreachable!"); + case 0x00: case 0x03: + // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted. + break; + case 0x01: case 0x02: + // Need to toggle bits 3:0. Bit 4 stays the same. 
+ Imm ^= 0xf; + break; + } + + return Imm; +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { if (!MI.isTerminator()) return false; @@ -3131,25 +3258,6 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(isKill)); } -void X86InstrInfo::storeRegToAddr( - MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.addReg(SrcReg, getKillRegState(isKill)); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - - void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, @@ -3164,23 +3272,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); } -void X86InstrInfo::loadRegFromAddr( - MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const { - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); - DebugLoc DL; - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.add(Addr[i]); - MIB.setMemRefs(MMOs); - NewMIs.push_back(MIB); -} - bool 
X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const { @@ -3599,8 +3690,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (!IsCmpZero && !Sub) return false; - bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg); + bool IsSwapped = + (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg); // Scan forward from the instruction after CmpInstr for uses of EFLAGS. // It is safe to remove CmpInstr if EFLAGS is redefined or killed. @@ -3755,7 +3847,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (Reg != FoldAsLoadDefReg) continue; // Do not fold if we have a subreg use or a def. @@ -3785,7 +3877,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(Desc); // MachineInstr::addOperand() will insert explicit operands before any @@ -3815,7 +3907,7 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // Insert the XOR. 
BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) @@ -3891,7 +3983,7 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { MachineBasicBlock &MBB = *MIB->getParent(); DebugLoc DL = MIB->getDebugLoc(); - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); const GlobalValue *GV = cast((*MIB->memoperands_begin())->getValue()); auto Flags = MachineMemOperand::MOLoad | @@ -3929,7 +4021,7 @@ static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx) { - unsigned DestReg = MIB->getOperand(0).getReg(); + Register DestReg = MIB->getOperand(0).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(DestReg) < 16) { // We can use a normal VEX encoded load. @@ -3952,7 +4044,7 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx) { - unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); + Register SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg(); // Check if DestReg is XMM16-31 or YMM16-31. if (TRI->getEncodingValue(SrcReg) < 16) { // We can use a normal VEX encoded store. @@ -4008,12 +4100,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: + case X86::FsFLD0F128: return Expand2AddrUndef(MIB, get(HasAVX ? 
X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: { assert(HasAVX && "AVX not supported"); const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned SrcReg = MIB->getOperand(0).getReg(); - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register SrcReg = MIB->getOperand(0).getReg(); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(X86::VXORPSrr)); MIB.addReg(SrcReg, RegState::ImplicitDefine); @@ -4021,9 +4114,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_128_SET0: case X86::AVX512_FsFLD0SS: - case X86::AVX512_FsFLD0SD: { + case X86::AVX512_FsFLD0SD: + case X86::AVX512_FsFLD0F128: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) return Expand2AddrUndef(MIB, @@ -4037,10 +4131,10 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX512_256_SET0: case X86::AVX512_512_SET0: { bool HasVLX = Subtarget.hasVLX(); - unsigned SrcReg = MIB->getOperand(0).getReg(); + Register SrcReg = MIB->getOperand(0).getReg(); const TargetRegisterInfo *TRI = &getRegisterInfo(); if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) { - unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); + Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm); MIB->getOperand(0).setReg(XReg); Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr)); @@ -4060,14 +4154,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case X86::AVX2_SETALLONES: return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); case X86::AVX1_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS. 
MIB->setDesc(get(X86::VCMPPSYrri)); MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); return true; } case X86::AVX512_512_SETALLONES: { - unsigned Reg = MIB->getOperand(0).getReg(); + Register Reg = MIB->getOperand(0).getReg(); MIB->setDesc(get(X86::VPTERNLOGDZrri)); // VPTERNLOGD needs 3 register inputs and an immediate. // 0xff will return 1s for any input. @@ -4077,8 +4171,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { } case X86::AVX512_512_SEXT_MASK_32: case X86::AVX512_512_SEXT_MASK_64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned MaskReg = MIB->getOperand(1).getReg(); + Register Reg = MIB->getOperand(0).getReg(); + Register MaskReg = MIB->getOperand(1).getReg(); unsigned MaskState = getRegState(MIB->getOperand(1)); unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ? X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz; @@ -4115,8 +4209,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); case X86::MOV32ri64: { - unsigned Reg = MIB->getOperand(0).getReg(); - unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit); + Register Reg = MIB->getOperand(0).getReg(); + Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit); MI.setDesc(get(X86::MOV32ri)); MIB->getOperand(0).setReg(Reg32); MIB.addReg(Reg, RegState::ImplicitDefine); @@ -4251,8 +4345,8 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // If MI is marked as reading Reg, the partial register update is wanted. 
const MachineOperand &MO = MI.getOperand(0); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + Register Reg = MO.getReg(); + if (Register::isVirtualRegister(Reg)) { if (MO.readsReg() || MI.readsVirtualRegister(Reg)) return 0; } else { @@ -4268,7 +4362,10 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance( // Return true for any instruction the copies the high bits of the first source // operand into the unused high bits of the destination operand. -static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { +static bool hasUndefRegUpdate(unsigned Opcode, unsigned &OpNum, + bool ForLoadFold = false) { + // Set the OpNum parameter to the first source operand. + OpNum = 1; switch (Opcode) { case X86::VCVTSI2SSrr: case X86::VCVTSI2SSrm: @@ -4427,6 +4524,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { case X86::VSQRTSDZm: case X86::VSQRTSDZm_Int: return true; + case X86::VMOVSSZrrk: + case X86::VMOVSDZrrk: + OpNum = 3; + return true; + case X86::VMOVSSZrrkz: + case X86::VMOVSDZrrkz: + OpNum = 2; + return true; } return false; @@ -4449,14 +4554,11 @@ static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) { unsigned X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, const TargetRegisterInfo *TRI) const { - if (!hasUndefRegUpdate(MI.getOpcode())) + if (!hasUndefRegUpdate(MI.getOpcode(), OpNum)) return 0; - // Set the OpNum parameter to the first source operand. 
- OpNum = 1; - const MachineOperand &MO = MI.getOperand(OpNum); - if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MO.isUndef() && Register::isPhysicalRegister(MO.getReg())) { return UndefRegClearance; } return 0; @@ -4464,7 +4566,7 @@ X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, void X86InstrInfo::breakPartialRegDependency( MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - unsigned Reg = MI.getOperand(OpNum).getReg(); + Register Reg = MI.getOperand(OpNum).getReg(); // If MI kills this register, the false dependence is already broken. if (MI.killsRegister(Reg, TRI)) return; @@ -4480,7 +4582,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::VR256RegClass.contains(Reg)) { // Use vxorps to clear the full ymm register. // It wants to read and write the xmm sub-register. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + Register XReg = TRI->getSubReg(Reg, X86::sub_xmm); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4489,7 +4591,7 @@ void X86InstrInfo::breakPartialRegDependency( } else if (X86::GR64RegClass.contains(Reg)) { // Using XOR32rr because it has shorter encoding and zeros up the upper bits // as well. - unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); + Register XReg = TRI->getSubReg(Reg, X86::sub_32bit); BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) .addReg(XReg, RegState::Undef) .addReg(XReg, RegState::Undef) @@ -4538,8 +4640,8 @@ static void updateOperandRegConstraints(MachineFunction &MF, // We only need to update constraints on virtual register operands. 
if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!TRI.isVirtualRegister(Reg)) + Register Reg = MO.getReg(); + if (!Register::isVirtualRegister(Reg)) continue; auto *NewRC = MRI.constrainRegClass( @@ -4698,7 +4800,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) { - if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) || + unsigned Ignored; + if (!hasUndefRegUpdate(MI.getOpcode(), Ignored, /*ForLoadFold*/true) || !MI.getOperand(1).isReg()) return false; @@ -4788,6 +4891,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( if (I != nullptr) { unsigned Opcode = I->DstOp; unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; + MinAlign = MinAlign ? 1 << (MinAlign - 1) : 0; if (Align < MinAlign) return nullptr; bool NarrowToMOV32rm = false; @@ -4821,8 +4925,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // If this is the special case where we use a MOV32rm to load a 32-bit // value and zero-extend the top bits. Change the destination register // to a 32-bit one. 
- unsigned DstReg = NewMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + Register DstReg = NewMI->getOperand(0).getReg(); + if (Register::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); else NewMI->getOperand(0).setSubReg(X86::sub_32bit); @@ -5133,6 +5237,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::V_SET0: case X86::V_SETALLONES: case X86::AVX512_128_SET0: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: Alignment = 16; break; case X86::MMX_SET0: @@ -5182,7 +5288,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::FsFLD0SD: case X86::AVX512_FsFLD0SD: case X86::FsFLD0SS: - case X86::AVX512_FsFLD0SS: { + case X86::AVX512_FsFLD0SS: + case X86::FsFLD0F128: + case X86::AVX512_FsFLD0F128: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. @@ -5212,6 +5320,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Ty = Type::getFloatTy(MF.getFunction().getContext()); else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction().getContext()); + else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128) + Ty = Type::getFP128Ty(MF.getFunction().getContext()); else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16); else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || @@ -5293,6 +5403,51 @@ extractStoreMMOs(ArrayRef MMOs, MachineFunction &MF) { return StoreMMOs; } +static unsigned getBroadcastOpcode(const X86MemoryFoldTableEntry *I, + const TargetRegisterClass *RC, + const X86Subtarget &STI) { + assert(STI.hasAVX512() && "Expected at least AVX512!"); + unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC); + assert((SpillSize == 64 || STI.hasVLX()) && + "Can't broadcast less than 64 bytes without AVX512VL!"); + + 
switch (I->Flags & TB_BCAST_MASK) { + default: llvm_unreachable("Unexpected broadcast type!"); + case TB_BCAST_D: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTDZ128m; + case 32: return X86::VPBROADCASTDZ256m; + case 64: return X86::VPBROADCASTDZm; + } + break; + case TB_BCAST_Q: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VPBROADCASTQZ128m; + case 32: return X86::VPBROADCASTQZ256m; + case 64: return X86::VPBROADCASTQZm; + } + break; + case TB_BCAST_SS: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VBROADCASTSSZ128m; + case 32: return X86::VBROADCASTSSZ256m; + case 64: return X86::VBROADCASTSSZm; + } + break; + case TB_BCAST_SD: + switch (SpillSize) { + default: llvm_unreachable("Unknown spill size"); + case 16: return X86::VMOVDDUPZ128rm; + case 32: return X86::VBROADCASTSDZ256m; + case 64: return X86::VBROADCASTSDZm; + } + break; + } +} + bool X86InstrInfo::unfoldMemoryOperand( MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl &NewMIs) const { @@ -5303,6 +5458,7 @@ bool X86InstrInfo::unfoldMemoryOperand( unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & TB_FOLDED_BCAST; if (UnfoldLoad && !FoldedLoad) return false; UnfoldLoad &= FoldedLoad; @@ -5311,7 +5467,9 @@ bool X86InstrInfo::unfoldMemoryOperand( UnfoldStore &= FoldedStore; const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // TODO: Check if 32-byte or greater accesses are slow too? 
if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass && Subtarget.isUnalignedMem16Slow()) @@ -5335,10 +5493,26 @@ bool X86InstrInfo::unfoldMemoryOperand( AfterOps.push_back(Op); } - // Emit the load instruction. + // Emit the load or broadcast instruction. if (UnfoldLoad) { auto MMOs = extractLoadMMOs(MI.memoperands(), MF); - loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget); + } + + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); + if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { @@ -5404,7 +5578,16 @@ bool X86InstrInfo::unfoldMemoryOperand( if (UnfoldStore) { const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); auto MMOs = extractStoreMMOs(MI.memoperands(), MF); - storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs); + unsigned Alignment = std::max(TRI.getSpillSize(*DstRC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget); + DebugLoc DL; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); + for (unsigned i = 0, e = AddrOps.size(); i != e; ++i) + MIB.add(AddrOps[i]); + MIB.addReg(Reg, RegState::Kill); + MIB.setMemRefs(MMOs); + NewMIs.push_back(MIB); } return true; @@ -5423,6 +5606,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->Flags & TB_INDEX_MASK; bool FoldedLoad = I->Flags & TB_FOLDED_LOAD; bool FoldedStore = I->Flags & TB_FOLDED_STORE; + bool FoldedBCast = I->Flags & 
TB_FOLDED_BCAST; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -5456,10 +5640,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); - bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, - VT, MVT::Other, AddrOps); + + unsigned Opc; + if (FoldedBCast) { + Opc = getBroadcastOpcode(I, RC, Subtarget); + } else { + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); + bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment; + Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget); + } + + Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -7367,6 +7558,96 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { } } +Optional +X86InstrInfo::describeLoadedValue(const MachineInstr &MI) const { + const MachineOperand *Op = nullptr; + DIExpression *Expr = nullptr; + + switch (MI.getOpcode()) { + case X86::LEA32r: + case X86::LEA64r: + case X86::LEA64_32r: { + // Operand 4 could be global address. For now we do not support + // such situation. + if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm()) + return None; + + const MachineOperand &Op1 = MI.getOperand(1); + const MachineOperand &Op2 = MI.getOperand(3); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + assert(Op2.isReg() && (Op2.getReg() == X86::NoRegister || + Register::isPhysicalRegister(Op2.getReg()))); + + // Omit situations like: + // %rsi = lea %rsi, 4, ... 
+ if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) || + Op2.getReg() == MI.getOperand(0).getReg()) + return None; + else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister && + TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) || + (Op2.getReg() != X86::NoRegister && + TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg()))) + return None; + + int64_t Coef = MI.getOperand(2).getImm(); + int64_t Offset = MI.getOperand(4).getImm(); + SmallVector Ops; + + if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) { + Op = &Op1; + } else if (Op1.isFI()) + Op = &Op1; + + if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef + 1); + Ops.push_back(dwarf::DW_OP_mul); + } else { + if (Op && Op2.getReg() != X86::NoRegister) { + int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false); + if (dwarfReg < 0) + return None; + else if (dwarfReg < 32) { + Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg); + Ops.push_back(0); + } else { + Ops.push_back(dwarf::DW_OP_bregx); + Ops.push_back(dwarfReg); + Ops.push_back(0); + } + } else if (!Op) { + assert(Op2.getReg() != X86::NoRegister); + Op = &Op2; + } + + if (Coef > 1) { + assert(Op2.getReg() != X86::NoRegister); + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(Coef); + Ops.push_back(dwarf::DW_OP_mul); + } + + if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) && + Op2.getReg() != X86::NoRegister) { + Ops.push_back(dwarf::DW_OP_plus); + } + } + + DIExpression::appendOffset(Ops, Offset); + Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops); + + return ParamLoadedValue(*Op, Expr);; + } + case X86::XOR32rr: { + if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + return ParamLoadedValue(MachineOperand::CreateImm(0), Expr); + return None; + } + default: + return TargetInstrInfo::describeLoadedValue(MI); + } +} + /// This is an architecture-specific helper function of reassociateOps. 
/// Set special operand attributes for new instructions after reassociation. void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, @@ -7500,9 +7781,8 @@ namespace { // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx // addq %rcx, %rax // RAX now holds address of _GLOBAL_OFFSET_TABLE_. - unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); - unsigned GOTReg = - RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); + Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass); BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg) .addReg(X86::RIP) .addImm(0) diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 13ca1713949..22b7b1d4cb1 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -67,6 +67,9 @@ unsigned getSwappedVPCMPImm(unsigned Imm); /// Get the VPCOM immediate if the opcodes are swapped. unsigned getSwappedVPCOMImm(unsigned Imm); +/// Get the VCMP immediate if the opcodes are swapped. +unsigned getSwappedVCMPImm(unsigned Imm); + } // namespace X86 /// isGlobalStubReference - Return true if the specified TargetFlag operand is @@ -203,7 +206,7 @@ public: int &FrameIndex) const override; bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const override; + AAResults *AA) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, @@ -218,7 +221,7 @@ public: /// Reference parameters are set to indicate how caller should add this /// operand to the LEA instruction. 
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, - unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc, + unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV) const; @@ -251,7 +254,7 @@ public: /// findCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. - bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; /// Returns an adjusted FMA opcode that must be used in FMA instruction that @@ -317,23 +320,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - ArrayRef MMOs, - SmallVectorImpl &NewMIs) const; - bool expandPostRAPseudo(MachineInstr &MI) const override; /// Check whether the target can fold a load that feeds a subreg operand @@ -527,6 +518,13 @@ public: #define GET_INSTRINFO_HELPER_DECLS #include "X86GenInstrInfo.inc" + static bool hasLockPrefix(const MachineInstr &MI) { + return MI.getDesc().TSFlags & X86II::LOCK; + } + + Optional + describeLoadedValue(const MachineInstr &MI) const override; + protected: /// Commutes the operands in the given instruction by changing the operands /// order and/or changing the instruction's opcode and/or the immediate value diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 
8e05dd8ec5c..e452145f3b6 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -673,6 +673,14 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { ImmSExti64i32AsmOperand]; } +// 4-bit immediate used by some XOP instructions +// [0, 0xF] +def ImmUnsignedi4AsmOperand : AsmOperandClass { + let Name = "ImmUnsignedi4"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidImmUnsignedi4"; +} + // Unsigned immediate used by SSE/AVX instructions // [0, 0xFF] // [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] @@ -705,6 +713,13 @@ def i64i8imm : Operand { let OperandType = "OPERAND_IMMEDIATE"; } +// Unsigned 4-bit immediate used by some XOP instructions. +def u4imm : Operand { + let PrintMethod = "printU8Imm"; + let ParserMatchClass = ImmUnsignedi4AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; +} + // Unsigned 8-bit immediate used by SSE/AVX instructions. def u8imm : Operand { let PrintMethod = "printU8Imm"; @@ -925,7 +940,6 @@ def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">; def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; -def HasMPX : Predicate<"Subtarget->hasMPX()">; def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">; def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">; def HasCLWB : Predicate<"Subtarget->hasCLWB()">; @@ -1103,7 +1117,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ if (ExtType == ISD::NON_EXTLOAD) return true; if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 2 && !LD->isVolatile(); + return LD->getAlignment() >= 2 && LD->isSimple(); return false; }]>; @@ -1113,7 +1127,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ if (ExtType == ISD::NON_EXTLOAD) return true; if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 4 && !LD->isVolatile(); + return LD->getAlignment() >= 4 && LD->isSimple(); return 
false; }]>; @@ -1170,7 +1184,7 @@ def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [ if (LD->getMemoryVT() == MVT::i32) return true; - return LD->getAlignment() >= 4 && !LD->isVolatile(); + return LD->getAlignment() >= 4 && LD->isSimple(); }]>; @@ -2404,25 +2418,26 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { } multiclass bmi_bls { + RegisterClass RC, X86MemOperand x86memop, + X86FoldableSchedWrite sched> { let hasSideEffects = 0 in { def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[WriteBLS]>; + T8PS, VEX_4V, Sched<[sched]>; let mayLoad = 1 in def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8PS, VEX_4V, Sched<[WriteBLS.Folded]>; + T8PS, VEX_4V, Sched<[sched.Folded]>; } } let Predicates = [HasBMI], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem>, VEX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem>, VEX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem>, VEX_W; + defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; + defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, VEX_W; + defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; + defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, VEX_W; + defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; + defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, VEX_W; } //===----------------------------------------------------------------------===// @@ -2683,12 +2698,12 @@ def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", multiclass 
lwpins_intr { def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>, + [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>, XOP_4V, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>, + [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>, XOP_4V, XOPA; } @@ -2700,11 +2715,11 @@ let Defs = [EFLAGS] in { multiclass lwpval_intr { def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(Int RC:$src0, GR32:$src1, imm:$cntl)]>, XOP_4V, XOPA; + [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA; let mayLoad = 1 in def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", - [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)]>, + [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>, XOP_4V, XOPA; } @@ -3205,13 +3220,13 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. 
def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}", - (BT32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BT32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}", - (BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTC32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}", - (BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTR32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", - (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0, "att">; + (BTS32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">; // clr aliases. def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 57835b1a256..cd9a866c91c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -30,7 +30,6 @@ def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>; let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. - // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. 
multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, X86FoldableSchedWrite sched, bit Commutable = 0, X86MemOperand OType = i64mem> { @@ -67,7 +66,7 @@ let Constraints = "$src1 = $dst" in { def ri : MMXIi8, + [(set VR64:$dst, (IntId2 VR64:$src1, timm:$src2))]>, Sched<[schedImm]>; } } @@ -114,13 +113,13 @@ multiclass ssse3_palign_mm, + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>, Sched<[sched]>; def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, - (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>, + (bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -496,14 +495,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>, + (int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>, Sched<[SchedWriteShuffle.MMX]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), - imm:$src2))]>, + timm:$src2))]>, Sched<[SchedWriteShuffle.MMX.Folded]>; // -- Conversion Instructions @@ -535,7 +534,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1, - imm:$src2))]>, + timm:$src2))]>, Sched<[WriteVecExtract]>; let Constraints = "$src1 = $dst" in { let Predicates = [HasMMX, HasSSE1] in { @@ -544,7 +543,7 @@ let Predicates = [HasMMX, HasSSE1] in { (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w 
VR64:$src1, - GR32orGR64:$src2, imm:$src3))]>, + GR32orGR64:$src2, timm:$src3))]>, Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem, @@ -553,7 +552,7 @@ let Predicates = [HasMMX, HasSSE1] in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } } @@ -567,6 +566,13 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (int_x86_mmx_pmovmskb VR64:$src))]>, Sched<[WriteMMXMOVMSK]>; +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + // Low word of XMM to MMX. def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; @@ -574,9 +580,13 @@ def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)), (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), +def : Pat<(x86mmx (MMX_X86movdq2q (v2i64 (simple_load addr:$src)))), (x86mmx (MMX_MOVQ64rm addr:$src))>; +def : Pat<(v2i64 (X86vzmovl (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src)))))), + (MMX_MOVQ2DQrr VR64:$src)>; + // Misc. 
let SchedRW = [SchedWriteShuffle.MMX] in { let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in @@ -601,9 +611,6 @@ def : Pat<(x86mmx (MMX_X86movdq2q def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))), (MMX_CVTTPS2PIirr VR128:$src)>; -def : Pat<(x86mmx (MMX_X86movdq2q - (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))), - (MMX_CVTTPS2PIirr VR128:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))), (MMX_CVTPD2PIirr VR128:$src)>; diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td index f7d931510fe..44ba071947c 100644 --- a/lib/Target/X86/X86InstrMPX.td +++ b/lib/Target/X86/X86InstrMPX.td @@ -12,16 +12,16 @@ // //===----------------------------------------------------------------------===// -// FIXME: Investigate a better scheduler class once MPX is used inside LLVM. +// FIXME: Investigate a better scheduler class if MPX is ever used inside LLVM. let SchedRW = [WriteSystem] in { multiclass mpx_bound_make opc, string OpcodeStr> { def 32rm: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rm: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; } defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; @@ -29,17 +29,17 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check opc, string OpcodeStr> { def 32rm: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rm: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; def 32rr: I, - Requires<[HasMPX, Not64BitMode]>; + Requires<[Not64BitMode]>; def 64rr: I, - Requires<[HasMPX, In64BitMode]>; + Requires<[In64BitMode]>; } defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable; defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable; @@ -47,33 +47,31 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable; def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, 
$dst|$dst, $src}", []>, PD, - Requires<[HasMPX]>, NotMemoryFoldable; + NotMemoryFoldable; let mayLoad = 1 in { def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable; + Requires<[Not64BitMode]>, NotMemoryFoldable; def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable; + Requires<[In64BitMode]>, NotMemoryFoldable; } let isCodeGenOnly = 1, ForceDisassemble = 1 in def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX]>, NotMemoryFoldable; + NotMemoryFoldable; let mayStore = 1 in { def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable; + Requires<[Not64BitMode]>, NotMemoryFoldable; def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, - Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable; + Requires<[In64BitMode]>, NotMemoryFoldable; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src), - "bndstx\t{$src, $dst|$dst, $src}", []>, PS, - Requires<[HasMPX]>; + "bndstx\t{$src, $dst|$dst, $src}", []>, PS; } let mayLoad = 1 in def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src), - "bndldx\t{$src, $dst|$dst, $src}", []>, PS, - Requires<[HasMPX]>; + "bndldx\t{$src, $dst|$dst, $src}", []>, PS; } // SchedRW diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7d0a5b87baf..09a04c0338b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -115,7 +115,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, 
fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>; + [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>; + def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>; } //===----------------------------------------------------------------------===// @@ -128,13 +130,18 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, SchedRW = [WriteZero] in { + isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -let Predicates = [NoAVX512] in +let Predicates = [NoAVX512] in { +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +} // The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, @@ -147,6 +154,14 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [NoAVX512] in { +def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; +def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; +} + // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -355,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SchedWriteFMoveLS.YMM>, PS, VEX, VEX_L, VEX_WIG; -defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SchedWriteFMoveLS.YMM>, PD, VEX, VEX_L, VEX_WIG; } @@ -661,7 +676,7 @@ let Predicates = [UseSSE1] in { // This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll // end up with a movsd or blend instead of shufp. // No need for aligned load, we're only loading 64-bits. - def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1, + def : Pat<(X86Shufp (v4f32 (simple_load addr:$src2)), VR128:$src1, (i8 -28)), (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)), @@ -727,7 +742,7 @@ let Predicates = [UseSSE1] in { // This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll // end up with a movsd or blend instead of shufp. // No need for aligned load, we're only loading 64-bits. - def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))), + def : Pat<(X86Movlhps VR128:$src1, (v4f32 (simple_load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>; @@ -761,7 +776,7 @@ let Predicates = [UseSSE2] in { let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in { // Use MOVLPD to load into the low bits from a full vector unless we can use // BLENDPD. 
- def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))), + def : Pat<(X86Movsd VR128:$src1, (v2f64 (simple_load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; } @@ -1713,12 +1728,12 @@ multiclass sse12_cmp_scalar, + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>, Sched<[sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))]>, + (ld_frag addr:$src2), timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1751,13 +1766,13 @@ multiclass sse12_cmp_scalar_int, + VR128:$src, timm:$cc))]>, Sched<[sched]>; let mayLoad = 1 in def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src, u8imm:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, - mem_cpat:$src, imm:$cc))]>, + mem_cpat:$src, timm:$cc))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1876,12 +1891,12 @@ multiclass sse12_cmp_packed, + [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>, Sched<[sched]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm, [(set RC:$dst, - (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>, + (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -1906,7 +1921,7 @@ let Constraints = "$src1 = $dst" in { SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD; } -def CommutableCMPCC : PatLeaf<(imm), [{ +def CommutableCMPCC : PatLeaf<(timm), [{ uint64_t Imm = N->getZExtValue() & 0x7; return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); }]>; @@ -1915,47 +1930,47 @@ def CommutableCMPCC : PatLeaf<(imm), [{ let Predicates = [HasAVX] in { def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1, CommutableCMPCC:$cc)), - (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; + (VCMPPDYrmi VR256:$src1, addr:$src2, timm:$cc)>; def : Pat<(v8f32 (X86cmpp (loadv8f32 
addr:$src2), VR256:$src1, CommutableCMPCC:$cc)), - (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>; + (VCMPPSYrmi VR256:$src1, addr:$src2, timm:$cc)>; def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + (VCMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + (VCMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, CommutableCMPCC:$cc)), - (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; + (VCMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, CommutableCMPCC:$cc)), - (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; + (VCMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; } let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + (CMPPDrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, CommutableCMPCC:$cc)), - (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; + (CMPSDrm FR64:$src1, addr:$src2, timm:$cc)>; } let Predicates = [UseSSE1] in { def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1, CommutableCMPCC:$cc)), - (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + (CMPPSrmi VR128:$src1, addr:$src2, timm:$cc)>; def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, CommutableCMPCC:$cc)), - (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; + (CMPSSrm FR32:$src1, addr:$src2, timm:$cc)>; } //===----------------------------------------------------------------------===// @@ -1970,13 +1985,13 @@ multiclass sse12_shuffle, + (i8 timm:$src3))))], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = IsCommutable in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), asm, [(set 
RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d>, + (i8 timm:$src3))))], d>, Sched<[sched]>; } @@ -2097,7 +2112,7 @@ let Predicates = [HasAVX1Only] in { let Predicates = [UseSSE2] in { // Use MOVHPD if the load isn't aligned enough for UNPCKLPD. def : Pat<(v2f64 (X86Unpckl VR128:$src1, - (v2f64 (nonvolatile_load addr:$src2)))), + (v2f64 (simple_load addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; } @@ -2721,7 +2736,7 @@ defm : scalar_math_patterns; defm : scalar_math_patterns; defm : scalar_math_patterns; - + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3482,7 +3497,7 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>, Sched<[schedImm]>; } @@ -3514,7 +3529,7 @@ multiclass PDI_binop_ri opc, Format ImmForm, string OpcodeStr, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>, Sched<[sched]>; } @@ -3597,7 +3612,7 @@ let Predicates = [HasAVX, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3605,7 +3620,7 @@ let Predicates = [HasAVX, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, VEX, + 
(i8 timm:$src2))))]>, VEX, Sched<[sched.XMM.Folded]>, VEX_WIG; } @@ -3615,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in { !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>, + (vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), @@ -3623,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (load addr:$src1), - (i8 imm:$src2))))]>, VEX, VEX_L, + (i8 timm:$src2))))]>, VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG; } @@ -3633,7 +3648,7 @@ let Predicates = [UseSSE2] in { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, + (vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>, Sched<[sched.XMM]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), @@ -3641,7 +3656,7 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (memop addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[sched.XMM.Folded]>; } } @@ -4380,7 +4395,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>; let Predicates = [HasAVX, NoVLX] in { - def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), + def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; @@ -4388,7 +4403,7 @@ let Predicates = [HasAVX, NoVLX] in { let Predicates = [UseSSE3] in { // No need for aligned memory as this only loads 64-bits. 
- def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), + def : Pat<(X86Movddup (v2f64 (simple_load addr:$src))), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (MOVDDUPrm addr:$src)>; @@ -4812,7 +4827,7 @@ multiclass ssse3_palignr, + [(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>, Sched<[sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), @@ -4823,7 +4838,7 @@ multiclass ssse3_palignr, + (i8 timm:$src3))))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5300,7 +5315,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>, + (X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { [(set VR128:$dst, (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, + timm:$src3))]>, Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } @@ -5323,17 +5338,6 @@ let ExeDomain = SSEPackedSingle in { defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>; } -let Predicates = [UseAVX] in { - // If we're inserting an element from a vbroadcast of a load, fold the - // load into the X86insertps instruction. 
- def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), - (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)), - (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; - def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), - (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)), - (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; -} - //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// @@ -5348,7 +5352,7 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>, + [(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>, Sched<[sched]>; // Vector intrinsic operation, mem @@ -5357,13 +5361,13 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, - (VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>, + (VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>, Sched<[sched.Folded]>; } multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { -let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in { +let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 -let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in { +let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { def SDr : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { -let ExeDomain = SSEPackedSingle, hasSideEffects = 0 in { +let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { def SSr : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 -let ExeDomain = 
SSEPackedDouble, hasSideEffects = 0 in { +let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { def SDr : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched, ValueType VT32, ValueType VT64, SDNode OpNode, bit Is2Addr = 1> { -let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in { +let ExeDomain = SSEPackedSingle in { def SSr_Int : SS4AIi8, + [(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SSm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 -let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in { +let ExeDomain = SSEPackedDouble in { def SDr_Int : SS4AIi8, + [(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>, Sched<[sched]>; def SDm_Int : SS4AIi8, + (OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } @@ -5508,17 +5512,17 @@ let Predicates = [UseAVX] in { } let Predicates = [UseAVX] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>; + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), - (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), - (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>; + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + def : 
Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in @@ -5535,17 +5539,17 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { - def : Pat<(X86VRndScale FR32:$src1, imm:$src2), - (ROUNDSSr FR32:$src1, imm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, imm:$src2), - (ROUNDSDr FR64:$src1, imm:$src2)>; + def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + (ROUNDSSr FR32:$src1, timm:$src2)>; + def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + (ROUNDSDr FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2), - (ROUNDSSm addr:$src1, imm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2), - (ROUNDSDm addr:$src1, imm:$src2)>; + def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + (ROUNDSSm addr:$src1, timm:$src2)>; + def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + (ROUNDSDm addr:$src1, timm:$src2)>; } //===----------------------------------------------------------------------===// @@ -5826,7 +5830,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5853,7 +5857,7 @@ multiclass SS41I_binop_rmi opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, 
$src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8 opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } -def BlendCommuteImm2 : SDNodeXFormgetZExtValue() & 0x03; return getI8Imm(Imm ^ 0x03, SDLoc(N)); }]>; -def BlendCommuteImm4 : SDNodeXFormgetZExtValue() & 0x0f; return getI8Imm(Imm ^ 0x0f, SDLoc(N)); }]>; -def BlendCommuteImm8 : SDNodeXFormgetZExtValue() & 0xff; return getI8Imm(Imm ^ 0xff, SDLoc(N)); }]>; // Turn a 4-bit blendi immediate to 8-bit for use with pblendw. -def BlendScaleImm4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 4; ++i) { @@ -5894,7 +5898,7 @@ def BlendScaleImm4 : SDNodeXForm; // Turn a 2-bit blendi immediate to 8-bit for use with pblendw. -def BlendScaleImm2 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5905,7 +5909,7 @@ def BlendScaleImm2 : SDNodeXForm; // Turn a 2-bit blendi immediate to 4-bit for use with pblendd. -def BlendScaleImm2to4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5916,7 +5920,7 @@ def BlendScaleImm2to4 : SDNodeXForm; // Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it. -def BlendScaleCommuteImm4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 4; ++i) { @@ -5927,7 +5931,7 @@ def BlendScaleCommuteImm4 : SDNodeXForm; // Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it. 
-def BlendScaleCommuteImm2 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -5938,7 +5942,7 @@ def BlendScaleCommuteImm2 : SDNodeXForm; // Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it. -def BlendScaleCommuteImm2to4 : SDNodeXFormgetZExtValue(); uint8_t NewImm = 0; for (unsigned i = 0; i != 2; ++i) { @@ -6008,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in { "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>; def rmi : SS4AIi8, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Pattern to commute if load is in first source. - def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, - (commuteXForm imm:$src3))>; + (commuteXForm timm:$src3))>; } let Predicates = [HasAVX] in { @@ -6061,37 +6065,37 @@ let Predicates = [HasAVX2] in { // Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw. // ExecutionDomainFixPass will cleanup domains later on. 
let Predicates = [HasAVX1Only] in { -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), - (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), - (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), - (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), + (VBLENDPDYrri VR256:$src1, VR256:$src2, timm:$src3)>; +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), + (VBLENDPDYrmi VR256:$src1, addr:$src2, timm:$src3)>; +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), + (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 timm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movsd via commuting under optsize. -def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; -def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3), - (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>; -def : 
Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3), - (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3), - (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>; +def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3), + (VBLENDPSYrri VR256:$src1, VR256:$src2, timm:$src3)>; +def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3), + (VBLENDPSYrmi VR256:$src1, addr:$src2, timm:$src3)>; +def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3), + (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 timm:$src3))>; // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under optsize. -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), - (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), + (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; } defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32, @@ -6107,19 +6111,19 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16, let Predicates = [UseSSE41] in { // Use pblendw for 128-bit integer to keep it in the integer domain and prevent // it from becoming movss via commuting under 
optsize. -def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>; -def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 timm:$src3))>; +def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 timm:$src3))>; -def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3), - (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3), - (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3), + (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3), + (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; } // For insertion into the zero index (low half) of a 256-bit vector, it is @@ -6592,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, 
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), @@ -6600,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (memop addr:$src2), - (i8 imm:$src3)))]>, TA, + (i8 timm:$src3)))]>, TA, Sched<[SchedWriteVecIMul.XMM.Folded, SchedWriteVecIMul.XMM.ReadAfterFold]>; @@ -6718,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in { (ins VR128:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>, VEX, VEX_WIG; def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + (int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>, Sched<[WriteAESKeyGen]>; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>, Sched<[WriteAESKeyGen.Folded]>; //===----------------------------------------------------------------------===// @@ -6745,7 +6749,7 @@ 
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), //===----------------------------------------------------------------------===// // Immediate transform to help with commuting. -def PCLMULCommuteImm : SDNodeXFormgetZExtValue(); return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N)); }]>; @@ -6758,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in { (ins VR128:$src1, VR128:$src2, u8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, - (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), @@ -6766,14 +6770,14 @@ let Predicates = [NoAVX, HasPCLMUL] in { "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (memop addr:$src2), - imm:$src3))]>, + timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; } // Constraints = "$src1 = $dst" def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1, - (i8 imm:$src3)), + (i8 timm:$src3)), (PCLMULQDQrm VR128:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; + (PCLMULCommuteImm timm:$src3))>; } // Predicates = [NoAVX, HasPCLMUL] // SSE aliases @@ -6795,21 +6799,21 @@ multiclass vpclmulqdq, + (IntId RC:$src1, RC:$src2, timm:$src3))]>, Sched<[WriteCLMul]>; def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, MemOp:$src2, u8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set RC:$dst, - (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, + (IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>, Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; // We can commute a load in the first operand by swapping the sources and // rotating the immediate. 
- def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)), + def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)), (!cast(NAME#"rm") RC:$src1, addr:$src2, - (PCLMULCommuteImm imm:$src3))>; + (PCLMULCommuteImm timm:$src3))>; } let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in @@ -6853,8 +6857,8 @@ let Constraints = "$src = $dst" in { def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", - [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, - imm:$idx))]>, + [(set VR128:$dst, (X86extrqi VR128:$src, timm:$len, + timm:$idx))]>, PD, Sched<[SchedWriteVecALU.XMM]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -6867,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, - imm:$len, imm:$idx))]>, + timm:$len, timm:$idx))]>, XD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -6907,10 +6911,10 @@ def : Pat<(nontemporalstore FR64:$src, addr:$dst), // class avx_broadcast_rm opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType VT, - PatFrag ld_frag, SchedWrite Sched> : + PatFrag bcast_frag, SchedWrite Sched> : AVX8I, + [(set RC:$dst, (VT (bcast_frag addr:$src)))]>, Sched<[Sched]>, VEX; // AVX2 adds register forms @@ -6923,15 +6927,15 @@ class avx2_broadcast_rr opc, string OpcodeStr, RegisterClass RC, let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in { def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128, - f32mem, v4f32, loadf32, + f32mem, v4f32, X86VBroadcastld32, SchedWriteFShuffle.XMM.Folded>; def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, - f32mem, v8f32, loadf32, + f32mem, v8f32, X86VBroadcastld32, 
SchedWriteFShuffle.XMM.Folded>, VEX_L; } let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, - v4f64, loadf64, + v4f64, X86VBroadcastld64, SchedWriteFShuffle.XMM.Folded>, VEX_L; let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in { @@ -6944,15 +6948,6 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; -let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VBROADCASTSSrm addr:$src)>; - def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VBROADCASTSSYrm addr:$src)>; - def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (VBROADCASTSDYrm addr:$src)>; -} - //===----------------------------------------------------------------------===// // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both // halves of a 256-bit vector. 
@@ -7081,27 +7076,29 @@ let Predicates = [HasAVX1Only] in { // multiclass avx_movmask_rm opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, - Intrinsic IntSt, Intrinsic IntSt256> { + Intrinsic IntSt, Intrinsic IntSt256, + X86SchedWriteMaskMove schedX, + X86SchedWriteMaskMove schedY> { def rm : AVX8I, - VEX_4V, Sched<[WriteFMaskedLoad]>; + VEX_4V, Sched<[schedX.RM]>; def Yrm : AVX8I, - VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>; + VEX_4V, VEX_L, Sched<[schedY.RM]>; def mr : AVX8I, - VEX_4V, Sched<[WriteFMaskedStore]>; + VEX_4V, Sched<[schedX.MR]>; def Ymr : AVX8I, - VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>; + VEX_4V, VEX_L, Sched<[schedY.MR]>; } let ExeDomain = SSEPackedSingle in @@ -7109,13 +7106,15 @@ defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", int_x86_avx_maskload_ps, int_x86_avx_maskload_ps_256, int_x86_avx_maskstore_ps, - int_x86_avx_maskstore_ps_256>; + int_x86_avx_maskstore_ps_256, + WriteFMaskMove32, WriteFMaskMove32Y>; let ExeDomain = SSEPackedDouble in defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskload_pd, int_x86_avx_maskload_pd_256, int_x86_avx_maskstore_pd, - int_x86_avx_maskstore_pd_256>; + int_x86_avx_maskstore_pd_256, + WriteFMaskMove64, WriteFMaskMove64Y>; //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values @@ -7143,13 +7142,13 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, def ri : AVXAIi8, VEX, + [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX, Sched<[sched]>; def mi : AVXAIi8, VEX, + (f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX, Sched<[sched.Folded]>; }// Predicates = [HasAVX, NoVLX] } @@ -7181,38 +7180,38 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4f64 (X86VPerm2x128 
VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, VEX_4V, VEX_L, + (i8 timm:$src3))))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2), - (i8 imm:$src3)))]>, VEX_4V, VEX_L, + (i8 timm:$src3)))]>, VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; } // Immediate transform to help with commuting. -def Perm2XCommuteImm : SDNodeXFormgetZExtValue() ^ 0x22, SDLoc(N)); }]>; let Predicates = [HasAVX] in { // Pattern with load in other operand. def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; } let Predicates = [HasAVX1Only] in { -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, timm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, - (loadv4i64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; + (loadv4i64 addr:$src2), (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, timm:$imm)>; // Pattern with load in other operand. 
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; } //===----------------------------------------------------------------------===// @@ -7257,7 +7256,7 @@ multiclass f16c_ps2ph, + [(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>, TAPD, VEX, Sched<[RR]>; let hasSideEffects = 0, mayStore = 1 in def mr : Ii8<0x1D, MRMDestMem, (outs), @@ -7282,15 +7281,15 @@ let Predicates = [HasF16C, NoVLX] in { (VCVTPH2PSrm addr:$src)>; def : Pat<(store (f64 (extractelt - (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; + (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; def : Pat<(store (i64 (extractelt - (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, i32:$src2))), + (bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))), (iPTR 0))), addr:$dst), - (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, i32:$src2)), addr:$dst), - (VCVTPS2PHYmr addr:$dst, VR256:$src1, imm:$src2)>; + (VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>; + def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst), + (VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>; } // Patterns for matching conversions from float to half-float and vice versa. 
@@ -7327,20 +7326,20 @@ multiclass AVX2_blend_rmi opc, string OpcodeStr, SDNode OpNode, (ins RC:$src1, RC:$src2, u8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>, Sched<[sched]>, VEX_4V; def rmi : AVX2AIi8, + (OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>, Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; // Pattern to commute if load is in first source. - def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)), + def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)), (!cast(NAME#"rmi") RC:$src1, addr:$src2, - (commuteXForm imm:$src3))>; + (commuteXForm timm:$src3))>; } let Predicates = [HasAVX2] in { @@ -7351,19 +7350,19 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32, SchedWriteBlend.YMM, VR256, i256mem, BlendCommuteImm8>, VEX_L; -def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3), - (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3), - (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>; -def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3), - (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>; +def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3), + (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3), + (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 timm:$src3))>; +def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3), + (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 timm:$src3))>; -def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3), - (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 
imm:$src3))>; -def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3), - (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>; -def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3), - (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>; +def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3), + (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 timm:$src3))>; +def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3), + (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 timm:$src3))>; +def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3), + (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 timm:$src3))>; } // For insertion into the zero index (low half) of a 256-bit vector, it is @@ -7407,7 +7406,7 @@ def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0 // destination operand // multiclass avx2_broadcast opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, + X86MemOperand x86memop, PatFrag bcast_frag, ValueType OpVT128, ValueType OpVT256, Predicate prd> { let Predicates = [HasAVX2, prd] in { def rr : AVX28I opc, string OpcodeStr, def rm : AVX28I, + (OpVT128 (bcast_frag addr:$src)))]>, Sched<[SchedWriteShuffle.XMM.Folded]>, VEX; def Yrr : AVX28I opc, string OpcodeStr, def Yrm : AVX28I, + (OpVT256 (bcast_frag addr:$src)))]>, Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L; // Provide aliases for broadcast from the same register class that @@ -7439,13 +7438,13 @@ multiclass avx2_broadcast opc, string OpcodeStr, } } -defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, +defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, X86VBroadcastld8, v16i8, v32i8, NoVLX_Or_NoBWI>; -defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, +defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, X86VBroadcastld16, v8i16, v16i16, 
NoVLX_Or_NoBWI>; -defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, +defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32, v4i32, v8i32, NoVLX>; -defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, +defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64, v2i64, v4i64, NoVLX>; let Predicates = [HasAVX2, NoVLX] in { @@ -7455,14 +7454,11 @@ let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQYrm addr:$src)>; - def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + // FIXME this is to handle aligned extloads from i8/i16. + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), (VPBROADCASTDrm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VPBROADCASTDYrm addr:$src)>; - def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (VPBROADCASTQYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -7483,17 +7479,12 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (zextloadi16 addr:$src)))))), (VPBROADCASTWYrm addr:$src)>; -} -let Predicates = [HasAVX2, NoVLX] in { - // Provide aliases for broadcast from the same register class that - // automatically does the extract. - def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), - (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), - sub_xmm)))>; - def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))), - (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), - sub_xmm)))>; + // FIXME this is to handle aligned extloads from i8. 
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX] in { @@ -7509,45 +7500,41 @@ let Predicates = [HasAVX2, NoVLX] in { let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v16i8 (X86VBroadcast GR8:$src)), - (VPBROADCASTBrr (v16i8 (COPY_TO_REGCLASS + (VPBROADCASTBrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit)), - VR128)))>; + GR8:$src, sub_8bit))))>; def : Pat<(v32i8 (X86VBroadcast GR8:$src)), - (VPBROADCASTBYrr (v16i8 (COPY_TO_REGCLASS + (VPBROADCASTBYrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit)), - VR128)))>; + GR8:$src, sub_8bit))))>; def : Pat<(v8i16 (X86VBroadcast GR16:$src)), - (VPBROADCASTWrr (v8i16 (COPY_TO_REGCLASS + (VPBROADCASTWrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit)), - VR128)))>; + GR16:$src, sub_16bit))))>; def : Pat<(v16i16 (X86VBroadcast GR16:$src)), - (VPBROADCASTWYrr (v8i16 (COPY_TO_REGCLASS + (VPBROADCASTWYrr (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit)), - VR128)))>; + GR16:$src, sub_16bit))))>; } let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPBROADCASTDrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; + (VPBROADCASTDrr (VMOVDI2PDIrr GR32:$src))>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), - (VPBROADCASTDYrr (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)))>; + (VPBROADCASTDYrr (VMOVDI2PDIrr GR32:$src))>; def : Pat<(v2i64 (X86VBroadcast GR64:$src)), - (VPBROADCASTQrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; + (VPBROADCASTQrr (VMOV64toPQIrr GR64:$src))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VPBROADCASTQYrr (v2i64 (COPY_TO_REGCLASS GR64:$src, VR128)))>; + (VPBROADCASTQYrr (VMOV64toPQIrr GR64:$src))>; } // AVX1 broadcast patterns let Predicates = [HasAVX1Only] in { -def : Pat<(v8i32 
(X86VBroadcast (loadi32 addr:$src))), +def : Pat<(v8i32 (X86VBroadcastld32 addr:$src)), (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), +def : Pat<(v4i64 (X86VBroadcastld64 addr:$src)), (VBROADCASTSDYrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), +def : Pat<(v4i32 (X86VBroadcastld32 addr:$src)), (VBROADCASTSSrm addr:$src)>; } @@ -7557,12 +7544,12 @@ let Predicates = [HasAVX, NoVLX] in { // 128bit broadcasts: def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))>; - def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + def : Pat<(v2f64 (X86VBroadcastld64 addr:$src)), (VMOVDDUPrm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast v2f64:$src)), (VMOVDDUPrr VR128:$src)>; - def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))), (VMOVDDUPrm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPrm addr:$src)>; @@ -7581,19 +7568,19 @@ let Predicates = [HasAVX1Only] in { (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)>; + (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), sub_xmm), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR32:$src, VR128)), 0)), 1)>; + (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), sub_xmm), + (v4i32 (VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)), 1)>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), sub_xmm), - (v4i32 (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)), 1)>; + (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 
sub_xmm), + (v4i32 (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)), 1)>; def : Pat<(v2i64 (X86VBroadcast i64:$src)), - (VPSHUFDri (v4i32 (COPY_TO_REGCLASS GR64:$src, VR128)), 0x44)>; - def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), + (VPSHUFDri (VMOV64toPQIrr GR64:$src), 0x44)>; + def : Pat<(v2i64 (X86VBroadcastld64 addr:$src)), (VMOVDDUPrm addr:$src)>; } @@ -7636,7 +7623,7 @@ multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, + (OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>, Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8 opc, string OpcodeStr, PatFrag mem_frag, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), - (i8 imm:$src2))))]>, + (i8 timm:$src2))))]>, Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; } } @@ -7663,19 +7650,19 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>, + (i8 timm:$src3))))]>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), - (i8 imm:$src3)))]>, + (i8 timm:$src3)))]>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; let Predicates = [HasAVX2] in def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), - VR256:$src1, (i8 imm:$imm))), - (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>; + VR256:$src1, (i8 timm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm timm:$imm))>; 
//===----------------------------------------------------------------------===// @@ -7760,7 +7747,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q_256>, VEX_W; multiclass maskmov_lowering { + ValueType MaskVT> { // masked store def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)), (!cast(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>; @@ -7772,23 +7759,23 @@ multiclass maskmov_lowering(InstrStr#"rm") RC:$mask, addr:$ptr)>; } let Predicates = [HasAVX] in { - defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64, "VBLENDVPD", v4i32>; - defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64, "VBLENDVPDY", v8i32>; + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2f64, v2i64>; + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8f32, v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4f64, v4i64>; } let Predicates = [HasAVX1Only] in { // load/store i32/i64 not supported use ps/pd version - defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; - defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; + defm : maskmov_lowering<"VMASKMOVPSY", VR256, v8i32, v8i32>; + defm : maskmov_lowering<"VMASKMOVPDY", VR256, v4i64, v4i64>; + defm : maskmov_lowering<"VMASKMOVPS", VR128, v4i32, v4i32>; + defm : maskmov_lowering<"VMASKMOVPD", VR128, v2i64, v2i64>; } let Predicates = [HasAVX2] in { - defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32, "VBLENDVPSY", v8i32>; - defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64, "VBLENDVPDY", v8i32>; - defm : maskmov_lowering<"VPMASKMOVD", 
VR128, v4i32, v4i32, "VBLENDVPS", v4i32>; - defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>; + defm : maskmov_lowering<"VPMASKMOVDY", VR256, v8i32, v8i32>; + defm : maskmov_lowering<"VPMASKMOVQY", VR256, v4i64, v4i64>; + defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32>; + defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64>; } //===----------------------------------------------------------------------===// @@ -7956,13 +7943,13 @@ multiclass GF2P8AFFINE_rmi Op, string OpStr, ValueType OpVT, OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { def rri : Ii8, Sched<[SchedWriteVecALU.XMM]>; def rmi : Ii8, + timm:$src3)))], SSEPackedInt>, Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; } } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 7050e191749..7f41feb6c0d 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>; let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; + [(int_x86_int timm:$trap)]>; def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index fc0da845299..3a1212342a1 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", - [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; + [(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>; } // SchedRW // HLE prefixes diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 66ca78556b8..229af366d94 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -143,13 +143,13 @@ multiclass xop3opimm opc, string 
OpcodeStr, SDNode OpNode, (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, + (vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>, XOP, Sched<[sched]>; def mi : IXOPi8, + (vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>, XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -251,7 +251,7 @@ multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128, "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, Sched<[sched]>; def mi : IXOPi8 opc, string Suffix, SDNode OpNode, ValueType vt128, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 (load addr:$src2)), - imm:$cc)))]>, + timm:$cc)))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } def : Pat<(OpNode (load addr:$src2), - (vt128 VR128:$src1), imm:$cc), + (vt128 VR128:$src1), timm:$cc), (!cast(NAME#"mi") VR128:$src1, addr:$src2, - (CommuteVPCOMCC imm:$cc))>; + (CommuteVPCOMCC timm:$cc))>; } defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>; @@ -418,27 +418,27 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag, X86FoldableSchedWrite sched> { def rr : IXOP5, + (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>, Sched<[sched]>; def rm : IXOP5, VEX_W, + (i8 timm:$src4))))]>, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def mr : IXOP5, + RC:$src3, (i8 timm:$src4))))]>, Sched<[sched.Folded, sched.ReadAfterFold, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -447,7 +447,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP5, VEX_W, Sched<[sched]>, FoldGenData; 
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 892a083f4d1..01620b7b64c 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -60,7 +60,7 @@ public: X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); - bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + bool select(MachineInstr &I) override; static const char *getName() { return DEBUG_TYPE; } private: @@ -94,11 +94,9 @@ private: MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const; + MachineFunction &MF); bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const; + MachineFunction &MF); bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, @@ -217,7 +215,7 @@ static unsigned getSubRegIndex(const TargetRegisterClass *RC) { } static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(Register::isPhysicalRegister(Reg)); if (X86::GR64RegClass.contains(Reg)) return &X86::GR64RegClass; if (X86::GR32RegClass.contains(Reg)) @@ -233,15 +231,15 @@ static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { // Set X86 Opcode and constrain DestReg. 
bool X86InstructionSelector::selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const { - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); - unsigned SrcReg = I.getOperand(1).getReg(); + Register SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (Register::isPhysicalRegister(DstReg)) { assert(I.isCopy() && "Generic operators do not allow physical registers"); if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID && @@ -253,7 +251,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, if (SrcRC != DstRC) { // This case can be generated by ABI lowering, performe anyext - unsigned ExtSrc = MRI.createVirtualRegister(DstRC); + Register ExtSrc = MRI.createVirtualRegister(DstRC); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG)) .addDef(ExtSrc) @@ -268,12 +266,12 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, return true; } - assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && + assert((!Register::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert((DstSize == SrcSize || // Copies are a mean to setup initial types, the number of // bits may not exactly match. 
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + (Register::isPhysicalRegister(SrcReg) && DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) && "Copy with different width?!"); @@ -282,7 +280,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, if (SrcRegBank.getID() == X86::GPRRegBankID && DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize && - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + Register::isPhysicalRegister(SrcReg)) { // Change the physical register to performe truncate. const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg); @@ -308,8 +306,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, return true; } -bool X86InstructionSelector::select(MachineInstr &I, - CodeGenCoverage &CoverageInfo) const { +bool X86InstructionSelector::select(MachineInstr &I) { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -333,7 +330,7 @@ bool X86InstructionSelector::select(MachineInstr &I, assert(I.getNumOperands() == I.getNumExplicitOperands() && "Generic instruction has unexpected implicit operands\n"); - if (selectImpl(I, CoverageInfo)) + if (selectImpl(I, *CoverageInfo)) return true; LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); @@ -370,10 +367,10 @@ bool X86InstructionSelector::select(MachineInstr &I, case TargetOpcode::G_UADDE: return selectUadde(I, MRI, MF); case TargetOpcode::G_UNMERGE_VALUES: - return selectUnmergeValues(I, MRI, MF, CoverageInfo); + return selectUnmergeValues(I, MRI, MF); case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: - return selectMergeValues(I, MRI, MF, CoverageInfo); + return selectMergeValues(I, MRI, MF); case TargetOpcode::G_EXTRACT: return selectExtract(I, MRI, MF); case TargetOpcode::G_INSERT: @@ -512,7 +509,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, assert((Opc == TargetOpcode::G_STORE || Opc == 
TargetOpcode::G_LOAD) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); @@ -572,7 +569,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_GEP) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); // Use LEA to calculate frame index and GEP @@ -625,7 +622,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, AM.Base.Reg = X86::RIP; } - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); unsigned NewOpc = getLeaOP(Ty, STI); @@ -644,7 +641,7 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && "unexpected instruction"); - const unsigned DefReg = I.getOperand(0).getReg(); + const Register DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) @@ -717,8 +714,8 @@ bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I, I.getOpcode() == TargetOpcode::G_PTRTOINT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -781,8 +778,8 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + 
const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -892,8 +889,8 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); @@ -952,8 +949,8 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, std::tie(CC, SwapArgs) = X86::getX86ConditionCode( (CmpInst::Predicate)I.getOperand(1).getPredicate()); - unsigned LHS = I.getOperand(2).getReg(); - unsigned RHS = I.getOperand(3).getReg(); + Register LHS = I.getOperand(2).getReg(); + Register RHS = I.getOperand(3).getReg(); if (SwapArgs) std::swap(LHS, RHS); @@ -998,8 +995,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction"); - unsigned LhsReg = I.getOperand(2).getReg(); - unsigned RhsReg = I.getOperand(3).getReg(); + Register LhsReg = I.getOperand(2).getReg(); + Register RhsReg = I.getOperand(3).getReg(); CmpInst::Predicate Predicate = (CmpInst::Predicate)I.getOperand(1).getPredicate(); @@ -1033,7 +1030,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, break; } - unsigned ResultReg = I.getOperand(0).getReg(); + Register ResultReg = I.getOperand(0).getReg(); RBI.constrainGenericRegister( ResultReg, *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI); @@ -1043,8 +1040,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I, .addReg(LhsReg) .addReg(RhsReg); - unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); - unsigned FlagReg2 = 
MRI.createVirtualRegister(&X86::GR8RegClass); + Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); + Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -1089,11 +1086,11 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned CarryOutReg = I.getOperand(1).getReg(); - const unsigned Op0Reg = I.getOperand(2).getReg(); - const unsigned Op1Reg = I.getOperand(3).getReg(); - unsigned CarryInReg = I.getOperand(4).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register CarryOutReg = I.getOperand(1).getReg(); + const Register Op0Reg = I.getOperand(2).getReg(); + const Register Op1Reg = I.getOperand(3).getReg(); + Register CarryInReg = I.getOperand(4).getReg(); const LLT DstTy = MRI.getType(DstReg); @@ -1149,8 +1146,8 @@ bool X86InstructionSelector::selectExtract(MachineInstr &I, assert((I.getOpcode() == TargetOpcode::G_EXTRACT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = I.getOperand(1).getReg(); int64_t Index = I.getOperand(2).getImm(); const LLT DstTy = MRI.getType(DstReg); @@ -1281,9 +1278,9 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned SrcReg = I.getOperand(1).getReg(); - const unsigned InsertReg = I.getOperand(2).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register SrcReg = 
I.getOperand(1).getReg(); + const Register InsertReg = I.getOperand(2).getReg(); int64_t Index = I.getOperand(3).getImm(); const LLT DstTy = MRI.getType(DstReg); @@ -1335,14 +1332,13 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, } bool X86InstructionSelector::selectUnmergeValues( - MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && "unexpected instruction"); // Split to extracts. unsigned NumDefs = I.getNumOperands() - 1; - unsigned SrcReg = I.getOperand(NumDefs).getReg(); + Register SrcReg = I.getOperand(NumDefs).getReg(); unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { @@ -1352,7 +1348,7 @@ bool X86InstructionSelector::selectUnmergeValues( .addReg(SrcReg) .addImm(Idx * DefSize); - if (!select(ExtrInst, CoverageInfo)) + if (!select(ExtrInst)) return false; } @@ -1361,15 +1357,14 @@ bool X86InstructionSelector::selectUnmergeValues( } bool X86InstructionSelector::selectMergeValues( - MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF, - CodeGenCoverage &CoverageInfo) const { + MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && "unexpected instruction"); // Split to inserts. - unsigned DstReg = I.getOperand(0).getReg(); - unsigned SrcReg0 = I.getOperand(1).getReg(); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg0 = I.getOperand(1).getReg(); const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg0); @@ -1378,13 +1373,13 @@ bool X86InstructionSelector::selectMergeValues( const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); // For the first src use insertSubReg. 
- unsigned DefReg = MRI.createGenericVirtualRegister(DstTy); + Register DefReg = MRI.createGenericVirtualRegister(DstTy); MRI.setRegBank(DefReg, RegBank); if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) return false; for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { - unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); + Register Tmp = MRI.createGenericVirtualRegister(DstTy); MRI.setRegBank(Tmp, RegBank); MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), @@ -1395,7 +1390,7 @@ bool X86InstructionSelector::selectMergeValues( DefReg = Tmp; - if (!select(InsertInst, CoverageInfo)) + if (!select(InsertInst)) return false; } @@ -1403,7 +1398,7 @@ bool X86InstructionSelector::selectMergeValues( TII.get(TargetOpcode::COPY), DstReg) .addReg(DefReg); - if (!select(CopyInst, CoverageInfo)) + if (!select(CopyInst)) return false; I.eraseFromParent(); @@ -1415,7 +1410,7 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I, MachineFunction &MF) const { assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); - const unsigned CondReg = I.getOperand(0).getReg(); + const Register CondReg = I.getOperand(0).getReg(); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MachineInstr &TestInst = @@ -1442,7 +1437,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, if (CM != CodeModel::Small && CM != CodeModel::Large) return false; - const unsigned DstReg = I.getOperand(0).getReg(); + const Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); unsigned Align = DstTy.getSizeInBits(); @@ -1460,7 +1455,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. 
- unsigned AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); + Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg) .addConstantPoolIndex(CPI, 0, OpFlag); @@ -1503,7 +1498,7 @@ bool X86InstructionSelector::selectImplicitDefOrPHI( I.getOpcode() == TargetOpcode::G_PHI) && "unexpected instruction"); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); if (!MRI.getRegClassOrNull(DstReg)) { const LLT DstTy = MRI.getType(DstReg); @@ -1537,7 +1532,7 @@ bool X86InstructionSelector::selectShift(MachineInstr &I, I.getOpcode() == TargetOpcode::G_LSHR) && "unexpected instruction"); - unsigned DstReg = I.getOperand(0).getReg(); + Register DstReg = I.getOperand(0).getReg(); const LLT DstTy = MRI.getType(DstReg); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); @@ -1578,8 +1573,8 @@ bool X86InstructionSelector::selectShift(MachineInstr &I, return false; } - unsigned Op0Reg = I.getOperand(1).getReg(); - unsigned Op1Reg = I.getOperand(2).getReg(); + Register Op0Reg = I.getOperand(1).getReg(); + Register Op1Reg = I.getOperand(2).getReg(); assert(MRI.getType(Op1Reg).getSizeInBits() == 8); @@ -1606,9 +1601,9 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, I.getOpcode() == TargetOpcode::G_UREM) && "unexpected instruction"); - const unsigned DstReg = I.getOperand(0).getReg(); - const unsigned Op1Reg = I.getOperand(1).getReg(); - const unsigned Op2Reg = I.getOperand(2).getReg(); + const Register DstReg = I.getOperand(0).getReg(); + const Register Op1Reg = I.getOperand(1).getReg(); + const Register Op2Reg = I.getOperand(2).getReg(); const LLT RegTy = MRI.getType(DstReg); assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) && @@ -1732,7 +1727,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpSignExtend)); else { - unsigned Zero32 = 
MRI.createVirtualRegister(&X86::GR32RegClass); + Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0), Zero32); @@ -1770,8 +1765,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I, if ((I.getOpcode() == Instruction::SRem || I.getOpcode() == Instruction::URem) && OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) { - unsigned SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); - unsigned ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); + Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); + Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg) .addReg(X86::AX); diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 40141d89462..1d7adbaa9e9 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, INTR_TYPE_3OP_IMM8, - CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, + CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI, CVTPD2PS_MASK, INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE, INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE, @@ -1101,8 +1101,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB), X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB), - X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), - X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), + X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0), + 
X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index 00fb1b57385..04121f863c8 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -13,6 +13,7 @@ #include "X86LegalizerInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -84,6 +85,24 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, verify(*STI.getInstrInfo()); } +bool X86LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + switch (MI.getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + if (createMemLibcall(MIRBuilder, MRI, MI) == + LegalizerHelper::UnableToLegalize) + return false; + MI.eraseFromParent(); + return true; + default: + break; + } + return true; +} + void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0)); @@ -158,6 +177,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({G_ANYEXT, Ty}, Legal); } setAction({G_ANYEXT, s128}, Legal); + getActionDefinitionsBuilder(G_SEXT_INREG).lower(); // Comparison setAction({G_ICMP, s1}, Legal); diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h index d21707b9ab9..7a0f13fb5ae 100644 --- a/lib/Target/X86/X86LegalizerInfo.h +++ b/lib/Target/X86/X86LegalizerInfo.h @@ -32,6 +32,9 @@ private: public: X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM); + bool legalizeIntrinsic(MachineInstr &MI, 
MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; + private: void setLegalizerInfo32bit(); void setLegalizerInfo64bit(); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index b1fefaa84be..78098fd6262 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -427,6 +427,41 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, } } +// Replace TAILJMP opcodes with their equivalent opcodes that have encoding +// information. +static unsigned convertTailJumpOpcode(unsigned Opcode) { + switch (Opcode) { + case X86::TAILJMPr: + Opcode = X86::JMP32r; + break; + case X86::TAILJMPm: + Opcode = X86::JMP32m; + break; + case X86::TAILJMPr64: + Opcode = X86::JMP64r; + break; + case X86::TAILJMPm64: + Opcode = X86::JMP64m; + break; + case X86::TAILJMPr64_REX: + Opcode = X86::JMP64r_REX; + break; + case X86::TAILJMPm64_REX: + Opcode = X86::JMP64m_REX; + break; + case X86::TAILJMPd: + case X86::TAILJMPd64: + Opcode = X86::JMP_1; + break; + case X86::TAILJMPd_CC: + case X86::TAILJMPd64_CC: + Opcode = X86::JCC_1; + break; + } + + return Opcode; +} + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -500,21 +535,190 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } - // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register - // inputs modeled as normal uses instead of implicit uses. As such, truncate - // off all but the first operand (the callee). FIXME: Change isel. 
- case X86::TAILJMPr64: - case X86::TAILJMPr64_REX: - case X86::CALL64r: - case X86::CALL64pcrel32: { - unsigned Opcode = OutMI.getOpcode(); - MCOperand Saved = OutMI.getOperand(0); - OutMI = MCInst(); - OutMI.setOpcode(Opcode); - OutMI.addOperand(Saved); + case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik: + case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik: + case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik: + case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik: + case X86::VPCMPBZrmi: case X86::VPCMPBZrmik: + case X86::VPCMPBZrri: case X86::VPCMPBZrrik: + case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik: + case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk: + case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik: + case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik: + case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk: + case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik: + case X86::VPCMPDZrmi: case X86::VPCMPDZrmik: + case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk: + case X86::VPCMPDZrri: case X86::VPCMPDZrrik: + case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik: + case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk: + case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik: + case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik: + case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk: + case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik: + case X86::VPCMPQZrmi: case X86::VPCMPQZrmik: + case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk: + case X86::VPCMPQZrri: case X86::VPCMPQZrrik: + case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik: + case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik: + case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik: + case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik: + case X86::VPCMPWZrmi: case X86::VPCMPWZrmik: + case X86::VPCMPWZrri: case X86::VPCMPWZrrik: { + // Turn immediate 0 into the VPCMPEQ instruction. 
+ if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break; + case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break; + case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break; + case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break; + case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break; + case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break; + case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break; + case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break; + case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break; + case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break; + case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break; + case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break; + case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break; + case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break; + case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break; + case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break; + case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break; + case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break; + case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break; + case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break; + case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break; + case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break; + case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break; + case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break; + case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break; + case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break; + case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break; + case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break; + case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break; + case X86::VPCMPDZrrik: NewOpc = 
X86::VPCMPEQDZrrk; break; + case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break; + case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break; + case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break; + case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break; + case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break; + case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break; + case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break; + case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break; + case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break; + case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break; + case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break; + case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break; + case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break; + case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break; + case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break; + case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break; + case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break; + case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break; + case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break; + case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break; + case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break; + case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break; + case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break; + case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break; + case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break; + case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break; + case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break; + case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break; + case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break; + case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break; + } + + OutMI.setOpcode(NewOpc); + 
OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1)); + break; + } + + // Turn immediate 6 into the VPCMPGT instruction. + if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break; + case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break; + case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break; + case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break; + case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break; + case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break; + case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break; + case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break; + case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break; + case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break; + case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break; + case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break; + case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break; + case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break; + case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break; + case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break; + case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break; + case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break; + case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break; + case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break; + case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break; + case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break; + case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break; + case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break; + case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break; + case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break; + case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break; + case X86::VPCMPDZrmik: 
NewOpc = X86::VPCMPGTDZrmk; break; + case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break; + case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break; + case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break; + case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break; + case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break; + case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break; + case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break; + case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break; + case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break; + case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break; + case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break; + case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break; + case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break; + case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break; + case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break; + case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break; + case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break; + case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break; + case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break; + case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break; + case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break; + case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break; + case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break; + case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break; + case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break; + case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break; + case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break; + case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break; + case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break; + case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break; + case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break; + 
case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break; + } + + OutMI.setOpcode(NewOpc); + OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1)); + break; + } + break; } + // CALL64r, CALL64pcrel32 - These instructions used to have + // register inputs modeled as normal uses instead of implicit uses. As such, + // they we used to truncate off all but the first operand (the callee). This + // issue seems to have been fixed at some point. This assert verifies that. + case X86::CALL64r: + case X86::CALL64pcrel32: + assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); + break; + case X86::EH_RETURN: case X86::EH_RETURN64: { OutMI = MCInst(); @@ -539,36 +743,30 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump - // instruction. - { - unsigned Opcode; - case X86::TAILJMPr: - Opcode = X86::JMP32r; - goto SetTailJmpOpcode; - case X86::TAILJMPd: - case X86::TAILJMPd64: - Opcode = X86::JMP_1; - goto SetTailJmpOpcode; - - SetTailJmpOpcode: - MCOperand Saved = OutMI.getOperand(0); - OutMI = MCInst(); - OutMI.setOpcode(Opcode); - OutMI.addOperand(Saved); - break; - } + // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump + // instruction. 
+ case X86::TAILJMPr: + case X86::TAILJMPr64: + case X86::TAILJMPr64_REX: + case X86::TAILJMPd: + case X86::TAILJMPd64: + assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); + break; case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: { - MCOperand Saved = OutMI.getOperand(0); - MCOperand Saved2 = OutMI.getOperand(1); - OutMI = MCInst(); - OutMI.setOpcode(X86::JCC_1); - OutMI.addOperand(Saved); - OutMI.addOperand(Saved2); + case X86::TAILJMPd64_CC: + assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); + break; + + case X86::TAILJMPm: + case X86::TAILJMPm64: + case X86::TAILJMPm64_REX: + assert(OutMI.getNumOperands() == X86::AddrNumOperands && + "Unexpected number of operands!"); + OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); break; - } case X86::DEC16r: case X86::DEC32r: @@ -958,7 +1156,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, // FAULTING_LOAD_OP , , , // , - unsigned DefRegister = FaultingMI.getOperand(0).getReg(); + Register DefRegister = FaultingMI.getOperand(0).getReg(); FaultMaps::FaultKind FK = static_cast(FaultingMI.getOperand(1).getImm()); MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); @@ -1079,7 +1277,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, // Emit MOV to materialize the target address and the CALL to target. // This is encoded with 12-13 bytes, depending on which register is used. 
- unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg(); + Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); if (X86II::isX86_64ExtendedReg(ScratchReg)) EncodedBytes = 13; else @@ -1369,6 +1567,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, recordSled(CurSled, MI, SledKind::TAIL_CALL); unsigned OpCode = MI.getOperand(0).getImm(); + OpCode = convertTailJumpOpcode(OpCode); MCInst TC; TC.setOpcode(OpCode); @@ -1538,8 +1737,6 @@ static void printConstant(const Constant *COp, raw_ostream &CS) { void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only"); - const X86RegisterInfo *RI = - MF->getSubtarget().getRegisterInfo(); // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. if (EmitFPOData) { @@ -1577,17 +1774,16 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { // Otherwise, use the .seh_ directives for all other Windows platforms. 
switch (MI->getOpcode()) { case X86::SEH_PushReg: - OutStreamer->EmitWinCFIPushReg( - RI->getSEHRegNum(MI->getOperand(0).getImm())); + OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm()); break; case X86::SEH_SaveReg: - OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), + OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; case X86::SEH_SaveXMM: - OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), + OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(), MI->getOperand(1).getImm()); break; @@ -1596,9 +1792,8 @@ void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { break; case X86::SEH_SetFrame: - OutStreamer->EmitWinCFISetFrame( - RI->getSEHRegNum(MI->getOperand(0).getImm()), - MI->getOperand(1).getImm()); + OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(), + MI->getOperand(1).getImm()); break; case X86::SEH_PushFrame: @@ -1650,7 +1845,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::EH_RETURN: case X86::EH_RETURN64: { // Lower these as normal, but add some comments. 
- unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); OutStreamer->AddComment(StringRef("eh_return, addr: %") + X86ATTInstPrinter::getRegisterName(Reg)); break; @@ -1697,11 +1892,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::MASKPAIR16LOAD: { int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm(); assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); - const X86RegisterInfo *RI = - MF->getSubtarget().getRegisterInfo(); - unsigned Reg = MI->getOperand(0).getReg(); - unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); - unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); + Register Reg = MI->getOperand(0).getReg(); + Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); + Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); // Load the first mask register MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm); @@ -1730,11 +1923,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::MASKPAIR16STORE: { int64_t Disp = MI->getOperand(X86::AddrDisp).getImm(); assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement"); - const X86RegisterInfo *RI = - MF->getSubtarget().getRegisterInfo(); - unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg(); - unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); - unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); + Register Reg = MI->getOperand(X86::AddrNumOperands).getReg(); + Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0); + Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1); // Store the first mask register MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk); diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index d7e535598d8..5cb80a082b5 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// is stashed. 
signed char RestoreBasePointerOffset = 0; + /// WinEHXMMSlotInfo - Slot information of XMM registers in the stack frame + /// in bytes. + DenseMap WinEHXMMSlotInfo; + /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes. unsigned CalleeSavedFrameSize = 0; @@ -120,6 +124,10 @@ public: void setRestoreBasePointer(const MachineFunction *MF); int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; } + DenseMap& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; } + const DenseMap& getWinEHXMMSlotInfo() const { + return WinEHXMMSlotInfo; } + unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index 7f75598b065..1aee01563c4 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -198,8 +198,7 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) { static inline bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2) { return MO1.isIdenticalTo(MO2) && - (!MO1.isReg() || - !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + (!MO1.isReg() || !Register::isPhysicalRegister(MO1.getReg())); } #ifndef NDEBUG @@ -235,9 +234,9 @@ static inline bool isLEA(const MachineInstr &MI) { namespace { -class OptimizeLEAPass : public MachineFunctionPass { +class X86OptimizeLEAPass : public MachineFunctionPass { public: - OptimizeLEAPass() : MachineFunctionPass(ID) {} + X86OptimizeLEAPass() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "X86 LEA Optimize"; } @@ -246,6 +245,8 @@ public: /// been calculated by LEA. Also, remove redundant LEAs. 
bool runOnMachineFunction(MachineFunction &MF) override; + static char ID; + private: using MemOpMap = DenseMap>; @@ -296,18 +297,18 @@ private: MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; - - static char ID; }; } // end anonymous namespace -char OptimizeLEAPass::ID = 0; +char X86OptimizeLEAPass::ID = 0; -FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } +FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); } +INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass", false, + false) -int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, - const MachineInstr &Last) { +int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First, + const MachineInstr &Last) { // Both instructions must be in the same basic block and they must be // presented in InstrPos. assert(Last.getParent() == First.getParent() && @@ -328,10 +329,9 @@ int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, // 3) Displacement of the new memory operand should fit in 1 byte if possible. // 4) The LEA should be as close to MI as possible, and prior to it if // possible. -bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl &List, - const MachineInstr &MI, - MachineInstr *&BestLEA, - int64_t &AddrDispShift, int &Dist) { +bool X86OptimizeLEAPass::chooseBestLEA( + const SmallVectorImpl &List, const MachineInstr &MI, + MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) { const MachineFunction *MF = MI.getParent()->getParent(); const MCInstrDesc &Desc = MI.getDesc(); int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags) + @@ -387,9 +387,10 @@ bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl &List, // Get the difference between the addresses' displacements of the two // instructions \p MI1 and \p MI2. The numbers of the first memory operands are // passed through \p N1 and \p N2. 
-int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1, - const MachineInstr &MI2, - unsigned N2) const { +int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, + unsigned N1, + const MachineInstr &MI2, + unsigned N2) const { const MachineOperand &Op1 = MI1.getOperand(N1 + X86::AddrDisp); const MachineOperand &Op2 = MI2.getOperand(N2 + X86::AddrDisp); @@ -411,9 +412,9 @@ int64_t OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, unsigned N1, // 2) Def registers of LEAs belong to the same class. // 3) All uses of the Last LEA def register are replaceable, thus the // register is used only as address base. -bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, - const MachineInstr &Last, - int64_t &AddrDispShift) const { +bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First, + const MachineInstr &Last, + int64_t &AddrDispShift) const { assert(isLEA(First) && isLEA(Last) && "The function works only with LEA instructions"); @@ -467,7 +468,8 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, return true; } -void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) { +void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + MemOpMap &LEAs) { unsigned Pos = 0; for (auto &MI : MBB) { // Assign the position number to the instruction. Note that we are going to @@ -485,7 +487,7 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) { // Try to find load and store instructions which recalculate addresses already // calculated by some LEA and replace their memory operands with its def // register. 
-bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { +bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { bool Changed = false; assert(!LEAs.empty()); @@ -564,9 +566,9 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { return Changed; } -MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, - unsigned VReg, - int64_t AddrDispShift) { +MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, + unsigned VReg, + int64_t AddrDispShift) { DIExpression *Expr = const_cast(MI.getDebugExpression()); if (AddrDispShift != 0) Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift); @@ -583,7 +585,7 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, } // Try to find similar LEAs in the list and replace one with another. -bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { +bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { bool Changed = false; // Loop over all entries in the table. @@ -613,8 +615,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // Loop over all uses of the Last LEA and update their operands. Note // that the correctness of this has already been checked in the // isReplaceable function. 
- unsigned FirstVReg = First.getOperand(0).getReg(); - unsigned LastVReg = Last.getOperand(0).getReg(); + Register FirstVReg = First.getOperand(0).getReg(); + Register LastVReg = Last.getOperand(0).getReg(); for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end(); UI != UE;) { MachineOperand &MO = *UI++; @@ -670,7 +672,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { return Changed; } -bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { +bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; if (DisableX86LEAOpt || skipFunction(MF.getFunction())) diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp index 78fede3dcde..daddf423189 100644 --- a/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/lib/Target/X86/X86RegisterBankInfo.cpp @@ -46,7 +46,9 @@ const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass( if (X86::GR8RegClass.hasSubClassEq(&RC) || X86::GR16RegClass.hasSubClassEq(&RC) || X86::GR32RegClass.hasSubClassEq(&RC) || - X86::GR64RegClass.hasSubClassEq(&RC)) + X86::GR64RegClass.hasSubClassEq(&RC) || + X86::LOW32_ADDR_ACCESSRegClass.hasSubClassEq(&RC) || + X86::LOW32_ADDR_ACCESS_RBPRegClass.hasSubClassEq(&RC)) return getRegBank(X86::GPRRegBankID); if (X86::FR32XRegClass.hasSubClassEq(&RC) || diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 2e2f1f9e438..ff625325b4c 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -544,7 +544,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { "Stack realignment in presence of dynamic allocas is not supported with" "this calling convention."); - unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); + Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); @@ -677,13 +677,13 @@ static 
bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { MI.getOperand(4).getImm() != 0 || MI.getOperand(5).getReg() != X86::NoRegister) return false; - unsigned BasePtr = MI.getOperand(1).getReg(); + Register BasePtr = MI.getOperand(1).getReg(); // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will // be replaced with a 32-bit operand MOV which will zero extend the upper // 32-bits of the super register. if (Opc == X86::LEA64_32r) BasePtr = getX86SubSuperRegister(BasePtr, 32); - unsigned NewDestReg = MI.getOperand(0).getReg(); + Register NewDestReg = MI.getOperand(0).getReg(); const X86InstrInfo *TII = MI.getParent()->getParent()->getSubtarget().getInstrInfo(); TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr, @@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { return true; } +static bool isFuncletReturnInstr(MachineInstr &MI) { + switch (MI.getOpcode()) { + case X86::CATCHRET: + case X86::CLEANUPRET: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + bool IsEHFuncletEpilogue = MBBI == MBB.end() ? 
false + : isFuncletReturnInstr(*MBBI); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && "Return instruction can only reference SP relative frame objects"); FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) { + FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr); } else { FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); } @@ -729,7 +746,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. // Don't change BasePtr since it is used later for stack adjustment. - unsigned MachineBasePtr = BasePtr; + Register MachineBasePtr = BasePtr; if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) MachineBasePtr = getX86SubSuperRegister(BasePtr, 64); @@ -773,7 +790,7 @@ Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); - unsigned FrameReg = getFrameRegister(MF); + Register FrameReg = getFrameRegister(MF); if (Subtarget.isTarget64BitILP32()) FrameReg = getX86SubSuperRegister(FrameReg, 32); return FrameReg; @@ -782,7 +799,7 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { unsigned X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); - unsigned StackReg = getStackRegister(); + Register StackReg = getStackRegister(); if (Subtarget.isTarget64BitILP32()) StackReg = getX86SubSuperRegister(StackReg, 32); return StackReg; 
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp index b435b22e8ac..f8464c7e829 100644 --- a/lib/Target/X86/X86RetpolineThunks.cpp +++ b/lib/Target/X86/X86RetpolineThunks.cpp @@ -58,8 +58,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } private: @@ -97,7 +97,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { TII = STI->getInstrInfo(); Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - MMI = &getAnalysis(); + MMI = &getAnalysis().getMMI(); Module &M = const_cast(*MMI->getModule()); // If this function is not a thunk, check to see if we need to insert @@ -279,7 +279,7 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF, CallTarget->addLiveIn(Reg); CallTarget->setHasAddressTaken(); - CallTarget->setAlignment(4); + CallTarget->setAlignment(Align(16)); insertRegReturnAddrClobber(*CallTarget, Reg); CallTarget->back().setPreInstrSymbol(MF, TargetSym); BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index 7574e4b8f89..9b1fcaa8a13 100755 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -232,8 +232,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 284d1567c5c..06f417501b2 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -231,8 +231,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : 
X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td index 41bd776648f..76001d382a2 100644 --- a/lib/Target/X86/X86SchedPredicates.td +++ b/lib/Target/X86/X86SchedPredicates.td @@ -84,3 +84,60 @@ def IsSETAm_Or_SETBEm : CheckAny<[ CheckImmOperand_s<5, "X86::COND_A">, CheckImmOperand_s<5, "X86::COND_BE"> ]>; + +// A predicate used to check if an instruction has a LOCK prefix. +def CheckLockPrefix : CheckFunctionPredicate< + "X86_MC::hasLockPrefix", + "X86InstrInfo::hasLockPrefix" +>; + +def IsRegRegCompareAndSwap_8 : CheckOpcode<[ CMPXCHG8rr ]>; + +def IsRegMemCompareAndSwap_8 : CheckOpcode<[ + LCMPXCHG8, CMPXCHG8rm +]>; + +def IsRegRegCompareAndSwap_16_32_64 : CheckOpcode<[ + CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr +]>; + +def IsRegMemCompareAndSwap_16_32_64 : CheckOpcode<[ + CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm, + LCMPXCHG16, LCMPXCHG32, LCMPXCHG64, + LCMPXCHG8B, LCMPXCHG16B +]>; + +def IsCompareAndSwap8B : CheckOpcode<[ CMPXCHG8B, LCMPXCHG8B ]>; +def IsCompareAndSwap16B : CheckOpcode<[ CMPXCHG16B, LCMPXCHG16B ]>; + +def IsRegMemCompareAndSwap : CheckOpcode< + !listconcat( + IsRegMemCompareAndSwap_8.ValidOpcodes, + IsRegMemCompareAndSwap_16_32_64.ValidOpcodes + )>; + +def IsRegRegCompareAndSwap : CheckOpcode< + !listconcat( + IsRegRegCompareAndSwap_8.ValidOpcodes, + IsRegRegCompareAndSwap_16_32_64.ValidOpcodes + )>; + +def IsAtomicCompareAndSwap_8 : CheckAll<[ + CheckLockPrefix, + IsRegMemCompareAndSwap_8 +]>; + +def IsAtomicCompareAndSwap : CheckAll<[ + CheckLockPrefix, + IsRegMemCompareAndSwap +]>; + +def IsAtomicCompareAndSwap8B : CheckAll<[ + CheckLockPrefix, + IsCompareAndSwap8B +]>; + +def IsAtomicCompareAndSwap16B : CheckAll<[ + CheckLockPrefix, + IsCompareAndSwap16B +]>; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 
d40bdf728a4..26d4d8fa354 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -208,8 +208,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index 8f3e4ae62d5..9a511ecc007 100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -226,8 +226,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 58caf1dacfc..a8c65435ab9 100755 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -226,8 +226,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 55ca85ec1e3..95f710061ae 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -102,6 +102,12 @@ class X86SchedWriteMoveLS { + SchedWrite RM = LoadRM; + SchedWrite MR = StoreMR; +} + // Multiclass that wraps X86SchedWriteMoveLS for each vector width. class X86SchedWriteMoveLSWidths; +// Conditional SIMD Packed Loads and Stores wrappers. 
+def WriteFMaskMove32 + : X86SchedWriteMaskMove; +def WriteFMaskMove64 + : X86SchedWriteMaskMove; +def WriteFMaskMove32Y + : X86SchedWriteMaskMove; +def WriteFMaskMove64Y + : X86SchedWriteMaskMove; + // Vector width wrappers. def SchedWriteFAdd : X86SchedWriteWidths; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index b0334655de7..78acb1065ec 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -216,8 +216,10 @@ defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td index 8cc01c3acec..d7aea3cf4e9 100644 --- a/lib/Target/X86/X86ScheduleBdVer2.td +++ b/lib/Target/X86/X86ScheduleBdVer2.td @@ -726,8 +726,10 @@ defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; -defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; defm : PdWriteRes; defm : PdWriteRes; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 2d26232b413..d0421d94ee0 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -180,9 +180,11 @@ multiclass JWriteResYMMPair; +// A folded store needs a cycle on the SAGU for the store data, most RMW +// instructions don't need an extra uop. ALU RMW operations don't seem to +// benefit from STLF, and their observed latency is 6cy. That is the reason why +// this write adds two extra cycles (instead of just 1cy for the store). +defm : X86WriteRes; //////////////////////////////////////////////////////////////////////////////// // Arithmetic. 
@@ -191,22 +193,22 @@ defm : X86WriteRes; defm : JWriteResIntPair; defm : JWriteResIntPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; -defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; defm : X86WriteRes; defm : JWriteResIntPair; @@ -305,6 +307,192 @@ def : WriteRes; // to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. def : WriteRes { let Latency = 1; } +def JWriteCMPXCHG8rr : SchedWriteRes<[JALU01]> { + let Latency = 3; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +def JWriteLOCK_CMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 16; + let ResourceCycles = [3,16,16]; + let NumMicroOps = 5; +} + +def JWriteLOCK_CMPXCHGrm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 17; + let ResourceCycles = [3,17,17]; + let NumMicroOps = 6; +} + +def JWriteCMPXCHG8rm : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [3,1,1]; + let NumMicroOps = 5; +} + +def JWriteCMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [3,1,1]; + let NumMicroOps = 18; +} + +def JWriteCMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 32; + let ResourceCycles = [6,1,1]; + let NumMicroOps = 28; +} + +def JWriteLOCK_CMPXCHG8B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 19; + let ResourceCycles = [3,19,19]; + 
let NumMicroOps = 18; +} + +def JWriteLOCK_CMPXCHG16B : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 38; + let ResourceCycles = [6,38,38]; + let NumMicroOps = 28; +} + +def JWriteCMPXCHGVariant : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_CMPXCHG8B]>, + SchedVar, [JWriteLOCK_CMPXCHG16B]>, + SchedVar, [JWriteLOCK_CMPXCHG8rm]>, + SchedVar, [JWriteLOCK_CMPXCHGrm]>, + SchedVar, [JWriteCMPXCHG8B]>, + SchedVar, [JWriteCMPXCHG16B]>, + SchedVar, [JWriteCMPXCHG8rm]>, + SchedVar, [WriteCMPXCHGRMW]>, + SchedVar, [JWriteCMPXCHG8rr]>, + SchedVar +]>; + +// The first five reads are contributed by the memory load operand. +// We ignore those reads and set a read-advance for the other input operands +// including the implicit read of RAX. +def : InstRW<[JWriteCMPXCHGVariant, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8, LCMPXCHG16, + LCMPXCHG32, LCMPXCHG64, + CMPXCHG8rm, CMPXCHG16rm, + CMPXCHG32rm, CMPXCHG64rm)>; + +def : InstRW<[JWriteCMPXCHGVariant], (instrs CMPXCHG8rr, CMPXCHG16rr, + CMPXCHG32rr, CMPXCHG64rr)>; + +def : InstRW<[JWriteCMPXCHGVariant, + // Ignore reads contributed by the memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Add a read-advance to every implicit register read. 
+ ReadAfterLd, ReadAfterLd, ReadAfterLd, ReadAfterLd], (instrs LCMPXCHG8B, LCMPXCHG16B, + CMPXCHG8B, CMPXCHG16B)>; + +def JWriteLOCK_ALURMW : SchedWriteRes<[JALU01, JLAGU, JSAGU]> { + let Latency = 19; + let ResourceCycles = [1,19,19]; + let NumMicroOps = 1; +} + +def JWriteLOCK_ALURMWVariant : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_ALURMW]>, + SchedVar +]>; +def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m, + DEC8m, DEC16m, DEC32m, DEC64m, + NOT8m, NOT16m, NOT32m, NOT64m, + NEG8m, NEG16m, NEG32m, NEG64m)>; + +def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> { + let Latency = 2; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} +def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr, + XADD32rr, XADD64rr)>; + +// This write defines the latency of the in/out register operand of a non-atomic +// XADDrm. This is the first of a pair of writes that model non-atomic +// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part). +// +// We need two writes because the instruction latency differs from the output +// register operand latency. In particular, the first write describes the first +// (and only) output register operand of the instruction. However, the +// instruction latency is set to the MAX of all the write latencies. That's why +// a second write is needed in this case (see example below). +// +// Example: +// XADD %ecx, (%rsp) ## Instruction latency: 11cy +// ## ECX write Latency: 3cy +// +// Register ECX becomes available in 3 cycles. That is because the value of ECX +// is exchanged with the value read from the stack pointer, and the load-to-use +// latency is assumed to be 3cy. +def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 3; // load-to-use latency + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XADDrm. 
This is the first of a sequence of two writes used to model atomic +// XADD instructions. The second write of the sequence is JWriteXCHGrm_LdSt_Part. +// +// +// Example: +// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy +// ## ECX write Latency: 11cy +// +// The value of ECX becomes available only after 11cy from the start of +// execution. This write is used to specifically set that operand latency. +def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XCHGrm. This write is the first of a sequence of two writes that describe +// atomic XCHG operations. We need two writes because the instruction latency +// differs from the output register write latency. We want to make sure that +// the output register operand becomes visible after 11cy. However, we want to +// set the instruction latency to 16cy. +def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; +} + +def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 16; + let ResourceCycles = [16, 16]; + let NumMicroOps = 1; +} + +def JWriteXADDrm_Part1 : SchedWriteVariant<[ + SchedVar, [JWriteLOCK_XADDrm_XCHG_Part]>, + SchedVar +]>; + +def JWriteXADDrm_Part2 : SchedWriteVariant<[ + SchedVar, [JWriteXCHGrm_LdSt_Part]>, + SchedVar +]>; + +def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd], + (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm, + LXADD8, LXADD16, LXADD32, LXADD64)>; + +def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd], + (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>; + + //////////////////////////////////////////////////////////////////////////////// // Floating 
point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// @@ -313,19 +501,22 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -466,8 +657,8 @@ defm : X86WriteResUnsupported; //////////////////////////////////////////////////////////////////////////////// defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -475,7 +666,7 @@ defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -630,6 +821,18 @@ def JWriteJVZEROUPPER: SchedWriteRes<[]> { } def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>; +/////////////////////////////////////////////////////////////////////////////// +// SSE2/AVX Store Selected Bytes of Double Quadword - (V)MASKMOVDQ +/////////////////////////////////////////////////////////////////////////////// + +def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 34; + let ResourceCycles = [1, 1, 2, 2, 2, 16, 42]; + let NumMicroOps = 63; +} +def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, + VMASKMOVDQU, VMASKMOVDQU64)>; + /////////////////////////////////////////////////////////////////////////////// // SchedWriteVariant definitions. 
/////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 34c251a5c5b..8e3ce721f1a 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -186,8 +186,12 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes; -def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 65f6d89df61..06201f4a3a8 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -268,8 +268,12 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; + +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; + defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 50690953eef..1ae8df977f8 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -36,7 +36,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( const X86RegisterInfo *TRI = static_cast( DAG.getSubtarget().getRegisterInfo()); - unsigned BaseReg = TRI->getBaseRegister(); + Register BaseReg = TRI->getBaseRegister(); for (unsigned R : ClobberSet) if (BaseReg == R) return true; diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp index 40f5dbe57e4..b8980789258 100644 --- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp +++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp @@ -477,7 +477,7 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction( // Otherwise, just build the predicate state itself by zeroing a register // as we don't need any initial state. 
PS->InitialReg = MRI->createVirtualRegister(PS->RC); - unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass); auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0), PredStateSubReg); ++NumInstsInserted; @@ -750,7 +750,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); // Note that we intentionally use an empty debug location so that // this picks up the preceding location. auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(), @@ -907,7 +907,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( MI.dump(); dbgs() << "\n"); report_fatal_error("Unable to unfold load!"); } - unsigned Reg = MRI->createVirtualRegister(UnfoldedRC); + Register Reg = MRI->createVirtualRegister(UnfoldedRC); SmallVector NewMIs; // If we were able to compute an unfolded reg class, any failure here // is just a programming error so just assert. @@ -1102,7 +1102,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // synthetic target in the predecessor. We do this at the bottom of the // predecessor. auto InsertPt = Pred->getFirstTerminator(); - unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); if (MF.getTarget().getCodeModel() == CodeModel::Small && !Subtarget->isPositionIndependent()) { // Directly materialize it into an immediate. @@ -1153,7 +1153,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); } else { // Otherwise compute the address into a register first. 
- unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); + Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); auto AddrI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg) .addReg(/*Base*/ X86::RIP) @@ -1175,7 +1175,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( // Now cmov over the predicate if the comparison wasn't equal. int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) .addReg(PS->InitialReg) @@ -1878,7 +1878,7 @@ unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( DebugLoc Loc) { // FIXME: Hard coding this to a 32-bit register class seems weird, but matches // what instruction selection does. - unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass); + Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass); // We directly copy the FLAGS register and rely on later lowering to clean // this up into the appropriate setCC instructions. BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS); @@ -1905,7 +1905,7 @@ void X86SpeculativeLoadHardeningPass::restoreEFLAGS( void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc, unsigned PredStateReg) { - unsigned TmpReg = MRI->createVirtualRegister(PS->RC); + Register TmpReg = MRI->createVirtualRegister(PS->RC); // FIXME: This hard codes a shift distance based on the number of bits needed // to stay canonical on 64-bit. We should compute this somehow and support // 32-bit as part of that. 
@@ -1925,8 +1925,8 @@ void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc) { - unsigned PredStateReg = MRI->createVirtualRegister(PS->RC); - unsigned TmpReg = MRI->createVirtualRegister(PS->RC); + Register PredStateReg = MRI->createVirtualRegister(PS->RC); + Register TmpReg = MRI->createVirtualRegister(PS->RC); // We know that the stack pointer will have any preserved predicate state in // its high bit. We just want to smear this across the other bits. Turns out, @@ -2031,9 +2031,9 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( } for (MachineOperand *Op : HardenOpRegs) { - unsigned OpReg = Op->getReg(); + Register OpReg = Op->getReg(); auto *OpRC = MRI->getRegClass(OpReg); - unsigned TmpReg = MRI->createVirtualRegister(OpRC); + Register TmpReg = MRI->createVirtualRegister(OpRC); // If this is a vector register, we'll need somewhat custom logic to handle // hardening it. @@ -2045,7 +2045,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( // Move our state into a vector register. // FIXME: We could skip this at the cost of longer encodings with AVX-512 // but that doesn't seem likely worth it. - unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); + Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); auto MovI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg) .addReg(StateReg); @@ -2054,7 +2054,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n"); // Broadcast it across the vector register. - unsigned VBStateReg = MRI->createVirtualRegister(OpRC); + Register VBStateReg = MRI->createVirtualRegister(OpRC); auto BroadcastI = BuildMI(MBB, InsertPt, Loc, TII->get(Is128Bit ? 
X86::VPBROADCASTQrr : X86::VPBROADCASTQYrr), @@ -2084,7 +2084,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr( assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!"); // Broadcast our state into a vector register. - unsigned VStateReg = MRI->createVirtualRegister(OpRC); + Register VStateReg = MRI->createVirtualRegister(OpRC); unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128r : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr; @@ -2153,7 +2153,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( // See if we can sink hardening the loaded value. auto SinkCheckToSingleUse = [&](MachineInstr &MI) -> Optional { - unsigned DefReg = MI.getOperand(0).getReg(); + Register DefReg = MI.getOperand(0).getReg(); // We need to find a single use which we can sink the check. We can // primarily do this because many uses may already end up checked on their @@ -2210,8 +2210,8 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( // If this register isn't a virtual register we can't walk uses of sanely, // just bail. Also check that its register class is one of the ones we // can harden. - unsigned UseDefReg = UseMI.getOperand(0).getReg(); - if (!TRI->isVirtualRegister(UseDefReg) || + Register UseDefReg = UseMI.getOperand(0).getReg(); + if (!Register::isVirtualRegister(UseDefReg) || !canHardenRegister(UseDefReg)) return {}; @@ -2241,6 +2241,9 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) { // We don't support post-load hardening of vectors. return false; + unsigned RegIdx = Log2_32(RegBytes); + assert(RegIdx < 4 && "Unsupported register size"); + // If this register class is explicitly constrained to a class that doesn't // require REX prefix, we may not be able to satisfy that constraint when // emitting the hardening instructions, so bail out here. 
@@ -2251,13 +2254,13 @@ bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) { const TargetRegisterClass *NOREXRegClasses[] = { &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass, &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass}; - if (RC == NOREXRegClasses[Log2_32(RegBytes)]) + if (RC == NOREXRegClasses[RegIdx]) return false; const TargetRegisterClass *GPRRegClasses[] = { &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass, &X86::GR64RegClass}; - return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]); + return RC->hasSuperClassEq(GPRRegClasses[RegIdx]); } /// Harden a value in a register. @@ -2278,7 +2281,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc) { assert(canHardenRegister(Reg) && "Cannot harden this register!"); - assert(TRI->isVirtualRegister(Reg) && "Cannot harden a physical register!"); + assert(Register::isVirtualRegister(Reg) && "Cannot harden a physical register!"); auto *RC = MRI->getRegClass(Reg); int Bytes = TRI->getRegSizeInBits(*RC) / 8; @@ -2289,7 +2292,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( if (Bytes != 8) { unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit}; unsigned SubRegImm = SubRegImms[Log2_32(Bytes)]; - unsigned NarrowStateReg = MRI->createVirtualRegister(RC); + Register NarrowStateReg = MRI->createVirtualRegister(RC); BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg) .addReg(StateReg, 0, SubRegImm); StateReg = NarrowStateReg; @@ -2299,7 +2302,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( if (isEFLAGSLive(MBB, InsertPt, *TRI)) FlagsReg = saveEFLAGS(MBB, InsertPt, Loc); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr}; unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)]; auto OrI 
= BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg) @@ -2329,13 +2332,13 @@ unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) { DebugLoc Loc = MI.getDebugLoc(); auto &DefOp = MI.getOperand(0); - unsigned OldDefReg = DefOp.getReg(); + Register OldDefReg = DefOp.getReg(); auto *DefRC = MRI->getRegClass(OldDefReg); // Because we want to completely replace the uses of this def'ed value with // the hardened value, create a dedicated new register that will only be used // to communicate the unhardened value to the hardening. - unsigned UnhardenedReg = MRI->createVirtualRegister(DefRC); + Register UnhardenedReg = MRI->createVirtualRegister(DefRC); DefOp.setReg(UnhardenedReg); // Now harden this register's value, getting a hardened reg that is safe to @@ -2537,7 +2540,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( .addReg(ExpectedRetAddrReg, RegState::Kill) .addSym(RetSymbol); } else { - unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC); + Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC); BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg) .addReg(/*Base*/ X86::RIP) .addImm(/*Scale*/ 1) @@ -2554,7 +2557,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); - unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); + Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg) .addReg(NewStateReg, RegState::Kill) .addReg(PS->PoisonReg) @@ -2611,7 +2614,7 @@ void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr( // For all of these, the target register is the first operand of the // instruction. 
auto &TargetOp = MI.getOperand(0); - unsigned OldTargetReg = TargetOp.getReg(); + Register OldTargetReg = TargetOp.getReg(); // Try to lookup a hardened version of this register. We retain a reference // here as we want to update the map to track any newly computed hardened diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d5bb56603df..f8f78da52cc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -146,6 +146,9 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV, return X86II::MO_DLLIMPORT; return X86II::MO_COFFSTUB; } + // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables. + if (isOSWindows()) + return X86II::MO_NO_FLAG; if (is64Bit()) { // ELF supports a large, truly PIC code model with non-PC relative GOT @@ -285,10 +288,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) - stackAlignment = StackAlignOverride; + stackAlignment = *StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) - stackAlignment = 16; + stackAlignment = Align(16); // Some CPUs have more overhead for gather. The specified overhead is relative // to the Load operation. "2" is the number provided by Intel architects. This @@ -304,6 +307,8 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Consume the vector width attribute or apply any target specific limit. 
if (PreferVectorWidthOverride) PreferVectorWidth = PreferVectorWidthOverride; + else if (Prefer128Bit) + PreferVectorWidth = 128; else if (Prefer256Bit) PreferVectorWidth = 256; } @@ -316,12 +321,11 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, - unsigned StackAlignOverride, + MaybeAlign StackAlignOverride, unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth) - : X86GenSubtargetInfo(TT, CPU, FS), - PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), - StackAlignOverride(StackAlignOverride), + : X86GenSubtargetInfo(TT, CPU, FS), PICStyle(PICStyles::None), TM(TM), + TargetTriple(TT), StackAlignOverride(StackAlignOverride), PreferVectorWidthOverride(PreferVectorWidthOverride), RequiredVectorWidth(RequiredVectorWidth), In64BitMode(TargetTriple.getArch() == Triple::x86_64), @@ -355,7 +359,7 @@ const CallLowering *X86Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } -const InstructionSelector *X86Subtarget::getInstructionSelector() const { +InstructionSelector *X86Subtarget::getInstructionSelector() const { return InstSelector.get(); } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 24ccc9cb784..e8efe8f2afe 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -365,8 +365,8 @@ protected: /// Processor has AVX-512 vp2intersect instructions bool HasVP2INTERSECT = false; - /// Processor supports MPX - Memory Protection Extensions - bool HasMPX = false; + /// Deprecated flag for MPX instructions. + bool DeprecatedHasMPX = false; /// Processor supports CET SHSTK - Control-Flow Enforcement Technology /// using Shadow Stack @@ -427,15 +427,21 @@ protected: /// Use software floating point for code generation. bool UseSoftFloat = false; + /// Use alias analysis during code generation. 
+ bool UseAA = false; + /// The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. - unsigned stackAlignment = 4; + Align stackAlignment = Align(4); /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// // FIXME: this is a known good value for Yonah. How about others? unsigned MaxInlineSizeThreshold = 128; + /// Indicates target prefers 128 bit instructions. + bool Prefer128Bit = false; + /// Indicates target prefers 256 bit instructions. bool Prefer256Bit = false; @@ -453,7 +459,7 @@ protected: private: /// Override the stack alignment. - unsigned StackAlignOverride; + MaybeAlign StackAlignOverride; /// Preferred vector width from function attribute. unsigned PreferVectorWidthOverride; @@ -490,7 +496,7 @@ public: /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, unsigned StackAlignOverride, + const X86TargetMachine &TM, MaybeAlign StackAlignOverride, unsigned PreferVectorWidthOverride, unsigned RequiredVectorWidth); @@ -515,7 +521,7 @@ public: /// Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. - unsigned getStackAlignment() const { return stackAlignment; } + Align getStackAlignment() const { return stackAlignment; } /// Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. 
@@ -527,7 +533,7 @@ public: /// Methods used by Global ISel const CallLowering *getCallLowering() const override; - const InstructionSelector *getInstructionSelector() const override; + InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; @@ -684,7 +690,6 @@ public: bool hasBF16() const { return HasBF16; } bool hasVP2INTERSECT() const { return HasVP2INTERSECT; } bool hasBITALG() const { return HasBITALG; } - bool hasMPX() const { return HasMPX; } bool hasSHSTK() const { return HasSHSTK; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } @@ -739,6 +744,7 @@ public: X86ProcFamily == IntelTRM; } bool useSoftFloat() const { return UseSoftFloat; } + bool useAA() const override { return UseAA; } /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for /// no-sse2). There isn't any reason to disable it if the target processor @@ -809,6 +815,7 @@ public: // On Win64, all these conventions just use the default convention. 
case CallingConv::C: case CallingConv::Fast: + case CallingConv::Tail: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0cbf13899a2..c15297134e4 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -81,27 +81,28 @@ extern "C" void LLVMInitializeX86Target() { initializeX86SpeculativeLoadHardeningPassPass(PR); initializeX86FlagsCopyLoweringPassPass(PR); initializeX86CondBrFoldingPassPass(PR); + initializeX86OptimizeLEAPassPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) { if (TT.getArch() == Triple::x86_64) - return llvm::make_unique(); - return llvm::make_unique(); + return std::make_unique(); + return std::make_unique(); } if (TT.isOSFreeBSD()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSSolaris()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSFuchsia()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatELF()) - return llvm::make_unique(); + return std::make_unique(); if (TT.isOSBinFormatCOFF()) - return llvm::make_unique(); + return std::make_unique(); llvm_unreachable("unknown subtarget type"); } @@ -116,6 +117,9 @@ static std::string computeDataLayout(const Triple &TT) { !TT.isArch64Bit()) Ret += "-p:32:32"; + // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers. + Ret += "-p270:32:32-p271:32:32-p272:64:64"; + // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. 
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl()) Ret += "-i64:64"; @@ -218,17 +222,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), TLOF(createTLOF(getTargetTriple())) { - // Windows stack unwinder gets confused when execution flow "falls through" - // after a call to 'noreturn' function. - // To prevent that, we emit a trap for 'unreachable' IR instructions. - // (which on X86, happens to be the 'ud2' instruction) // On PS4, the "return address" of a 'noreturn' call must still be within // the calling function, and TrapUnreachable is an easy way to get that. - // The check here for 64-bit windows is a bit icky, but as we're unlikely - // to ever want to mix 32 and 64-bit windows code in a single module - // this should be fine. - if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4() || - TT.isOSBinFormatMachO()) { + if (TT.isPS4() || TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO(); } @@ -311,10 +307,10 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = llvm::make_unique(TargetTriple, CPU, FS, *this, - Options.StackAlignmentOverride, - PreferVectorWidthOverride, - RequiredVectorWidth); + I = std::make_unique( + TargetTriple, CPU, FS, *this, + MaybeAlign(Options.StackAlignmentOverride), PreferVectorWidthOverride, + RequiredVectorWidth); } return I.get(); } @@ -517,12 +513,19 @@ void X86PassConfig::addPreEmitPass() { } void X86PassConfig::addPreEmitPass2() { + const Triple &TT = TM->getTargetTriple(); + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + addPass(createX86RetpolineThunksPass()); + + // Insert extra int3 instructions after trailing call instructions to avoid + // issues in the unwinder. 
+ if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) + addPass(createX86AvoidTrailingCallPass()); + // Verify basic block incoming and outgoing cfa offset and register values and // correct CFA calculation rule where needed by inserting appropriate CFI // instructions. - const Triple &TT = TM->getTargetTriple(); - const MCAsmInfo *MAI = TM->getMCAsmInfo(); if (!TT.isOSDarwin() && (!TT.isOSWindows() || MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI)) diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index b999e2e86af..ec3db7b1e9e 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -16,7 +16,6 @@ #include "X86Subtarget.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include @@ -26,6 +25,7 @@ namespace llvm { class StringRef; class X86Subtarget; class X86RegisterBankInfo; +class TargetTransformInfo; class X86TargetMachine final : public LLVMTargetMachine { std::unique_ptr TLOF; diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 92e0779c2e7..44185957686 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -47,8 +47,8 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol( } const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel( - const MCSymbol *Sym, const MCValue &MV, int64_t Offset, - MachineModuleInfo *MMI, MCStreamer &Streamer) const { + const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV, + int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the got entry // from a data section. In case there's an additional offset, then use // foo@GOTPCREL+4+. 
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 13d7b4ad70d..1fd0bbf56b1 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -30,7 +30,8 @@ namespace llvm { const TargetMachine &TM, MachineModuleInfo *MMI) const override; - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3dc59aeb263..70fd857fcf0 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -116,7 +116,8 @@ llvm::Optional X86TTIImpl::getCacheAssociativity( llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } -unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { +unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector && !ST->hasSSE1()) return 0; @@ -887,7 +888,7 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. - // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. + // 64-bit packed integer vectors (v2i32) are widened to type v4i32. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); // Treat Transpose as 2-op shuffles - there's no difference in lowering. @@ -911,6 +912,39 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, int NumSubElts = SubLT.second.getVectorNumElements(); if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) return SubLT.first; + // Handle some cases for widening legalization. 
For now we only handle + // cases where the original subvector was naturally aligned and evenly + // fit in its legalized subvector type. + // FIXME: Remove some of the alignment restrictions. + // FIXME: We can use permq for 64-bit or larger extracts from 256-bit + // vectors. + int OrigSubElts = SubTp->getVectorNumElements(); + if (NumSubElts > OrigSubElts && + (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 && + LT.second.getVectorElementType() == + SubLT.second.getVectorElementType() && + LT.second.getVectorElementType().getSizeInBits() == + Tp->getVectorElementType()->getPrimitiveSizeInBits()) { + assert(NumElts >= NumSubElts && NumElts > OrigSubElts && + "Unexpected number of elements!"); + Type *VecTy = VectorType::get(Tp->getVectorElementType(), + LT.second.getVectorNumElements()); + Type *SubTy = VectorType::get(Tp->getVectorElementType(), + SubLT.second.getVectorNumElements()); + int ExtractIndex = alignDown((Index % NumElts), NumSubElts); + int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy, + ExtractIndex, SubTy); + + // If the original size is 32-bits or more, we can use pshufd. Otherwise + // if we have SSSE3 we can use pshufb. 
+ if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3()) + return ExtractCost + 1; // pshufd or pshufb + + assert(SubTp->getPrimitiveSizeInBits() == 16 && + "Unexpected vector size"); + + return ExtractCost + 2; // worst case pshufhw + pshufd + } } } @@ -1314,8 +1348,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, @@ -1354,6 +1390,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, @@ -1371,14 +1409,14 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::SIGN_EXTEND, 
MVT::v4i64, MVT::v4i16, 3 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -1402,13 +1440,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, @@ -1421,7 +1459,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 4 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 9 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, @@ -1507,6 +1548,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v2i8, 
MVT::v2i64, 1 }, // PSHUFB { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, }; @@ -1520,7 +1562,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, @@ -1536,6 +1579,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, @@ -1562,15 +1607,21 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB + { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB + { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW + { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD }; std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); @@ -1691,6 
+1742,11 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, } } + static const CostTblEntry SLMCostTbl[] = { + // slm pcmpeq/pcmpgt throughput is 2 + { ISD::SETCC, MVT::v2i64, 2 }, + }; + static const CostTblEntry AVX512BWCostTbl[] = { { ISD::SETCC, MVT::v32i16, 1 }, { ISD::SETCC, MVT::v64i8, 1 }, @@ -1777,6 +1833,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps }; + if (ST->isSLM()) + if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) + return LT.first * (ExtraCost + Entry->Cost); + if (ST->hasBWI()) if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) return LT.first * (ExtraCost + Entry->Cost); @@ -2043,8 +2103,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/ }; + static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets + { ISD::CTLZ, MVT::i64, 1 }, + }; + static const CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets + { ISD::CTLZ, MVT::i32, 1 }, + { ISD::CTLZ, MVT::i16, 1 }, + { ISD::CTLZ, MVT::i8, 1 }, + }; + static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets + { ISD::CTPOP, MVT::i64, 1 }, + }; + static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets + { ISD::CTPOP, MVT::i32, 1 }, + { ISD::CTPOP, MVT::i16, 1 }, + { ISD::CTPOP, MVT::i8, 1 }, + }; static const CostTblEntry X64CostTbl[] = { // 64-bit targets { ISD::BITREVERSE, MVT::i64, 14 }, + { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTPOP, MVT::i64, 10 }, { ISD::SADDO, MVT::i64, 1 }, { ISD::UADDO, MVT::i64, 1 }, }; @@ -2052,6 +2130,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::BITREVERSE, MVT::i32, 14 }, { ISD::BITREVERSE, MVT::i16, 14 }, { ISD::BITREVERSE, MVT::i8, 11 }, + { ISD::CTLZ, 
MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV + { ISD::CTPOP, MVT::i32, 8 }, + { ISD::CTPOP, MVT::i16, 9 }, + { ISD::CTPOP, MVT::i8, 7 }, { ISD::SADDO, MVT::i32, 1 }, { ISD::SADDO, MVT::i16, 1 }, { ISD::SADDO, MVT::i8, 1 }, @@ -2163,6 +2247,26 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (ST->hasLZCNT()) { + if (ST->is64Bit()) + if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + } + + if (ST->hasPOPCNT()) { + if (ST->is64Bit()) + if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + } + + // TODO - add BMI (TZCNT) scalar handling + if (ST->is64Bit()) if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) return LT.first * Entry->Cost; @@ -2357,8 +2461,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned NumElem = SrcVTy->getVectorNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); - if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) || - (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) { + if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) || + (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) || + !isPowerOf2_32(NumElem)) { // Scalarization int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); int ScalarCompareCost = getCmpSelInstrCost( @@ -2425,70 +2530,107 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, 
Type *ValTy, bool IsPairwise) { + // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput + // and make it as the cost. - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + static const CostTblEntry SSE2CostTblPairWise[] = { + { ISD::FADD, MVT::v2f64, 2 }, + { ISD::FADD, MVT::v4f32, 4 }, + { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32. + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". + { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16 + { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16 + { ISD::ADD, MVT::v8i16, 5 }, + { ISD::ADD, MVT::v2i8, 2 }, + { ISD::ADD, MVT::v4i8, 2 }, + { ISD::ADD, MVT::v8i8, 2 }, + { ISD::ADD, MVT::v16i8, 3 }, + }; - MVT MTy = LT.second; + static const CostTblEntry AVX1CostTblPairWise[] = { + { ISD::FADD, MVT::v4f64, 5 }, + { ISD::FADD, MVT::v8f32, 7 }, + { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". + { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8". + { ISD::ADD, MVT::v8i32, 5 }, + { ISD::ADD, MVT::v16i16, 6 }, + { ISD::ADD, MVT::v32i8, 4 }, + }; + + static const CostTblEntry SSE2CostTblNoPairWise[] = { + { ISD::FADD, MVT::v2f64, 2 }, + { ISD::FADD, MVT::v4f32, 4 }, + { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". + { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32 + { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". + { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3". + { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3". + { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". 
+ { ISD::ADD, MVT::v2i8, 2 }, + { ISD::ADD, MVT::v4i8, 2 }, + { ISD::ADD, MVT::v8i8, 2 }, + { ISD::ADD, MVT::v16i8, 3 }, + }; + + static const CostTblEntry AVX1CostTblNoPairWise[] = { + { ISD::FADD, MVT::v4f64, 3 }, + { ISD::FADD, MVT::v4f32, 3 }, + { ISD::FADD, MVT::v8f32, 4 }, + { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". + { ISD::ADD, MVT::v4i64, 3 }, + { ISD::ADD, MVT::v8i32, 5 }, + { ISD::ADD, MVT::v16i16, 5 }, + { ISD::ADD, MVT::v32i8, 4 }, + }; int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput - // and make it as the cost. + // Before legalizing the type, give a chance to look up illegal narrow types + // in the table. + // FIXME: Is there a better way to do this? + EVT VT = TLI->getValueType(DL, ValTy); + if (VT.isSimple()) { + MVT MTy = VT.getSimpleVT(); + if (IsPairwise) { + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) + return Entry->Cost; - static const CostTblEntry SSE42CostTblPairWise[] = { - { ISD::FADD, MVT::v2f64, 2 }, - { ISD::FADD, MVT::v4f32, 4 }, - { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". - { ISD::ADD, MVT::v8i16, 5 }, - }; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) + return Entry->Cost; + } else { + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) + return Entry->Cost; - static const CostTblEntry AVX1CostTblPairWise[] = { - { ISD::FADD, MVT::v4f32, 4 }, - { ISD::FADD, MVT::v4f64, 5 }, - { ISD::FADD, MVT::v8f32, 7 }, - { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5". - { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8". 
- { ISD::ADD, MVT::v8i16, 5 }, - { ISD::ADD, MVT::v8i32, 5 }, - }; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) + return Entry->Cost; + } + } - static const CostTblEntry SSE42CostTblNoPairWise[] = { - { ISD::FADD, MVT::v2f64, 2 }, - { ISD::FADD, MVT::v4f32, 4 }, - { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3". - { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". - }; + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); - static const CostTblEntry AVX1CostTblNoPairWise[] = { - { ISD::FADD, MVT::v4f32, 3 }, - { ISD::FADD, MVT::v4f64, 3 }, - { ISD::FADD, MVT::v8f32, 4 }, - { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5". - { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8". - { ISD::ADD, MVT::v4i64, 3 }, - { ISD::ADD, MVT::v8i16, 4 }, - { ISD::ADD, MVT::v8i32, 5 }, - }; + MVT MTy = LT.second; if (IsPairwise) { if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) return LT.first * Entry->Cost; - if (ST->hasSSE42()) - if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) return LT.first * Entry->Cost; } else { if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) return LT.first * Entry->Cost; - if (ST->hasSSE42()) - if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) return LT.first * Entry->Cost; } @@ -3116,7 +3258,7 @@ bool X86TTIImpl::canMacroFuseCmp() { return ST->hasMacroFusion() || ST->hasBranchFusion(); } -bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { +bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign 
Alignment) { if (!ST->hasAVX()) return false; @@ -3139,11 +3281,11 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI()); } -bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { - return isLegalMaskedLoad(DataType); +bool X86TTIImpl::isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { + return isLegalMaskedLoad(DataType, Alignment); } -bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { +bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) { unsigned DataSize = DL.getTypeStoreSize(DataType); // The only supported nontemporal loads are for aligned vectors of 16 or 32 // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2 @@ -3154,7 +3296,7 @@ bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { return false; } -bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) { +bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { unsigned DataSize = DL.getTypeStoreSize(DataType); // SSE4A supports nontemporal stores of float and double at arbitrary @@ -3299,9 +3441,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { if (IsZeroCmp) { // Only enable vector loads for equality comparison. Right now the vector // version is not as fast for three way compare (see #33329). - // TODO: enable AVX512 when the DAG is ready. - // if (ST->hasAVX512()) Options.LoadSizes.push_back(64); const unsigned PreferredWidth = ST->getPreferVectorWidth(); + if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32); if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); // All GPR and vector loads can be unaligned. 
SIMD compare requires integer diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 25d9c33eb16..7581257f41f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -83,6 +83,7 @@ class X86TTIImpl : public BasicTTIImplBase { X86::FeatureSlowUAMem32, // Based on whether user set the -mprefer-vector-width command line. + X86::FeaturePrefer128Bit, X86::FeaturePrefer256Bit, // CPU name enums. These just follow CPU string. @@ -115,7 +116,7 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AS) const; unsigned getMaxInterleaveFactor(unsigned VF); @@ -184,10 +185,10 @@ public: bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); - bool isLegalMaskedLoad(Type *DataType); - bool isLegalMaskedStore(Type *DataType); - bool isLegalNTLoad(Type *DataType, unsigned Alignment); - bool isLegalNTStore(Type *DataType, unsigned Alignment); + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment); + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment); + bool isLegalNTLoad(Type *DataType, Align Alignment); + bool isLegalNTStore(Type *DataType, Align Alignment); bool isLegalMaskedGather(Type *DataType); bool isLegalMaskedScatter(Type *DataType); bool isLegalMaskedExpandLoad(Type *DataType); diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index a07d2f20aca..9280d030b5d 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -292,8 +292,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { // need to insert any VZEROUPPER instructions. This is constant-time, so it // is cheap in the common case of no ymm/zmm use. 
bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm; - const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass}; - for (auto *RC : RCs) { + for (auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) { if (!YmmOrZmmUsed) { for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; i++) { @@ -304,9 +303,8 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { } } } - if (!YmmOrZmmUsed) { + if (!YmmOrZmmUsed) return false; - } assert(BlockStates.empty() && DirtySuccessors.empty() && "X86VZeroUpper state should be clear"); diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp index 9e499db1d7e..ae72c642758 100644 --- a/lib/Target/X86/X86WinAllocaExpander.cpp +++ b/lib/Target/X86/X86WinAllocaExpander.cpp @@ -81,7 +81,7 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) { MI->getOpcode() == X86::WIN_ALLOCA_64); assert(MI->getOperand(0).isReg()); - unsigned AmountReg = MI->getOperand(0).getReg(); + Register AmountReg = MI->getOperand(0).getReg(); MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg); if (!Def || @@ -261,7 +261,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) { break; } - unsigned AmountReg = MI->getOperand(0).getReg(); + Register AmountReg = MI->getOperand(0).getReg(); MI->eraseFromParent(); // Delete the definition of AmountReg. 
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index f68d17d7256..d65e1f3ab41 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -339,7 +339,10 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { if (UseStackGuard) { Value *Val = Builder.CreateLoad(Int32Ty, Cookie); Value *FrameAddr = Builder.CreateCall( - Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress), + Intrinsic::getDeclaration( + TheModule, Intrinsic::frameaddress, + Builder.getInt8PtrTy( + TheModule->getDataLayout().getAllocaAddrSpace())), Builder.getInt32(0), "frameaddr"); Value *FrameAddrI32 = Builder.CreatePtrToInt(FrameAddr, Int32Ty); FrameAddrI32 = Builder.CreateXor(FrameAddrI32, Val); diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 9f615b9e774..6b3dc27cb88 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -115,7 +115,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); - unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType()); + const Align Alignment(DL.getPrefTypeAlignment(C->getType())); // Mark the start of the global getTargetStreamer().emitCCTopData(GVSym->getName()); @@ -143,7 +143,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { llvm_unreachable("Unknown linkage type!"); } - EmitAlignment(Align > 2 ? 
Align : 2, GV); + EmitAlignment(std::max(Alignment, Align(4)), GV); if (GV->isThreadLocal()) { report_fatal_error("TLS is not supported by this target!"); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 5066407c74a..fd8b37e26e4 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -211,7 +211,7 @@ static void RestoreSpillList(MachineBasicBlock &MBB, //===----------------------------------------------------------------------===// XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) { + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(4), 0) { // Do nothing } @@ -367,8 +367,8 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); // Return to the landing pad. - unsigned EhStackReg = MBBI->getOperand(0).getReg(); - unsigned EhHandlerReg = MBBI->getOperand(1).getReg(); + Register EhStackReg = MBBI->getOperand(0).getReg(); + Register EhHandlerReg = MBBI->getOperand(1).getReg(); BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(EhStackReg); BuildMI(MBB, MBBI, dl, TII.get(XCore::BAU_1r)).addReg(EhHandlerReg); MBB.erase(MBBI); // Erase the previous return instruction. 
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp index e433d21c59b..b5dbdea98ee 100644 --- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp +++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp @@ -55,7 +55,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) { MBBI != EE; ++MBBI) { if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) { MachineInstr &OldInst = *MBBI; - unsigned Reg = OldInst.getOperand(0).getReg(); + Register Reg = OldInst.getOperand(0).getReg(); MBBI = TII.loadImmediate(MBB, MBBI, Reg, StackSize); OldInst.eraseFromParent(); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 072278d9fc4..bf006fd673f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -171,8 +171,8 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - setMinFunctionAlignment(1); - setPrefFunctionAlignment(2); + setMinFunctionAlignment(Align(2)); + setPrefFunctionAlignment(Align(4)); } bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -414,8 +414,8 @@ SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { "Unexpected extension type"); assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); - if (allowsMemoryAccess(Context, DAG.getDataLayout(), LD->getMemoryVT(), - *LD->getMemOperand())) + if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(), + LD->getMemoryVT(), *LD->getMemOperand())) return SDValue(); SDValue Chain = LD->getChain(); @@ -488,8 +488,8 @@ SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(!ST->isTruncatingStore() && "Unexpected store type"); assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); - if (allowsMemoryAccess(Context, DAG.getDataLayout(), ST->getMemoryVT(), - 
*ST->getMemOperand())) + if (allowsMemoryAccessForAlignment(Context, DAG.getDataLayout(), + ST->getMemoryVT(), *ST->getMemOperand())) return SDValue(); SDValue Chain = ST->getChain(); @@ -1309,7 +1309,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments( llvm_unreachable(nullptr); } case MVT::i32: - unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgIn = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1)); @@ -1360,7 +1360,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments( offset -= StackSlotSize; SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); // Move argument from phys reg -> virt reg - unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); + Register VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); CFRegNode.push_back(Val.getValue(Val->getNumValues() - 1)); @@ -1780,8 +1780,9 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // Replace unaligned store of unaligned load with memmove. 
StoreSDNode *ST = cast(N); if (!DCI.isBeforeLegalize() || - allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - ST->getMemoryVT(), *ST->getMemOperand()) || + allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + ST->getMemoryVT(), + *ST->getMemOperand()) || ST->isVolatile() || ST->isIndexed()) { break; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 3752274e2cd..86ec7f82d4d 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -301,7 +301,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, << "<--------->\n"); Offset/=4; - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); if (TFI->hasFP(MF)) { diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 2a8cd6b657b..b5b7445265b 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -53,7 +53,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT, T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32", TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveXCoreCodeModel(CM), OL), - TLOF(llvm::make_unique()), + TLOF(std::make_unique()), Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index 3fecaaa5972..58df1f290ec 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -40,7 +40,8 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} - unsigned getNumberOfRegisters(bool Vector) { + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); if (Vector) { return 0; } diff --git 
a/lib/TextAPI/MachO/Architecture.cpp b/lib/TextAPI/MachO/Architecture.cpp index a66a982fa15..699fb5f4587 100644 --- a/lib/TextAPI/MachO/Architecture.cpp +++ b/lib/TextAPI/MachO/Architecture.cpp @@ -68,6 +68,10 @@ std::pair getCPUTypeFromArchitecture(Architecture Arch) { return std::make_pair(0, 0); } +Architecture mapToArchitecture(const Triple &Target) { + return getArchitectureFromName(Target.getArchName()); +} + raw_ostream &operator<<(raw_ostream &OS, Architecture Arch) { OS << getArchitectureName(Arch); return OS; diff --git a/lib/TextAPI/MachO/InterfaceFile.cpp b/lib/TextAPI/MachO/InterfaceFile.cpp index 54ba8cc3126..c40a952a6a8 100644 --- a/lib/TextAPI/MachO/InterfaceFile.cpp +++ b/lib/TextAPI/MachO/InterfaceFile.cpp @@ -27,36 +27,65 @@ typename C::iterator addEntry(C &Container, StringRef InstallName) { return Container.emplace(I, InstallName); } + +template +typename C::iterator addEntry(C &Container, const Target &Target_) { + auto Iter = + lower_bound(Container, Target_, [](const Target &LHS, const Target &RHS) { + return LHS < RHS; + }); + if ((Iter != std::end(Container)) && !(Target_ < *Iter)) + return Iter; + + return Container.insert(Iter, Target_); +} } // end namespace detail. 
-void InterfaceFile::addAllowableClient(StringRef Name, - ArchitectureSet Architectures) { - auto Client = detail::addEntry(AllowableClients, Name); - Client->addArchitectures(Architectures); +void InterfaceFileRef::addTarget(const Target &Target) { + detail::addEntry(Targets, Target); +} + +void InterfaceFile::addAllowableClient(StringRef InstallName, + const Target &Target) { + auto Client = detail::addEntry(AllowableClients, InstallName); + Client->addTarget(Target); } void InterfaceFile::addReexportedLibrary(StringRef InstallName, - ArchitectureSet Architectures) { + const Target &Target) { auto Lib = detail::addEntry(ReexportedLibraries, InstallName); - Lib->addArchitectures(Architectures); + Lib->addTarget(Target); } -void InterfaceFile::addUUID(Architecture Arch, StringRef UUID) { - auto I = partition_point(UUIDs, - [=](const std::pair &O) { - return O.first < Arch; - }); +void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { + auto Iter = lower_bound(ParentUmbrellas, Target_, + [](const std::pair &LHS, + Target RHS) { return LHS.first < RHS; }); - if (I != UUIDs.end() && Arch == I->first) { - I->second = UUID; + if ((Iter != ParentUmbrellas.end()) && !(Target_ < Iter->first)) { + Iter->second = Parent; return; } - UUIDs.emplace(I, Arch, UUID); + ParentUmbrellas.emplace(Iter, Target_, Parent); return; } -void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) { +void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) { + auto Iter = lower_bound(UUIDs, Target_, + [](const std::pair &LHS, + Target RHS) { return LHS.first < RHS; }); + + if ((Iter != UUIDs.end()) && !(Target_ < Iter->first)) { + Iter->second = UUID; + return; + } + + UUIDs.emplace(Iter, Target_, UUID); + return; +} + +void InterfaceFile::addUUID(const Target &Target, uint8_t UUID[16]) { std::stringstream Stream; for (unsigned i = 0; i < 16; ++i) { if (i == 4 || i == 6 || i == 8 || i == 10) @@ -64,17 +93,30 @@ void InterfaceFile::addUUID(Architecture 
Arch, uint8_t UUID[16]) { Stream << std::setfill('0') << std::setw(2) << std::uppercase << std::hex << static_cast(UUID[i]); } - addUUID(Arch, Stream.str()); + addUUID(Target, Stream.str()); +} + +void InterfaceFile::addTarget(const Target &Target) { + detail::addEntry(Targets, Target); +} + +InterfaceFile::const_filtered_target_range +InterfaceFile::targets(ArchitectureSet Archs) const { + std::function fn = [Archs](const Target &Target_) { + return Archs.has(Target_.Arch); + }; + return make_filter_range(Targets, fn); } void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name, - ArchitectureSet Archs, SymbolFlags Flags) { + const TargetList &Targets, SymbolFlags Flags) { Name = copyString(Name); auto result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr); if (result.second) - result.first->second = new (Allocator) Symbol{Kind, Name, Archs, Flags}; + result.first->second = new (Allocator) Symbol{Kind, Name, Targets, Flags}; else - result.first->second->addArchitectures(Archs); + for (const auto &Target : Targets) + result.first->second->addTarget(Target); } } // end namespace MachO. diff --git a/lib/TextAPI/MachO/Platform.cpp b/lib/TextAPI/MachO/Platform.cpp new file mode 100644 index 00000000000..588ec9a4d83 --- /dev/null +++ b/lib/TextAPI/MachO/Platform.cpp @@ -0,0 +1,91 @@ +//===- llvm/TextAPI/MachO/Platform.cpp - Platform ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementations of Platform Helper functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/TextAPI/MachO/Platform.h" + +namespace llvm { +namespace MachO { + +PlatformKind mapToPlatformKind(PlatformKind Platform, bool WantSim) { + switch (Platform) { + default: + return Platform; + case PlatformKind::iOS: + return WantSim ? PlatformKind::iOSSimulator : PlatformKind::iOS; + case PlatformKind::tvOS: + return WantSim ? PlatformKind::tvOSSimulator : PlatformKind::tvOS; + case PlatformKind::watchOS: + return WantSim ? PlatformKind::watchOSSimulator : PlatformKind::watchOS; + } + llvm_unreachable("Unknown llvm.MachO.PlatformKind enum"); +} + +PlatformKind mapToPlatformKind(const Triple &Target) { + switch (Target.getOS()) { + default: + return PlatformKind::unknown; + case Triple::MacOSX: + return PlatformKind::macOS; + case Triple::IOS: + if (Target.isSimulatorEnvironment()) + return PlatformKind::iOSSimulator; + if (Target.getEnvironment() == Triple::MacABI) + return PlatformKind::macCatalyst; + return PlatformKind::iOS; + case Triple::TvOS: + return Target.isSimulatorEnvironment() ? PlatformKind::tvOSSimulator + : PlatformKind::tvOS; + case Triple::WatchOS: + return Target.isSimulatorEnvironment() ? 
PlatformKind::watchOSSimulator + : PlatformKind::watchOS; + // TODO: add bridgeOS once in llvm::Triple + } + llvm_unreachable("Unknown Target Triple"); +} + +PlatformSet mapToPlatformSet(ArrayRef Targets) { + PlatformSet Result; + for (const auto &Target : Targets) + Result.insert(mapToPlatformKind(Target)); + return Result; +} + +StringRef getPlatformName(PlatformKind Platform) { + switch (Platform) { + case PlatformKind::unknown: + return "unknown"; + case PlatformKind::macOS: + return "macOS"; + case PlatformKind::iOS: + return "iOS"; + case PlatformKind::tvOS: + return "tvOS"; + case PlatformKind::watchOS: + return "watchOS"; + case PlatformKind::bridgeOS: + return "bridgeOS"; + case PlatformKind::macCatalyst: + return "macCatalyst"; + case PlatformKind::iOSSimulator: + return "iOS Simulator"; + case PlatformKind::tvOSSimulator: + return "tvOS Simulator"; + case PlatformKind::watchOSSimulator: + return "watchOS Simulator"; + } + llvm_unreachable("Unknown llvm.MachO.PlatformKind enum"); +} + +} // end namespace MachO. +} // end namespace llvm. diff --git a/lib/TextAPI/MachO/Symbol.cpp b/lib/TextAPI/MachO/Symbol.cpp index 731b264f608..9f2d8172bee 100644 --- a/lib/TextAPI/MachO/Symbol.cpp +++ b/lib/TextAPI/MachO/Symbol.cpp @@ -45,5 +45,14 @@ LLVM_DUMP_METHOD void Symbol::dump(raw_ostream &OS) const { } #endif +Symbol::const_filtered_target_range +Symbol::targets(ArchitectureSet Architectures) const { + std::function FN = + [Architectures](const Target &Target) { + return Architectures.has(Target.Arch); + }; + return make_filter_range(Targets, FN); +} + } // end namespace MachO. } // end namespace llvm. diff --git a/lib/TextAPI/MachO/Target.cpp b/lib/TextAPI/MachO/Target.cpp new file mode 100644 index 00000000000..aee8ef42142 --- /dev/null +++ b/lib/TextAPI/MachO/Target.cpp @@ -0,0 +1,75 @@ +//===- tapi/Core/Target.cpp - Target ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TextAPI/MachO/Target.h" + +namespace llvm { +namespace MachO { + +Expected Target::create(StringRef TargetValue) { + auto Result = TargetValue.split('-'); + auto ArchitectureStr = Result.first; + auto Architecture = getArchitectureFromName(ArchitectureStr); + auto PlatformStr = Result.second; + PlatformKind Platform; + Platform = StringSwitch(PlatformStr) + .Case("macos", PlatformKind::macOS) + .Case("ios", PlatformKind::iOS) + .Case("tvos", PlatformKind::tvOS) + .Case("watchos", PlatformKind::watchOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("maccatalyst", PlatformKind::macCatalyst) + .Case("ios-simulator", PlatformKind::iOSSimulator) + .Case("tvos-simulator", PlatformKind::tvOSSimulator) + .Case("watchos-simulator", PlatformKind::watchOSSimulator) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::unknown) { + if (PlatformStr.startswith("<") && PlatformStr.endswith(">")) { + PlatformStr = PlatformStr.drop_front().drop_back(); + unsigned long long RawValue; + if (!PlatformStr.getAsInteger(10, RawValue)) + Platform = (PlatformKind)RawValue; + } + } + + return Target{Architecture, Platform}; +} + +Target::operator std::string() const { + return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")") + .str(); +} + +raw_ostream &operator<<(raw_ostream &OS, const Target &Target) { + OS << std::string(Target); + return OS; +} + +PlatformSet mapToPlatformSet(ArrayRef Targets) { + PlatformSet Result; + for (const auto &Target : Targets) + Result.insert(Target.Platform); + return Result; +} + 
+ArchitectureSet mapToArchitectureSet(ArrayRef Targets) { + ArchitectureSet Result; + for (const auto &Target : Targets) + Result.set(Target.Arch); + return Result; +} + +} // end namespace MachO. +} // end namespace llvm. diff --git a/lib/TextAPI/MachO/TextStub.cpp b/lib/TextAPI/MachO/TextStub.cpp index 799ebdc883a..0584e43d589 100644 --- a/lib/TextAPI/MachO/TextStub.cpp +++ b/lib/TextAPI/MachO/TextStub.cpp @@ -147,6 +147,58 @@ Each undefineds section is defined as following: objc-ivars: [] # Optional: List of Objective C Instance Variables weak-ref-symbols: [] # Optional: List of weak defined symbols */ + +/* + + YAML Format specification. + +--- !tapi-tbd +tbd-version: 4 # The tbd version for format +targets: [ armv7-ios, x86_64-maccatalyst ] # The list of applicable tapi supported target triples +uuids: # Optional: List of target and UUID pairs. + - target: armv7-ios + value: ... + - target: x86_64-maccatalyst + value: ... +flags: [] # Optional: +install-name: /u/l/libfoo.dylib # +current-version: 1.2.3 # Optional: defaults to 1.0 +compatibility-version: 1.0 # Optional: defaults to 1.0 +swift-abi-version: 0 # Optional: defaults to 0 +parent-umbrella: # Optional: +allowable-clients: + - targets: [ armv7-ios ] # Optional: + clients: [ clientA ] +exports: # List of export sections +... +re-exports: # List of reexport sections +... +undefineds: # List of undefineds sections +... 
+ +Each export and reexport section is defined as following: + +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symA ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance + # Variables + weak-symbols: [] # Optional: List of weak defined symbols + thread-local-symbols: [] # Optional: List of thread local symbols +- targets: [ arm64-macos, x86_64-maccatalyst ] # Optional: Targets for applicable additional symbols + symbols: [ _symB ] # Optional: List of symbols + +Each undefineds section is defined as following: +- targets: [ arm64-macos ] # The list of target triples associated with symbols + symbols: [ _symC ] # Optional: List of symbols + objc-classes: [] # Optional: List of Objective-C classes + objc-eh-types: [] # Optional: List of Objective-C classes + # with EH + objc-ivars: [] # Optional: List of Objective C Instance Variables + weak-symbols: [] # Optional: List of weak defined symbols +*/ // clang-format on using namespace llvm; @@ -175,6 +227,38 @@ struct UndefinedSection { std::vector WeakRefSymbols; }; +// Sections for direct target mapping in TBDv4 +struct SymbolSection { + TargetList Targets; + std::vector Symbols; + std::vector Classes; + std::vector ClassEHs; + std::vector Ivars; + std::vector WeakSymbols; + std::vector TlvSymbols; +}; + +struct MetadataSection { + enum Option { Clients, Libraries }; + std::vector Targets; + std::vector Values; +}; + +struct UmbrellaSection { + std::vector Targets; + std::string Umbrella; +}; + +// UUID's for TBDv4 are mapped to target not arch +struct UUIDv4 { + Target TargetID; + std::string Value; + + UUIDv4() = default; + UUIDv4(const Target &TargetID, const std::string &Value) + : TargetID(TargetID), Value(Value) {} +}; + // clang-format off enum TBDFlags : unsigned { None = 0U, @@ -189,6 +273,12 @@ enum 
TBDFlags : unsigned { LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture) LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection) LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection) +// Specific to TBDv4 +LLVM_YAML_IS_SEQUENCE_VECTOR(SymbolSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(MetadataSection) +LLVM_YAML_IS_SEQUENCE_VECTOR(UmbrellaSection) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Target) +LLVM_YAML_IS_SEQUENCE_VECTOR(UUIDv4) namespace llvm { namespace yaml { @@ -231,6 +321,49 @@ template <> struct MappingTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, SymbolSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapOptional("symbols", Section.Symbols); + IO.mapOptional("objc-classes", Section.Classes); + IO.mapOptional("objc-eh-types", Section.ClassEHs); + IO.mapOptional("objc-ivars", Section.Ivars); + IO.mapOptional("weak-symbols", Section.WeakSymbols); + IO.mapOptional("thread-local-symbols", Section.TlvSymbols); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UmbrellaSection &Section) { + IO.mapRequired("targets", Section.Targets); + IO.mapRequired("umbrella", Section.Umbrella); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, UUIDv4 &UUID) { + IO.mapRequired("target", UUID.TargetID); + IO.mapRequired("value", UUID.Value); + } +}; + +template <> +struct MappingContextTraits { + static void mapping(IO &IO, MetadataSection &Section, + MetadataSection::Option &OptionKind) { + IO.mapRequired("targets", Section.Targets); + switch (OptionKind) { + case MetadataSection::Option::Clients: + IO.mapRequired("clients", Section.Values); + return; + case MetadataSection::Option::Libraries: + IO.mapRequired("libraries", Section.Values); + return; + } + llvm_unreachable("unexpected option for metadata"); + } +}; + template <> struct ScalarBitSetTraits { static void bitset(IO &IO, TBDFlags &Flags) { IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace); @@ -240,13 +373,67 @@ template <> struct 
ScalarBitSetTraits { } }; +template <> struct ScalarTraits { + static void output(const Target &Value, void *, raw_ostream &OS) { + OS << Value.Arch << "-"; + switch (Value.Platform) { + default: + OS << "unknown"; + break; + case PlatformKind::macOS: + OS << "macos"; + break; + case PlatformKind::iOS: + OS << "ios"; + break; + case PlatformKind::tvOS: + OS << "tvos"; + break; + case PlatformKind::watchOS: + OS << "watchos"; + break; + case PlatformKind::bridgeOS: + OS << "bridgeos"; + break; + case PlatformKind::macCatalyst: + OS << "maccatalyst"; + break; + case PlatformKind::iOSSimulator: + OS << "ios-simulator"; + break; + case PlatformKind::tvOSSimulator: + OS << "tvos-simulator"; + break; + case PlatformKind::watchOSSimulator: + OS << "watchos-simulator"; + break; + } + } + + static StringRef input(StringRef Scalar, void *, Target &Value) { + auto Result = Target::create(Scalar); + if (!Result) + return toString(Result.takeError()); + + Value = *Result; + if (Value.Arch == AK_unknown) + return "unknown architecture"; + if (Value.Platform == PlatformKind::unknown) + return "unknown platform"; + + return {}; + } + + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + template <> struct MappingTraits { struct NormalizedTBD { explicit NormalizedTBD(IO &IO) {} NormalizedTBD(IO &IO, const InterfaceFile *&File) { Architectures = File->getArchitectures(); UUIDs = File->uuids(); - Platform = File->getPlatform(); + Platforms = File->getPlatforms(); InstallName = File->getInstallName(); CurrentVersion = PackedVersion(File->getCurrentVersion()); CompatibilityVersion = PackedVersion(File->getCompatibilityVersion()); @@ -263,7 +450,10 @@ template <> struct MappingTraits { if (File->isInstallAPI()) Flags |= TBDFlags::InstallAPI; - ParentUmbrella = File->getParentUmbrella(); + for (const auto &Iter : File->umbrellas()) { + ParentUmbrella = Iter.second; + break; + } std::set ArchSet; for (const auto &Library : File->allowableClients()) @@ -396,6 
+586,29 @@ template <> struct MappingTraits { } } + // TBD v1 - TBD v3 files only support one platform and several + // architectures. It is possible to have more than one platform for TBD v3 + // files, but the architectures don't apply to all + // platforms, specifically to filter out the i386 slice from + // platform macCatalyst. + TargetList synthesizeTargets(ArchitectureSet Architectures, + const PlatformSet &Platforms) { + TargetList Targets; + + for (auto Platform : Platforms) { + Platform = mapToPlatformKind(Platform, Architectures.hasX86()); + + for (const auto &&Architecture : Architectures) { + if ((Architecture == AK_i386) && + (Platform == PlatformKind::macCatalyst)) + continue; + + Targets.emplace_back(Architecture, Platform); + } + } + return Targets; + } + const InterfaceFile *denormalize(IO &IO) { auto Ctx = reinterpret_cast(IO.getContext()); assert(Ctx); @@ -403,16 +616,16 @@ template <> struct MappingTraits { auto *File = new InterfaceFile; File->setPath(Ctx->Path); File->setFileType(Ctx->FileKind); + File->addTargets(synthesizeTargets(Architectures, Platforms)); for (auto &ID : UUIDs) File->addUUID(ID.first, ID.second); - File->setPlatform(Platform); - File->setArchitectures(Architectures); File->setInstallName(InstallName); File->setCurrentVersion(CurrentVersion); File->setCompatibilityVersion(CompatibilityVersion); File->setSwiftABIVersion(SwiftABIVersion); File->setObjCConstraint(ObjCConstraint); - File->setParentUmbrella(ParentUmbrella); + for (const auto &Target : File->targets()) + File->addParentUmbrella(Target, ParentUmbrella); if (Ctx->FileKind == FileType::TBD_V1) { File->setTwoLevelNamespace(); @@ -425,76 +638,80 @@ template <> struct MappingTraits { } for (const auto &Section : Exports) { - for (const auto &Library : Section.AllowableClients) - File->addAllowableClient(Library, Section.Architectures); - for (const auto &Library : Section.ReexportedLibraries) - File->addReexportedLibrary(Library, Section.Architectures); + const auto 
Targets = + synthesizeTargets(Section.Architectures, Platforms); + + for (const auto &Lib : Section.AllowableClients) + for (const auto &Target : Targets) + File->addAllowableClient(Lib, Target); + + for (const auto &Lib : Section.ReexportedLibraries) + for (const auto &Target : Targets) + File->addReexportedLibrary(Lib, Target); for (const auto &Symbol : Section.Symbols) { if (Ctx->FileKind != FileType::TBD_V3 && Symbol.value.startswith("_OBJC_EHTYPE_$_")) File->addSymbol(SymbolKind::ObjectiveCClassEHType, - Symbol.value.drop_front(15), Section.Architectures); + Symbol.value.drop_front(15), Targets); else - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets); } for (auto &Symbol : Section.Classes) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCClass, Name, - Section.Architectures); + File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets); } for (auto &Symbol : Section.ClassEHs) - File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, - Section.Architectures); + File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets); for (auto &Symbol : Section.IVars) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, - Section.Architectures); + Targets); } for (auto &Symbol : Section.WeakDefSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::WeakDefined); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, + SymbolFlags::WeakDefined); for (auto &Symbol : Section.TLVSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::ThreadLocalValue); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, + SymbolFlags::ThreadLocalValue); } for (const auto &Section : Undefineds) { + const auto 
Targets = + synthesizeTargets(Section.Architectures, Platforms); for (auto &Symbol : Section.Symbols) { if (Ctx->FileKind != FileType::TBD_V3 && Symbol.value.startswith("_OBJC_EHTYPE_$_")) File->addSymbol(SymbolKind::ObjectiveCClassEHType, - Symbol.value.drop_front(15), Section.Architectures, + Symbol.value.drop_front(15), Targets, SymbolFlags::Undefined); else - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.Classes) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCClass, Name, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCClass, Name, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.ClassEHs) - File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol, Targets, + SymbolFlags::Undefined); for (auto &Symbol : Section.IVars) { auto Name = Symbol.value; if (Ctx->FileKind != FileType::TBD_V3) Name = Name.drop_front(); - File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, - Section.Architectures, SymbolFlags::Undefined); + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name, Targets, + SymbolFlags::Undefined); } for (auto &Symbol : Section.WeakRefSymbols) - File->addSymbol(SymbolKind::GlobalSymbol, Symbol, - Section.Architectures, + File->addSymbol(SymbolKind::GlobalSymbol, Symbol, Targets, SymbolFlags::Undefined | SymbolFlags::WeakReferenced); } @@ -513,7 +730,7 @@ template <> struct MappingTraits { std::vector Architectures; std::vector UUIDs; - PlatformKind Platform{PlatformKind::unknown}; + PlatformSet Platforms; StringRef InstallName; PackedVersion CurrentVersion; PackedVersion CompatibilityVersion; @@ -525,71 +742,336 @@ 
template <> struct MappingTraits { std::vector Undefineds; }; + static void setFileTypeForInput(TextAPIContext *Ctx, IO &IO) { + if (IO.mapTag("!tapi-tbd", false)) + Ctx->FileKind = FileType::TBD_V4; + else if (IO.mapTag("!tapi-tbd-v3", false)) + Ctx->FileKind = FileType::TBD_V3; + else if (IO.mapTag("!tapi-tbd-v2", false)) + Ctx->FileKind = FileType::TBD_V2; + else if (IO.mapTag("!tapi-tbd-v1", false) || + IO.mapTag("tag:yaml.org,2002:map", false)) + Ctx->FileKind = FileType::TBD_V1; + else { + Ctx->FileKind = FileType::Invalid; + return; + } + } + static void mapping(IO &IO, const InterfaceFile *&File) { auto *Ctx = reinterpret_cast(IO.getContext()); assert((!Ctx || !IO.outputting() || (Ctx && Ctx->FileKind != FileType::Invalid)) && "File type is not set in YAML context"); - MappingNormalization Keys(IO, File); - // prope file type when reading. if (!IO.outputting()) { - if (IO.mapTag("!tapi-tbd-v2", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v3", false)) - Ctx->FileKind = FileType::TBD_V2; - else if (IO.mapTag("!tapi-tbd-v1", false) || - IO.mapTag("tag:yaml.org,2002:map", false)) - Ctx->FileKind = FileType::TBD_V1; - else { + setFileTypeForInput(Ctx, IO); + switch (Ctx->FileKind) { + default: + break; + case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::Invalid: IO.setError("unsupported file type"); return; } - } - - // Set file tyoe when writing. - if (IO.outputting()) { + } else { + // Set file type when writing. switch (Ctx->FileKind) { default: llvm_unreachable("unexpected file type"); - case FileType::TBD_V1: - // Don't write the tag into the .tbd file for TBD v1. 
+ case FileType::TBD_V4: + mapKeysToValuesV4(IO, File); + return; + case FileType::TBD_V3: + IO.mapTag("!tapi-tbd-v3", true); break; case FileType::TBD_V2: IO.mapTag("!tapi-tbd-v2", true); break; - case FileType::TBD_V3: - IO.mapTag("!tapi-tbd-v3", true); + case FileType::TBD_V1: + // Don't write the tag into the .tbd file for TBD v1 break; } } + mapKeysToValues(Ctx->FileKind, IO, File); + } + using SectionList = std::vector; + struct NormalizedTBD_V4 { + explicit NormalizedTBD_V4(IO &IO) {} + NormalizedTBD_V4(IO &IO, const InterfaceFile *&File) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + TBDVersion = Ctx->FileKind >> 1; + Targets.insert(Targets.begin(), File->targets().begin(), + File->targets().end()); + for (const auto &IT : File->uuids()) + UUIDs.emplace_back(IT.first, IT.second); + InstallName = File->getInstallName(); + CurrentVersion = File->getCurrentVersion(); + CompatibilityVersion = File->getCompatibilityVersion(); + SwiftABIVersion = File->getSwiftABIVersion(); + + Flags = TBDFlags::None; + if (!File->isApplicationExtensionSafe()) + Flags |= TBDFlags::NotApplicationExtensionSafe; + + if (!File->isTwoLevelNamespace()) + Flags |= TBDFlags::FlatNamespace; + + if (File->isInstallAPI()) + Flags |= TBDFlags::InstallAPI; + + { + std::map valueToTargetList; + for (const auto &it : File->umbrellas()) + valueToTargetList[it.second].emplace_back(it.first); + + for (const auto &it : valueToTargetList) { + UmbrellaSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + it.second.begin(), it.second.end()); + CurrentSection.Umbrella = it.first; + ParentUmbrellas.emplace_back(std::move(CurrentSection)); + } + } + + assignTargetsToLibrary(File->allowableClients(), AllowableClients); + assignTargetsToLibrary(File->reexportedLibraries(), ReexportedLibraries); + + auto handleSymbols = + [](SectionList &CurrentSections, + InterfaceFile::const_filtered_symbol_range Symbols, + std::function Pred) { + std::set TargetSet; 
+ std::map SymbolToTargetList; + for (const auto *Symbol : Symbols) { + if (!Pred(Symbol)) + continue; + TargetList Targets(Symbol->targets()); + SymbolToTargetList[Symbol] = Targets; + TargetSet.emplace(std::move(Targets)); + } + for (const auto &TargetIDs : TargetSet) { + SymbolSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + TargetIDs.begin(), TargetIDs.end()); + + for (const auto &IT : SymbolToTargetList) { + if (IT.second != TargetIDs) + continue; + + const auto *Symbol = IT.first; + switch (Symbol->getKind()) { + case SymbolKind::GlobalSymbol: + if (Symbol->isWeakDefined()) + CurrentSection.WeakSymbols.emplace_back(Symbol->getName()); + else if (Symbol->isThreadLocalValue()) + CurrentSection.TlvSymbols.emplace_back(Symbol->getName()); + else + CurrentSection.Symbols.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClass: + CurrentSection.Classes.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCClassEHType: + CurrentSection.ClassEHs.emplace_back(Symbol->getName()); + break; + case SymbolKind::ObjectiveCInstanceVariable: + CurrentSection.Ivars.emplace_back(Symbol->getName()); + break; + } + } + sort(CurrentSection.Symbols); + sort(CurrentSection.Classes); + sort(CurrentSection.ClassEHs); + sort(CurrentSection.Ivars); + sort(CurrentSection.WeakSymbols); + sort(CurrentSection.TlvSymbols); + CurrentSections.emplace_back(std::move(CurrentSection)); + } + }; + + handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) { + return !Symbol->isReexported(); + }); + handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) { + return Symbol->isReexported(); + }); + handleSymbols(Undefineds, File->undefineds(), + [](const Symbol *Symbol) { return true; }); + } + + const InterfaceFile *denormalize(IO &IO) { + auto Ctx = reinterpret_cast(IO.getContext()); + assert(Ctx); + + auto *File = new InterfaceFile; + File->setPath(Ctx->Path); + File->setFileType(Ctx->FileKind); + for 
(auto &id : UUIDs) + File->addUUID(id.TargetID, id.Value); + File->addTargets(Targets); + File->setInstallName(InstallName); + File->setCurrentVersion(CurrentVersion); + File->setCompatibilityVersion(CompatibilityVersion); + File->setSwiftABIVersion(SwiftABIVersion); + for (const auto &CurrentSection : ParentUmbrellas) + for (const auto &target : CurrentSection.Targets) + File->addParentUmbrella(target, CurrentSection.Umbrella); + File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace)); + File->setApplicationExtensionSafe( + !(Flags & TBDFlags::NotApplicationExtensionSafe)); + File->setInstallAPI(Flags & TBDFlags::InstallAPI); + + for (const auto &CurrentSection : AllowableClients) { + for (const auto &lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addAllowableClient(lib, Target); + } + + for (const auto &CurrentSection : ReexportedLibraries) { + for (const auto &Lib : CurrentSection.Values) + for (const auto &Target : CurrentSection.Targets) + File->addReexportedLibrary(Lib, Target); + } + + auto handleSymbols = [File](const SectionList &CurrentSections, + SymbolFlags Flag = SymbolFlags::None) { + for (const auto &CurrentSection : CurrentSections) { + for (auto &sym : CurrentSection.Symbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, Flag); + + for (auto &sym : CurrentSection.Classes) + File->addSymbol(SymbolKind::ObjectiveCClass, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.ClassEHs) + File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.Ivars) + File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym, + CurrentSection.Targets); + + for (auto &sym : CurrentSection.WeakSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets); + for (auto &sym : CurrentSection.TlvSymbols) + File->addSymbol(SymbolKind::GlobalSymbol, sym, + CurrentSection.Targets, + 
SymbolFlags::ThreadLocalValue); + } + }; + + handleSymbols(Exports); + handleSymbols(Reexports, SymbolFlags::Rexported); + handleSymbols(Undefineds, SymbolFlags::Undefined); + + return File; + } + + unsigned TBDVersion; + std::vector UUIDs; + TargetList Targets; + StringRef InstallName; + PackedVersion CurrentVersion; + PackedVersion CompatibilityVersion; + SwiftVersion SwiftABIVersion{0}; + std::vector AllowableClients; + std::vector ReexportedLibraries; + TBDFlags Flags{TBDFlags::None}; + std::vector ParentUmbrellas; + SectionList Exports; + SectionList Reexports; + SectionList Undefineds; + + private: + void assignTargetsToLibrary(const std::vector &Libraries, + std::vector &Section) { + std::set targetSet; + std::map valueToTargetList; + for (const auto &library : Libraries) { + TargetList targets(library.targets()); + valueToTargetList[&library] = targets; + targetSet.emplace(std::move(targets)); + } + + for (const auto &targets : targetSet) { + MetadataSection CurrentSection; + CurrentSection.Targets.insert(CurrentSection.Targets.begin(), + targets.begin(), targets.end()); + + for (const auto &it : valueToTargetList) { + if (it.second != targets) + continue; + + CurrentSection.Values.emplace_back(it.first->getInstallName()); + } + llvm::sort(CurrentSection.Values); + Section.emplace_back(std::move(CurrentSection)); + } + } + }; + + static void mapKeysToValues(FileType FileKind, IO &IO, + const InterfaceFile *&File) { + MappingNormalization Keys(IO, File); IO.mapRequired("archs", Keys->Architectures); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("uuids", Keys->UUIDs); - IO.mapRequired("platform", Keys->Platform); - if (Ctx->FileKind != FileType::TBD_V1) + IO.mapRequired("platform", Keys->Platforms); + if (FileKind != FileType::TBD_V1) IO.mapOptional("flags", Keys->Flags, TBDFlags::None); IO.mapRequired("install-name", Keys->InstallName); IO.mapOptional("current-version", Keys->CurrentVersion, PackedVersion(1, 0, 
0)); IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, PackedVersion(1, 0, 0)); - if (Ctx->FileKind != FileType::TBD_V3) + if (FileKind != FileType::TBD_V3) IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0)); else IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); IO.mapOptional("objc-constraint", Keys->ObjCConstraint, - (Ctx->FileKind == FileType::TBD_V1) + (FileKind == FileType::TBD_V1) ? ObjCConstraintType::None : ObjCConstraintType::Retain_Release); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef()); IO.mapOptional("exports", Keys->Exports); - if (Ctx->FileKind != FileType::TBD_V1) + if (FileKind != FileType::TBD_V1) IO.mapOptional("undefineds", Keys->Undefineds); } + + static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) { + MappingNormalization Keys(IO, + File); + IO.mapTag("!tapi-tbd", true); + IO.mapRequired("tbd-version", Keys->TBDVersion); + IO.mapRequired("targets", Keys->Targets); + IO.mapOptional("uuids", Keys->UUIDs); + IO.mapOptional("flags", Keys->Flags, TBDFlags::None); + IO.mapRequired("install-name", Keys->InstallName); + IO.mapOptional("current-version", Keys->CurrentVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("compatibility-version", Keys->CompatibilityVersion, + PackedVersion(1, 0, 0)); + IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion, SwiftVersion(0)); + IO.mapOptional("parent-umbrella", Keys->ParentUmbrellas); + auto OptionKind = MetadataSection::Option::Clients; + IO.mapOptionalWithContext("allowable-clients", Keys->AllowableClients, + OptionKind); + OptionKind = MetadataSection::Option::Libraries; + IO.mapOptionalWithContext("reexported-libraries", Keys->ReexportedLibraries, + OptionKind); + IO.mapOptional("exports", Keys->Exports); + IO.mapOptional("reexports", Keys->Reexports); + IO.mapOptional("undefineds", Keys->Undefineds); + } }; template <> @@ 
-623,15 +1105,17 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) { } Expected> -TextAPIReader::get(std::unique_ptr InputBuffer) { +TextAPIReader::get(MemoryBufferRef InputBuffer) { TextAPIContext Ctx; - Ctx.Path = InputBuffer->getBufferIdentifier(); - yaml::Input YAMLIn(InputBuffer->getBuffer(), &Ctx, DiagHandler, &Ctx); + Ctx.Path = InputBuffer.getBufferIdentifier(); + yaml::Input YAMLIn(InputBuffer.getBuffer(), &Ctx, DiagHandler, &Ctx); // Fill vector with interface file objects created by parsing the YAML file. std::vector Files; YAMLIn >> Files; + // YAMLIn dynamically allocates for Interface file and in case of error, + // memory leak will occur unless wrapped around unique_ptr auto File = std::unique_ptr( const_cast(Files.front())); diff --git a/lib/TextAPI/MachO/TextStubCommon.cpp b/lib/TextAPI/MachO/TextStubCommon.cpp index 00382cd2457..183c5d5a93b 100644 --- a/lib/TextAPI/MachO/TextStubCommon.cpp +++ b/lib/TextAPI/MachO/TextStubCommon.cpp @@ -41,9 +41,21 @@ void ScalarEnumerationTraits::enumeration( IO.enumCase(Constraint, "gc", ObjCConstraintType::GC); } -void ScalarTraits::output(const PlatformKind &Value, void *, - raw_ostream &OS) { - switch (Value) { +void ScalarTraits::output(const PlatformSet &Values, void *IO, + raw_ostream &OS) { + + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Ctx && Ctx->FileKind == TBD_V3 && Values.count(PlatformKind::macOS) && + Values.count(PlatformKind::macCatalyst)) { + OS << "zippered"; + return; + } + + assert(Values.size() == 1U); + switch (*Values.begin()) { default: llvm_unreachable("unexpected platform"); break; @@ -64,21 +76,44 @@ void ScalarTraits::output(const PlatformKind &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, - PlatformKind &Value) { - Value = StringSwitch(Scalar) - .Case("macosx", PlatformKind::macOS) - .Case("ios", PlatformKind::iOS) - 
.Case("watchos", PlatformKind::watchOS) - .Case("tvos", PlatformKind::tvOS) - .Case("bridgeos", PlatformKind::bridgeOS) - .Default(PlatformKind::unknown); - if (Value == PlatformKind::unknown) +StringRef ScalarTraits::input(StringRef Scalar, void *IO, + PlatformSet &Values) { + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Scalar == "zippered") { + if (Ctx && Ctx->FileKind == FileType::TBD_V3) { + Values.insert(PlatformKind::macOS); + Values.insert(PlatformKind::macCatalyst); + return {}; + } + return "invalid platform"; + } + + auto Platform = StringSwitch(Scalar) + .Case("unknown", PlatformKind::unknown) + .Case("macosx", PlatformKind::macOS) + .Case("ios", PlatformKind::iOS) + .Case("watchos", PlatformKind::watchOS) + .Case("tvos", PlatformKind::tvOS) + .Case("bridgeos", PlatformKind::bridgeOS) + .Case("iosmac", PlatformKind::macCatalyst) + .Default(PlatformKind::unknown); + + if (Platform == PlatformKind::macCatalyst) + if (Ctx && Ctx->FileKind != FileType::TBD_V3) + return "invalid platform"; + + if (Platform == PlatformKind::unknown) return "unknown platform"; + + Values.insert(Platform); return {}; } -QuotingType ScalarTraits::mustQuote(StringRef) { + +QuotingType ScalarTraits::mustQuote(StringRef) { return QuotingType::None; } @@ -137,14 +172,25 @@ void ScalarTraits::output(const SwiftVersion &Value, void *, break; } } -StringRef ScalarTraits::input(StringRef Scalar, void *, +StringRef ScalarTraits::input(StringRef Scalar, void *IO, SwiftVersion &Value) { - Value = StringSwitch(Scalar) - .Case("1.0", 1) - .Case("1.1", 2) - .Case("2.0", 3) - .Case("3.0", 4) - .Default(0); + const auto *Ctx = reinterpret_cast(IO); + assert((!Ctx || Ctx->FileKind != FileType::Invalid) && + "File type is not set in context"); + + if (Ctx->FileKind == FileType::TBD_V4) { + if (Scalar.getAsInteger(10, Value)) + return "invalid Swift ABI version."; + return {}; + } else { + Value = 
StringSwitch(Scalar) + .Case("1.0", 1) + .Case("1.1", 2) + .Case("2.0", 3) + .Case("3.0", 4) + .Default(0); + } + if (Value != SwiftVersion(0)) return {}; @@ -166,10 +212,11 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, UUID &Value) { auto UUID = Split.second.trim(); if (UUID.empty()) return "invalid uuid string pair"; - Value.first = getArchitectureFromName(Arch); Value.second = UUID; + Value.first = Target{getArchitectureFromName(Arch), PlatformKind::unknown}; return {}; } + QuotingType ScalarTraits::mustQuote(StringRef) { return QuotingType::Single; } diff --git a/lib/TextAPI/MachO/TextStubCommon.h b/lib/TextAPI/MachO/TextStubCommon.h index c4dd1075b1c..a558cbcec9f 100644 --- a/lib/TextAPI/MachO/TextStubCommon.h +++ b/lib/TextAPI/MachO/TextStubCommon.h @@ -21,7 +21,7 @@ #include "llvm/TextAPI/MachO/InterfaceFile.h" #include "llvm/TextAPI/MachO/PackedVersion.h" -using UUID = std::pair; +using UUID = std::pair; LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef) LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion) @@ -41,9 +41,9 @@ template <> struct ScalarEnumerationTraits { static void enumeration(IO &, MachO::ObjCConstraintType &); }; -template <> struct ScalarTraits { - static void output(const MachO::PlatformKind &, void *, raw_ostream &); - static StringRef input(StringRef, void *, MachO::PlatformKind &); +template <> struct ScalarTraits { + static void output(const MachO::PlatformSet &, void *, raw_ostream &); + static StringRef input(StringRef, void *, MachO::PlatformSet &); static QuotingType mustQuote(StringRef); }; diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 0b406cc531a..19f253be795 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -74,13 +74,6 @@ static MachineTypes getEmulation(StringRef S) { .Default(IMAGE_FILE_MACHINE_UNKNOWN); } -static std::string getImplibPath(StringRef Path) { - SmallString<128> 
Out = StringRef("lib"); - Out.append(Path); - sys::path::replace_extension(Out, ".a"); - return Out.str(); -} - int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { DllOptTable Table; unsigned MissingIndex; @@ -149,13 +142,23 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { Def->OutputFile = Arg->getValue(); if (Def->OutputFile.empty()) { - llvm::errs() << "no output file specified\n"; + llvm::errs() << "no DLL name specified\n"; return 1; } std::string Path = Args.getLastArgValue(OPT_l); - if (Path.empty()) - Path = getImplibPath(Def->OutputFile); + + // If ExtName is set (if the "ExtName = Name" syntax was used), overwrite + // Name with ExtName and clear ExtName. When only creating an import + // library and not linking, the internal name is irrelevant. This avoids + // cases where writeImportLibrary tries to transplant decoration from + // symbol decoration onto ExtName. + for (COFFShortExport& E : Def->Exports) { + if (!E.ExtName.empty()) { + E.Name = E.ExtName; + E.ExtName.clear(); + } + } if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) { for (COFFShortExport& E : Def->Exports) { @@ -174,7 +177,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } } - if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) + if (!Path.empty() && + writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) return 1; return 0; } diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 18ab6637305..286191abff2 100644 --- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -13,6 +13,7 @@ #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -141,6 +142,125 @@ static void doList(opt::InputArgList& Args) { fatalOpenError(std::move(Err), B->getBufferIdentifier()); } 
+static COFF::MachineTypes getCOFFFileMachine(MemoryBufferRef MB) { + std::error_code EC; + object::COFFObjectFile Obj(MB, EC); + if (EC) { + llvm::errs() << MB.getBufferIdentifier() + << ": failed to open: " << EC.message() << '\n'; + exit(1); + } + + uint16_t Machine = Obj.getMachine(); + if (Machine != COFF::IMAGE_FILE_MACHINE_I386 && + Machine != COFF::IMAGE_FILE_MACHINE_AMD64 && + Machine != COFF::IMAGE_FILE_MACHINE_ARMNT && + Machine != COFF::IMAGE_FILE_MACHINE_ARM64) { + llvm::errs() << MB.getBufferIdentifier() << ": unknown machine: " << Machine + << '\n'; + exit(1); + } + + return static_cast(Machine); +} + +static COFF::MachineTypes getBitcodeFileMachine(MemoryBufferRef MB) { + Expected TripleStr = getBitcodeTargetTriple(MB); + if (!TripleStr) { + llvm::errs() << MB.getBufferIdentifier() + << ": failed to get target triple from bitcode\n"; + exit(1); + } + + switch (Triple(*TripleStr).getArch()) { + case Triple::x86: + return COFF::IMAGE_FILE_MACHINE_I386; + case Triple::x86_64: + return COFF::IMAGE_FILE_MACHINE_AMD64; + case Triple::arm: + return COFF::IMAGE_FILE_MACHINE_ARMNT; + case Triple::aarch64: + return COFF::IMAGE_FILE_MACHINE_ARM64; + default: + llvm::errs() << MB.getBufferIdentifier() + << ": unknown arch in target triple " << *TripleStr << '\n'; + exit(1); + } +} + +static void appendFile(std::vector &Members, + COFF::MachineTypes &LibMachine, + std::string &LibMachineSource, MemoryBufferRef MB) { + file_magic Magic = identify_magic(MB.getBuffer()); + + if (Magic != file_magic::coff_object && Magic != file_magic::bitcode && + Magic != file_magic::archive && Magic != file_magic::windows_resource) { + llvm::errs() << MB.getBufferIdentifier() + << ": not a COFF object, bitcode, archive or resource file\n"; + exit(1); + } + + // If a user attempts to add an archive to another archive, llvm-lib doesn't + // handle the first archive file as a single file. Instead, it extracts all + // members from the archive and add them to the second archive. 
This beahvior + // is for compatibility with Microsoft's lib command. + if (Magic == file_magic::archive) { + Error Err = Error::success(); + object::Archive Archive(MB, Err); + fatalOpenError(std::move(Err), MB.getBufferIdentifier()); + + for (auto &C : Archive.children(Err)) { + Expected ChildMB = C.getMemoryBufferRef(); + if (!ChildMB) { + handleAllErrors(ChildMB.takeError(), [&](const ErrorInfoBase &EIB) { + llvm::errs() << MB.getBufferIdentifier() << ": " << EIB.message() + << "\n"; + }); + exit(1); + } + + appendFile(Members, LibMachine, LibMachineSource, *ChildMB); + } + + fatalOpenError(std::move(Err), MB.getBufferIdentifier()); + return; + } + + // Check that all input files have the same machine type. + // Mixing normal objects and LTO bitcode files is fine as long as they + // have the same machine type. + // Doing this here duplicates the header parsing work that writeArchive() + // below does, but it's not a lot of work and it's a bit awkward to do + // in writeArchive() which needs to support many tools, can't assume the + // input is COFF, and doesn't have a good way to report errors. + if (Magic == file_magic::coff_object || Magic == file_magic::bitcode) { + COFF::MachineTypes FileMachine = (Magic == file_magic::coff_object) + ? getCOFFFileMachine(MB) + : getBitcodeFileMachine(MB); + + // FIXME: Once lld-link rejects multiple resource .obj files: + // Call convertResToCOFF() on .res files and add the resulting + // COFF file to the .lib output instead of adding the .res file, and remove + // this check. See PR42180. 
+ if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) { + if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) { + LibMachine = FileMachine; + LibMachineSource = + (" (inferred from earlier file '" + MB.getBufferIdentifier() + "')") + .str(); + } else if (LibMachine != FileMachine) { + llvm::errs() << MB.getBufferIdentifier() << ": file machine type " + << machineToStr(FileMachine) + << " conflicts with library machine type " + << machineToStr(LibMachine) << LibMachineSource << '\n'; + exit(1); + } + } + } + + Members.emplace_back(MB); +} + int llvm::libDriverMain(ArrayRef ArgsArr) { BumpPtrAllocator Alloc; StringSaver Saver(Alloc); @@ -195,104 +315,40 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { std::string(" (from '/machine:") + Arg->getValue() + "' flag)"; } - // Create a NewArchiveMember for each input file. + std::vector> MBs; + StringSet<> Seen; std::vector Members; + + // Create a NewArchiveMember for each input file. for (auto *Arg : Args.filtered(OPT_INPUT)) { + // Find a file std::string Path = findInputFile(Arg->getValue(), SearchPaths); if (Path.empty()) { llvm::errs() << Arg->getValue() << ": no such file or directory\n"; return 1; } - Expected MOrErr = - NewArchiveMember::getFile(Saver.save(Path), /*Deterministic=*/true); - if (!MOrErr) { - handleAllErrors(MOrErr.takeError(), [&](const ErrorInfoBase &EIB) { - llvm::errs() << Arg->getValue() << ": " << EIB.message() << "\n"; - }); - return 1; - } + // Input files are uniquified by pathname. If you specify the exact same + // path more than once, all but the first one are ignored. + // + // Note that there's a loophole in the rule; you can prepend `.\` or + // something like that to a path to make it look different, and they are + // handled as if they were different files. This behavior is compatible with + // Microsoft lib.exe. 
+ if (!Seen.insert(Path).second) + continue; - file_magic Magic = identify_magic(MOrErr->Buf->getBuffer()); - if (Magic != file_magic::coff_object && Magic != file_magic::bitcode && - Magic != file_magic::windows_resource) { - llvm::errs() << Arg->getValue() - << ": not a COFF object, bitcode or resource file\n"; - return 1; - } + // Open a file. + ErrorOr> MOrErr = + MemoryBuffer::getFile(Path, -1, false); + fatalOpenError(errorCodeToError(MOrErr.getError()), Path); + MemoryBufferRef MBRef = (*MOrErr)->getMemBufferRef(); - // Check that all input files have the same machine type. - // Mixing normal objects and LTO bitcode files is fine as long as they - // have the same machine type. - // Doing this here duplicates the header parsing work that writeArchive() - // below does, but it's not a lot of work and it's a bit awkward to do - // in writeArchive() which needs to support many tools, can't assume the - // input is COFF, and doesn't have a good way to report errors. - COFF::MachineTypes FileMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; - if (Magic == file_magic::coff_object) { - std::error_code EC; - object::COFFObjectFile Obj(*MOrErr->Buf, EC); - if (EC) { - llvm::errs() << Arg->getValue() << ": failed to open: " << EC.message() - << '\n'; - return 1; - } - uint16_t Machine = Obj.getMachine(); - if (Machine != COFF::IMAGE_FILE_MACHINE_I386 && - Machine != COFF::IMAGE_FILE_MACHINE_AMD64 && - Machine != COFF::IMAGE_FILE_MACHINE_ARMNT && - Machine != COFF::IMAGE_FILE_MACHINE_ARM64) { - llvm::errs() << Arg->getValue() << ": unknown machine: " << Machine - << '\n'; - return 1; - } - FileMachine = static_cast(Machine); - } else if (Magic == file_magic::bitcode) { - Expected TripleStr = getBitcodeTargetTriple(*MOrErr->Buf); - if (!TripleStr) { - llvm::errs() << Arg->getValue() - << ": failed to get target triple from bitcode\n"; - return 1; - } - switch (Triple(*TripleStr).getArch()) { - case Triple::x86: - FileMachine = COFF::IMAGE_FILE_MACHINE_I386; - break; - case 
Triple::x86_64: - FileMachine = COFF::IMAGE_FILE_MACHINE_AMD64; - break; - case Triple::arm: - FileMachine = COFF::IMAGE_FILE_MACHINE_ARMNT; - break; - case Triple::aarch64: - FileMachine = COFF::IMAGE_FILE_MACHINE_ARM64; - break; - default: - llvm::errs() << Arg->getValue() << ": unknown arch in target triple " - << *TripleStr << '\n'; - return 1; - } - } + // Append a file. + appendFile(Members, LibMachine, LibMachineSource, MBRef); - // FIXME: Once lld-link rejects multiple resource .obj files: - // Call convertResToCOFF() on .res files and add the resulting - // COFF file to the .lib output instead of adding the .res file, and remove - // this check. See PR42180. - if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) { - if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) { - LibMachine = FileMachine; - LibMachineSource = std::string(" (inferred from earlier file '") + - Arg->getValue() + "')"; - } else if (LibMachine != FileMachine) { - llvm::errs() << Arg->getValue() << ": file machine type " - << machineToStr(FileMachine) - << " conflicts with library machine type " - << machineToStr(LibMachine) << LibMachineSource << '\n'; - return 1; - } - } - - Members.emplace_back(std::move(*MOrErr)); + // Take the ownership of the file buffer to keep the file open. + MBs.push_back(std::move(*MOrErr)); } // Create an archive file. 
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 06222d7e7e4..a24de3ca213 100644 --- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -121,14 +121,13 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) { BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1); BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1); Instruction *TermI = GuardBB->getTerminator(); - BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; - if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), TrueBB, - FalseBB))) + BasicBlock *PhiBB = Phi.getParent(); + if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()), + m_SpecificBB(PhiBB), m_SpecificBB(RotBB)))) return false; - BasicBlock *PhiBB = Phi.getParent(); - if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB) + if (Pred != CmpInst::ICMP_EQ) return false; // We matched a variation of this IR pattern: @@ -251,6 +250,72 @@ static bool foldAnyOrAllBitsSet(Instruction &I) { return true; } +// Try to recognize below function as popcount intrinsic. +// This is the "best" algorithm from +// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel +// Also used in TargetLowering::expandCTPOP(). +// +// int popcount(unsigned int i) { +// i = i - ((i >> 1) & 0x55555555); +// i = (i & 0x33333333) + ((i >> 2) & 0x33333333); +// i = ((i + (i >> 4)) & 0x0F0F0F0F); +// return (i * 0x01010101) >> 24; +// } +static bool tryToRecognizePopCount(Instruction &I) { + if (I.getOpcode() != Instruction::LShr) + return false; + + Type *Ty = I.getType(); + if (!Ty->isIntOrIntVectorTy()) + return false; + + unsigned Len = Ty->getScalarSizeInBits(); + // FIXME: fix Len == 8 and other irregular type lengths. 
+ if (!(Len <= 128 && Len > 8 && Len % 8 == 0)) + return false; + + APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55)); + APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33)); + APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F)); + APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01)); + APInt MaskShift = APInt(Len, Len - 8); + + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *MulOp0; + // Matching "(i * 0x01010101...) >> 24". + if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) && + match(Op1, m_SpecificInt(MaskShift))) { + Value *ShiftOp0; + // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". + if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), + m_Deferred(ShiftOp0)), + m_SpecificInt(Mask0F)))) { + Value *AndOp0; + // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)". + if (match(ShiftOp0, + m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)), + m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)), + m_SpecificInt(Mask33))))) { + Value *Root, *SubOp1; + // Matching "i - ((i >> 1) & 0x55555555...)". + if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) && + match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)), + m_SpecificInt(Mask55)))) { + LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n"); + IRBuilder<> Builder(&I); + Function *Func = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::ctpop, I.getType()); + I.replaceAllUsesWith(Builder.CreateCall(Func, {Root})); + return true; + } + } + } + } + + return false; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. 
@@ -269,6 +334,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedRotateToFunnelShift(I); + MadeChange |= tryToRecognizePopCount(I); } } @@ -303,7 +369,7 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage( } bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); return runImpl(F, TLI, DT); } diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp index 1fb0a114d0c..c3e05577f04 100644 --- a/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -73,6 +73,8 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) { II->replaceAllUsesWith(ConstantInt::getTrue(Context)); break; case Intrinsic::coro_id: + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: II->replaceAllUsesWith(ConstantTokenNone::get(Context)); break; case Intrinsic::coro_subfn_addr: @@ -111,8 +113,9 @@ struct CoroCleanup : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", - "llvm.coro.id"})) - L = llvm::make_unique(M); + "llvm.coro.id", "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once"})) + L = std::make_unique(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index 692697d6f32..55993d33ee4 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -91,13 +91,14 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) { Value *Operand = II->getArgOperand(0); // ResumeFnAddr is the first pointer sized element of the coroutine frame. 
+ static_assert(coro::Shape::SwitchFieldIndex::Resume == 0, + "resume function not at offset zero"); auto *FrameTy = Int8Ptr; PointerType *FramePtrTy = FrameTy->getPointerTo(); Builder.SetInsertPoint(II); auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy); - auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0); - auto *Load = Builder.CreateLoad(FrameTy, Gep); + auto *Load = Builder.CreateLoad(BCI); auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); II->replaceAllUsesWith(Cond); @@ -189,6 +190,10 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { } } break; + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: + F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); + break; case Intrinsic::coro_resume: lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex); break; @@ -231,11 +236,18 @@ struct CoroEarly : public FunctionPass { // This pass has work to do only if we find intrinsics we are going to lower // in the module. bool doInitialization(Module &M) override { - if (coro::declaresIntrinsics( - M, {"llvm.coro.id", "llvm.coro.destroy", "llvm.coro.done", - "llvm.coro.end", "llvm.coro.noop", "llvm.coro.free", - "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"})) - L = llvm::make_unique(M); + if (coro::declaresIntrinsics(M, {"llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.noop", + "llvm.coro.free", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.suspend"})) + L = std::make_unique(M); return false; } diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index 6707aa1c827..aca77119023 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -286,7 +286,7 @@ struct CoroElide : FunctionPass { bool doInitialization(Module &M) override { if (coro::declaresIntrinsics(M, {"llvm.coro.id"})) - L = llvm::make_unique(M); + L = std::make_unique(M); 
return false; } diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp index 58bf22bee29..2c42cf8a6d2 100644 --- a/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/lib/Transforms/Coroutines/CoroFrame.cpp @@ -18,6 +18,7 @@ #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" @@ -28,6 +29,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace llvm; @@ -120,6 +122,15 @@ struct SuspendCrossingInfo { return false; BasicBlock *UseBB = I->getParent(); + + // As a special case, treat uses by an llvm.coro.suspend.retcon + // as if they were uses in the suspend's single predecessor: the + // uses conceptually occur before the suspend. + if (isa(I)) { + UseBB = UseBB->getSinglePredecessor(); + assert(UseBB && "should have split coro.suspend into its own block"); + } + return hasPathCrossingSuspendPoint(DefBB, UseBB); } @@ -128,7 +139,17 @@ struct SuspendCrossingInfo { } bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { - return isDefinitionAcrossSuspend(I.getParent(), U); + auto *DefBB = I.getParent(); + + // As a special case, treat values produced by an llvm.coro.suspend.* + // as if they were defined in the single successor: the uses + // conceptually occur after the suspend. 
+ if (isa(I)) { + DefBB = DefBB->getSingleSuccessor(); + assert(DefBB && "should have split coro.suspend into its own block"); + } + + return isDefinitionAcrossSuspend(DefBB, U); } }; } // end anonymous namespace @@ -183,9 +204,10 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) B.Suspend = true; B.Kills |= B.Consumes; }; - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { + for (auto *CSI : Shape.CoroSuspends) { markSuspendBlock(CSI); - markSuspendBlock(CSI->getCoroSave()); + if (auto *Save = CSI->getCoroSave()) + markSuspendBlock(Save); } // Iterate propagating consumes and kills until they stop changing. @@ -261,11 +283,13 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) // We build up the list of spills for every case where a use is separated // from the definition by a suspend point. +static const unsigned InvalidFieldIndex = ~0U; + namespace { class Spill { Value *Def = nullptr; Instruction *User = nullptr; - unsigned FieldNo = 0; + unsigned FieldNo = InvalidFieldIndex; public: Spill(Value *Def, llvm::User *U) : Def(Def), User(cast(U)) {} @@ -280,11 +304,11 @@ public: // the definition the first time they encounter it. Consider refactoring // SpillInfo into two arrays to normalize the spill representation. 
unsigned fieldIndex() const { - assert(FieldNo && "Accessing unassigned field"); + assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field"); return FieldNo; } void setFieldIndex(unsigned FieldNumber) { - assert(!FieldNo && "Reassigning field number"); + assert(FieldNo == InvalidFieldIndex && "Reassigning field number"); FieldNo = FieldNumber; } }; @@ -376,18 +400,30 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, SmallString<32> Name(F.getName()); Name.append(".Frame"); StructType *FrameTy = StructType::create(C, Name); - auto *FramePtrTy = FrameTy->getPointerTo(); - auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, - /*isVarArg=*/false); - auto *FnPtrTy = FnTy->getPointerTo(); + SmallVector Types; + + AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); + + if (Shape.ABI == coro::ABI::Switch) { + auto *FramePtrTy = FrameTy->getPointerTo(); + auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, + /*IsVarArg=*/false); + auto *FnPtrTy = FnTy->getPointerTo(); + + // Figure out how wide should be an integer type storing the suspend index. + unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); + Type *PromiseType = PromiseAlloca + ? PromiseAlloca->getType()->getElementType() + : Type::getInt1Ty(C); + Type *IndexType = Type::getIntNTy(C, IndexBits); + Types.push_back(FnPtrTy); + Types.push_back(FnPtrTy); + Types.push_back(PromiseType); + Types.push_back(IndexType); + } else { + assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); + } - // Figure out how wide should be an integer type storing the suspend index. - unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - Type *PromiseType = Shape.PromiseAlloca - ? 
Shape.PromiseAlloca->getType()->getElementType() - : Type::getInt1Ty(C); - SmallVector Types{FnPtrTy, FnPtrTy, PromiseType, - Type::getIntNTy(C, IndexBits)}; Value *CurrentDef = nullptr; Padder.addTypes(Types); @@ -399,7 +435,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, CurrentDef = S.def(); // PromiseAlloca was already added to Types array earlier. - if (CurrentDef == Shape.PromiseAlloca) + if (CurrentDef == PromiseAlloca) continue; uint64_t Count = 1; @@ -430,9 +466,80 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, } FrameTy->setBody(Types); + switch (Shape.ABI) { + case coro::ABI::Switch: + break; + + // Remember whether the frame is inline in the storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto &Layout = F.getParent()->getDataLayout(); + auto Id = Shape.getRetconCoroId(); + Shape.RetconLowering.IsFrameInlineInStorage + = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() && + Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment()); + break; + } + } + return FrameTy; } +// We use a pointer use visitor to discover if there are any writes into an +// alloca that dominates CoroBegin. If that is the case, insertSpills will copy +// the value from the alloca into the coroutine frame spill slot corresponding +// to that alloca. +namespace { +struct AllocaUseVisitor : PtrUseVisitor { + using Base = PtrUseVisitor; + AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, + const CoroBeginInst &CB) + : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} + + // We are only interested in uses that dominate coro.begin. + void visit(Instruction &I) { + if (DT.dominates(&I, &CoroBegin)) + Base::visit(I); + } + // We need to provide this overload as PtrUseVisitor uses a pointer based + // visiting function. + void visit(Instruction *I) { return visit(*I); } + + void visitLoadInst(LoadInst &) {} // Good. Nothing to do. 
+ + // If the use is an operand, the pointer escaped and anything can write into + // that memory. If the use is the pointer, we are definitely writing into the + // alloca and therefore we need to copy. + void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + + // Any other instruction that is not filtered out by PtrUseVisitor, will + // result in the copy. + void visitInstruction(Instruction &I) { PI.setAborted(&I); } + +private: + const DominatorTree &DT; + const CoroBeginInst &CoroBegin; +}; +} // namespace +static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, + const CoroBeginInst &CB) { + const DataLayout &DL = A.getModule()->getDataLayout(); + AllocaUseVisitor Visitor(DL, DT, CB); + auto PtrI = Visitor.visitPtr(A); + if (PtrI.isEscaped() || PtrI.isAborted()) { + auto *PointerEscapingInstr = PtrI.getEscapingInst() + ? PtrI.getEscapingInst() + : PtrI.getAbortingInst(); + if (PointerEscapingInstr) { + LLVM_DEBUG( + dbgs() << "AllocaInst copy was triggered by instruction: " + << *PointerEscapingInstr << "\n"); + } + return true; + } + return false; +} + // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that // catchswitch, like all other EHPads must be the first nonPHI in a block. 
@@ -476,7 +583,7 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { // whatever // // -static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { +static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(CB->getNextNode()); @@ -484,11 +591,14 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { PointerType *FramePtrTy = FrameTy->getPointerTo(); auto *FramePtr = cast(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); + DominatorTree DT(*CB->getFunction()); Value *CurrentValue = nullptr; BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; - unsigned Index = 0; // Proper field number will be read from field definition. + + // Proper field number will be read from field definition. + unsigned Index = InvalidFieldIndex; // We need to keep track of any allocas that need "spilling" // since they will live in the coroutine frame now, all access to them @@ -496,9 +606,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // we remember allocas and their indices to be handled once we processed // all the spills. SmallVector, 4> Allocas; - // Promise alloca (if present) has a fixed field number (Shape::PromiseField) - if (Shape.PromiseAlloca) - Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField); + // Promise alloca (if present) has a fixed field number. + if (auto *PromiseAlloca = Shape.getPromiseAlloca()) { + assert(Shape.ABI == coro::ABI::Switch); + Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise); + } // Create a GEP with the given index into the coroutine frame for the original // value Orig. 
Appends an extra 0 index for array-allocas, preserving the @@ -526,7 +638,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // Create a load instruction to reload the spilled value from the coroutine // frame. auto CreateReload = [&](Instruction *InsertBefore) { - assert(Index && "accessing unassigned field number"); + assert(Index != InvalidFieldIndex && "accessing unassigned field number"); Builder.SetInsertPoint(InsertBefore); auto *G = GetFramePointer(Index, CurrentValue); @@ -558,29 +670,45 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { // coroutine frame. Instruction *InsertPt = nullptr; - if (isa(CurrentValue)) { + if (auto Arg = dyn_cast(CurrentValue)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. InsertPt = FramePtr->getNextNode(); + + // If we're spilling an Argument, make sure we clear 'nocapture' + // from the coroutine function. + Arg->getParent()->removeParamAttr(Arg->getArgNo(), + Attribute::NoCapture); + } else if (auto *II = dyn_cast(CurrentValue)) { // If we are spilling the result of the invoke instruction, split the // normal edge and insert the spill in the new block. auto NewBB = SplitEdge(II->getParent(), II->getNormalDest()); InsertPt = NewBB->getTerminator(); - } else if (dyn_cast(CurrentValue)) { + } else if (isa(CurrentValue)) { // Skip the PHINodes and EH pads instructions. BasicBlock *DefBlock = cast(E.def())->getParent(); if (auto *CSI = dyn_cast(DefBlock->getTerminator())) InsertPt = splitBeforeCatchSwitch(CSI); else InsertPt = &*DefBlock->getFirstInsertionPt(); + } else if (auto CSI = dyn_cast(CurrentValue)) { + // Don't spill immediately after a suspend; splitting assumes + // that the suspend will be followed by a branch. 
+ InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); } else { + auto *I = cast(E.def()); + assert(!I->isTerminator() && "unexpected terminator"); // For all other values, the spill is placed immediately after // the definition. - assert(!cast(E.def())->isTerminator() && - "unexpected terminator"); - InsertPt = cast(E.def())->getNextNode(); + if (DT.dominates(CB, I)) { + InsertPt = I->getNextNode(); + } else { + // Unless, it is not dominated by CoroBegin, then it will be + // inserted immediately after CoroFrame is computed. + InsertPt = FramePtr->getNextNode(); + } } Builder.SetInsertPoint(InsertPt); @@ -613,21 +741,53 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { } BasicBlock *FramePtrBB = FramePtr->getParent(); - Shape.AllocaSpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); - Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(), - "PostSpill"); - Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + auto SpillBlock = + FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); + Shape.AllocaSpillBlock = SpillBlock; // If we found any allocas, replace all of their remaining uses with Geps. + // Note: we cannot do it indiscriminately as some of the uses may not be + // dominated by CoroBegin. 
+ bool MightNeedToCopy = false; + Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); + SmallVector UsersToUpdate; for (auto &P : Allocas) { - auto *G = GetFramePointer(P.second, P.first); + AllocaInst *const A = P.first; + UsersToUpdate.clear(); + for (User *U : A->users()) { + auto *I = cast(U); + if (DT.dominates(CB, I)) + UsersToUpdate.push_back(I); + else + MightNeedToCopy = true; + } + if (!UsersToUpdate.empty()) { + auto *G = GetFramePointer(P.second, A); + G->takeName(A); + for (Instruction *I : UsersToUpdate) + I->replaceUsesOfWith(A, G); + } + } + // If we discovered such uses not dominated by CoroBegin, see if any of them + // preceed coro begin and have instructions that can modify the + // value of the alloca and therefore would require a copying the value into + // the spill slot in the coroutine frame. + if (MightNeedToCopy) { + Builder.SetInsertPoint(FramePtr->getNextNode()); - // We are not using ReplaceInstWithInst(P.first, cast(G)) here, - // as we are changing location of the instruction. - G->takeName(P.first); - P.first->replaceAllUsesWith(G); - P.first->eraseFromParent(); + for (auto &P : Allocas) { + AllocaInst *const A = P.first; + if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { + if (A->isArrayAllocation()) + report_fatal_error( + "Coroutines cannot handle copying of array allocas yet"); + + auto *G = GetFramePointer(P.second, A); + auto *Value = Builder.CreateLoad(A); + Builder.CreateStore(Value, G); + } + } } return FramePtr; } @@ -829,52 +989,6 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB, } } -// Move early uses of spilled variable after CoroBegin. -// For example, if a parameter had address taken, we may end up with the code -// like: -// define @f(i32 %n) { -// %n.addr = alloca i32 -// store %n, %n.addr -// ... 
-// call @coro.begin -// we need to move the store after coro.begin -static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills, - CoroBeginInst *CoroBegin) { - DominatorTree DT(F); - SmallVector NeedsMoving; - - Value *CurrentValue = nullptr; - - for (auto const &E : Spills) { - if (CurrentValue == E.def()) - continue; - - CurrentValue = E.def(); - - for (User *U : CurrentValue->users()) { - Instruction *I = cast(U); - if (!DT.dominates(CoroBegin, I)) { - LLVM_DEBUG(dbgs() << "will move: " << *I << "\n"); - - // TODO: Make this more robust. Currently if we run into a situation - // where simple instruction move won't work we panic and - // report_fatal_error. - for (User *UI : I->users()) { - if (!DT.dominates(CoroBegin, cast(UI))) - report_fatal_error("cannot move instruction since its users are not" - " dominated by CoroBegin"); - } - - NeedsMoving.push_back(I); - } - } - } - - Instruction *InsertPt = CoroBegin->getNextNode(); - for (Instruction *I : NeedsMoving) - I->moveBefore(InsertPt); -} - // Splits the block at a particular instruction unless it is the first // instruction in the block with a single predecessor. static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { @@ -895,21 +1009,337 @@ static void splitAround(Instruction *I, const Twine &Name) { splitBlockIfNotFirst(I->getNextNode(), "After" + Name); } +static bool isSuspendBlock(BasicBlock *BB) { + return isa(BB->front()); +} + +typedef SmallPtrSet VisitedBlocksSet; + +/// Does control flow starting at the given block ever reach a suspend +/// instruction before reaching a block in VisitedOrFreeBBs? +static bool isSuspendReachableFrom(BasicBlock *From, + VisitedBlocksSet &VisitedOrFreeBBs) { + // Eagerly try to add this block to the visited set. If it's already + // there, stop recursing; this path doesn't reach a suspend before + // either looping or reaching a freeing block. 
+ if (!VisitedOrFreeBBs.insert(From).second) + return false; + + // We assume that we'll already have split suspends into their own blocks. + if (isSuspendBlock(From)) + return true; + + // Recurse on the successors. + for (auto Succ : successors(From)) { + if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) + return true; + } + + return false; +} + +/// Is the given alloca "local", i.e. bounded in lifetime to not cross a +/// suspend point? +static bool isLocalAlloca(CoroAllocaAllocInst *AI) { + // Seed the visited set with all the basic blocks containing a free + // so that we won't pass them up. + VisitedBlocksSet VisitedOrFreeBBs; + for (auto User : AI->users()) { + if (auto FI = dyn_cast(User)) + VisitedOrFreeBBs.insert(FI->getParent()); + } + + return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); +} + +/// After we split the coroutine, will the given basic block be along +/// an obvious exit path for the resumption function? +static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, + unsigned depth = 3) { + // If we've bottomed out our depth count, stop searching and assume + // that the path might loop back. + if (depth == 0) return false; + + // If this is a suspend block, we're about to exit the resumption function. + if (isSuspendBlock(BB)) return true; + + // Recurse into the successors. + for (auto Succ : successors(BB)) { + if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1)) + return false; + } + + // If none of the successors leads back in a loop, we're on an exit/abort. + return true; +} + +static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { + // Look for a free that isn't sufficiently obviously followed by + // either a suspend or a termination, i.e. something that will leave + // the coro resumption frame. 
+ for (auto U : AI->users()) { + auto FI = dyn_cast(U); + if (!FI) continue; + + if (!willLeaveFunctionImmediatelyAfter(FI->getParent())) + return true; + } + + // If we never found one, we don't need a stack save. + return false; +} + +/// Turn each of the given local allocas into a normal (dynamic) alloca +/// instruction. +static void lowerLocalAllocas(ArrayRef LocalAllocas, + SmallVectorImpl &DeadInsts) { + for (auto AI : LocalAllocas) { + auto M = AI->getModule(); + IRBuilder<> Builder(AI); + + // Save the stack depth. Try to avoid doing this if the stackrestore + // is going to immediately precede a return or something. + Value *StackSave = nullptr; + if (localAllocaNeedsStackSave(AI)) + StackSave = Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::stacksave)); + + // Allocate memory. + auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); + Alloca->setAlignment(MaybeAlign(AI->getAlignment())); + + for (auto U : AI->users()) { + // Replace gets with the allocation. + if (isa(U)) { + U->replaceAllUsesWith(Alloca); + + // Replace frees with stackrestores. This is safe because + // alloca.alloc is required to obey a stack discipline, although we + // don't enforce that structurally. + } else { + auto FI = cast(U); + if (StackSave) { + Builder.SetInsertPoint(FI); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::stackrestore), + StackSave); + } + } + DeadInsts.push_back(cast(U)); + } + + DeadInsts.push_back(AI); + } +} + +/// Turn the given coro.alloca.alloc call into a dynamic allocation. +/// This happens during the all-instructions iteration, so it must not +/// delete the call. 
+static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, + coro::Shape &Shape, + SmallVectorImpl &DeadInsts) { + IRBuilder<> Builder(AI); + auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); + + for (User *U : AI->users()) { + if (isa(U)) { + U->replaceAllUsesWith(Alloc); + } else { + auto FI = cast(U); + Builder.SetInsertPoint(FI); + Shape.emitDealloc(Builder, Alloc, nullptr); + } + DeadInsts.push_back(cast(U)); + } + + // Push this on last so that it gets deleted after all the others. + DeadInsts.push_back(AI); + + // Return the new allocation value so that we can check for needed spills. + return cast(Alloc); +} + +/// Get the current swifterror value. +static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, + coro::Shape &Shape) { + // Make a fake function pointer as a sort of intrinsic. + auto FnTy = FunctionType::get(ValueTy, {}, false); + auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); + + auto Call = Builder.CreateCall(Fn, {}); + Shape.SwiftErrorOps.push_back(Call); + + return Call; +} + +/// Set the given value as the current swifterror value. +/// +/// Returns a slot that can be used as a swifterror slot. +static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, + coro::Shape &Shape) { + // Make a fake function pointer as a sort of intrinsic. + auto FnTy = FunctionType::get(V->getType()->getPointerTo(), + {V->getType()}, false); + auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); + + auto Call = Builder.CreateCall(Fn, { V }); + Shape.SwiftErrorOps.push_back(Call); + + return Call; +} + +/// Set the swifterror value from the given alloca before a call, +/// then put in back in the alloca afterwards. +/// +/// Returns an address that will stand in for the swifterror slot +/// until splitting. 
+static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, + AllocaInst *Alloca, + coro::Shape &Shape) { + auto ValueTy = Alloca->getAllocatedType(); + IRBuilder<> Builder(Call); + + // Load the current value from the alloca and set it as the + // swifterror value. + auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca); + auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape); + + // Move to after the call. Since swifterror only has a guaranteed + // value on normal exits, we can ignore implicit and explicit unwind + // edges. + if (isa(Call)) { + Builder.SetInsertPoint(Call->getNextNode()); + } else { + auto Invoke = cast(Call); + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); + } + + // Get the current swifterror value and store it to the alloca. + auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); + Builder.CreateStore(ValueAfterCall, Alloca); + + return Addr; +} + +/// Eliminate a formerly-swifterror alloca by inserting the get/set +/// intrinsics and attempting to MemToReg the alloca away. +static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, + coro::Shape &Shape) { + for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { + // We're likely changing the use list, so use a mutation-safe + // iteration pattern. + auto &Use = *UI; + ++UI; + + // swifterror values can only be used in very specific ways. + // We take advantage of that here. + auto User = Use.getUser(); + if (isa(User) || isa(User)) + continue; + + assert(isa(User) || isa(User)); + auto Call = cast(User); + + auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); + + // Use the returned slot address as the call argument. + Use.set(Addr); + } + + // All the uses should be loads and stores now. + assert(isAllocaPromotable(Alloca)); +} + +/// "Eliminate" a swifterror argument by reducing it to the alloca case +/// and then loading and storing in the prologue and epilog. 
+/// +/// The argument keeps the swifterror flag. +static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, + coro::Shape &Shape, + SmallVectorImpl &AllocasToPromote) { + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + + auto ArgTy = cast(Arg.getType()); + auto ValueTy = ArgTy->getElementType(); + + // Reduce to the alloca case: + + // Create an alloca and replace all uses of the arg with it. + auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace()); + Arg.replaceAllUsesWith(Alloca); + + // Set an initial value in the alloca. swifterror is always null on entry. + auto InitialValue = Constant::getNullValue(ValueTy); + Builder.CreateStore(InitialValue, Alloca); + + // Find all the suspends in the function and save and restore around them. + for (auto Suspend : Shape.CoroSuspends) { + (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape); + } + + // Find all the coro.ends in the function and restore the error value. + for (auto End : Shape.CoroEnds) { + Builder.SetInsertPoint(End); + auto FinalValue = Builder.CreateLoad(ValueTy, Alloca); + (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape); + } + + // Now we can use the alloca logic. + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); +} + +/// Eliminate all problematic uses of swifterror arguments and allocas +/// from the function. We'll fix them up later when splitting the function. +static void eliminateSwiftError(Function &F, coro::Shape &Shape) { + SmallVector AllocasToPromote; + + // Look for a swifterror argument. + for (auto &Arg : F.args()) { + if (!Arg.hasSwiftErrorAttr()) continue; + + eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote); + break; + } + + // Look for swifterror allocas. + for (auto &Inst : F.getEntryBlock()) { + auto Alloca = dyn_cast(&Inst); + if (!Alloca || !Alloca->isSwiftError()) continue; + + // Clear the swifterror flag. 
+ Alloca->setSwiftError(false); + + AllocasToPromote.push_back(Alloca); + eliminateSwiftErrorAlloca(F, Alloca, Shape); + } + + // If we have any allocas to promote, compute a dominator tree and + // promote them en masse. + if (!AllocasToPromote.empty()) { + DominatorTree DT(F); + PromoteMemToReg(AllocasToPromote, DT); + } +} + void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite // access to local variables. LowerDbgDeclare(F); - Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise(); - if (Shape.PromiseAlloca) { - Shape.CoroBegin->getId()->clearPromise(); + eliminateSwiftError(F, Shape); + + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca) { + Shape.getSwitchCoroId()->clearPromise(); } // Make sure that all coro.save, coro.suspend and the fallthrough coro.end // intrinsics are in their own blocks to simplify the logic of building up // SuspendCrossing data. - for (CoroSuspendInst *CSI : Shape.CoroSuspends) { - splitAround(CSI->getCoroSave(), "CoroSave"); + for (auto *CSI : Shape.CoroSuspends) { + if (auto *Save = CSI->getCoroSave()) + splitAround(Save, "CoroSave"); splitAround(CSI, "CoroSuspend"); } @@ -926,6 +1356,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; + SmallVector LocalAllocas; + SmallVector DeadInstructions; for (int Repeat = 0; Repeat < 4; ++Repeat) { // See if there are materializable instructions across suspend points. @@ -955,11 +1387,40 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // of the Coroutine Frame. if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) continue; + // The Coroutine Promise always included into coroutine frame, no need to // check for suspend crossing. 
- if (Shape.PromiseAlloca == &I) + if (Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.PromiseAlloca == &I) continue; + // Handle alloca.alloc specially here. + if (auto AI = dyn_cast(&I)) { + // Check whether the alloca's lifetime is bounded by suspend points. + if (isLocalAlloca(AI)) { + LocalAllocas.push_back(AI); + continue; + } + + // If not, do a quick rewrite of the alloca and then add spills of + // the rewritten value. The rewrite doesn't invalidate anything in + // Spills because the other alloca intrinsics have no other operands + // besides AI, and it doesn't invalidate the iteration because we delay + // erasing AI. + auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); + + for (User *U : Alloc->users()) { + if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) + Spills.emplace_back(Alloc, U); + } + continue; + } + + // Ignore alloca.get; we process this as part of coro.alloca.alloc. + if (isa(I)) { + continue; + } + for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) { // We cannot spill a token. @@ -970,7 +1431,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { } } LLVM_DEBUG(dump("Spills", Spills)); - moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, Spills); Shape.FramePtr = insertSpills(Spills, Shape); + lowerLocalAllocas(LocalAllocas, DeadInstructions); + + for (auto I : DeadInstructions) + I->eraseFromParent(); } diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h index 5e19d7642e3..de2d2920cb1 100644 --- a/lib/Transforms/Coroutines/CoroInstr.h +++ b/lib/Transforms/Coroutines/CoroInstr.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -77,10 +78,8 @@ public: } }; -/// This represents the llvm.coro.alloc instruction. 
-class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { - enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; - +/// This represents a common base class for llvm.coro.id instructions. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdInst : public IntrinsicInst { public: CoroAllocInst *getCoroAlloc() { for (User *U : users()) @@ -97,6 +96,24 @@ public: llvm_unreachable("no coro.begin associated with coro.id"); } + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id || + ID == Intrinsic::coro_id_retcon || + ID == Intrinsic::coro_id_retcon_once; + } + + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdInst : public AnyCoroIdInst { + enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; + +public: AllocaInst *getPromise() const { Value *Arg = getArgOperand(PromiseArg); return isa(Arg) @@ -182,6 +199,80 @@ public: } }; +/// This represents either the llvm.coro.id.retcon or +/// llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY AnyCoroIdRetconInst : public AnyCoroIdInst { + enum { SizeArg, AlignArg, StorageArg, PrototypeArg, AllocArg, DeallocArg }; + +public: + void checkWellFormed() const; + + uint64_t getStorageSize() const { + return cast(getArgOperand(SizeArg))->getZExtValue(); + } + + uint64_t getStorageAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + + Value *getStorage() const { + return getArgOperand(StorageArg); + } + + /// Return the prototype for the continuation function. The type, + /// attributes, and calling convention of the continuation function(s) + /// are taken from this declaration. + Function *getPrototype() const { + return cast(getArgOperand(PrototypeArg)->stripPointerCasts()); + } + + /// Return the function to use for allocating memory. 
+ Function *getAllocFunction() const { + return cast(getArgOperand(AllocArg)->stripPointerCasts()); + } + + /// Return the function to use for deallocating memory. + Function *getDeallocFunction() const { + return cast(getArgOperand(DeallocArg)->stripPointerCasts()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + auto ID = I->getIntrinsicID(); + return ID == Intrinsic::coro_id_retcon + || ID == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id.retcon instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.id.retcon.once instruction. +class LLVM_LIBRARY_VISIBILITY CoroIdRetconOnceInst + : public AnyCoroIdRetconInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_id_retcon_once; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.frame instruction. 
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { public: @@ -215,7 +306,9 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { enum { IdArg, MemArg }; public: - CoroIdInst *getId() const { return cast(getArgOperand(IdArg)); } + AnyCoroIdInst *getId() const { + return cast(getArgOperand(IdArg)); + } Value *getMem() const { return getArgOperand(MemArg); } @@ -261,8 +354,22 @@ public: } }; +class LLVM_LIBRARY_VISIBILITY AnyCoroSuspendInst : public IntrinsicInst { +public: + CoroSaveInst *getCoroSave() const; + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend || + I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.suspend instruction. -class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst { +class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public AnyCoroSuspendInst { enum { SaveArg, FinalArg }; public: @@ -273,6 +380,7 @@ public: assert(isa(Arg)); return nullptr; } + bool isFinal() const { return cast(getArgOperand(FinalArg))->isOneValue(); } @@ -286,6 +394,37 @@ public: } }; +inline CoroSaveInst *AnyCoroSuspendInst::getCoroSave() const { + if (auto Suspend = dyn_cast(this)) + return Suspend->getCoroSave(); + return nullptr; +} + +/// This represents the llvm.coro.suspend.retcon instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroSuspendRetconInst : public AnyCoroSuspendInst { +public: + op_iterator value_begin() { return arg_begin(); } + const_op_iterator value_begin() const { return arg_begin(); } + + op_iterator value_end() { return arg_end(); } + const_op_iterator value_end() const { return arg_end(); } + + iterator_range value_operands() { + return make_range(value_begin(), value_end()); + } + iterator_range value_operands() const { + return make_range(value_begin(), value_end()); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_suspend_retcon; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This represents the llvm.coro.size instruction. class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst { public: @@ -317,6 +456,60 @@ public: } }; +/// This represents the llvm.coro.alloca.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaAllocInst : public IntrinsicInst { + enum { SizeArg, AlignArg }; +public: + Value *getSize() const { + return getArgOperand(SizeArg); + } + unsigned getAlignment() const { + return cast(getArgOperand(AlignArg))->getZExtValue(); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_alloc; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.alloca.get instruction. 
+class LLVM_LIBRARY_VISIBILITY CoroAllocaGetInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_get; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This represents the llvm.coro.alloca.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocaFreeInst : public IntrinsicInst { + enum { AllocArg }; +public: + CoroAllocaAllocInst *getAlloc() const { + return cast(getArgOperand(AllocArg)); + } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloca_free; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + } // End namespace llvm. #endif diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h index 441c8a20f1f..c151474316f 100644 --- a/lib/Transforms/Coroutines/CoroInternal.h +++ b/lib/Transforms/Coroutines/CoroInternal.h @@ -12,6 +12,7 @@ #define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H #include "CoroInstr.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Transforms/Coroutines.h" namespace llvm { @@ -61,37 +62,174 @@ struct LowererBase { Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); }; +enum class ABI { + /// The "resume-switch" lowering, where there are separate resume and + /// destroy functions that are shared between all suspend points. The + /// coroutine frame implicitly stores the resume and destroy functions, + /// the current index, and any promise value. 
+ Switch, + + /// The "returned-continuation" lowering, where each suspend point creates a + /// single continuation function that is used for both resuming and + /// destroying. Does not support promises. + Retcon, + + /// The "unique returned-continuation" lowering, where each suspend point + /// creates a single continuation function that is used for both resuming + /// and destroying. Does not support promises. The function is known to + /// suspend at most once during its execution, and the return value of + /// the continuation is void. + RetconOnce, +}; + // Holds structural Coroutine Intrinsics for a particular function and other // values used during CoroSplit pass. struct LLVM_LIBRARY_VISIBILITY Shape { CoroBeginInst *CoroBegin; SmallVector CoroEnds; SmallVector CoroSizes; - SmallVector CoroSuspends; + SmallVector CoroSuspends; + SmallVector SwiftErrorOps; - // Field Indexes for known coroutine frame fields. - enum { - ResumeField, - DestroyField, - PromiseField, - IndexField, + // Field indexes for special fields in the switch lowering. + struct SwitchFieldIndex { + enum { + Resume, + Destroy, + Promise, + Index, + /// The index of the first spill field. 
+ FirstSpill + }; }; + coro::ABI ABI; + StructType *FrameTy; Instruction *FramePtr; BasicBlock *AllocaSpillBlock; - SwitchInst *ResumeSwitch; - AllocaInst *PromiseAlloca; - bool HasFinalSuspend; + + struct SwitchLoweringStorage { + SwitchInst *ResumeSwitch; + AllocaInst *PromiseAlloca; + BasicBlock *ResumeEntryBlock; + bool HasFinalSuspend; + }; + + struct RetconLoweringStorage { + Function *ResumePrototype; + Function *Alloc; + Function *Dealloc; + BasicBlock *ReturnBlock; + bool IsFrameInlineInStorage; + }; + + union { + SwitchLoweringStorage SwitchLowering; + RetconLoweringStorage RetconLowering; + }; + + CoroIdInst *getSwitchCoroId() const { + assert(ABI == coro::ABI::Switch); + return cast(CoroBegin->getId()); + } + + AnyCoroIdRetconInst *getRetconCoroId() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + return cast(CoroBegin->getId()); + } IntegerType *getIndexType() const { + assert(ABI == coro::ABI::Switch); assert(FrameTy && "frame type not assigned"); - return cast(FrameTy->getElementType(IndexField)); + return cast(FrameTy->getElementType(SwitchFieldIndex::Index)); } ConstantInt *getIndex(uint64_t Value) const { return ConstantInt::get(getIndexType(), Value); } + PointerType *getSwitchResumePointerType() const { + assert(ABI == coro::ABI::Switch); + assert(FrameTy && "frame type not assigned"); + return cast(FrameTy->getElementType(SwitchFieldIndex::Resume)); + } + + FunctionType *getResumeFunctionType() const { + switch (ABI) { + case coro::ABI::Switch: { + auto *FnPtrTy = getSwitchResumePointerType(); + return cast(FnPtrTy->getPointerElementType()); + } + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getFunctionType(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + ArrayRef getRetconResultTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + auto FTy = CoroBegin->getFunction()->getFunctionType(); + + // The safety of all this is 
checked by checkWFRetconPrototype. + if (auto STy = dyn_cast(FTy->getReturnType())) { + return STy->elements().slice(1); + } else { + return ArrayRef(); + } + } + + ArrayRef getRetconResumeTypes() const { + assert(ABI == coro::ABI::Retcon || + ABI == coro::ABI::RetconOnce); + + // The safety of all this is checked by checkWFRetconPrototype. + auto FTy = RetconLowering.ResumePrototype->getFunctionType(); + return FTy->params().slice(1); + } + + CallingConv::ID getResumeFunctionCC() const { + switch (ABI) { + case coro::ABI::Switch: + return CallingConv::Fast; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return RetconLowering.ResumePrototype->getCallingConv(); + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + unsigned getFirstSpillFieldIndex() const { + switch (ABI) { + case coro::ABI::Switch: + return SwitchFieldIndex::FirstSpill; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return 0; + } + llvm_unreachable("Unknown coro::ABI enum"); + } + + AllocaInst *getPromiseAlloca() const { + if (ABI == coro::ABI::Switch) + return SwitchLowering.PromiseAlloca; + return nullptr; + } + + /// Allocate memory according to the rules of the active lowering. + /// + /// \param CG - if non-null, will be updated for the new call + Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const; + + /// Deallocate memory according to the rules of the active lowering. 
+ /// + /// \param CG - if non-null, will be updated for the new call + void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; + Shape() = default; explicit Shape(Function &F) { buildFrom(F); } void buildFrom(Function &F); diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 5458e70ff16..04723cbde41 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -55,6 +55,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -70,9 +71,197 @@ using namespace llvm; #define DEBUG_TYPE "coro-split" +namespace { + +/// A little helper class for building +class CoroCloner { +public: + enum class Kind { + /// The shared resume function for a switch lowering. + SwitchResume, + + /// The shared unwind function for a switch lowering. + SwitchUnwind, + + /// The shared cleanup function for a switch lowering. + SwitchCleanup, + + /// An individual continuation function. + Continuation, + }; +private: + Function &OrigF; + Function *NewF; + const Twine &Suffix; + coro::Shape &Shape; + Kind FKind; + ValueToValueMapTy VMap; + IRBuilder<> Builder; + Value *NewFramePtr = nullptr; + Value *SwiftErrorSlot = nullptr; + + /// The active suspend instruction; meaningful only for continuation ABIs. + AnyCoroSuspendInst *ActiveSuspend = nullptr; + +public: + /// Create a cloner for a switch lowering. + CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Kind FKind) + : OrigF(OrigF), NewF(nullptr), Suffix(Suffix), Shape(Shape), + FKind(FKind), Builder(OrigF.getContext()) { + assert(Shape.ABI == coro::ABI::Switch); + } + + /// Create a cloner for a continuation lowering. 
+ CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, + Function *NewF, AnyCoroSuspendInst *ActiveSuspend) + : OrigF(OrigF), NewF(NewF), Suffix(Suffix), Shape(Shape), + FKind(Kind::Continuation), Builder(OrigF.getContext()), + ActiveSuspend(ActiveSuspend) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(NewF && "need existing function for continuation"); + assert(ActiveSuspend && "need active suspend point for continuation"); + } + + Function *getFunction() const { + assert(NewF != nullptr && "declaration not yet set"); + return NewF; + } + + void create(); + +private: + bool isSwitchDestroyFunction() { + switch (FKind) { + case Kind::Continuation: + case Kind::SwitchResume: + return false; + case Kind::SwitchUnwind: + case Kind::SwitchCleanup: + return true; + } + llvm_unreachable("Unknown CoroCloner::Kind enum"); + } + + void createDeclaration(); + void replaceEntryBlock(); + Value *deriveNewFramePointer(); + void replaceRetconSuspendUses(); + void replaceCoroSuspends(); + void replaceCoroEnds(); + void replaceSwiftErrorOps(); + void handleFinalSuspend(); + void maybeFreeContinuationStorage(); +}; + +} // end anonymous namespace + +static void maybeFreeRetconStorage(IRBuilder<> &Builder, coro::Shape &Shape, + Value *FramePtr, CallGraph *CG) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + if (Shape.RetconLowering.IsFrameInlineInStorage) + return; + + Shape.emitDealloc(Builder, FramePtr, CG); +} + +/// Replace a non-unwind call to llvm.coro.end. +static void replaceFallthroughCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, + CallGraph *CG) { + // Start inserting right before the coro.end. + IRBuilder<> Builder(End); + + // Create the return instruction. + switch (Shape.ABI) { + // The cloned functions in switch-lowering always return void. 
+ case coro::ABI::Switch: + // coro.end doesn't immediately end the coroutine in the main function + // in this lowering, because we need to deallocate the coroutine. + if (!InResume) + return; + Builder.CreateRetVoid(); + break; + + // In unique continuation lowering, the continuations always return void. + // But we may have implicitly allocated storage. + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + Builder.CreateRetVoid(); + break; + + // In non-unique continuation lowering, we signal completion by returning + // a null continuation. + case coro::ABI::Retcon: { + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + auto RetTy = Shape.getResumeFunctionType()->getReturnType(); + auto RetStructTy = dyn_cast(RetTy); + PointerType *ContinuationTy = + cast(RetStructTy ? RetStructTy->getElementType(0) : RetTy); + + Value *ReturnValue = ConstantPointerNull::get(ContinuationTy); + if (RetStructTy) { + ReturnValue = Builder.CreateInsertValue(UndefValue::get(RetStructTy), + ReturnValue, 0); + } + Builder.CreateRet(ReturnValue); + break; + } + } + + // Remove the rest of the block, by splitting it into an unreachable block. + auto *BB = End->getParent(); + BB->splitBasicBlock(End); + BB->getTerminator()->eraseFromParent(); +} + +/// Replace an unwind call to llvm.coro.end. +static void replaceUnwindCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG){ + IRBuilder<> Builder(End); + + switch (Shape.ABI) { + // In switch-lowering, this does nothing in the main function. + case coro::ABI::Switch: + if (!InResume) + return; + break; + + // In continuation-lowering, this frees the continuation storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + maybeFreeRetconStorage(Builder, Shape, FramePtr, CG); + break; + } + + // If coro.end has an associated bundle, add cleanupret instruction. 
+ if (auto Bundle = End->getOperandBundle(LLVMContext::OB_funclet)) { + auto *FromPad = cast(Bundle->Inputs[0]); + auto *CleanupRet = Builder.CreateCleanupRet(FromPad, nullptr); + End->getParent()->splitBasicBlock(End); + CleanupRet->getParent()->getTerminator()->eraseFromParent(); + } +} + +static void replaceCoroEnd(CoroEndInst *End, coro::Shape &Shape, + Value *FramePtr, bool InResume, CallGraph *CG) { + if (End->isUnwind()) + replaceUnwindCoroEnd(End, Shape, FramePtr, InResume, CG); + else + replaceFallthroughCoroEnd(End, Shape, FramePtr, InResume, CG); + + auto &Context = End->getContext(); + End->replaceAllUsesWith(InResume ? ConstantInt::getTrue(Context) + : ConstantInt::getFalse(Context)); + End->eraseFromParent(); +} + // Create an entry block for a resume function with a switch that will jump to // suspend points. -static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { +static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { + assert(Shape.ABI == coro::ABI::Switch); LLVMContext &C = F.getContext(); // resume.entry: @@ -91,15 +280,16 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { IRBuilder<> Builder(NewEntry); auto *FramePtr = Shape.FramePtr; auto *FrameTy = Shape.FrameTy; - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index"); auto *Switch = Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size()); - Shape.ResumeSwitch = Switch; + Shape.SwitchLowering.ResumeSwitch = Switch; size_t SuspendIndex = 0; - for (CoroSuspendInst *S : Shape.CoroSuspends) { + for (auto *AnyS : Shape.CoroSuspends) { + auto *S = cast(AnyS); ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); // Replace CoroSave with a store to Index: @@ -109,14 +299,15 
@@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(Save); if (S->isFinal()) { // Final suspend point is represented by storing zero in ResumeFnAddr. - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, - 0, "ResumeFn.addr"); + auto *GepIndex = Builder.CreateStructGEP(FrameTy, FramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); auto *NullPtr = ConstantPointerNull::get(cast( cast(GepIndex->getType())->getElementType())); Builder.CreateStore(NullPtr, GepIndex); } else { - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32( - FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr"); + auto *GepIndex = Builder.CreateStructGEP( + FrameTy, FramePtr, coro::Shape::SwitchFieldIndex::Index, "index.addr"); Builder.CreateStore(IndexVal, GepIndex); } Save->replaceAllUsesWith(ConstantTokenNone::get(C)); @@ -164,48 +355,9 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { Builder.SetInsertPoint(UnreachBB); Builder.CreateUnreachable(); - return NewEntry; + Shape.SwitchLowering.ResumeEntryBlock = NewEntry; } -// In Resumers, we replace fallthrough coro.end with ret void and delete the -// rest of the block. -static void replaceFallthroughCoroEnd(IntrinsicInst *End, - ValueToValueMapTy &VMap) { - auto *NewE = cast(VMap[End]); - ReturnInst::Create(NewE->getContext(), nullptr, NewE); - - // Remove the rest of the block, by splitting it into an unreachable block. - auto *BB = NewE->getParent(); - BB->splitBasicBlock(NewE); - BB->getTerminator()->eraseFromParent(); -} - -// In Resumers, we replace unwind coro.end with True to force the immediate -// unwind to caller. 
-static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { - if (Shape.CoroEnds.empty()) - return; - - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *True = ConstantInt::getTrue(Context); - for (CoroEndInst *CE : Shape.CoroEnds) { - if (!CE->isUnwind()) - continue; - - auto *NewCE = cast(VMap[CE]); - - // If coro.end has an associated bundle, add cleanupret instruction. - if (auto Bundle = NewCE->getOperandBundle(LLVMContext::OB_funclet)) { - Value *FromPad = Bundle->Inputs[0]; - auto *CleanupRet = CleanupReturnInst::Create(FromPad, nullptr, NewCE); - NewCE->getParent()->splitBasicBlock(NewCE); - CleanupRet->getParent()->getTerminator()->eraseFromParent(); - } - - NewCE->replaceAllUsesWith(True); - NewCE->eraseFromParent(); - } -} // Rewrite final suspend point handling. We do not use suspend index to // represent the final suspend point. Instead we zero-out ResumeFnAddr in the @@ -216,83 +368,364 @@ static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) { // In the destroy function, we add a code sequence to check if ResumeFnAddress // is Null, and if so, jump to the appropriate label to handle cleanup from the // final suspend point. 
-static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr, - coro::Shape &Shape, SwitchInst *Switch, - bool IsDestroy) { - assert(Shape.HasFinalSuspend); +void CoroCloner::handleFinalSuspend() { + assert(Shape.ABI == coro::ABI::Switch && + Shape.SwitchLowering.HasFinalSuspend); + auto *Switch = cast(VMap[Shape.SwitchLowering.ResumeSwitch]); auto FinalCaseIt = std::prev(Switch->case_end()); BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor(); Switch->removeCase(FinalCaseIt); - if (IsDestroy) { + if (isSwitchDestroyFunction()) { BasicBlock *OldSwitchBB = Switch->getParent(); auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch"); Builder.SetInsertPoint(OldSwitchBB->getTerminator()); - auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr, - 0, 0, "ResumeFn.addr"); - auto *Load = Builder.CreateLoad( - Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex); - auto *NullPtr = - ConstantPointerNull::get(cast(Load->getType())); - auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); + auto *GepIndex = Builder.CreateStructGEP(Shape.FrameTy, NewFramePtr, + coro::Shape::SwitchFieldIndex::Resume, + "ResumeFn.addr"); + auto *Load = Builder.CreateLoad(Shape.getSwitchResumePointerType(), + GepIndex); + auto *Cond = Builder.CreateIsNull(Load); Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB); OldSwitchBB->getTerminator()->eraseFromParent(); } } -// Create a resume clone by cloning the body of the original function, setting -// new entry block and replacing coro.suspend an appropriate value to force -// resume or cleanup pass for every suspend point. 
-static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, - BasicBlock *ResumeEntry, int8_t FnIndex) { - Module *M = F.getParent(); - auto *FrameTy = Shape.FrameTy; - auto *FnPtrTy = cast(FrameTy->getElementType(0)); - auto *FnTy = cast(FnPtrTy->getElementType()); +static Function *createCloneDeclaration(Function &OrigF, coro::Shape &Shape, + const Twine &Suffix, + Module::iterator InsertBefore) { + Module *M = OrigF.getParent(); + auto *FnTy = Shape.getResumeFunctionType(); Function *NewF = - Function::Create(FnTy, GlobalValue::LinkageTypes::ExternalLinkage, - F.getName() + Suffix, M); + Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, + OrigF.getName() + Suffix); NewF->addParamAttr(0, Attribute::NonNull); NewF->addParamAttr(0, Attribute::NoAlias); - ValueToValueMapTy VMap; + M->getFunctionList().insert(InsertBefore, NewF); + + return NewF; +} + +/// Replace uses of the active llvm.coro.suspend.retcon call with the +/// arguments to the continuation function. +/// +/// This assumes that the builder has a meaningful insertion point. +void CoroCloner::replaceRetconSuspendUses() { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + + auto NewS = VMap[ActiveSuspend]; + if (NewS->use_empty()) return; + + // Copy out all the continuation arguments after the buffer pointer into + // an easily-indexed data structure for convenience. + SmallVector Args; + for (auto I = std::next(NewF->arg_begin()), E = NewF->arg_end(); I != E; ++I) + Args.push_back(&*I); + + // If the suspend returns a single scalar value, we can just do a simple + // replacement. + if (!isa(NewS->getType())) { + assert(Args.size() == 1); + NewS->replaceAllUsesWith(Args.front()); + return; + } + + // Try to peephole extracts of an aggregate return. 
+ for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) { + auto EVI = dyn_cast((UI++)->getUser()); + if (!EVI || EVI->getNumIndices() != 1) + continue; + + EVI->replaceAllUsesWith(Args[EVI->getIndices().front()]); + EVI->eraseFromParent(); + } + + // If we have no remaining uses, we're done. + if (NewS->use_empty()) return; + + // Otherwise, we need to create an aggregate. + Value *Agg = UndefValue::get(NewS->getType()); + for (size_t I = 0, E = Args.size(); I != E; ++I) + Agg = Builder.CreateInsertValue(Agg, Args[I], I); + + NewS->replaceAllUsesWith(Agg); +} + +void CoroCloner::replaceCoroSuspends() { + Value *SuspendResult; + + switch (Shape.ABI) { + // In switch lowering, replace coro.suspend with the appropriate value + // for the type of function we're extracting. + // Replacing coro.suspend with (0) will result in control flow proceeding to + // a resume label associated with a suspend point, replacing it with (1) will + // result in control flow proceeding to a cleanup label associated with this + // suspend point. + case coro::ABI::Switch: + SuspendResult = Builder.getInt8(isSwitchDestroyFunction() ? 1 : 0); + break; + + // In returned-continuation lowering, the arguments from earlier + // continuations are theoretically arbitrary, and they should have been + // spilled. + case coro::ABI::RetconOnce: + case coro::ABI::Retcon: + return; + } + + for (AnyCoroSuspendInst *CS : Shape.CoroSuspends) { + // The active suspend was handled earlier. + if (CS == ActiveSuspend) continue; + + auto *MappedCS = cast(VMap[CS]); + MappedCS->replaceAllUsesWith(SuspendResult); + MappedCS->eraseFromParent(); + } +} + +void CoroCloner::replaceCoroEnds() { + for (CoroEndInst *CE : Shape.CoroEnds) { + // We use a null call graph because there's no call graph node for + // the cloned function yet. We'll just be rebuilding that later. 
+ auto NewCE = cast(VMap[CE]); + replaceCoroEnd(NewCE, Shape, NewFramePtr, /*in resume*/ true, nullptr); + } +} + +static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, + ValueToValueMapTy *VMap) { + Value *CachedSlot = nullptr; + auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { + if (CachedSlot) { + assert(CachedSlot->getType()->getPointerElementType() == ValueTy && + "multiple swifterror slots in function with different types"); + return CachedSlot; + } + + // Check if the function has a swifterror argument. + for (auto &Arg : F.args()) { + if (Arg.isSwiftError()) { + CachedSlot = &Arg; + assert(Arg.getType()->getPointerElementType() == ValueTy && + "swifterror argument does not have expected type"); + return &Arg; + } + } + + // Create a swifterror alloca. + IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); + auto Alloca = Builder.CreateAlloca(ValueTy); + Alloca->setSwiftError(true); + + CachedSlot = Alloca; + return Alloca; + }; + + for (CallInst *Op : Shape.SwiftErrorOps) { + auto MappedOp = VMap ? cast((*VMap)[Op]) : Op; + IRBuilder<> Builder(MappedOp); + + // If there are no arguments, this is a 'get' operation. + Value *MappedResult; + if (Op->getNumArgOperands() == 0) { + auto ValueTy = Op->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + MappedResult = Builder.CreateLoad(ValueTy, Slot); + } else { + assert(Op->getNumArgOperands() == 1); + auto Value = MappedOp->getArgOperand(0); + auto ValueTy = Value->getType(); + auto Slot = getSwiftErrorSlot(ValueTy); + Builder.CreateStore(Value, Slot); + MappedResult = Slot; + } + + MappedOp->replaceAllUsesWith(MappedResult); + MappedOp->eraseFromParent(); + } + + // If we're updating the original function, we've invalidated SwiftErrorOps. 
+ if (VMap == nullptr) { + Shape.SwiftErrorOps.clear(); + } +} + +void CoroCloner::replaceSwiftErrorOps() { + ::replaceSwiftErrorOps(*NewF, Shape, &VMap); +} + +void CoroCloner::replaceEntryBlock() { + // In the original function, the AllocaSpillBlock is a block immediately + // following the allocation of the frame object which defines GEPs for + // all the allocas that have been moved into the frame, and it ends by + // branching to the original beginning of the coroutine. Make this + // the entry block of the cloned function. + auto *Entry = cast(VMap[Shape.AllocaSpillBlock]); + Entry->setName("entry" + Suffix); + Entry->moveBefore(&NewF->getEntryBlock()); + Entry->getTerminator()->eraseFromParent(); + + // Clear all predecessors of the new entry block. There should be + // exactly one predecessor, which we created when splitting out + // AllocaSpillBlock to begin with. + assert(Entry->hasOneUse()); + auto BranchToEntry = cast(Entry->user_back()); + assert(BranchToEntry->isUnconditional()); + Builder.SetInsertPoint(BranchToEntry); + Builder.CreateUnreachable(); + BranchToEntry->eraseFromParent(); + + // TODO: move any allocas into Entry that weren't moved into the frame. + // (Currently we move all allocas into the frame.) + + // Branch from the entry to the appropriate place. + Builder.SetInsertPoint(Entry); + switch (Shape.ABI) { + case coro::ABI::Switch: { + // In switch-lowering, we built a resume-entry block in the original + // function. Make the entry block branch to this. + auto *SwitchBB = + cast(VMap[Shape.SwitchLowering.ResumeEntryBlock]); + Builder.CreateBr(SwitchBB); + break; + } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + // In continuation ABIs, we want to branch to immediately after the + // active suspend point. Earlier phases will have put the suspend in its + // own basic block, so just thread our jump directly to its successor. 
+ auto MappedCS = cast(VMap[ActiveSuspend]); + auto Branch = cast(MappedCS->getNextNode()); + assert(Branch->isUnconditional()); + Builder.CreateBr(Branch->getSuccessor(0)); + break; + } + } +} + +/// Derive the value of the new frame pointer. +Value *CoroCloner::deriveNewFramePointer() { + // Builder should be inserting to the front of the new entry block. + + switch (Shape.ABI) { + // In switch-lowering, the argument is the frame pointer. + case coro::ABI::Switch: + return &*NewF->arg_begin(); + + // In continuation-lowering, the argument is the opaque storage. + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + Argument *NewStorage = &*NewF->arg_begin(); + auto FramePtrTy = Shape.FrameTy->getPointerTo(); + + // If the storage is inline, just bitcast to the storage to the frame type. + if (Shape.RetconLowering.IsFrameInlineInStorage) + return Builder.CreateBitCast(NewStorage, FramePtrTy); + + // Otherwise, load the real frame from the opaque storage. + auto FramePtrPtr = + Builder.CreateBitCast(NewStorage, FramePtrTy->getPointerTo()); + return Builder.CreateLoad(FramePtrPtr); + } + } + llvm_unreachable("bad ABI"); +} + +/// Clone the body of the original function into a resume function of +/// some sort. +void CoroCloner::create() { + // Create the new function if we don't already have one. + if (!NewF) { + NewF = createCloneDeclaration(OrigF, Shape, Suffix, + OrigF.getParent()->end()); + } + // Replace all args with undefs. The buildCoroutineFrame algorithm already // rewritten access to the args that occurs after suspend points with loads // and stores to/from the coroutine frame. - for (Argument &A : F.args()) + for (Argument &A : OrigF.args()) VMap[&A] = UndefValue::get(A.getType()); SmallVector Returns; - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); - NewF->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + // Ignore attempts to change certain attributes of the function. 
+ // TODO: maybe there should be a way to suppress this during cloning? + auto savedVisibility = NewF->getVisibility(); + auto savedUnnamedAddr = NewF->getUnnamedAddr(); + auto savedDLLStorageClass = NewF->getDLLStorageClass(); - // Remove old returns. - for (ReturnInst *Return : Returns) - changeToUnreachable(Return, /*UseLLVMTrap=*/false); + // NewF's linkage (which CloneFunctionInto does *not* change) might not + // be compatible with the visibility of OrigF (which it *does* change), + // so protect against that. + auto savedLinkage = NewF->getLinkage(); + NewF->setLinkage(llvm::GlobalValue::ExternalLinkage); - // Remove old return attributes. - NewF->removeAttributes( - AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType())); + CloneFunctionInto(NewF, &OrigF, VMap, /*ModuleLevelChanges=*/true, Returns); - // Make AllocaSpillBlock the new entry block. - auto *SwitchBB = cast(VMap[ResumeEntry]); - auto *Entry = cast(VMap[Shape.AllocaSpillBlock]); - Entry->moveBefore(&NewF->getEntryBlock()); - Entry->getTerminator()->eraseFromParent(); - BranchInst::Create(SwitchBB, Entry); - Entry->setName("entry" + Suffix); + NewF->setLinkage(savedLinkage); + NewF->setVisibility(savedVisibility); + NewF->setUnnamedAddr(savedUnnamedAddr); + NewF->setDLLStorageClass(savedDLLStorageClass); - // Clear all predecessors of the new entry block. - auto *Switch = cast(VMap[Shape.ResumeSwitch]); - Entry->replaceAllUsesWith(Switch->getDefaultDest()); + auto &Context = NewF->getContext(); - IRBuilder<> Builder(&NewF->getEntryBlock().front()); + // Replace the attributes of the new function: + auto OrigAttrs = NewF->getAttributes(); + auto NewAttrs = AttributeList(); + + switch (Shape.ABI) { + case coro::ABI::Switch: + // Bootstrap attributes by copying function attributes from the + // original function. This should include optimization settings and so on. 
+ NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, + OrigAttrs.getFnAttributes()); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // If we have a continuation prototype, just use its attributes, + // full-stop. + NewAttrs = Shape.RetconLowering.ResumePrototype->getAttributes(); + break; + } + + // Make the frame parameter nonnull and noalias. + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NonNull); + NewAttrs = NewAttrs.addParamAttribute(Context, 0, Attribute::NoAlias); + + switch (Shape.ABI) { + // In these ABIs, the cloned functions always return 'void', and the + // existing return sites are meaningless. Note that for unique + // continuations, this includes the returns associated with suspends; + // this is fine because we can't suspend twice. + case coro::ABI::Switch: + case coro::ABI::RetconOnce: + // Remove old returns. + for (ReturnInst *Return : Returns) + changeToUnreachable(Return, /*UseLLVMTrap=*/false); + break; + + // With multi-suspend continuations, we'll already have eliminated the + // original returns and inserted returns before all the suspend points, + // so we want to leave any returns in place. + case coro::ABI::Retcon: + break; + } + + NewF->setAttributes(NewAttrs); + NewF->setCallingConv(Shape.getResumeFunctionCC()); + + // Set up the new entry block. + replaceEntryBlock(); + + Builder.SetInsertPoint(&NewF->getEntryBlock().front()); + NewFramePtr = deriveNewFramePointer(); // Remap frame pointer. 
- Argument *NewFramePtr = &*NewF->arg_begin(); - Value *OldFramePtr = cast(VMap[Shape.FramePtr]); + Value *OldFramePtr = VMap[Shape.FramePtr]; NewFramePtr->takeName(OldFramePtr); OldFramePtr->replaceAllUsesWith(NewFramePtr); @@ -302,50 +735,55 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Value *OldVFrame = cast(VMap[Shape.CoroBegin]); OldVFrame->replaceAllUsesWith(NewVFrame); - // Rewrite final suspend handling as it is not done via switch (allows to - // remove final case from the switch, since it is undefined behavior to resume - // the coroutine suspended at the final suspend point. - if (Shape.HasFinalSuspend) { - auto *Switch = cast(VMap[Shape.ResumeSwitch]); - bool IsDestroy = FnIndex != 0; - handleFinalSuspend(Builder, NewFramePtr, Shape, Switch, IsDestroy); + switch (Shape.ABI) { + case coro::ABI::Switch: + // Rewrite final suspend handling as it is not done via switch (allows to + // remove final case from the switch, since it is undefined behavior to + // resume the coroutine suspended at the final suspend point. + if (Shape.SwitchLowering.HasFinalSuspend) + handleFinalSuspend(); + break; + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + // Replace uses of the active suspend with the corresponding + // continuation-function arguments. + assert(ActiveSuspend != nullptr && + "no active suspend when lowering a continuation-style coroutine"); + replaceRetconSuspendUses(); + break; } - // Replace coro suspend with the appropriate resume index. - // Replacing coro.suspend with (0) will result in control flow proceeding to - // a resume label associated with a suspend point, replacing it with (1) will - // result in control flow proceeding to a cleanup label associated with this - // suspend point. - auto *NewValue = Builder.getInt8(FnIndex ? 
1 : 0); - for (CoroSuspendInst *CS : Shape.CoroSuspends) { - auto *MappedCS = cast(VMap[CS]); - MappedCS->replaceAllUsesWith(NewValue); - MappedCS->eraseFromParent(); - } + // Handle suspends. + replaceCoroSuspends(); + + // Handle swifterror. + replaceSwiftErrorOps(); // Remove coro.end intrinsics. - replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap); - replaceUnwindCoroEnds(Shape, VMap); + replaceCoroEnds(); + // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, // to suppress deallocation code. - coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), - /*Elide=*/FnIndex == 2); - - NewF->setCallingConv(CallingConv::Fast); - - return NewF; + if (Shape.ABI == coro::ABI::Switch) + coro::replaceCoroFree(cast(VMap[Shape.CoroBegin->getId()]), + /*Elide=*/ FKind == CoroCloner::Kind::SwitchCleanup); } -static void removeCoroEnds(coro::Shape &Shape) { - if (Shape.CoroEnds.empty()) - return; +// Create a resume clone by cloning the body of the original function, setting +// new entry block and replacing coro.suspend an appropriate value to force +// resume or cleanup pass for every suspend point. +static Function *createClone(Function &F, const Twine &Suffix, + coro::Shape &Shape, CoroCloner::Kind FKind) { + CoroCloner Cloner(F, Suffix, Shape, FKind); + Cloner.create(); + return Cloner.getFunction(); +} - LLVMContext &Context = Shape.CoroEnds.front()->getContext(); - auto *False = ConstantInt::getFalse(Context); - - for (CoroEndInst *CE : Shape.CoroEnds) { - CE->replaceAllUsesWith(False); - CE->eraseFromParent(); +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEnds(coro::Shape &Shape, CallGraph *CG) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, CG); } } @@ -377,8 +815,12 @@ static void replaceFrameSize(coro::Shape &Shape) { // i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*)) // // Assumes that all the functions have the same signature. 
-static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, - std::initializer_list Fns) { +static void setCoroInfo(Function &F, coro::Shape &Shape, + ArrayRef Fns) { + // This only works under the switch-lowering ABI because coro elision + // only works on the switch-lowering ABI. + assert(Shape.ABI == coro::ABI::Switch); + SmallVector Args(Fns.begin(), Fns.end()); assert(!Args.empty()); Function *Part = *Fns.begin(); @@ -393,38 +835,45 @@ static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin, // Update coro.begin instruction to refer to this constant. LLVMContext &C = F.getContext(); auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C)); - CoroBegin->getId()->setInfo(BC); + Shape.getSwitchCoroId()->setInfo(BC); } // Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame. static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn, Function *DestroyFn, Function *CleanupFn) { + assert(Shape.ABI == coro::ABI::Switch); + IRBuilder<> Builder(Shape.FramePtr->getNextNode()); - auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField, + auto *ResumeAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); Builder.CreateStore(ResumeFn, ResumeAddr); Value *DestroyOrCleanupFn = DestroyFn; - CoroIdInst *CoroId = Shape.CoroBegin->getId(); + CoroIdInst *CoroId = Shape.getSwitchCoroId(); if (CoroAllocInst *CA = CoroId->getCoroAlloc()) { // If there is a CoroAlloc and it returns false (meaning we elide the // allocation, use CleanupFn instead of DestroyFn). 
DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn); } - auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32( - Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField, + auto *DestroyAddr = Builder.CreateStructGEP( + Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Destroy, "destroy.addr"); Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr); } static void postSplitCleanup(Function &F) { removeUnreachableBlocks(F); + + // For now, we do a mandatory verification step because we don't + // entirely trust this pass. Note that we don't want to add a verifier + // pass to FPM below because it will also verify all the global data. + verifyFunction(F); + legacy::FunctionPassManager FPM(F.getParent()); - FPM.add(createVerifierPass()); FPM.add(createSCCPPass()); FPM.add(createCFGSimplificationPass()); FPM.add(createEarlyCSEPass()); @@ -520,21 +969,34 @@ static void addMustTailToCoroResumes(Function &F) { // Coroutine has no suspend points. Remove heap allocation for the coroutine // frame if possible. -static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) { +static void handleNoSuspendCoroutine(coro::Shape &Shape) { + auto *CoroBegin = Shape.CoroBegin; auto *CoroId = CoroBegin->getId(); auto *AllocInst = CoroId->getCoroAlloc(); - coro::replaceCoroFree(CoroId, /*Elide=*/AllocInst != nullptr); - if (AllocInst) { - IRBuilder<> Builder(AllocInst); - // FIXME: Need to handle overaligned members. 
- auto *Frame = Builder.CreateAlloca(FrameTy); - auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); - AllocInst->replaceAllUsesWith(Builder.getFalse()); - AllocInst->eraseFromParent(); - CoroBegin->replaceAllUsesWith(VFrame); - } else { - CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + switch (Shape.ABI) { + case coro::ABI::Switch: { + auto SwitchId = cast(CoroId); + coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr); + if (AllocInst) { + IRBuilder<> Builder(AllocInst); + // FIXME: Need to handle overaligned members. + auto *Frame = Builder.CreateAlloca(Shape.FrameTy); + auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy()); + AllocInst->replaceAllUsesWith(Builder.getFalse()); + AllocInst->eraseFromParent(); + CoroBegin->replaceAllUsesWith(VFrame); + } else { + CoroBegin->replaceAllUsesWith(CoroBegin->getMem()); + } + break; } + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + CoroBegin->replaceAllUsesWith(UndefValue::get(CoroBegin->getType())); + break; + } + CoroBegin->eraseFromParent(); } @@ -670,12 +1132,16 @@ static bool simplifySuspendPoint(CoroSuspendInst *Suspend, // Remove suspend points that are simplified. static void simplifySuspendPoints(coro::Shape &Shape) { + // Currently, the only simplification we do is switch-lowering-specific. + if (Shape.ABI != coro::ABI::Switch) + return; + auto &S = Shape.CoroSuspends; size_t I = 0, N = S.size(); if (N == 0) return; while (true) { - if (simplifySuspendPoint(S[I], Shape.CoroBegin)) { + if (simplifySuspendPoint(cast(S[I]), Shape.CoroBegin)) { if (--N == I) break; std::swap(S[I], S[N]); @@ -687,126 +1153,18 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } -static SmallPtrSet getCoroBeginPredBlocks(CoroBeginInst *CB) { - // Collect all blocks that we need to look for instructions to relocate. 
- SmallPtrSet RelocBlocks; - SmallVector Work; - Work.push_back(CB->getParent()); +static void splitSwitchCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + assert(Shape.ABI == coro::ABI::Switch); - do { - BasicBlock *Current = Work.pop_back_val(); - for (BasicBlock *BB : predecessors(Current)) - if (RelocBlocks.count(BB) == 0) { - RelocBlocks.insert(BB); - Work.push_back(BB); - } - } while (!Work.empty()); - return RelocBlocks; -} + createResumeEntryBlock(F, Shape); + auto ResumeClone = createClone(F, ".resume", Shape, + CoroCloner::Kind::SwitchResume); + auto DestroyClone = createClone(F, ".destroy", Shape, + CoroCloner::Kind::SwitchUnwind); + auto CleanupClone = createClone(F, ".cleanup", Shape, + CoroCloner::Kind::SwitchCleanup); -static SmallPtrSet -getNotRelocatableInstructions(CoroBeginInst *CoroBegin, - SmallPtrSetImpl &RelocBlocks) { - SmallPtrSet DoNotRelocate; - // Collect all instructions that we should not relocate - SmallVector Work; - - // Start with CoroBegin and terminators of all preceding blocks. - Work.push_back(CoroBegin); - BasicBlock *CoroBeginBB = CoroBegin->getParent(); - for (BasicBlock *BB : RelocBlocks) - if (BB != CoroBeginBB) - Work.push_back(BB->getTerminator()); - - // For every instruction in the Work list, place its operands in DoNotRelocate - // set. - do { - Instruction *Current = Work.pop_back_val(); - LLVM_DEBUG(dbgs() << "CoroSplit: Will not relocate: " << *Current << "\n"); - DoNotRelocate.insert(Current); - for (Value *U : Current->operands()) { - auto *I = dyn_cast(U); - if (!I) - continue; - - if (auto *A = dyn_cast(I)) { - // Stores to alloca instructions that occur before the coroutine frame - // is allocated should not be moved; the stored values may be used by - // the coroutine frame allocator. The operands to those stores must also - // remain in place. 
- for (const auto &User : A->users()) - if (auto *SI = dyn_cast(User)) - if (RelocBlocks.count(SI->getParent()) != 0 && - DoNotRelocate.count(SI) == 0) { - Work.push_back(SI); - DoNotRelocate.insert(SI); - } - continue; - } - - if (DoNotRelocate.count(I) == 0) { - Work.push_back(I); - DoNotRelocate.insert(I); - } - } - } while (!Work.empty()); - return DoNotRelocate; -} - -static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { - // Analyze which non-alloca instructions are needed for allocation and - // relocate the rest to after coro.begin. We need to do it, since some of the - // targets of those instructions may be placed into coroutine frame memory - // for which becomes available after coro.begin intrinsic. - - auto BlockSet = getCoroBeginPredBlocks(CoroBegin); - auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); - - Instruction *InsertPt = CoroBegin->getNextNode(); - BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. - for (auto B = BB.begin(), E = BB.end(); B != E;) { - Instruction &I = *B++; - if (isa(&I)) - continue; - if (&I == CoroBegin) - break; - if (DoNotRelocateSet.count(&I)) - continue; - I.moveBefore(InsertPt); - } -} - -static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { - EliminateUnreachableBlocks(F); - - coro::Shape Shape(F); - if (!Shape.CoroBegin) - return; - - simplifySuspendPoints(Shape); - relocateInstructionBefore(Shape.CoroBegin, F); - buildCoroutineFrame(F, Shape); - replaceFrameSize(Shape); - - // If there are no suspend points, no split required, just remove - // the allocation and deallocation blocks, they are not needed. 
- if (Shape.CoroSuspends.empty()) { - handleNoSuspendCoroutine(Shape.CoroBegin, Shape.FrameTy); - removeCoroEnds(Shape); - postSplitCleanup(F); - coro::updateCallGraph(F, {}, CG, SCC); - return; - } - - auto *ResumeEntry = createResumeEntryBlock(F, Shape); - auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0); - auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1); - auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2); - - // We no longer need coro.end in F. - removeCoroEnds(Shape); - - postSplitCleanup(F); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); postSplitCleanup(*CleanupClone); @@ -816,13 +1174,206 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { // Store addresses resume/destroy/cleanup functions in the coroutine frame. updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone); + assert(Clones.empty()); + Clones.push_back(ResumeClone); + Clones.push_back(DestroyClone); + Clones.push_back(CleanupClone); + // Create a constant array referring to resume/destroy/clone functions pointed // by the last argument of @llvm.coro.info, so that CoroElide pass can // determined correct function to call. - setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone}); + setCoroInfo(F, Shape, Clones); +} + +static void splitRetconCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + assert(Shape.ABI == coro::ABI::Retcon || + Shape.ABI == coro::ABI::RetconOnce); + assert(Clones.empty()); + + // Reset various things that the optimizer might have decided it + // "knows" about the coroutine function due to not seeing a return. + F.removeFnAttr(Attribute::NoReturn); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + + // Allocate the frame. 
+ auto *Id = cast(Shape.CoroBegin->getId()); + Value *RawFramePtr; + if (Shape.RetconLowering.IsFrameInlineInStorage) { + RawFramePtr = Id->getStorage(); + } else { + IRBuilder<> Builder(Id); + + // Determine the size of the frame. + const DataLayout &DL = F.getParent()->getDataLayout(); + auto Size = DL.getTypeAllocSize(Shape.FrameTy); + + // Allocate. We don't need to update the call graph node because we're + // going to recompute it from scratch after splitting. + RawFramePtr = Shape.emitAlloc(Builder, Builder.getInt64(Size), nullptr); + RawFramePtr = + Builder.CreateBitCast(RawFramePtr, Shape.CoroBegin->getType()); + + // Stash the allocated frame pointer in the continuation storage. + auto Dest = Builder.CreateBitCast(Id->getStorage(), + RawFramePtr->getType()->getPointerTo()); + Builder.CreateStore(RawFramePtr, Dest); + } + + // Map all uses of llvm.coro.begin to the allocated frame pointer. + { + // Make sure we don't invalidate Shape.FramePtr. + TrackingVH Handle(Shape.FramePtr); + Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); + Shape.FramePtr = Handle.getValPtr(); + } + + // Create a unique return block. + BasicBlock *ReturnBB = nullptr; + SmallVector ReturnPHIs; + + // Create all the functions in order after the main function. + auto NextF = std::next(F.getIterator()); + + // Create a continuation function for each of the suspend points. + Clones.reserve(Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = cast(Shape.CoroSuspends[i]); + + // Create the clone declaration. + auto Continuation = + createCloneDeclaration(F, Shape, ".resume." + Twine(i), NextF); + Clones.push_back(Continuation); + + // Insert a branch to the unified return block immediately before + // the suspend point. + auto SuspendBB = Suspend->getParent(); + auto NewSuspendBB = SuspendBB->splitBasicBlock(Suspend); + auto Branch = cast(SuspendBB->getTerminator()); + + // Create the unified return block. 
+ if (!ReturnBB) { + // Place it before the first suspend. + ReturnBB = BasicBlock::Create(F.getContext(), "coro.return", &F, + NewSuspendBB); + Shape.RetconLowering.ReturnBlock = ReturnBB; + + IRBuilder<> Builder(ReturnBB); + + // Create PHIs for all the return values. + assert(ReturnPHIs.empty()); + + // First, the continuation. + ReturnPHIs.push_back(Builder.CreatePHI(Continuation->getType(), + Shape.CoroSuspends.size())); + + // Next, all the directly-yielded values. + for (auto ResultTy : Shape.getRetconResultTypes()) + ReturnPHIs.push_back(Builder.CreatePHI(ResultTy, + Shape.CoroSuspends.size())); + + // Build the return value. + auto RetTy = F.getReturnType(); + + // Cast the continuation value if necessary. + // We can't rely on the types matching up because that type would + // have to be infinite. + auto CastedContinuationTy = + (ReturnPHIs.size() == 1 ? RetTy : RetTy->getStructElementType(0)); + auto *CastedContinuation = + Builder.CreateBitCast(ReturnPHIs[0], CastedContinuationTy); + + Value *RetV; + if (ReturnPHIs.size() == 1) { + RetV = CastedContinuation; + } else { + RetV = UndefValue::get(RetTy); + RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0); + for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I) + RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I); + } + + Builder.CreateRet(RetV); + } + + // Branch to the return block. + Branch->setSuccessor(0, ReturnBB); + ReturnPHIs[0]->addIncoming(Continuation, SuspendBB); + size_t NextPHIIndex = 1; + for (auto &VUse : Suspend->value_operands()) + ReturnPHIs[NextPHIIndex++]->addIncoming(&*VUse, SuspendBB); + assert(NextPHIIndex == ReturnPHIs.size()); + } + + assert(Clones.size() == Shape.CoroSuspends.size()); + for (size_t i = 0, e = Shape.CoroSuspends.size(); i != e; ++i) { + auto Suspend = Shape.CoroSuspends[i]; + auto Clone = Clones[i]; + + CoroCloner(F, "resume." 
+ Twine(i), Shape, Clone, Suspend).create(); + } +} + +namespace { + class PrettyStackTraceFunction : public PrettyStackTraceEntry { + Function &F; + public: + PrettyStackTraceFunction(Function &F) : F(F) {} + void print(raw_ostream &OS) const override { + OS << "While splitting coroutine "; + F.printAsOperand(OS, /*print type*/ false, F.getParent()); + OS << "\n"; + } + }; +} + +static void splitCoroutine(Function &F, coro::Shape &Shape, + SmallVectorImpl &Clones) { + switch (Shape.ABI) { + case coro::ABI::Switch: + return splitSwitchCoroutine(F, Shape, Clones); + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: + return splitRetconCoroutine(F, Shape, Clones); + } + llvm_unreachable("bad ABI kind"); +} + +static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { + PrettyStackTraceFunction prettyStackTrace(F); + + // The suspend-crossing algorithm in buildCoroutineFrame get tripped + // up by uses in unreachable blocks, so remove them as a first pass. + removeUnreachableBlocks(F); + + coro::Shape Shape(F); + if (!Shape.CoroBegin) + return; + + simplifySuspendPoints(Shape); + buildCoroutineFrame(F, Shape); + replaceFrameSize(Shape); + + SmallVector Clones; + + // If there are no suspend points, no split required, just remove + // the allocation and deallocation blocks, they are not needed. + if (Shape.CoroSuspends.empty()) { + handleNoSuspendCoroutine(Shape); + } else { + splitCoroutine(F, Shape, Clones); + } + + // Replace all the swifterror operations in the original function. + // This invalidates SwiftErrorOps in the Shape. + replaceSwiftErrorOps(F, Shape, nullptr); + + removeCoroEnds(Shape, &CG); + postSplitCleanup(F); // Update call graph and add the functions we created to the SCC. 
- coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC); + coro::updateCallGraph(F, Clones, CG, SCC); } // When we see the coroutine the first time, we insert an indirect call to a @@ -881,6 +1432,80 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { SCC.initialize(Nodes); } +/// Replace a call to llvm.coro.prepare.retcon. +static void replacePrepare(CallInst *Prepare, CallGraph &CG) { + auto CastFn = Prepare->getArgOperand(0); // as an i8* + auto Fn = CastFn->stripPointerCasts(); // as its original type + + // Find call graph nodes for the preparation. + CallGraphNode *PrepareUserNode = nullptr, *FnNode = nullptr; + if (auto ConcreteFn = dyn_cast(Fn)) { + PrepareUserNode = CG[Prepare->getFunction()]; + FnNode = CG[ConcreteFn]; + } + + // Attempt to peephole this pattern: + // %0 = bitcast [[TYPE]] @some_function to i8* + // %1 = call @llvm.coro.prepare.retcon(i8* %0) + // %2 = bitcast %1 to [[TYPE]] + // ==> + // %2 = @some_function + for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); + UI != UE; ) { + // Look for bitcasts back to the original function type. + auto *Cast = dyn_cast((UI++)->getUser()); + if (!Cast || Cast->getType() != Fn->getType()) continue; + + // Check whether the replacement will introduce new direct calls. + // If so, we'll need to update the call graph. + if (PrepareUserNode) { + for (auto &Use : Cast->uses()) { + if (auto *CB = dyn_cast(Use.getUser())) { + if (!CB->isCallee(&Use)) + continue; + PrepareUserNode->removeCallEdgeFor(*CB); + PrepareUserNode->addCalledFunction(CB, FnNode); + } + } + } + + // Replace and remove the cast. + Cast->replaceAllUsesWith(Fn); + Cast->eraseFromParent(); + } + + // Replace any remaining uses with the function as an i8*. + // This can never directly be a callee, so we don't need to update CG. + Prepare->replaceAllUsesWith(CastFn); + Prepare->eraseFromParent(); + + // Kill dead bitcasts. 
+ while (auto *Cast = dyn_cast(CastFn)) { + if (!Cast->use_empty()) break; + CastFn = Cast->getOperand(0); + Cast->eraseFromParent(); + } +} + +/// Remove calls to llvm.coro.prepare.retcon, a barrier meant to prevent +/// IPO from operating on calls to a retcon coroutine before it's been +/// split. This is only safe to do after we've split all retcon +/// coroutines in the module. We can do that this in this pass because +/// this pass does promise to split all retcon coroutines (as opposed to +/// switch coroutines, which are lowered in multiple stages). +static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { + bool Changed = false; + for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); + PI != PE; ) { + // Intrinsics can only be used in calls. + auto *Prepare = cast((PI++)->getUser()); + replacePrepare(Prepare, CG); + Changed = true; + } + + return Changed; +} + //===----------------------------------------------------------------------===// // Top Level Driver //===----------------------------------------------------------------------===// @@ -899,7 +1524,9 @@ struct CoroSplit : public CallGraphSCCPass { // A coroutine is identified by the presence of coro.begin intrinsic, if // we don't have any, this pass has nothing to do. bool doInitialization(CallGraph &CG) override { - Run = coro::declaresIntrinsics(CG.getModule(), {"llvm.coro.begin"}); + Run = coro::declaresIntrinsics(CG.getModule(), + {"llvm.coro.begin", + "llvm.coro.prepare.retcon"}); return CallGraphSCCPass::doInitialization(CG); } @@ -907,6 +1534,12 @@ struct CoroSplit : public CallGraphSCCPass { if (!Run) return false; + // Check for uses of llvm.coro.prepare.retcon. + auto PrepareFn = + SCC.getCallGraph().getModule().getFunction("llvm.coro.prepare.retcon"); + if (PrepareFn && PrepareFn->use_empty()) + PrepareFn = nullptr; + // Find coroutines for processing. 
SmallVector Coroutines; for (CallGraphNode *CGN : SCC) @@ -914,12 +1547,17 @@ struct CoroSplit : public CallGraphSCCPass { if (F->hasFnAttribute(CORO_PRESPLIT_ATTR)) Coroutines.push_back(F); - if (Coroutines.empty()) + if (Coroutines.empty() && !PrepareFn) return false; CallGraph &CG = getAnalysis().getCallGraph(); + + if (Coroutines.empty()) + return replaceAllPrepares(PrepareFn, CG); + createDevirtTriggerFunc(CG, SCC); + // Split all the coroutines. for (Function *F : Coroutines) { Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR); StringRef Value = Attr.getValueAsString(); @@ -932,6 +1570,10 @@ struct CoroSplit : public CallGraphSCCPass { F->removeFnAttr(CORO_PRESPLIT_ATTR); splitCoroutine(*F, CG, SCC); } + + if (PrepareFn) + replaceAllPrepares(PrepareFn, CG); + return true; } diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index a581d1d2116..f39483b2751 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -123,12 +123,26 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index, static bool isCoroutineIntrinsicName(StringRef Name) { // NOTE: Must be sorted! 
static const char *const CoroIntrinsics[] = { - "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy", - "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame", - "llvm.coro.free", "llvm.coro.id", "llvm.coro.noop", - "llvm.coro.param", "llvm.coro.promise", "llvm.coro.resume", - "llvm.coro.save", "llvm.coro.size", "llvm.coro.subfn.addr", + "llvm.coro.alloc", + "llvm.coro.begin", + "llvm.coro.destroy", + "llvm.coro.done", + "llvm.coro.end", + "llvm.coro.frame", + "llvm.coro.free", + "llvm.coro.id", + "llvm.coro.id.retcon", + "llvm.coro.id.retcon.once", + "llvm.coro.noop", + "llvm.coro.param", + "llvm.coro.prepare.retcon", + "llvm.coro.promise", + "llvm.coro.resume", + "llvm.coro.save", + "llvm.coro.size", + "llvm.coro.subfn.addr", "llvm.coro.suspend", + "llvm.coro.suspend.retcon", }; return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1; } @@ -217,9 +231,6 @@ static void clear(coro::Shape &Shape) { Shape.FrameTy = nullptr; Shape.FramePtr = nullptr; Shape.AllocaSpillBlock = nullptr; - Shape.ResumeSwitch = nullptr; - Shape.PromiseAlloca = nullptr; - Shape.HasFinalSuspend = false; } static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, @@ -235,6 +246,7 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, // Collect "interesting" coroutine intrinsics. 
void coro::Shape::buildFrom(Function &F) { + bool HasFinalSuspend = false; size_t FinalSuspendIndex = 0; clear(*this); SmallVector CoroFrames; @@ -257,9 +269,15 @@ void coro::Shape::buildFrom(Function &F) { if (II->use_empty()) UnusedCoroSaves.push_back(cast(II)); break; - case Intrinsic::coro_suspend: - CoroSuspends.push_back(cast(II)); - if (CoroSuspends.back()->isFinal()) { + case Intrinsic::coro_suspend_retcon: { + auto Suspend = cast(II); + CoroSuspends.push_back(Suspend); + break; + } + case Intrinsic::coro_suspend: { + auto Suspend = cast(II); + CoroSuspends.push_back(Suspend); + if (Suspend->isFinal()) { if (HasFinalSuspend) report_fatal_error( "Only one suspend point can be marked as final"); @@ -267,18 +285,23 @@ void coro::Shape::buildFrom(Function &F) { FinalSuspendIndex = CoroSuspends.size() - 1; } break; + } case Intrinsic::coro_begin: { auto CB = cast(II); - if (CB->getId()->getInfo().isPreSplit()) { - if (CoroBegin) - report_fatal_error( + + // Ignore coro id's that aren't pre-split. + auto Id = dyn_cast(CB->getId()); + if (Id && !Id->getInfo().isPreSplit()) + break; + + if (CoroBegin) + report_fatal_error( "coroutine should have exactly one defining @llvm.coro.begin"); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); - CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); - CB->removeAttribute(AttributeList::FunctionIndex, - Attribute::NoDuplicate); - CoroBegin = CB; - } + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); + CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + CB->removeAttribute(AttributeList::FunctionIndex, + Attribute::NoDuplicate); + CoroBegin = CB; break; } case Intrinsic::coro_end: @@ -310,7 +333,7 @@ void coro::Shape::buildFrom(Function &F) { // Replace all coro.suspend with undef and remove related coro.saves if // present. 
- for (CoroSuspendInst *CS : CoroSuspends) { + for (AnyCoroSuspendInst *CS : CoroSuspends) { CS->replaceAllUsesWith(UndefValue::get(CS->getType())); CS->eraseFromParent(); if (auto *CoroSave = CS->getCoroSave()) @@ -324,19 +347,136 @@ void coro::Shape::buildFrom(Function &F) { return; } + auto Id = CoroBegin->getId(); + switch (auto IdIntrinsic = Id->getIntrinsicID()) { + case Intrinsic::coro_id: { + auto SwitchId = cast(Id); + this->ABI = coro::ABI::Switch; + this->SwitchLowering.HasFinalSuspend = HasFinalSuspend; + this->SwitchLowering.ResumeSwitch = nullptr; + this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); + this->SwitchLowering.ResumeEntryBlock = nullptr; + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id must be paired with coro.suspend"); + } + + if (!Suspend->getCoroSave()) + createCoroSave(CoroBegin, Suspend); + } + break; + } + + case Intrinsic::coro_id_retcon: + case Intrinsic::coro_id_retcon_once: { + auto ContinuationId = cast(Id); + ContinuationId->checkWellFormed(); + this->ABI = (IdIntrinsic == Intrinsic::coro_id_retcon + ? coro::ABI::Retcon + : coro::ABI::RetconOnce); + auto Prototype = ContinuationId->getPrototype(); + this->RetconLowering.ResumePrototype = Prototype; + this->RetconLowering.Alloc = ContinuationId->getAllocFunction(); + this->RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + this->RetconLowering.ReturnBlock = nullptr; + this->RetconLowering.IsFrameInlineInStorage = false; + + // Determine the result value types, and make sure they match up with + // the values passed to the suspends. 
+ auto ResultTys = getRetconResultTypes(); + auto ResumeTys = getRetconResumeTypes(); + + for (auto AnySuspend : CoroSuspends) { + auto Suspend = dyn_cast(AnySuspend); + if (!Suspend) { +#ifndef NDEBUG + AnySuspend->dump(); +#endif + report_fatal_error("coro.id.retcon.* must be paired with " + "coro.suspend.retcon"); + } + + // Check that the argument types of the suspend match the results. + auto SI = Suspend->value_begin(), SE = Suspend->value_end(); + auto RI = ResultTys.begin(), RE = ResultTys.end(); + for (; SI != SE && RI != RE; ++SI, ++RI) { + auto SrcTy = (*SI)->getType(); + if (SrcTy != *RI) { + // The optimizer likes to eliminate bitcasts leading into variadic + // calls, but that messes with our invariants. Re-insert the + // bitcast and ignore this type mismatch. + if (CastInst::isBitCastable(SrcTy, *RI)) { + auto BCI = new BitCastInst(*SI, *RI, "", Suspend); + SI->set(BCI); + continue; + } + +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("argument to coro.suspend.retcon does not " + "match corresponding prototype function result"); + } + } + if (SI != SE || RI != RE) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of arguments to coro.suspend.retcon"); + } + + // Check that the result type of the suspend matches the resume types. 
+ Type *SResultTy = Suspend->getType(); + ArrayRef SuspendResultTys; + if (SResultTy->isVoidTy()) { + // leave as empty array + } else if (auto SResultStructTy = dyn_cast(SResultTy)) { + SuspendResultTys = SResultStructTy->elements(); + } else { + // forms an ArrayRef using SResultTy, be careful + SuspendResultTys = SResultTy; + } + if (SuspendResultTys.size() != ResumeTys.size()) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("wrong number of results from coro.suspend.retcon"); + } + for (size_t I = 0, E = ResumeTys.size(); I != E; ++I) { + if (SuspendResultTys[I] != ResumeTys[I]) { +#ifndef NDEBUG + Suspend->dump(); + Prototype->getFunctionType()->dump(); +#endif + report_fatal_error("result from coro.suspend.retcon does not " + "match corresponding prototype function param"); + } + } + } + break; + } + + default: + llvm_unreachable("coro.begin is not dependent on a coro.id call"); + } + // The coro.free intrinsic is always lowered to the result of coro.begin. for (CoroFrameInst *CF : CoroFrames) { CF->replaceAllUsesWith(CoroBegin); CF->eraseFromParent(); } - // Canonicalize coro.suspend by inserting a coro.save if needed. - for (CoroSuspendInst *CS : CoroSuspends) - if (!CS->getCoroSave()) - createCoroSave(CoroBegin, CS); - // Move final suspend to be the last element in the CoroSuspends vector. - if (HasFinalSuspend && + if (ABI == coro::ABI::Switch && + SwitchLowering.HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); @@ -345,6 +485,154 @@ void coro::Shape::buildFrom(Function &F) { CoroSave->eraseFromParent(); } +static void propagateCallAttrsFromCallee(CallInst *Call, Function *Callee) { + Call->setCallingConv(Callee->getCallingConv()); + // TODO: attributes? 
+} + +static void addCallToCallGraph(CallGraph *CG, CallInst *Call, Function *Callee){ + if (CG) + (*CG)[Call->getFunction()]->addCalledFunction(Call, (*CG)[Callee]); +} + +Value *coro::Shape::emitAlloc(IRBuilder<> &Builder, Value *Size, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't allocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Alloc = RetconLowering.Alloc; + Size = Builder.CreateIntCast(Size, + Alloc->getFunctionType()->getParamType(0), + /*is signed*/ false); + auto *Call = Builder.CreateCall(Alloc, Size); + propagateCallAttrsFromCallee(Call, Alloc); + addCallToCallGraph(CG, Call, Alloc); + return Call; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr, + CallGraph *CG) const { + switch (ABI) { + case coro::ABI::Switch: + llvm_unreachable("can't deallocate memory in coro switch-lowering"); + + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { + auto Dealloc = RetconLowering.Dealloc; + Ptr = Builder.CreateBitCast(Ptr, + Dealloc->getFunctionType()->getParamType(0)); + auto *Call = Builder.CreateCall(Dealloc, Ptr); + propagateCallAttrsFromCallee(Call, Dealloc); + addCallToCallGraph(CG, Call, Dealloc); + return; + } + } + llvm_unreachable("Unknown coro::ABI enum"); +} + +LLVM_ATTRIBUTE_NORETURN +static void fail(const Instruction *I, const char *Reason, Value *V) { +#ifndef NDEBUG + I->dump(); + if (V) { + errs() << " Value: "; + V->printAsOperand(llvm::errs()); + errs() << '\n'; + } +#endif + report_fatal_error(Reason); +} + +/// Check that the given value is a well-formed prototype for the +/// llvm.coro.id.retcon.* intrinsics.
+static void checkWFRetconPrototype(const AnyCoroIdRetconInst *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.id.retcon.* prototype not a Function", V); + + auto FT = F->getFunctionType(); + + if (isa(I)) { + bool ResultOkay; + if (FT->getReturnType()->isPointerTy()) { + ResultOkay = true; + } else if (auto SRetTy = dyn_cast(FT->getReturnType())) { + ResultOkay = (!SRetTy->isOpaque() && + SRetTy->getNumElements() > 0 && + SRetTy->getElementType(0)->isPointerTy()); + } else { + ResultOkay = false; + } + if (!ResultOkay) + fail(I, "llvm.coro.id.retcon prototype must return pointer as first " + "result", F); + + if (FT->getReturnType() != + I->getFunction()->getFunctionType()->getReturnType()) + fail(I, "llvm.coro.id.retcon prototype return type must be same as " + "current function return type", F); + } else { + // No meaningful validation to do here for llvm.coro.id.retcon.once. + } + + if (FT->getNumParams() == 0 || !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.id.retcon.* prototype must take pointer as " + "its first parameter", F); +} + +/// Check that the given value is a well-formed allocator. +static void checkWFAlloc(const Instruction *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* allocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isPointerTy()) + fail(I, "llvm.coro.* allocator must return a pointer", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isIntegerTy()) + fail(I, "llvm.coro.* allocator must take integer as only param", F); +} + +/// Check that the given value is a well-formed deallocator.
+static void checkWFDealloc(const Instruction *I, Value *V) { + auto F = dyn_cast(V->stripPointerCasts()); + if (!F) + fail(I, "llvm.coro.* deallocator not a Function", V); + + auto FT = F->getFunctionType(); + if (!FT->getReturnType()->isVoidTy()) + fail(I, "llvm.coro.* deallocator must return void", F); + + if (FT->getNumParams() != 1 || + !FT->getParamType(0)->isPointerTy()) + fail(I, "llvm.coro.* deallocator must take pointer as only param", F); +} + +static void checkConstantInt(const Instruction *I, Value *V, + const char *Reason) { + if (!isa(V)) { + fail(I, Reason, V); + } +} + +void AnyCoroIdRetconInst::checkWellFormed() const { + checkConstantInt(this, getArgOperand(SizeArg), + "size argument to coro.id.retcon.* must be constant"); + checkConstantInt(this, getArgOperand(AlignArg), + "alignment argument to coro.id.retcon.* must be constant"); + checkWFRetconPrototype(this, getArgOperand(PrototypeArg)); + checkWFAlloc(this, getArgOperand(AllocArg)); + checkWFDealloc(this, getArgOperand(DeallocArg)); +} + void LLVMAddCoroEarlyPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCoroEarlyPass()); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 95a9f31cced..dd9f74a881e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -304,7 +304,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // of the previous load. LoadInst *newLoad = IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlignment()); + newLoad->setAlignment(MaybeAlign(OrigLoad->getAlignment())); // Transfer the AA info too. 
AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp index 2a52c6b9b4a..95f47345d8f 100644 --- a/lib/Transforms/IPO/Attributor.cpp +++ b/lib/Transforms/IPO/Attributor.cpp @@ -16,11 +16,15 @@ #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -30,6 +34,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" + #include using namespace llvm; @@ -46,19 +53,50 @@ STATISTIC(NumAttributesValidFixpoint, "Number of abstract attributes in a valid fixpoint state"); STATISTIC(NumAttributesManifested, "Number of abstract attributes manifested in IR"); -STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind"); -STATISTIC(NumFnUniqueReturned, "Number of function with unique return"); -STATISTIC(NumFnKnownReturns, "Number of function with known return values"); -STATISTIC(NumFnArgumentReturned, - "Number of function arguments marked returned"); -STATISTIC(NumFnNoSync, "Number of functions marked nosync"); -STATISTIC(NumFnNoFree, "Number of functions marked nofree"); -STATISTIC(NumFnReturnedNonNull, - "Number of function return values marked nonnull"); -STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull"); -STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull"); -STATISTIC(NumFnWillReturn, 
"Number of functions marked willreturn"); +// Some helper macros to deal with statistics tracking. +// +// Usage: +// For simple IR attribute tracking overload trackStatistics in the abstract +// attribute and choose the right STATS_DECLTRACK_********* macro, +// e.g.: +// void trackStatistics() const override { +// STATS_DECLTRACK_ARG_ATTR(returned) +// } +// If there is a single "increment" side one can use the macro +// STATS_DECLTRACK with a custom message. If there are multiple increment +// sides, STATS_DECL and STATS_TRACK can also be used separately. +// +#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \ + ("Number of " #TYPE " marked '" #NAME "'") +#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME +#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG); +#define STATS_DECL(NAME, TYPE, MSG) \ + STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG); +#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE)); +#define STATS_DECLTRACK(NAME, TYPE, MSG) \ + { \ + STATS_DECL(NAME, TYPE, MSG) \ + STATS_TRACK(NAME, TYPE) \ + } +#define STATS_DECLTRACK_ARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME)) +#define STATS_DECLTRACK_CSARG_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSArguments, \ + BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME)) +#define STATS_DECLTRACK_FN_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME)) +#define STATS_DECLTRACK_CS_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME)) +#define STATS_DECLTRACK_FNRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, FunctionReturn, \ + BUILD_STAT_MSG_IR_ATTR(function returns, NAME)) +#define STATS_DECLTRACK_CSRET_ATTR(NAME) \ + STATS_DECLTRACK(NAME, CSReturn, \ + BUILD_STAT_MSG_IR_ATTR(call site returns, NAME)) +#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \ + STATS_DECLTRACK(NAME, Floating, \ + ("Number of floating values known to be '" #NAME "'")) // TODO: Determine a good default value.
// @@ -72,18 +110,32 @@ static cl::opt MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); +static cl::opt VerifyMaxFixpointIterations( + "attributor-max-iterations-verify", cl::Hidden, + cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), + cl::init(false)); static cl::opt DisableAttributor( "attributor-disable", cl::Hidden, cl::desc("Disable the attributor inter-procedural deduction pass."), cl::init(true)); -static cl::opt VerifyAttributor( - "attributor-verify", cl::Hidden, - cl::desc("Verify the Attributor deduction and " - "manifestation of attributes -- may issue false-positive errors"), +static cl::opt ManifestInternal( + "attributor-manifest-internal", cl::Hidden, + cl::desc("Manifest Attributor internal string attributes."), cl::init(false)); +static cl::opt DepRecInterval( + "attributor-dependence-recompute-interval", cl::Hidden, + cl::desc("Number of iterations until dependences are recomputed."), + cl::init(4)); + +static cl::opt EnableHeapToStack("enable-heap-to-stack-conversion", + cl::init(true), cl::Hidden); + +static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), + cl::Hidden); + /// Logic operators for the change status enum class. /// ///{ @@ -95,78 +147,30 @@ ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { } ///} -/// Helper to adjust the statistics. 
-static void bookkeeping(AbstractAttribute::ManifestPosition MP, - const Attribute &Attr) { - if (!AreStatisticsEnabled()) - return; - - if (!Attr.isEnumAttribute()) - return; - switch (Attr.getKindAsEnum()) { - case Attribute::NoUnwind: - NumFnNoUnwind++; - return; - case Attribute::Returned: - NumFnArgumentReturned++; - return; - case Attribute::NoSync: - NumFnNoSync++; - break; - case Attribute::NoFree: - NumFnNoFree++; - break; - case Attribute::NonNull: - switch (MP) { - case AbstractAttribute::MP_RETURNED: - NumFnReturnedNonNull++; - break; - case AbstractAttribute::MP_ARGUMENT: - NumFnArgumentNonNull++; - break; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - NumCSArgumentNonNull++; - break; - default: - break; - } - break; - case Attribute::WillReturn: - NumFnWillReturn++; - break; - default: - return; - } -} - -template -using followValueCB_t = std::function; -template -using visitValueCB_t = std::function; - -/// Recursively visit all values that might become \p InitV at some point. This +/// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. The callback \p FollowValueCB is asked before -/// a potential origin value is looked at. If no \p FollowValueCB is passed, a -/// default one is used that will make sure we visit every value only once. Once -/// we cannot look through the value any further, the callback \p VisitValueCB -/// is invoked and passed the current value and the \p State. To limit how much -/// effort is invested, we will never visit more than \p MaxValues values. -template +/// with the "returned" attribute. Once we cannot look through the value any +/// further, the callback \p VisitValueCB is invoked and passed the current +/// value, the \p State, and a flag to indicate if we stripped anything. 
To +/// limit how much effort is invested, we will never visit more values than +/// specified by \p MaxValues. +template static bool genericValueTraversal( - Value *InitV, StateTy &State, visitValueCB_t &VisitValueCB, - followValueCB_t *FollowValueCB = nullptr, int MaxValues = 8) { + Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State, + const function_ref &VisitValueCB, + int MaxValues = 8) { + const AAIsDead *LivenessAA = nullptr; + if (IRP.getAnchorScope()) + LivenessAA = &A.getAAFor( + QueryingAA, IRPosition::function(*IRP.getAnchorScope()), + /* TrackDependence */ false); + bool AnyDead = false; + + // TODO: Use Positions here to allow context sensitivity in VisitValueCB SmallPtrSet Visited; - followValueCB_t DefaultFollowValueCB = [&](Value *Val, bool &) { - return Visited.insert(Val).second; - }; - - if (!FollowValueCB) - FollowValueCB = &DefaultFollowValueCB; - SmallVector Worklist; - Worklist.push_back(InitV); + Worklist.push_back(&IRP.getAssociatedValue()); int Iteration = 0; do { @@ -174,7 +178,7 @@ static bool genericValueTraversal( // Check if we should process the current value. To prevent endless // recursion keep a record of the values we followed! - if (!(*FollowValueCB)(V, State)) + if (!Visited.insert(V).second) continue; // Make sure we limit the compile time for complex expressions. @@ -183,23 +187,23 @@ static bool genericValueTraversal( // Explicitly look through calls with a "returned" attribute if we do // not have a pointer as stripPointerCasts only works on them. 
+ Value *NewV = nullptr; if (V->getType()->isPointerTy()) { - V = V->stripPointerCasts(); + NewV = V->stripPointerCasts(); } else { CallSite CS(V); if (CS && CS.getCalledFunction()) { - Value *NewV = nullptr; for (Argument &Arg : CS.getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { NewV = CS.getArgOperand(Arg.getArgNo()); break; } - if (NewV) { - Worklist.push_back(NewV); - continue; - } } } + if (NewV && NewV != V) { + Worklist.push_back(NewV); + continue; + } // Look through select instructions, visit both potential values. if (auto *SI = dyn_cast(V)) { @@ -208,35 +212,34 @@ static bool genericValueTraversal( continue; } - // Look through phi nodes, visit all operands. + // Look through phi nodes, visit all live operands. if (auto *PHI = dyn_cast(V)) { - Worklist.append(PHI->op_begin(), PHI->op_end()); + assert(LivenessAA && + "Expected liveness in the presence of instructions!"); + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + const BasicBlock *IncomingBB = PHI->getIncomingBlock(u); + if (LivenessAA->isAssumedDead(IncomingBB->getTerminator())) { + AnyDead = true; + continue; + } + Worklist.push_back(PHI->getIncomingValue(u)); + } continue; } // Once a leaf is reached we inform the user through the callback. - VisitValueCB(V, State); + if (!VisitValueCB(*V, State, Iteration > 1)) + return false; } while (!Worklist.empty()); + // If we actually used liveness information, we have to record a dependence. + if (AnyDead) + A.recordDependence(*LivenessAA, QueryingAA); + // All values have been visited. return true; } -/// Helper to identify the correct offset into an attribute list.
-static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - switch (MP) { - case AbstractAttribute::MP_ARGUMENT: - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - return ArgNo + AttributeList::FirstArgIndex; - case AbstractAttribute::MP_FUNCTION: - return AttributeList::FunctionIndex; - case AbstractAttribute::MP_RETURNED: - return AttributeList::ReturnIndex; - } - llvm_unreachable("Unknown manifest position!"); -} - /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -247,12 +250,9 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { /// Return true if the information provided by \p Attr was added to the /// attribute list \p Attrs. This is only the case if it was not already present -/// in \p Attrs at the position describe by \p MP and \p ArgNo. +/// in \p Attrs at the position described by \p AttrIdx. static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, - AttributeList &Attrs, - AbstractAttribute::ManifestPosition MP, - unsigned ArgNo = 0) { - unsigned AttrIdx = getAttrIndex(MP, ArgNo); + AttributeList &Attrs, int AttrIdx) { if (Attr.isEnumAttribute()) { Attribute::AttrKind Kind = Attr.getKindAsEnum(); @@ -270,9 +270,47 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr, Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); return true; } + if (Attr.isIntAttribute()) { + Attribute::AttrKind Kind = Attr.getKindAsEnum(); + if (Attrs.hasAttribute(AttrIdx, Kind)) + if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind))) + return false; + Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind); + Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr); + return true; + } llvm_unreachable("Expected enum or string attribute!"); } +static const Value *getPointerOperand(const Instruction *I) { + if (auto *LI = dyn_cast(I)) + if (!LI->isVolatile()) + return
LI->getPointerOperand(); + + if (auto *SI = dyn_cast(I)) + if (!SI->isVolatile()) + return SI->getPointerOperand(); + + if (auto *CXI = dyn_cast(I)) + if (!CXI->isVolatile()) + return CXI->getPointerOperand(); + + if (auto *RMWI = dyn_cast(I)) + if (!RMWI->isVolatile()) + return RMWI->getPointerOperand(); + + return nullptr; +} +static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I, + int64_t &BytesOffset, + const DataLayout &DL) { + const Value *Ptr = getPointerOperand(I); + if (!Ptr) + return nullptr; + + return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, + /*AllowNonInbounds*/ false); +} ChangeStatus AbstractAttribute::update(Attributor &A) { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; @@ -289,143 +327,527 @@ ChangeStatus AbstractAttribute::update(Attributor &A) { return HasChanged; } -ChangeStatus AbstractAttribute::manifest(Attributor &A) { - assert(getState().isValidState() && - "Attempted to manifest an invalid state!"); - assert(getAssociatedValue() && - "Attempted to manifest an attribute without associated value!"); - - ChangeStatus HasChanged = ChangeStatus::UNCHANGED; - SmallVector DeducedAttrs; - getDeducedAttributes(DeducedAttrs); - - Function &ScopeFn = getAnchorScope(); - LLVMContext &Ctx = ScopeFn.getContext(); - ManifestPosition MP = getManifestPosition(); - - AttributeList Attrs; - SmallVector ArgNos; +ChangeStatus +IRAttributeManifest::manifestAttrs(Attributor &A, IRPosition &IRP, + const ArrayRef &DeducedAttrs) { + Function *ScopeFn = IRP.getAssociatedFunction(); + IRPosition::Kind PK = IRP.getPositionKind(); // In the following some generic code that will manifest attributes in // DeducedAttrs if they improve the current IR. Due to the different // annotation positions we use the underlying AttributeList interface. - // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations. 
- switch (MP) { - case MP_ARGUMENT: - ArgNos.push_back(cast(getAssociatedValue())->getArgNo()); - Attrs = ScopeFn.getAttributes(); + AttributeList Attrs; + switch (PK) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + return ChangeStatus::UNCHANGED; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + Attrs = ScopeFn->getAttributes(); break; - case MP_FUNCTION: - case MP_RETURNED: - ArgNos.push_back(0); - Attrs = ScopeFn.getAttributes(); + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); break; - case MP_CALL_SITE_ARGUMENT: { - CallSite CS(&getAnchoredValue()); - for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++) - if (CS.getArgOperand(u) == getAssociatedValue()) - ArgNos.push_back(u); - Attrs = CS.getAttributes(); - } } + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + LLVMContext &Ctx = IRP.getAnchorValue().getContext(); for (const Attribute &Attr : DeducedAttrs) { - for (unsigned ArgNo : ArgNos) { - if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo)) - continue; + if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx())) + continue; - HasChanged = ChangeStatus::CHANGED; - bookkeeping(MP, Attr); - } + HasChanged = ChangeStatus::CHANGED; } if (HasChanged == ChangeStatus::UNCHANGED) return HasChanged; - switch (MP) { - case MP_ARGUMENT: - case MP_FUNCTION: - case MP_RETURNED: - ScopeFn.setAttributes(Attrs); + switch (PK) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_FUNCTION: + case IRPosition::IRP_RETURNED: + ScopeFn->setAttributes(Attrs); + break; + case IRPosition::IRP_CALL_SITE: + case IRPosition::IRP_CALL_SITE_RETURNED: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + break; + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: break; - case MP_CALL_SITE_ARGUMENT: - 
CallSite(&getAnchoredValue()).setAttributes(Attrs); } return HasChanged; } -Function &AbstractAttribute::getAnchorScope() { - Value &V = getAnchoredValue(); - if (isa(V)) - return cast(V); - if (isa(V)) - return *cast(V).getParent(); - if (isa(V)) - return *cast(V).getFunction(); - llvm_unreachable("No scope for anchored value found!"); +const IRPosition IRPosition::EmptyKey(255); +const IRPosition IRPosition::TombstoneKey(256); + +SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { + IRPositions.emplace_back(IRP); + + ImmutableCallSite ICS(&IRP.getAnchorValue()); + switch (IRP.getPositionKind()) { + case IRPosition::IRP_INVALID: + case IRPosition::IRP_FLOAT: + case IRPosition::IRP_FUNCTION: + return; + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_RETURNED: + IRPositions.emplace_back( + IRPosition::function(*IRP.getAssociatedFunction())); + return; + case IRPosition::IRP_CALL_SITE: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) + if (const Function *Callee = ICS.getCalledFunction()) + IRPositions.emplace_back(IRPosition::function(*Callee)); + return; + case IRPosition::IRP_CALL_SITE_RETURNED: + assert(ICS && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. + if (!ICS.hasOperandBundles()) { + if (const Function *Callee = ICS.getCalledFunction()) { + IRPositions.emplace_back(IRPosition::returned(*Callee)); + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + } + IRPositions.emplace_back( + IRPosition::callsite_function(cast(*ICS.getInstruction()))); + return; + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + int ArgNo = IRP.getArgNo(); + assert(ICS && ArgNo >= 0 && "Expected call site!"); + // TODO: We need to look at the operand bundles similar to the redirection + // in CallBase. 
+ if (!ICS.hasOperandBundles()) { + const Function *Callee = ICS.getCalledFunction(); + if (Callee && Callee->arg_size() > unsigned(ArgNo)) + IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); + if (Callee) + IRPositions.emplace_back(IRPosition::function(*Callee)); + } + IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); + return; + } + } } -const Function &AbstractAttribute::getAnchorScope() const { - return const_cast(this)->getAnchorScope(); +bool IRPosition::hasAttr(ArrayRef AKs, + bool IgnoreSubsumingPositions) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) { + for (Attribute::AttrKind AK : AKs) + if (EquivIRP.getAttr(AK).getKindAsEnum() == AK) + return true; + // The first position returned by the SubsumingPositionIterator is + // always the position itself. If we ignore subsuming positions we + // are done after the first iteration. + if (IgnoreSubsumingPositions) + break; + } + return false; } +void IRPosition::getAttrs(ArrayRef AKs, + SmallVectorImpl &Attrs) const { + for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) + for (Attribute::AttrKind AK : AKs) { + const Attribute &Attr = EquivIRP.getAttr(AK); + if (Attr.getKindAsEnum() == AK) + Attrs.push_back(Attr); + } +} + +void IRPosition::verify() { + switch (KindOrArgNo) { + default: + assert(KindOrArgNo >= 0 && "Expected argument or call site argument!"); + assert((isa(AnchorVal) || isa(AnchorVal)) && + "Expected call base or argument for positive attribute index!"); + if (isa(AnchorVal)) { + assert(cast(AnchorVal)->getArgNo() == unsigned(getArgNo()) && + "Argument number mismatch!"); + assert(cast(AnchorVal) == &getAssociatedValue() && + "Associated value mismatch!"); + } else { + assert(cast(*AnchorVal).arg_size() > unsigned(getArgNo()) && + "Call site argument number mismatch!"); + assert(cast(*AnchorVal).getArgOperand(getArgNo()) == + &getAssociatedValue() && + "Associated value mismatch!"); + } + break; + case 
IRP_INVALID: + assert(!AnchorVal && "Expected no value for an invalid position!"); + break; + case IRP_FLOAT: + assert((!isa(&getAssociatedValue()) && + !isa(&getAssociatedValue())) && + "Expected specialized kind for call base and argument values!"); + break; + case IRP_RETURNED: + assert(isa(AnchorVal) && + "Expected function for a 'returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE_RETURNED: + assert((isa(AnchorVal)) && + "Expected call base for 'call site returned' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_CALL_SITE: + assert((isa(AnchorVal)) && + "Expected call base for 'call site function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + case IRP_FUNCTION: + assert(isa(AnchorVal) && + "Expected function for a 'function' position!"); + assert(AnchorVal == &getAssociatedValue() && "Associated value mismatch!"); + break; + } +} + +namespace { +/// Helper functions to clamp a state \p S of type \p StateType with the +/// information in \p R and indicate/return if \p S did change (as-in update is +/// required to be run again). +/// +///{ +template +ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R); + +template <> +ChangeStatus clampStateAndIndicateChange(IntegerState &S, + const IntegerState &R) { + auto Assumed = S.getAssumed(); + S ^= R; + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} + +template <> +ChangeStatus clampStateAndIndicateChange(BooleanState &S, + const BooleanState &R) { + return clampStateAndIndicateChange(S, R); +} +///} + +/// Clamp the information known for all returned values of a function +/// (identified by \p QueryingAA) into \p S. 
+template +static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for " + << static_cast(QueryingAA) + << " into " << S << "\n"); + + assert((QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_RETURNED || + QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED) && + "Can only clamp returned value states for a function returned or call " + "site returned position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional T; + + // Callback for each possibly returned value. + auto CheckReturnValue = [&](Value &RV) -> bool { + const IRPosition &RVPos = IRPosition::value(RV); + const AAType &AA = A.getAAFor(QueryingAA, RVPos); + LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr() + << " @ " << RVPos << "\n"); + const StateType &AAS = static_cast(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T + << "\n"); + return T->isValidState(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} + +/// Helper class to compose two generic deduction +template class F, template class G> +struct AAComposeTwoGenericDeduction + : public F, StateType> { + AAComposeTwoGenericDeduction(const IRPosition &IRP) + : F, StateType>(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus ChangedF = F, StateType>::updateImpl(A); + ChangeStatus ChangedG = G::updateImpl(A); + return ChangedF | ChangedG; + } +}; + +/// Helper class for generic deduction: return value -> returned position. 
+template +struct AAReturnedFromReturnedValues : public Base { + AAReturnedFromReturnedValues(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampReturnedValueStates(A, *this, S); + // TODO: If we know we visited all returned values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(this->getState(), S); + } +}; + +/// Clamp the information known at all call sites for a given argument +/// (identified by \p QueryingAA) into \p S. +template +static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, + StateType &S) { + LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for " + << static_cast(QueryingAA) + << " into " << S << "\n"); + + assert(QueryingAA.getIRPosition().getPositionKind() == + IRPosition::IRP_ARGUMENT && + "Can only clamp call site argument states for an argument position!"); + + // Use an optional state as there might not be any return values and we want + // to join (IntegerState::operator&) the state of all there are. + Optional T; + + // The argument number which is also the call site argument number. + unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + + auto CallSiteCheck = [&](AbstractCallSite ACS) { + const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is on not associated + // (which can happen for callback calls). 
+ if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + const AAType &AA = A.getAAFor(QueryingAA, ACSArgPos); + LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction() + << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n"); + const StateType &AAS = static_cast(AA.getState()); + if (T.hasValue()) + *T &= AAS; + else + T = AAS; + LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T + << "\n"); + return T->isValidState(); + }; + + if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true)) + S.indicatePessimisticFixpoint(); + else if (T.hasValue()) + S ^= *T; +} + +/// Helper class for generic deduction: call site argument -> argument position. +template +struct AAArgumentFromCallSiteArguments : public Base { + AAArgumentFromCallSiteArguments(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + StateType S; + clampCallSiteArgumentStates(A, *this, S); + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(this->getState(), S); + } +}; + +/// Helper class for generic replication: function returned -> cs returned. +template +struct AACallSiteReturnedFromReturned : public Base { + AACallSiteReturnedFromReturned(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + assert(this->getIRPosition().getPositionKind() == + IRPosition::IRP_CALL_SITE_RETURNED && + "Can only wrap function returned positions for call site returned " + "positions!"); + auto &S = this->getState(); + + const Function *AssociatedFunction = + this->getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) + return S.indicatePessimisticFixpoint(); + + IRPosition FnPos = IRPosition::returned(*AssociatedFunction); + const AAType &AA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + S, static_cast(AA.getState())); + } +}; + +/// Helper class for generic deduction using must-be-executed-context +/// Base class is required to have `followUse` method. + +/// bool followUse(Attributor &A, const Use *U, const Instruction *I) +/// U - Underlying use. +/// I - The user of the \p U. +/// `followUse` returns true if the value should be tracked transitively. + +template +struct AAFromMustBeExecutedContext : public Base { + AAFromMustBeExecutedContext(const IRPosition &IRP) : Base(IRP) {} + + void initialize(Attributor &A) override { + Base::initialize(A); + IRPosition &IRP = this->getIRPosition(); + Instruction *CtxI = IRP.getCtxI(); + + if (!CtxI) + return; + + for (const Use &U : IRP.getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto BeforeState = this->getState(); + auto &S = this->getState(); + Instruction *CtxI = this->getIRPosition().getCtxI(); + if (!CtxI) + return ChangeStatus::UNCHANGED; + + MustBeExecutedContextExplorer &Explorer = + A.getInfoCache().getMustBeExecutedContextExplorer(); + + SetVector NextUses; + + for (const Use *U : Uses) { + if (const Instruction *UserI = dyn_cast(U->getUser())) { + auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI); + bool Found = EIt.count(UserI); + while (!Found && ++EIt != EEnd) + Found = EIt.getCurrentInst() == UserI; + if (Found && Base::followUse(A, U, UserI)) + for (const Use &Us : UserI->uses()) + NextUses.insert(&Us); + } + } + for (const Use *U : NextUses) + Uses.insert(U); + + return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + } + +private: + /// Container for (transitive) uses of the associated value. + SetVector Uses; +}; + +template +using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext = + AAComposeTwoGenericDeduction; + +template +using AACallSiteReturnedFromReturnedAndMustBeExecutedContext = + AAComposeTwoGenericDeduction; + /// -----------------------NoUnwind Function Attribute-------------------------- -struct AANoUnwindFunction : AANoUnwind, BooleanState { - - AANoUnwindFunction(Function &F, InformationCache &InfoCache) - : AANoUnwind(F, InfoCache) {} - - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } - - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } +struct AANoUnwindImpl : AANoUnwind { + AANoUnwindImpl(const IRPosition &IRP) : AANoUnwind(IRP) {} const std::string getAsStr() const override { return getAssumed() ? "nounwind" : "may-unwind"; } /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + auto Opcodes = { + (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, + (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; - /// See AANoUnwind::isAssumedNoUnwind(). - bool isAssumedNoUnwind() const override { return getAssumed(); } + auto CheckForNoUnwind = [&](Instruction &I) { + if (!I.mayThrow()) + return true; - /// See AANoUnwind::isKnownNoUnwind(). - bool isKnownNoUnwind() const override { return getKnown(); } + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &NoUnwindAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + return NoUnwindAA.isAssumedNoUnwind(); + } + return false; + }; + + if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } }; -ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +struct AANoUnwindFunction final : public AANoUnwindImpl { + AANoUnwindFunction(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = { - (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet, - (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume}; + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) } +}; - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - if (!I->mayThrow()) - continue; +/// NoUnwind attribute deduction for a call sites. 
+struct AANoUnwindCallSite final : AANoUnwindImpl { + AANoUnwindCallSite(const IRPosition &IRP) : AANoUnwindImpl(IRP) {} - auto *NoUnwindAA = A.getAAFor(*this, *I); - - if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoUnwindImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); } - return ChangeStatus::UNCHANGED; -} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } +}; /// --------------------- Function Return Values ------------------------------- @@ -434,68 +856,48 @@ ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) { /// /// If there is a unique returned value R, the manifest method will: /// - mark R with the "returned" attribute, if R is an argument. -class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState { +class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { /// Mapping of values potentially returned by the associated function to the /// return instructions that might return them. 
- DenseMap> ReturnedValues; + MapVector> ReturnedValues; + + /// Mapping to remember the number of returned values for a call site such + /// that we can avoid updates if nothing changed. + DenseMap NumReturnedValuesPerKnownAA; + + /// Set of unresolved calls returned by the associated function. + SmallSetVector UnresolvedCalls; /// State flags /// ///{ - bool IsFixed; - bool IsValidState; - bool HasOverdefinedReturnedCalls; + bool IsFixed = false; + bool IsValidState = true; ///} - /// Collect values that could become \p V in the set \p Values, each mapped to - /// \p ReturnInsts. - void collectValuesRecursively( - Attributor &A, Value *V, SmallPtrSetImpl &ReturnInsts, - DenseMap> &Values) { - - visitValueCB_t VisitValueCB = [&](Value *Val, bool &) { - assert(!isa(Val) || - &getAnchorScope() == cast(Val)->getFunction()); - Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end()); - }; - - bool UnusedBool; - bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB); - - // If we did abort the above traversal we haven't see all the values. - // Consequently, we cannot know if the information we would derive is - // accurate so we give up early. - if (!Success) - indicatePessimisticFixpoint(); - } - public: - /// See AbstractAttribute::AbstractAttribute(...). - AAReturnedValuesImpl(Function &F, InformationCache &InfoCache) - : AAReturnedValues(F, InfoCache) { - // We do not have an associated argument yet. - AssociatedVal = nullptr; - } + AAReturnedValuesImpl(const IRPosition &IRP) : AAReturnedValues(IRP) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // Reset the state. - AssociatedVal = nullptr; IsFixed = false; IsValidState = true; - HasOverdefinedReturnedCalls = false; ReturnedValues.clear(); - Function &F = cast(getAnchoredValue()); + Function *F = getAssociatedFunction(); + if (!F) { + indicatePessimisticFixpoint(); + return; + } // The map from instruction opcodes to those instructions in the function. 
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F); // Look through all arguments, if one is marked as returned we are done. - for (Argument &Arg : F.args()) { + for (Argument &Arg : F->args()) { if (Arg.hasReturnedAttr()) { - auto &ReturnInstSet = ReturnedValues[&Arg]; for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) ReturnInstSet.insert(cast(RI)); @@ -505,13 +907,8 @@ public: } } - // If no argument was marked as returned we look at all return instructions - // and collect potentially returned values. - for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) { - SmallPtrSet RISet({cast(RI)}); - collectValuesRecursively(A, cast(RI)->getReturnValue(), RISet, - ReturnedValues); - } + if (!F->hasExactDefinition()) + indicatePessimisticFixpoint(); } /// See AbstractAttribute::manifest(...). @@ -523,25 +920,35 @@ public: /// See AbstractAttribute::getState(...). const AbstractState &getState() const override { return *this; } - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } - /// See AbstractAttribute::updateImpl(Attributor &A). ChangeStatus updateImpl(Attributor &A) override; + llvm::iterator_range returned_values() override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + llvm::iterator_range returned_values() const override { + return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end()); + } + + const SmallSetVector &getUnresolvedCalls() const override { + return UnresolvedCalls; + } + /// Return the number of potential return values, -1 if unknown. - size_t getNumReturnValues() const { + size_t getNumReturnValues() const override { return isValidState() ? ReturnedValues.size() : -1; } /// Return an assumed unique return value if a single candidate is found. If /// there cannot be one, return a nullptr. 
If it is not clear yet, return the /// Optional::NoneType. - Optional getAssumedUniqueReturnValue() const; + Optional getAssumedUniqueReturnValue(Attributor &A) const; - /// See AbstractState::checkForallReturnedValues(...). - bool - checkForallReturnedValues(std::function &Pred) const override; + /// See AbstractState::checkForAllReturnedValues(...). + bool checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const override; /// Pretty print the attribute similar to the IR representation. const std::string getAsStr() const override; @@ -553,13 +960,15 @@ public: bool isValidState() const override { return IsValidState; } /// See AbstractState::indicateOptimisticFixpoint(...). - void indicateOptimisticFixpoint() override { + ChangeStatus indicateOptimisticFixpoint() override { IsFixed = true; - IsValidState &= true; + return ChangeStatus::UNCHANGED; } - void indicatePessimisticFixpoint() override { + + ChangeStatus indicatePessimisticFixpoint() override { IsFixed = true; IsValidState = false; + return ChangeStatus::CHANGED; } }; @@ -568,21 +977,52 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { // Bookkeeping. assert(isValidState()); - NumFnKnownReturns++; + STATS_DECLTRACK(KnownReturnValues, FunctionReturn, + "Number of function with known return values"); // Check if we have an assumed unique return value that we could manifest. - Optional UniqueRV = getAssumedUniqueReturnValue(); + Optional UniqueRV = getAssumedUniqueReturnValue(A); if (!UniqueRV.hasValue() || !UniqueRV.getValue()) return Changed; // Bookkeeping. - NumFnUniqueReturned++; + STATS_DECLTRACK(UniqueReturnValue, FunctionReturn, + "Number of function with unique return"); + + // Callback to replace the uses of CB with the constant C. 
+ auto ReplaceCallSiteUsersWith = [](CallBase &CB, Constant &C) { + if (CB.getNumUses() == 0 || CB.isMustTailCall()) + return ChangeStatus::UNCHANGED; + CB.replaceAllUsesWith(&C); + return ChangeStatus::CHANGED; + }; // If the assumed unique return value is an argument, annotate it. if (auto *UniqueRVArg = dyn_cast(UniqueRV.getValue())) { - AssociatedVal = UniqueRVArg; - Changed = AbstractAttribute::manifest(A) | Changed; + getIRPosition() = IRPosition::argument(*UniqueRVArg); + Changed = IRAttribute::manifest(A); + } else if (auto *RVC = dyn_cast(UniqueRV.getValue())) { + // We can replace the returned value with the unique returned constant. + Value &AnchorValue = getAnchorValue(); + if (Function *F = dyn_cast(&AnchorValue)) { + for (const Use &U : F->uses()) + if (CallBase *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U)) { + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, CB->getType()); + Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed; + } + } else { + assert(isa(AnchorValue) && + "Expcected a function or call base anchor!"); + Constant *RVCCast = + ConstantExpr::getTruncOrBitCast(RVC, AnchorValue.getType()); + Changed = ReplaceCallSiteUsersWith(cast(AnchorValue), *RVCCast); + } + if (Changed == ChangeStatus::CHANGED) + STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn, + "Number of function returns replaced by constant return"); } return Changed; @@ -590,18 +1030,20 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) { const std::string AAReturnedValuesImpl::getAsStr() const { return (isAtFixpoint() ? "returns(#" : "may-return(#") + - (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")"; + (isValidState() ? 
std::to_string(getNumReturnValues()) : "?") + + ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]"; } -Optional AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { - // If checkForallReturnedValues provides a unique value, ignoring potential +Optional +AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const { + // If checkForAllReturnedValues provides a unique value, ignoring potential // undef values that can also be present, it is assumed to be the actual // return value and forwarded to the caller of this method. If there are // multiple, a nullptr is returned indicating there cannot be a unique // returned value. Optional UniqueRV; - std::function Pred = [&](Value &RV) -> bool { + auto Pred = [&](Value &RV) -> bool { // If we found a second returned value and neither the current nor the saved // one is an undef, there is no unique returned value. Undefs are special // since we can pretend they have any value. @@ -618,14 +1060,15 @@ Optional AAReturnedValuesImpl::getAssumedUniqueReturnValue() const { return true; }; - if (!checkForallReturnedValues(Pred)) + if (!A.checkForAllReturnedValues(Pred, *this)) UniqueRV = nullptr; return UniqueRV; } -bool AAReturnedValuesImpl::checkForallReturnedValues( - std::function &Pred) const { +bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred) const { if (!isValidState()) return false; @@ -634,11 +1077,11 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( for (auto &It : ReturnedValues) { Value *RV = It.first; - ImmutableCallSite ICS(RV); - if (ICS && !HasOverdefinedReturnedCalls) + CallBase *CB = dyn_cast(RV); + if (CB && !UnresolvedCalls.count(CB)) continue; - if (!Pred(*RV)) + if (!Pred(*RV, It.second)) return false; } @@ -646,125 +1089,196 @@ bool AAReturnedValuesImpl::checkForallReturnedValues( } ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { + size_t NumUnresolvedCalls = UnresolvedCalls.size(); + bool Changed = false; - // 
Check if we know of any values returned by the associated function, - // if not, we are done. - if (getNumReturnValues() == 0) { - indicateOptimisticFixpoint(); - return ChangeStatus::UNCHANGED; - } + // State used in the value traversals starting in returned values. + struct RVState { + // The map in which we collect return values -> return instrs. + decltype(ReturnedValues) &RetValsMap; + // The flag to indicate a change. + bool &Changed; + // The return instrs we come from. + SmallSetVector RetInsts; + }; - // Check if any of the returned values is a call site we can refine. - decltype(ReturnedValues) AddRVs; - bool HasCallSite = false; + // Callback for a leaf value returned by the associated function. + auto VisitValueCB = [](Value &Val, RVState &RVS, bool) -> bool { + auto Size = RVS.RetValsMap[&Val].size(); + RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end()); + bool Inserted = RVS.RetValsMap[&Val].size() != Size; + RVS.Changed |= Inserted; + LLVM_DEBUG({ + if (Inserted) + dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val + << " => " << RVS.RetInsts.size() << "\n"; + }); + return true; + }; - // Look at all returned call sites. + // Helper method to invoke the generic value traversal. + auto VisitReturnedValue = [&](Value &RV, RVState &RVS) { + IRPosition RetValPos = IRPosition::value(RV); + return genericValueTraversal(A, RetValPos, *this, + RVS, VisitValueCB); + }; + + // Callback for all "return intructions" live in the associated function. + auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) { + ReturnInst &Ret = cast(I); + RVState RVS({ReturnedValues, Changed, {}}); + RVS.RetInsts.insert(&Ret); + return VisitReturnedValue(*Ret.getReturnValue(), RVS); + }; + + // Start by discovering returned values from all live returned instructions in + // the associated function. 
+ if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret})) + return indicatePessimisticFixpoint(); + + // Once returned values "directly" present in the code are handled we try to + // resolve returned calls. + decltype(ReturnedValues) NewRVsMap; for (auto &It : ReturnedValues) { - SmallPtrSet &ReturnInsts = It.second; - Value *RV = It.first; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *It.first + << " by #" << It.second.size() << " RIs\n"); + CallBase *CB = dyn_cast(It.first); + if (!CB || UnresolvedCalls.count(CB)) + continue; + + if (!CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); + continue; + } + + // TODO: use the function scope once we have call site AAReturnedValues. + const auto &RetValAA = A.getAAFor( + *this, IRPosition::function(*CB->getCalledFunction())); + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: " + << static_cast(RetValAA) << "\n"); - // Only call sites can change during an update, ignore the rest. - CallSite RetCS(RV); - if (!RetCS) - continue; - - // For now, any call site we see will prevent us from directly fixing the - // state. However, if the information on the callees is fixed, the call - // sites will be removed and we will fix the information for this state. - HasCallSite = true; - - // Try to find a assumed unique return value for the called function. - auto *RetCSAA = A.getAAFor(*this, *RV); - if (!RetCSAA) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV - << ") with " << (RetCSAA ? "invalid" : "no") - << " associated state\n"); + // Skip dead ends, thus if we do not know anything about the returned + // call we mark it as unresolved and it will stay that way. 
+ if (!RetValAA.getState().isValidState()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB + << "\n"); + UnresolvedCalls.insert(CB); continue; } - // Try to find a assumed unique return value for the called function. - Optional AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue(); - - // If no assumed unique return value was found due to the lack of - // candidates, we may need to resolve more calls (through more update - // iterations) or the called function will not return. Either way, we simply - // stick with the call sites as return values. Because there were not - // multiple possibilities, we do not treat it as overdefined. - if (!AssumedUniqueRV.hasValue()) - continue; - - // If multiple, non-refinable values were found, there cannot be a unique - // return value for the called function. The returned call is overdefined! - if (!AssumedUniqueRV.getValue()) { - HasOverdefinedReturnedCalls = true; - LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple " - "potentially returned values\n"); + // Do not try to learn partial information. If the callee has unresolved + // return values we will treat the call as unresolved/opaque. + auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls(); + if (!RetValAAUnresolvedCalls.empty()) { + UnresolvedCalls.insert(CB); continue; } - LLVM_DEBUG({ - bool UniqueRVIsKnown = RetCSAA->isAtFixpoint(); - dbgs() << "[AAReturnedValues] Returned call site " - << (UniqueRVIsKnown ? "known" : "assumed") - << " unique return value: " << *AssumedUniqueRV << "\n"; - }); + // Now check if we can track transitively returned values. If possible, thus + // if all return value can be represented in the current scope, do so. 
+ bool Unresolved = false; + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (isa(RetVal) || isa(RetVal) || + isa(RetVal)) + continue; + // Anything that did not fit in the above categories cannot be resolved, + // mark the call as unresolved. + LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value " + "cannot be translated: " + << *RetVal << "\n"); + UnresolvedCalls.insert(CB); + Unresolved = true; + break; + } - // The assumed unique return value. - Value *AssumedRetVal = AssumedUniqueRV.getValue(); + if (Unresolved) + continue; - // If the assumed unique return value is an argument, lookup the matching - // call site operand and recursively collect new returned values. - // If it is not an argument, it is just put into the set of returned values - // as we would have already looked through casts, phis, and similar values. - if (Argument *AssumedRetArg = dyn_cast(AssumedRetVal)) - collectValuesRecursively(A, - RetCS.getArgOperand(AssumedRetArg->getArgNo()), - ReturnInsts, AddRVs); - else - AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end()); + // Now track transitively returned values. + unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB]; + if (NumRetAA == RetValAA.getNumReturnValues()) { + LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not " + "changed since it was seen last\n"); + continue; + } + NumRetAA = RetValAA.getNumReturnValues(); + + for (auto &RetValAAIt : RetValAA.returned_values()) { + Value *RetVal = RetValAAIt.first; + if (Argument *Arg = dyn_cast(RetVal)) { + // Arguments are mapped to call site operands and we begin the traversal + // again. + bool Unused = false; + RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); + VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS); + continue; + } else if (isa(RetVal)) { + // Call sites are resolved by the callee attribute over time, no need to + // do anything for us. 
+ continue; + } else if (isa(RetVal)) { + // Constants are valid everywhere, we can simply take them. + NewRVsMap[RetVal].insert(It.second.begin(), It.second.end()); + continue; + } + } } - // Keep track of any change to trigger updates on dependent attributes. - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - for (auto &It : AddRVs) { + // To avoid modifications to the ReturnedValues map while we iterate over it + // we kept record of potential new entries in a copy map, NewRVsMap. + for (auto &It : NewRVsMap) { assert(!It.second.empty() && "Entry does not add anything."); auto &ReturnInsts = ReturnedValues[It.first]; for (ReturnInst *RI : It.second) - if (ReturnInsts.insert(RI).second) { + if (ReturnInsts.insert(RI)) { LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value " << *It.first << " => " << *RI << "\n"); - Changed = ChangeStatus::CHANGED; + Changed = true; } } - // If there is no call site in the returned values we are done. - if (!HasCallSite) { - indicateOptimisticFixpoint(); - return ChangeStatus::CHANGED; + Changed |= (NumUnresolvedCalls != UnresolvedCalls.size()); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; +} + +struct AAReturnedValuesFunction final : public AAReturnedValuesImpl { + AAReturnedValuesFunction(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) } +}; + +/// Returned values information for a call sites. +struct AAReturnedValuesCallSite final : AAReturnedValuesImpl { + AAReturnedValuesCallSite(const IRPosition &IRP) : AAReturnedValuesImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. 
+ llvm_unreachable("Abstract attributes for returned values are not " + "supported for call sites yet!"); } - return Changed; -} + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; /// ------------------------ NoSync Function Attribute ------------------------- -struct AANoSyncFunction : AANoSync, BooleanState { - - AANoSyncFunction(Function &F, InformationCache &InfoCache) - : AANoSync(F, InfoCache) {} - - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } - - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP) : AANoSync(IRP) {} const std::string getAsStr() const override { return getAssumed() ? "nosync" : "may-sync"; @@ -773,12 +1287,6 @@ struct AANoSyncFunction : AANoSync, BooleanState { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override; - /// See AANoSync::isAssumedNoSync() - bool isAssumedNoSync() const override { return getAssumed(); } - - /// See AANoSync::isKnownNoSync() - bool isKnownNoSync() const override { return getKnown(); } - /// Helper function used to determine whether an instruction is non-relaxed /// atomic. 
In other words, if an atomic instruction does not have unordered /// or monotonic ordering @@ -792,7 +1300,7 @@ struct AANoSyncFunction : AANoSync, BooleanState { static bool isNoSyncIntrinsic(Instruction *I); }; -bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) { +bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) { if (!I->isAtomic()) return false; @@ -841,7 +1349,7 @@ bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) { /// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics. /// FIXME: We should ipmrove the handling of intrinsics. -bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) { +bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { if (auto *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { /// Element wise atomic memory intrinsics are can only be unordered, @@ -863,7 +1371,7 @@ bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) { return false; } -bool AANoSyncFunction::isVolatile(Instruction *I) { +bool AANoSyncImpl::isVolatile(Instruction *I) { assert(!ImmutableCallSite(I) && !isa(I) && "Calls should not be checked here"); @@ -881,372 +1389,436 @@ bool AANoSyncFunction::isVolatile(Instruction *I) { } } -ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { - /// We are looking for volatile instructions or Non-Relaxed atomics. - /// FIXME: We should ipmrove the handling of intrinsics. - for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) { - ImmutableCallSite ICS(I); - auto *NoSyncAA = A.getAAFor(*this, *I); + auto CheckRWInstForNoSync = [&](Instruction &I) { + /// We are looking for volatile instructions or Non-Relaxed atomics. + /// FIXME: We should ipmrove the handling of intrinsics. 
- if (isa(I) && isNoSyncIntrinsic(I)) - continue; + if (isa(&I) && isNoSyncIntrinsic(&I)) + return true; - if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) && - !ICS.hasFnAttr(Attribute::NoSync)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (ICS.hasFnAttr(Attribute::NoSync)) + return true; + + const auto &NoSyncAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + if (NoSyncAA.isAssumedNoSync()) + return true; + return false; } - if (ICS) - continue; + if (!isVolatile(&I) && !isNonRelaxedAtomic(&I)) + return true; - if (!isVolatile(I) && !isNonRelaxedAtomic(I)) - continue; + return false; + }; - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } + auto CheckForNoSync = [&](Instruction &I) { + // At this point we handled all read/write effects and they are all + // nosync, so they can be skipped. + if (I.mayReadOrWriteMemory()) + return true; - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); - auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}; + // non-convergent and readnone imply nosync. + return !ImmutableCallSite(&I).isConvergent(); + }; - for (unsigned Opcode : Opcodes) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - // At this point we handled all read/write effects and they are all - // nosync, so they can be skipped. - if (I->mayReadOrWriteMemory()) - continue; - - ImmutableCallSite ICS(I); - - // non-convergent and readnone imply nosync. 
- if (!ICS.isConvergent()) - continue; - - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - } + if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || + !A.checkForAllCallLikeInstructions(CheckForNoSync, *this)) + return indicatePessimisticFixpoint(); return ChangeStatus::UNCHANGED; } +struct AANoSyncFunction final : public AANoSyncImpl { + AANoSyncFunction(const IRPosition &IRP) : AANoSyncImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) } +}; + +/// NoSync attribute deduction for a call sites. +struct AANoSyncCallSite final : AANoSyncImpl { + AANoSyncCallSite(const IRPosition &IRP) : AANoSyncImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoSyncImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } +}; + /// ------------------------ No-Free Attributes ---------------------------- -struct AANoFreeFunction : AbstractAttribute, BooleanState { +struct AANoFreeImpl : public AANoFree { + AANoFreeImpl(const IRPosition &IRP) : AANoFree(IRP) {} - /// See AbstractAttribute::AbstractAttribute(...). 
- AANoFreeFunction(Function &F, InformationCache &InfoCache) - : AbstractAttribute(F, InfoCache) {} + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForNoFree = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoFree)) + return true; - /// See AbstractAttribute::getState() - ///{ - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - ///} + const auto &NoFreeAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + return NoFreeAA.isAssumedNoFree(); + }; - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } + if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return getAssumed() ? "nofree" : "may-free"; } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; - - /// See AbstractAttribute::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { return ID; } - - /// Return true if "nofree" is assumed. - bool isAssumedNoFree() const { return getAssumed(); } - - /// Return true if "nofree" is known. - bool isKnownNoFree() const { return getKnown(); } - - /// The identifier used by the Attributor for this class of attributes. - static constexpr Attribute::AttrKind ID = Attribute::NoFree; }; -ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +struct AANoFreeFunction final : public AANoFreeImpl { + AANoFreeFunction(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - // The map from instruction opcodes to those instructions in the function. 
- auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) } +}; - for (unsigned Opcode : - {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}) { - for (Instruction *I : OpcodeInstMap[Opcode]) { +/// NoFree attribute deduction for a call sites. +struct AANoFreeCallSite final : AANoFreeImpl { + AANoFreeCallSite(const IRPosition &IRP) : AANoFreeImpl(IRP) {} - auto ICS = ImmutableCallSite(I); - auto *NoFreeAA = A.getAAFor(*this, *I); - - if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) && - !ICS.hasFnAttr(Attribute::NoFree)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - } + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoFreeImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); } - return ChangeStatus::UNCHANGED; -} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); } +}; /// ------------------------ NonNull Argument Attribute ------------------------ -struct AANonNullImpl : AANonNull, BooleanState { +static int64_t getKnownNonNullAndDerefBytesForUse( + Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, + const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { + TrackUse = false; - AANonNullImpl(Value &V, InformationCache &InfoCache) - : AANonNull(V, InfoCache) {} + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return 0; - AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue, - InformationCache &InfoCache) - : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {} + Type *PtrTy = UseV->getType(); + const Function *F = I->getFunction(); + bool NullPointerIsDefined = + F ? 
llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; + const DataLayout &DL = A.getInfoCache().getDL(); + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (ICS.isBundleOperand(U)) + return 0; - /// See AbstractAttribute::getState() - /// { - AbstractState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } - /// } + if (ICS.isCallee(U)) { + IsNonNull |= !NullPointerIsDefined; + return 0; + } + + unsigned ArgNo = ICS.getArgumentNo(U); + IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + auto &DerefAA = A.getAAFor(QueryingAA, IRP); + IsNonNull |= DerefAA.isKnownNonNull(); + return DerefAA.getKnownDereferenceableBytes(); + } + + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) { + if (Base == &AssociatedValue && getPointerOperand(I) == UseV) { + int64_t DerefBytes = + Offset + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); + + IsNonNull |= !NullPointerIsDefined; + return DerefBytes; + } + } + if (const Value *Base = + GetPointerBaseWithConstantOffset(UseV, Offset, DL, + /*AllowNonInbounds*/ false)) { + auto &DerefAA = + A.getAAFor(QueryingAA, IRPosition::value(*Base)); + IsNonNull |= (!NullPointerIsDefined && DerefAA.isKnownNonNull()); + IsNonNull |= (!NullPointerIsDefined && (Offset != 0)); + int64_t DerefBytes = DerefAA.getKnownDereferenceableBytes(); + return std::max(int64_t(0), DerefBytes - Offset); + } + + return 0; +} + +struct AANonNullImpl : AANonNull { + AANonNullImpl(const IRPosition &IRP) + : AANonNull(IRP), + NullIsDefined(NullPointerIsDefined( + getAnchorScope(), + getAssociatedValue().getType()->getPointerAddressSpace())) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + if (!NullIsDefined && + hasAttr({Attribute::NonNull, Attribute::Dereferenceable})) + indicateOptimisticFixpoint(); + else + AANonNull::initialize(A); + } + + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I, + IsNonNull, TrackUse); + takeKnownMaximum(IsNonNull); + return TrackUse; + } /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { return getAssumed() ? "nonnull" : "may-null"; } - /// See AANonNull::isAssumedNonNull(). - bool isAssumedNonNull() const override { return getAssumed(); } - - /// See AANonNull::isKnownNonNull(). - bool isKnownNonNull() const override { return getKnown(); } - - /// Generate a predicate that checks if a given value is assumed nonnull. - /// The generated function returns true if a value satisfies any of - /// following conditions. - /// (i) A value is known nonZero(=nonnull). - /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is - /// true. - std::function generatePredicate(Attributor &); + /// Flag to determine if the underlying value can be null and still allow + /// valid accesses. + const bool NullIsDefined; }; -std::function AANonNullImpl::generatePredicate(Attributor &A) { - // FIXME: The `AAReturnedValues` should provide the predicate with the - // `ReturnInst` vector as well such that we can use the control flow sensitive - // version of `isKnownNonZero`. 
This should fix `test11` in - // `test/Transforms/FunctionAttrs/nonnull.ll` - - std::function Pred = [&](Value &RV) -> bool { - if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout())) - return true; - - auto *NonNullAA = A.getAAFor(*this, RV); - - ImmutableCallSite ICS(&RV); - - if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) && - (!ICS || !ICS.hasRetAttr(Attribute::NonNull))) - return false; - - return true; - }; - - return Pred; -} - -/// NonNull attribute for function return value. -struct AANonNullReturned : AANonNullImpl { - - AANonNullReturned(Function &F, InformationCache &InfoCache) - : AANonNullImpl(F, InfoCache) {} - - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_RETURNED; } - - /// See AbstractAttriubute::initialize(...). - void initialize(Attributor &A) override { - Function &F = getAnchorScope(); - - // Already nonnull. - if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::NonNull)) - indicateOptimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; -}; - -ChangeStatus AANonNullReturned::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); - - auto *AARetVal = A.getAAFor(*this, F); - if (!AARetVal) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - - std::function Pred = this->generatePredicate(A); - if (!AARetVal->checkForallReturnedValues(Pred)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - return ChangeStatus::UNCHANGED; -} - -/// NonNull attribute for function argument. -struct AANonNullArgument : AANonNullImpl { - - AANonNullArgument(Argument &A, InformationCache &InfoCache) - : AANonNullImpl(A, InfoCache) {} - - /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } - - /// See AbstractAttriubute::initialize(...). 
- void initialize(Attributor &A) override { - Argument *Arg = cast(getAssociatedValue()); - if (Arg->hasNonNullAttr()) - indicateOptimisticFixpoint(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; -}; - -/// NonNull attribute for a call site argument. -struct AANonNullCallSiteArgument : AANonNullImpl { - - /// See AANonNullImpl::AANonNullImpl(...). - AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo, - InformationCache &InfoCache) - : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache), - ArgNo(ArgNo) {} +/// NonNull attribute for a floating value. +struct AANonNullFloating + : AAFromMustBeExecutedContext { + using Base = AAFromMustBeExecutedContext; + AANonNullFloating(const IRPosition &IRP) : Base(IRP) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - CallSite CS(&getAnchoredValue()); - if (isKnownNonZero(getAssociatedValue(), - getAnchorScope().getParent()->getDataLayout()) || - CS.paramHasAttr(ArgNo, getAttrKind())) + Base::initialize(A); + + if (isAtFixpoint()) + return; + + const IRPosition &IRP = getIRPosition(); + const Value &V = IRP.getAssociatedValue(); + const DataLayout &DL = A.getDataLayout(); + + // TODO: This context sensitive query should be removed once we can do + // context sensitive queries in the genericValueTraversal below. + if (isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, IRP.getCtxI(), + /* TODO: DT */ nullptr)) indicateOptimisticFixpoint(); } - /// See AbstractAttribute::updateImpl(Attributor &A). - ChangeStatus updateImpl(Attributor &A) override; + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Change = Base::updateImpl(A); + if (isKnownNonNull()) + return Change; - /// See AbstractAttribute::getManifestPosition(). 
- ManifestPosition getManifestPosition() const override { - return MP_CALL_SITE_ARGUMENT; - }; - - // Return argument index of associated value. - int getArgNo() const { return ArgNo; } - -private: - unsigned ArgNo; -}; -ChangeStatus AANonNullArgument::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); - Argument &Arg = cast(getAnchoredValue()); - - unsigned ArgNo = Arg.getArgNo(); - - // Callback function - std::function CallSiteCheck = [&](CallSite CS) { - assert(CS && "Sanity check: Call site was not initialized properly!"); - - auto *NonNullAA = A.getAAFor(*this, *CS.getInstruction(), ArgNo); - - // Check that NonNullAA is AANonNullCallSiteArgument. - if (NonNullAA) { - ImmutableCallSite ICS(&NonNullAA->getAnchoredValue()); - if (ICS && CS.getInstruction() == ICS.getInstruction()) - return NonNullAA->isAssumedNonNull(); - return false; + if (!NullIsDefined) { + const auto &DerefAA = A.getAAFor(*this, getIRPosition()); + if (DerefAA.getAssumedDereferenceableBytes()) + return Change; } - if (CS.paramHasAttr(ArgNo, Attribute::NonNull)) - return true; + const DataLayout &DL = A.getDataLayout(); - Value *V = CS.getArgOperand(ArgNo); - if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout())) - return true; + auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, + bool Stripped) -> bool { + const auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + if (!isKnownNonZero(&V, DL, 0, /* TODO: AC */ nullptr, + /* CtxI */ getCtxI(), + /* TODO: DT */ nullptr)) + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. 
+ const AANonNull::StateType &NS = + static_cast(AA.getState()); + T ^= NS; + } + return T.isValidState(); + }; - return false; - }; - if (!A.checkForAllCallSites(F, CallSiteCheck, true)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; - } - return ChangeStatus::UNCHANGED; -} + StateType T; + if (!genericValueTraversal(A, getIRPosition(), *this, + T, VisitValueCB)) + return indicatePessimisticFixpoint(); -ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) { - // NOTE: Never look at the argument of the callee in this method. - // If we do this, "nonnull" is always deduced because of the assumption. - - Value &V = *getAssociatedValue(); - - auto *NonNullAA = A.getAAFor(*this, V); - - if (!NonNullAA || !NonNullAA->isAssumedNonNull()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + return clampStateAndIndicateChange(getState(), T); } - return ChangeStatus::UNCHANGED; -} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; -/// ------------------------ Will-Return Attributes ---------------------------- +/// NonNull attribute for function return value. +struct AANonNullReturned final + : AAReturnedFromReturnedValues { + AANonNullReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} -struct AAWillReturnImpl : public AAWillReturn, BooleanState { + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } +}; - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturnImpl(Function &F, InformationCache &InfoCache) - : AAWillReturn(F, InfoCache) {} +/// NonNull attribute for function argument. +struct AANonNullArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext { + AANonNullArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext( + IRP) {} - /// See AAWillReturn::isKnownWillReturn(). 
- bool isKnownWillReturn() const override { return getKnown(); } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) } +}; - /// See AAWillReturn::isAssumedWillReturn(). - bool isAssumedWillReturn() const override { return getAssumed(); } +struct AANonNullCallSiteArgument final : AANonNullFloating { + AANonNullCallSiteArgument(const IRPosition &IRP) : AANonNullFloating(IRP) {} - /// See AbstractAttribute::getState(...). - AbstractState &getState() override { return *this; } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) } +}; - /// See AbstractAttribute::getState(...). - const AbstractState &getState() const override { return *this; } +/// NonNull attribute for a call site return position. +struct AANonNullCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext { + AANonNullCallSiteReturned(const IRPosition &IRP) + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext( + IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } +}; + +/// ------------------------ No-Recurse Attributes ---------------------------- + +struct AANoRecurseImpl : public AANoRecurse { + AANoRecurseImpl(const IRPosition &IRP) : AANoRecurse(IRP) {} /// See AbstractAttribute::getAsStr() const std::string getAsStr() const override { - return getAssumed() ? "willreturn" : "may-noreturn"; + return getAssumed() ? "norecurse" : "may-recurse"; } }; -struct AAWillReturnFunction final : AAWillReturnImpl { - - /// See AbstractAttribute::AbstractAttribute(...). - AAWillReturnFunction(Function &F, InformationCache &InfoCache) - : AAWillReturnImpl(F, InfoCache) {} - - /// See AbstractAttribute::getManifestPosition(). 
- ManifestPosition getManifestPosition() const override { - return MP_FUNCTION; - } +struct AANoRecurseFunction final : AANoRecurseImpl { + AANoRecurseFunction(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override; + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + if (const Function *F = getAnchorScope()) + if (A.getInfoCache().getSccSize(*F) == 1) + return; + indicatePessimisticFixpoint(); + } /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus updateImpl(Attributor &A) override { + + auto CheckForNoRecurse = [&](Instruction &I) { + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoRecurse)) + return true; + + const auto &NoRecurseAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); + if (!NoRecurseAA.isAssumedNoRecurse()) + return false; + + // Recursion to the same function + if (ICS.getCalledFunction() == getAnchorScope()) + return false; + + return true; + }; + + if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this)) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } + + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) } }; +/// NoRecurse attribute deduction for a call sites. +struct AANoRecurseCallSite final : AANoRecurseImpl { + AANoRecurseCallSite(const IRPosition &IRP) : AANoRecurseImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoRecurseImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } +}; + +/// ------------------------ Will-Return Attributes ---------------------------- + // Helper function that checks whether a function has any cycle. // TODO: Replace with more efficent code -bool containsCycle(Function &F) { +static bool containsCycle(Function &F) { SmallPtrSet Visited; // Traverse BB by dfs and check whether successor is already visited. @@ -1264,99 +1836,2627 @@ bool containsCycle(Function &F) { // endless loop // FIXME: Any cycle is regarded as endless loop for now. // We have to allow some patterns. -bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); } - -void AAWillReturnFunction::initialize(Attributor &A) { - Function &F = getAnchorScope(); - - if (containsPossiblyEndlessLoop(F)) - indicatePessimisticFixpoint(); +static bool containsPossiblyEndlessLoop(Function *F) { + return !F || !F->hasExactDefinition() || containsCycle(*F); } -ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) { - Function &F = getAnchorScope(); +struct AAWillReturnImpl : public AAWillReturn { + AAWillReturnImpl(const IRPosition &IRP) : AAWillReturn(IRP) {} - // The map from instruction opcodes to those instructions in the function. - auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + AAWillReturn::initialize(A); - for (unsigned Opcode : - {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, - (unsigned)Instruction::Call}) { - for (Instruction *I : OpcodeInstMap[Opcode]) { - auto ICS = ImmutableCallSite(I); + Function *F = getAssociatedFunction(); + if (containsPossiblyEndlessLoop(F)) + indicatePessimisticFixpoint(); + } - if (ICS.hasFnAttr(Attribute::WillReturn)) + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto CheckForWillReturn = [&](Instruction &I) { + IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); + const auto &WillReturnAA = A.getAAFor(*this, IPos); + if (WillReturnAA.isKnownWillReturn()) + return true; + if (!WillReturnAA.isAssumedWillReturn()) + return false; + const auto &NoRecurseAA = A.getAAFor(*this, IPos); + return NoRecurseAA.isAssumedNoRecurse(); + }; + + if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "willreturn" : "may-noreturn"; + } +}; + +struct AAWillReturnFunction final : AAWillReturnImpl { + AAWillReturnFunction(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) } +}; + +/// WillReturn attribute deduction for a call sites. +struct AAWillReturnCallSite final : AAWillReturnImpl { + AAWillReturnCallSite(const IRPosition &IRP) : AAWillReturnImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAWillReturnImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } +}; + +/// ------------------------ NoAlias Argument Attribute ------------------------ + +struct AANoAliasImpl : AANoAlias { + AANoAliasImpl(const IRPosition &IRP) : AANoAlias(IRP) {} + + const std::string getAsStr() const override { + return getAssumed() ? "noalias" : "may-alias"; + } +}; + +/// NoAlias attribute for a floating value. +struct AANoAliasFloating final : AANoAliasImpl { + AANoAliasFloating(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Value &Val = getAssociatedValue(); + if (isa(Val)) + indicateOptimisticFixpoint(); + if (isa(Val) && + Val.getType()->getPointerAddressSpace() == 0) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Implement this. + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(noalias) + } +}; + +/// NoAlias attribute for an argument. 
+struct AANoAliasArgument final + : AAArgumentFromCallSiteArguments { + AANoAliasArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArguments(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } +}; + +struct AANoAliasCallSiteArgument final : AANoAliasImpl { + AANoAliasCallSiteArgument(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // See callsite argument attribute and callee argument attribute. + ImmutableCallSite ICS(&getAnchorValue()); + if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // We can deduce "noalias" if the following conditions hold. + // (i) Associated value is assumed to be noalias in the definition. + // (ii) Associated value is assumed to be no-capture in all the uses + // possibly executed before this callsite. + // (iii) There is no other pointer argument which could alias with the + // value. + + const Value &V = getAssociatedValue(); + const IRPosition IRP = IRPosition::value(V); + + // (i) Check whether noalias holds in the definition. + + auto &NoAliasAA = A.getAAFor(*this, IRP); + + if (!NoAliasAA.isAssumedNoAlias()) + return indicatePessimisticFixpoint(); + + LLVM_DEBUG(dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " is assumed NoAlias in the definition\n"); + + // (ii) Check whether the value is captured in the scope using AANoCapture. + // FIXME: This is conservative though, it is better to look at CFG and + // check only uses possibly executed before this callsite. 
+ + auto &NoCaptureAA = A.getAAFor(*this, IRP); + if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + LLVM_DEBUG( + dbgs() << "[Attributor][AANoAliasCSArg] " << V + << " cannot be noalias as it is potentially captured\n"); + return indicatePessimisticFixpoint(); + } + + // (iii) Check there is no other pointer argument which could alias with the + // value. + ImmutableCallSite ICS(&getAnchorValue()); + for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { + if (getArgNo() == (int)i) + continue; + const Value *ArgOp = ICS.getArgOperand(i); + if (!ArgOp->getType()->isPointerTy()) continue; - auto *WillReturnAA = A.getAAFor(*this, *I); - if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + if (const Function *F = getAnchorScope()) { + if (AAResults *AAR = A.getInfoCache().getAAResultsForFunction(*F)) { + bool IsAliasing = AAR->isNoAlias(&getAssociatedValue(), ArgOp); + LLVM_DEBUG(dbgs() + << "[Attributor][NoAliasCSArg] Check alias between " + "callsite arguments " + << AAR->isNoAlias(&getAssociatedValue(), ArgOp) << " " + << getAssociatedValue() << " " << *ArgOp << " => " + << (IsAliasing ? "" : "no-") << "alias \n"); + + if (IsAliasing) + continue; + } + } + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) } +}; + +/// NoAlias attribute for function return value. +struct AANoAliasReturned final : AANoAliasImpl { + AANoAliasReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + virtual ChangeStatus updateImpl(Attributor &A) override { + + auto CheckReturnValue = [&](Value &RV) -> bool { + if (Constant *C = dyn_cast(&RV)) + if (C->isNullValue() || isa(C)) + return true; + + /// For now, we can only deduce noalias if we have call sites. + /// FIXME: add more support. 
+ ImmutableCallSite ICS(&RV); + if (!ICS) + return false; + + const IRPosition &RVPos = IRPosition::value(RV); + const auto &NoAliasAA = A.getAAFor(*this, RVPos); + if (!NoAliasAA.isAssumedNoAlias()) + return false; + + const auto &NoCaptureAA = A.getAAFor(*this, RVPos); + return NoCaptureAA.isAssumedNoCaptureMaybeReturned(); + }; + + if (!A.checkForAllReturnedValues(CheckReturnValue, *this)) + return indicatePessimisticFixpoint(); + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) } +}; + +/// NoAlias attribute deduction for a call site return value. +struct AANoAliasCallSiteReturned final : AANoAliasImpl { + AANoAliasCallSiteReturned(const IRPosition &IRP) : AANoAliasImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. 
+ Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } +}; + +/// -------------------AAIsDead Function Attribute----------------------- + +struct AAIsDeadImpl : public AAIsDead { + AAIsDeadImpl(const IRPosition &IRP) : AAIsDead(IRP) {} + + void initialize(Attributor &A) override { + const Function *F = getAssociatedFunction(); + if (F && !F->isDeclaration()) + exploreFromEntry(A, F); + } + + void exploreFromEntry(Attributor &A, const Function *F) { + ToBeExploredPaths.insert(&(F->getEntryBlock().front())); + + for (size_t i = 0; i < ToBeExploredPaths.size(); ++i) + if (const Instruction *NextNoReturnI = + findNextNoReturn(A, ToBeExploredPaths[i])) + NoReturnCalls.insert(NextNoReturnI); + + // Mark the block live after we looked for no-return instructions. + assumeLive(A, F->getEntryBlock()); + } + + /// Find the next assumed noreturn instruction in the block of \p I starting + /// from, thus including, \p I. + /// + /// The caller is responsible to monitor the ToBeExploredPaths set as new + /// instructions discovered in other basic block will be placed in there. + /// + /// \returns The next assumed noreturn instructions in the block of \p I + /// starting from, thus including, \p I. + const Instruction *findNextNoReturn(Attributor &A, const Instruction *I); + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" + + std::to_string(getAssociatedFunction()->size()) + "][#NRI " + + std::to_string(NoReturnCalls.size()) + "]"; + } + + /// See AbstractAttribute::manifest(...). 
+ ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function &F = *getAssociatedFunction(); + + if (AssumedLiveBlocks.empty()) { + A.deleteAfterManifest(F); + return ChangeStatus::CHANGED; + } + + // Flag to determine if we can change an invoke to a call assuming the + // callee is nounwind. This is not possible if the personality of the + // function allows to catch asynchronous exceptions. + bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); + + for (const Instruction *NRC : NoReturnCalls) { + Instruction *I = const_cast(NRC); + BasicBlock *BB = I->getParent(); + Instruction *SplitPos = I->getNextNode(); + // TODO: mark stuff before unreachable instructions as dead. + + if (auto *II = dyn_cast(I)) { + // If we keep the invoke the split position is at the beginning of the + // normal desitination block (it invokes a noreturn function after all). + BasicBlock *NormalDestBB = II->getNormalDest(); + SplitPos = &NormalDestBB->front(); + + /// Invoke is replaced with a call and unreachable is placed after it if + /// the callee is nounwind and noreturn. Otherwise, we keep the invoke + /// and only place an unreachable in the normal successor. + if (Invoke2CallAllowed) { + if (II->getCalledFunction()) { + const IRPosition &IPos = IRPosition::callsite_function(*II); + const auto &AANoUnw = A.getAAFor(*this, IPos); + if (AANoUnw.isAssumedNoUnwind()) { + LLVM_DEBUG(dbgs() + << "[AAIsDead] Replace invoke with call inst\n"); + // We do not need an invoke (II) but instead want a call followed + // by an unreachable. However, we do not remove II as other + // abstract attributes might have it cached as part of their + // results. Given that we modify the CFG anyway, we simply keep II + // around but in a new dead block. 
To avoid II being live through + // a different edge we have to ensure the block we place it in is + // only reached from the current block of II and then not reached + // at all when we insert the unreachable. + SplitBlockPredecessors(NormalDestBB, {BB}, ".i2c"); + CallInst *CI = createCallMatchingInvoke(II); + CI->insertBefore(II); + CI->takeName(II); + II->replaceAllUsesWith(CI); + SplitPos = CI->getNextNode(); + } + } + } + + if (SplitPos == &NormalDestBB->front()) { + // If this is an invoke of a noreturn function the edge to the normal + // destination block is dead but not necessarily the block itself. + // TODO: We need to move to an edge based system during deduction and + // also manifest. + assert(!NormalDestBB->isLandingPad() && + "Expected the normal destination not to be a landingpad!"); + if (NormalDestBB->getUniquePredecessor() == BB) { + assumeLive(A, *NormalDestBB); + } else { + BasicBlock *SplitBB = + SplitBlockPredecessors(NormalDestBB, {BB}, ".dead"); + // The split block is live even if it contains only an unreachable + // instruction at the end. + assumeLive(A, *SplitBB); + SplitPos = SplitBB->getTerminator(); + HasChanged = ChangeStatus::CHANGED; + } + } } - auto *NoRecurseAA = A.getAAFor(*this, *I); + if (isa_and_nonnull(SplitPos)) + continue; - // FIXME: (i) Prohibit any recursion for now. - // (ii) AANoRecurse isn't implemented yet so currently any call is - // regarded as having recursion. - // Code below should be - // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) && - if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) { - indicatePessimisticFixpoint(); - return ChangeStatus::CHANGED; + BB = SplitPos->getParent(); + SplitBlock(BB, SplitPos); + changeToUnreachable(BB->getTerminator(), /* UseLLVMTrap */ false); + HasChanged = ChangeStatus::CHANGED; + } + + for (BasicBlock &BB : F) + if (!AssumedLiveBlocks.count(&BB)) + A.deleteAfterManifest(BB); + + return HasChanged; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override; + + /// See AAIsDead::isAssumedDead(BasicBlock *). + bool isAssumedDead(const BasicBlock *BB) const override { + assert(BB->getParent() == getAssociatedFunction() && + "BB must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + return !AssumedLiveBlocks.count(BB); + } + + /// See AAIsDead::isKnownDead(BasicBlock *). + bool isKnownDead(const BasicBlock *BB) const override { + return getKnown() && isAssumedDead(BB); + } + + /// See AAIsDead::isAssumed(Instruction *I). + bool isAssumedDead(const Instruction *I) const override { + assert(I->getParent()->getParent() == getAssociatedFunction() && + "Instruction must be in the same anchor scope function."); + + if (!getAssumed()) + return false; + + // If it is not in AssumedLiveBlocks then it for sure dead. + // Otherwise, it can still be after noreturn call in a live block. + if (!AssumedLiveBlocks.count(I->getParent())) + return true; + + // If it is not after a noreturn call, than it is live. + return isAfterNoReturn(I); + } + + /// See AAIsDead::isKnownDead(Instruction *I). + bool isKnownDead(const Instruction *I) const override { + return getKnown() && isAssumedDead(I); + } + + /// Check if instruction is after noreturn call, in other words, assumed dead. + bool isAfterNoReturn(const Instruction *I) const; + + /// Determine if \p F might catch asynchronous exceptions. + static bool mayCatchAsynchronousExceptions(const Function &F) { + return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); + } + + /// Assume \p BB is (partially) live now and indicate to the Attributor \p A + /// that internal function called from \p BB should now be looked at. + void assumeLive(Attributor &A, const BasicBlock &BB) { + if (!AssumedLiveBlocks.insert(&BB).second) + return; + + // We assume that all of BB is (probably) live now and if there are calls to + // internal functions we will assume that those are now live as well. 
This + // is a performance optimization for blocks with calls to a lot of internal + // functions. It can however cause dead functions to be treated as live. + for (const Instruction &I : BB) + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) + if (const Function *F = ICS.getCalledFunction()) + if (F->hasLocalLinkage()) + A.markLiveInternalFunction(*F); + } + + /// Collection of to be explored paths. + SmallSetVector ToBeExploredPaths; + + /// Collection of all assumed live BasicBlocks. + DenseSet AssumedLiveBlocks; + + /// Collection of calls with noreturn attribute, assumed or knwon. + SmallSetVector NoReturnCalls; +}; + +struct AAIsDeadFunction final : public AAIsDeadImpl { + AAIsDeadFunction(const IRPosition &IRP) : AAIsDeadImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECL(PartiallyDeadBlocks, Function, + "Number of basic blocks classified as partially dead"); + BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size(); + } +}; + +bool AAIsDeadImpl::isAfterNoReturn(const Instruction *I) const { + const Instruction *PrevI = I->getPrevNode(); + while (PrevI) { + if (NoReturnCalls.count(PrevI)) + return true; + PrevI = PrevI->getPrevNode(); + } + return false; +} + +const Instruction *AAIsDeadImpl::findNextNoReturn(Attributor &A, + const Instruction *I) { + const BasicBlock *BB = I->getParent(); + const Function &F = *BB->getParent(); + + // Flag to determine if we can change an invoke to a call assuming the callee + // is nounwind. This is not possible if the personality of the function allows + // to catch asynchronous exceptions. + bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F); + + // TODO: We should have a function that determines if an "edge" is dead. + // Edges could be from an instruction to the next or from a terminator + // to the successor. For now, we need to special case the unwind block + // of InvokeInst below. 
+ + while (I) { + ImmutableCallSite ICS(I); + + if (ICS) { + const IRPosition &IPos = IRPosition::callsite_function(ICS); + // Regarless of the no-return property of an invoke instruction we only + // learn that the regular successor is not reachable through this + // instruction but the unwind block might still be. + if (auto *Invoke = dyn_cast(I)) { + // Use nounwind to justify the unwind block is dead as well. + const auto &AANoUnw = A.getAAFor(*this, IPos); + if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) { + assumeLive(A, *Invoke->getUnwindDest()); + ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front()); + } + } + + const auto &NoReturnAA = A.getAAFor(*this, IPos); + if (NoReturnAA.isAssumedNoReturn()) + return I; + } + + I = I->getNextNode(); + } + + // get new paths (reachable blocks). + for (const BasicBlock *SuccBB : successors(BB)) { + assumeLive(A, *SuccBB); + ToBeExploredPaths.insert(&SuccBB->front()); + } + + // No noreturn instruction found. + return nullptr; +} + +ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) { + ChangeStatus Status = ChangeStatus::UNCHANGED; + + // Temporary collection to iterate over existing noreturn instructions. This + // will alow easier modification of NoReturnCalls collection + SmallVector NoReturnChanged; + + for (const Instruction *I : NoReturnCalls) + NoReturnChanged.push_back(I); + + for (const Instruction *I : NoReturnChanged) { + size_t Size = ToBeExploredPaths.size(); + + const Instruction *NextNoReturnI = findNextNoReturn(A, I); + if (NextNoReturnI != I) { + Status = ChangeStatus::CHANGED; + NoReturnCalls.remove(I); + if (NextNoReturnI) + NoReturnCalls.insert(NextNoReturnI); + } + + // Explore new paths. 
+ while (Size != ToBeExploredPaths.size()) { + Status = ChangeStatus::CHANGED; + if (const Instruction *NextNoReturnI = + findNextNoReturn(A, ToBeExploredPaths[Size++])) + NoReturnCalls.insert(NextNoReturnI); + } + } + + LLVM_DEBUG(dbgs() << "[AAIsDead] AssumedLiveBlocks: " + << AssumedLiveBlocks.size() << " Total number of blocks: " + << getAssociatedFunction()->size() << "\n"); + + // If we know everything is live there is no need to query for liveness. + if (NoReturnCalls.empty() && + getAssociatedFunction()->size() == AssumedLiveBlocks.size()) { + // Indicating a pessimistic fixpoint will cause the state to be "invalid" + // which will cause the Attributor to not return the AAIsDead on request, + // which will prevent us from querying isAssumedDead(). + indicatePessimisticFixpoint(); + assert(!isValidState() && "Expected an invalid state!"); + Status = ChangeStatus::CHANGED; + } + + return Status; +} + +/// Liveness information for a call sites. +struct AAIsDeadCallSite final : AAIsDeadImpl { + AAIsDeadCallSite(const IRPosition &IRP) : AAIsDeadImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites instead of + // redirecting requests to the callee. + llvm_unreachable("Abstract attributes for liveness are not " + "supported for call sites yet!"); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// -------------------- Dereferenceable Argument Attribute -------------------- + +template <> +ChangeStatus clampStateAndIndicateChange(DerefState &S, + const DerefState &R) { + ChangeStatus CS0 = clampStateAndIndicateChange( + S.DerefBytesState, R.DerefBytesState); + ChangeStatus CS1 = + clampStateAndIndicateChange(S.GlobalState, R.GlobalState); + return CS0 | CS1; +} + +struct AADereferenceableImpl : AADereferenceable { + AADereferenceableImpl(const IRPosition &IRP) : AADereferenceable(IRP) {} + using StateType = DerefState; + + void initialize(Attributor &A) override { + SmallVector Attrs; + getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull}, + Attrs); + for (const Attribute &Attr : Attrs) + takeKnownDerefBytesMaximum(Attr.getValueAsInt()); + + NonNullAA = &A.getAAFor(*this, getIRPosition()); + + const IRPosition &IRP = this->getIRPosition(); + bool IsFnInterface = IRP.isFnInterfaceKind(); + const Function *FnScope = IRP.getAnchorScope(); + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getState() + /// { + StateType &getState() override { return *this; } + const StateType &getState() const override { return *this; } + /// } + + /// See AAFromMustBeExecutedContext + bool followUse(Attributor &A, const Use *U, const Instruction *I) { + bool IsNonNull = false; + bool TrackUse = false; + int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( + A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + takeKnownDerefBytesMaximum(DerefBytes); + return TrackUse; + } + + void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + // TODO: Add *_globally support + if (isAssumedNonNull()) + 
Attrs.emplace_back(Attribute::getWithDereferenceableBytes( + Ctx, getAssumedDereferenceableBytes())); + else + Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes( + Ctx, getAssumedDereferenceableBytes())); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + if (!getAssumedDereferenceableBytes()) + return "unknown-dereferenceable"; + return std::string("dereferenceable") + + (isAssumedNonNull() ? "" : "_or_null") + + (isAssumedGlobal() ? "_globally" : "") + "<" + + std::to_string(getKnownDereferenceableBytes()) + "-" + + std::to_string(getAssumedDereferenceableBytes()) + ">"; + } +}; + +/// Dereferenceable attribute for a floating value. +struct AADereferenceableFloating + : AAFromMustBeExecutedContext { + using Base = + AAFromMustBeExecutedContext; + AADereferenceableFloating(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + ChangeStatus Change = Base::updateImpl(A); + + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, DerefState &T, bool Stripped) -> bool { + unsigned IdxWidth = + DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); + APInt Offset(IdxWidth, 0); + const Value *Base = + V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + const auto &AA = + A.getAAFor(*this, IRPosition::value(*Base)); + int64_t DerefBytes = 0; + if (!Stripped && this == &AA) { + // Use IR information if we did not strip anything. + // TODO: track globally. 
+ bool CanBeNull; + DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull); + T.GlobalState.indicatePessimisticFixpoint(); + } else { + const DerefState &DS = static_cast(AA.getState()); + DerefBytes = DS.DerefBytesState.getAssumed(); + T.GlobalState &= DS.GlobalState; + } + + // For now we do not try to "increase" dereferenceability due to negative + // indices as we first have to come up with code to deal with loops and + // for overflows of the dereferenceable bytes. + int64_t OffsetSExt = Offset.getSExtValue(); + if (OffsetSExt < 0) + OffsetSExt = 0; + + T.takeAssumedDerefBytesMinimum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + + if (this == &AA) { + if (!Stripped) { + // If nothing was stripped IR information is all we got. + T.takeKnownDerefBytesMaximum( + std::max(int64_t(0), DerefBytes - OffsetSExt)); + T.indicatePessimisticFixpoint(); + } else if (OffsetSExt > 0) { + // If something was stripped but there is circular reasoning we look + // for the offset. If it is positive we basically decrease the + // dereferenceable bytes in a circluar loop now, which will simply + // drive them down to the known value in a very slow way which we + // can accelerate. + T.indicatePessimisticFixpoint(); + } + } + + return T.isValidState(); + }; + + DerefState T; + if (!genericValueTraversal( + A, getIRPosition(), *this, T, VisitValueCB)) + return indicatePessimisticFixpoint(); + + return Change | clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a return value. 
+struct AADereferenceableReturned final + : AAReturnedFromReturnedValues { + AADereferenceableReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for an argument +struct AADereferenceableArgument final + : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState> { + using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl, DerefState>; + AADereferenceableArgument(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute for a call site argument. +struct AADereferenceableCallSiteArgument final : AADereferenceableFloating { + AADereferenceableCallSiteArgument(const IRPosition &IRP) + : AADereferenceableFloating(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(dereferenceable) + } +}; + +/// Dereferenceable attribute deduction for a call site return value. +struct AADereferenceableCallSiteReturned final + : AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl> { + using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext< + AADereferenceable, AADereferenceableImpl>; + AADereferenceableCallSiteReturned(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + + ChangeStatus Change = Base::updateImpl(A); + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return Change | + clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(dereferenceable); + } +}; + +// ------------------------ Align Argument Attribute ------------------------ + +struct AAAlignImpl : AAAlign { + AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {} + + // Max alignemnt value allowed in IR + static const unsigned MAX_ALIGN = 1U << 29; + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + takeAssumedMinimum(MAX_ALIGN); + + SmallVector Attrs; + getAttrs({Attribute::Alignment}, Attrs); + for (const Attribute &Attr : Attrs) + takeKnownMaximum(Attr.getValueAsInt()); + + if (getIRPosition().isFnInterfaceKind() && + (!getAssociatedFunction() || + !getAssociatedFunction()->hasExactDefinition())) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + // Check for users that allow alignment annotations. 
+ Value &AnchorVal = getIRPosition().getAnchorValue(); + for (const Use &U : AnchorVal.uses()) { + if (auto *SI = dyn_cast(U.getUser())) { + if (SI->getPointerOperand() == &AnchorVal) + if (SI->getAlignment() < getAssumedAlign()) { + STATS_DECLTRACK(AAAlign, Store, + "Number of times alignemnt added to a store"); + SI->setAlignment(Align(getAssumedAlign())); + Changed = ChangeStatus::CHANGED; + } + } else if (auto *LI = dyn_cast(U.getUser())) { + if (LI->getPointerOperand() == &AnchorVal) + if (LI->getAlignment() < getAssumedAlign()) { + LI->setAlignment(Align(getAssumedAlign())); + STATS_DECLTRACK(AAAlign, Load, + "Number of times alignemnt added to a load"); + Changed = ChangeStatus::CHANGED; + } + } + } + + return AAAlign::manifest(A) | Changed; + } + + // TODO: Provide a helper to determine the implied ABI alignment and check in + // the existing manifest method and a new one for AAAlignImpl that value + // to avoid making the alignment explicit if it did not improve. + + /// See AbstractAttribute::getDeducedAttributes + virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + if (getAssumedAlign() > 1) + Attrs.emplace_back( + Attribute::getWithAlignment(Ctx, Align(getAssumedAlign()))); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) + + "-" + std::to_string(getAssumedAlign()) + ">") + : "unknown-align"; + } +}; + +/// Align attribute for a floating value. +struct AAAlignFloating : AAAlignImpl { + AAAlignFloating(const IRPosition &IRP) : AAAlignImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + const DataLayout &DL = A.getDataLayout(); + + auto VisitValueCB = [&](Value &V, AAAlign::StateType &T, + bool Stripped) -> bool { + const auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // Use only IR information if we did not strip anything. + const MaybeAlign PA = V.getPointerAlignment(DL); + T.takeKnownMaximum(PA ? PA->value() : 0); + T.indicatePessimisticFixpoint(); + } else { + // Use abstract attribute information. + const AAAlign::StateType &DS = + static_cast(AA.getState()); + T ^= DS; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal(A, getIRPosition(), *this, T, + VisitValueCB)) + return indicatePessimisticFixpoint(); + + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) } +}; + +/// Align attribute for function return value. +struct AAAlignReturned final + : AAReturnedFromReturnedValues { + AAAlignReturned(const IRPosition &IRP) + : AAReturnedFromReturnedValues(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } +}; + +/// Align attribute for function argument. +struct AAAlignArgument final + : AAArgumentFromCallSiteArguments { + AAAlignArgument(const IRPosition &IRP) + : AAArgumentFromCallSiteArguments(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) } +}; + +struct AAAlignCallSiteArgument final : AAAlignFloating { + AAAlignCallSiteArgument(const IRPosition &IRP) : AAAlignFloating(IRP) {} + + /// See AbstractAttribute::manifest(...). 
+ ChangeStatus manifest(Attributor &A) override { + return AAAlignImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) } +}; + +/// Align attribute deduction for a call site return value. +struct AAAlignCallSiteReturned final : AAAlignImpl { + AAAlignCallSiteReturned(const IRPosition &IRP) : AAAlignImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAAlignImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::returned(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } +}; + +/// ------------------ Function No-Return Attribute ---------------------------- +struct AANoReturnImpl : public AANoReturn { + AANoReturnImpl(const IRPosition &IRP) : AANoReturn(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoReturn::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->hasFnAttribute(Attribute::WillReturn)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "noreturn" : "may-return"; + } + + /// See AbstractAttribute::updateImpl(Attributor &A). 
+ virtual ChangeStatus updateImpl(Attributor &A) override { + const auto &WillReturnAA = A.getAAFor(*this, getIRPosition()); + if (WillReturnAA.isKnownWillReturn()) + return indicatePessimisticFixpoint(); + auto CheckForNoReturn = [](Instruction &) { return false; }; + if (!A.checkForAllInstructions(CheckForNoReturn, *this, + {(unsigned)Instruction::Ret})) + return indicatePessimisticFixpoint(); + return ChangeStatus::UNCHANGED; + } +}; + +struct AANoReturnFunction final : AANoReturnImpl { + AANoReturnFunction(const IRPosition &IRP) : AANoReturnImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) } +}; + +/// NoReturn attribute deduction for a call sites. +struct AANoReturnCallSite final : AANoReturnImpl { + AANoReturnCallSite(const IRPosition &IRP) : AANoReturnImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), + static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } +}; + +/// ----------------------- Variable Capturing --------------------------------- + +/// A class to hold the state of for no-capture attributes. +struct AANoCaptureImpl : public AANoCapture { + AANoCaptureImpl(const IRPosition &IRP) : AANoCapture(IRP) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + AANoCapture::initialize(A); + + // You cannot "capture" null in the default address space. + if (isa(getAssociatedValue()) && + getAssociatedValue().getType()->getPointerAddressSpace() == 0) { + indicateOptimisticFixpoint(); + return; + } + + const IRPosition &IRP = getIRPosition(); + const Function *F = + getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + + // Check what state the associated function can actually capture. + if (F) + determineFunctionCaptureCapabilities(IRP, *F, *this); + else + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...). + virtual void + getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + if (!isAssumedNoCaptureMaybeReturned()) + return; + + if (getArgNo() >= 0) { + if (isAssumedNoCapture()) + Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); + else if (ManifestInternal) + Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned")); + } + } + + /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known + /// depending on the ability of the function associated with \p IRP to capture + /// state in memory and through "returning/throwing", respectively. + static void determineFunctionCaptureCapabilities(const IRPosition &IRP, + const Function &F, + IntegerState &State) { + // TODO: Once we have memory behavior attributes we should use them here. + + // If we know we cannot communicate or write to memory, we do not care about + // ptr2int anymore. 
+ if (F.onlyReadsMemory() && F.doesNotThrow() && + F.getReturnType()->isVoidTy()) { + State.addKnownBits(NO_CAPTURE); + return; + } + + // A function cannot capture state in memory if it only reads memory, it can + // however return/throw state and the state might be influenced by the + // pointer value, e.g., loading from a returned pointer might reveal a bit. + if (F.onlyReadsMemory()) + State.addKnownBits(NOT_CAPTURED_IN_MEM); + + // A function cannot communicate state back if it does not through + // exceptions and doesn not return values. + if (F.doesNotThrow() && F.getReturnType()->isVoidTy()) + State.addKnownBits(NOT_CAPTURED_IN_RET); + + // Check existing "returned" attributes. + int ArgNo = IRP.getArgNo(); + if (F.doesNotThrow() && ArgNo >= 0) { + for (unsigned u = 0, e = F.arg_size(); u< e; ++u) + if (F.hasParamAttribute(u, Attribute::Returned)) { + if (u == unsigned(ArgNo)) + State.removeAssumedBits(NOT_CAPTURED_IN_RET); + else if (F.onlyReadsMemory()) + State.addKnownBits(NO_CAPTURE); + else + State.addKnownBits(NOT_CAPTURED_IN_RET); + break; + } + } + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isKnownNoCapture()) + return "known not-captured"; + if (isAssumedNoCapture()) + return "assumed not-captured"; + if (isKnownNoCaptureMaybeReturned()) + return "known not-captured-maybe-returned"; + if (isAssumedNoCaptureMaybeReturned()) + return "assumed not-captured-maybe-returned"; + return "assumed-captured"; + } +}; + +/// Attributor-aware capture tracker. +struct AACaptureUseTracker final : public CaptureTracker { + + /// Create a capture tracker that can lookup in-flight abstract attributes + /// through the Attributor \p A. + /// + /// If a use leads to a potential capture, \p CapturedInMemory is set and the + /// search is stopped. If a use leads to a return instruction, + /// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed. 
+ /// If a use leads to a ptr2int which may capture the value, + /// \p CapturedInInteger is set. If a use is found that is currently assumed + /// "no-capture-maybe-returned", the user is added to the \p PotentialCopies + /// set. All values in \p PotentialCopies are later tracked as well. For every + /// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0, + /// the search is stopped with \p CapturedInMemory and \p CapturedInInteger + /// conservatively set to true. + AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA, + const AAIsDead &IsDeadAA, IntegerState &State, + SmallVectorImpl &PotentialCopies, + unsigned &RemainingUsesToExplore) + : A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State), + PotentialCopies(PotentialCopies), + RemainingUsesToExplore(RemainingUsesToExplore) {} + + /// Determine if \p V maybe captured. *Also updates the state!* + bool valueMayBeCaptured(const Value *V) { + if (V->getType()->isPointerTy()) { + PointerMayBeCaptured(V, this); + } else { + State.indicatePessimisticFixpoint(); + } + return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + + /// See CaptureTracker::tooManyUses(). + void tooManyUses() override { + State.removeAssumedBits(AANoCapture::NO_CAPTURE); + } + + bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override { + if (CaptureTracker::isDereferenceableOrNull(O, DL)) + return true; + const auto &DerefAA = + A.getAAFor(NoCaptureAA, IRPosition::value(*O)); + return DerefAA.getAssumedDereferenceableBytes(); + } + + /// See CaptureTracker::captured(...). + bool captured(const Use *U) override { + Instruction *UInst = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst + << "\n"); + + // Because we may reuse the tracker multiple times we keep track of the + // number of explored uses ourselves as well. 
+ if (RemainingUsesToExplore-- == 0) { + LLVM_DEBUG(dbgs() << " - too many uses to explore!\n"); + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + // Deal with ptr2int by following uses. + if (isa(UInst)) { + LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n"); + return valueMayBeCaptured(UInst); + } + + // Explicitly catch return instructions. + if (isa(UInst)) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ true); + + // For now we only use special logic for call sites. However, the tracker + // itself knows about a lot of other non-capturing cases already. + CallSite CS(UInst); + if (!CS || !CS.isArgOperand(U)) + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + + unsigned ArgNo = CS.getArgumentNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); + // If we have a abstract no-capture attribute for the argument we can use + // it to justify a non-capture attribute here. This allows recursion! + auto &ArgNoCaptureAA = A.getAAFor(NoCaptureAA, CSArgPos); + if (ArgNoCaptureAA.isAssumedNoCapture()) + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + addPotentialCopy(CS); + return isCapturedIn(/* Memory */ false, /* Integer */ false, + /* Return */ false); + } + + // Lastly, we could not find a reason no-capture can be assumed so we don't. + return isCapturedIn(/* Memory */ true, /* Integer */ true, + /* Return */ true); + } + + /// Register \p CS as potential copy of the value we are checking. + void addPotentialCopy(CallSite CS) { + PotentialCopies.push_back(CS.getInstruction()); + } + + /// See CaptureTracker::shouldExplore(...). + bool shouldExplore(const Use *U) override { + // Check liveness. 
+ return !IsDeadAA.isAssumedDead(cast(U->getUser())); + } + + /// Update the state according to \p CapturedInMem, \p CapturedInInt, and + /// \p CapturedInRet, then return the appropriate value for use in the + /// CaptureTracker::captured() interface. + bool isCapturedIn(bool CapturedInMem, bool CapturedInInt, + bool CapturedInRet) { + LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int " + << CapturedInInt << "|Ret " << CapturedInRet << "]\n"); + if (CapturedInMem) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM); + if (CapturedInInt) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT); + if (CapturedInRet) + State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET); + return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED); + } + +private: + /// The attributor providing in-flight abstract attributes. + Attributor &A; + + /// The abstract attribute currently updated. + AANoCapture &NoCaptureAA; + + /// The abstract liveness state. + const AAIsDead &IsDeadAA; + + /// The state currently updated. + IntegerState &State; + + /// Set of potential copies of the tracked value. + SmallVectorImpl &PotentialCopies; + + /// Global counter to limit the number of explored uses. + unsigned &RemainingUsesToExplore; +}; + +ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { + const IRPosition &IRP = getIRPosition(); + const Value *V = + getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); + if (!V) + return indicatePessimisticFixpoint(); + + const Function *F = + getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + assert(F && "Expected a function!"); + const IRPosition &FnPos = IRPosition::function(*F); + const auto &IsDeadAA = A.getAAFor(*this, FnPos); + + AANoCapture::StateType T; + + // Readonly means we cannot capture through memory. 
+ const auto &FnMemAA = A.getAAFor(*this, FnPos); + if (FnMemAA.isAssumedReadOnly()) { + T.addKnownBits(NOT_CAPTURED_IN_MEM); + if (FnMemAA.isKnownReadOnly()) + addKnownBits(NOT_CAPTURED_IN_MEM); + } + + // Make sure all returned values are different than the underlying value. + // TODO: we could do this in a more sophisticated way inside + // AAReturnedValues, e.g., track all values that escape through returns + // directly somehow. + auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) { + bool SeenConstant = false; + for (auto &It : RVAA.returned_values()) { + if (isa(It.first)) { + if (SeenConstant) + return false; + SeenConstant = true; + } else if (!isa(It.first) || + It.first == getAssociatedArgument()) + return false; + } + return true; + }; + + const auto &NoUnwindAA = A.getAAFor(*this, FnPos); + if (NoUnwindAA.isAssumedNoUnwind()) { + bool IsVoidTy = F->getReturnType()->isVoidTy(); + const AAReturnedValues *RVAA = + IsVoidTy ? nullptr : &A.getAAFor(*this, FnPos); + if (IsVoidTy || CheckReturnedArgs(*RVAA)) { + T.addKnownBits(NOT_CAPTURED_IN_RET); + if (T.isKnown(NOT_CAPTURED_IN_MEM)) + return ChangeStatus::UNCHANGED; + if (NoUnwindAA.isKnownNoUnwind() && + (IsVoidTy || RVAA->getState().isAtFixpoint())) { + addKnownBits(NOT_CAPTURED_IN_RET); + if (isKnown(NOT_CAPTURED_IN_MEM)) + return indicateOptimisticFixpoint(); } } } + // Use the CaptureTracker interface and logic with the specialized tracker, + // defined in AACaptureUseTracker, that can look at in-flight abstract + // attributes and directly updates the assumed state. + SmallVector PotentialCopies; + unsigned RemainingUsesToExplore = DefaultMaxUsesToExplore; + AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies, + RemainingUsesToExplore); + + // Check all potential copies of the associated value until we can assume + // none will be captured or we have to assume at least one might be. 
+ unsigned Idx = 0; + PotentialCopies.push_back(V); + while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size()) + Tracker.valueMayBeCaptured(PotentialCopies[Idx++]); + + AAAlign::StateType &S = getState(); + auto Assumed = S.getAssumed(); + S.intersectAssumedBits(T.getAssumed()); + return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; +} + +/// NoCapture attribute for function arguments. +struct AANoCaptureArgument final : AANoCaptureImpl { + AANoCaptureArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) } +}; + +/// NoCapture attribute for call site arguments. +struct AANoCaptureCallSiteArgument final : AANoCaptureImpl { + AANoCaptureCallSiteArgument(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + if (!Arg) + return indicatePessimisticFixpoint(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)}; +}; + +/// NoCapture attribute for floating values. 
+struct AANoCaptureFloating final : AANoCaptureImpl { + AANoCaptureFloating(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(nocapture) + } +}; + +/// NoCapture attribute for function return value. +struct AANoCaptureReturned final : AANoCaptureImpl { + AANoCaptureReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("NoCapture is not applicable to function returns!"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// NoCapture attribute deduction for a call site return value. +struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { + AANoCaptureCallSiteReturned(const IRPosition &IRP) : AANoCaptureImpl(IRP) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(nocapture) + } +}; + +/// ------------------ Value Simplify Attribute ---------------------------- +struct AAValueSimplifyImpl : AAValueSimplify { + AAValueSimplifyImpl(const IRPosition &IRP) : AAValueSimplify(IRP) {} + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? (getKnown() ? 
"simplified" : "maybe-simple") + : "not-simple"; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} + + /// See AAValueSimplify::getAssumedSimplifiedValue() + Optional getAssumedSimplifiedValue(Attributor &A) const override { + if (!getAssumed()) + return const_cast(&getAssociatedValue()); + return SimplifiedAssociatedValue; + } + void initialize(Attributor &A) override {} + + /// Helper function for querying AAValueSimplify and updating candicate. + /// \param QueryingValue Value trying to unify with SimplifiedValue + /// \param AccumulatedSimplifiedValue Current simplification result. + static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, + Value &QueryingValue, + Optional &AccumulatedSimplifiedValue) { + // FIXME: Add a typecast support. + + auto &ValueSimpifyAA = A.getAAFor( + QueryingAA, IRPosition::value(QueryingValue)); + + Optional QueryingValueSimplified = + ValueSimpifyAA.getAssumedSimplifiedValue(A); + + if (!QueryingValueSimplified.hasValue()) + return true; + + if (!QueryingValueSimplified.getValue()) + return false; + + Value &QueryingValueSimplifiedUnwrapped = + *QueryingValueSimplified.getValue(); + + if (isa(QueryingValueSimplifiedUnwrapped)) + return true; + + if (AccumulatedSimplifiedValue.hasValue()) + return AccumulatedSimplifiedValue == QueryingValueSimplified; + + LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << QueryingValue + << " is assumed to be " + << QueryingValueSimplifiedUnwrapped << "\n"); + + AccumulatedSimplifiedValue = QueryingValueSimplified; + return true; + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (!SimplifiedAssociatedValue.hasValue() || + !SimplifiedAssociatedValue.getValue()) + return Changed; + + if (auto *C = dyn_cast(SimplifiedAssociatedValue.getValue())) { + // We can replace the AssociatedValue with the constant. 
+ Value &V = getAssociatedValue(); + if (!V.user_empty() && &V != C && V.getType() == C->getType()) { + LLVM_DEBUG(dbgs() << "[Attributor][ValueSimplify] " << V << " -> " << *C + << "\n"); + V.replaceAllUsesWith(C); + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + +protected: + // An assumed simplified value. Initially, it is set to Optional::None, which + // means that the value is not clear under current assumption. If in the + // pessimistic state, getAssumedSimplifiedValue doesn't return this value but + // returns orignal associated value. + Optional SimplifiedAssociatedValue; +}; + +struct AAValueSimplifyArgument final : AAValueSimplifyImpl { + AAValueSimplifyArgument(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForCallSite = [&](AbstractCallSite ACS) { + // Check if we have an associated argument or not (which can happen for + // callback calls). + if (Value *ArgOp = ACS.getCallArgOperand(getArgNo())) + return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue); + return false; + }; + + if (!A.checkForAllCallSites(PredForCallSite, *this, true)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyReturned : AAValueSimplifyImpl { + AAValueSimplifyReturned(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto PredForReturned = [&](Value &V) { + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + if (!A.checkForAllReturnedValues(PredForReturned, *this)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFloating : AAValueSimplifyImpl { + AAValueSimplifyFloating(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Value &V = getAnchorValue(); + + // TODO: add other stuffs + if (isa(V) || isa(V)) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); + + auto VisitValueCB = [&](Value &V, BooleanState, bool Stripped) -> bool { + auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + // TODO: Look the instruction and check recursively. + LLVM_DEBUG( + dbgs() << "[Attributor][ValueSimplify] Can't be stripped more : " + << V << "\n"); + indicatePessimisticFixpoint(); + return false; + } + return checkAndUpdate(A, *this, V, SimplifiedAssociatedValue); + }; + + if (!genericValueTraversal( + A, getIRPosition(), *this, static_cast(*this), + VisitValueCB)) + return indicatePessimisticFixpoint(); + + // If a candicate was found in this update, return CHANGED. + + return HasValueBefore == SimplifiedAssociatedValue.hasValue() + ? 
ChangeStatus::UNCHANGED + : ChangeStatus ::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyFunction : AAValueSimplifyImpl { + AAValueSimplifyFunction(const IRPosition &IRP) : AAValueSimplifyImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + SimplifiedAssociatedValue = &getAnchorValue(); + indicateOptimisticFixpoint(); + } + /// See AbstractAttribute::initialize(...). + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable( + "AAValueSimplify(Function|CallSite)::updateImpl will not be called"); + } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSite : AAValueSimplifyFunction { + AAValueSimplifyCallSite(const IRPosition &IRP) + : AAValueSimplifyFunction(IRP) {} + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(value_simplify) + } +}; + +struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned { + AAValueSimplifyCallSiteReturned(const IRPosition &IRP) + : AAValueSimplifyReturned(IRP) {} + + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(value_simplify) + } +}; +struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { + AAValueSimplifyCallSiteArgument(const IRPosition &IRP) + : AAValueSimplifyFloating(IRP) {} + + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(value_simplify) + } +}; + +/// ----------------------- Heap-To-Stack Conversion --------------------------- +struct AAHeapToStackImpl : public AAHeapToStack { + AAHeapToStackImpl(const IRPosition &IRP) : AAHeapToStack(IRP) {} + + const std::string getAsStr() const override { + return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()); + } + + ChangeStatus 
manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function *F = getAssociatedFunction(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + for (Instruction *MallocCall : MallocCalls) { + // This malloc cannot be replaced. + if (BadMallocCalls.count(MallocCall)) + continue; + + for (Instruction *FreeCall : FreesForMalloc[MallocCall]) { + LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n"); + A.deleteAfterManifest(*FreeCall); + HasChanged = ChangeStatus::CHANGED; + } + + LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall + << "\n"); + + Constant *Size; + if (isCallocLikeFn(MallocCall, TLI)) { + auto *Num = cast(MallocCall->getOperand(0)); + auto *SizeT = dyn_cast(MallocCall->getOperand(1)); + APInt TotalSize = SizeT->getValue() * Num->getValue(); + Size = + ConstantInt::get(MallocCall->getOperand(0)->getType(), TotalSize); + } else { + Size = cast(MallocCall->getOperand(0)); + } + + unsigned AS = cast(MallocCall->getType())->getAddressSpace(); + Instruction *AI = new AllocaInst(Type::getInt8Ty(F->getContext()), AS, + Size, "", MallocCall->getNextNode()); + + if (AI->getType() != MallocCall->getType()) + AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc", + AI->getNextNode()); + + MallocCall->replaceAllUsesWith(AI); + + if (auto *II = dyn_cast(MallocCall)) { + auto *NBB = II->getNormalDest(); + BranchInst::Create(NBB, MallocCall->getParent()); + A.deleteAfterManifest(*MallocCall); + } else { + A.deleteAfterManifest(*MallocCall); + } + + if (isCallocLikeFn(MallocCall, TLI)) { + auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc", + AI->getNextNode()); + Value *Ops[] = { + BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size, + ConstantInt::get(Type::getInt1Ty(F->getContext()), false)}; + + Type *Tys[] = {BI->getType(), 
MallocCall->getOperand(0)->getType()}; + Module *M = F->getParent(); + Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); + CallInst::Create(Fn, Ops, "", BI->getNextNode()); + } + HasChanged = ChangeStatus::CHANGED; + } + + return HasChanged; + } + + /// Collection of all malloc calls in a function. + SmallSetVector MallocCalls; + + /// Collection of malloc calls that cannot be converted. + DenseSet BadMallocCalls; + + /// A map for each malloc call to the set of associated free calls. + DenseMap> FreesForMalloc; + + ChangeStatus updateImpl(Attributor &A) override; +}; + +ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { + const Function *F = getAssociatedFunction(); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + + auto UsesCheck = [&](Instruction &I) { + SmallPtrSet Visited; + SmallVector Worklist; + + for (Use &U : I.uses()) + Worklist.push_back(&U); + + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + if (!Visited.insert(U).second) + continue; + + auto *UserI = U->getUser(); + + if (isa(UserI)) + continue; + if (auto *SI = dyn_cast(UserI)) { + if (SI->getValueOperand() == U->get()) { + LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI << "\n"); + return false; + } + // A store into the malloc'ed memory is fine. + continue; + } + + // NOTE: Right now, if a function that has malloc pointer as an argument + // frees memory, we assume that the malloc pointer is freed. + + // TODO: Add nofree callsite argument attribute to indicate that pointer + // argument is not freed. + if (auto *CB = dyn_cast(UserI)) { + if (!CB->isArgOperand(U)) + continue; + + if (CB->isLifetimeStartOrEnd()) + continue; + + // Record malloc. 
+ if (isFreeCall(UserI, TLI)) { + FreesForMalloc[&I].insert( + cast(const_cast(UserI))); + continue; + } + + // If a function does not free memory we are fine + const auto &NoFreeAA = + A.getAAFor(*this, IRPosition::callsite_function(*CB)); + + unsigned ArgNo = U - CB->arg_begin(); + const auto &NoCaptureAA = A.getAAFor( + *this, IRPosition::callsite_argument(*CB, ArgNo)); + + if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) { + LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n"); + return false; + } + continue; + } + + if (isa(UserI) || isa(UserI)) { + for (Use &U : UserI->uses()) + Worklist.push_back(&U); + continue; + } + + // Unknown user. + LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n"); + return false; + } + return true; + }; + + auto MallocCallocCheck = [&](Instruction &I) { + if (BadMallocCalls.count(&I)) + return true; + + bool IsMalloc = isMallocLikeFn(&I, TLI); + bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI); + if (!IsMalloc && !IsCalloc) { + BadMallocCalls.insert(&I); + return true; + } + + if (IsMalloc) { + if (auto *Size = dyn_cast(I.getOperand(0))) + if (Size->getValue().sle(MaxHeapToStackSize)) + if (UsesCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } else if (IsCalloc) { + bool Overflow = false; + if (auto *Num = dyn_cast(I.getOperand(0))) + if (auto *Size = dyn_cast(I.getOperand(1))) + if ((Size->getValue().umul_ov(Num->getValue(), Overflow)) + .sle(MaxHeapToStackSize)) + if (!Overflow && UsesCheck(I)) { + MallocCalls.insert(&I); + return true; + } + } + + BadMallocCalls.insert(&I); + return true; + }; + + size_t NumBadMallocs = BadMallocCalls.size(); + + A.checkForAllCallLikeInstructions(MallocCallocCheck, *this); + + if (NumBadMallocs != BadMallocCalls.size()) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; } +struct AAHeapToStackFunction final : public AAHeapToStackImpl { + AAHeapToStackFunction(const IRPosition &IRP) : AAHeapToStackImpl(IRP) {} + + /// See 
AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECL(MallocCalls, Function, + "Number of MallocCalls converted to allocas"); + BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size(); + } +}; + +/// -------------------- Memory Behavior Attributes ---------------------------- +/// Includes read-none, read-only, and write-only. +/// ---------------------------------------------------------------------------- +struct AAMemoryBehaviorImpl : public AAMemoryBehavior { + AAMemoryBehaviorImpl(const IRPosition &IRP) : AAMemoryBehavior(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(getIRPosition(), getState()); + IRAttribute::initialize(A); + } + + /// Return the memory behavior information encoded in the IR for \p IRP. + static void getKnownStateFromValue(const IRPosition &IRP, + IntegerState &State) { + SmallVector Attrs; + IRP.getAttrs(AttrKinds, Attrs); + for (const Attribute &Attr : Attrs) { + switch (Attr.getKindAsEnum()) { + case Attribute::ReadNone: + State.addKnownBits(NO_ACCESSES); + break; + case Attribute::ReadOnly: + State.addKnownBits(NO_WRITES); + break; + case Attribute::WriteOnly: + State.addKnownBits(NO_READS); + break; + default: + llvm_unreachable("Unexpcted attribute!"); + } + } + + if (auto *I = dyn_cast(&IRP.getAnchorValue())) { + if (!I->mayReadFromMemory()) + State.addKnownBits(NO_READS); + if (!I->mayWriteToMemory()) + State.addKnownBits(NO_WRITES); + } + } + + /// See AbstractAttribute::getDeducedAttributes(...). 
+ void getDeducedAttributes(LLVMContext &Ctx, + SmallVectorImpl &Attrs) const override { + assert(Attrs.size() == 0); + if (isAssumedReadNone()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); + else if (isAssumedReadOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly)); + else if (isAssumedWriteOnly()) + Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly)); + assert(Attrs.size() <= 1); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + IRPosition &IRP = getIRPosition(); + + // Check if we would improve the existing attributes first. + SmallVector DeducedAttrs; + getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); + if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { + return IRP.hasAttr(Attr.getKindAsEnum(), + /* IgnoreSubsumingPositions */ true); + })) + return ChangeStatus::UNCHANGED; + + // Clear existing attributes. + IRP.removeAttrs(AttrKinds); + + // Use the generic manifest method. + return IRAttribute::manifest(A); + } + + /// See AbstractState::getAsStr(). + const std::string getAsStr() const override { + if (isAssumedReadNone()) + return "readnone"; + if (isAssumedReadOnly()) + return "readonly"; + if (isAssumedWriteOnly()) + return "writeonly"; + return "may-read/write"; + } + + /// The set of IR attributes AAMemoryBehavior deals with. + static const Attribute::AttrKind AttrKinds[3]; +}; + +const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = { + Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly}; + +/// Memory behavior attribute for a floating value. +struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl { + AAMemoryBehaviorFloating(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + // Initialize the use vector with all direct uses of the associated value. 
+ for (const Use &U : getAssociatedValue().uses()) + Uses.insert(&U); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FLOATING_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FLOATING_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FLOATING_ATTR(writeonly) + } + +private: + /// Return true if users of \p UserI might access the underlying + /// variable/location described by \p U and should therefore be analyzed. + bool followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI); + + /// Update the state according to the effect of use \p U in \p UserI. + void analyzeUseIn(Attributor &A, const Use *U, const Instruction *UserI); + +protected: + /// Container for (transitive) uses of the associated argument. + SetVector Uses; +}; + +/// Memory behavior attribute for function argument. +struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating { + AAMemoryBehaviorArgument(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorFloating::initialize(A); + + // Initialize the use vector with all direct uses of the associated value. 
+ Argument *Arg = getAssociatedArgument(); + if (!Arg || !Arg->getParent()->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + ChangeStatus manifest(Attributor &A) override { + // TODO: From readattrs.ll: "inalloca parameters are always + // considered written" + if (hasAttr({Attribute::InAlloca})) { + removeKnownBits(NO_WRITES); + removeAssumedBits(NO_WRITES); + } + return AAMemoryBehaviorFloating::manifest(A); + } + + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_ARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_ARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_ARG_ATTR(writeonly) + } +}; + +struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { + AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP) + : AAMemoryBehaviorArgument(IRP) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Argument *Arg = getAssociatedArgument(); + const IRPosition &ArgPos = IRPosition::argument(*Arg); + auto &ArgAA = A.getAAFor(*this, ArgPos); + return clampStateAndIndicateChange( + getState(), + static_cast(ArgAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CSARG_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CSARG_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CSARG_ATTR(writeonly) + } +}; + +/// Memory behavior attribute for a call site return position. 
+struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { + AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP) + : AAMemoryBehaviorFloating(IRP) {} + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // We do not annotate returned values. + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} +}; + +/// An AA to represent the memory behavior function attributes. +struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { + AAMemoryBehaviorFunction(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::updateImpl(Attributor &A). + virtual ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + Function &F = cast(getAnchorValue()); + if (isAssumedReadNone()) { + F.removeFnAttr(Attribute::ArgMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOnly); + F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + return AAMemoryBehaviorImpl::manifest(A); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_FN_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_FN_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_FN_ATTR(writeonly) + } +}; + +/// AAMemoryBehavior attribute for call sites. +struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { + AAMemoryBehaviorCallSite(const IRPosition &IRP) : AAMemoryBehaviorImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || !F->hasExactDefinition()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + // TODO: Once we have call site specific value information we can provide + // call site specific liveness liveness information and then it makes + // sense to specialize attributes for call sites arguments instead of + // redirecting requests to the callee argument. + Function *F = getAssociatedFunction(); + const IRPosition &FnPos = IRPosition::function(*F); + auto &FnAA = A.getAAFor(*this, FnPos); + return clampStateAndIndicateChange( + getState(), static_cast(FnAA.getState())); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + if (isAssumedReadNone()) + STATS_DECLTRACK_CS_ATTR(readnone) + else if (isAssumedReadOnly()) + STATS_DECLTRACK_CS_ATTR(readonly) + else if (isAssumedWriteOnly()) + STATS_DECLTRACK_CS_ATTR(writeonly) + } +}; +} // namespace + +ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { + + // The current assumed state used to determine a change. + auto AssumedState = getAssumed(); + + auto CheckRWInst = [&](Instruction &I) { + // If the instruction has an own memory behavior state, use it to restrict + // the local state. No further analysis is required as the other memory + // state is as optimistic as it gets. + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + const auto &MemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_function(ICS)); + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return !isAtFixpoint(); + } + + // Remove access kind modifiers if necessary. + if (I.mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (I.mayWriteToMemory()) + removeAssumedBits(NO_WRITES); + return !isAtFixpoint(); + }; + + if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this)) + return indicatePessimisticFixpoint(); + + return (AssumedState != getAssumed()) ? 
ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) { + + const IRPosition &IRP = getIRPosition(); + const IRPosition &FnPos = IRPosition::function_scope(IRP); + AAMemoryBehavior::StateType &S = getState(); + + // First, check the function scope. We take the known information and we avoid + // work if the assumed information implies the current assumed information for + // this attribute. + const auto &FnMemAA = A.getAAFor(*this, FnPos); + S.addKnownBits(FnMemAA.getKnown()); + if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed()) + return ChangeStatus::UNCHANGED; + + // Make sure the value is not captured (except through "return"), if + // it is, any information derived would be irrelevant anyway as we cannot + // check the potential aliases introduced by the capture. However, no need + // to fall back to anythign less optimistic than the function state. + const auto &ArgNoCaptureAA = A.getAAFor(*this, IRP); + if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { + S.intersectAssumedBits(FnMemAA.getAssumed()); + return ChangeStatus::CHANGED; + } + + // The current assumed state used to determine a change. + auto AssumedState = S.getAssumed(); + + // Liveness information to exclude dead users. + // TODO: Take the FnPos once we have call site specific liveness information. + const auto &LivenessAA = A.getAAFor( + *this, IRPosition::function(*IRP.getAssociatedFunction())); + + // Visit and expand uses until all are analyzed or a fixpoint is reached. + for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) { + const Use *U = Uses[i]; + Instruction *UserI = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI + << " [Dead: " << (LivenessAA.isAssumedDead(UserI)) + << "]\n"); + if (LivenessAA.isAssumedDead(UserI)) + continue; + + // Check if the users of UserI should also be visited. 
+ if (followUsersOfUseIn(A, U, UserI)) + for (const Use &UserIUse : UserI->uses()) + Uses.insert(&UserIUse); + + // If UserI might touch memory we analyze the use in detail. + if (UserI->mayReadOrWriteMemory()) + analyzeUseIn(A, U, UserI); + } + + return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED + : ChangeStatus::UNCHANGED; +} + +bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + // The loaded value is unrelated to the pointer argument, no need to + // follow the users of the load. + if (isa(UserI)) + return false; + + // By default we follow all uses assuming UserI might leak information on U, + // we have special handling for call sites operands though. + ImmutableCallSite ICS(UserI); + if (!ICS || !ICS.isArgOperand(U)) + return true; + + // If the use is a call argument known not to be captured, the users of + // the call do not need to be visited because they have to be unrelated to + // the input. Note that this check is not trivial even though we disallow + // general capturing of the underlying argument. The reason is that the + // call might the argument "through return", which we allow and for which we + // need to check call users. + unsigned ArgNo = ICS.getArgumentNo(U); + const auto &ArgNoCaptureAA = + A.getAAFor(*this, IRPosition::callsite_argument(ICS, ArgNo)); + return !ArgNoCaptureAA.isAssumedNoCapture(); +} + +void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, + const Instruction *UserI) { + assert(UserI->mayReadOrWriteMemory()); + + switch (UserI->getOpcode()) { + default: + // TODO: Handle all atomics and other side-effect operations we know of. + break; + case Instruction::Load: + // Loads cause the NO_READS property to disappear. + removeAssumedBits(NO_READS); + return; + + case Instruction::Store: + // Stores cause the NO_WRITES property to disappear if the use is the + // pointer operand. 
Note that we do assume that capturing was taken care of + // somewhere else. + if (cast(UserI)->getPointerOperand() == U->get()) + removeAssumedBits(NO_WRITES); + return; + + case Instruction::Call: + case Instruction::CallBr: + case Instruction::Invoke: { + // For call sites we look at the argument memory behavior attribute (this + // could be recursive!) in order to restrict our own state. + ImmutableCallSite ICS(UserI); + + // Give up on operand bundles. + if (ICS.isBundleOperand(U)) { + indicatePessimisticFixpoint(); + return; + } + + // Calling a function does read the function pointer, maybe write it if the + // function is self-modifying. + if (ICS.isCallee(U)) { + removeAssumedBits(NO_READS); + break; + } + + // Adjust the possible access behavior based on the information on the + // argument. + unsigned ArgNo = ICS.getArgumentNo(U); + const IRPosition &ArgPos = IRPosition::callsite_argument(ICS, ArgNo); + const auto &MemBehaviorAA = A.getAAFor(*this, ArgPos); + // "assumed" has at most the same bits as the MemBehaviorAA assumed + // and at least "known". + intersectAssumedBits(MemBehaviorAA.getAssumed()); + return; + } + }; + + // Generally, look at the "may-properties" and adjust the assumed state if we + // did not trigger special handling before. 
+ if (UserI->mayReadFromMemory()) + removeAssumedBits(NO_READS); + if (UserI->mayWriteToMemory()) + removeAssumedBits(NO_WRITES); +} + /// ---------------------------------------------------------------------------- /// Attributor /// ---------------------------------------------------------------------------- -bool Attributor::checkForAllCallSites(Function &F, - std::function &Pred, - bool RequireAllCallSites) { +bool Attributor::isAssumedDead(const AbstractAttribute &AA, + const AAIsDead *LivenessAA) { + const Instruction *CtxI = AA.getIRPosition().getCtxI(); + if (!CtxI) + return false; + + if (!LivenessAA) + LivenessAA = + &getAAFor(AA, IRPosition::function(*CtxI->getFunction()), + /* TrackDependence */ false); + + // Don't check liveness for AAIsDead. + if (&AA == LivenessAA) + return false; + + if (!LivenessAA->isAssumedDead(CtxI)) + return false; + + // We actually used liveness information so we have to record a dependence. + recordDependence(*LivenessAA, AA); + + return true; +} + +bool Attributor::checkForAllCallSites( + const function_ref &Pred, + const AbstractAttribute &QueryingAA, bool RequireAllCallSites) { // We can try to determine information from // the call sites. However, this is only possible all call sites are known, // hence the function has internal linkage. 
- if (RequireAllCallSites && !F.hasInternalLinkage()) { + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) { + LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP + << "\n"); + return false; + } + + return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites, + &QueryingAA); +} + +bool Attributor::checkForAllCallSites( + const function_ref &Pred, const Function &Fn, + bool RequireAllCallSites, const AbstractAttribute *QueryingAA) { + if (RequireAllCallSites && !Fn.hasLocalLinkage()) { LLVM_DEBUG( dbgs() - << "Attributor: Function " << F.getName() + << "[Attributor] Function " << Fn.getName() << " has no internal linkage, hence not all call sites are known\n"); return false; } - for (const Use &U : F.uses()) { - - CallSite CS(U.getUser()); - if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) { - if (!RequireAllCallSites) - continue; - - LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser() - << " is an invalid use of " << F.getName() << "\n"); + for (const Use &U : Fn.uses()) { + AbstractCallSite ACS(&U); + if (!ACS) { + LLVM_DEBUG(dbgs() << "[Attributor] Function " + << Fn.getName() + << " has non call site use " << *U.get() << " in " + << *U.getUser() << "\n"); return false; } - if (Pred(CS)) + Instruction *I = ACS.getInstruction(); + Function *Caller = I->getFunction(); + + const auto *LivenessAA = + lookupAAFor(IRPosition::function(*Caller), QueryingAA, + /* TrackDependence */ false); + + // Skip dead calls. + if (LivenessAA && LivenessAA->isAssumedDead(I)) { + // We actually used liveness information so we have to record a + // dependence. + if (QueryingAA) + recordDependence(*LivenessAA, *QueryingAA); + continue; + } + + const Use *EffectiveUse = + ACS.isCallbackCall() ? 
&ACS.getCalleeUseForCallback() : &U; + if (!ACS.isCallee(EffectiveUse)) { + if (!RequireAllCallSites) + continue; + LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser() + << " is an invalid use of " + << Fn.getName() << "\n"); + return false; + } + + if (Pred(ACS)) continue; - LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for " - << *CS.getInstruction() << "\n"); + LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for " + << *ACS.getInstruction() << "\n"); return false; } return true; } -ChangeStatus Attributor::run() { - // Initialize all abstract attributes. - for (AbstractAttribute *AA : AllAbstractAttributes) - AA->initialize(*this); +bool Attributor::checkForAllReturnedValuesAndReturnInsts( + const function_ref &)> + &Pred, + const AbstractAttribute &QueryingAA) { + const IRPosition &IRP = QueryingAA.getIRPosition(); + // Since we need to provide return instructions we have to have an exact + // definition. + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) + return false; + + // If this is a call site query we use the call site specific return values + // and liveness information. + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &AARetVal = getAAFor(QueryingAA, QueryIRP); + if (!AARetVal.getState().isValidState()) + return false; + + return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred); +} + +bool Attributor::checkForAllReturnedValues( + const function_ref &Pred, + const AbstractAttribute &QueryingAA) { + + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) + return false; + + // TODO: use the function scope once we have call site AAReturnedValues. 
+ const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &AARetVal = getAAFor(QueryingAA, QueryIRP); + if (!AARetVal.getState().isValidState()) + return false; + + return AARetVal.checkForAllReturnedValuesAndReturnInsts( + [&](Value &RV, const SmallSetVector &) { + return Pred(RV); + }); +} + +static bool +checkForAllInstructionsImpl(InformationCache::OpcodeInstMapTy &OpcodeInstMap, + const function_ref &Pred, + const AAIsDead *LivenessAA, bool &AnyDead, + const ArrayRef &Opcodes) { + for (unsigned Opcode : Opcodes) { + for (Instruction *I : OpcodeInstMap[Opcode]) { + // Skip dead instructions. + if (LivenessAA && LivenessAA->isAssumedDead(I)) { + AnyDead = true; + continue; + } + + if (!Pred(*I)) + return false; + } + } + return true; +} + +bool Attributor::checkForAllInstructions( + const llvm::function_ref &Pred, + const AbstractAttribute &QueryingAA, const ArrayRef &Opcodes) { + + const IRPosition &IRP = QueryingAA.getIRPosition(); + // Since we need to provide instructions we have to have an exact definition. + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + if (!AssociatedFunction) + return false; + + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &LivenessAA = + getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); + bool AnyDead = false; + + auto &OpcodeInstMap = + InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + if (!checkForAllInstructionsImpl(OpcodeInstMap, Pred, &LivenessAA, AnyDead, + Opcodes)) + return false; + + // If we actually used liveness information so we have to record a dependence. 
+ if (AnyDead) + recordDependence(LivenessAA, QueryingAA); + + return true; +} + +bool Attributor::checkForAllReadWriteInstructions( + const llvm::function_ref &Pred, + AbstractAttribute &QueryingAA) { + + const Function *AssociatedFunction = + QueryingAA.getIRPosition().getAssociatedFunction(); + if (!AssociatedFunction) + return false; + + // TODO: use the function scope once we have call site AAReturnedValues. + const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const auto &LivenessAA = + getAAFor(QueryingAA, QueryIRP, /* TrackDependence */ false); + bool AnyDead = false; + + for (Instruction *I : + InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) { + // Skip dead instructions. + if (LivenessAA.isAssumedDead(I)) { + AnyDead = true; + continue; + } + + if (!Pred(*I)) + return false; + } + + // If we actually used liveness information so we have to record a dependence. + if (AnyDead) + recordDependence(LivenessAA, QueryingAA); + + return true; +} + +ChangeStatus Attributor::run(Module &M) { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -1370,10 +4470,25 @@ ChangeStatus Attributor::run() { SetVector Worklist; Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end()); + bool RecomputeDependences = false; + do { + // Remember the size to determine new attributes. + size_t NumAAs = AllAbstractAttributes.size(); LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter << ", Worklist size: " << Worklist.size() << "\n"); + // If dependences (=QueryMap) are recomputed we have to look at all abstract + // attributes again, regardless of what changed in the last iteration. 
+ if (RecomputeDependences) { + LLVM_DEBUG( + dbgs() << "[Attributor] Run all AAs to recompute dependences\n"); + QueryMap.clear(); + ChangedAAs.clear(); + Worklist.insert(AllAbstractAttributes.begin(), + AllAbstractAttributes.end()); + } + // Add all abstract attributes that are potentially dependent on one that // changed to the work list. for (AbstractAttribute *ChangedAA : ChangedAAs) { @@ -1381,27 +4496,42 @@ ChangeStatus Attributor::run() { Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end()); } + LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter + << ", Worklist+Dependent size: " << Worklist.size() + << "\n"); + // Reset the changed set. ChangedAAs.clear(); // Update all abstract attribute in the work list and record the ones that // changed. for (AbstractAttribute *AA : Worklist) - if (AA->update(*this) == ChangeStatus::CHANGED) - ChangedAAs.push_back(AA); + if (!isAssumedDead(*AA, nullptr)) + if (AA->update(*this) == ChangeStatus::CHANGED) + ChangedAAs.push_back(AA); + + // Check if we recompute the dependences in the next iteration. + RecomputeDependences = (DepRecomputeInterval > 0 && + IterationCounter % DepRecomputeInterval == 0); + + // Add attributes to the changed set if they have been created in the last + // iteration. + ChangedAAs.append(AllAbstractAttributes.begin() + NumAAs, + AllAbstractAttributes.end()); // Reset the work list and repopulate with the changed abstract attributes. // Note that dependent ones are added above. 
Worklist.clear(); Worklist.insert(ChangedAAs.begin(), ChangedAAs.end()); - } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations); + } while (!Worklist.empty() && (IterationCounter++ < MaxFixpointIterations || + VerifyMaxFixpointIterations)); LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " << IterationCounter << "/" << MaxFixpointIterations << " iterations\n"); - bool FinishedAtFixpoint = Worklist.empty(); + size_t NumFinalAAs = AllAbstractAttributes.size(); // Reset abstract arguments not settled in a sound fixpoint by now. This // happens when we stopped the fixpoint iteration early. Note that only the @@ -1448,8 +4578,14 @@ ChangeStatus Attributor::run() { if (!State.isValidState()) continue; + // Skip dead code. + if (isAssumedDead(*AA, nullptr)) + continue; // Manifest the state and record if we changed the IR. ChangeStatus LocalChange = AA->manifest(*this); + if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled()) + AA->trackStatistics(); + ManifestChange = ManifestChange | LocalChange; NumAtFixpoint++; @@ -1462,69 +4598,92 @@ ChangeStatus Attributor::run() { << " arguments while " << NumAtFixpoint << " were in a valid fixpoint state\n"); - // If verification is requested, we finished this run at a fixpoint, and the - // IR was changed, we re-run the whole fixpoint analysis, starting at - // re-initialization of the arguments. This re-run should not result in an IR - // change. Though, the (virtual) state of attributes at the end of the re-run - // might be more optimistic than the known state or the IR state if the better - // state cannot be manifested. - if (VerifyAttributor && FinishedAtFixpoint && - ManifestChange == ChangeStatus::CHANGED) { - VerifyAttributor = false; - ChangeStatus VerifyStatus = run(); - if (VerifyStatus != ChangeStatus::UNCHANGED) - llvm_unreachable( - "Attributor verification failed, re-run did result in an IR change " - "even after a fixpoint was reached in the original run. 
(False " - "positives possible!)"); - VerifyAttributor = true; - } - NumAttributesManifested += NumManifested; NumAttributesValidFixpoint += NumAtFixpoint; + (void)NumFinalAAs; + assert( + NumFinalAAs == AllAbstractAttributes.size() && + "Expected the final number of abstract attributes to remain unchanged!"); + + // Delete stuff at the end to avoid invalid references and a nice order. + { + LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least " + << ToBeDeletedFunctions.size() << " functions and " + << ToBeDeletedBlocks.size() << " blocks and " + << ToBeDeletedInsts.size() << " instructions\n"); + for (Instruction *I : ToBeDeletedInsts) { + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { + SmallVector ToBeDeletedBBs; + ToBeDeletedBBs.reserve(NumDeadBlocks); + ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); + DeleteDeadBlocks(ToBeDeletedBBs); + STATS_DECLTRACK(AAIsDead, BasicBlock, + "Number of dead basic blocks deleted."); + } + + STATS_DECL(AAIsDead, Function, "Number of dead functions deleted."); + for (Function *Fn : ToBeDeletedFunctions) { + Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); + Fn->eraseFromParent(); + STATS_TRACK(AAIsDead, Function); + } + + // Identify dead internal functions and delete them. This happens outside + // the other fixpoint analysis as we might treat potentially dead functions + // as live to lower the number of iterations. If they happen to be dead, the + // below fixpoint loop will identify and eliminate them. 
+ SmallVector InternalFns; + for (Function &F : M) + if (F.hasLocalLinkage()) + InternalFns.push_back(&F); + + bool FoundDeadFn = true; + while (FoundDeadFn) { + FoundDeadFn = false; + for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { + Function *F = InternalFns[u]; + if (!F) + continue; + + const auto *LivenessAA = + lookupAAFor(IRPosition::function(*F)); + if (LivenessAA && + !checkForAllCallSites([](AbstractCallSite ACS) { return false; }, + *LivenessAA, true)) + continue; + + STATS_TRACK(AAIsDead, Function); + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + InternalFns[u] = nullptr; + FoundDeadFn = true; + } + } + } + + if (VerifyMaxFixpointIterations && + IterationCounter != MaxFixpointIterations) { + errs() << "\n[Attributor] Fixpoint iteration done after: " + << IterationCounter << "/" << MaxFixpointIterations + << " iterations\n"; + llvm_unreachable("The fixpoint was not reached with exactly the number of " + "specified iterations!"); + } + return ManifestChange; } -void Attributor::identifyDefaultAbstractAttributes( - Function &F, InformationCache &InfoCache, - DenseSet *Whitelist) { +void Attributor::initializeInformationCache(Function &F) { - // Every function can be nounwind. - registerAA(*new AANoUnwindFunction(F, InfoCache)); - - // Every function might be marked "nosync" - registerAA(*new AANoSyncFunction(F, InfoCache)); - - // Every function might be "no-free". - registerAA(*new AANoFreeFunction(F, InfoCache)); - - // Return attributes are only appropriate if the return type is non void. - Type *ReturnType = F.getReturnType(); - if (!ReturnType->isVoidTy()) { - // Argument attribute "returned" --- Create only one per function even - // though it is an argument attribute. - if (!Whitelist || Whitelist->count(AAReturnedValues::ID)) - registerAA(*new AAReturnedValuesImpl(F, InfoCache)); - - // Every function with pointer return type might be marked nonnull. 
- if (ReturnType->isPointerTy() && - (!Whitelist || Whitelist->count(AANonNullReturned::ID))) - registerAA(*new AANonNullReturned(F, InfoCache)); - } - - // Every argument with pointer type might be marked nonnull. - for (Argument &Arg : F.args()) { - if (Arg.getType()->isPointerTy()) - registerAA(*new AANonNullArgument(Arg, InfoCache)); - } - - // Every function might be "will-return". - registerAA(*new AAWillReturnFunction(F, InfoCache)); - - // Walk all instructions to find more attribute opportunities and also - // interesting instructions that might be queried by abstract attributes - // during their initialization or update. + // Walk all instructions to find interesting instructions that might be + // queried by abstract attributes during their initialization or update. + // This has to happen before we create attributes. auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F]; auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F]; @@ -1540,8 +4699,12 @@ void Attributor::identifyDefaultAbstractAttributes( default: assert((!ImmutableCallSite(&I)) && (!isa(&I)) && "New call site/base instruction type needs to be known int the " - "attributor."); + "Attributor."); break; + case Instruction::Load: + // The alignment of a pointer is interesting for loads. + case Instruction::Store: + // The alignment of a pointer is interesting for stores. case Instruction::Call: case Instruction::CallBr: case Instruction::Invoke: @@ -1555,18 +4718,154 @@ void Attributor::identifyDefaultAbstractAttributes( InstOpcodeMap[I.getOpcode()].push_back(&I); if (I.mayReadOrWriteMemory()) ReadOrWriteInsts.push_back(&I); + } +} +void Attributor::identifyDefaultAbstractAttributes(Function &F) { + if (!VisitedFunctions.insert(&F).second) + return; + + IRPosition FPos = IRPosition::function(F); + + // Check for dead BasicBlocks in every function. + // We need dead instruction detection because we do not want to deal with + // broken IR in which SSA rules do not apply. 
+ getOrCreateAAFor(FPos); + + // Every function might be "will-return". + getOrCreateAAFor(FPos); + + // Every function can be nounwind. + getOrCreateAAFor(FPos); + + // Every function might be marked "nosync" + getOrCreateAAFor(FPos); + + // Every function might be "no-free". + getOrCreateAAFor(FPos); + + // Every function might be "no-return". + getOrCreateAAFor(FPos); + + // Every function might be "no-recurse". + getOrCreateAAFor(FPos); + + // Every function might be "readnone/readonly/writeonly/...". + getOrCreateAAFor(FPos); + + // Every function might be applicable for Heap-To-Stack conversion. + if (EnableHeapToStack) + getOrCreateAAFor(FPos); + + // Return attributes are only appropriate if the return type is non void. + Type *ReturnType = F.getReturnType(); + if (!ReturnType->isVoidTy()) { + // Argument attribute "returned" --- Create only one per function even + // though it is an argument attribute. + getOrCreateAAFor(FPos); + + IRPosition RetPos = IRPosition::returned(F); + + // Every function might be simplified. + getOrCreateAAFor(RetPos); + + if (ReturnType->isPointerTy()) { + + // Every function with pointer return type might be marked align. + getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked nonnull. + getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked noalias. + getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked + // dereferenceable. + getOrCreateAAFor(RetPos); + } + } + + for (Argument &Arg : F.args()) { + IRPosition ArgPos = IRPosition::argument(Arg); + + // Every argument might be simplified. + getOrCreateAAFor(ArgPos); + + if (Arg.getType()->isPointerTy()) { + // Every argument with pointer type might be marked nonnull. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked noalias. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked dereferenceable. 
+ getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked align. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked nocapture. + getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked + // "readnone/readonly/writeonly/..." + getOrCreateAAFor(ArgPos); + } + } + + auto CallSitePred = [&](Instruction &I) -> bool { CallSite CS(&I); - if (CS && CS.getCalledFunction()) { + if (CS.getCalledFunction()) { for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) { + + IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + + // Call site argument might be simplified. + getOrCreateAAFor(CSArgPos); + if (!CS.getArgument(i)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i); + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "no-alias". + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "dereferenceable". + getOrCreateAAFor(CSArgPos); + + // Call site argument attribute "align". 
+ getOrCreateAAFor(CSArgPos); } } - } + return true; + }; + + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F); + bool Success, AnyDead = false; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, CallSitePred, nullptr, AnyDead, + {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, + (unsigned)Instruction::Call}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); + + auto LoadStorePred = [&](Instruction &I) -> bool { + if (isa(I)) + getOrCreateAAFor( + IRPosition::value(*cast(I).getPointerOperand())); + else + getOrCreateAAFor( + IRPosition::value(*cast(I).getPointerOperand())); + return true; + }; + Success = checkForAllInstructionsImpl( + OpcodeInstMap, LoadStorePred, nullptr, AnyDead, + {(unsigned)Instruction::Load, (unsigned)Instruction::Store}); + (void)Success; + assert(Success && !AnyDead && "Expected the check call to be successful!"); } /// Helpers to ease debugging through output streams and print calls. @@ -1576,21 +4875,39 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) { return OS << (S == ChangeStatus::CHANGED ? 
"changed" : "unchanged"); } -raw_ostream &llvm::operator<<(raw_ostream &OS, - AbstractAttribute::ManifestPosition AP) { +raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { switch (AP) { - case AbstractAttribute::MP_ARGUMENT: - return OS << "arg"; - case AbstractAttribute::MP_CALL_SITE_ARGUMENT: - return OS << "cs_arg"; - case AbstractAttribute::MP_FUNCTION: - return OS << "fn"; - case AbstractAttribute::MP_RETURNED: + case IRPosition::IRP_INVALID: + return OS << "inv"; + case IRPosition::IRP_FLOAT: + return OS << "flt"; + case IRPosition::IRP_RETURNED: return OS << "fn_ret"; + case IRPosition::IRP_CALL_SITE_RETURNED: + return OS << "cs_ret"; + case IRPosition::IRP_FUNCTION: + return OS << "fn"; + case IRPosition::IRP_CALL_SITE: + return OS << "cs"; + case IRPosition::IRP_ARGUMENT: + return OS << "arg"; + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return OS << "cs_arg"; } llvm_unreachable("Unknown attribute position!"); } +raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { + const Value &AV = Pos.getAssociatedValue(); + return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" + << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerState &S) { + return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" + << static_cast(S); +} + raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) { return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? 
"fix" : "")); } @@ -1601,8 +4918,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) { } void AbstractAttribute::print(raw_ostream &OS) const { - OS << "[" << getManifestPosition() << "][" << getAsStr() << "][" - << AnchoredVal.getName() << "]"; + OS << "[P: " << getIRPosition() << "][" << getAsStr() << "][S: " << getState() + << "]"; } ///} @@ -1610,7 +4927,7 @@ void AbstractAttribute::print(raw_ostream &OS) const { /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M) { +static bool runAttributorOnModule(Module &M, AnalysisGetter &AG) { if (DisableAttributor) return false; @@ -1619,39 +4936,39 @@ static bool runAttributorOnModule(Module &M) { // Create an Attributor and initially empty information cache that is filled // while we identify default attribute opportunities. - Attributor A; - InformationCache InfoCache; + InformationCache InfoCache(M, AG); + Attributor A(InfoCache, DepRecInterval); + + for (Function &F : M) + A.initializeInformationCache(F); for (Function &F : M) { - // TODO: Not all attributes require an exact definition. Find a way to - // enable deduction for some but not all attributes in case the - // definition might be changed at runtime, see also - // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. - // TODO: We could always determine abstract attributes and if sufficient - // information was found we could duplicate the functions that do not - // have an exact definition. - if (!F.hasExactDefinition()) { + if (F.hasExactDefinition()) + NumFnWithExactDefinition++; + else NumFnWithoutExactDefinition++; - continue; + + // We look at internal functions only on-demand but if any use is not a + // direct call, we have to do it eagerly. 
+ if (F.hasLocalLinkage()) { + if (llvm::all_of(F.uses(), [](const Use &U) { + return ImmutableCallSite(U.getUser()) && + ImmutableCallSite(U.getUser()).isCallee(&U); + })) + continue; } - // For now we ignore naked and optnone functions. - if (F.hasFnAttribute(Attribute::Naked) || - F.hasFnAttribute(Attribute::OptimizeNone)) - continue; - - NumFnWithExactDefinition++; - // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F, InfoCache); + A.identifyDefaultAbstractAttributes(F); } - return A.run() == ChangeStatus::CHANGED; + return A.run(M) == ChangeStatus::CHANGED; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - if (runAttributorOnModule(M)) { + AnalysisGetter AG(AM); + if (runAttributorOnModule(M, AG)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -1670,12 +4987,14 @@ struct AttributorLegacyPass : public ModulePass { bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return runAttributorOnModule(M); + + AnalysisGetter AG; + return runAttributorOnModule(M, AG); } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: Think about passes we will preserve and add them here. 
- AU.setPreservesCFG(); + AU.addRequired(); } }; @@ -1684,7 +5003,147 @@ struct AttributorLegacyPass : public ModulePass { Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); } char AttributorLegacyPass::ID = 0; + +const char AAReturnedValues::ID = 0; +const char AANoUnwind::ID = 0; +const char AANoSync::ID = 0; +const char AANoFree::ID = 0; +const char AANonNull::ID = 0; +const char AANoRecurse::ID = 0; +const char AAWillReturn::ID = 0; +const char AANoAlias::ID = 0; +const char AANoReturn::ID = 0; +const char AAIsDead::ID = 0; +const char AADereferenceable::ID = 0; +const char AAAlign::ID = 0; +const char AANoCapture::ID = 0; +const char AAValueSimplify::ID = 0; +const char AAHeapToStack::ID = 0; +const char AAMemoryBehavior::ID = 0; + +// Macro magic to create the static generator function for attributes that +// follow the naming scheme. + +#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \ + case IRPosition::PK: \ + llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!"); + +#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \ + case IRPosition::PK: \ + AA = new CLASS##SUFFIX(IRP); \ + break; + +#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + } \ + return *AA; \ + } + +#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = 
nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \ + SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \ + SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + } \ + return *AA; \ + } + +#define 
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \ + CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \ + CLASS *AA = nullptr; \ + switch (IRP.getPositionKind()) { \ + SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \ + SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \ + SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \ + } \ + return *AA; \ + } + +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) +CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues) + +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) + +CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) + +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) + +CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) + +#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION +#undef SWITCH_PK_CREATE +#undef 
SWITCH_PK_INV + INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp index 6c365f3f3cb..de80c88c159 100644 --- a/lib/Transforms/IPO/BlockExtractor.cpp +++ b/lib/Transforms/IPO/BlockExtractor.cpp @@ -119,6 +119,8 @@ void BlockExtractor::loadFile() { /*KeepEmpty=*/false); if (LineSplit.empty()) continue; + if (LineSplit.size()!=2) + report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); SmallVector BBNames; LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false); @@ -204,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) { ++NumExtracted; Changed = true; } - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index ad877ae1786..3cf839e397f 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -48,7 +48,7 @@ static void FindUsedValues(GlobalVariable *LLVMUsed, ConstantArray *Inits = cast(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { - Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + Value *Operand = Inits->getOperand(i)->stripPointerCasts(); GlobalValue *GV = cast(Operand); UsedValues.insert(GV); } @@ -120,7 +120,7 @@ static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { // Bump the alignment if necessary. 
if (Old->getAlignment() || New->getAlignment()) - New->setAlignment(std::max(getAlignment(Old), getAlignment(New))); + New->setAlignment(Align(std::max(getAlignment(Old), getAlignment(New)))); copyDebugLocMetadata(Old, New); Old->replaceAllUsesWith(NewConstant); diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index e30b33aa487..e20159ba0db 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -84,13 +84,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) { for (GlobalObject &GO : M.global_objects()) { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - for (MDNode *Type : Types) { - // Sanity check. GO must not be a function declaration. - assert(!isa(&GO) || !cast(&GO)->isDeclaration()); - + for (MDNode *Type : Types) if (ConstantInt *TypeId = extractNumericTypeId(Type)) TypeIds.insert(TypeId->getZExtValue()); - } } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); @@ -108,11 +104,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) { FunctionCallee C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx)); - Function *F = dyn_cast(C.getCallee()); + Function *F = cast(C.getCallee()); // Take over the existing function. The frontend emits a weak stub so that the // linker knows about the symbol; this pass replaces the function body. 
F->deleteBody(); - F->setAlignment(4096); + F->setAlignment(Align(4096)); Triple T(M.getTargetTriple()); if (T.isARM() || T.isThumb()) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 5ccd8bc4b0f..b174c63a577 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -78,11 +78,8 @@ STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); -// FIXME: This is disabled by default to avoid exposing security vulnerabilities -// in C/C++ code compiled by clang: -// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html static cl::opt EnableNonnullArgPropagation( - "enable-nonnull-arg-prop", cl::Hidden, + "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, cl::desc("Try to propagate nonnull argument attributes from callsites to " "caller functions.")); @@ -664,6 +661,25 @@ static bool addArgumentAttrsFromCallsites(Function &F) { return Changed; } +static bool addReadAttr(Argument *A, Attribute::AttrKind R) { + assert((R == Attribute::ReadOnly || R == Attribute::ReadNone) + && "Must be a Read attribute."); + assert(A && "Argument must not be null."); + + // If the argument already has the attribute, nothing needs to be done. + if (A->hasAttribute(R)) + return false; + + // Otherwise, remove potentially conflicting attribute, add the new one, + // and update statistics. + A->removeAttr(Attribute::WriteOnly); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(R); + R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; + return true; +} + /// Deduce nocapture attributes for the SCC. 
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; @@ -732,11 +748,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { SmallPtrSet Self; Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); - if (R != Attribute::None) { - A->addAttr(R); - Changed = true; - R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - } + if (R != Attribute::None) + Changed = addReadAttr(A, R); } } } @@ -833,12 +846,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ReadAttr != Attribute::None) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - // Clear out existing readonly/readnone attributes - A->removeAttr(Attribute::ReadOnly); - A->removeAttr(Attribute::ReadNone); - A->addAttr(ReadAttr); - ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; - Changed = true; + Changed = addReadAttr(A, ReadAttr); } } } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 62c7fbd0722..3f5cc078d75 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -450,7 +450,7 @@ static void computeImportForFunction( } else if (PrintImportFailures) { assert(!FailureInfo && "Expected no FailureInfo for newly rejected candidate"); - FailureInfo = llvm::make_unique( + FailureInfo = std::make_unique( VI, Edge.second.getHotness(), Reason, 1); } LLVM_DEBUG( @@ -764,7 +764,7 @@ void llvm::computeDeadSymbols( } // Make value live and add it to the worklist if it was not live before. - auto visit = [&](ValueInfo VI) { + auto visit = [&](ValueInfo VI, bool IsAliasee) { // FIXME: If we knew which edges were created for indirect call profiles, // we could skip them here. Any that are live should be reached via // other edges, e.g. reference edges. 
Otherwise, using a profile collected @@ -800,12 +800,15 @@ void llvm::computeDeadSymbols( Interposable = true; } - if (!KeepAliveLinkage) - return; + if (!IsAliasee) { + if (!KeepAliveLinkage) + return; - if (Interposable) - report_fatal_error( - "Interposable and available_externally/linkonce_odr/weak_odr symbol"); + if (Interposable) + report_fatal_error( + "Interposable and available_externally/linkonce_odr/weak_odr " + "symbol"); + } } for (auto &S : VI.getSummaryList()) @@ -821,16 +824,16 @@ void llvm::computeDeadSymbols( // If this is an alias, visit the aliasee VI to ensure that all copies // are marked live and it is added to the worklist for further // processing of its references. - visit(AS->getAliaseeVI()); + visit(AS->getAliaseeVI(), true); continue; } Summary->setLive(true); for (auto Ref : Summary->refs()) - visit(Ref); + visit(Ref, false); if (auto *FS = dyn_cast(Summary.get())) for (auto Call : FS->calls()) - visit(Call.first); + visit(Call.first, false); } } Index.setWithGlobalValueDeadStripping(); @@ -892,7 +895,7 @@ std::error_code llvm::EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const std::map &ModuleToSummariesForIndex) { std::error_code EC; - raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) return EC; for (auto &ILI : ModuleToSummariesForIndex) @@ -948,23 +951,15 @@ void llvm::thinLTOResolvePrevailingInModule( auto NewLinkage = GS->second->linkage(); if (NewLinkage == GV.getLinkage()) return; - - // Switch the linkage to weakany if asked for, e.g. we do this for - // linker redefined symbols (via --wrap or --defsym). - // We record that the visibility should be changed here in `addThinLTO` - // as we need access to the resolution vectors for each input file in - // order to find which symbols have been redefined. 
- // We may consider reorganizing this code and moving the linkage recording - // somewhere else, e.g. in thinLTOResolvePrevailingInIndex. - if (NewLinkage == GlobalValue::WeakAnyLinkage) { - GV.setLinkage(NewLinkage); - return; - } - if (GlobalValue::isLocalLinkage(GV.getLinkage()) || + // Don't internalize anything here, because the code below + // lacks necessary correctness checks. Leave this job to + // LLVM 'internalize' pass. + GlobalValue::isLocalLinkage(NewLinkage) || // In case it was dead and already converted to declaration. GV.isDeclaration()) return; + // Check for a non-prevailing def that has interposable linkage // (e.g. non-odr weak or linkonce). In that case we can't simply // convert to available_externally, since it would lose the diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 86b7f3e49ee..f010f7b703a 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -17,9 +17,11 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/CtorUtils.h" @@ -29,10 +31,15 @@ using namespace llvm; #define DEBUG_TYPE "globaldce" +static cl::opt + ClEnableVFE("enable-vfe", cl::Hidden, cl::init(true), cl::ZeroOrMore, + cl::desc("Enable virtual function elimination")); + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumIFuncs, "Number of indirect functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); +STATISTIC(NumVFuncs, "Number of virtual functions removed"); namespace { class GlobalDCELegacyPass : public ModulePass { @@ -118,6 +125,15 @@ void 
GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) { ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { + // If this is a dep from a vtable to a virtual function, and we have + // complete information about all virtual call sites which could call + // though this vtable, then skip it, because the call site information will + // be more precise. + if (VFESafeVTables.count(GVU) && isa(&GV)) { + LLVM_DEBUG(dbgs() << "Ignoring dep " << GVU->getName() << " -> " + << GV.getName() << "\n"); + continue; + } GVDependencies[GVU].insert(&GV); } } @@ -132,12 +148,133 @@ void GlobalDCEPass::MarkLive(GlobalValue &GV, if (Updates) Updates->push_back(&GV); if (Comdat *C = GV.getComdat()) { - for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { MarkLive(*CM.second, Updates); // Recursion depth is only two because only // globals in the same comdat are visited. + } } } +void GlobalDCEPass::ScanVTables(Module &M) { + SmallVector Types; + LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n"); + + auto *LTOPostLinkMD = + cast_or_null(M.getModuleFlag("LTOPostLink")); + bool LTOPostLink = + LTOPostLinkMD && + (cast(LTOPostLinkMD->getValue())->getZExtValue() != 0); + + for (GlobalVariable &GV : M.globals()) { + Types.clear(); + GV.getMetadata(LLVMContext::MD_type, Types); + if (GV.isDeclaration() || Types.empty()) + continue; + + // Use the typeid metadata on the vtable to build a mapping from typeids to + // the list of (GV, offset) pairs which are the possible vtables for that + // typeid. 
+ for (MDNode *Type : Types) { + Metadata *TypeID = Type->getOperand(1).get(); + + uint64_t Offset = + cast( + cast(Type->getOperand(0))->getValue()) + ->getZExtValue(); + + TypeIdMap[TypeID].insert(std::make_pair(&GV, Offset)); + } + + // If the type corresponding to the vtable is private to this translation + // unit, we know that we can see all virtual functions which might use it, + // so VFE is safe. + if (auto GO = dyn_cast(&GV)) { + GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility(); + if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit || + (LTOPostLink && + TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) { + LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n"); + VFESafeVTables.insert(&GV); + } + } + } +} + +void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId, + uint64_t CallOffset) { + for (auto &VTableInfo : TypeIdMap[TypeId]) { + GlobalVariable *VTable = VTableInfo.first; + uint64_t VTableOffset = VTableInfo.second; + + Constant *Ptr = + getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset, + *Caller->getParent()); + if (!Ptr) { + LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n"); + VFESafeVTables.erase(VTable); + return; + } + + auto Callee = dyn_cast(Ptr->stripPointerCasts()); + if (!Callee) { + LLVM_DEBUG(dbgs() << "vtable entry is not function pointer!\n"); + VFESafeVTables.erase(VTable); + return; + } + + LLVM_DEBUG(dbgs() << "vfunc dep " << Caller->getName() << " -> " + << Callee->getName() << "\n"); + GVDependencies[Caller].insert(Callee); + } +} + +void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) { + LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n"); + Function *TypeCheckedLoadFunc = + M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load)); + + if (!TypeCheckedLoadFunc) + return; + + for (auto U : TypeCheckedLoadFunc->users()) { + auto CI = dyn_cast(U); + if (!CI) + continue; + + auto *Offset = dyn_cast(CI->getArgOperand(1)); + Value 
*TypeIdValue = CI->getArgOperand(2); + auto *TypeId = cast(TypeIdValue)->getMetadata(); + + if (Offset) { + ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue()); + } else { + // type.checked.load with a non-constant offset, so assume every entry in + // every matching vtable is used. + for (auto &VTableInfo : TypeIdMap[TypeId]) { + VFESafeVTables.erase(VTableInfo.first); + } + } + } +} + +void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { + if (!ClEnableVFE) + return; + + ScanVTables(M); + + if (VFESafeVTables.empty()) + return; + + ScanTypeCheckedLoadIntrinsics(M); + + LLVM_DEBUG( + dbgs() << "VFE safe vtables:\n"; + for (auto *VTable : VFESafeVTables) + dbgs() << " " << VTable->getName() << "\n"; + ); +} + PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { bool Changed = false; @@ -163,6 +300,10 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { if (Comdat *C = GA.getComdat()) ComdatMembers.insert(std::make_pair(C, &GA)); + // Add dependencies between virtual call sites and the virtual functions they + // might call, if we have that information. + AddVirtualFunctionDependencies(M); + // Loop over the module, adding globals which are obviously necessary. for (GlobalObject &GO : M.global_objects()) { Changed |= RemoveUnusedGlobalValue(GO); @@ -257,8 +398,17 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { }; NumFunctions += DeadFunctions.size(); - for (Function *F : DeadFunctions) + for (Function *F : DeadFunctions) { + if (!F->use_empty()) { + // Virtual functions might still be referenced by one or more vtables, + // but if we've proven them to be unused then it's safe to replace the + // virtual function pointers with null, allowing us to remove the + // function itself. 
+ ++NumVFuncs; + F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType())); + } EraseUnusedGlobalValue(F); + } NumVariables += DeadGlobalVars.size(); for (GlobalVariable *GV : DeadGlobalVars) @@ -277,6 +427,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { ConstantDependenciesCache.clear(); GVDependencies.clear(); ComdatMembers.clear(); + TypeIdMap.clear(); + VFESafeVTables.clear(); if (Changed) return PreservedAnalyses::none(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c4fb3ce77f6..819715b9f8d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -155,7 +155,8 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. -static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { +static bool IsSafeComputationToRemove( + Value *V, function_ref GetTLI) { do { if (isa(V)) return true; @@ -164,7 +165,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { if (isa(V) || isa(V) || isa(V) || isa(V)) return false; - if (isAllocationFn(V, TLI)) + if (isAllocationFn(V, GetTLI)) return true; Instruction *I = cast(V); @@ -184,8 +185,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { /// This GV is a pointer root. Loop over all users of the global and clean up /// any that obviously don't assign the global a value that isn't dynamically /// allocated. -static bool CleanupPointerRootUsers(GlobalVariable *GV, - const TargetLibraryInfo *TLI) { +static bool +CleanupPointerRootUsers(GlobalVariable *GV, + function_ref GetTLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. 
The common strategy for leak checkers is to whitelist the @@ -241,18 +243,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV, TLI); + CleanupPointerRootUsers(GV, GetTLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first, TLI)) { + if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I, TLI)) + if (isAllocationFn(I, GetTLI)) break; Instruction *J = dyn_cast(I->getOperand(0)); if (!J) @@ -270,9 +272,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// We just marked GV constant. Loop over all users of the global, cleaning up /// the obvious ones. This is largely just a quick scan over the use list to /// clean up the easy and obvious cruft. This returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool CleanupConstantGlobalUsers( + Value *V, Constant *Init, const DataLayout &DL, + function_ref GetTLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When // we delete a constant array, we may also be holding pointer to one of its @@ -302,12 +304,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); } else if ((CE->getOpcode() == Instruction::BitCast && CE->getType()->isPointerTy()) || CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. 
- Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); } if (CE->use_empty()) { @@ -321,7 +323,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = nullptr; if (!isa(GEP->getOperand(0))) { ConstantExpr *CE = dyn_cast_or_null( - ConstantFoldInstruction(GEP, DL, TLI)); + ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -331,7 +333,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (Init && isa(Init) && GEP->isInBounds()) SubInit = Constant::getNullValue(GEP->getResultElementType()); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -348,7 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // us, and if they are all dead, nuke them without remorse. if (isSafeToDestroyConstant(C)) { C->destroyConstant(); - CleanupConstantGlobalUsers(V, Init, DL, TLI); + CleanupConstantGlobalUsers(V, Init, DL, GetTLI); return true; } } @@ -495,8 +497,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // had 256 byte alignment for example, something might depend on that: // propagate info to each field. uint64_t FieldOffset = Layout.getElementOffset(i); - unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); - if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) + Align NewAlign(MinAlign(StartAlignment, FieldOffset)); + if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i)))) NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. 
@@ -511,7 +513,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NewGlobals.reserve(NumElements); auto ElTy = STy->getElementType(); uint64_t EltSize = DL.getTypeAllocSize(ElTy); - unsigned EltAlign = DL.getABITypeAlignment(ElTy); + Align EltAlign(DL.getABITypeAlignment(ElTy)); uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = Init->getAggregateElement(i); @@ -530,7 +532,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. - unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + Align NewAlign(MinAlign(StartAlignment, EltSize * i)); if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, @@ -745,9 +747,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// are uses of the loaded value that would trap if the loaded value is /// dynamically null, then we know that they cannot be reachable with a null /// optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool OptimizeAwayTrappingUsesOfLoads( + GlobalVariable *GV, Constant *LV, const DataLayout &DL, + function_ref GetTLI) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -793,10 +795,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. 
if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV, TLI); + Changed |= CleanupPointerRootUsers(GV, GetTLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); } if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -889,8 +891,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, while (!GV->use_empty()) { if (StoreInst *SI = dyn_cast(GV->user_back())) { // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, - SI->getOrdering(), SI->getSyncScopeID(), SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, + None, SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -907,7 +909,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool->getValueType(), InitBool, - InitBool->getName() + ".val", false, 0, + InitBool->getName() + ".val", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? (Instruction *)ICI : LI); InitBoolUsed = true; @@ -1562,10 +1564,10 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // Try to optimize globals based on the knowledge that only one value (besides // its initializer) is ever stored to the global. -static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, - AtomicOrdering Ordering, - const DataLayout &DL, - TargetLibraryInfo *TLI) { +static bool +optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, + AtomicOrdering Ordering, const DataLayout &DL, + function_ref GetTLI) { // Ignore no-op GEPs and bitcasts. 
StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1583,9 +1585,10 @@ static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, GetTLI)) { + auto *TLI = &GetTLI(*CI->getFunction()); Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, DL, TLI)) @@ -1643,10 +1646,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // instead of a select to synthesize the desired value. bool IsOneZero = false; bool EmitOneOrZero = true; - if (ConstantInt *CI = dyn_cast(OtherVal)){ + auto *CI = dyn_cast(OtherVal); + if (CI && CI->getValue().getActiveBits() <= 64) { IsOneZero = InitVal->isNullValue() && CI->isOne(); - if (ConstantInt *CIInit = dyn_cast(GV->getInitializer())){ + auto *CIInit = dyn_cast(GV->getInitializer()); + if (CIInit && CIInit->getValue().getActiveBits() <= 64) { uint64_t ValInit = CIInit->getZExtValue(); uint64_t ValOther = CI->getZExtValue(); uint64_t ValMinus = ValOther - ValInit; @@ -1711,7 +1716,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. 
StoreVal = new LoadInst(NewGV->getValueType(), NewGV, - LI->getName() + ".b", false, 0, + LI->getName() + ".b", false, None, LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa(StoredVal) || isa(StoredVal)) && @@ -1721,15 +1726,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } StoreInst *NSI = - new StoreInst(StoreVal, NewGV, false, 0, SI->getOrdering(), + new StoreInst(StoreVal, NewGV, false, None, SI->getOrdering(), SI->getSyncScopeID(), SI); NSI->setDebugLoc(SI->getDebugLoc()); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast(UI); - LoadInst *NLI = - new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b", - false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI); + LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV, + LI->getName() + ".b", false, None, + LI->getOrdering(), LI->getSyncScopeID(), LI); Instruction *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); @@ -1914,9 +1919,10 @@ static void makeAllConstantUsesInstructions(Constant *C) { /// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -static bool processInternalGlobal( - GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, - function_ref LookupDomTree) { +static bool +processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, + function_ref GetTLI, + function_ref LookupDomTree) { auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function and // this function is non-recursive, we replace the global with a local alloca @@ -1963,11 +1969,12 @@ static bool processInternalGlobal( bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. - Changed = CleanupPointerRootUsers(GV, TLI); + Changed = CleanupPointerRootUsers(GV, GetTLI); } else { // Delete any stores we can find to the global. 
We may not be able to // make it completely dead though. - Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + Changed = + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); } // If the global is dead now, delete it. @@ -1989,7 +1996,7 @@ static bool processInternalGlobal( GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -2019,7 +2026,7 @@ static bool processInternalGlobal( GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " @@ -2033,7 +2040,8 @@ static bool processInternalGlobal( // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, + GetTLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -2054,7 +2062,8 @@ static bool processInternalGlobal( /// Analyze the specified global variable and optimize it if possible. If we /// make a change, return true. 
static bool -processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, +processGlobal(GlobalValue &GV, + function_ref GetTLI, function_ref LookupDomTree) { if (GV.getName().startswith("llvm.")) return false; @@ -2086,7 +2095,7 @@ processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, if (GVar->isConstant() || !GVar->hasInitializer()) return Changed; - return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed; + return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; } /// Walk all of the direct calls of the specified function, changing them to @@ -2234,7 +2243,8 @@ hasOnlyColdCalls(Function &F, } static bool -OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, +OptimizeFunctions(Module &M, + function_ref GetTLI, function_ref GetTTI, function_ref GetBFI, function_ref LookupDomTree, @@ -2275,17 +2285,13 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, // So, remove unreachable blocks from the function, because a) there's // no point in analyzing them and b) GlobalOpt should otherwise grow // some more complicated logic to break these cycles. - // Removing unreachable blocks might invalidate the dominator so we - // recalculate it. 
if (!F->isDeclaration()) { - if (removeUnreachableBlocks(*F)) { - auto &DT = LookupDomTree(*F); - DT.recalculate(*F); - Changed = true; - } + auto &DT = LookupDomTree(*F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Changed |= removeUnreachableBlocks(*F, &DTU); } - Changed |= processGlobal(*F, TLI, LookupDomTree); + Changed |= processGlobal(*F, GetTLI, LookupDomTree); if (!F->hasLocalLinkage()) continue; @@ -2342,7 +2348,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, } static bool -OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, +OptimizeGlobalVars(Module &M, + function_ref GetTLI, function_ref LookupDomTree, SmallPtrSetImpl &NotDiscardableComdats) { bool Changed = false; @@ -2357,7 +2364,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, if (GV->hasInitializer()) if (auto *C = dyn_cast(GV->getInitializer())) { auto &DL = M.getDataLayout(); - Constant *New = ConstantFoldConstant(C, DL, TLI); + // TLI is not used in the case of a Constant, so use default nullptr + // for that optional parameter, since we don't have a Function to + // provide GetTLI anyway. 
+ Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); if (New && New != C) GV->setInitializer(New); } @@ -2367,7 +2377,7 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, continue; } - Changed |= processGlobal(*GV, TLI, LookupDomTree); + Changed |= processGlobal(*GV, GetTLI, LookupDomTree); } return Changed; } @@ -2581,8 +2591,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - Value *AStripped = (*A)->stripPointerCastsNoFollowAliases(); - Value *BStripped = (*B)->stripPointerCastsNoFollowAliases(); + Value *AStripped = (*A)->stripPointerCasts(); + Value *BStripped = (*B)->stripPointerCasts(); return AStripped->getName().compare(BStripped->getName()); } @@ -2809,7 +2819,14 @@ OptimizeGlobalAliases(Module &M, return Changed; } -static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { +static Function * +FindCXAAtExit(Module &M, function_ref GetTLI) { + // Hack to get a default TLI before we have actual Function. + auto FuncIter = M.begin(); + if (FuncIter == M.end()) + return nullptr; + auto *TLI = &GetTLI(*FuncIter); + LibFunc F = LibFunc_cxa_atexit; if (!TLI->has(F)) return nullptr; @@ -2818,6 +2835,9 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!Fn) return nullptr; + // Now get the actual TLI for Fn. + TLI = &GetTLI(*Fn); + // Make sure that the function has the correct prototype. 
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit) return nullptr; @@ -2889,7 +2909,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { } static bool optimizeGlobalsInModule( - Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, + Module &M, const DataLayout &DL, + function_ref GetTLI, function_ref GetTTI, function_ref GetBFI, function_ref LookupDomTree) { @@ -2914,24 +2935,24 @@ static bool optimizeGlobalsInModule( NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree, + LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, NotDiscardableComdats); // Optimize global_ctors list. LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { - return EvaluateStaticConstructor(F, DL, TLI); + return EvaluateStaticConstructor(F, DL, &GetTLI(*F)); }); // Optimize non-address-taken globals. - LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree, - NotDiscardableComdats); + LocalChange |= + OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); // Try to remove trivial global destructors if they are not removed // already. 
- Function *CXAAtExitFn = FindCXAAtExit(M, TLI); + Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI); if (CXAAtExitFn) LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); @@ -2946,12 +2967,14 @@ static bool optimizeGlobalsInModule( PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto &DL = M.getDataLayout(); - auto &TLI = AM.getResult(M); auto &FAM = AM.getResult(M).getManager(); auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ return FAM.getResult(F); }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { return FAM.getResult(F); }; @@ -2960,7 +2983,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { return FAM.getResult(F); }; - if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -2979,10 +3002,12 @@ struct GlobalOptLegacyPass : public ModulePass { return false; auto &DL = M.getDataLayout(); - auto *TLI = &getAnalysis().getTLI(); auto LookupDomTree = [this](Function &F) -> DominatorTree & { return this->getAnalysis(F).getDomTree(); }; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto GetTTI = [this](Function &F) -> TargetTransformInfo & { return this->getAnalysis().getTTI(F); }; @@ -2991,7 +3016,8 @@ struct GlobalOptLegacyPass : public ModulePass { return this->getAnalysis(F).getBFI(); }; - return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree); + return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, + LookupDomTree); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp index ab1a9a79cad..cfdcc8db7f5 100644 --- 
a/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/lib/Transforms/IPO/HotColdSplitting.cpp @@ -85,12 +85,6 @@ static cl::opt "multiple of TCC_Basic)")); namespace { - -/// A sequence of basic blocks. -/// -/// A 0-sized SmallVector is slightly cheaper to move than a std::vector. -using BlockSequence = SmallVector; - // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify // this function unless you modify the MBB version as well. // @@ -169,31 +163,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) { return Changed; } -class HotColdSplitting { -public: - HotColdSplitting(ProfileSummaryInfo *ProfSI, - function_ref GBFI, - function_ref GTTI, - std::function *GORE, - function_ref LAC) - : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {} - bool run(Module &M); - -private: - bool isFunctionCold(const Function &F) const; - bool shouldOutlineFrom(const Function &F) const; - bool outlineColdRegions(Function &F, bool HasProfileSummary); - Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, - BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, unsigned Count); - ProfileSummaryInfo *PSI; - function_ref GetBFI; - function_ref GetTTI; - std::function *GetORE; - function_ref LookupAC; -}; - class HotColdSplittingLegacyPass : public ModulePass { public: static char ID; @@ -321,13 +290,10 @@ static int getOutliningPenalty(ArrayRef Region, return Penalty; } -Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, - DominatorTree &DT, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, - unsigned Count) { +Function *HotColdSplitting::extractColdRegion( + const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned 
Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. @@ -349,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, return nullptr; Function *OrigF = Region[0]->getParent(); - if (Function *OutF = CE.extractCodeRegion()) { + if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast(U); CallSite CS(CI); @@ -607,9 +573,9 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { }); if (!DT) - DT = make_unique(F); + DT = std::make_unique(F); if (!PDT) - PDT = make_unique(F); + PDT = std::make_unique(F); auto Regions = OutliningRegion::create(*BB, *DT, *PDT); for (OutliningRegion &Region : Regions) { @@ -637,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { } } + if (OutliningWorklist.empty()) + return Changed; + // Outline single-entry cold regions, splitting up larger regions as needed. unsigned OutlinedFunctionID = 1; - while (!OutliningWorklist.empty()) { + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. 
+ CodeExtractorAnalysisCache CEAC(F); + do { OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { @@ -650,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { BB->dump(); }); - Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, - OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI, + ORE, AC, OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; Changed = true; } } while (!Region.empty()); - } + } while (!OutliningWorklist.empty()); return Changed; } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 34db75dd8b0..bddf7521159 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -114,6 +114,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createIPSCCPPass()); } +void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMergeFunctionsPass()); +} + void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { auto PreserveMain = [=](const GlobalValue &GV) { return AllButMain && GV.getName() == "main"; @@ -121,6 +125,15 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { unwrap(PM)->add(createInternalizePass(PreserveMain)); } +void LLVMAddInternalizePassWithMustPreservePredicate( + LLVMPassManagerRef PM, + void *Context, + LLVMBool (*Pred)(LLVMValueRef, void *)) { + unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) { + return Pred(wrap(&GV), Context) == 0 ? 
false : true; + })); +} + void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createStripDeadPrototypesPass()); } diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 7f5511e008e..d1a68b28bd3 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -18,24 +18,28 @@ using namespace llvm; #define DEBUG_TYPE "inferattrs" -static bool inferAllPrototypeAttributes(Module &M, - const TargetLibraryInfo &TLI) { +static bool inferAllPrototypeAttributes( + Module &M, function_ref GetTLI) { bool Changed = false; for (Function &F : M.functions()) // We only infer things using the prototype and the name; we don't need // definitions. if (F.isDeclaration() && !F.hasOptNone()) - Changed |= inferLibFuncAttributes(F, TLI); + Changed |= inferLibFuncAttributes(F, GetTLI(F)); return Changed; } PreservedAnalyses InferFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { - auto &TLI = AM.getResult(M); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; - if (!inferAllPrototypeAttributes(M, TLI)) + if (!inferAllPrototypeAttributes(M, GetTLI)) // If we didn't infer anything, preserve all analyses. 
return PreservedAnalyses::all(); @@ -60,8 +64,10 @@ struct InferFunctionAttrsLegacyPass : public ModulePass { if (skipModule(M)) return false; - auto &TLI = getAnalysis().getTLI(); - return inferAllPrototypeAttributes(M, TLI); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return inferAllPrototypeAttributes(M, GetTLI); } }; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 945f8affae6..4b72261131c 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -239,7 +239,7 @@ static void mergeInlinedArrayAllocas( } if (Align1 > Align2) - AvailableAlloca->setAlignment(AI->getAlignment()); + AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment())); } AI->eraseFromParent(); @@ -527,7 +527,8 @@ static void setInlineRemark(CallSite &CS, StringRef message) { static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, - ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, + ProfileSummaryInfo *PSI, + std::function GetTLI, bool InsertLifetime, function_ref GetInlineCost, function_ref AARGetter, @@ -626,7 +627,8 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Instruction *Instr = CS.getInstruction(); - bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + bool IsTriviallyDead = + isInstructionTriviallyDead(Instr, &GetTLI(*Caller)); int InlineHistoryID; if (!IsTriviallyDead) { @@ -757,13 +759,16 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); ACT = &getAnalysis(); PSI = &getAnalysis().getPSI(); - auto &TLI = getAnalysis().getTLI(); + auto GetTLI = [&](Function &F) -> TargetLibraryInfo & { + return getAnalysis().getTLI(F); + }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, - [this](CallSite CS) { return 
getInlineCost(CS); }, - LegacyAARGetter(*this), ImportedFunctionsStats); + return inlineCallsImpl( + SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, + [this](CallSite CS) { return getInlineCost(CS); }, LegacyAARGetter(*this), + ImportedFunctionsStats); } /// Remove now-dead linkonce functions at the end of @@ -879,7 +884,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (!ImportedFunctionsStats && InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) { ImportedFunctionsStats = - llvm::make_unique(); + std::make_unique(); ImportedFunctionsStats->setModuleInfo(M); } diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 91c7b5f5f13..add2ae05373 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); if (auto *ACT = getAnalysisIfAvailable()) - AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent()); + AC = ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion() != nullptr) { + if (Extractor.extractCodeRegion(CEAC) != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. 
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index f7371284f47..2dec366d70e 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -230,6 +230,16 @@ void ByteArrayBuilder::allocate(const std::set &Bits, Bytes[AllocByteOffset + B] |= AllocMask; } +bool lowertypetests::isJumpTableCanonical(Function *F) { + if (F->isDeclarationForLinker()) + return false; + auto *CI = mdconst::extract_or_null( + F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); + if (!CI || CI->getZExtValue() != 0) + return true; + return F->hasFnAttribute("cfi-canonical-jump-table"); +} + namespace { struct ByteArrayInfo { @@ -251,9 +261,12 @@ class GlobalTypeMember final : TrailingObjects { GlobalObject *GO; size_t NTypes; - // For functions: true if this is a definition (either in the merged module or - // in one of the thinlto modules). - bool IsDefinition; + // For functions: true if the jump table is canonical. This essentially means + // whether the canonical address (i.e. the symbol table entry) of the function + // is provided by the local jump table. This is normally the same as whether + // the function is defined locally, but if canonical jump tables are disabled + // by the user then the jump table never provides a canonical definition. + bool IsJumpTableCanonical; // For functions: true if this function is either defined or used in a thinlto // module and its jumptable entry needs to be exported to thinlto backends. 
@@ -263,13 +276,13 @@ class GlobalTypeMember final : TrailingObjects { public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, - bool IsDefinition, bool IsExported, + bool IsJumpTableCanonical, bool IsExported, ArrayRef Types) { auto *GTM = static_cast(Alloc.Allocate( totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); - GTM->IsDefinition = IsDefinition; + GTM->IsJumpTableCanonical = IsJumpTableCanonical; GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects()); @@ -280,8 +293,8 @@ public: return GO; } - bool isDefinition() const { - return IsDefinition; + bool isJumpTableCanonical() const { + return IsJumpTableCanonical; } bool isExported() const { @@ -320,6 +333,49 @@ private: size_t NTargets; }; +struct ScopedSaveAliaseesAndUsed { + Module &M; + SmallPtrSet Used, CompilerUsed; + std::vector> FunctionAliases; + + ScopedSaveAliaseesAndUsed(Module &M) : M(M) { + // The users of this class want to replace all function references except + // for aliases and llvm.used/llvm.compiler.used with references to a jump + // table. We avoid replacing aliases in order to avoid introducing a double + // indirection (or an alias pointing to a declaration in ThinLTO mode), and + // we avoid replacing llvm.used/llvm.compiler.used because these global + // variables describe properties of the global, not the jump table (besides, + // offseted references to the jump table in llvm.used are invalid). + // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly + // indirect) users", so what we do is save the list of globals referenced by + // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW + // replace the aliasees and then set them back to their original values at + // the end. 
+ if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false)) + GV->eraseFromParent(); + if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true)) + GV->eraseFromParent(); + + for (auto &GIS : concat(M.aliases(), M.ifuncs())) { + // FIXME: This should look past all aliases not just interposable ones, + // see discussion on D65118. + if (auto *F = + dyn_cast(GIS.getIndirectSymbol()->stripPointerCasts())) + FunctionAliases.push_back({&GIS, F}); + } + } + + ~ScopedSaveAliaseesAndUsed() { + appendToUsed(M, std::vector(Used.begin(), Used.end())); + appendToCompilerUsed(M, std::vector(CompilerUsed.begin(), + CompilerUsed.end())); + + for (auto P : FunctionAliases) + P.first->setIndirectSymbol( + ConstantExpr::getBitCast(P.second, P.first->getType())); + } +}; + class LowerTypeTestsModule { Module &M; @@ -387,7 +443,8 @@ class LowerTypeTestsModule { uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); - void importFunction(Function *F, bool isDefinition); + void importFunction(Function *F, bool isJumpTableCanonical, + std::vector &AliasesToErase); BitSetInfo buildBitSet(Metadata *TypeId, @@ -421,7 +478,8 @@ class LowerTypeTestsModule { ArrayRef Globals, ArrayRef ICallBranchFunnels); - void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, bool IsDefinition); + void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, + bool IsJumpTableCanonical); void moveInitializerToModuleConstructor(GlobalVariable *GV); void findGlobalVariableUsersOf(Constant *C, SmallSetVector &Out); @@ -433,7 +491,7 @@ class LowerTypeTestsModule { /// the block. 'This's use list is expected to have at least one element. /// Unlike replaceAllUsesWith this function skips blockaddr and direct call /// uses. 
- void replaceCfiUses(Function *Old, Value *New, bool IsDefinition); + void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical); /// replaceDirectCalls - Go through the uses list for this definition and /// replace each use, which is a direct function call. @@ -759,43 +817,50 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( // Build a new global with the combined contents of the referenced globals. // This global is a struct whose even-indexed elements contain the original // contents of the referenced globals and whose odd-indexed elements contain - // any padding required to align the next element to the next power of 2. + // any padding required to align the next element to the next power of 2 plus + // any additional padding required to meet its alignment requirements. std::vector GlobalInits; const DataLayout &DL = M.getDataLayout(); + DenseMap GlobalLayout; + Align MaxAlign; + uint64_t CurOffset = 0; + uint64_t DesiredPadding = 0; for (GlobalTypeMember *G : Globals) { - GlobalVariable *GV = cast(G->getGlobal()); + auto *GV = cast(G->getGlobal()); + MaybeAlign Alignment(GV->getAlignment()); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(GV->getValueType())); + MaxAlign = std::max(MaxAlign, *Alignment); + uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, *Alignment); + GlobalLayout[G] = GVOffset; + if (GVOffset != 0) { + uint64_t Padding = GVOffset - CurOffset; + GlobalInits.push_back( + ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + } + GlobalInits.push_back(GV->getInitializer()); uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); + CurOffset = GVOffset + InitSize; - // Compute the amount of padding required. - uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; + // Compute the amount of padding that we'd like for the next element. 
+ DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; // Experiments of different caps with Chromium on both x64 and ARM64 // have shown that the 32-byte cap generates the smallest binary on // both platforms while different caps yield similar performance. // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) - if (Padding > 32) - Padding = alignTo(InitSize, 32) - InitSize; - - GlobalInits.push_back( - ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + if (DesiredPadding > 32) + DesiredPadding = alignTo(InitSize, 32) - InitSize; } - if (!GlobalInits.empty()) - GlobalInits.pop_back(); + Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); auto *CombinedGlobal = new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); + CombinedGlobal->setAlignment(MaxAlign); StructType *NewTy = cast(NewInit->getType()); - const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); - - // Compute the offsets of the original globals within the new global. - DenseMap GlobalLayout; - for (unsigned I = 0; I != Globals.size(); ++I) - // Multiply by 2 to account for padding elements. - GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each @@ -975,14 +1040,16 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { } // ThinLTO backend: the function F has a jump table entry; update this module -// accordingly. isDefinition describes the type of the jump table entry. -void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { +// accordingly. isJumpTableCanonical describes the type of the jump table entry. 
+void LowerTypeTestsModule::importFunction( + Function *F, bool isJumpTableCanonical, + std::vector &AliasesToErase) { assert(F->getType()->getAddressSpace() == 0); GlobalValue::VisibilityTypes Visibility = F->getVisibility(); std::string Name = F->getName(); - if (F->isDeclarationForLinker() && isDefinition) { + if (F->isDeclarationForLinker() && isJumpTableCanonical) { // Non-dso_local functions may be overriden at run time, // don't short curcuit them if (F->isDSOLocal()) { @@ -997,12 +1064,13 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { } Function *FDecl; - if (F->isDeclarationForLinker() && !isDefinition) { - // Declaration of an external function. + if (!isJumpTableCanonical) { + // Either a declaration of an external function or a reference to a locally + // defined jump table. FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, F->getAddressSpace(), Name + ".cfi_jt", &M); FDecl->setVisibility(GlobalValue::HiddenVisibility); - } else if (isDefinition) { + } else { F->setName(Name + ".cfi"); F->setLinkage(GlobalValue::ExternalLinkage); FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, @@ -1011,8 +1079,8 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { Visibility = GlobalValue::HiddenVisibility; // Delete aliases pointing to this function, they'll be re-created in the - // merged output - SmallVector ToErase; + // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed + // will want to reset the aliasees first. 
for (auto &U : F->uses()) { if (auto *A = dyn_cast(U.getUser())) { Function *AliasDecl = Function::Create( @@ -1020,24 +1088,15 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { F->getAddressSpace(), "", &M); AliasDecl->takeName(A); A->replaceAllUsesWith(AliasDecl); - ToErase.push_back(A); + AliasesToErase.push_back(A); } } - for (auto *A : ToErase) - A->eraseFromParent(); - } else { - // Function definition without type metadata, where some other translation - // unit contained a declaration with type metadata. This normally happens - // during mixed CFI + non-CFI compilation. We do nothing with the function - // so that it is treated the same way as a function defined outside of the - // LTO unit. - return; } - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isDefinition); + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical); else - replaceCfiUses(F, FDecl, isDefinition); + replaceCfiUses(F, FDecl, isJumpTableCanonical); // Set visibility late because it's used in replaceCfiUses() to determine // whether uses need to to be replaced. @@ -1225,7 +1284,7 @@ void LowerTypeTestsModule::findGlobalVariableUsersOf( // Replace all uses of F with (F ? JT : 0). void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( - Function *F, Constant *JT, bool IsDefinition) { + Function *F, Constant *JT, bool IsJumpTableCanonical) { // The target expression can not appear in a constant initializer on most // (all?) targets. Switch to a runtime initializer. 
SmallSetVector GlobalVarUsers; @@ -1239,7 +1298,7 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( Function::Create(cast(F->getValueType()), GlobalValue::ExternalWeakLinkage, F->getAddressSpace(), "", &M); - replaceCfiUses(F, PlaceholderFn, IsDefinition); + replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); Constant *Target = ConstantExpr::getSelect( ConstantExpr::getICmp(CmpInst::ICMP_NE, F, @@ -1276,8 +1335,9 @@ selectJumpTableArmEncoding(ArrayRef Functions, unsigned ArmCount = 0, ThumbCount = 0; for (const auto GTM : Functions) { - if (!GTM->isDefinition()) { + if (!GTM->isJumpTableCanonical()) { // PLT stubs are always ARM. + // FIXME: This is the wrong heuristic for non-canonical jump tables. ++ArmCount; continue; } @@ -1303,7 +1363,7 @@ void LowerTypeTestsModule::createJumpTable( cast(Functions[I]->getGlobal())); // Align the whole table by entry size. - F->setAlignment(getJumpTableEntrySize()); + F->setAlignment(Align(getJumpTableEntrySize())); // Skip prologue. // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. // Luckily, this function does not get any prologue even without the @@ -1438,46 +1498,52 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); - // Build aliases pointing to offsets into the jump table, and replace - // references to the original functions with references to the aliases. 
- for (unsigned I = 0; I != Functions.size(); ++I) { - Function *F = cast(Functions[I]->getGlobal()); - bool IsDefinition = Functions[I]->isDefinition(); + { + ScopedSaveAliaseesAndUsed S(M); - Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( - ConstantExpr::getInBoundsGetElementPtr( - JumpTableType, JumpTable, - ArrayRef{ConstantInt::get(IntPtrTy, 0), - ConstantInt::get(IntPtrTy, I)}), - F->getType()); - if (Functions[I]->isExported()) { - if (IsDefinition) { - ExportSummary->cfiFunctionDefs().insert(F->getName()); - } else { - GlobalAlias *JtAlias = GlobalAlias::create( - F->getValueType(), 0, GlobalValue::ExternalLinkage, - F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); - JtAlias->setVisibility(GlobalValue::HiddenVisibility); - ExportSummary->cfiFunctionDecls().insert(F->getName()); + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Function *F = cast(Functions[I]->getGlobal()); + bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); + + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getInBoundsGetElementPtr( + JumpTableType, JumpTable, + ArrayRef{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + F->getType()); + if (Functions[I]->isExported()) { + if (IsJumpTableCanonical) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } } - } - if (!IsDefinition) { - if (F->isWeakForLinker()) - replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, IsDefinition); - else - replaceCfiUses(F, CombinedGlobalElemPtr, IsDefinition); - } else { - 
assert(F->getType()->getAddressSpace() == 0); + if (!IsJumpTableCanonical) { + if (F->hasExternalWeakLinkage()) + replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, + IsJumpTableCanonical); + else + replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); + } else { + assert(F->getType()->getAddressSpace() == 0); - GlobalAlias *FAlias = GlobalAlias::create( - F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); - FAlias->setVisibility(F->getVisibility()); - FAlias->takeName(F); - if (FAlias->hasName()) - F->setName(FAlias->getName() + ".cfi"); - replaceCfiUses(F, FAlias, IsDefinition); - if (!F->hasLocalLinkage()) - F->setVisibility(GlobalVariable::HiddenVisibility); + GlobalAlias *FAlias = + GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", + CombinedGlobalElemPtr, &M); + FAlias->setVisibility(F->getVisibility()); + FAlias->takeName(F); + if (FAlias->hasName()) + F->setName(FAlias->getName() + ".cfi"); + replaceCfiUses(F, FAlias, IsJumpTableCanonical); + if (!F->hasLocalLinkage()) + F->setVisibility(GlobalVariable::HiddenVisibility); + } } } @@ -1623,7 +1689,7 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); @@ -1643,7 +1709,8 @@ static bool isDirectCall(Use& U) { return false; } -void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefinition) { +void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, + bool IsJumpTableCanonical) { SmallSetVector Constants; auto UI = Old->use_begin(), E = Old->use_end(); for (; UI != E;) { @@ -1655,7 +1722,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi continue; // Skip direct calls to externally defined or non-dso_local 
functions - if (isDirectCall(U) && (Old->isDSOLocal() || !IsDefinition)) + if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) continue; // Must handle Constants specially, we cannot call replaceUsesOfWith on a @@ -1678,16 +1745,7 @@ void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsDefi } void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { - auto UI = Old->use_begin(), E = Old->use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - - if (!isDirectCall(U)) - continue; - - U.set(New); - } + Old->replaceUsesWithIf(New, [](Use &U) { return isDirectCall(U); }); } bool LowerTypeTestsModule::lower() { @@ -1734,10 +1792,16 @@ bool LowerTypeTestsModule::lower() { Decls.push_back(&F); } - for (auto F : Defs) - importFunction(F, /*isDefinition*/ true); - for (auto F : Decls) - importFunction(F, /*isDefinition*/ false); + std::vector AliasesToErase; + { + ScopedSaveAliaseesAndUsed S(M); + for (auto F : Defs) + importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase); + for (auto F : Decls) + importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase); + } + for (GlobalAlias *GA : AliasesToErase) + GA->eraseFromParent(); return true; } @@ -1823,6 +1887,17 @@ bool LowerTypeTestsModule::lower() { CfiFunctionLinkage Linkage = P.second.Linkage; MDNode *FuncMD = P.second.FuncMD; Function *F = M.getFunction(FunctionName); + if (F && F->hasLocalLinkage()) { + // Locally defined function that happens to have the same name as a + // function defined in a ThinLTO module. Rename it to move it out of + // the way of the external reference that we're about to create. + // Note that setName will find a unique name for the function, so even + // if there is an existing function with the suffix there won't be a + // name collision. 
+ F->setName(F->getName() + ".1"); + F = nullptr; + } + if (!F) F = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), false), @@ -1871,24 +1946,26 @@ bool LowerTypeTestsModule::lower() { Types.clear(); GO.getMetadata(LLVMContext::MD_type, Types); - bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsJumpTableCanonical = false; bool IsExported = false; if (Function *F = dyn_cast(&GO)) { + IsJumpTableCanonical = isJumpTableCanonical(F); if (ExportedFunctions.count(F->getName())) { - IsDefinition |= ExportedFunctions[F->getName()].Linkage == CFL_Definition; + IsJumpTableCanonical |= + ExportedFunctions[F->getName()].Linkage == CFL_Definition; IsExported = true; // TODO: The logic here checks only that the function is address taken, // not that the address takers are live. This can be updated to check // their liveness and emit fewer jumptable entries once monolithic LTO // builds also emit summaries. } else if (!F->hasAddressTaken()) { - if (!CrossDsoCfi || !IsDefinition || F->hasLocalLinkage()) + if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage()) continue; } } - auto *GTM = - GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); + auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical, + IsExported, Types); GlobalTypeMembers[&GO] = GTM; for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 3a08069dcd4..8b9abaddc84 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -769,7 +769,7 @@ void MergeFunctions::writeAlias(Function *F, Function *G) { PtrType->getElementType(), PtrType->getAddressSpace(), G->getLinkage(), "", BitcastF, G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment()))); GA->takeName(G); GA->setVisibility(G->getVisibility()); 
GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); @@ -816,7 +816,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { removeUsers(F); F->replaceAllUsesWith(NewF); - unsigned MaxAlignment = std::max(G->getAlignment(), NewF->getAlignment()); + MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment())); writeThunkOrAlias(F, G); writeThunkOrAlias(F, NewF); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 733782e8764..e193074884a 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -409,7 +409,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, return std::unique_ptr(); std::unique_ptr OutliningInfo = - llvm::make_unique(); + std::make_unique(); auto IsSingleEntry = [](SmallVectorImpl &BlockList) { BasicBlock *Dom = BlockList.front(); @@ -589,7 +589,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { }; std::unique_ptr OutliningInfo = - llvm::make_unique(); + std::make_unique(); BasicBlock *CurrEntry = EntryBlock; bool CandidateFound = false; @@ -966,7 +966,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE, function_ref LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOI = llvm::make_unique(); + ClonedOI = std::make_unique(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; @@ -991,7 +991,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner( OptimizationRemarkEmitter &ORE, function_ref LookupAC) : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) { - ClonedOMRI = llvm::make_unique(); + ClonedOMRI = std::make_unique(); // Clone the function, so that we can hack away on it. 
ValueToValueMapTy VMap; @@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { BranchProbabilityInfo BPI(*ClonedFunc, LI); ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); + SetVector Inputs, Outputs, Sinks; for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : ClonedOMRI->ORI) { @@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { if (Outputs.size() > 0 && !ForceLiveExit) continue; - Function *OutlinedFunc = CE.extractCodeRegion(); + Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); @@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { } // Extract the body of the if. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); Function *OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), /* AllowVarargs */ true) - .extractCodeRegion(); + .extractCodeRegion(CEAC); if (OutlinedFunc) { BasicBlock *OutliningCallBB = @@ -1264,7 +1268,7 @@ std::pair PartialInlinerImpl::unswitchFunction(Function *F) { if (PSI->isFunctionEntryCold(F)) return {false, nullptr}; - if (empty(F->users())) + if (F->users().empty()) return {false, nullptr}; OptimizationRemarkEmitter ORE(F); @@ -1370,7 +1374,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { return false; } - assert(empty(Cloner.OrigFunc->users()) && + assert(Cloner.OrigFunc->users().empty() && "F's users should all be replaced!"); std::vector Users(Cloner.ClonedFunc->user_begin(), diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 3ea77f08fd3..5314a8219b1 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ 
b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -654,6 +654,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); + MPM.add(createLowerConstantIntrinsicsPass()); addExtensionsToPM(EP_VectorizerStart, MPM); diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp index 7be3608bd2e..307690729b1 100644 --- a/lib/Transforms/IPO/SCCP.cpp +++ b/lib/Transforms/IPO/SCCP.cpp @@ -9,16 +9,18 @@ using namespace llvm; PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { const DataLayout &DL = M.getDataLayout(); - auto &TLI = AM.getResult(M); auto &FAM = AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { + return FAM.getResult(F); + }; auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult(F); return { - make_unique(F, DT, FAM.getResult(F)), + std::make_unique(F, DT, FAM.getResult(F)), &DT, FAM.getCachedResult(F)}; }; - if (!runIPSCCP(M, DL, &TLI, getAnalysis)) + if (!runIPSCCP(M, DL, GetTLI, getAnalysis)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -47,14 +49,14 @@ public: if (skipModule(M)) return false; const DataLayout &DL = M.getDataLayout(); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); - + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = this->getAnalysis(F).getDomTree(); return { - make_unique( + std::make_unique( F, DT, this->getAnalysis().getAssumptionCache( F)), @@ -62,7 +64,7 @@ public: nullptr}; // manager, so set them to nullptr. 
}; - return runIPSCCP(M, DL, TLI, getAnalysis); + return runIPSCCP(M, DL, GetTLI, getAnalysis); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 877d20e72ff..6184681db8a 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -79,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +130,12 @@ static cl::opt ProfileSampleAccurate( "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +static cl::opt ProfileAccurateForSymsInList( + "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("For symbols in profile symbol list, regard their profiles to " + "be accurate. It may be overriden by profile-sample-accurate. ")); + namespace { using BlockWeightMap = DenseMap; @@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap; using BlockEdgeMap = DenseMap>; +class SampleProfileLoader; + class SampleCoverageTracker { public: - SampleCoverageTracker() = default; + SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){}; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -185,6 +195,76 @@ private: /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. 
uint64_t TotalUsedSamples = 0; + + SampleProfileLoader &SPLoader; +}; + +class GUIDToFuncNameMapper { +public: + GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, + DenseMap &GUIDToFuncNameMap) + : CurrentReader(Reader), CurrentModule(M), + CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + for (const auto &F : CurrentModule) { + StringRef OrigName = F.getName(); + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(OrigName), OrigName}); + + // Local to global var promotion used by optimization like thinlto + // will rename the var and add suffix like ".llvm.xxx" to the + // original local name. In sample profile, the suffixes of function + // names are all stripped. Since it is possible that the mapper is + // built in post-thin-link phase and var promotion has been done, + // we need to add the substring of function name without the suffix + // into the GUIDToFuncNameMap. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (CanonName != OrigName) + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(CanonName), CanonName}); + } + + // Update GUIDToFuncNameMap for each function including inlinees. + SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); + } + + ~GUIDToFuncNameMapper() { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + CurrentGUIDToFuncNameMap.clear(); + + // Reset GUIDToFuncNameMap for each function as they're no + // longer valid at this point. 
+ SetGUIDToFuncNameMapForAll(nullptr); + } + +private: + void SetGUIDToFuncNameMapForAll(DenseMap *Map) { + std::queue FSToUpdate; + for (auto &IFS : CurrentReader.getProfiles()) { + FSToUpdate.push(&IFS.second); + } + + while (!FSToUpdate.empty()) { + FunctionSamples *FS = FSToUpdate.front(); + FSToUpdate.pop(); + FS->GUIDToFuncNameMap = Map; + for (const auto &ICS : FS->getCallsiteSamples()) { + const FunctionSamplesMap &FSMap = ICS.second; + for (auto &IFS : FSMap) { + FunctionSamples &FS = const_cast(IFS.second); + FSToUpdate.push(&FS); + } + } + } + } + + SampleProfileReader &CurrentReader; + Module &CurrentModule; + DenseMap &CurrentGUIDToFuncNameMap; }; /// Sample profile pass. @@ -199,8 +279,9 @@ public: std::function GetAssumptionCache, std::function GetTargetTransformInfo) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), - RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} + GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), + Filename(Name), RemappingFilename(RemapName), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -209,6 +290,8 @@ public: void dump() { Reader->dump(); } protected: + friend class SampleCoverageTracker; + bool runOnFunction(Function &F, ModuleAnalysisManager *AM); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); @@ -237,6 +320,8 @@ protected: bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); + bool callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI); /// Map basic blocks to their computed weights. /// @@ -310,6 +395,10 @@ protected: /// Profile Summary Info computed from sample profile. 
ProfileSummaryInfo *PSI = nullptr; + /// Profile Symbol list tells whether a function name appears in the binary + /// used to generate the current profile. + std::unique_ptr PSL; + /// Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -326,6 +415,21 @@ protected: uint64_t entryCount; }; DenseMap notInlinedCallInfo; + + // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + // all the function symbols defined or declared in current module. + DenseMap GUIDToFuncNameMap; + + // All the Names used in FunctionSamples including outline function + // names, inline instance names and call target names. + StringSet<> NamesInProfile; + + // For symbol in profile symbol list, whether to regard their profiles + // to be accurate. It is mainly decided by existence of profile symbol + // list and -profile-accurate-for-symsinlist flag, but it can be + // overridden by -profile-sample-accurate or profile-sample-accurate + // attribute. + bool ProfAccForSymsInList; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -381,14 +485,23 @@ private: /// To decide whether an inlined callsite is hot, we compare the callsite /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. -static bool callsiteIsHot(const FunctionSamples *CallsiteFS, - ProfileSummaryInfo *PSI) { +/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. 
+bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - return PSI->isHotCount(CallsiteTotalSamples); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); } /// Mark as used the sample record for the given function samples at @@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countUsedRecords(CalleeSamples, PSI); } @@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countBodyRecords(CalleeSamples, PSI); } @@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Total += countBodySamples(CalleeSamples, PSI); } @@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet &InlinedGUIDs) { DenseSet PromotedInsns; + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. 
+ assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + DenseMap localNotInlinedCallSites; bool Changed = false; while (true) { @@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) { } /// Returns the sorted CallTargetMap \p M by count in descending order. -static SmallVector SortCallTargets( - const SampleRecord::CallTargetMap &M) { +static SmallVector GetSortedValueDataFromCallTargets( + const SampleRecord::CallTargetMap & M) { SmallVector R; - for (auto I = M.begin(); I != M.end(); ++I) - R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()}); - llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) { - if (L.Count == R.Count) - return L.Value > R.Value; - else - return L.Count > R.Count; - }); + for (const auto &I : SampleRecord::SortCallTargets(M)) { + R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second}); + } return R; } @@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!T || T.get().empty()) continue; SmallVector SortedCallTargets = - SortCallTargets(T.get()); + GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; findIndirectCallFunctionSamples(I, Sum); annotateValueSite(*I.getParent()->getParent()->getParent(), I, @@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { } } + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. 
@@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); - auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx); + + std::unique_ptr RemapReader; + auto ReaderOrErr = + SampleProfileReader::create(Filename, Ctx, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); return false; } Reader = std::move(ReaderOrErr.get()); - Reader->collectFuncsToUse(M); + Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); + PSL = Reader->getProfileSymbolList(); - if (!RemappingFilename.empty()) { - // Apply profile remappings to the loaded profile data if requested. - // For now, we only support remapping symbols encoded using the Itanium - // C++ ABI's name mangling scheme. - ReaderOrErr = SampleProfileReaderItaniumRemapper::create( - RemappingFilename, Ctx, std::move(Reader)); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile remapping file: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - ProfileIsValid = (Reader->read() == sampleprof_error::success); + // While profile-sample-accurate is on, ignore symbol list. 
+ ProfAccForSymsInList = + ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; + if (ProfAccForSymsInList) { + NamesInProfile.clear(); + if (auto NameTable = Reader->getNameTable()) + NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + return true; } @@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI) { - FunctionSamples::GUIDToFuncNameMapper Mapper(M); + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; @@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - + DILocation2SampleMap.clear(); // By default the entry count is initialized to -1, which will be treated // conservatively by getEntryCount as the same as unknown (None). This is // to avoid newly added code to be treated as cold. If we have samples // this will be overwritten in emitAnnotations. - // If ProfileSampleAccurate is true or F has profile-sample-accurate - // attribute, initialize the entry count to 0 so callsites or functions - // unsampled will be treated as cold. - uint64_t initialEntryCount = - (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) - ? 0 - : -1; + uint64_t initialEntryCount = -1; + + ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; + if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { + // initialize all the function entry counts to 0. It means all the + // functions without profile will be regarded as cold. + initialEntryCount = 0; + // profile-sample-accurate is a user assertion which has a higher precedence + // than symbol list. When profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = false; + } + + // PSL -- profile symbol list include all the symbols in sampled binary. 
+ // If ProfileAccurateForSymsInList is enabled, PSL is used to treat + // old functions without samples being cold, without having to worry + // about new and hot functions being mistakenly treated as cold. + if (ProfAccForSymsInList) { + // Initialize the entry count to 0 for functions in the list. + if (PSL->contains(F.getName())) + initialEntryCount = 0; + + // Function in the symbol list but without sample will be regarded as + // cold. To minimize the potential negative performance impact it could + // have, we want to be a little conservative here saying if a function + // shows up in the profile, no matter as outline function, inline instance + // or call targets, treat the function as not being cold. This will handle + // the cases such as most callsites of a function are inlined in sampled + // binary but not inlined in current build (because of source code drift, + // imprecise debug information, or the callsites are all cold individually + // but not cold accumulatively...), so the outline function showing up as + // cold in sampled binary will actually not be cold after current build. 
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (NamesInProfile.count(CanonName)) + initialEntryCount = -1; + } + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { @@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) .getManager(); ORE = &FAM.getResult(F); } else { - OwnedORE = make_unique(&F); + OwnedORE = std::make_unique(&F); ORE = OwnedORE.get(); } Samples = Reader->getSamplesFor(F); diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 24c476376c1..690b5e8bf49 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -218,10 +219,18 @@ void splitAndWriteThinLTOBitcode( promoteTypeIds(M, ModuleId); - // Returns whether a global has attached type metadata. Such globals may - // participate in CFI or whole-program devirtualization, so they need to - // appear in the merged module instead of the thin LTO module. + // Returns whether a global or its associated global has attached type + // metadata. The former may participate in CFI or whole-program + // devirtualization, so they need to appear in the merged module instead of + // the thin LTO module. Similarly, globals that are associated with globals + // with type metadata need to appear in the merged module because they will + // reference the global's section directly. 
auto HasTypeMetadata = [](const GlobalObject *GO) { + if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated)) + if (auto *AssocVM = dyn_cast_or_null(MD->getOperand(0))) + if (auto *AssocGO = dyn_cast(AssocVM->getValue())) + if (AssocGO->hasMetadata(LLVMContext::MD_type)) + return true; return GO->hasMetadata(LLVMContext::MD_type); }; @@ -315,9 +324,9 @@ void splitAndWriteThinLTOBitcode( SmallVector Elts; Elts.push_back(MDString::get(Ctx, F.getName())); CfiFunctionLinkage Linkage; - if (!F.isDeclarationForLinker()) + if (lowertypetests::isJumpTableCanonical(&F)) Linkage = CFL_Definition; - else if (F.isWeakForLinker()) + else if (F.hasExternalWeakLinkage()) Linkage = CFL_WeakDeclaration; else Linkage = CFL_Declaration; @@ -457,7 +466,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, // splitAndWriteThinLTOBitcode). Just always build it once via the // buildModuleSummaryIndex when Module(s) are ready. ProfileSummaryInfo PSI(M); - NewIndex = llvm::make_unique( + NewIndex = std::make_unique( buildModuleSummaryIndex(M, nullptr, &PSI)); Index = NewIndex.get(); } diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index 6b6dd6194e1..f0cf5581ba8 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,12 +24,14 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines. +// This pass is intended to be used during the regular and thin LTO pipelines: +// // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are // eligible for virtual call optimization (i.e. calls that use either of the -// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). 
During -// ThinLTO, the pass operates in two phases: +// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). +// +// During hybrid Regular/ThinLTO, the pass operates in two phases: // - Export phase: this is run during the thin link over a single merged module // that contains all vtables with !type metadata that participate in the link. // The pass computes a resolution for each virtual call and stores it in the @@ -38,6 +40,14 @@ // modules. The pass applies the resolutions previously computed during the // import phase to each eligible virtual call. // +// During ThinLTO, the pass operates in two phases: +// - Export phase: this is run during the thin link over the index which +// contains a summary of all vtables with !type metadata that participate in +// the link. It computes a resolution for each virtual call and stores it in +// the type identifier summary. Only single implementation devirtualization +// is supported. +// - Import phase: (same as with hybrid case above). +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -117,6 +127,11 @@ static cl::opt cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); +static cl::opt + PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, + cl::init(false), cl::ZeroOrMore, + cl::desc("Print index-based devirtualization messages")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. 
@@ -265,6 +280,25 @@ template <> struct DenseMapInfo { } }; +template <> struct DenseMapInfo { + static VTableSlotSummary getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static VTableSlotSummary getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const VTableSlotSummary &I) { + return DenseMapInfo::getHashValue(I.TypeID) ^ + DenseMapInfo::getHashValue(I.ByteOffset); + } + static bool isEqual(const VTableSlotSummary &LHS, + const VTableSlotSummary &RHS) { + return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; + } +}; + } // end namespace llvm namespace { @@ -342,19 +376,21 @@ struct CallSiteInfo { /// pass the vector is non-empty, we will need to add a use of llvm.type.test /// to each of the function summaries in the vector. std::vector SummaryTypeCheckedLoadUsers; + std::vector SummaryTypeTestAssumeUsers; bool isExported() const { return SummaryHasTypeTestAssumeUsers || !SummaryTypeCheckedLoadUsers.empty(); } - void markSummaryHasTypeTestAssumeUsers() { - SummaryHasTypeTestAssumeUsers = true; + void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) { + SummaryTypeCheckedLoadUsers.push_back(FS); AllCallSitesDevirted = false; } - void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) { - SummaryTypeCheckedLoadUsers.push_back(FS); + void addSummaryTypeTestAssumeUser(FunctionSummary *FS) { + SummaryTypeTestAssumeUsers.push_back(FS); + SummaryHasTypeTestAssumeUsers = true; AllCallSitesDevirted = false; } @@ -456,7 +492,6 @@ struct DevirtModule { void buildTypeIdentifierMap( std::vector &Bits, DenseMap> &TypeIdMap); - Constant *getPointerAtOffset(Constant *I, uint64_t Offset); bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, const std::set &TypeMemberInfos, @@ -464,7 +499,8 @@ struct DevirtModule { void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported); - bool 
trySingleImplDevirt(MutableArrayRef TargetsForSlot, + bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res); @@ -542,6 +578,38 @@ struct DevirtModule { function_ref LookupDomTree); }; +struct DevirtIndex { + ModuleSummaryIndex &ExportSummary; + // The set in which to record GUIDs exported from their module by + // devirtualization, used by client to ensure they are not internalized. + std::set &ExportedGUIDs; + // A map in which to record the information necessary to locate the WPD + // resolution for local targets in case they are exported by cross module + // importing. + std::map> &LocalWPDTargetsMap; + + MapVector CallSlots; + + DevirtIndex( + ModuleSummaryIndex &ExportSummary, + std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) + : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs), + LocalWPDTargetsMap(LocalWPDTargetsMap) {} + + bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, + const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset); + + bool trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets); + + void run(); +}; + struct WholeProgramDevirt : public ModulePass { static char ID; @@ -572,7 +640,7 @@ struct WholeProgramDevirt : public ModulePass { // an optimization remark emitter on the fly, when we need it. 
std::unique_ptr ORE; auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { - ORE = make_unique(F); + ORE = std::make_unique(F); return *ORE; }; @@ -632,6 +700,41 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, return PreservedAnalyses::none(); } +namespace llvm { +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) { + DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run(); +} + +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + function_ref isExported, + std::map> &LocalWPDTargetsMap) { + for (auto &T : LocalWPDTargetsMap) { + auto &VI = T.first; + // This was enforced earlier during trySingleImplDevirt. + assert(VI.getSummaryList().size() == 1 && + "Devirt of local target has more than one copy"); + auto &S = VI.getSummaryList()[0]; + if (!isExported(S->modulePath(), VI.getGUID())) + continue; + + // It's been exported by a cross module import. + for (auto &SlotSummary : T.second) { + auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID); + assert(TIdSum); + auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset); + assert(WPDRes != TIdSum->WPDRes.end()); + WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + WPDRes->second.SingleImplName, + Summary.getModuleHash(S->modulePath())); + } + } +} + +} // end namespace llvm + bool DevirtModule::runForTesting( Module &M, function_ref AARGetter, function_ref OREGetter, @@ -662,7 +765,7 @@ bool DevirtModule::runForTesting( ExitOnError ExitOnErr( "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": "); std::error_code EC; - raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text); + raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text); ExitOnErr(errorCodeToError(EC)); yaml::Output Out(OS); @@ -706,38 +809,6 @@ void DevirtModule::buildTypeIdentifierMap( } } -Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) { - if (I->getType()->isPointerTy()) { 
- if (Offset == 0) - return I; - return nullptr; - } - - const DataLayout &DL = M.getDataLayout(); - - if (auto *C = dyn_cast(I)) { - const StructLayout *SL = DL.getStructLayout(C->getType()); - if (Offset >= SL->getSizeInBytes()) - return nullptr; - - unsigned Op = SL->getElementContainingOffset(Offset); - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset - SL->getElementOffset(Op)); - } - if (auto *C = dyn_cast(I)) { - ArrayType *VTableTy = C->getType(); - uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); - - unsigned Op = Offset / ElemSize; - if (Op >= C->getNumOperands()) - return nullptr; - - return getPointerAtOffset(cast(I->getOperand(Op)), - Offset % ElemSize); - } - return nullptr; -} - bool DevirtModule::tryFindVirtualCallTargets( std::vector &TargetsForSlot, const std::set &TypeMemberInfos, uint64_t ByteOffset) { @@ -746,7 +817,7 @@ bool DevirtModule::tryFindVirtualCallTargets( return false; Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(), - TM.Offset + ByteOffset); + TM.Offset + ByteOffset, M); if (!Ptr) return false; @@ -766,6 +837,34 @@ bool DevirtModule::tryFindVirtualCallTargets( return !TargetsForSlot.empty(); } +bool DevirtIndex::tryFindVirtualCallTargets( + std::vector &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset) { + for (const TypeIdOffsetVtableInfo P : TIdInfo) { + // VTable initializer should have only one summary, or all copies must be + // linkonce/weak ODR. 
+ assert(P.VTableVI.getSummaryList().size() == 1 || + llvm::all_of( + P.VTableVI.getSummaryList(), + [&](const std::unique_ptr &Summary) { + return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) || + GlobalValue::isWeakODRLinkage(Summary->linkage()); + })); + const auto *VS = cast(P.VTableVI.getSummaryList()[0].get()); + if (!P.VTableVI.getSummaryList()[0]->isLive()) + continue; + for (auto VTP : VS->vTableFuncs()) { + if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset) + continue; + + TargetsForSlot.push_back(VTP.FuncVI); + } + } + + // Give up if we couldn't find any targets. + return !TargetsForSlot.empty(); +} + void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported) { auto Apply = [&](CallSiteInfo &CSInfo) { @@ -788,9 +887,38 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Apply(P.second); } +static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) { + // We can't add calls if we haven't seen a definition + if (Callee.getSummaryList().empty()) + return false; + + // Insert calls into the summary index so that the devirtualized targets + // are eligible for import. + // FIXME: Annotate type tests with hotness. For now, mark these as hot + // to better ensure we have the opportunity to inline them. 
+ bool IsExported = false; + auto &S = Callee.getSummaryList()[0]; + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + auto AddCalls = [&](CallSiteInfo &CSInfo) { + for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + }; + AddCalls(SlotInfo.CSInfo); + for (auto &P : SlotInfo.ConstCSInfo) + AddCalls(P.second); + return IsExported; +} + bool DevirtModule::trySingleImplDevirt( - MutableArrayRef TargetsForSlot, - VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) { + ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res) { // See if the program contains a single implementation of this virtual // function. Function *TheFn = TargetsForSlot[0].Fn; @@ -830,6 +958,10 @@ bool DevirtModule::trySingleImplDevirt( TheFn->setVisibility(GlobalValue::HiddenVisibility); TheFn->setName(NewName); } + if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID())) + // Any needed promotion of 'TheFn' has already been done during + // LTO unit split, so we can ignore return value of AddCalls. + AddCalls(SlotInfo, TheFnVI); Res->TheKind = WholeProgramDevirtResolution::SingleImpl; Res->SingleImplName = TheFn->getName(); @@ -837,6 +969,63 @@ bool DevirtModule::trySingleImplDevirt( return true; } +bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets) { + // See if the program contains a single implementation of this virtual + // function. + auto TheFn = TargetsForSlot[0]; + for (auto &&Target : TargetsForSlot) + if (TheFn != Target) + return false; + + // Don't devirtualize if we don't have target definition. 
+ auto Size = TheFn.getSummaryList().size(); + if (!Size) + return false; + + // If the summary list contains multiple summaries where at least one is + // a local, give up, as we won't know which (possibly promoted) name to use. + for (auto &S : TheFn.getSummaryList()) + if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) + return false; + + // Collect functions devirtualized at least for one call site for stats. + if (PrintSummaryDevirt) + DevirtTargets.insert(TheFn); + + auto &S = TheFn.getSummaryList()[0]; + bool IsExported = AddCalls(SlotInfo, TheFn); + if (IsExported) + ExportedGUIDs.insert(TheFn.getGUID()); + + // Record in summary for use in devirtualization during the ThinLTO import + // step. + Res->TheKind = WholeProgramDevirtResolution::SingleImpl; + if (GlobalValue::isLocalLinkage(S->linkage())) { + if (IsExported) + // If target is a local function and we are exporting it by + // devirtualizing a call in another module, we need to record the + // promoted name. + Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + TheFn.name(), ExportSummary.getModuleHash(S->modulePath())); + else { + LocalWPDTargetsMap[TheFn].push_back(SlotSummary); + Res->SingleImplName = TheFn.name(); + } + } else + Res->SingleImplName = TheFn.name(); + + // Name will be empty if this thin link driven off of serialized combined + // index (e.g. llvm-lto). However, WPD is not supported/invoked for the + // legacy LTO API anyway. + assert(!Res->SingleImplName.empty()); + + return true; +} + void DevirtModule::tryICallBranchFunnel( MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res, VTableSlot Slot) { @@ -1302,10 +1491,13 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { if (B.Before.Bytes.empty() && B.After.Bytes.empty()) return; - // Align each byte array to pointer width. 
- unsigned PointerSize = M.getDataLayout().getPointerSize(); - B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), PointerSize)); - B.After.Bytes.resize(alignTo(B.After.Bytes.size(), PointerSize)); + // Align the before byte array to the global's minimum alignment so that we + // don't break any alignment requirements on the global. + MaybeAlign Alignment(B.GV->getAlignment()); + if (!Alignment) + Alignment = + Align(M.getDataLayout().getABITypeAlignment(B.GV->getValueType())); + B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment)); // Before was stored in reverse order; flip it now. for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I) @@ -1322,6 +1514,7 @@ void DevirtModule::rebuildGlobal(VTableBits &B) { GlobalVariable::PrivateLinkage, NewInit, "", B.GV); NewGV->setSection(B.GV->getSection()); NewGV->setComdat(B.GV->getComdat()); + NewGV->setAlignment(MaybeAlign(B.GV->getAlignment())); // Copy the original vtable's metadata to the anonymous global, adjusting // offsets as required. @@ -1483,8 +1676,11 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { } void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { + auto *TypeId = dyn_cast(Slot.TypeID); + if (!TypeId) + return; const TypeIdSummary *TidSummary = - ImportSummary->getTypeIdSummary(cast(Slot.TypeID)->getString()); + ImportSummary->getTypeIdSummary(TypeId->getString()); if (!TidSummary) return; auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset); @@ -1493,6 +1689,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { const WholeProgramDevirtResolution &Res = ResI->second; if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) { + assert(!Res.SingleImplName.empty()); // The type of the function in the declaration is irrelevant because every // call site will cast it to the correct type. 
Constant *SingleImpl = @@ -1627,8 +1824,7 @@ bool DevirtModule::run() { // FIXME: Only add live functions. for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { for (Metadata *MD : MetadataByGUID[VF.GUID]) { - CallSlots[{MD, VF.Offset}] - .CSInfo.markSummaryHasTypeTestAssumeUsers(); + CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); } } for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { @@ -1641,7 +1837,7 @@ bool DevirtModule::run() { for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) { CallSlots[{MD, VC.VFunc.Offset}] .ConstCSInfo[VC.Args] - .markSummaryHasTypeTestAssumeUsers(); + .addSummaryTypeTestAssumeUser(FS); } } for (const FunctionSummary::ConstVCall &VC : @@ -1673,7 +1869,7 @@ bool DevirtModule::run() { cast(S.first.TypeID)->getString()) .WPDRes[S.first.ByteOffset]; - if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) { + if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); @@ -1710,7 +1906,7 @@ bool DevirtModule::run() { using namespace ore; OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) << "devirtualized " - << NV("FunctionName", F->getName())); + << NV("FunctionName", DT.first)); } } @@ -1722,5 +1918,86 @@ bool DevirtModule::run() { for (VTableBits &B : Bits) rebuildGlobal(B); + // We have lowered or deleted the type checked load intrinsics, so we no + // longer have enough information to reason about the liveness of virtual + // function pointers in GlobalDCE. 
+ for (GlobalVariable &GV : M.globals()) + GV.eraseMetadata(LLVMContext::MD_vcall_visibility); + return true; } + +void DevirtIndex::run() { + if (ExportSummary.typeIdCompatibleVtableMap().empty()) + return; + + DenseMap> NameByGUID; + for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) { + NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); + } + + // Collect information from summary about which calls to try to devirtualize. + for (auto &P : ExportSummary) { + for (auto &S : P.second.SummaryList) { + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + // FIXME: Only add live functions. + for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); + } + } + for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_test_assume_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeTestAssumeUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_checked_load_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeCheckedLoadUser(FS); + } + } + } + } + + std::set DevirtTargets; + // For each (type, offset) pair: + for (auto &S : CallSlots) { + // Search each of the members of the type identifier for the virtual + // function implementation at offset S.first.ByteOffset, and add to + // TargetsForSlot. 
+ std::vector TargetsForSlot; + auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); + assert(TidSummary); + if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, + S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; + + if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, + DevirtTargets)) + continue; + } + } + + // Optionally have the thin link print message for each devirtualized + // function. + if (PrintSummaryDevirt) + for (const auto &DT : DevirtTargets) + errs() << "Devirtualized call to " << DT << "\n"; + + return; +} diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index ba15b023f2a..8bc34825f8a 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1097,6 +1097,107 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) { return nullptr; } +Instruction * +InstCombiner::canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I) { + assert((I.getOpcode() == Instruction::Add || + I.getOpcode() == Instruction::Or || + I.getOpcode() == Instruction::Sub) && + "Expecting add/or/sub instruction"); + + // We have a subtraction/addition between a (potentially truncated) *logical* + // right-shift of X and a "select". + Value *X, *Select; + Instruction *LowBitsToSkip, *Extract; + if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd( + m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)), + m_Instruction(Extract))), + m_Value(Select)))) + return nullptr; + + // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS. 
+ if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select) + return nullptr; + + Type *XTy = X->getType(); + bool HadTrunc = I.getType() != XTy; + + // If there was a truncation of extracted value, then we'll need to produce + // one extra instruction, so we need to ensure one instruction will go away. + if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + + // Extraction should extract high NBits bits, with shift amount calculated as: + // low bits to skip = shift bitwidth - high bits to extract + // The shift amount itself may be extended, and we need to look past zero-ext + // when matching NBits, that will matter for matching later. + Constant *C; + Value *NBits; + if (!match( + LowBitsToSkip, + m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) || + !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(C->getType()->getScalarSizeInBits(), + X->getType()->getScalarSizeInBits())))) + return nullptr; + + // Sign-extending value can be zero-extended if we `sub`tract it, + // or sign-extended otherwise. + auto SkipExtInMagic = [&I](Value *&V) { + if (I.getOpcode() == Instruction::Sub) + match(V, m_ZExtOrSelf(m_Value(V))); + else + match(V, m_SExtOrSelf(m_Value(V))); + }; + + // Now, finally validate the sign-extending magic. + // `select` itself may be appropriately extended, look past that. + SkipExtInMagic(Select); + + ICmpInst::Predicate Pred; + const APInt *Thr; + Value *SignExtendingValue, *Zero; + bool ShouldSignext; + // It must be a select between two values we will later establish to be a + // sign-extending value and a zero constant. The condition guarding the + // sign-extension must be based on a sign bit of the same X we had in `lshr`. + if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)), + m_Value(SignExtendingValue), m_Value(Zero))) || + !isSignBitCheck(Pred, *Thr, ShouldSignext)) + return nullptr; + + // icmp-select pair is commutative. 
+ if (!ShouldSignext) + std::swap(SignExtendingValue, Zero); + + // If we should not perform sign-extension then we must add/or/subtract zero. + if (!match(Zero, m_Zero())) + return nullptr; + // Otherwise, it should be some constant, left-shifted by the same NBits we + // had in `lshr`. Said left-shift can also be appropriately extended. + // Again, we must look past zero-ext when looking for NBits. + SkipExtInMagic(SignExtendingValue); + Constant *SignExtendingValueBaseConstant; + if (!match(SignExtendingValue, + m_Shl(m_Constant(SignExtendingValueBaseConstant), + m_ZExtOrSelf(m_Specific(NBits))))) + return nullptr; + // If we `sub`, then the constant should be one, else it should be all-ones. + if (I.getOpcode() == Instruction::Sub + ? !match(SignExtendingValueBaseConstant, m_One()) + : !match(SignExtendingValueBaseConstant, m_AllOnes())) + return nullptr; + + auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip, + Extract->getName() + ".sext"); + NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness. + if (!HadTrunc) + return NewAShr; + + Builder.Insert(NewAShr); + return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType()); +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), @@ -1302,12 +1403,32 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *V = canonicalizeLowbitMask(I, Builder)) return V; + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I)) return SatAdd; return Changed ? &I : nullptr; } +/// Eliminate an op from a linear interpolation (lerp) pattern. 
+static Instruction *factorizeLerp(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *X, *Y, *Z; + if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y), + m_OneUse(m_FSub(m_FPOne(), + m_Value(Z))))), + m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z)))))) + return nullptr; + + // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants] + Value *XY = Builder.CreateFSubFMF(X, Y, &I); + Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I); + return BinaryOperator::CreateFAddFMF(Y, MulZ, &I); +} + /// Factor a common operand out of fadd/fsub of fmul/fdiv. static Instruction *factorizeFAddFSub(BinaryOperator &I, InstCombiner::BuilderTy &Builder) { @@ -1315,6 +1436,10 @@ static Instruction *factorizeFAddFSub(BinaryOperator &I, I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub"); assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "FP factorization requires FMF"); + + if (Instruction *Lerp = factorizeLerp(I, Builder)) + return Lerp; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Value *X, *Y, *Z; bool IsFMul; @@ -1362,17 +1487,32 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I)) return FoldedFAdd; - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Value *X; // (-X) + Y --> Y - X - if (match(LHS, m_FNeg(m_Value(X)))) - return BinaryOperator::CreateFSubFMF(RHS, X, &I); - // Y + (-X) --> Y - X - if (match(RHS, m_FNeg(m_Value(X)))) - return BinaryOperator::CreateFSubFMF(LHS, X, &I); + Value *X, *Y; + if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y)))) + return BinaryOperator::CreateFSubFMF(Y, X, &I); + + // Similar to above, but look through fmul/fdiv for the negated term. 
+ // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants] + Value *Z; + if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))), + m_Value(Z)))) { + Value *XY = Builder.CreateFMulFMF(X, Y, &I); + return BinaryOperator::CreateFSubFMF(Z, XY, &I); + } + // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants] + // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants] + if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))), + m_Value(Z))) || + match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))), + m_Value(Z)))) { + Value *XY = Builder.CreateFDivFMF(X, Y, &I); + return BinaryOperator::CreateFSubFMF(Z, XY, &I); + } // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); Type *FPType = LHSConv->getType(); @@ -1631,37 +1771,50 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { const APInt *Op0C; if (match(Op0, m_APInt(Op0C))) { - unsigned BitWidth = I.getType()->getScalarSizeInBits(); - // -(X >>u 31) -> (X >>s 31) - // -(X >>s 31) -> (X >>u 31) if (Op0C->isNullValue()) { + Value *Op1Wide; + match(Op1, m_TruncOrSelf(m_Value(Op1Wide))); + bool HadTrunc = Op1Wide != Op1; + bool NoTruncOrTruncIsOneUse = !HadTrunc || Op1->hasOneUse(); + unsigned BitWidth = Op1Wide->getType()->getScalarSizeInBits(); + Value *X; const APInt *ShAmt; - if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && + // -(X >>u 31) -> (X >>s 31) + if (NoTruncOrTruncIsOneUse && + match(Op1Wide, m_LShr(m_Value(X), m_APInt(ShAmt))) && *ShAmt == BitWidth - 1) { - Value *ShAmtOp = cast(Op1)->getOperand(1); - return BinaryOperator::CreateAShr(X, ShAmtOp); + Value *ShAmtOp = cast(Op1Wide)->getOperand(1); + Instruction *NewShift = BinaryOperator::CreateAShr(X, ShAmtOp); + NewShift->copyIRFlags(Op1Wide); + if (!HadTrunc) + return NewShift; + Builder.Insert(NewShift); 
+ return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType()); } - if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) && + // -(X >>s 31) -> (X >>u 31) + if (NoTruncOrTruncIsOneUse && + match(Op1Wide, m_AShr(m_Value(X), m_APInt(ShAmt))) && *ShAmt == BitWidth - 1) { - Value *ShAmtOp = cast(Op1)->getOperand(1); - return BinaryOperator::CreateLShr(X, ShAmtOp); + Value *ShAmtOp = cast(Op1Wide)->getOperand(1); + Instruction *NewShift = BinaryOperator::CreateLShr(X, ShAmtOp); + NewShift->copyIRFlags(Op1Wide); + if (!HadTrunc) + return NewShift; + Builder.Insert(NewShift); + return TruncInst::CreateTruncOrBitCast(NewShift, Op1->getType()); } - if (Op1->hasOneUse()) { + if (!HadTrunc && Op1->hasOneUse()) { Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(Op1, LHS, RHS).Flavor; if (SPF == SPF_ABS || SPF == SPF_NABS) { // This is a negate of an ABS/NABS pattern. Just swap the operands // of the select. - SelectInst *SI = cast(Op1); - Value *TrueVal = SI->getTrueValue(); - Value *FalseVal = SI->getFalseValue(); - SI->setTrueValue(FalseVal); - SI->setFalseValue(TrueVal); + cast(Op1)->swapValues(); // Don't swap prof metadata, we didn't change the branch behavior. 
- return replaceInstUsesWith(I, SI); + return replaceInstUsesWith(I, Op1); } } } @@ -1686,6 +1839,23 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateNeg(Y); } + // (sub (or A, B) (and A, B)) --> (xor A, B) + { + Value *A, *B; + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateXor(A, B); + } + + // (sub (and A, B) (or A, B)) --> neg (xor A, B) + { + Value *A, *B; + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) && + (Op0->hasOneUse() || Op1->hasOneUse())) + return BinaryOperator::CreateNeg(Builder.CreateXor(A, B)); + } + // (sub (or A, B), (xor A, B)) --> (and A, B) { Value *A, *B; @@ -1694,6 +1864,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAnd(A, B); } + // (sub (xor A, B) (or A, B)) --> neg (and A, B) + { + Value *A, *B; + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) && + (Op0->hasOneUse() || Op1->hasOneUse())) + return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B)); + } + { Value *Y; // ((X | Y) - X) --> (~X & Y) @@ -1778,7 +1957,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { std::swap(LHS, RHS); // LHS is now O above and expected to have at least 2 uses (the min/max) // NotA is epected to have 2 uses from the min/max and 1 from the sub. - if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && + if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && !NotA->hasNUsesOrMore(4)) { // Note: We don't generate the inverse max/min, just create the not of // it and let other folds do the rest. 
@@ -1826,6 +2005,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return SelectInst::Create(Cmp, Neg, A); } + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; @@ -1865,6 +2048,22 @@ static Instruction *foldFNegIntoConstant(Instruction &I) { return nullptr; } +static Instruction *hoistFNegAboveFMulFDiv(Instruction &I, + InstCombiner::BuilderTy &Builder) { + Value *FNeg; + if (!match(&I, m_FNeg(m_Value(FNeg)))) + return nullptr; + + Value *X, *Y; + if (match(FNeg, m_OneUse(m_FMul(m_Value(X), m_Value(Y))))) + return BinaryOperator::CreateFMulFMF(Builder.CreateFNegFMF(X, &I), Y, &I); + + if (match(FNeg, m_OneUse(m_FDiv(m_Value(X), m_Value(Y))))) + return BinaryOperator::CreateFDivFMF(Builder.CreateFNegFMF(X, &I), Y, &I); + + return nullptr; +} + Instruction *InstCombiner::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); @@ -1882,6 +2081,9 @@ Instruction *InstCombiner::visitFNeg(UnaryOperator &I) { match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) return BinaryOperator::CreateFSubFMF(Y, X, &I); + if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder)) + return R; + return nullptr; } @@ -1903,6 +2105,9 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Instruction *X = foldFNegIntoConstant(I)) return X; + if (Instruction *R = hoistFNegAboveFMulFDiv(I, Builder)) + return R; + Value *X, *Y; Constant *C; @@ -1944,6 +2149,21 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y)))))) return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I); + // Similar to above, but look through fmul/fdiv of the negated value: + // Op0 - (-X * Y) --> Op0 + (X * Y) + // Op0 - (Y * -X) --> Op0 + (X * Y) + if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) { + Value *FMul = Builder.CreateFMulFMF(X, Y, &I); + return BinaryOperator::CreateFAddFMF(Op0, 
FMul, &I); + } + // Op0 - (-X / Y) --> Op0 + (X / Y) + // Op0 - (X / -Y) --> Op0 + (X / Y) + if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) || + match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) { + Value *FDiv = Builder.CreateFDivFMF(X, Y, &I); + return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I); + } + // Handle special cases for FSub with selects feeding the operation if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1)) return replaceInstUsesWith(I, V); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2b9859b602f..4a30b60ca93 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -160,16 +160,14 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, } /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise -/// (V < Lo || V >= Hi). This method expects that Lo <= Hi. IsSigned indicates +/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates /// whether to treat V, Lo, and Hi as signed or not. Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, bool isSigned, bool Inside) { - assert((isSigned ? Lo.sle(Hi) : Lo.ule(Hi)) && - "Lo is not <= Hi in range emission code!"); + assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) && + "Lo is not < Hi in range emission code!"); Type *Ty = V->getType(); - if (Lo == Hi) - return Inside ? ConstantInt::getFalse(Ty) : ConstantInt::getTrue(Ty); // V >= Min && V < Hi --> V < Hi // V < Min || V >= Hi --> V >= Hi @@ -1051,9 +1049,103 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd, return nullptr; } +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. 
+static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp, + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q, + InstCombiner::BuilderTy &Builder) { + Value *ZeroCmpOp; + ICmpInst::Predicate EqPred; + if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) || + !ICmpInst::isEquality(EqPred)) + return nullptr; + + auto IsKnownNonZero = [&](Value *V) { + return isKnownNonZero(V, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + }; + + ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) && + match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) && + (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) { + if (UnsignedICmp->getOperand(0) != ZeroCmpOp) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) { + if (!IsKnownNonZero(NonZero)) + std::swap(NonZero, Other); + return IsKnownNonZero(NonZero); + }; + + // Given ZeroCmpOp = (A + B) + // ZeroCmpOp <= A && ZeroCmpOp != 0 --> (0-B) < A + // ZeroCmpOp > A || ZeroCmpOp == 0 --> (0-B) >= A + // + // ZeroCmpOp < A && ZeroCmpOp != 0 --> (0-X) < Y iff + // ZeroCmpOp >= A || ZeroCmpOp == 0 --> (0-X) >= Y iff + // with X being the value (A/B) that is known to be non-zero, + // and Y being remaining value. 
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + IsAnd) + return Builder.CreateICmpULT(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE && + IsAnd && GetKnownNonZeroAndOther(B, A)) + return Builder.CreateICmpULT(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + !IsAnd) + return Builder.CreateICmpUGE(Builder.CreateNeg(B), A); + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ && + !IsAnd && GetKnownNonZeroAndOther(B, A)) + return Builder.CreateICmpUGE(Builder.CreateNeg(B), A); + } + + Value *Base, *Offset; + if (!match(ZeroCmpOp, m_Sub(m_Value(Base), m_Value(Offset)))) + return nullptr; + + if (!match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Base), m_Specific(Offset))) || + !ICmpInst::isUnsigned(UnsignedPred)) + return nullptr; + if (UnsignedICmp->getOperand(0) != Base) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // Base >=/> Offset && (Base - Offset) != 0 <--> Base > Offset + // (no overflow and not null) + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_NE && IsAnd) + return Builder.CreateICmpUGT(Base, Offset); + + // Base <=/< Offset || (Base - Offset) == 0 <--> Base <= Offset + // (overflow or null) + if ((UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_ULT) && + EqPred == ICmpInst::ICMP_EQ && !IsAnd) + return Builder.CreateICmpULE(Base, Offset); + + // Base <= Offset && (Base - Offset) != 0 --> Base < Offset + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + IsAnd) + return Builder.CreateICmpULT(Base, Offset); + + // Base > Offset || (Base - Offset) == 0 --> Base >= Offset + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + !IsAnd) + return Builder.CreateICmpUGE(Base, Offset); + + return nullptr; +} + /// Fold (icmp)&(icmp) if possible. 
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI) { + const SimplifyQuery Q = SQ.getWithInstruction(&CxtI); + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) // if K1 and K2 are a one-bit mask. if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI)) @@ -1096,6 +1188,13 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder)) return V; + if (Value *X = + foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/true, Q, Builder)) + return X; + if (Value *X = + foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder)) + return X; + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); @@ -1196,16 +1295,22 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: - if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13 + // (X != 13 & X u< 14) -> X < 13 + if (LHSC->getValue() == (RHSC->getValue() - 1)) return Builder.CreateICmpULT(LHS0, LHSC); - if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13 + if (LHSC->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: - if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 + // (X != 13 & X s< 14) -> X < 13 + if (LHSC->getValue() == (RHSC->getValue() - 1)) return Builder.CreateICmpSLT(LHS0, LHSC); - break; // (X != 13 & X s< 15) -> no change + // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1)) + if (LHSC->isMinValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + true, true); + break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_NE: // 
Potential folds for this case should already be handled. break; @@ -1216,10 +1321,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: - if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 + // (X u> 13 & X != 14) -> X u> 14 + if (RHSC->getValue() == (LHSC->getValue() + 1)) return Builder.CreateICmp(PredL, LHS0, RHSC); + // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1) + if (RHSC->isMaxValue(false)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + false, true); break; // (X u> 13 & X != 15) -> no change - case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) 13 & X u< 15) -> (X-14) u< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); } @@ -1229,10 +1339,15 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: - if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 + // (X s> 13 & X != 14) -> X s> 14 + if (RHSC->getValue() == (LHSC->getValue() + 1)) return Builder.CreateICmp(PredL, LHS0, RHSC); + // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1) + if (RHSC->isMaxValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), + true, true); break; // (X s> 13 & X != 15) -> no change - case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 + case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) u< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, true); } @@ -1352,8 +1467,8 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I, Value *A, *B; if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) && match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) && - !IsFreeToInvert(A, A->hasOneUse()) && - !IsFreeToInvert(B, B->hasOneUse())) { + !isFreeToInvert(A, A->hasOneUse()) && + !isFreeToInvert(B, B->hasOneUse())) { 
Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan"); return BinaryOperator::CreateNot(AndOr); } @@ -1770,13 +1885,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) - if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + if (Op1->hasOneUse() || isFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) - if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) + if (Op0->hasOneUse() || isFreeToInvert(C, C->hasOneUse())) return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) @@ -1844,6 +1959,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType())); + // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0. + { + Value *X, *Y; + const APInt *ShAmt; + Type *Ty = I.getType(); + if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)), + m_APInt(ShAmt))), + m_Deferred(X))) && + *ShAmt == Ty->getScalarSizeInBits() - 1) { + Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); + return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty)); + } + } + return nullptr; } @@ -2057,6 +2186,8 @@ Value *InstCombiner::matchSelectFromAndOr(Value *A, Value *C, Value *B, /// Fold (icmp)|(icmp) if possible. Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI) { + const SimplifyQuery Q = SQ.getWithInstruction(&CxtI); + // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // if K1 and K2 are a one-bit mask. 
if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI)) @@ -2182,6 +2313,13 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder)) return V; + if (Value *X = + foldUnsignedUnderflowCheck(LHS, RHS, /*IsAnd=*/false, Q, Builder)) + return X; + if (Value *X = + foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder)) + return X; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). if (!LHSC || !RHSC) return nullptr; @@ -2251,8 +2389,19 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, case ICmpInst::ICMP_EQ: // Potential folds for this case should already be handled. break; - case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change - case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change + case ICmpInst::ICMP_UGT: + // (X == 0 || X u> C) -> (X-1) u>= C + if (LHSC->isMinValue(false)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1, + false, false); + // (X == 13 | X u> 14) -> no change + break; + case ICmpInst::ICMP_SGT: + // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN + if (LHSC->isMinValue(true)) + return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1, + true, false); + // (X == 13 | X s> 14) -> no change break; } break; @@ -2261,6 +2410,10 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change + // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C + if (RHSC->isMaxValue(false)) + return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(), + false, false); break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 assert(!RHSC->isMaxValue(false) && "Missed icmp simplification"); @@ -2272,9 +2425,14 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, switch (PredR) { default: 
llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change + case ICmpInst::ICMP_EQ: + // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C + if (RHSC->isMaxValue(true)) + return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(), + true, false); + // (X s< 13 | X == 14) -> no change break; - case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 + case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) u> 2 assert(!RHSC->isMaxValue(true) && "Missed icmp simplification"); return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true, false); @@ -2552,6 +2710,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } + // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? -1 : X. + { + Value *X, *Y; + const APInt *ShAmt; + Type *Ty = I.getType(); + if (match(&I, m_c_Or(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)), + m_APInt(ShAmt))), + m_Deferred(X))) && + *ShAmt == Ty->getScalarSizeInBits() - 1) { + Value *NewICmpInst = Builder.CreateICmpSGT(X, Y); + return SelectInst::Create(NewICmpInst, ConstantInt::getAllOnesValue(Ty), + X); + } + } + + if (Instruction *V = + canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I)) + return V; + return nullptr; } @@ -2617,7 +2794,11 @@ static Instruction *foldXorToXor(BinaryOperator &I, return nullptr; } -Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, + BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS && + I.getOperand(1) == RHS && "Should be 'xor' with these operands"); + if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { if (LHS->getOperand(0) == RHS->getOperand(1) && LHS->getOperand(1) == RHS->getOperand(0)) @@ -2672,14 +2853,35 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // TODO: If OrICmp is false, the whole thing is false (InstSimplify?). 
if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) { // TODO: Independently handle cases where the 'and' side is a constant. - if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { - // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS - RHS->setPredicate(RHS->getInversePredicate()); - return Builder.CreateAnd(LHS, RHS); + ICmpInst *X = nullptr, *Y = nullptr; + if (OrICmp == LHS && AndICmp == RHS) { + // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y + X = LHS; + Y = RHS; } - if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { - // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS - LHS->setPredicate(LHS->getInversePredicate()); + if (OrICmp == RHS && AndICmp == LHS) { + // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X + X = RHS; + Y = LHS; + } + if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) { + // Invert the predicate of 'Y', thus inverting its output. + Y->setPredicate(Y->getInversePredicate()); + // So, are there other uses of Y? + if (!Y->hasOneUse()) { + // We need to adapt other uses of Y though. Get a value that matches + // the original value of Y before inversion. While this increases + // immediate instruction count, we have just ensured that all the + // users are freely-invertible, so that 'not' *will* get folded away. + BuilderTy::InsertPointGuard Guard(Builder); + // Set insertion point to right after the Y. + Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator())); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); + // Replace all uses of Y (excluding the one in NotY!) with NotY. + Y->replaceUsesWithIf(NotY, + [NotY](Use &U) { return U.getUser() != NotY; }); + } + // All done. return Builder.CreateAnd(LHS, RHS); } } @@ -2747,9 +2949,9 @@ static Instruction *sinkNotIntoXor(BinaryOperator &I, return nullptr; // We only want to do the transform if it is free to do. - if (IsFreeToInvert(X, X->hasOneUse())) { + if (isFreeToInvert(X, X->hasOneUse())) { // Ok, good. 
- } else if (IsFreeToInvert(Y, Y->hasOneUse())) { + } else if (isFreeToInvert(Y, Y->hasOneUse())) { std::swap(X, Y); } else return nullptr; @@ -2827,9 +3029,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // Apply DeMorgan's Law when inverts are free: // ~(X & Y) --> (~X | ~Y) // ~(X | Y) --> (~X & ~Y) - if (IsFreeToInvert(NotVal->getOperand(0), + if (isFreeToInvert(NotVal->getOperand(0), NotVal->getOperand(0)->hasOneUse()) && - IsFreeToInvert(NotVal->getOperand(1), + isFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); @@ -3004,7 +3206,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (auto *LHS = dyn_cast(I.getOperand(0))) if (auto *RHS = dyn_cast(I.getOperand(1))) - if (Value *V = foldXorOfICmps(LHS, RHS)) + if (Value *V = foldXorOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, V); if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) @@ -3052,7 +3254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SelectPatternResult::isMinOrMax(SPF)) { // It's possible we get here before the not has been simplified, so make // sure the input to the not isn't freely invertible. - if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) { + if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) { Value *NotY = Builder.CreateNot(RHS); return SelectInst::Create( Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY); @@ -3060,7 +3262,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // It's possible we get here before the not has been simplified, so make // sure the input to the not isn't freely invertible. 
- if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) { + if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) { Value *NotX = Builder.CreateNot(LHS); return SelectInst::Create( Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y); @@ -3068,8 +3270,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // If both sides are freely invertible, then we can get rid of the xor // completely. - if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && - IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) { + if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && + isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) { Value *NotLHS = Builder.CreateNot(LHS); Value *NotRHS = Builder.CreateNot(RHS); return SelectInst::Create( diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp index 5f37a00f56c..825f4b468b0 100644 --- a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp +++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp @@ -124,7 +124,7 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) { auto *SI = new StoreInst(RMWI.getValOperand(), RMWI.getPointerOperand(), &RMWI); SI->setAtomic(Ordering, RMWI.getSyncScopeID()); - SI->setAlignment(DL.getABITypeAlignment(RMWI.getType())); + SI->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType()))); return eraseInstFromFunction(RMWI); } @@ -154,6 +154,6 @@ Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) { LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand()); Load->setAtomic(Ordering, RMWI.getSyncScopeID()); - Load->setAlignment(DL.getABITypeAlignment(RMWI.getType())); + Load->setAlignment(MaybeAlign(DL.getABITypeAlignment(RMWI.getType()))); return Load; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4b3333affa7..c650d242cd5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -185,7 +185,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder.CreateLoad(IntType, Src); // Alignment from the mem intrinsic will be better, so use it. - L->setAlignment(CopySrcAlign); + L->setAlignment( + MaybeAlign(CopySrcAlign)); // FIXME: Check if we can use Align instead. if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); MDNode *LoopMemParallelMD = @@ -198,7 +199,8 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { StoreInst *S = Builder.CreateStore(L, Dest); // Alignment from the mem intrinsic will be better, so use it. - S->setAlignment(CopyDstAlign); + S->setAlignment( + MaybeAlign(CopyDstAlign)); // FIXME: Check if we can use Align instead. if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); if (LoopMemParallelMD) @@ -223,9 +225,10 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { } Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); - if (MI->getDestAlignment() < Alignment) { - MI->setDestAlignment(Alignment); + const unsigned KnownAlignment = + getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); + if (MI->getDestAlignment() < KnownAlignment) { + MI->setDestAlignment(KnownAlignment); return MI; } @@ -243,13 +246,9 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { ConstantInt *FillC = dyn_cast(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return nullptr; - uint64_t Len = LenC->getLimitedValue(); - Alignment = MI->getDestAlignment(); + const uint64_t Len = LenC->getLimitedValue(); assert(Len && "0-sized memory setting should be removed already."); - - // Alignment 0 is identity for alignment 1 for memset, but not store. 
- if (Alignment == 0) - Alignment = 1; + const Align Alignment = assumeAligned(MI->getDestAlignment()); // If it is an atomic and alignment is less than the size then we will // introduce the unaligned memory access which will be later transformed @@ -1060,9 +1059,9 @@ Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) { // If we can unconditionally load from this address, replace with a // load/select idiom. TODO: use DT for context sensitive query - if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment, - II.getModule()->getDataLayout(), - &II, nullptr)) { + if (isDereferenceableAndAlignedPointer( + LoadPtr, II.getType(), MaybeAlign(Alignment), + II.getModule()->getDataLayout(), &II, nullptr)) { Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment, "unmaskedload"); return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3)); @@ -1086,7 +1085,8 @@ Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) { // If the mask is all ones, this is a plain vector store of the 1st argument. if (ConstMask->isAllOnesValue()) { Value *StorePtr = II.getArgOperand(1); - unsigned Alignment = cast(II.getArgOperand(2))->getZExtValue(); + MaybeAlign Alignment( + cast(II.getArgOperand(2))->getZExtValue()); return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment); } @@ -2234,6 +2234,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return replaceInstUsesWith(*II, Add); } + // Try to simplify the underlying FMul. 
+ if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1), + II->getFastMathFlags(), + SQ.getWithInstruction(II))) { + auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); + FAdd->copyFastMathFlags(II); + return FAdd; + } + LLVM_FALLTHROUGH; } case Intrinsic::fma: { @@ -2258,9 +2267,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; } - // fma x, 1, z -> fadd x, z - if (match(Src1, m_FPOne())) { - auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2)); + // Try to simplify the underlying FMul. We can only apply simplifications + // that do not require rounding. + if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1), + II->getFastMathFlags(), + SQ.getWithInstruction(II))) { + auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); FAdd->copyFastMathFlags(II); return FAdd; } @@ -2331,7 +2343,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC VSX loads into normal loads. Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); - return new LoadInst(II->getType(), Ptr, Twine(""), false, 1); + return new LoadInst(II->getType(), Ptr, Twine(""), false, Align::None()); } case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: @@ -2349,7 +2361,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); - return new StoreInst(II->getArgOperand(0), Ptr, false, 1); + return new StoreInst(II->getArgOperand(0), Ptr, false, Align::None()); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. @@ -3885,6 +3897,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Asan needs to poison memory to detect invalid access which is possible // even for empty lifetime range. 
if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) || II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) break; @@ -3950,10 +3963,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } case Intrinsic::experimental_gc_relocate: { + auto &GCR = *cast(II); + + // If we have two copies of the same pointer in the statepoint argument + // list, canonicalize to one. This may let us common gc.relocates. + if (GCR.getBasePtr() == GCR.getDerivedPtr() && + GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) { + auto *OpIntTy = GCR.getOperand(2)->getType(); + II->setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex())); + return II; + } + // Translate facts known about a pointer before relocating into // facts about the relocate value, while being careful to // preserve relocation semantics. - Value *DerivedPtr = cast(II)->getDerivedPtr(); + Value *DerivedPtr = GCR.getDerivedPtr(); // Remove the relocation if unused, note that this check is required // to prevent the cases below from looping forever. @@ -4177,10 +4201,58 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) { return nullptr; } +static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) { + unsigned NumArgs = Call.getNumArgOperands(); + ConstantInt *Op0C = dyn_cast(Call.getOperand(0)); + ConstantInt *Op1C = + (NumArgs == 1) ? nullptr : dyn_cast(Call.getOperand(1)); + // Bail out if the allocation size is zero. 
+ if ((Op0C && Op0C->isNullValue()) || (Op1C && Op1C->isNullValue())) + return; + + if (isMallocLikeFn(&Call, TLI) && Op0C) { + if (isOpNewLikeFn(&Call, TLI)) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableBytes( + Call.getContext(), Op0C->getZExtValue())); + else + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Op0C->getZExtValue())); + } else if (isReallocLikeFn(&Call, TLI) && Op1C) { + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Op1C->getZExtValue())); + } else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) { + bool Overflow; + const APInt &N = Op0C->getValue(); + APInt Size = N.umul_ov(Op1C->getValue(), Overflow); + if (!Overflow) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Size.getZExtValue())); + } else if (isStrdupLikeFn(&Call, TLI)) { + uint64_t Len = GetStringLength(Call.getOperand(0)); + if (Len) { + // strdup + if (NumArgs == 1) + Call.addAttribute(AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Len)); + // strndup + else if (NumArgs == 2 && Op1C) + Call.addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1))); + } + } +} + /// Improvements for call, callbr and invoke instructions. Instruction *InstCombiner::visitCallBase(CallBase &Call) { - if (isAllocLikeFn(&Call, &TLI)) - return visitAllocSite(Call); + if (isAllocationFn(&Call, &TLI)) + annotateAnyAllocSite(Call, &TLI); bool Changed = false; @@ -4312,6 +4384,9 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) { if (I) return eraseInstFromFunction(*I); } + if (isAllocLikeFn(&Call, &TLI)) + return visitAllocSite(Call); + return Changed ? 
&Call : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 2c9ba203fbf..65aaef28d87 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -140,7 +140,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, } AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); - New->setAlignment(AI.getAlignment()); + New->setAlignment(MaybeAlign(AI.getAlignment())); New->takeName(&AI); New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); @@ -1531,16 +1531,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // what we can and cannot do safely varies from operation to operation, and // is explained below in the various case statements. Type *Ty = FPT.getType(); - BinaryOperator *OpI = dyn_cast(FPT.getOperand(0)); - if (OpI && OpI->hasOneUse()) { - Type *LHSMinType = getMinimumFPType(OpI->getOperand(0)); - Type *RHSMinType = getMinimumFPType(OpI->getOperand(1)); - unsigned OpWidth = OpI->getType()->getFPMantissaWidth(); + auto *BO = dyn_cast(FPT.getOperand(0)); + if (BO && BO->hasOneUse()) { + Type *LHSMinType = getMinimumFPType(BO->getOperand(0)); + Type *RHSMinType = getMinimumFPType(BO->getOperand(1)); + unsigned OpWidth = BO->getType()->getFPMantissaWidth(); unsigned LHSWidth = LHSMinType->getFPMantissaWidth(); unsigned RHSWidth = RHSMinType->getFPMantissaWidth(); unsigned SrcWidth = std::max(LHSWidth, RHSWidth); unsigned DstWidth = Ty->getFPMantissaWidth(); - switch (OpI->getOpcode()) { + switch (BO->getOpcode()) { default: break; case Instruction::FAdd: case Instruction::FSub: @@ -1563,10 +1563,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // could be tightened for those cases, but they are rare (the main // case of interest here is (float)((double)float + float)). 
if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHS, RHS); - RI->copyFastMathFlags(OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS); + RI->copyFastMathFlags(BO); return RI; } break; @@ -1577,9 +1577,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // rounding can possibly occur; we can safely perform the operation // in the destination format if it can represent both sources. if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - return BinaryOperator::CreateFMulFMF(LHS, RHS, OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + return BinaryOperator::CreateFMulFMF(LHS, RHS, BO); } break; case Instruction::FDiv: @@ -1590,9 +1590,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { // condition used here is a good conservative first pass. // TODO: Tighten bound via rigorous analysis of the unbalanced case. 
if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { - Value *LHS = Builder.CreateFPTrunc(OpI->getOperand(0), Ty); - Value *RHS = Builder.CreateFPTrunc(OpI->getOperand(1), Ty); - return BinaryOperator::CreateFDivFMF(LHS, RHS, OpI); + Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty); + Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty); + return BinaryOperator::CreateFDivFMF(LHS, RHS, BO); } break; case Instruction::FRem: { @@ -1604,14 +1604,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) { break; Value *LHS, *RHS; if (LHSWidth == SrcWidth) { - LHS = Builder.CreateFPTrunc(OpI->getOperand(0), LHSMinType); - RHS = Builder.CreateFPTrunc(OpI->getOperand(1), LHSMinType); + LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType); + RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType); } else { - LHS = Builder.CreateFPTrunc(OpI->getOperand(0), RHSMinType); - RHS = Builder.CreateFPTrunc(OpI->getOperand(1), RHSMinType); + LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType); + RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType); } - Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, OpI); + Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO); return CastInst::CreateFPCast(ExactResult, Ty); } } @@ -2338,8 +2338,23 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector Idxs(NumZeros + 1, Builder.getInt32(0)); - return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src, - Idxs); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs); + + // If the source pointer is dereferenceable, then assume it points to an + // allocated object and apply "inbounds" to the GEP. 
+ bool CanBeNull; + if (Src->getPointerDereferenceableBytes(DL, CanBeNull)) { + // In a non-default address space (not 0), a null pointer can not be + // assumed inbounds, so ignore that case (dereferenceable_or_null). + // The reason is that 'null' is not treated differently in these address + // spaces, and we consequently ignore the 'gep inbounds' special case + // for 'null' which allows 'inbounds' on 'null' if the indices are + // zeros. + if (SrcPTy->getAddressSpace() == 0 || !CanBeNull) + GEP->setIsInBounds(); + } + return GEP; } } @@ -2391,28 +2406,47 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (ShuffleVectorInst *SVI = dyn_cast(Src)) { + if (auto *Shuf = dyn_cast(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && - DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && - SVI->getType()->getNumElements() == - SVI->getOperand(0)->getType()->getVectorNumElements()) { + Value *ShufOp0 = Shuf->getOperand(0); + Value *ShufOp1 = Shuf->getOperand(1); + unsigned NumShufElts = Shuf->getType()->getVectorNumElements(); + unsigned NumSrcVecElts = ShufOp0->getType()->getVectorNumElements(); + if (Shuf->hasOneUse() && DestTy->isVectorTy() && + DestTy->getVectorNumElements() == NumShufElts && + NumShufElts == NumSrcVecElts) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. 
- if (((Tmp = dyn_cast(SVI->getOperand(0))) && + if (((Tmp = dyn_cast(ShufOp0)) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast(SVI->getOperand(1))) && + ((Tmp = dyn_cast(ShufOp1)) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy); + Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. - return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); + return new ShuffleVectorInst(LHS, RHS, Shuf->getOperand(2)); } } + + // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as + // a byte-swap: + // bitcast (shuf X, undef, ) --> bswap (bitcast X) + // TODO: We should match the related pattern for bitreverse. + if (DestTy->isIntegerTy() && + DL.isLegalInteger(DestTy->getScalarSizeInBits()) && + SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 && + Shuf->hasOneUse() && Shuf->isReverse()) { + assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); + assert(isa(ShufOp1) && "Unexpected shuffle op"); + Function *Bswap = + Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy); + Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy); + return IntrinsicInst::Create(Bswap, { ScalarX }); + } } // Handle the A->B->A cast, and there is an intervening PHI node. diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3a4283ae540..a9f64feb600 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -69,34 +69,6 @@ static bool hasBranchUse(ICmpInst &I) { return false; } -/// Given an exploded icmp instruction, return true if the comparison only -/// checks the sign bit. 
If it only checks the sign bit, set TrueIfSigned if the -/// result of the comparison is true when the input value is signed. -static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, - bool &TrueIfSigned) { - switch (Pred) { - case ICmpInst::ICMP_SLT: // True if LHS s< 0 - TrueIfSigned = true; - return RHS.isNullValue(); - case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 - TrueIfSigned = true; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_SGT: // True if LHS s> -1 - TrueIfSigned = false; - return RHS.isAllOnesValue(); - case ICmpInst::ICMP_UGT: - // True if LHS u> RHS and RHS == high-bit-mask - 1 - TrueIfSigned = true; - return RHS.isMaxSignedValue(); - case ICmpInst::ICMP_UGE: - // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) - TrueIfSigned = true; - return RHS.isSignMask(); - default: - return false; - } -} - /// Returns true if the exploded icmp can be expressed as a signed comparison /// to zero and updates the predicate accordingly. /// The signedness of the comparison is preserved. @@ -832,6 +804,10 @@ getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, const DataLayout &DL) { + // FIXME: Support vector of pointers. + if (GEPLHS->getType()->isVectorTy()) + return nullptr; + if (!GEPLHS->hasAllConstantIndices()) return nullptr; @@ -882,7 +858,9 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS && GEPLHS->isInBounds()) { + // FIXME: Support vector pointer GEPs. + if (PtrBase == RHS && GEPLHS->isInBounds() && + !GEPLHS->getType()->isVectorTy()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. 
See if we can @@ -894,6 +872,37 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); + } + + if (GEPLHS->isInBounds() && ICmpInst::isEquality(Cond) && + isa(RHS) && cast(RHS)->isNullValue() && + !NullPointerIsDefined(I.getFunction(), + RHS->getType()->getPointerAddressSpace())) { + // For most address spaces, an allocation can't be placed at null, but null + // itself is treated as a 0 size allocation in the in bounds rules. Thus, + // the only valid inbounds address derived from null, is null itself. + // Thus, we have four cases to consider: + // 1) Base == nullptr, Offset == 0 -> inbounds, null + // 2) Base == nullptr, Offset != 0 -> poison as the result is out of bounds + // 3) Base != nullptr, Offset == (-base) -> poison (crossing allocations) + // 4) Base != nullptr, Offset != (-base) -> nonnull (and possibly poison) + // + // (Note if we're indexing a type of size 0, that simply collapses into one + // of the buckets above.) + // + // In general, we're allowed to make values less poison (i.e. remove + // sources of full UB), so in this case, we just select between the two + // non-poison cases (1 and 4 above). + // + // For vectors, we apply the same reasoning on a per-lane basis. + auto *Base = GEPLHS->getPointerOperand(); + if (GEPLHS->getType()->isVectorTy() && Base->getType()->isPointerTy()) { + int NumElts = GEPLHS->getType()->getVectorNumElements(); + Base = Builder.CreateVectorSplat(NumElts, Base); + } + return new ICmpInst(Cond, Base, + ConstantExpr::getPointerBitCastOrAddrSpaceCast( + cast(RHS), Base->getType())); } else if (GEPOperator *GEPRHS = dyn_cast(RHS)) { // If the base pointers are different, but the indices are the same, just // compare the base pointer. 
@@ -916,11 +925,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // If we're comparing GEPs with two base pointers that only differ in type // and both GEPs have only constant indices or just one use, then fold // the compare with the adjusted indices. + // FIXME: Support vector of pointers. if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && PtrBase->stripPointerCasts() == - GEPRHS->getOperand(0)->stripPointerCasts()) { + GEPRHS->getOperand(0)->stripPointerCasts() && + !GEPLHS->getType()->isVectorTy()) { Value *LOffset = EmitGEPOffset(GEPLHS); Value *ROffset = EmitGEPOffset(GEPRHS); @@ -949,12 +960,14 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // If one of the GEPs has all zero indices, recurse. - if (GEPLHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. + if (!GEPLHS->getType()->isVectorTy() && GEPLHS->hasAllZeroIndices()) return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0), ICmpInst::getSwappedPredicate(Cond), I); // If the other GEP has all zero indices, recurse. - if (GEPRHS->hasAllZeroIndices()) + // FIXME: Handle vector of pointers. + if (!GEPRHS->getType()->isVectorTy() && GEPRHS->hasAllZeroIndices()) return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); @@ -964,15 +977,20 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, unsigned DiffOperand = 0; // The operand that differs. 
for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) { - if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() != - GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) { + Type *LHSType = GEPLHS->getOperand(i)->getType(); + Type *RHSType = GEPRHS->getOperand(i)->getType(); + // FIXME: Better support for vector of pointers. + if (LHSType->getPrimitiveSizeInBits() != + RHSType->getPrimitiveSizeInBits() || + (GEPLHS->getType()->isVectorTy() && + (!LHSType->isVectorTy() || !RHSType->isVectorTy()))) { // Irreconcilable differences. NumDifferences = 2; break; - } else { - if (NumDifferences++) break; - DiffOperand = i; } + + if (NumDifferences++) break; + DiffOperand = i; } if (NumDifferences == 0) // SAME GEP? @@ -1317,6 +1335,59 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } +/// If we have: +/// icmp eq/ne (urem/srem %x, %y), 0 +/// iff %y is a power-of-two, we can replace this with a bit test: +/// icmp eq/ne (and %x, (add %y, -1)), 0 +Instruction *InstCombiner::foldIRemByPowerOfTwoToBitTest(ICmpInst &I) { + // This fold is only valid for equality predicates. + if (!I.isEquality()) + return nullptr; + ICmpInst::Predicate Pred; + Value *X, *Y, *Zero; + if (!match(&I, m_ICmp(Pred, m_OneUse(m_IRem(m_Value(X), m_Value(Y))), + m_CombineAnd(m_Zero(), m_Value(Zero))))) + return nullptr; + if (!isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, 0, &I)) + return nullptr; + // This may increase instruction count, we don't enforce that Y is a constant. + Value *Mask = Builder.CreateAdd(Y, Constant::getAllOnesValue(Y->getType())); + Value *Masked = Builder.CreateAnd(X, Mask); + return ICmpInst::Create(Instruction::ICmp, Pred, Masked, Zero); +} + +/// Fold equality-comparison between zero and any (maybe truncated) right-shift +/// by one-less-than-bitwidth into a sign test on the original value. 
+Instruction *InstCombiner::foldSignBitTest(ICmpInst &I) { + Instruction *Val; + ICmpInst::Predicate Pred; + if (!I.isEquality() || !match(&I, m_ICmp(Pred, m_Instruction(Val), m_Zero()))) + return nullptr; + + Value *X; + Type *XTy; + + Constant *C; + if (match(Val, m_TruncOrSelf(m_Shr(m_Value(X), m_Constant(C))))) { + XTy = X->getType(); + unsigned XBitWidth = XTy->getScalarSizeInBits(); + if (!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(XBitWidth, XBitWidth - 1)))) + return nullptr; + } else if (isa(Val) && + (X = reassociateShiftAmtsOfTwoSameDirectionShifts( + cast(Val), SQ.getWithInstruction(Val), + /*AnalyzeForSignBitExtraction=*/true))) { + XTy = X->getType(); + } else + return nullptr; + + return ICmpInst::Create(Instruction::ICmp, + Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_SGE + : ICmpInst::ICMP_SLT, + X, ConstantInt::getNullValue(XTy)); +} + // Handle icmp pred X, 0 Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); @@ -1335,6 +1406,9 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { } } + if (Instruction *New = foldIRemByPowerOfTwoToBitTest(Cmp)) + return New; + // Given: // icmp eq/ne (urem %x, %y), 0 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': @@ -2179,6 +2253,44 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, return nullptr; } +Instruction *InstCombiner::foldICmpSRemConstant(ICmpInst &Cmp, + BinaryOperator *SRem, + const APInt &C) { + // Match an 'is positive' or 'is negative' comparison of remainder by a + // constant power-of-2 value: + // (X % pow2C) sgt/slt 0 + const ICmpInst::Predicate Pred = Cmp.getPredicate(); + if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT) + return nullptr; + + // TODO: The one-use check is standard because we do not typically want to + // create longer instruction sequences, but this might be a special-case + // because srem is not good for analysis or codegen. 
+ if (!SRem->hasOneUse()) + return nullptr; + + const APInt *DivisorC; + if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC))) + return nullptr; + + // Mask off the sign bit and the modulo bits (low-bits). + Type *Ty = SRem->getType(); + APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits()); + Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1)); + Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC); + + // For 'is positive?' check that the sign-bit is clear and at least 1 masked + // bit is set. Example: + // (i8 X % 32) s> 0 --> (X & 159) s> 0 + if (Pred == ICmpInst::ICMP_SGT) + return new ICmpInst(ICmpInst::ICMP_SGT, And, ConstantInt::getNullValue(Ty)); + + // For 'is negative?' check that the sign-bit is set and at least 1 masked + // bit is set. Example: + // (i16 X % 4) s< 0 --> (X & 32771) u> 32768 + return new ICmpInst(ICmpInst::ICMP_UGT, And, ConstantInt::get(Ty, SignMask)); +} + /// Fold icmp (udiv X, Y), C. Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, @@ -2387,6 +2499,11 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, const APInt *C2; APInt SubResult; + // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0 + if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality()) + return new ICmpInst(Cmp.getPredicate(), Y, + ConstantInt::get(Y->getType(), 0)); + // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C) if (match(X, m_APInt(C2)) && ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) || @@ -2509,20 +2626,49 @@ bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, // TODO: Generalize this to work with other comparison idioms or ensure // they get canonicalized into this form. - // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32 - // Greater), where Equal, Less and Greater are placeholders for any three - // constants. 
- ICmpInst::Predicate PredA, PredB; - if (match(SI->getTrueValue(), m_ConstantInt(Equal)) && - match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) && - PredA == ICmpInst::ICMP_EQ && - match(SI->getFalseValue(), - m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)), - m_ConstantInt(Less), m_ConstantInt(Greater))) && - PredB == ICmpInst::ICMP_SLT) { - return true; + // select i1 (a == b), + // i32 Equal, + // i32 (select i1 (a < b), i32 Less, i32 Greater) + // where Equal, Less and Greater are placeholders for any three constants. + ICmpInst::Predicate PredA; + if (!match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) || + !ICmpInst::isEquality(PredA)) + return false; + Value *EqualVal = SI->getTrueValue(); + Value *UnequalVal = SI->getFalseValue(); + // We still can get non-canonical predicate here, so canonicalize. + if (PredA == ICmpInst::ICMP_NE) + std::swap(EqualVal, UnequalVal); + if (!match(EqualVal, m_ConstantInt(Equal))) + return false; + ICmpInst::Predicate PredB; + Value *LHS2, *RHS2; + if (!match(UnequalVal, m_Select(m_ICmp(PredB, m_Value(LHS2), m_Value(RHS2)), + m_ConstantInt(Less), m_ConstantInt(Greater)))) + return false; + // We can get predicate mismatch here, so canonicalize if possible: + // First, ensure that 'LHS' match. + if (LHS2 != LHS) { + // x sgt y <--> y slt x + std::swap(LHS2, RHS2); + PredB = ICmpInst::getSwappedPredicate(PredB); } - return false; + if (LHS2 != LHS) + return false; + // We also need to canonicalize 'RHS'. + if (PredB == ICmpInst::ICMP_SGT && isa(RHS2)) { + // x sgt C-1 <--> x sge C <--> not(x slt C) + auto FlippedStrictness = + getFlippedStrictnessPredicateAndConstant(PredB, cast(RHS2)); + if (!FlippedStrictness) + return false; + assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check"); + RHS2 = FlippedStrictness->second; + // And kind-of perform the result swap. 
+ std::swap(Less, Greater); + PredB = ICmpInst::ICMP_SLT; + } + return PredB == ICmpInst::ICMP_SLT && RHS == RHS2; } Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, @@ -2702,6 +2848,10 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C)) return I; break; + case Instruction::SRem: + if (Instruction *I = foldICmpSRemConstant(Cmp, BO, *C)) + return I; + break; case Instruction::UDiv: if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C)) return I; @@ -2926,6 +3076,28 @@ Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp, } break; } + + case Intrinsic::uadd_sat: { + // uadd.sat(a, b) == 0 -> (a | b) == 0 + if (C.isNullValue()) { + Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1)); + return replaceInstUsesWith(Cmp, Builder.CreateICmp( + Cmp.getPredicate(), Or, Constant::getNullValue(Ty))); + + } + break; + } + + case Intrinsic::usub_sat: { + // usub.sat(a, b) == 0 -> a <= b + if (C.isNullValue()) { + ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ + ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; + return ICmpInst::Create(Instruction::ICmp, NewPred, + II->getArgOperand(0), II->getArgOperand(1)); + } + break; + } default: break; } @@ -3275,6 +3447,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I, // we should move shifts to the same hand of 'and', i.e. rewrite as // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x) // We are only interested in opposite logical shifts here. +// One of the shifts can be truncated. // If we can, we want to end up creating 'lshr' shift. 
static Value * foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, @@ -3284,55 +3457,215 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, return nullptr; auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value()); - auto m_AnyLShr = m_LShr(m_Value(), m_Value()); - // Look for an 'and' of two (opposite) logical shifts. - // Pick the single-use shift as XShift. - Value *XShift, *YShift; - if (!match(I.getOperand(0), - m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))), - m_CombineAnd(m_AnyLogicalShift, m_Value(YShift))))) + // Look for an 'and' of two logical shifts, one of which may be truncated. + // We use m_TruncOrSelf() on the RHS to correctly handle commutative case. + Instruction *XShift, *MaybeTruncation, *YShift; + if (!match( + I.getOperand(0), + m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)), + m_CombineAnd(m_TruncOrSelf(m_CombineAnd( + m_AnyLogicalShift, m_Instruction(YShift))), + m_Instruction(MaybeTruncation))))) return nullptr; - // If YShift is a single-use 'lshr', swap the shifts around. - if (match(YShift, m_OneUse(m_AnyLShr))) + // We potentially looked past 'trunc', but only when matching YShift, + // therefore YShift must have the widest type. + Instruction *WidestShift = YShift; + // Therefore XShift must have the shallowest type. + // Or they both have identical types if there was no truncation. + Instruction *NarrowestShift = XShift; + + Type *WidestTy = WidestShift->getType(); + assert(NarrowestShift->getType() == I.getOperand(0)->getType() && + "We did not look past any shifts while matching XShift though."); + bool HadTrunc = WidestTy != I.getOperand(0)->getType(); + + // If YShift is a 'lshr', swap the shifts around. + if (match(YShift, m_LShr(m_Value(), m_Value()))) std::swap(XShift, YShift); // The shifts must be in opposite directions. 
- Instruction::BinaryOps XShiftOpcode = - cast(XShift)->getOpcode(); - if (XShiftOpcode == cast(YShift)->getOpcode()) + auto XShiftOpcode = XShift->getOpcode(); + if (XShiftOpcode == YShift->getOpcode()) return nullptr; // Do not care about same-direction shifts here. Value *X, *XShAmt, *Y, *YShAmt; - match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt))); - match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt))); + match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt)))); + match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt)))); + + // If one of the values being shifted is a constant, then we will end with + // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not, + // however, we will need to ensure that we won't increase instruction count. + if (!isa(X) && !isa(Y)) { + // At least one of the hands of the 'and' should be one-use shift. + if (!match(I.getOperand(0), + m_c_And(m_OneUse(m_AnyLogicalShift), m_Value()))) + return nullptr; + if (HadTrunc) { + // Due to the 'trunc', we will need to widen X. For that either the old + // 'trunc' or the shift amt in the non-truncated shift should be one-use. + if (!MaybeTruncation->hasOneUse() && + !NarrowestShift->getOperand(1)->hasOneUse()) + return nullptr; + } + } + + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now. + if (XShAmt->getType() != YShAmt->getType()) + return nullptr; // Can we fold (XShAmt+YShAmt) ? - Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt, - SQ.getWithInstruction(&I)); + auto *NewShAmt = dyn_cast_or_null( + SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false, + /*isNUW=*/false, SQ.getWithInstruction(&I))); if (!NewShAmt) return nullptr; + NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy); + unsigned WidestBitWidth = WidestTy->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width? 
// FIXME: could also rely on ConstantRange. - unsigned BitWidth = X->getType()->getScalarSizeInBits(); - if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, - APInt(BitWidth, BitWidth)))) + if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, + APInt(WidestBitWidth, WidestBitWidth)))) return nullptr; - // All good, we can do this fold. The shift is the same that was for X. + + // An extra legality check is needed if we had trunc-of-lshr. + if (HadTrunc && match(WidestShift, m_LShr(m_Value(), m_Value()))) { + auto CanFold = [NewShAmt, WidestBitWidth, NarrowestShift, SQ, + WidestShift]() { + // It isn't obvious whether it's worth it to analyze non-constants here. + // Also, let's basically give up on non-splat cases, pessimizing vectors. + // If *any* of these preconditions matches we can perform the fold. + Constant *NewShAmtSplat = NewShAmt->getType()->isVectorTy() + ? NewShAmt->getSplatValue() + : NewShAmt; + // If it's edge-case shift (by 0 or by WidestBitWidth-1) we can fold. + if (NewShAmtSplat && + (NewShAmtSplat->isNullValue() || + NewShAmtSplat->getUniqueInteger() == WidestBitWidth - 1)) + return true; + // We consider *min* leading zeros so a single outlier + // blocks the transform as opposed to allowing it. + if (auto *C = dyn_cast(NarrowestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. + unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: NewShAmt u<= countLeadingZeros(C) + if (NewShAmtSplat && NewShAmtSplat->getUniqueInteger().ule(MinLeadZero)) + return true; + } + if (auto *C = dyn_cast(WidestShift->getOperand(0))) { + KnownBits Known = computeKnownBits(C, SQ.DL); + unsigned MinLeadZero = Known.countMinLeadingZeros(); + // If the value being shifted has at most lowest bit set we can fold. 
+ unsigned MaxActiveBits = Known.getBitWidth() - MinLeadZero; + if (MaxActiveBits <= 1) + return true; + // Precondition: ((WidestBitWidth-1)-NewShAmt) u<= countLeadingZeros(C) + if (NewShAmtSplat) { + APInt AdjNewShAmt = + (WidestBitWidth - 1) - NewShAmtSplat->getUniqueInteger(); + if (AdjNewShAmt.ule(MinLeadZero)) + return true; + } + } + return false; // Can't tell if it's ok. + }; + if (!CanFold()) + return nullptr; + } + + // All good, we can do this fold. + X = Builder.CreateZExt(X, WidestTy); + Y = Builder.CreateZExt(Y, WidestTy); + // The shift is the same that was for X. Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr ? Builder.CreateLShr(X, NewShAmt) : Builder.CreateShl(X, NewShAmt); Value *T1 = Builder.CreateAnd(T0, Y); return Builder.CreateICmp(I.getPredicate(), T1, - Constant::getNullValue(X->getType())); + Constant::getNullValue(WidestTy)); +} + +/// Fold +/// (-1 u/ x) u< y +/// ((x * y) u/ x) != y +/// to +/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit +/// Note that the comparison is commutative, while inverted (u>=, ==) predicate +/// will mean that we are looking for the opposite answer. +Value *InstCombiner::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) { + ICmpInst::Predicate Pred; + Value *X, *Y; + Instruction *Mul; + bool NeedNegation; + // Look for: (-1 u/ x) u= y + if (!I.isEquality() && + match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))), + m_Value(Y)))) { + Mul = nullptr; + // Canonicalize as-if y was on RHS. + if (I.getOperand(1) != Y) + Pred = I.getSwappedPredicate(); + + // Are we checking that overflow does not happen, or does happen? + switch (Pred) { + case ICmpInst::Predicate::ICMP_ULT: + NeedNegation = false; + break; // OK + case ICmpInst::Predicate::ICMP_UGE: + NeedNegation = true; + break; // OK + default: + return nullptr; // Wrong predicate. 
+ } + } else // Look for: ((x * y) u/ x) !=/== y + if (I.isEquality() && + match(&I, m_c_ICmp(Pred, m_Value(Y), + m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y), + m_Value(X)), + m_Instruction(Mul)), + m_Deferred(X)))))) { + NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ; + } else + return nullptr; + + BuilderTy::InsertPointGuard Guard(Builder); + // If the pattern included (x * y), we'll want to insert new instructions + // right before that original multiplication so that we can replace it. + bool MulHadOtherUses = Mul && !Mul->hasOneUse(); + if (MulHadOtherUses) + Builder.SetInsertPoint(Mul); + + Function *F = Intrinsic::getDeclaration( + I.getModule(), Intrinsic::umul_with_overflow, X->getType()); + CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul"); + + // If the multiplication was used elsewhere, to ensure that we don't leave + // "duplicate" instructions, replace uses of that original multiplication + // with the multiplication result from the with.overflow intrinsic. + if (MulHadOtherUses) + replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val")); + + Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov"); + if (NeedNegation) // This technically increases instruction count. + Res = Builder.CreateNot(Res, "umul.not.ov"); + + return Res; } /// Try to fold icmp (binop), X or icmp X, (binop). /// TODO: A large part of this logic is duplicated in InstSimplify's /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code /// duplication. -Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { +Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I, const SimplifyQuery &SQ) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Special logic for binary operators. @@ -3345,13 +3678,13 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Value *X; // Convert add-with-unsigned-overflow comparisons into a 'not' with compare. 
- // (Op1 + X) ~Op1 u (Op0 + X) --> X >u ~Op0 + // (Op1 + X) u= Op1 --> ~Op1 u= X if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) && - Pred == ICmpInst::ICMP_ULT) + (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) return new ICmpInst(Pred, Builder.CreateNot(Op1), X); + // Op0 u>/u<= (Op0 + X) --> X u>/u<= ~Op0 if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) && - Pred == ICmpInst::ICMP_UGT) + (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) return new ICmpInst(Pred, X, Builder.CreateNot(Op0)); bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; @@ -3378,21 +3711,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { D = BO1->getOperand(1); } - // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. + // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow. if ((A == Op1 || B == Op1) && NoOp0WrapProblem) return new ICmpInst(Pred, A == Op1 ? B : A, Constant::getNullValue(Op1->getType())); - // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + // icmp C, (C+D) -> icmp 0, D for equalities or if there is no overflow. + // icmp D, (C+D) -> icmp 0, C for equalities or if there is no overflow. if ((C == Op0 || D == Op0) && NoOp1WrapProblem) return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()), C == Op0 ? D : C); - // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow. + // icmp (A+B), (A+D) -> icmp B, D for equalities or if there is no overflow. if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem && - NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) { + NoOp1WrapProblem) { // Determine Y and Z in the form icmp (X+Y), (X+Z). 
Value *Y, *Z; if (A == C) { @@ -3416,39 +3749,39 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { return new ICmpInst(Pred, Y, Z); } - // icmp slt (X + -1), Y -> icmp sle X, Y + // icmp slt (A + -1), Op1 -> icmp sle A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && match(B, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); - // icmp sge (X + -1), Y -> icmp sgt X, Y + // icmp sge (A + -1), Op1 -> icmp sgt A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && match(B, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); - // icmp sle (X + 1), Y -> icmp slt X, Y + // icmp sle (A + 1), Op1 -> icmp slt A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); - // icmp sgt (X + 1), Y -> icmp sge X, Y + // icmp sgt (A + 1), Op1 -> icmp sge A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); - // icmp sgt X, (Y + -1) -> icmp sge X, Y + // icmp sgt Op0, (C + -1) -> icmp sge Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT && match(D, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SGE, Op0, C); - // icmp sle X, (Y + -1) -> icmp slt X, Y + // icmp sle Op0, (C + -1) -> icmp slt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE && match(D, m_AllOnes())) return new ICmpInst(CmpInst::ICMP_SLT, Op0, C); - // icmp sge X, (Y + 1) -> icmp sgt X, Y + // icmp sge Op0, (C + 1) -> icmp sgt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_SGT, Op0, C); - // icmp slt X, (Y + 1) -> icmp sle X, Y + // icmp slt Op0, (C + 1) -> icmp sle Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_SLE, Op0, C); @@ -3456,33 +3789,33 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { // canonicalization from (X -nuw 1) to (X + -1) 
means that the combinations // wouldn't happen even if they were implemented. // - // icmp ult (X - 1), Y -> icmp ule X, Y - // icmp uge (X - 1), Y -> icmp ugt X, Y - // icmp ugt X, (Y - 1) -> icmp uge X, Y - // icmp ule X, (Y - 1) -> icmp ult X, Y + // icmp ult (A - 1), Op1 -> icmp ule A, Op1 + // icmp uge (A - 1), Op1 -> icmp ugt A, Op1 + // icmp ugt Op0, (C - 1) -> icmp uge Op0, C + // icmp ule Op0, (C - 1) -> icmp ult Op0, C - // icmp ule (X + 1), Y -> icmp ult X, Y + // icmp ule (A + 1), Op0 -> icmp ult A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_ULT, A, Op1); - // icmp ugt (X + 1), Y -> icmp uge X, Y + // icmp ugt (A + 1), Op0 -> icmp uge A, Op1 if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One())) return new ICmpInst(CmpInst::ICMP_UGE, A, Op1); - // icmp uge X, (Y + 1) -> icmp ugt X, Y + // icmp uge Op0, (C + 1) -> icmp ugt Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_UGT, Op0, C); - // icmp ult X, (Y + 1) -> icmp ule X, Y + // icmp ult Op0, (C + 1) -> icmp ule Op0, C if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One())) return new ICmpInst(CmpInst::ICMP_ULE, Op0, C); // if C1 has greater magnitude than C2: - // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // icmp (A + C1), (C + C2) -> icmp (A + C3), C // s.t. C3 = C1 - C2 // // if C2 has greater magnitude than C1: - // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // icmp (A + C1), (C + C2) -> icmp A, (C + C3) // s.t. C3 = C2 - C1 if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) @@ -3520,29 +3853,35 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { D = BO1->getOperand(1); } - // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow. + // icmp (A-B), A -> icmp 0, B for equalities or if there is no overflow. 
if (A == Op1 && NoOp0WrapProblem) return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B); - // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow. + // icmp C, (C-D) -> icmp D, 0 for equalities or if there is no overflow. if (C == Op0 && NoOp1WrapProblem) return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType())); - // (A - B) >u A --> A C /u<= A --> B u>/u<= A + if (A == Op1 && (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE)) + return new ICmpInst(Pred, B, A); + // C u= (C - D) --> C u= D + if (C == Op0 && (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) + return new ICmpInst(Pred, C, D); + // (A - B) u>=/u< A --> B u>/u<= A iff B != 0 + if (A == Op1 && (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), B, A); + // C u<=/u> (C - D) --> C u= D iff B != 0 + if (C == Op0 && (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) && + isKnownNonZero(D, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return new ICmpInst(CmpInst::getFlippedStrictnessPredicate(Pred), C, D); - // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow. - if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) + // icmp (A-B), (C-B) -> icmp A, C for equalities or if there is no overflow. + if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem) return new ICmpInst(Pred, A, C); - // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow. - if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem && - // Try not to increase register pressure. - BO0->hasOneUse() && BO1->hasOneUse()) + + // icmp (A-B), (A-D) -> icmp D, B for equalities or if there is no overflow. 
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem) return new ICmpInst(Pred, D, B); // icmp (0-X) < cst --> x > -cst @@ -3677,6 +4016,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { } } + if (Value *V = foldUnsignedMultiplicationOverflowCheck(I)) + return replaceInstUsesWith(I, V); + if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder)) return replaceInstUsesWith(I, V); @@ -3953,125 +4295,140 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { return nullptr; } -/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so -/// far. -Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { - const CastInst *LHSCI = cast(ICmp.getOperand(0)); - Value *LHSCIOp = LHSCI->getOperand(0); - Type *SrcTy = LHSCIOp->getType(); - Type *DestTy = LHSCI->getType(); - - // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the - // integer type is the same size as the pointer type. - const auto& CompatibleSizes = [&](Type* SrcTy, Type* DestTy) -> bool { - if (isa(SrcTy)) { - SrcTy = cast(SrcTy)->getElementType(); - DestTy = cast(DestTy)->getElementType(); - } - return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); - }; - if (LHSCI->getOpcode() == Instruction::PtrToInt && - CompatibleSizes(SrcTy, DestTy)) { - Value *RHSOp = nullptr; - if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { - Value *RHSCIOp = RHSC->getOperand(0); - if (RHSCIOp->getType()->getPointerAddressSpace() == - LHSCIOp->getType()->getPointerAddressSpace()) { - RHSOp = RHSC->getOperand(0); - // If the pointer types don't match, insert a bitcast. - if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); - } - } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { - RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); - } - - if (RHSOp) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp); - } - - // The code below only handles extension cast instructions, so far. 
- // Enforce this. - if (LHSCI->getOpcode() != Instruction::ZExt && - LHSCI->getOpcode() != Instruction::SExt) +static Instruction *foldICmpWithZextOrSext(ICmpInst &ICmp, + InstCombiner::BuilderTy &Builder) { + assert(isa(ICmp.getOperand(0)) && "Expected cast for operand 0"); + auto *CastOp0 = cast(ICmp.getOperand(0)); + Value *X; + if (!match(CastOp0, m_ZExtOrSExt(m_Value(X)))) return nullptr; - bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; - bool isSignedCmp = ICmp.isSigned(); - - if (auto *CI = dyn_cast(ICmp.getOperand(1))) { - // Not an extension from the same type? - Value *RHSCIOp = CI->getOperand(0); - if (RHSCIOp->getType() != LHSCIOp->getType()) - return nullptr; - + bool IsSignedExt = CastOp0->getOpcode() == Instruction::SExt; + bool IsSignedCmp = ICmp.isSigned(); + if (auto *CastOp1 = dyn_cast(ICmp.getOperand(1))) { // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. - if (CI->getOpcode() != LHSCI->getOpcode()) + // TODO: This is too strict. We can handle some predicates (equality?). + if (CastOp0->getOpcode() != CastOp1->getOpcode()) return nullptr; - // Deal with equality cases early. + // Not an extension from the same type? + Value *Y = CastOp1->getOperand(0); + Type *XTy = X->getType(), *YTy = Y->getType(); + if (XTy != YTy) { + // One of the casts must have one use because we are creating a new cast. + if (!CastOp0->hasOneUse() && !CastOp1->hasOneUse()) + return nullptr; + // Extend the narrower operand to the type of the wider operand. 
+ if (XTy->getScalarSizeInBits() < YTy->getScalarSizeInBits()) + X = Builder.CreateCast(CastOp0->getOpcode(), X, YTy); + else if (YTy->getScalarSizeInBits() < XTy->getScalarSizeInBits()) + Y = Builder.CreateCast(CastOp0->getOpcode(), Y, XTy); + else + return nullptr; + } + + // (zext X) == (zext Y) --> X == Y + // (sext X) == (sext Y) --> X == Y if (ICmp.isEquality()) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp); + return new ICmpInst(ICmp.getPredicate(), X, Y); // A signed comparison of sign extended values simplifies into a // signed comparison. - if (isSignedCmp && isSignedExt) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp); + if (IsSignedCmp && IsSignedExt) + return new ICmpInst(ICmp.getPredicate(), X, Y); // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp); + return new ICmpInst(ICmp.getUnsignedPredicate(), X, Y); } - // If we aren't dealing with a constant on the RHS, exit early. + // Below here, we are only folding a compare with constant. auto *C = dyn_cast(ICmp.getOperand(1)); if (!C) return nullptr; // Compute the constant that would happen if we truncated to SrcTy then // re-extended to DestTy. + Type *SrcTy = CastOp0->getSrcTy(); + Type *DestTy = CastOp0->getDestTy(); Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy); - Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy); + Constant *Res2 = ConstantExpr::getCast(CastOp0->getOpcode(), Res1, DestTy); // If the re-extended constant didn't change... if (Res2 == C) { - // Deal with equality cases early. if (ICmp.isEquality()) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1); + return new ICmpInst(ICmp.getPredicate(), X, Res1); // A signed comparison of sign extended values simplifies into a // signed comparison. 
- if (isSignedExt && isSignedCmp) - return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1); + if (IsSignedExt && IsSignedCmp) + return new ICmpInst(ICmp.getPredicate(), X, Res1); // The other three cases all fold into an unsigned comparison. - return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1); + return new ICmpInst(ICmp.getUnsignedPredicate(), X, Res1); } // The re-extended constant changed, partly changed (in the case of a vector), // or could not be determined to be equal (in the case of a constant // expression), so the constant cannot be represented in the shorter type. - // Consequently, we cannot emit a simple comparison. // All the cases that fold to true or false will have already been handled // by SimplifyICmpInst, so only deal with the tricky case. - - if (isSignedCmp || !isSignedExt || !isa(C)) + if (IsSignedCmp || !IsSignedExt || !isa(C)) return nullptr; - // Evaluate the comparison for LT (we invert for GT below). LE and GE cases - // should have been folded away previously and not enter in here. - - // We're performing an unsigned comp with a sign extended value. - // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); - - // Finally, return the value computed. + // Is source op positive? + // icmp ult (sext X), C --> icmp sgt X, -1 if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) - return replaceInstUsesWith(ICmp, Result); + return new ICmpInst(CmpInst::ICMP_SGT, X, Constant::getAllOnesValue(SrcTy)); + // Is source op negative? + // icmp ugt (sext X), C --> icmp slt X, 0 assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!"); - return BinaryOperator::CreateNot(Result); + return new ICmpInst(CmpInst::ICMP_SLT, X, Constant::getNullValue(SrcTy)); +} + +/// Handle icmp (cast x), (cast or constant). 
+Instruction *InstCombiner::foldICmpWithCastOp(ICmpInst &ICmp) { + auto *CastOp0 = dyn_cast(ICmp.getOperand(0)); + if (!CastOp0) + return nullptr; + if (!isa(ICmp.getOperand(1)) && !isa(ICmp.getOperand(1))) + return nullptr; + + Value *Op0Src = CastOp0->getOperand(0); + Type *SrcTy = CastOp0->getSrcTy(); + Type *DestTy = CastOp0->getDestTy(); + + // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the + // integer type is the same size as the pointer type. + auto CompatibleSizes = [&](Type *SrcTy, Type *DestTy) { + if (isa(SrcTy)) { + SrcTy = cast(SrcTy)->getElementType(); + DestTy = cast(DestTy)->getElementType(); + } + return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth(); + }; + if (CastOp0->getOpcode() == Instruction::PtrToInt && + CompatibleSizes(SrcTy, DestTy)) { + Value *NewOp1 = nullptr; + if (auto *PtrToIntOp1 = dyn_cast(ICmp.getOperand(1))) { + Value *PtrSrc = PtrToIntOp1->getOperand(0); + if (PtrSrc->getType()->getPointerAddressSpace() == + Op0Src->getType()->getPointerAddressSpace()) { + NewOp1 = PtrToIntOp1->getOperand(0); + // If the pointer types don't match, insert a bitcast. 
+ if (Op0Src->getType() != NewOp1->getType()) + NewOp1 = Builder.CreateBitCast(NewOp1, Op0Src->getType()); + } + } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { + NewOp1 = ConstantExpr::getIntToPtr(RHSC, SrcTy); + } + + if (NewOp1) + return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1); + } + + return foldICmpWithZextOrSext(ICmp, Builder); } static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) { @@ -4791,13 +5148,66 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { return nullptr; } +llvm::Optional> +llvm::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, + Constant *C) { + assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) && + "Only for relational integer predicates."); + + Type *Type = C->getType(); + bool IsSigned = ICmpInst::isSigned(Pred); + + CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred); + bool WillIncrement = + UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT; + + // Check if the constant operand can be safely incremented/decremented + // without overflowing/underflowing. + auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) { + return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned); + }; + + if (auto *CI = dyn_cast(C)) { + // Bail out if the constant can't be safely incremented/decremented. + if (!ConstantIsOk(CI)) + return llvm::None; + } else if (Type->isVectorTy()) { + unsigned NumElts = Type->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (!Elt) + return llvm::None; + + if (isa(Elt)) + continue; + + // Bail out if we can't determine if this constant is min/max or if we + // know that this constant is min/max. + auto *CI = dyn_cast(Elt); + if (!CI || !ConstantIsOk(CI)) + return llvm::None; + } + } else { + // ConstantExpr? 
+ return llvm::None; + } + + CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred); + + // Increment or decrement the constant. + Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true); + Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne); + + return std::make_pair(NewPred, NewC); +} + /// If we have an icmp le or icmp ge instruction with a constant operand, turn /// it into the appropriate icmp lt or icmp gt instruction. This transform /// allows them to be folded in visitICmpInst. static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { ICmpInst::Predicate Pred = I.getPredicate(); - if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE && - Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE) + if (ICmpInst::isEquality(Pred) || !ICmpInst::isIntPredicate(Pred) || + isCanonicalPredicate(Pred)) return nullptr; Value *Op0 = I.getOperand(0); @@ -4806,47 +5216,11 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { if (!Op1C) return nullptr; - // Check if the constant operand can be safely incremented/decremented without - // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled - // the edge cases for us, so we just assert on them. For vectors, we must - // handle the edge cases. - Type *Op1Type = Op1->getType(); - bool IsSigned = I.isSigned(); - bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE); - auto *CI = dyn_cast(Op1C); - if (CI) { - // A <= MAX -> TRUE ; A >= MIN -> TRUE - assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned)); - } else if (Op1Type->isVectorTy()) { - // TODO? If the edge cases for vectors were guaranteed to be handled as they - // are for scalar, we could remove the min/max checks. However, to do that, - // we would have to use insertelement/shufflevector to replace edge values. 
- unsigned NumElts = Op1Type->getVectorNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = Op1C->getAggregateElement(i); - if (!Elt) - return nullptr; - - if (isa(Elt)) - continue; - - // Bail out if we can't determine if this constant is min/max or if we - // know that this constant is min/max. - auto *CI = dyn_cast(Elt); - if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned))) - return nullptr; - } - } else { - // ConstantExpr? + auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, Op1C); + if (!FlippedStrictness) return nullptr; - } - // Increment or decrement the constant and set the new comparison predicate: - // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT - Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true); - CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT: ICmpInst::ICMP_UGT; - NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred; - return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne)); + return new ICmpInst(FlippedStrictness->first, Op0, FlippedStrictness->second); } /// Integer compare with boolean values can always be turned into bitwise ops. 
@@ -5002,6 +5376,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp, Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; + const SimplifyQuery Q = SQ.getWithInstruction(&I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); unsigned Op0Cplxity = getComplexity(Op0); unsigned Op1Cplxity = getComplexity(Op1); @@ -5016,8 +5391,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, - SQ.getWithInstruction(&I))) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, Q)) return replaceInstUsesWith(I, V); // Comparing -val or val with non-zero is the same as just comparing val @@ -5050,6 +5424,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpWithDominatingICmp(I)) return Res; + if (Instruction *Res = foldICmpBinOp(I, Q)) + return Res; + if (Instruction *Res = foldICmpUsingKnownBits(I)) return Res; @@ -5098,6 +5475,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstant(I)) return Res; + // Try to match comparison as a sign bit test. Intentionally do this after + // foldICmpInstWithConstant() to potentially let other folds to happen first. + if (Instruction *New = foldSignBitTest(I)) + return New; + if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; @@ -5124,20 +5506,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpBitCast(I, Builder)) return Res; - if (isa(Op0)) { - // Handle the special case of: icmp (cast bool to X), - // This comes up when you have code like - // int X = A < B; - // if (X) ... - // For generality, we handle any zero-extension of any operand comparison - // with a constant or another cast from the same type. 
- if (isa(Op1) || isa(Op1)) - if (Instruction *R = foldICmpWithCastAndCast(I)) - return R; - } - - if (Instruction *Res = foldICmpBinOp(I)) - return Res; + if (Instruction *R = foldICmpWithCastOp(I)) + return R; if (Instruction *Res = foldICmpWithMinMax(I)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 434b0d59121..1dbc06d92e7 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -113,6 +113,48 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) { } } +/// Given an exploded icmp instruction, return true if the comparison only +/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the +/// result of the comparison is true when the input value is signed. +inline bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned) { + switch (Pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS.isNullValue(); + case ICmpInst::ICMP_SLE: // True if LHS s<= -1 + TrueIfSigned = true; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGE: // True if LHS s>= 0 + TrueIfSigned = false; + return RHS.isNullValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = true; + return RHS.isMaxSignedValue(); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULT: + // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = false; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULE: + // True if LHS u<= RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = false; + return RHS.isMaxSignedValue(); + default: + return false; + } +} + 
+llvm::Optional> +getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, Constant *C); + /// Return the source operand of a potentially bitcasted value while optionally /// checking if it has one use. If there is no bitcast or the one use check is /// not met, return the input value itself. @@ -139,32 +181,17 @@ static inline Constant *SubOne(Constant *C) { /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses /// is true, work under the assumption that the caller intends to remove all /// uses of V and only keep uses of ~V. -static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { +/// +/// See also: canFreelyInvertAllUsersOf() +static inline bool isFreeToInvert(Value *V, bool WillInvertAllUses) { // ~(~(X)) -> X. if (match(V, m_Not(m_Value()))) return true; // Constants can be considered to be not'ed values. - if (isa(V)) + if (match(V, m_AnyIntegralConstant())) return true; - // A vector of constant integers can be inverted easily. - if (V->getType()->isVectorTy() && isa(V)) { - unsigned NumElts = V->getType()->getVectorNumElements(); - for (unsigned i = 0; i != NumElts; ++i) { - Constant *Elt = cast(V)->getAggregateElement(i); - if (!Elt) - return false; - - if (isa(Elt)) - continue; - - if (!isa(Elt)) - return false; - } - return true; - } - // Compares can be inverted if all of their uses are being modified to use the // ~V. if (isa(V)) @@ -185,6 +212,32 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } +/// Given i1 V, can every user of V be freely adapted if V is changed to !V ? +/// +/// See also: isFreeToInvert() +static inline bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) { + // Look at every user of V. + for (User *U : V->users()) { + if (U == IgnoredUser) + continue; // Don't consider this user. 
+ + auto *I = cast(U); + switch (I->getOpcode()) { + case Instruction::Select: + case Instruction::Br: + break; // Free to invert by swapping true/false values/destinations. + case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it. + if (!match(I, m_Not(m_Value()))) + return false; // Not a 'not'. + break; + default: + return false; // Don't know, likely not freely invertible. + } + // So far all users were free to invert... + } + return true; // Can freely invert all users! +} + /// Some binary operators require special handling to avoid poison and undefined /// behavior. If a constant vector has undef elements, replace those undefs with /// identity constants if possible because those are always safe to execute. @@ -337,6 +390,13 @@ public: Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); + Value *reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction = false); + Instruction *canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract( + BinaryOperator &I); + Instruction *foldVariableSignZeroExtensionOfVariableHighBitExtract( + BinaryOperator &OldAShr); Instruction *visitAShr(BinaryOperator &I); Instruction *visitLShr(BinaryOperator &I); Instruction *commonShiftTransforms(BinaryOperator &I); @@ -541,6 +601,7 @@ private: Instruction *narrowMathIfNoOverflow(BinaryOperator &I); Instruction *narrowRotate(TruncInst &Trunc); Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN); + Instruction *matchSAddSubSat(SelectInst &MinMax1); /// Determine if a pair of casts can be replaced by a single cast. 
/// @@ -557,7 +618,7 @@ private: Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); - Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &I); /// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp). /// NOTE: Unlike most of instcombine, this returns a Value which should @@ -725,7 +786,7 @@ public: Value *LHS, Value *RHS, Instruction *CxtI) const; /// Maximum size of array considered when transforming. - uint64_t MaxArraySizeForCombine; + uint64_t MaxArraySizeForCombine = 0; private: /// Performs a few simplifications for operators which are associative @@ -798,7 +859,8 @@ private: int DmaskIdx = -1); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0, + bool AllowMultipleUsers = false); /// Canonicalize the position of binops relative to shufflevector. 
Instruction *foldVectorBinop(BinaryOperator &Inst); @@ -847,17 +909,21 @@ private: Constant *RHSC); Instruction *foldICmpAddOpConst(Value *X, const APInt &C, ICmpInst::Predicate Pred); - Instruction *foldICmpWithCastAndCast(ICmpInst &ICI); + Instruction *foldICmpWithCastOp(ICmpInst &ICI); Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp); Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp); Instruction *foldICmpWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstant(ICmpInst &Cmp); Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); - Instruction *foldICmpBinOp(ICmpInst &Cmp); + Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); Instruction *foldICmpEquality(ICmpInst &Cmp); + Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); + Instruction *foldSignBitTest(ICmpInst &I); Instruction *foldICmpWithZero(ICmpInst &Cmp); + Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp); + Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select, ConstantInt *C); Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc, @@ -874,6 +940,8 @@ private: const APInt &C); Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr, const APInt &C); + Instruction *foldICmpSRemConstant(ICmpInst &Cmp, BinaryOperator *UDiv, + const APInt &C); Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, const APInt &C); Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 054fb7da09a..3a0e05832fc 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -175,7 +175,7 @@ static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType()); if (!AllocaSize) return false; - return 
isDereferenceableAndAlignedPointer(V, AI->getAlignment(), + return isDereferenceableAndAlignedPointer(V, Align(AI->getAlignment()), APInt(64, AllocaSize), DL); } @@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { if (C->getValue().getActiveBits() <= 64) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); - New->setAlignment(AI.getAlignment()); + New->setAlignment(MaybeAlign(AI.getAlignment())); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info @@ -345,7 +345,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) - AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType())); + AI.setAlignment( + MaybeAlign(DL.getPrefTypeAlignment(AI.getAllocatedType()))); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should @@ -377,12 +378,12 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // assign it the preferred alignment. if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( - DL.getPrefTypeAlignment(EntryAI->getAllocatedType())); + MaybeAlign(DL.getPrefTypeAlignment(EntryAI->getAllocatedType()))); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. 
- unsigned MaxAlign = std::max(EntryAI->getAlignment(), - AI.getAlignment()); + const MaybeAlign MaxAlign( + std::max(EntryAI->getAlignment(), AI.getAlignment())); EntryAI->setAlignment(MaxAlign); if (AI.getType() != EntryAI->getType()) return new BitCastInst(EntryAI, AI.getType()); @@ -455,9 +456,6 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT Value *Ptr = LI.getPointerOperand(); unsigned AS = LI.getPointerAddressSpace(); - SmallVector, 8> MD; - LI.getAllMetadata(MD); - Value *NewPtr = nullptr; if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) && NewPtr->getType()->getPointerElementType() == NewTy && @@ -467,48 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); - MDBuilder MDB(NewLoad->getContext()); - for (const auto &MDPair : MD) { - unsigned ID = MDPair.first; - MDNode *N = MDPair.second; - // Note, essentially every kind of metadata should be preserved here! This - // routine is supposed to clone a load instruction changing *only its type*. - // The only metadata it makes sense to drop is metadata which is invalidated - // when the pointer type changes. This should essentially never be the case - // in LLVM, but we explicitly switch over only known metadata to be - // conservatively correct. If you are adding metadata to LLVM which pertains - // to loads, you almost certainly want to add it here. 
- switch (ID) { - case LLVMContext::MD_dbg: - case LLVMContext::MD_tbaa: - case LLVMContext::MD_prof: - case LLVMContext::MD_fpmath: - case LLVMContext::MD_tbaa_struct: - case LLVMContext::MD_invariant_load: - case LLVMContext::MD_alias_scope: - case LLVMContext::MD_noalias: - case LLVMContext::MD_nontemporal: - case LLVMContext::MD_mem_parallel_loop_access: - case LLVMContext::MD_access_group: - // All of these directly apply. - NewLoad->setMetadata(ID, N); - break; - - case LLVMContext::MD_nonnull: - copyNonnullMetadata(LI, N, *NewLoad); - break; - case LLVMContext::MD_align: - case LLVMContext::MD_dereferenceable: - case LLVMContext::MD_dereferenceable_or_null: - // These only directly apply if the new type is also a pointer. - if (NewTy->isPointerTy()) - NewLoad->setMetadata(ID, N); - break; - case LLVMContext::MD_range: - copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad); - break; - } - } + copyMetadataForLoad(*NewLoad, LI); return NewLoad; } @@ -1004,9 +961,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); if (KnownAlign > EffectiveLoadAlign) - LI.setAlignment(KnownAlign); + LI.setAlignment(MaybeAlign(KnownAlign)); else if (LoadAlign == 0) - LI.setAlignment(EffectiveLoadAlign); + LI.setAlignment(MaybeAlign(EffectiveLoadAlign)); // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) { @@ -1063,11 +1020,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
- unsigned Align = LI.getAlignment(); - if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align, - DL, SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align, - DL, SI)) { + const MaybeAlign Alignment(LI.getAlignment()); + if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), + Alignment, DL, SI) && + isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), + Alignment, DL, SI)) { LoadInst *V1 = Builder.CreateLoad(LI.getType(), SI->getOperand(1), SI->getOperand(1)->getName() + ".val"); @@ -1075,9 +1032,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Builder.CreateLoad(LI.getType(), SI->getOperand(2), SI->getOperand(2)->getName() + ".val"); assert(LI.isUnordered() && "implied by above"); - V1->setAlignment(Align); + V1->setAlignment(Alignment); V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); - V2->setAlignment(Align); + V2->setAlignment(Alignment); V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -1399,15 +1356,15 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { return eraseInstFromFunction(SI); // Attempt to improve the alignment. - unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT); - unsigned StoreAlign = SI.getAlignment(); - unsigned EffectiveStoreAlign = - StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType()); + const Align KnownAlign = Align(getOrEnforceKnownAlignment( + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT)); + const MaybeAlign StoreAlign = MaybeAlign(SI.getAlignment()); + const Align EffectiveStoreAlign = + StoreAlign ? *StoreAlign : Align(DL.getABITypeAlignment(Val->getType())); if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); - else if (StoreAlign == 0) + else if (!StoreAlign) SI.setAlignment(EffectiveStoreAlign); // Try to canonicalize the stored type. 
@@ -1622,8 +1579,8 @@ bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) { // Advance to a place where it is safe to insert the new store and insert it. BBI = DestBB->getFirstInsertionPt(); - StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), - SI.isVolatile(), SI.getAlignment(), + StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), + MaybeAlign(SI.getAlignment()), SI.getOrdering(), SI.getSyncScopeID()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(MergedLoc); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index cc753ce0531..0b9128a9f5a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -124,6 +124,50 @@ static Constant *getLogBase2(Type *Ty, Constant *C) { return ConstantVector::get(Elts); } +// TODO: This is a specific form of a much more general pattern. +// We could detect a select with any binop identity constant, or we +// could use SimplifyBinOp to see if either arm of the select reduces. +// But that needs to be done carefully and/or while removing potential +// reverse canonicalizations as in InstCombiner::foldSelectIntoOp(). 
+static Value *foldMulSelectToNegate(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + Value *Cond, *OtherOp; + + // mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp + // mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp)); + + // mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp + // mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())), + m_Value(OtherOp)))) + return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp); + + // fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp + // fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(1.0), + m_SpecificFP(-1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, OtherOp, Builder.CreateFNeg(OtherOp)); + } + + // fmul (select Cond, -1.0, 1.0), OtherOp --> select Cond, -OtherOp, OtherOp + // fmul OtherOp, (select Cond, -1.0, 1.0) --> select Cond, -OtherOp, OtherOp + if (match(&I, m_c_FMul(m_OneUse(m_Select(m_Value(Cond), m_SpecificFP(-1.0), + m_SpecificFP(1.0))), + m_Value(OtherOp)))) { + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + return Builder.CreateSelect(Cond, Builder.CreateFNeg(OtherOp), OtherOp); + } + + return nullptr; +} + Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyMulInst(I.getOperand(0), I.getOperand(1), SQ.getWithInstruction(&I))) @@ -213,6 +257,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Instruction *FoldedMul = 
foldBinOpIntoSelectOrPhi(I)) return FoldedMul; + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); + // Simplify mul instructions with a constant RHS. if (isa(Op1)) { // Canonicalize (X+C1)*CI -> X*CI+C1*CI. @@ -358,6 +405,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; + if (Value *FoldedMul = foldMulSelectToNegate(I, Builder)) + return replaceInstUsesWith(I, FoldedMul); + // X * -1.0 --> -X Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (match(Op1, m_SpecificFP(-1.0))) @@ -373,16 +423,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Constant(C))) return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I); - // Sink negation: -X * Y --> -(X * Y) - // But don't transform constant expressions because there's an inverse fold. - if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa(Op0)) - return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I); - - // Sink negation: Y * -X --> -(X * Y) - // But don't transform constant expressions because there's an inverse fold. 
- if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa(Op1)) - return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I); - // fabs(X) * fabs(X) -> X * X if (Op0 == Op1 && match(Op0, m_Intrinsic(m_Value(X)))) return BinaryOperator::CreateFMulFMF(X, X, &I); @@ -1211,8 +1251,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { !IsTan && match(Op0, m_Intrinsic(m_Value(X))) && match(Op1, m_Intrinsic(m_Specific(X))); - if ((IsTan || IsCot) && hasUnaryFloatFn(&TLI, I.getType(), LibFunc_tan, - LibFunc_tanf, LibFunc_tanl)) { + if ((IsTan || IsCot) && + hasFloatFn(&TLI, I.getType(), LibFunc_tan, LibFunc_tanf, LibFunc_tanl)) { IRBuilder<> B(&I); IRBuilder<>::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(I.getFastMathFlags()); @@ -1244,6 +1284,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return &I; } + // X / fabs(X) -> copysign(1.0, X) + // fabs(X) / X -> copysign(1.0, X) + if (I.hasNoNaNs() && I.hasNoInfs() && + (match(&I, + m_FDiv(m_Value(X), m_Intrinsic(m_Deferred(X)))) || + match(&I, m_FDiv(m_Intrinsic(m_Value(X)), + m_Deferred(X))))) { + Value *V = Builder.CreateBinaryIntrinsic( + Intrinsic::copysign, ConstantFP::get(I.getType(), 1.0), X, &I); + return replaceInstUsesWith(I, V); + } return nullptr; } @@ -1309,6 +1360,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { + // This may increase instruction count, we don't enforce that Y is a + // constant. 
Constant *N1 = Constant::getAllOnesValue(Ty); Value *Add = Builder.CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 5820ab72663..e0376b7582f 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -542,7 +542,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // visitLoadInst will propagate an alignment onto the load when TD is around, // and if TD isn't around, we can't handle the mixed case. bool isVolatile = FirstLI->isVolatile(); - unsigned LoadAlignment = FirstLI->getAlignment(); + MaybeAlign LoadAlignment(FirstLI->getAlignment()); unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the @@ -574,10 +574,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. - if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) + if ((LoadAlignment.hasValue()) != (LI->getAlignment() != 0)) return nullptr; - LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); + LoadAlignment = std::min(LoadAlignment, MaybeAlign(LI->getAlignment())); // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index aefaf5af175..9fc871e49b3 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -785,6 +785,41 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, return nullptr; } +/// Fold the following code sequence: +/// \code +/// int a = ctlz(x & -x); +/// x ? 
31 - a : a; +/// \endcode +/// +/// into: +/// cttz(x) +static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy &Builder) { + unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); + if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero())) + return nullptr; + + if (ICI->getPredicate() == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + if (!match(FalseVal, + m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1)))) + return nullptr; + + if (!match(TrueVal, m_Intrinsic())) + return nullptr; + + Value *X = ICI->getOperand(0); + auto *II = cast(TrueVal); + if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X))))) + return nullptr; + + Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz, + II->getType()); + return CallInst::Create(F, {X, II->getArgOperand(1)}); +} + /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. /// @@ -973,8 +1008,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, // If we are swapping the select operands, swap the metadata too. assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS && "Unexpected results from matchSelectPattern"); - Sel.setTrueValue(LHS); - Sel.setFalseValue(RHS); + Sel.swapValues(); Sel.swapProfMetadata(); return &Sel; } @@ -1056,17 +1090,293 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, } // We are swapping the select operands, so swap the metadata too. - Sel.setTrueValue(FVal); - Sel.setFalseValue(TVal); + Sel.swapValues(); Sel.swapProfMetadata(); return &Sel; } +static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp, + const SimplifyQuery &Q) { + // If this is a binary operator, try to simplify it with the replaced op + // because we know Op and ReplaceOp are equivalent. 
+ // For example: V = X + 1, Op = X, ReplaceOp = 42 + // Simplifies as: add(42, 1) --> 43 + if (auto *BO = dyn_cast(V)) { + if (BO->getOperand(0) == Op) + return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q); + if (BO->getOperand(1) == Op) + return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q); + } + + return nullptr; +} + +/// If we have a select with an equality comparison, then we know the value in +/// one of the arms of the select. See if substituting this value into an arm +/// and simplifying the result yields the same value as the other arm. +/// +/// To make this transform safe, we must drop poison-generating flags +/// (nsw, etc) if we simplified to a binop because the select may be guarding +/// that poison from propagating. If the existing binop already had no +/// poison-generating flags, then this transform can be done by instsimplify. +/// +/// Consider: +/// %cmp = icmp eq i32 %x, 2147483647 +/// %add = add nsw i32 %x, 1 +/// %sel = select i1 %cmp, i32 -2147483648, i32 %add +/// +/// We can't replace %sel with %add unless we strip away the flags. +/// TODO: Wrapping flags could be preserved in some cases with better analysis. +static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp, + const SimplifyQuery &Q) { + if (!Cmp.isEquality()) + return nullptr; + + // Canonicalize the pattern to ICMP_EQ by swapping the select operands. + Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue(); + if (Cmp.getPredicate() == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + // Try each equivalence substitution possibility. + // We have an 'EQ' comparison, so the select's false value will propagate. + // Example: + // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1 + // (X == 42) ? (X + 1) : 43 --> (X == 42) ? 
(42 + 1) : 43 --> 43 + Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1); + if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal || + simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal || + simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal || + simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) { + if (auto *FalseInst = dyn_cast(FalseVal)) + FalseInst->dropPoisonGeneratingFlags(); + return FalseVal; + } + return nullptr; +} + +// See if this is a pattern like: +// %old_cmp1 = icmp slt i32 %x, C2 +// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high +// %old_x_offseted = add i32 %x, C1 +// %old_cmp0 = icmp ult i32 %old_x_offseted, C0 +// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement +// This can be rewritten as more canonical pattern: +// %new_cmp1 = icmp slt i32 %x, -C1 +// %new_cmp2 = icmp sge i32 %x, C0-C1 +// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x +// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low +// Iff -C1 s<= C2 s<= C0-C1 +// Also ULT predicate can also be UGT iff C0 != -1 (+invert result) +// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.) +static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, + InstCombiner::BuilderTy &Builder) { + Value *X = Sel0.getTrueValue(); + Value *Sel1 = Sel0.getFalseValue(); + + // First match the condition of the outermost select. + // Said condition must be one-use. + if (!Cmp0.hasOneUse()) + return nullptr; + Value *Cmp00 = Cmp0.getOperand(0); + Constant *C0; + if (!match(Cmp0.getOperand(1), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))) + return nullptr; + // Canonicalize Cmp0 into the form we expect. + // FIXME: we shouldn't care about lanes that are 'undef' in the end? + switch (Cmp0.getPredicate()) { + case ICmpInst::Predicate::ICMP_ULT: + break; // Great! 
+ case ICmpInst::Predicate::ICMP_ULE: + // We'd have to increment C0 by one, and for that it must not have all-ones + // element, but then it would have been canonicalized to 'ult' before + // we get here. So we can't do anything useful with 'ule'. + return nullptr; + case ICmpInst::Predicate::ICMP_UGT: + // We want to canonicalize it to 'ult', so we'll need to increment C0, + // which again means it must not have any all-ones elements. + if (!match(C0, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE, + APInt::getAllOnesValue( + C0->getType()->getScalarSizeInBits())))) + return nullptr; // Can't do, have all-ones element[s]. + C0 = AddOne(C0); + std::swap(X, Sel1); + break; + case ICmpInst::Predicate::ICMP_UGE: + // The only way we'd get this predicate is if this `icmp` has extra uses, + // but then we won't be able to do this fold. + return nullptr; + default: + return nullptr; // Unknown predicate. + } + + // Now that we've canonicalized the ICmp, we know the X we expect; + // the select, on the other hand, should be one-use. + if (!Sel1->hasOneUse()) + return nullptr; + + // We now can finish matching the condition of the outermost select: + // it should either be the X itself, or an addition of some constant to X. + Constant *C1; + if (Cmp00 == X) + C1 = ConstantInt::getNullValue(Sel0.getType()); + else if (!match(Cmp00, + m_Add(m_Specific(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1))))) + return nullptr; + + Value *Cmp1; + ICmpInst::Predicate Pred1; + Constant *C2; + Value *ReplacementLow, *ReplacementHigh; + if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow), + m_Value(ReplacementHigh))) || + !match(Cmp1, + m_ICmp(Pred1, m_Specific(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2))))) + return nullptr; + + if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse())) + return nullptr; // Not enough one-use instructions for the fold. 
+ // FIXME: this restriction could be relaxed if Cmp1 can be reused as one of + // two comparisons we'll need to build. + + // Canonicalize Cmp1 into the form we expect. + // FIXME: we shouldn't care about lanes that are 'undef' in the end? + switch (Pred1) { + case ICmpInst::Predicate::ICMP_SLT: + break; + case ICmpInst::Predicate::ICMP_SLE: + // We'd have to increment C2 by one, and for that it must not have signed + // max element, but then it would have been canonicalized to 'slt' before + // we get here. So we can't do anything useful with 'sle'. + return nullptr; + case ICmpInst::Predicate::ICMP_SGT: + // We want to canonicalize it to 'slt', so we'll need to increment C2, + // which again means it must not have any signed max elements. + if (!match(C2, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE, + APInt::getSignedMaxValue( + C2->getType()->getScalarSizeInBits())))) + return nullptr; // Can't do, have signed max element[s]. + C2 = AddOne(C2); + LLVM_FALLTHROUGH; + case ICmpInst::Predicate::ICMP_SGE: + // Also non-canonical, but here we don't need to change C2, + // so we don't have any restrictions on C2, so we can just handle it. + std::swap(ReplacementLow, ReplacementHigh); + break; + default: + return nullptr; // Unknown predicate. + } + + // The thresholds of this clamp-like pattern. + auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); + auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + + // The fold has a precondition 1: C2 s>= ThresholdLow + auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2, + ThresholdLowIncl); + if (!match(Precond1, m_One())) + return nullptr; + // The fold has a precondition 2: C2 s<= ThresholdHigh + auto *Precond2 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SLE, C2, + ThresholdHighExcl); + if (!match(Precond2, m_One())) + return nullptr; + + // All good, finally emit the new pattern. 
+ Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl); + Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl); + Value *MaybeReplacedLow = + Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X); + Instruction *MaybeReplacedHigh = + SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow); + + return MaybeReplacedHigh; +} + +// If we have +// %cmp = icmp [canonical predicate] i32 %x, C0 +// %r = select i1 %cmp, i32 %y, i32 C1 +// Where C0 != C1 and %x may be different from %y, see if the constant that we +// will have if we flip the strictness of the predicate (i.e. without changing +// the result) is identical to the C1 in select. If it matches we can change +// original comparison to one with swapped predicate, reuse the constant, +// and swap the hands of select. +static Instruction * +tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, + InstCombiner::BuilderTy &Builder) { + ICmpInst::Predicate Pred; + Value *X; + Constant *C0; + if (!match(&Cmp, m_OneUse(m_ICmp( + Pred, m_Value(X), + m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))))) + return nullptr; + + // If comparison predicate is non-relational, we won't be able to do anything. + if (ICmpInst::isEquality(Pred)) + return nullptr; + + // If comparison predicate is non-canonical, then we certainly won't be able + // to make it canonical; canonicalizeCmpWithConstant() already tried. + if (!isCanonicalPredicate(Pred)) + return nullptr; + + // If the [input] type of comparison and select type are different, lets abort + // for now. We could try to compare constants with trunc/[zs]ext though. + if (C0->getType() != Sel.getType()) + return nullptr; + + // FIXME: are there any magic icmp predicate+constant pairs we must not touch? + + Value *SelVal0, *SelVal1; // We do not care which one is from where. 
+ match(&Sel, m_Select(m_Value(), m_Value(SelVal0), m_Value(SelVal1))); + // At least one of these values we are selecting between must be a constant + // else we'll never succeed. + if (!match(SelVal0, m_AnyIntegralConstant()) && + !match(SelVal1, m_AnyIntegralConstant())) + return nullptr; + + // Does this constant C match any of the `select` values? + auto MatchesSelectValue = [SelVal0, SelVal1](Constant *C) { + return C->isElementWiseEqual(SelVal0) || C->isElementWiseEqual(SelVal1); + }; + + // If C0 *already* matches true/false value of select, we are done. + if (MatchesSelectValue(C0)) + return nullptr; + + // Check the constant we'd have with flipped-strictness predicate. + auto FlippedStrictness = getFlippedStrictnessPredicateAndConstant(Pred, C0); + if (!FlippedStrictness) + return nullptr; + + // If said constant doesn't match either, then there is no hope, + if (!MatchesSelectValue(FlippedStrictness->second)) + return nullptr; + + // It matched! Lets insert the new comparison just before select. + InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&Sel); + + Pred = ICmpInst::getSwappedPredicate(Pred); // Yes, swapped. + Value *NewCmp = Builder.CreateICmp(Pred, X, FlippedStrictness->second, + Cmp.getName() + ".inv"); + Sel.setCondition(NewCmp); + Sel.swapValues(); + Sel.swapProfMetadata(); + + return &Sel; +} + /// Visit a SelectInst that has an ICmpInst as its first operand. 
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); + if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ)) + return replaceInstUsesWith(SI, V); if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; @@ -1074,12 +1384,21 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, Builder)) return NewAbs; + if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder)) + return NewAbs; + + if (Instruction *NewSel = + tryToReuseConstantFromSelectInComparison(SI, *ICI, Builder)) + return NewSel; + bool Changed = adjustMinMax(SI, *ICI); if (Value *V = foldSelectICmpAnd(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); // NOTE: if we wanted to, this is where to detect integer MIN/MAX + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -1149,6 +1468,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder)) return V; + if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder)) + return V; + if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); @@ -1253,6 +1575,16 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, } } + // max(max(A, B), min(A, B)) --> max(A, B) + // min(min(A, B), max(A, B)) --> min(A, B) + // TODO: This could be done in instsimplify. 
+ if (SPF1 == SPF2 && + ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) || + (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B)))))) + return replaceInstUsesWith(Outer, Inner); + // ABS(ABS(X)) -> ABS(X) // NABS(NABS(X)) -> NABS(X) // TODO: This could be done in instsimplify. @@ -1280,7 +1612,7 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, return true; } - if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) { + if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) { NotV = nullptr; return true; } @@ -1492,6 +1824,30 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { ConstantVector::get(Mask)); } +/// If we have a select of vectors with a scalar condition, try to convert that +/// to a vector select by splatting the condition. A splat may get folded with +/// other operations in IR and having all operands of a select be vector types +/// is likely better for vector codegen. +static Instruction *canonicalizeScalarSelectOfVecs( + SelectInst &Sel, InstCombiner::BuilderTy &Builder) { + Type *Ty = Sel.getType(); + if (!Ty->isVectorTy()) + return nullptr; + + // We can replace a single-use extract with constant index. + Value *Cond = Sel.getCondition(); + if (!match(Cond, m_OneUse(m_ExtractElement(m_Value(), m_ConstantInt())))) + return nullptr; + + // select (extelt V, Index), T, F --> select (splat V, Index), T, F + // Splatting the extracted condition reduces code (we could directly create a + // splat shuffle of the source vector to eliminate the intermediate step). 
+ unsigned NumElts = Ty->getVectorNumElements(); + Value *SplatCond = Builder.CreateVectorSplat(NumElts, Cond); + Sel.setCondition(SplatCond); + return &Sel; +} + /// Reuse bitcasted operands between a compare and select: /// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) --> /// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D)) @@ -1648,6 +2004,71 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, return nullptr; } +/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. +Instruction *InstCombiner::matchSAddSubSat(SelectInst &MinMax1) { + Type *Ty = MinMax1.getType(); + + // We are looking for a tree of: + // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) + // Where the min and max could be reversed + Instruction *MinMax2; + BinaryOperator *AddSub; + const APInt *MinValue, *MaxValue; + if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { + if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) + return nullptr; + } else if (match(&MinMax1, + m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { + if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) + return nullptr; + } else + return nullptr; + + // Check that the constants clamp a saturate, and that the new type would be + // sensible to convert to. + if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) + return nullptr; + // In what bitwidth can this be treated as saturating arithmetics? + unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; + // FIXME: This isn't quite right for vectors, but using the scalar type is a + // good first approximation for what should be done there. + if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) + return nullptr; + + // Also make sure that the number of uses is as expected. The "3"s are for the + // the two items of min/max (the compare and the select). 
+ if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3)) + return nullptr; + + // Create the new type (which can be a vector type) + Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); + // Match the two extends from the add/sub + Value *A, *B; + if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) + return nullptr; + // And check the incoming values are of a type smaller than or equal to the + // size of the saturation. Otherwise the higher bits can cause different + // results. + if (A->getType()->getScalarSizeInBits() > NewBitWidth || + B->getType()->getScalarSizeInBits() > NewBitWidth) + return nullptr; + + Intrinsic::ID IntrinsicID; + if (AddSub->getOpcode() == Instruction::Add) + IntrinsicID = Intrinsic::sadd_sat; + else if (AddSub->getOpcode() == Instruction::Sub) + IntrinsicID = Intrinsic::ssub_sat; + else + return nullptr; + + // Finally create and return the sat intrinsic, truncated to the new type + Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); + Value *AT = Builder.CreateSExt(A, NewTy); + Value *BT = Builder.CreateSExt(B, NewTy); + Value *Sat = Builder.CreateCall(F, {AT, BT}); + return CastInst::Create(Instruction::SExt, Sat, Ty); +} + /// Reduce a sequence of min/max with a common operand. static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, Value *RHS, @@ -1788,6 +2209,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *I = canonicalizeSelectToShuffle(SI)) return I; + if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, Builder)) + return I; + // Canonicalize a one-use integer compare with a non-canonical predicate by // inverting the predicate and swapping the select operands. This matches a // compare canonicalization for conditional branches. 
@@ -2013,16 +2437,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { (LHS->getType()->isFPOrFPVectorTy() && ((CmpLHS != LHS && CmpLHS != RHS) || (CmpRHS != LHS && CmpRHS != RHS)))) { - CmpInst::Predicate Pred = getMinMaxPred(SPF, SPR.Ordered); + CmpInst::Predicate MinMaxPred = getMinMaxPred(SPF, SPR.Ordered); Value *Cmp; - if (CmpInst::isIntPredicate(Pred)) { - Cmp = Builder.CreateICmp(Pred, LHS, RHS); + if (CmpInst::isIntPredicate(MinMaxPred)) { + Cmp = Builder.CreateICmp(MinMaxPred, LHS, RHS); } else { IRBuilder<>::FastMathFlagGuard FMFG(Builder); - auto FMF = cast(SI.getCondition())->getFastMathFlags(); + auto FMF = + cast(SI.getCondition())->getFastMathFlags(); Builder.setFastMathFlags(FMF); - Cmp = Builder.CreateFCmp(Pred, LHS, RHS); + Cmp = Builder.CreateFCmp(MinMaxPred, LHS, RHS); } Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI); @@ -2040,9 +2465,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { Value *A; if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) && - !IsFreeToInvert(A, A->hasOneUse()) && + !isFreeToInvert(A, A->hasOneUse()) && // Passing false to only consider m_Not and constants. 
- IsFreeToInvert(Y, false)) { + isFreeToInvert(Y, false)) { Value *B = Builder.CreateNot(Y); Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), A, B); @@ -2070,6 +2495,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) return I; + if (Instruction *I = matchSAddSubSat(SI)) + return I; } } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index c821292400c..64294838644 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -25,50 +25,275 @@ using namespace PatternMatch; // we should rewrite it as // x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x) // This is valid for any shift, but they must be identical. -static Instruction * -reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, - const SimplifyQuery &SQ) { - // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1 - Value *X, *ShAmt1, *ShAmt0; - Instruction *Sh1; - if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)), - m_Instruction(Sh1)), - m_Value(ShAmt0)))) +// +// AnalyzeForSignBitExtraction indicates that we will only analyze whether this +// pattern has any 2 right-shifts that sum to 1 less than original bit width. +Value *InstCombiner::reassociateShiftAmtsOfTwoSameDirectionShifts( + BinaryOperator *Sh0, const SimplifyQuery &SQ, + bool AnalyzeForSignBitExtraction) { + // Look for a shift of some instruction, ignore zext of shift amount if any. + Instruction *Sh0Op0; + Value *ShAmt0; + if (!match(Sh0, + m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0))))) return nullptr; - // The shift opcodes must be identical. - Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); - if (ShiftOpcode != Sh1->getOpcode()) + // If there is a truncation between the two shifts, we must make note of it + // and look through it. 
The truncation imposes additional constraints on the + // transform. + Instruction *Sh1; + Value *Trunc = nullptr; + match(Sh0Op0, + m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)), + m_Instruction(Sh1))); + + // Inner shift: (x shiftopcode ShAmt1) + // Like with other shift, ignore zext of shift amount if any. + Value *X, *ShAmt1; + if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) return nullptr; + + // We have two shift amounts from two different shifts. The types of those + // shift amounts may not match. If that's the case let's bailout now.. + if (ShAmt0->getType() != ShAmt1->getType()) + return nullptr; + + // We are only looking for signbit extraction if we have two right shifts. + bool HadTwoRightShifts = match(Sh0, m_Shr(m_Value(), m_Value())) && + match(Sh1, m_Shr(m_Value(), m_Value())); + // ... and if it's not two right-shifts, we know the answer already. + if (AnalyzeForSignBitExtraction && !HadTwoRightShifts) + return nullptr; + + // The shift opcodes must be identical, unless we are just checking whether + // this pattern can be interpreted as a sign-bit-extraction. + Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); + bool IdenticalShOpcodes = Sh0->getOpcode() == Sh1->getOpcode(); + if (!IdenticalShOpcodes && !AnalyzeForSignBitExtraction) + return nullptr; + + // If we saw truncation, we'll need to produce extra instruction, + // and for that one of the operands of the shift must be one-use, + // unless of course we don't actually plan to produce any instructions here. + if (Trunc && !AnalyzeForSignBitExtraction && + !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + // Can we fold (ShAmt0+ShAmt1) ? 
- Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1, - SQ.getWithInstruction(Sh0)); + auto *NewShAmt = dyn_cast_or_null( + SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false, + SQ.getWithInstruction(Sh0))); if (!NewShAmt) return nullptr; // Did not simplify. - // Is the new shift amount smaller than the bit width? - // FIXME: could also rely on ConstantRange. - unsigned BitWidth = X->getType()->getScalarSizeInBits(); + unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits(); + unsigned XBitWidth = X->getType()->getScalarSizeInBits(); + // Is the new shift amount smaller than the bit width of inner/new shift? if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, - APInt(BitWidth, BitWidth)))) - return nullptr; - // All good, we can do this fold. - BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); - // If both of the original shifts had the same flag set, preserve the flag. - if (ShiftOpcode == Instruction::BinaryOps::Shl) { - NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && - Sh1->hasNoUnsignedWrap()); - NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && - Sh1->hasNoSignedWrap()); - } else { - NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); + APInt(NewShAmtBitWidth, XBitWidth)))) + return nullptr; // FIXME: could perform constant-folding. + + // If there was a truncation, and we have a right-shift, we can only fold if + // we are left with the original sign bit. Likewise, if we were just checking + // that this is a sighbit extraction, this is the place to check it. + // FIXME: zero shift amount is also legal here, but we can't *easily* check + // more than one predicate so it's not really worth it. + if (HadTwoRightShifts && (Trunc || AnalyzeForSignBitExtraction)) { + // If it's not a sign bit extraction, then we're done. 
+ if (!match(NewShAmt, + m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(NewShAmtBitWidth, XBitWidth - 1)))) + return nullptr; + // If it is, and that was the question, return the base value. + if (AnalyzeForSignBitExtraction) + return X; } - return NewShift; + + assert(IdenticalShOpcodes && "Should not get here with different shifts."); + + // All good, we can do this fold. + NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType()); + + BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); + + // The flags can only be propagated if there wasn't a trunc. + if (!Trunc) { + // If the pattern did not involve trunc, and both of the original shifts + // had the same flag set, preserve the flag. + if (ShiftOpcode == Instruction::BinaryOps::Shl) { + NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && + Sh1->hasNoUnsignedWrap()); + NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && + Sh1->hasNoSignedWrap()); + } else { + NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); + } + } + + Instruction *Ret = NewShift; + if (Trunc) { + Builder.Insert(NewShift); + Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType()); + } + + return Ret; +} + +// Try to replace `undef` constants in C with Replacement. +static Constant *replaceUndefsWith(Constant *C, Constant *Replacement) { + if (C && match(C, m_Undef())) + return Replacement; + + if (auto *CV = dyn_cast(C)) { + llvm::SmallVector NewOps(CV->getNumOperands()); + for (unsigned i = 0, NumElts = NewOps.size(); i != NumElts; ++i) { + Constant *EltC = CV->getOperand(i); + NewOps[i] = EltC && match(EltC, m_Undef()) ? Replacement : EltC; + } + return ConstantVector::get(NewOps); + } + + // Don't know how to deal with this constant. + return C; +} + +// If we have some pattern that leaves only some low bits set, and then performs +// left-shift of those bits, if none of the bits that are left after the final +// shift are modified by the mask, we can omit the mask. 
+// +// There are many variants to this pattern: +// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt +// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt +// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt +// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt +// e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt +// f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt +// All these patterns can be simplified to just: +// x << ShiftShAmt +// iff: +// a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x) +// c,d,e,f) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt) +static Instruction * +dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, + const SimplifyQuery &Q, + InstCombiner::BuilderTy &Builder) { + assert(OuterShift->getOpcode() == Instruction::BinaryOps::Shl && + "The input must be 'shl'!"); + + Value *Masked, *ShiftShAmt; + match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt))); + + Type *NarrowestTy = OuterShift->getType(); + Type *WidestTy = Masked->getType(); + // The mask must be computed in a type twice as wide to ensure + // that no bits are lost if the sum-of-shifts is wider than the base type. + Type *ExtendedTy = WidestTy->getExtendedType(); + + Value *MaskShAmt; + + // ((1 << MaskShAmt) - 1) + auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes()); + // (~(-1 << maskNbits)) + auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes()); + // (-1 >> MaskShAmt) + auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt)); + // ((-1 << MaskShAmt) >> MaskShAmt) + auto MaskD = + m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt)); + + Value *X; + Constant *NewMask; + + if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) { + // Can we simplify (MaskShAmt+ShiftShAmt) ? + auto *SumOfShAmts = dyn_cast_or_null(SimplifyAddInst( + MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); + if (!SumOfShAmts) + return nullptr; // Did not simplify. 
+ // In this pattern SumOfShAmts correlates with the number of low bits + // that shall remain in the root value (OuterShift). + + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with final + // shift bitwidth to ensure that the value remains undef when creating the + // subsequent shift op. + SumOfShAmts = replaceUndefsWith( + SumOfShAmts, ConstantInt::get(SumOfShAmts->getType()->getScalarType(), + ExtendedTy->getScalarSizeInBits())); + auto *ExtendedSumOfShAmts = ConstantExpr::getZExt(SumOfShAmts, ExtendedTy); + // And compute the mask as usual: ~(-1 << (SumOfShAmts)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + auto *ExtendedInvertedMask = + ConstantExpr::getShl(ExtendedAllOnes, ExtendedSumOfShAmts); + NewMask = ConstantExpr::getNot(ExtendedInvertedMask); + } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) || + match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)), + m_Deferred(MaskShAmt)))) { + // Can we simplify (ShiftShAmt-MaskShAmt) ? + auto *ShAmtsDiff = dyn_cast_or_null(SimplifySubInst( + ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q)); + if (!ShAmtsDiff) + return nullptr; // Did not simplify. + // In this pattern ShAmtsDiff correlates with the number of high bits that + // shall be unset in the root value (OuterShift). + + // An extend of an undef value becomes zero because the high bits are never + // completely unknown. Replace the the `undef` shift amounts with negated + // bitwidth of innermost shift to ensure that the value remains undef when + // creating the subsequent shift op. 
+ unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits(); + ShAmtsDiff = replaceUndefsWith( + ShAmtsDiff, ConstantInt::get(ShAmtsDiff->getType()->getScalarType(), + -WidestTyBitWidth)); + auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt( + ConstantExpr::getSub(ConstantInt::get(ShAmtsDiff->getType(), + WidestTyBitWidth, + /*isSigned=*/false), + ShAmtsDiff), + ExtendedTy); + // And compute the mask as usual: (-1 l>> (NumHighBitsToClear)) + auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy); + NewMask = + ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear); + } else + return nullptr; // Don't know anything about this pattern. + + NewMask = ConstantExpr::getTrunc(NewMask, NarrowestTy); + + // Does this mask has any unset bits? If not then we can just not apply it. + bool NeedMask = !match(NewMask, m_AllOnes()); + + // If we need to apply a mask, there are several more restrictions we have. + if (NeedMask) { + // The old masking instruction must go away. + if (!Masked->hasOneUse()) + return nullptr; + // The original "masking" instruction must not have been`ashr`. + if (match(Masked, m_AShr(m_Value(), m_Value()))) + return nullptr; + } + + // No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits. + auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X, + OuterShift->getOperand(1)); + + if (!NeedMask) + return NewShift; + + Builder.Insert(NewShift); + return BinaryOperator::Create(Instruction::And, NewShift, NewMask); } Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); + // If the shift amount is a one-use `sext`, we can demote it to `zext`. + Value *Y; + if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) { + Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName()); + return BinaryOperator::Create(I.getOpcode(), Op0, NewExt); + } + // See if we can fold away this shift. 
if (SimplifyDemandedInstructionBits(I)) return &I; @@ -83,8 +308,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; - if (Instruction *NewShift = - reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) + if (auto *NewShift = cast_or_null( + reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))) return NewShift; // (C1 shift (A add C2)) -> (C1 shift C2) shift A) @@ -618,9 +843,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + const SimplifyQuery Q = SQ.getWithInstruction(&I); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), - I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), - SQ.getWithInstruction(&I))) + I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q)) return replaceInstUsesWith(I, V); if (Instruction *X = foldVectorBinop(I)) @@ -629,6 +855,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (Instruction *V = commonShiftTransforms(I)) return V; + if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder)) + return V; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); @@ -636,12 +865,11 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { const APInt *ShAmtAPInt; if (match(Op1, m_APInt(ShAmtAPInt))) { unsigned ShAmt = ShAmtAPInt->getZExtValue(); - unsigned BitWidth = Ty->getScalarSizeInBits(); // shl (zext X), ShAmt --> zext (shl X, ShAmt) // This is only valid if X would have zeros shifted out. 
Value *X; - if (match(Op0, m_ZExt(m_Value(X)))) { + if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) { unsigned SrcWidth = X->getType()->getScalarSizeInBits(); if (ShAmt < SrcWidth && MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)) @@ -719,6 +947,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { // (X * C2) << C1 --> X * (C2 << C1) if (match(Op0, m_Mul(m_Value(X), m_Constant(C2)))) return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1)); + + // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) + if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { + auto *NewC = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C1); + return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty)); + } } // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1 @@ -859,6 +1093,75 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return nullptr; } +Instruction * +InstCombiner::foldVariableSignZeroExtensionOfVariableHighBitExtract( + BinaryOperator &OldAShr) { + assert(OldAShr.getOpcode() == Instruction::AShr && + "Must be called with arithmetic right-shift instruction only."); + + // Check that constant C is a splat of the element-wise bitwidth of V. + auto BitWidthSplat = [](Constant *C, Value *V) { + return match( + C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ, + APInt(C->getType()->getScalarSizeInBits(), + V->getType()->getScalarSizeInBits()))); + }; + + // It should look like variable-length sign-extension on the outside: + // (Val << (bitwidth(Val)-Nbits)) a>> (bitwidth(Val)-Nbits) + Value *NBits; + Instruction *MaybeTrunc; + Constant *C1, *C2; + if (!match(&OldAShr, + m_AShr(m_Shl(m_Instruction(MaybeTrunc), + m_ZExtOrSelf(m_Sub(m_Constant(C1), + m_ZExtOrSelf(m_Value(NBits))))), + m_ZExtOrSelf(m_Sub(m_Constant(C2), + m_ZExtOrSelf(m_Deferred(NBits)))))) || + !BitWidthSplat(C1, &OldAShr) || !BitWidthSplat(C2, &OldAShr)) + return nullptr; + + // There may or may not be a truncation after outer two shifts. 
+ Instruction *HighBitExtract; + match(MaybeTrunc, m_TruncOrSelf(m_Instruction(HighBitExtract))); + bool HadTrunc = MaybeTrunc != HighBitExtract; + + // And finally, the innermost part of the pattern must be a right-shift. + Value *X, *NumLowBitsToSkip; + if (!match(HighBitExtract, m_Shr(m_Value(X), m_Value(NumLowBitsToSkip)))) + return nullptr; + + // Said right-shift must extract high NBits bits - C0 must be it's bitwidth. + Constant *C0; + if (!match(NumLowBitsToSkip, + m_ZExtOrSelf( + m_Sub(m_Constant(C0), m_ZExtOrSelf(m_Specific(NBits))))) || + !BitWidthSplat(C0, HighBitExtract)) + return nullptr; + + // Since the NBits is identical for all shifts, if the outermost and + // innermost shifts are identical, then outermost shifts are redundant. + // If we had truncation, do keep it though. + if (HighBitExtract->getOpcode() == OldAShr.getOpcode()) + return replaceInstUsesWith(OldAShr, MaybeTrunc); + + // Else, if there was a truncation, then we need to ensure that one + // instruction will go away. + if (HadTrunc && !match(&OldAShr, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) + return nullptr; + + // Finally, bypass two innermost shifts, and perform the outermost shift on + // the operands of the innermost shift. + Instruction *NewAShr = + BinaryOperator::Create(OldAShr.getOpcode(), X, NumLowBitsToSkip); + NewAShr->copyIRFlags(HighBitExtract); // We can preserve 'exact'-ness. + if (!HadTrunc) + return NewAShr; + + Builder.Insert(NewAShr); + return TruncInst::CreateTruncOrBitCast(NewAShr, OldAShr.getType()); +} + Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), SQ.getWithInstruction(&I))) @@ -933,6 +1236,9 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } } + if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I)) + return R; + // See if we can turn a signed shr into an unsigned shr. 
if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index e0d85c4b49a..d30ab800189 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -971,6 +971,13 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, APInt DemandedElts, int DMaskIdx) { + + // FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported. + if (DMaskIdx < 0 && + II->getType()->getScalarSizeInBits() != 32 && + DemandedElts.getActiveBits() == 3) + return nullptr; + unsigned VWidth = II->getType()->getVectorNumElements(); if (VWidth == 1) return nullptr; @@ -1067,16 +1074,22 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, } /// The specified value produces a vector with any number of elements. +/// This method analyzes which elements of the operand are undef and returns +/// that information in UndefElts. +/// /// DemandedElts contains the set of elements that are actually used by the -/// caller. This method analyzes which elements of the operand are undef and -/// returns that information in UndefElts. +/// caller, and by default (AllowMultipleUsers equals false) the value is +/// simplified only if it has a single caller. If AllowMultipleUsers is set +/// to true, DemandedElts refers to the union of sets of elements that are +/// used by all callers. /// /// If the information about demanded elements can be used to simplify the /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. 
Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, - unsigned Depth) { + unsigned Depth, + bool AllowMultipleUsers) { unsigned VWidth = V->getType()->getVectorNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); @@ -1130,19 +1143,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (Depth == 10) return nullptr; - // If multiple users are using the root value, proceed with - // simplification conservatively assuming that all elements - // are needed. - if (!V->hasOneUse()) { - // Quit if we find multiple users of a non-root value though. - // They'll be handled when it's their turn to be visited by - // the main instcombine process. - if (Depth != 0) - // TODO: Just compute the UndefElts information recursively. - return nullptr; + if (!AllowMultipleUsers) { + // If multiple users are using the root value, proceed with + // simplification conservatively assuming that all elements + // are needed. + if (!V->hasOneUse()) { + // Quit if we find multiple users of a non-root value though. + // They'll be handled when it's their turn to be visited by + // the main instcombine process. + if (Depth != 0) + // TODO: Just compute the UndefElts information recursively. + return nullptr; - // Conservatively assume that all elements are needed. - DemandedElts = EltMask; + // Conservatively assume that all elements are needed. 
+ DemandedElts = EltMask; + } } Instruction *I = dyn_cast(V); @@ -1674,8 +1689,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::amdgcn_buffer_load_format: case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_tbuffer_load: return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts); default: { if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dc9abdd7f47..9c890748e5a 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -253,6 +253,69 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, return nullptr; } +/// Find elements of V demanded by UserInstr. +static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + // Conservatively assume that all elements are needed. 
+ APInt UsedElts(APInt::getAllOnesValue(VWidth)); + + switch (UserInstr->getOpcode()) { + case Instruction::ExtractElement: { + ExtractElementInst *EEI = cast(UserInstr); + assert(EEI->getVectorOperand() == V); + ConstantInt *EEIIndexC = dyn_cast(EEI->getIndexOperand()); + if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { + UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); + } + break; + } + case Instruction::ShuffleVector: { + ShuffleVectorInst *Shuffle = cast(UserInstr); + unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements(); + + UsedElts = APInt(VWidth, 0); + for (unsigned i = 0; i < MaskNumElts; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (MaskVal == -1u || MaskVal >= 2 * VWidth) + continue; + if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) + UsedElts.setBit(MaskVal); + if (Shuffle->getOperand(1) == V && + ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) + UsedElts.setBit(MaskVal - VWidth); + } + break; + } + default: + break; + } + return UsedElts; +} + +/// Find union of elements of V demanded by all its users. +/// If it is known by querying findDemandedEltsBySingleUser that +/// no user demands an element of V, then the corresponding bit +/// remains unset in the returned value. 
+static APInt findDemandedEltsByAllUsers(Value *V) { + unsigned VWidth = V->getType()->getVectorNumElements(); + + APInt UnionUsedElts(VWidth, 0); + for (const Use &U : V->uses()) { + if (Instruction *I = dyn_cast(U.getUser())) { + UnionUsedElts |= findDemandedEltsBySingleUser(V, I); + } else { + UnionUsedElts = APInt::getAllOnesValue(VWidth); + break; + } + + if (UnionUsedElts.isAllOnesValue()) + break; + } + + return UnionUsedElts; +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *SrcVec = EI.getVectorOperand(); Value *Index = EI.getIndexOperand(); @@ -271,19 +334,35 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { return nullptr; // This instruction only demands the single element from the input vector. - // If the input vector has a single use, simplify it based on this use - // property. - if (SrcVec->hasOneUse() && NumElts != 1) { - APInt UndefElts(NumElts, 0); - APInt DemandedElts(NumElts, 0); - DemandedElts.setBit(IndexC->getZExtValue()); - if (Value *V = SimplifyDemandedVectorElts(SrcVec, DemandedElts, - UndefElts)) { - EI.setOperand(0, V); - return &EI; + if (NumElts != 1) { + // If the input vector has a single use, simplify it based on this use + // property. + if (SrcVec->hasOneUse()) { + APInt UndefElts(NumElts, 0); + APInt DemandedElts(NumElts, 0); + DemandedElts.setBit(IndexC->getZExtValue()); + if (Value *V = + SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) { + EI.setOperand(0, V); + return &EI; + } + } else { + // If the input vector has multiple uses, simplify it based on a union + // of all elements used. 
+ APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); + if (!DemandedElts.isAllOnesValue()) { + APInt UndefElts(NumElts, 0); + if (Value *V = SimplifyDemandedVectorElts( + SrcVec, DemandedElts, UndefElts, 0 /* Depth */, + true /* AllowMultipleUsers */)) { + if (V != SrcVec) { + SrcVec->replaceAllUsesWith(V); + return &EI; + } + } + } } } - if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) return I; @@ -766,6 +845,55 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); } +/// Try to fold an extract+insert element into an existing identity shuffle by +/// changing the shuffle's mask to include the index of this insert element. +static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { + // Check if the vector operand of this insert is an identity shuffle. + auto *Shuf = dyn_cast(InsElt.getOperand(0)); + if (!Shuf || !isa(Shuf->getOperand(1)) || + !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) + return nullptr; + + // Check for a constant insertion index. + uint64_t IdxC; + if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) + return nullptr; + + // Check if this insert's scalar op is extracted from the identity shuffle's + // input vector. + Value *Scalar = InsElt.getOperand(1); + Value *X = Shuf->getOperand(0); + if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC)))) + return nullptr; + + // Replace the shuffle mask element at the index of this extract+insert with + // that same index value. 
+ // For example: + // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' + unsigned NumMaskElts = Shuf->getType()->getVectorNumElements(); + SmallVector NewMaskVec(NumMaskElts); + Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext()); + Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC); + Constant *OldMask = Shuf->getMask(); + for (unsigned i = 0; i != NumMaskElts; ++i) { + if (i != IdxC) { + // All mask elements besides the inserted element remain the same. + NewMaskVec[i] = OldMask->getAggregateElement(i); + } else if (OldMask->getAggregateElement(i) == NewMaskEltC) { + // If the mask element was already set, there's nothing to do + // (demanded elements analysis may unset it later). + return nullptr; + } else { + assert(isa(OldMask->getAggregateElement(i)) && + "Unexpected shuffle mask element for identity shuffle"); + NewMaskVec[i] = NewMaskEltC; + } + } + + Constant *NewMask = ConstantVector::get(NewMaskVec); + return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); +} + /// If we have an insertelement instruction feeding into another insertelement /// and the 2nd is inserting a constant into the vector, canonicalize that /// constant insertion before the insertion of a variable: @@ -987,6 +1115,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Splat = foldInsEltIntoSplat(IE)) return Splat; + if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) + return IdentityShuf; + return nullptr; } @@ -1009,17 +1140,23 @@ static bool canEvaluateShuffled(Value *V, ArrayRef Mask, if (Depth == 0) return false; switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + // Propagating an undefined shuffle mask element to integer div/rem is not + // allowed because those opcodes can create immediate undefined behavior + // from an undefined element in an operand. 
+ if (llvm::any_of(Mask, [](int M){ return M == -1; })) + return false; + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: @@ -1040,9 +1177,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef Mask, case Instruction::FPExt: case Instruction::GetElementPtr: { // Bail out if we would create longer vector ops. We could allow creating - // longer vector ops, but that may result in more expensive codegen. We - // would also need to limit the transform to avoid undefined behavior for - // integer div/rem. + // longer vector ops, but that may result in more expensive codegen. Type *ITy = I->getType(); if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) return false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 385f4926b84..ecb486c544e 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -200,8 +200,8 @@ bool InstCombiner::shouldChangeType(Type *From, Type *To) const { // where both B and C should be ConstantInts, results in a constant that does // not overflow. This function only handles the Add and Sub opcodes. For // all other opcodes, the function conservatively returns false. 
-static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { - OverflowingBinaryOperator *OBO = dyn_cast(&I); +static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { + auto *OBO = dyn_cast(&I); if (!OBO || !OBO->hasNoSignedWrap()) return false; @@ -224,10 +224,15 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { } static bool hasNoUnsignedWrap(BinaryOperator &I) { - OverflowingBinaryOperator *OBO = dyn_cast(&I); + auto *OBO = dyn_cast(&I); return OBO && OBO->hasNoUnsignedWrap(); } +static bool hasNoSignedWrap(BinaryOperator &I) { + auto *OBO = dyn_cast(&I); + return OBO && OBO->hasNoSignedWrap(); +} + /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. @@ -332,22 +337,21 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); - // Conservatively clear the optional flags, since they may not be - // preserved by the reassociation. bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0); - bool IsNSW = MaintainNoSignedWrap(I, B, C); + bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0); + // Conservatively clear all optional flags since they may not be + // preserved by the reassociation. Reset nsw/nuw based on the above + // analysis. ClearSubclassDataAfterReassociation(I); + // Note: this is only valid because SimplifyBinOp doesn't look at + // the operands to Op0. if (IsNUW) I.setHasNoUnsignedWrap(true); - if (IsNSW && - (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { - // Note: this is only valid because SimplifyBinOp doesn't look at - // the operands to Op0. 
+ if (IsNSW) I.setHasNoSignedWrap(true); - } Changed = true; ++NumReassoc; @@ -610,7 +614,6 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I, HasNUW &= ROBO->hasNoUnsignedWrap(); } - const APInt *CInt; if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { // We can propagate 'nsw' if we know that @@ -620,6 +623,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I, // %Z = mul nsw i16 %X, C+1 // // iff C+1 isn't INT_MIN + const APInt *CInt; if (match(V, m_APInt(CInt))) { if (!CInt->isMinSignedValue()) BO->setHasNoSignedWrap(HasNSW); @@ -763,12 +767,16 @@ Value *InstCombiner::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, if (match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C))) && match(RHS, m_Select(m_Specific(A), m_Value(D), m_Value(E)))) { bool SelectsHaveOneUse = LHS->hasOneUse() && RHS->hasOneUse(); - BuilderTy::FastMathFlagGuard Guard(Builder); - if (isa(&I)) - Builder.setFastMathFlags(I.getFastMathFlags()); - Value *V1 = SimplifyBinOp(Opcode, C, E, SQ.getWithInstruction(&I)); - Value *V2 = SimplifyBinOp(Opcode, B, D, SQ.getWithInstruction(&I)); + FastMathFlags FMF; + BuilderTy::FastMathFlagGuard Guard(Builder); + if (isa(&I)) { + FMF = I.getFastMathFlags(); + Builder.setFastMathFlags(FMF); + } + + Value *V1 = SimplifyBinOp(Opcode, C, E, FMF, SQ.getWithInstruction(&I)); + Value *V2 = SimplifyBinOp(Opcode, B, D, FMF, SQ.getWithInstruction(&I)); if (V1 && V2) SI = Builder.CreateSelect(A, V2, V1); else if (V2 && SelectsHaveOneUse) @@ -1659,7 +1667,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // to an index of zero, so replace it with zero if it is not zero already. 
Type *EltTy = GTI.getIndexedType(); if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0) - if (!isa(*I) || !cast(*I)->isNullValue()) { + if (!isa(*I) || !match(I->get(), m_Zero())) { *I = Constant::getNullValue(NewIndexType); MadeChange = true; } @@ -2549,9 +2557,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True Value *X = nullptr; - BasicBlock *TrueDest; - BasicBlock *FalseDest; - if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && + if (match(&BI, m_Br(m_Not(m_Value(X)), m_BasicBlock(), m_BasicBlock())) && !isa(X)) { // Swap Destinations and condition... BI.setCondition(X); @@ -2569,8 +2575,8 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq. CmpInst::Predicate Pred; - if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest, - FalseDest)) && + if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), + m_BasicBlock(), m_BasicBlock())) && !isCanonicalPredicate(Pred)) { // Swap destinations and condition. CmpInst *Cond = cast(BI.getCondition()); @@ -3156,6 +3162,21 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { findDbgUsers(DbgUsers, I); for (auto *DII : reverse(DbgUsers)) { if (DII->getParent() == SrcBlock) { + if (isa(DII)) { + // A dbg.declare instruction should not be cloned, since there can only be + // one per variable fragment. It should be left in the original place since + // sunk instruction is not an alloca(otherwise we could not be here). + // But we need to update arguments of dbg.declare instruction, so that it + // would not point into sunk instruction. 
+ if (!isa(I)) + continue; // dbg.declare points at something it shouldn't + + DII->setOperand( + 0, MetadataAsValue::get(I->getContext(), + ValueAsMetadata::get(I->getOperand(0)))); + continue; + } + // dbg.value is in the same basic block as the sunk inst, see if we can // salvage it. Clone a new copy of the instruction: on success we need // both salvaged and unsalvaged copies. @@ -3580,7 +3601,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { // Required analyses. auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); auto &ORE = getAnalysis().getORE(); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6821e214e92..d92ee11c2e1 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -129,6 +129,8 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; static const uint64_t kAsanCtorAndDtorPriority = 1; +// On Emscripten, the system needs more than one priorities for constructors. +static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50; static const char *const kAsanReportErrorTemplate = "__asan_report_"; static const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *const kAsanUnregisterGlobalsName = @@ -191,6 +193,11 @@ static cl::opt ClRecover( cl::desc("Enable recovery mode (continue-after-error)."), cl::Hidden, cl::init(false)); +static cl::opt ClInsertVersionCheck( + "asan-guard-against-version-mismatch", + cl::desc("Guard against compiler/runtime version mismatch."), + cl::Hidden, cl::init(true)); + // This flag may need to be replaced with -f[no-]asan-reads. 
static cl::opt ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), @@ -530,6 +537,14 @@ static size_t RedzoneSizeForScale(int MappingScale) { return std::max(32U, 1U << MappingScale); } +static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { + if (TargetTriple.isOSEmscripten()) { + return kAsanEmscriptenCtorAndDtorPriority; + } else { + return kAsanCtorAndDtorPriority; + } +} + namespace { /// Module analysis for getting various metadata about the module. @@ -565,10 +580,10 @@ char ASanGlobalsMetadataWrapperPass::ID = 0; /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer { - AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) - : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) { + : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ? 
ClEnableKasan : CompileKernel; @@ -677,7 +692,7 @@ private: FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; InlineAsm *EmptyAsm; Value *LocalDynamicShadow = nullptr; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; DenseMap ProcessedAllocas; }; @@ -706,8 +721,8 @@ public: GlobalsMetadata &GlobalsMD = getAnalysis().getGlobalsMD(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); - AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover, + &getAnalysis().getTLI(F); + AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover, UseAfterScope); return ASan.instrumentFunction(F, TLI); } @@ -720,10 +735,10 @@ private: class ModuleAddressSanitizer { public: - ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, bool UseOdrIndicator = false) - : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), // Enable aliases as they should have no downside with ODR indicators. 
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), @@ -783,7 +798,7 @@ private: } int GetAsanVersion(const Module &M) const; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; bool CompileKernel; bool Recover; bool UseGlobalsGC; @@ -830,7 +845,7 @@ public: bool runOnModule(Module &M) override { GlobalsMetadata &GlobalsMD = getAnalysis().getGlobalsMD(); - ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); return ASanModule.instrumentModule(M); } @@ -1033,7 +1048,7 @@ struct FunctionStackPoisoner : public InstVisitor { if (!II.isLifetimeStartOrEnd()) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. - ConstantInt *Size = dyn_cast(II.getArgOperand(0)); + auto *Size = cast(II.getArgOperand(0)); // If size argument is undefined, don't do anything. if (Size->isMinusOne()) return; // Check that size doesn't saturate uint64_t and can @@ -1156,7 +1171,7 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F, Module &M = *F.getParent(); if (auto *R = MAM.getCachedResult(M)) { const TargetLibraryInfo *TLI = &AM.getResult(F); - AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope); + AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope); if (Sanitizer.instrumentFunction(F, TLI)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -1178,7 +1193,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel, PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, AnalysisManager &AM) { GlobalsMetadata &GlobalsMD = AM.getResult(M); - ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); if (Sanitizer.instrumentModule(M)) return PreservedAnalyses::none(); @@ 
-1331,7 +1346,7 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, unsigned *Alignment, Value **MaybeMask) { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) return nullptr; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) @@ -1775,9 +1790,10 @@ void ModuleAddressSanitizer::createInitializerPoisonCalls( // Must have a function or null ptr. if (Function *F = dyn_cast(CS->getOperand(1))) { if (F->getName() == kAsanModuleCtorName) continue; - ConstantInt *Priority = dyn_cast(CS->getOperand(0)); + auto *Priority = cast(CS->getOperand(0)); // Don't instrument CTORs that will run before asan.module_ctor. - if (Priority->getLimitedValue() <= kAsanCtorAndDtorPriority) continue; + if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple)) + continue; poisonOneInitializer(*F, ModuleName); } } @@ -1919,7 +1935,12 @@ StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const { case Triple::COFF: return ".ASAN$GL"; case Triple::ELF: return "asan_globals"; case Triple::MachO: return "__DATA,__asan_globals,regular"; - default: break; + case Triple::Wasm: + case Triple::XCOFF: + report_fatal_error( + "ModuleAddressSanitizer not implemented for object file format."); + case Triple::UnknownObjectFormat: + break; } llvm_unreachable("unsupported object format"); } @@ -2033,7 +2054,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsCOFF( unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType()); assert(isPowerOf2_32(SizeOfGlobalStruct) && "global metadata will not be padded appropriately"); - Metadata->setAlignment(SizeOfGlobalStruct); + Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct)); SetComdatForGlobalMetadata(G, Metadata, ""); } @@ -2170,7 +2191,7 @@ void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( M, ArrayOfGlobalStructTy, false, 
GlobalVariable::InternalLinkage, ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); if (Mapping.Scale > 3) - AllGlobals->setAlignment(1ULL << Mapping.Scale); + AllGlobals->setAlignment(Align(1ULL << Mapping.Scale)); IRB.CreateCall(AsanRegisterGlobals, {IRB.CreatePointerCast(AllGlobals, IntptrTy), @@ -2270,7 +2291,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setComdat(G->getComdat()); - NewGlobal->setAlignment(MinRZ); + NewGlobal->setAlignment(MaybeAlign(MinRZ)); // Don't fold globals with redzones. ODR violation detector and redzone // poisoning implicitly creates a dependence on the global's address, so it // is no longer valid for it to be marked unnamed_addr. @@ -2338,7 +2359,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, // Set meaningful attributes for indicator symbol. ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); - ODRIndicatorSym->setAlignment(1); + ODRIndicatorSym->setAlignment(Align::None()); ODRIndicator = ODRIndicatorSym; } @@ -2410,39 +2431,39 @@ bool ModuleAddressSanitizer::instrumentModule(Module &M) { // Create a module constructor. A destructor is created lazily because not all // platforms, and not all modules need it. + std::string AsanVersion = std::to_string(GetAsanVersion(M)); std::string VersionCheckName = - kAsanVersionCheckNamePrefix + std::to_string(GetAsanVersion(M)); + ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : ""; std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, VersionCheckName); bool CtorComdat = true; - bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. 
if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + InstrumentGlobals(IRB, M, &CtorComdat); } + const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple); + // Put the constructor and destructor in comdat if both // (1) global instrumentation is not TU-specific // (2) target is ELF. if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, - AsanCtorFunction); + appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction); if (AsanDtorFunction) { AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, - AsanDtorFunction); + appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction); } } else { - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + appendToGlobalCtors(M, AsanCtorFunction, Priority); if (AsanDtorFunction) - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); + appendToGlobalDtors(M, AsanDtorFunction, Priority); } - return Changed; + return true; } void AddressSanitizer::initializeCallbacks(Module &M) { @@ -2664,7 +2685,7 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (CS) { // A call inside BB. 
TempsToInstrument.clear(); - if (CS.doesNotReturn() && !CS->getMetadata("nosanitize")) + if (CS.doesNotReturn() && !CS->hasMetadata("nosanitize")) NoReturnCalls.push_back(CS.getInstruction()); } if (CallInst *CI = dyn_cast(&Inst)) @@ -2877,18 +2898,19 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { for (Argument &Arg : F.args()) { if (Arg.hasByValAttr()) { Type *Ty = Arg.getType()->getPointerElementType(); - unsigned Align = Arg.getParamAlignment(); - if (Align == 0) Align = DL.getABITypeAlignment(Ty); + unsigned Alignment = Arg.getParamAlignment(); + if (Alignment == 0) + Alignment = DL.getABITypeAlignment(Ty); AllocaInst *AI = IRB.CreateAlloca( Ty, nullptr, (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + ".byval"); - AI->setAlignment(Align); + AI->setAlignment(Align(Alignment)); Arg.replaceAllUsesWith(AI); uint64_t AllocSize = DL.getTypeAllocSize(Ty); - IRB.CreateMemCpy(AI, Align, &Arg, Align, AllocSize); + IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize); } } } @@ -2919,7 +2941,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout( } assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); - Alloca->setAlignment(FrameAlignment); + Alloca->setAlignment(MaybeAlign(FrameAlignment)); return IRB.CreatePointerCast(Alloca, IntptrTy); } @@ -2928,7 +2950,7 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { IRBuilder<> IRB(dyn_cast(FirstBB.begin())); DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr); IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout); - DynamicAllocaLayout->setAlignment(32); + DynamicAllocaLayout->setAlignment(Align(32)); } void FunctionStackPoisoner::processDynamicAllocas() { @@ -3275,7 +3297,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { // Insert new alloca with new NewSize and Align params. 
AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); - NewAlloca->setAlignment(Align); + NewAlloca->setAlignment(MaybeAlign(Align)); // NewAddress = Address + Align Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 4dc9b611c15..ae34be98653 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -224,7 +224,7 @@ struct BoundsCheckingLegacyPass : public FunctionPass { } bool runOnFunction(Function &F) override { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &SE = getAnalysis().getSE(); return addBoundsChecking(F, TLI, SE); } diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 971e0004176..8bb6f47c484 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -257,13 +257,13 @@ public: std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); if (Inserted) { // Newly inserted, update the real info. - Iter->second = std::move(llvm::make_unique(Index)); + Iter->second = std::move(std::make_unique(Index)); Index++; } std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); if (Inserted) // Newly inserted, update the real info. 
- Iter->second = std::move(llvm::make_unique(Index)); + Iter->second = std::move(std::make_unique(Index)); AllEdges.emplace_back(new Edge(Src, Dest, W)); return *AllEdges.back(); } diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 3f4f9bc7145..55c64fa4b72 100644 --- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -512,30 +512,38 @@ static bool isHoistable(Instruction *I, DominatorTree &DT) { // first-region entry block) or the (hoistable or unhoistable) base values that // are defined outside (including the first-region entry block) of the // scope. The returned set doesn't include constants. -static std::set getBaseValues(Value *V, - DominatorTree &DT) { +static std::set getBaseValues( + Value *V, DominatorTree &DT, + DenseMap> &Visited) { + if (Visited.count(V)) { + return Visited[V]; + } std::set Result; if (auto *I = dyn_cast(V)) { // We don't stop at a block that's not in the Scope because we would miss some // instructions that are based on the same base values if we stop there. if (!isHoistable(I, DT)) { Result.insert(I); + Visited.insert(std::make_pair(V, Result)); return Result; } // I is hoistable above the Scope. for (Value *Op : I->operands()) { - std::set OpResult = getBaseValues(Op, DT); + std::set OpResult = getBaseValues(Op, DT, Visited); Result.insert(OpResult.begin(), OpResult.end()); } + Visited.insert(std::make_pair(V, Result)); return Result; } if (isa(V)) { Result.insert(V); + Visited.insert(std::make_pair(V, Result)); return Result; } // We don't include others like constants because those won't lead to any // chance of folding of conditions (eg two bit checks merged into one check) // after CHR. 
+ Visited.insert(std::make_pair(V, Result)); return Result; // empty } @@ -1078,12 +1086,13 @@ static bool shouldSplit(Instruction *InsertPoint, if (!PrevConditionValues.empty() && !ConditionValues.empty()) { // Use std::set as DenseSet doesn't work with set_intersection. std::set PrevBases, Bases; + DenseMap> Visited; for (Value *V : PrevConditionValues) { - std::set BaseValues = getBaseValues(V, DT); + std::set BaseValues = getBaseValues(V, DT, Visited); PrevBases.insert(BaseValues.begin(), BaseValues.end()); } for (Value *V : ConditionValues) { - std::set BaseValues = getBaseValues(V, DT); + std::set BaseValues = getBaseValues(V, DT, Visited); Bases.insert(BaseValues.begin(), BaseValues.end()); } CHR_DEBUG( @@ -1538,10 +1547,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, } if (auto *SI = dyn_cast(U)) { // Swap operands - Value *TrueValue = SI->getTrueValue(); - Value *FalseValue = SI->getFalseValue(); - SI->setTrueValue(FalseValue); - SI->setFalseValue(TrueValue); + SI->swapValues(); SI->swapProfMetadata(); if (Scope->TrueBiasedSelects.count(SI)) { assert(Scope->FalseBiasedSelects.count(SI) == 0 && @@ -2073,7 +2079,7 @@ bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { getAnalysis().getPSI(); RegionInfo &RI = getAnalysis().getRegionInfo(); std::unique_ptr OwnedORE = - llvm::make_unique(&F); + std::make_unique(&F); return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2279c1bcb6a..c0353cba0b2 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1212,7 +1212,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, return DFS.ZeroShadow; case 1: { LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos); - LI->setAlignment(ShadowAlign); + LI->setAlignment(MaybeAlign(ShadowAlign)); return LI; 
} case 2: { diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 59950ffc4e9..ac6082441ea 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -86,7 +86,9 @@ public: ReversedVersion[3] = Options.Version[0]; ReversedVersion[4] = '\0'; } - bool runOnModule(Module &M, const TargetLibraryInfo &TLI); + bool + runOnModule(Module &M, + std::function GetTLI); private: // Create the .gcno files for the Module based on DebugInfo. @@ -102,9 +104,9 @@ private: std::vector &Regexes); // Get pointers to the functions in the runtime library. - FunctionCallee getStartFileFunc(); - FunctionCallee getEmitFunctionFunc(); - FunctionCallee getEmitArcsFunc(); + FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI); + FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI); + FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI); FunctionCallee getSummaryInfoFunc(); FunctionCallee getEndFileFunc(); @@ -127,7 +129,7 @@ private: SmallVector FileChecksums; Module *M; - const TargetLibraryInfo *TLI; + std::function GetTLI; LLVMContext *Ctx; SmallVector, 16> Funcs; std::vector FilterRe; @@ -147,8 +149,9 @@ public: StringRef getPassName() const override { return "GCOV Profiler"; } bool runOnModule(Module &M) override { - auto &TLI = getAnalysis().getTLI(); - return Profiler.runOnModule(M, TLI); + return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & { + return getAnalysis().getTLI(F); + }); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -555,9 +558,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, return CurPath.str(); } -bool GCOVProfiler::runOnModule(Module &M, const TargetLibraryInfo &TLI) { +bool GCOVProfiler::runOnModule( + Module &M, std::function GetTLI) { this->M = &M; - this->TLI = &TLI; + this->GetTLI = std::move(GetTLI); Ctx = &M.getContext(); AddFlushBeforeForkAndExec(); @@ 
-574,9 +578,12 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M, ModuleAnalysisManager &AM) { GCOVProfiler Profiler(GCOVOpts); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); - auto &TLI = AM.getResult(M); - if (!Profiler.runOnModule(M, TLI)) + if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + })) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -624,6 +631,7 @@ static bool shouldKeepInEntry(BasicBlock::iterator It) { void GCOVProfiler::AddFlushBeforeForkAndExec() { SmallVector ForkAndExecs; for (auto &F : M->functions()) { + auto *TLI = &GetTLI(F); for (auto &I : instructions(F)) { if (CallInst *CI = dyn_cast(&I)) { if (Function *Callee = CI->getCalledFunction()) { @@ -669,7 +677,8 @@ void GCOVProfiler::emitProfileNotes() { continue; std::error_code EC; - raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, sys::fs::F_None); + raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, + sys::fs::OF_None); if (EC) { Ctx->emitError(Twine("failed to open coverage notes file for writing: ") + EC.message()); @@ -695,7 +704,7 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique(SP, &F, &out, FunctionIdent++, + Funcs.push_back(std::make_unique(SP, &F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); @@ -873,7 +882,7 @@ bool GCOVProfiler::emitProfileArcs() { return Result; } -FunctionCallee GCOVProfiler::getStartFileFunc() { +FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt8PtrTy(*Ctx), // const char *orig_filename Type::getInt8PtrTy(*Ctx), // const char version[4] @@ -887,7 +896,7 @@ FunctionCallee GCOVProfiler::getStartFileFunc() { return Res; } -FunctionCallee GCOVProfiler::getEmitFunctionFunc() { +FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { 
Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name @@ -906,7 +915,7 @@ FunctionCallee GCOVProfiler::getEmitFunctionFunc() { return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } -FunctionCallee GCOVProfiler::getEmitArcsFunc() { +FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) { Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters @@ -943,9 +952,11 @@ Function *GCOVProfiler::insertCounterWriteout( BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); IRBuilder<> Builder(BB); - FunctionCallee StartFile = getStartFileFunc(); - FunctionCallee EmitFunction = getEmitFunctionFunc(); - FunctionCallee EmitArcs = getEmitArcsFunc(); + auto *TLI = &GetTLI(*WriteoutF); + + FunctionCallee StartFile = getStartFileFunc(TLI); + FunctionCallee EmitFunction = getEmitFunctionFunc(TLI); + FunctionCallee EmitArcs = getEmitArcsFunc(TLI); FunctionCallee SummaryInfo = getSummaryInfoFunc(); FunctionCallee EndFile = getEndFileFunc(); diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 90a9f4955a4..f87132ee475 100644 --- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -52,7 +54,10 @@ using namespace llvm; #define DEBUG_TYPE "hwasan" static const char *const kHwasanModuleCtorName = "hwasan.module_ctor"; +static const char *const kHwasanNoteName = 
"hwasan.note"; static const char *const kHwasanInitName = "__hwasan_init"; +static const char *const kHwasanPersonalityThunkName = + "__hwasan_personality_thunk"; static const char *const kHwasanShadowMemoryDynamicAddress = "__hwasan_shadow_memory_dynamic_address"; @@ -112,6 +117,9 @@ static cl::opt ClGenerateTagsWithCalls( cl::desc("generate new tags with runtime library calls"), cl::Hidden, cl::init(false)); +static cl::opt ClGlobals("hwasan-globals", cl::desc("Instrument globals"), + cl::Hidden, cl::init(false)); + static cl::opt ClMatchAllTag( "hwasan-match-all-tag", cl::desc("don't report bad accesses via pointers with this tag"), @@ -155,8 +163,18 @@ static cl::opt static cl::opt ClInstrumentLandingPads("hwasan-instrument-landing-pads", - cl::desc("instrument landing pads"), cl::Hidden, - cl::init(true)); + cl::desc("instrument landing pads"), cl::Hidden, + cl::init(false), cl::ZeroOrMore); + +static cl::opt ClUseShortGranules( + "hwasan-use-short-granules", + cl::desc("use short granules in allocas and outlined checks"), cl::Hidden, + cl::init(false), cl::ZeroOrMore); + +static cl::opt ClInstrumentPersonalityFunctions( + "hwasan-instrument-personality-functions", + cl::desc("instrument personality functions"), cl::Hidden, cl::init(false), + cl::ZeroOrMore); static cl::opt ClInlineAllChecks("hwasan-inline-all-checks", cl::desc("inline all checks"), @@ -169,16 +187,16 @@ namespace { class HWAddressSanitizer { public: explicit HWAddressSanitizer(Module &M, bool CompileKernel = false, - bool Recover = false) { + bool Recover = false) : M(M) { this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ? 
ClEnableKhwasan : CompileKernel; - initializeModule(M); + initializeModule(); } bool sanitizeFunction(Function &F); - void initializeModule(Module &M); + void initializeModule(); void initializeCallbacks(Module &M); @@ -216,9 +234,14 @@ public: Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); + void instrumentGlobal(GlobalVariable *GV, uint8_t Tag); + void instrumentGlobals(); + + void instrumentPersonalityFunctions(); + private: LLVMContext *C; - std::string CurModuleUniqueId; + Module &M; Triple TargetTriple; FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset; FunctionCallee HWAsanHandleVfork; @@ -238,17 +261,21 @@ private: bool InTls; void init(Triple &TargetTriple); - unsigned getAllocaAlignment() const { return 1U << Scale; } + unsigned getObjectAlignment() const { return 1U << Scale; } }; ShadowMapping Mapping; + Type *VoidTy = Type::getVoidTy(M.getContext()); Type *IntptrTy; Type *Int8PtrTy; Type *Int8Ty; Type *Int32Ty; + Type *Int64Ty = Type::getInt64Ty(M.getContext()); bool CompileKernel; bool Recover; + bool UseShortGranules; + bool InstrumentLandingPads; Function *HwasanCtorFunction; @@ -278,7 +305,7 @@ public: StringRef getPassName() const override { return "HWAddressSanitizer"; } bool doInitialization(Module &M) override { - HWASan = llvm::make_unique(M, CompileKernel, Recover); + HWASan = std::make_unique(M, CompileKernel, Recover); return true; } @@ -333,7 +360,7 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M, /// Module-level initialization. /// /// inserts a call to __hwasan_init to the module's constructor list. 
-void HWAddressSanitizer::initializeModule(Module &M) { +void HWAddressSanitizer::initializeModule() { LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n"); auto &DL = M.getDataLayout(); @@ -342,7 +369,6 @@ void HWAddressSanitizer::initializeModule(Module &M) { Mapping.init(TargetTriple); C = &(M.getContext()); - CurModuleUniqueId = getUniqueModuleId(&M); IRBuilder<> IRB(*C); IntptrTy = IRB.getIntPtrTy(DL); Int8PtrTy = IRB.getInt8PtrTy(); @@ -350,6 +376,21 @@ void HWAddressSanitizer::initializeModule(Module &M) { Int32Ty = IRB.getInt32Ty(); HwasanCtorFunction = nullptr; + + // Older versions of Android do not have the required runtime support for + // short granules, global or personality function instrumentation. On other + // platforms we currently require using the latest version of the runtime. + bool NewRuntime = + !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30); + + UseShortGranules = + ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime; + + // If we don't have personality function support, fall back to landing pads. + InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences() + ? ClInstrumentLandingPads + : !NewRuntime; + if (!CompileKernel) { std::tie(HwasanCtorFunction, std::ignore) = getOrCreateSanitizerCtorAndInitFunctions( @@ -363,6 +404,18 @@ void HWAddressSanitizer::initializeModule(Module &M) { Ctor->setComdat(CtorComdat); appendToGlobalCtors(M, Ctor, 0, Ctor); }); + + bool InstrumentGlobals = + ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime; + if (InstrumentGlobals) + instrumentGlobals(); + + bool InstrumentPersonalityFunctions = + ClInstrumentPersonalityFunctions.getNumOccurrences() + ? 
ClInstrumentPersonalityFunctions + : NewRuntime; + if (InstrumentPersonalityFunctions) + instrumentPersonalityFunctions(); } if (!TargetTriple.isAndroid()) { @@ -456,7 +509,7 @@ Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, unsigned *Alignment, Value **MaybeMask) { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) return nullptr; + if (I->hasMetadata("nosanitize")) return nullptr; // Do not instrument the load fetching the dynamic shadow address. if (LocalDynamicShadow == I) @@ -564,9 +617,11 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite, TargetTriple.isOSBinFormatELF() && !Recover) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy); - IRB.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess), - {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)}); + IRB.CreateCall(Intrinsic::getDeclaration( + M, UseShortGranules + ? 
Intrinsic::hwasan_check_memaccess_shortgranules + : Intrinsic::hwasan_check_memaccess), + {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)}); return; } @@ -718,7 +773,9 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size) { - size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment()); + size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + if (!UseShortGranules) + Size = AlignedSize; Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty()); if (ClInstrumentWithCalls) { @@ -738,7 +795,7 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1); if (Size != AlignedSize) { IRB.CreateStore( - ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()), + ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()), IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize)); IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32( Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy), @@ -778,8 +835,9 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). 
Module *M = IRB.GetInsertBlock()->getParent()->getParent(); - auto GetStackPointerFn = - Intrinsic::getDeclaration(M, Intrinsic::frameaddress); + auto GetStackPointerFn = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); Value *StackPointer = IRB.CreateCall( GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}); @@ -912,8 +970,10 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { PC = readRegister(IRB, "pc"); else PC = IRB.CreatePtrToInt(F, IntptrTy); - auto GetStackPointerFn = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress); + Module *M = F->getParent(); + auto GetStackPointerFn = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); Value *SP = IRB.CreatePtrToInt( IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}), @@ -999,11 +1059,8 @@ bool HWAddressSanitizer::instrumentStack( AI->hasName() ? AI->getName().str() : "alloca." + itostr(N); Replacement->setName(Name + ".hwasan"); - for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { - Use &U = *UI++; - if (U.getUser() != AILong) - U.set(Replacement); - } + AI->replaceUsesWithIf(Replacement, + [AILong](Use &U) { return U.getUser() != AILong; }); for (auto *DDI : AllocaDeclareMap.lookup(AI)) { DIExpression *OldExpr = DDI->getExpression(); @@ -1020,7 +1077,7 @@ bool HWAddressSanitizer::instrumentStack( // Re-tag alloca memory with the special UAR tag. 
Value *Tag = getUARTag(IRB, StackTag); - tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment())); + tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment())); } } @@ -1074,7 +1131,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (auto *Alloca = dyn_cast_or_null(DDI->getAddress())) AllocaDeclareMap[Alloca].push_back(DDI); - if (ClInstrumentLandingPads && isa(Inst)) + if (InstrumentLandingPads && isa(Inst)) LandingPadVec.push_back(&Inst); Value *MaybeMask = nullptr; @@ -1093,6 +1150,13 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { if (!LandingPadVec.empty()) instrumentLandingPads(LandingPadVec); + if (AllocasToInstrument.empty() && F.hasPersonalityFn() && + F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { + // __hwasan_personality_thunk is a no-op for functions without an + // instrumented stack, so we can drop it. + F.setPersonalityFn(nullptr); + } + if (AllocasToInstrument.empty() && ToInstrument.empty()) return false; @@ -1118,8 +1182,9 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { DenseMap AllocaToPaddedAllocaMap; for (AllocaInst *AI : AllocasToInstrument) { uint64_t Size = getAllocaSizeInBytes(*AI); - uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment()); - AI->setAlignment(std::max(AI->getAlignment(), 16u)); + uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + AI->setAlignment( + MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); if (Size != AlignedSize) { Type *AllocatedType = AI->getAllocatedType(); if (AI->isArrayAllocation()) { @@ -1132,7 +1197,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { auto *NewAI = new AllocaInst( TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); NewAI->takeName(AI); - NewAI->setAlignment(AI->getAlignment()); + NewAI->setAlignment(MaybeAlign(AI->getAlignment())); NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); NewAI->setSwiftError(AI->isSwiftError()); 
NewAI->copyMetadata(*AI); @@ -1179,6 +1244,257 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { return Changed; } +void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { + Constant *Initializer = GV->getInitializer(); + uint64_t SizeInBytes = + M.getDataLayout().getTypeAllocSize(Initializer->getType()); + uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment()); + if (SizeInBytes != NewSize) { + // Pad the initializer out to the next multiple of 16 bytes and add the + // required short granule tag. + std::vector Init(NewSize - SizeInBytes, 0); + Init.back() = Tag; + Constant *Padding = ConstantDataArray::get(*C, Init); + Initializer = ConstantStruct::getAnon({Initializer, Padding}); + } + + auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(), + GlobalValue::ExternalLinkage, Initializer, + GV->getName() + ".hwasan"); + NewGV->copyAttributesFrom(GV); + NewGV->setLinkage(GlobalValue::PrivateLinkage); + NewGV->copyMetadata(GV, 0); + NewGV->setAlignment( + MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment()))); + + // It is invalid to ICF two globals that have different tags. In the case + // where the size of the global is a multiple of the tag granularity the + // contents of the globals may be the same but the tags (i.e. symbol values) + // may be different, and the symbols are not considered during ICF. In the + // case where the size is not a multiple of the granularity, the short granule + // tags would discriminate two globals with different tags, but there would + // otherwise be nothing stopping such a global from being incorrectly ICF'd + // with an uninstrumented (i.e. tag 0) global that happened to have the short + // granule tag in the last byte. 
+ NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None); + + // Descriptor format (assuming little-endian): + // bytes 0-3: relative address of global + // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case + // it isn't, we create multiple descriptors) + // byte 7: tag + auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty); + const uint64_t MaxDescriptorSize = 0xfffff0; + for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes; + DescriptorPos += MaxDescriptorSize) { + auto *Descriptor = + new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage, + nullptr, GV->getName() + ".hwasan.descriptor"); + auto *GVRelPtr = ConstantExpr::getTrunc( + ConstantExpr::getAdd( + ConstantExpr::getSub( + ConstantExpr::getPtrToInt(NewGV, Int64Ty), + ConstantExpr::getPtrToInt(Descriptor, Int64Ty)), + ConstantInt::get(Int64Ty, DescriptorPos)), + Int32Ty); + uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize); + auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24)); + Descriptor->setComdat(NewGV->getComdat()); + Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag})); + Descriptor->setSection("hwasan_globals"); + Descriptor->setMetadata(LLVMContext::MD_associated, + MDNode::get(*C, ValueAsMetadata::get(NewGV))); + appendToCompilerUsed(M, Descriptor); + } + + Constant *Aliasee = ConstantExpr::getIntToPtr( + ConstantExpr::getAdd( + ConstantExpr::getPtrToInt(NewGV, Int64Ty), + ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)), + GV->getType()); + auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(), + GV->getLinkage(), "", Aliasee, &M); + Alias->setVisibility(GV->getVisibility()); + Alias->takeName(GV); + GV->replaceAllUsesWith(Alias); + GV->eraseFromParent(); +} + +void HWAddressSanitizer::instrumentGlobals() { + // Start by creating a note that contains pointers to the list of global + // descriptors. 
Adding a note to the output file will cause the linker to + // create a PT_NOTE program header pointing to the note that we can use to + // find the descriptor list starting from the program headers. A function + // provided by the runtime initializes the shadow memory for the globals by + // accessing the descriptor list via the note. The dynamic loader needs to + // call this function whenever a library is loaded. + // + // The reason why we use a note for this instead of a more conventional + // approach of having a global constructor pass a descriptor list pointer to + // the runtime is because of an order of initialization problem. With + // constructors we can encounter the following problematic scenario: + // + // 1) library A depends on library B and also interposes one of B's symbols + // 2) B's constructors are called before A's (as required for correctness) + // 3) during construction, B accesses one of its "own" globals (actually + // interposed by A) and triggers a HWASAN failure due to the initialization + // for A not having happened yet + // + // Even without interposition it is possible to run into similar situations in + // cases where two libraries mutually depend on each other. + // + // We only need one note per binary, so put everything for the note in a + // comdat. + Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName); + + Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0); + auto Start = + new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, + nullptr, "__start_hwasan_globals"); + Start->setVisibility(GlobalValue::HiddenVisibility); + Start->setDSOLocal(true); + auto Stop = + new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage, + nullptr, "__stop_hwasan_globals"); + Stop->setVisibility(GlobalValue::HiddenVisibility); + Stop->setDSOLocal(true); + + // Null-terminated so actually 8 bytes, which are required in order to align + // the note properly. 
+ auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0"); + + auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(), + Int32Ty, Int32Ty); + auto *Note = + new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true, + GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName); + Note->setSection(".note.hwasan.globals"); + Note->setComdat(NoteComdat); + Note->setAlignment(Align(4)); + Note->setDSOLocal(true); + + // The pointers in the note need to be relative so that the note ends up being + // placed in rodata, which is the standard location for notes. + auto CreateRelPtr = [&](Constant *Ptr) { + return ConstantExpr::getTrunc( + ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty), + ConstantExpr::getPtrToInt(Note, Int64Ty)), + Int32Ty); + }; + Note->setInitializer(ConstantStruct::getAnon( + {ConstantInt::get(Int32Ty, 8), // n_namesz + ConstantInt::get(Int32Ty, 8), // n_descsz + ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type + Name, CreateRelPtr(Start), CreateRelPtr(Stop)})); + appendToCompilerUsed(M, Note); + + // Create a zero-length global in hwasan_globals so that the linker will + // always create start and stop symbols. + auto Dummy = new GlobalVariable( + M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage, + Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global"); + Dummy->setSection("hwasan_globals"); + Dummy->setComdat(NoteComdat); + Dummy->setMetadata(LLVMContext::MD_associated, + MDNode::get(*C, ValueAsMetadata::get(Note))); + appendToCompilerUsed(M, Dummy); + + std::vector Globals; + for (GlobalVariable &GV : M.globals()) { + if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") || + GV.isThreadLocal()) + continue; + + // Common symbols can't have aliases point to them, so they can't be tagged. 
+ if (GV.hasCommonLinkage()) + continue; + + // Globals with custom sections may be used in __start_/__stop_ enumeration, + // which would be broken both by adding tags and potentially by the extra + // padding/alignment that we insert. + if (GV.hasSection()) + continue; + + Globals.push_back(&GV); + } + + MD5 Hasher; + Hasher.update(M.getSourceFileName()); + MD5::MD5Result Hash; + Hasher.final(Hash); + uint8_t Tag = Hash[0]; + + for (GlobalVariable *GV : Globals) { + // Skip tag 0 in order to avoid collisions with untagged memory. + if (Tag == 0) + Tag = 1; + instrumentGlobal(GV, Tag++); + } +} + +void HWAddressSanitizer::instrumentPersonalityFunctions() { + // We need to untag stack frames as we unwind past them. That is the job of + // the personality function wrapper, which either wraps an existing + // personality function or acts as a personality function on its own. Each + // function that has a personality function or that can be unwound past has + // its personality function changed to a thunk that calls the personality + // function wrapper in the runtime. + MapVector> PersonalityFns; + for (Function &F : M) { + if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress)) + continue; + + if (F.hasPersonalityFn()) { + PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F); + } else if (!F.hasFnAttribute(Attribute::NoUnwind)) { + PersonalityFns[nullptr].push_back(&F); + } + } + + if (PersonalityFns.empty()) + return; + + FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction( + "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, + Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy); + FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy); + FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy); + + for (auto &P : PersonalityFns) { + std::string ThunkName = kHwasanPersonalityThunkName; + if (P.first) + ThunkName += ("." 
+ P.first->getName()).str(); + FunctionType *ThunkFnTy = FunctionType::get( + Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false); + bool IsLocal = P.first && (!isa(P.first) || + cast(P.first)->hasLocalLinkage()); + auto *ThunkFn = Function::Create(ThunkFnTy, + IsLocal ? GlobalValue::InternalLinkage + : GlobalValue::LinkOnceODRLinkage, + ThunkName, &M); + if (!IsLocal) { + ThunkFn->setVisibility(GlobalValue::HiddenVisibility); + ThunkFn->setComdat(M.getOrInsertComdat(ThunkName)); + } + + auto *BB = BasicBlock::Create(*C, "entry", ThunkFn); + IRBuilder<> IRB(BB); + CallInst *WrapperCall = IRB.CreateCall( + HwasanPersonalityWrapper, + {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2), + ThunkFn->getArg(3), ThunkFn->getArg(4), + P.first ? IRB.CreateBitCast(P.first, Int8PtrTy) + : Constant::getNullValue(Int8PtrTy), + IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy), + IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)}); + WrapperCall->setTailCall(); + IRB.CreateRet(WrapperCall); + + for (Function *F : P.second) + F->setPersonalityFn(ThunkFn); + } +} + void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) { Scale = kDefaultShadowScale; if (ClMappingOffset.getNumOccurrences() > 0) { diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index c7371f567ff..74d6e76eceb 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -403,7 +403,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, AM->getResult(M).getManager(); ORE = &FAM.getResult(F); } else { - OwnedORE = llvm::make_unique(&F); + OwnedORE = std::make_unique(&F); ORE = OwnedORE.get(); } diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp index a2c1ddfd279..93d3a8a14d5 100644 --- a/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ 
b/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -100,7 +100,8 @@ public: if (!ClOrderFileWriteMapping.empty()) { std::lock_guard LogLock(MappingMutex); std::error_code EC; - llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append); + llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, + llvm::sys::fs::OF_Append); if (EC) { report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping + " to save mapping file for order file instrumentation\n"); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 63c2b807896..1f092a5f310 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -157,7 +157,10 @@ public: } bool runOnModule(Module &M) override { - return InstrProf.run(M, getAnalysis().getTLI()); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + return InstrProf.run(M, GetTLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -370,8 +373,12 @@ private: } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { - auto &TLI = AM.getResult(M); - if (!run(M, TLI)) + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + if (!run(M, GetTLI)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -441,7 +448,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { std::unique_ptr BFI; if (Options.UseBFIInPromotion) { std::unique_ptr BPI; - BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F))); BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); } @@ -482,9 +489,10 @@ static bool containsProfilingIntrinsics(Module &M) { return false; } -bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { +bool 
InstrProfiling::run( + Module &M, std::function GetTLI) { this->M = &M; - this->TLI = &TLI; + this->GetTLI = std::move(GetTLI); NamesVar = nullptr; NamesSize = 0; ProfileDataMap.clear(); @@ -601,6 +609,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { bool IsRange = (Ind->getValueKind()->getZExtValue() == llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; + auto *TLI = &GetTLI(*Ind->getFunction()); if (!IsRange) { Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), @@ -731,9 +740,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { PD = It->second; } - // Match the linkage and visibility of the name global, except on COFF, where - // the linkage must be local and consequentially the visibility must be - // default. + // Match the linkage and visibility of the name global. COFF supports using + // comdats with internal symbols, so do that if we can. Function *Fn = Inc->getParent()->getParent(); GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); @@ -749,19 +757,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { // new comdat group for the counters and profiling data. If we use the comdat // of the parent function, that will result in relocations against discarded // sections. - Comdat *Cmdt = nullptr; - GlobalValue::LinkageTypes CounterLinkage = Linkage; - if (needsComdatForCounter(*Fn, *M)) { - StringRef CmdtPrefix = getInstrProfComdatPrefix(); + bool NeedComdat = needsComdatForCounter(*Fn, *M); + if (NeedComdat) { if (TT.isOSBinFormatCOFF()) { - // For COFF, the comdat group name must be the name of a symbol in the - // group. Use the counter variable name, and upgrade its linkage to - // something externally visible, like linkonce_odr. 
- CmdtPrefix = getInstrProfCountersVarPrefix(); - CounterLinkage = GlobalValue::LinkOnceODRLinkage; + // For COFF, put the counters, data, and values each into their own + // comdats. We can't use a group because the Visual C++ linker will + // report duplicate symbol errors if there are multiple external symbols + // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE. + Linkage = GlobalValue::LinkOnceODRLinkage; + Visibility = GlobalValue::HiddenVisibility; } - Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix)); } + auto MaybeSetComdat = [=](GlobalVariable *GV) { + if (NeedComdat) + GV->setComdat(M->getOrInsertComdat(GV->getName())); + }; uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); @@ -775,9 +785,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { CounterPtr->setVisibility(Visibility); CounterPtr->setSection( getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); - CounterPtr->setAlignment(8); - CounterPtr->setComdat(Cmdt); - CounterPtr->setLinkage(CounterLinkage); + CounterPtr->setAlignment(Align(8)); + MaybeSetComdat(CounterPtr); + CounterPtr->setLinkage(Linkage); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for @@ -797,8 +807,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ValuesVar->setVisibility(Visibility); ValuesVar->setSection( getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); - ValuesVar->setAlignment(8); - ValuesVar->setComdat(Cmdt); + ValuesVar->setAlignment(Align(8)); + MaybeSetComdat(ValuesVar); ValuesPtrExpr = ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); } @@ -830,8 +840,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { getVarName(Inc, getInstrProfDataVarPrefix())); Data->setVisibility(Visibility); Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); - 
Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); - Data->setComdat(Cmdt); + Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); + MaybeSetComdat(Data); + Data->setLinkage(Linkage); PD.RegionCounters = CounterPtr; PD.DataVar = Data; @@ -920,7 +931,7 @@ void InstrProfiling::emitNameData() { // On COFF, it's important to reduce the alignment down to 1 to prevent the // linker from inserting padding before the start of the names section or // between names entries. - NamesVar->setAlignment(1); + NamesVar->setAlignment(Align::None()); UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index f56a1bd91b8..a6c2c9b464b 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -68,7 +68,8 @@ GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str, GlobalValue::PrivateLinkage, StrConst, NamePrefix); if (AllowMerging) GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + GV->setAlignment(Align::None()); // Strings may not be merged w/o setting + // alignment explicitly. return GV; } @@ -116,7 +117,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemorySanitizerLegacyPassPass(Registry); initializeHWAddressSanitizerLegacyPassPass(Registry); initializeThreadSanitizerLegacyPassPass(Registry); - initializeSanitizerCoverageModulePass(Registry); + initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerPass(Registry); } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b25cbed1bb0..69c9020e060 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -462,16 +462,9 @@ namespace { /// the module. 
class MemorySanitizer { public: - MemorySanitizer(Module &M, MemorySanitizerOptions Options) { - this->CompileKernel = - ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel; - if (ClTrackOrigins.getNumOccurrences() > 0) - this->TrackOrigins = ClTrackOrigins; - else - this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins; - this->Recover = ClKeepGoing.getNumOccurrences() > 0 - ? ClKeepGoing - : (this->CompileKernel | Options.Recover); + MemorySanitizer(Module &M, MemorySanitizerOptions Options) + : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins), + Recover(Options.Recover) { initializeModule(M); } @@ -594,10 +587,26 @@ private: /// An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; - - Function *MsanCtorFunction; }; +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kMsanModuleCtorName, kMsanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { + if (!ClWithComdat) { + appendToGlobalCtors(M, Ctor, 0); + return; + } + Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); + Ctor->setComdat(MsanCtorComdat); + appendToGlobalCtors(M, Ctor, 0, Ctor); + }); +} + /// A legacy function pass for msan instrumentation. /// /// Instruments functions to detect unitialized reads. @@ -615,7 +624,7 @@ struct MemorySanitizerLegacyPass : public FunctionPass { bool runOnFunction(Function &F) override { return MSan->sanitizeFunction( - F, getAnalysis().getTLI()); + F, getAnalysis().getTLI(F)); } bool doInitialization(Module &M) override; @@ -623,8 +632,17 @@ struct MemorySanitizerLegacyPass : public FunctionPass { MemorySanitizerOptions Options; }; +template T getOptOrDefault(const cl::opt &Opt, T Default) { + return (Opt.getNumOccurrences() > 0) ? 
Opt : Default; +} + } // end anonymous namespace +MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K) + : Kernel(getOptOrDefault(ClEnableKmsan, K)), + TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)), + Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {} + PreservedAnalyses MemorySanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { MemorySanitizer Msan(*F.getParent(), Options); @@ -633,6 +651,14 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F, return PreservedAnalyses::all(); } +PreservedAnalyses MemorySanitizerPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (Options.Kernel) + return PreservedAnalyses::all(); + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char MemorySanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan", @@ -918,23 +944,6 @@ void MemorySanitizer::initializeModule(Module &M) { OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); if (!CompileKernel) { - std::tie(MsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kMsanModuleCtorName, kMsanInitName, - /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. 
Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - if (!ClWithComdat) { - appendToGlobalCtors(M, Ctor, 0); - return; - } - Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); - Ctor->setComdat(MsanCtorComdat); - appendToGlobalCtors(M, Ctor, 0, Ctor); - }); - if (TrackOrigins) M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { return new GlobalVariable( @@ -952,6 +961,8 @@ void MemorySanitizer::initializeModule(Module &M) { } bool MemorySanitizerLegacyPass::doInitialization(Module &M) { + if (!Options.Kernel) + insertModuleCtor(M); MSan.emplace(M, Options); return true; } @@ -2562,6 +2573,11 @@ struct MemorySanitizerVisitor : public InstVisitor { return false; } + void handleInvariantGroup(IntrinsicInst &I) { + setShadow(&I, getShadow(&I, 0)); + setOrigin(&I, getOrigin(&I, 0)); + } + void handleLifetimeStart(IntrinsicInst &I) { if (!PoisonStack) return; @@ -2993,6 +3009,10 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::lifetime_start: handleLifetimeStart(I); break; + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + handleInvariantGroup(I); + break; case Intrinsic::bswap: handleBswap(I); break; @@ -3627,10 +3647,10 @@ struct MemorySanitizerVisitor : public InstVisitor { int getNumOutputArgs(InlineAsm *IA, CallBase *CB) { int NumRetOutputs = 0; int NumOutputs = 0; - Type *RetTy = dyn_cast(CB)->getType(); + Type *RetTy = cast(CB)->getType(); if (!RetTy->isVoidTy()) { // Register outputs are returned via the CallInst return value. - StructType *ST = dyn_cast_or_null(RetTy); + auto *ST = dyn_cast(RetTy); if (ST) NumRetOutputs = ST->getNumElements(); else @@ -3667,7 +3687,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // corresponding CallInst has nO+nI+1 operands (the last operand is the // function to be called). 
const DataLayout &DL = F.getParent()->getDataLayout(); - CallBase *CB = dyn_cast(&I); + CallBase *CB = cast(&I); IRBuilder<> IRB(&I); InlineAsm *IA = cast(CB->getCalledValue()); int OutputArgs = getNumOutputArgs(IA, CB); @@ -4567,8 +4587,9 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, } bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { - if (!CompileKernel && (&F == MsanCtorFunction)) + if (!CompileKernel && F.getName() == kMsanModuleCtorName) return false; + MemorySanitizerVisitor Visitor(F, *this, TLI); // Clear out readonly/readnone attributes. diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 6fec3c9c79e..ca1bb62389e 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -48,6 +48,7 @@ //===----------------------------------------------------------------------===// #include "CFGMST.h" +#include "ValueProfileCollector.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -61,7 +62,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -96,6 +96,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" @@ -103,11 +104,11 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include #include #include @@ -120,6 +121,7 @@ using namespace llvm; using ProfileCount = Function::ProfileCount; +using VPCandidateInfo = ValueProfileCollector::CandidateInfo; #define DEBUG_TYPE "pgo-instrumentation" @@ -286,6 +288,11 @@ static std::string getBranchCondString(Instruction *TI) { return result; } +static const char *ValueProfKindDescr[] = { +#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, +#include "llvm/ProfileData/InstrProfData.inc" +}; + namespace { /// The select instruction visitor plays three roles specified @@ -348,50 +355,6 @@ struct SelectInstVisitor : public InstVisitor { unsigned getNumOfSelectInsts() const { return NSIs; } }; -/// Instruction Visitor class to visit memory intrinsic calls. -struct MemIntrinsicVisitor : public InstVisitor { - Function &F; - unsigned NMemIs = 0; // Number of memIntrinsics instrumented. - VisitMode Mode = VM_counting; // Visiting mode. - unsigned CurCtrId = 0; // Current counter index. - unsigned TotalNumCtrs = 0; // Total number of counters - GlobalVariable *FuncNameVar = nullptr; - uint64_t FuncHash = 0; - PGOUseFunc *UseFunc = nullptr; - std::vector Candidates; - - MemIntrinsicVisitor(Function &Func) : F(Func) {} - - void countMemIntrinsics(Function &Func) { - NMemIs = 0; - Mode = VM_counting; - visit(Func); - } - - void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, - GlobalVariable *FNV, uint64_t FHash) { - Mode = VM_instrument; - TotalNumCtrs = TotalNC; - FuncHash = FHash; - FuncNameVar = FNV; - visit(Func); - } - - std::vector findMemIntrinsics(Function &Func) { - Candidates.clear(); - Mode = VM_annotate; - visit(Func); - return Candidates; - } - - // Visit the IR stream and annotate all mem intrinsic call instructions. 
- void instrumentOneMemIntrinsic(MemIntrinsic &MI); - - // Visit \p MI instruction and perform tasks according to visit mode. - void visitMemIntrinsic(MemIntrinsic &SI); - - unsigned getNumOfMemIntrinsics() const { return NMemIs; } -}; class PGOInstrumentationGenLegacyPass : public ModulePass { public: @@ -563,13 +526,14 @@ private: // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; + ValueProfileCollector VPC; + void computeCFGHash(); void renameComdatFunction(); public: - std::vector> ValueSites; + std::vector> ValueSites; SelectInstVisitor SIVisitor; - MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; @@ -604,23 +568,21 @@ public: std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) - : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), - ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), - MST(F, BPI, BFI) { + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); - MIVisitor.countMemIntrinsics(Func); + ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); if (!IsCS) { NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfPGOBB += MST.BBInfos.size(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); } else { NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfCSPGOBB += MST.BBInfos.size(); } - ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); computeCFGHash(); @@ -647,7 +609,7 @@ public: // value of each BB in the CFG. The higher 32 bits record the number of edges. template void FuncPGOInstrumentation::computeCFGHash() { - std::vector Indexes; + std::vector Indexes; JamCRC JC; for (auto &BB : F) { const Instruction *TI = BB.getTerminator(); @@ -658,7 +620,7 @@ void FuncPGOInstrumentation::computeCFGHash() { continue; uint32_t Index = BI->Index; for (int J = 0; J < 4; J++) - Indexes.push_back((char)(Index >> (J * 8))); + Indexes.push_back((uint8_t)(Index >> (J * 8))); } } JC.update(Indexes); @@ -874,28 +836,36 @@ static void instrumentOneFunc( if (DisableValueProfiling) return; - unsigned NumIndirectCalls = 0; - for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) { - CallSite CS(I); - Value *Callee = CS.getCalledValue(); - LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = " - << NumIndirectCalls << "\n"); - IRBuilder<> Builder(I); - assert(Builder.GetInsertPoint() != I->getParent()->end() && - "Cannot get the Instrumentation point"); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, 
I8PtrTy), - Builder.getInt64(FuncInfo.FunctionHash), - Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), - Builder.getInt32(IPVK_IndirectCallTarget), - Builder.getInt32(NumIndirectCalls++)}); - } - NumOfPGOICall += NumIndirectCalls; + NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); - // Now instrument memop intrinsic calls. - FuncInfo.MIVisitor.instrumentMemIntrinsics( - F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); + // For each VP Kind, walk the VP candidates and instrument each one. + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { + unsigned SiteIndex = 0; + if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) + continue; + + for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { + LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] + << " site: CallSite Index = " << SiteIndex << "\n"); + + IRBuilder<> Builder(Cand.InsertPt); + assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && + "Cannot get the Instrumentation point"); + + Value *ToProfile = nullptr; + if (Cand.V->getType()->isIntegerTy()) + ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); + else if (Cand.V->getType()->isPointerTy()) + ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); + assert(ToProfile && "value profiling Value is of unexpected type"); + + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), + {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + Builder.getInt64(FuncInfo.FunctionHash), ToProfile, + Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}); + } + } // IPVK_First <= Kind <= IPVK_Last } namespace { @@ -984,9 +954,9 @@ class PGOUseFunc { public: PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, - BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) - : F(Func), M(Modu), BFI(BFIin), + BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, + 
ProfileSummaryInfo *PSI, bool IsCS) + : F(Func), M(Modu), BFI(BFIin), PSI(PSI), FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), FreqAttr(FFA_Normal), IsCS(IsCS) {} @@ -1041,6 +1011,7 @@ private: Function &F; Module *M; BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; // This member stores the shared information with class PGOGenFunc. FuncPGOInstrumentation FuncInfo; @@ -1078,15 +1049,9 @@ private: // FIXME: This function should be removed once the functionality in // the inliner is implemented. void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { - if (ProgramMaxCount == 0) - return; - // Threshold of the hot functions. - const BranchProbability HotFunctionThreshold(1, 100); - // Threshold of the cold functions. - const BranchProbability ColdFunctionThreshold(2, 10000); - if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) + if (PSI->isHotCount(EntryCount)) FreqAttr = FFA_Hot; - else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) + else if (PSI->isColdCount(MaxCount)) FreqAttr = FFA_Cold; } }; @@ -1433,43 +1398,6 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } -void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { - Module *M = F.getParent(); - IRBuilder<> Builder(&MI); - Type *Int64Ty = Builder.getInt64Ty(); - Type *I8PtrTy = Builder.getInt8PtrTy(); - Value *Length = MI.getLength(); - assert(!isa(Length)); - Builder.CreateCall( - Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), - Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); - ++CurCtrId; -} - -void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { - if (!PGOInstrMemOP) - return; - Value *Length = MI.getLength(); - // Not instrument constant length calls. 
- if (dyn_cast(Length)) - return; - - switch (Mode) { - case VM_counting: - NMemIs++; - return; - case VM_instrument: - instrumentOneMemIntrinsic(MI); - return; - case VM_annotate: - Candidates.push_back(&MI); - return; - } - llvm_unreachable("Unknown visiting mode"); -} - // Traverse all valuesites and annotate the instructions for all value kind. void PGOUseFunc::annotateValueSites() { if (DisableValueProfiling) @@ -1482,11 +1410,6 @@ void PGOUseFunc::annotateValueSites() { annotateValueSites(Kind); } -static const char *ValueProfKindDescr[] = { -#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, -#include "llvm/ProfileData/InstrProfData.inc" -}; - // Annotate the instructions for a specific value kind. void PGOUseFunc::annotateValueSites(uint32_t Kind) { assert(Kind <= IPVK_Last); @@ -1505,11 +1428,11 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { return; } - for (auto &I : ValueSites) { + for (VPCandidateInfo &I : ValueSites) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I, ProfileRecord, + annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, static_cast(Kind), ValueSiteIndex, Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations : MaxNumAnnotations); @@ -1595,7 +1518,8 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M, static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI, bool IsCS) { + function_ref LookupBFI, + ProfileSummaryInfo *PSI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
@@ -1626,6 +1550,13 @@ static bool annotateAllFunctions( return false; } + // Add the profile summary (read from the header of the indexed summary) here + // so that we can use it below when reading counters (which checks if the + // function should be marked with a cold or inlinehint attribute). + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); + std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); std::vector HotFunctions; @@ -1638,7 +1569,7 @@ static bool annotateAllFunctions( // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1662,9 +1593,9 @@ static bool annotateAllFunctions( F.getName().equals(ViewBlockFreqFuncName))) { LoopInfo LI{DominatorTree(F)}; std::unique_ptr NewBPI = - llvm::make_unique(F, LI); + std::make_unique(F, LI); std::unique_ptr NewBFI = - llvm::make_unique(F, *NewBPI, LI); + std::make_unique(F, *NewBPI, LI); if (PGOViewCounts == PGOVCT_Graph) NewBFI->view(); else if (PGOViewCounts == PGOVCT_Text) { @@ -1686,9 +1617,6 @@ static bool annotateAllFunctions( } } } - M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), - IsCS ? ProfileSummary::PSK_CSInstr - : ProfileSummary::PSK_Instr); // Set function hotness attribute from the profile. 
// We have to apply these attributes at the end because their presence @@ -1730,8 +1658,10 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, return &FAM.getResult(F); }; + auto *PSI = &AM.getResult(M); + if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI, IsCS)) + LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1748,7 +1678,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + auto *PSI = &getAnalysis().getPSI(); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, IsCS); } @@ -1776,6 +1707,9 @@ void llvm::setProfMetadata(Module *M, Instruction *TI, : Weights) { dbgs() << W << " "; } dbgs() << "\n";); + + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); if (EmitBranchProbability) { std::string BrCondStr = getBranchCondString(TI); diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 188f95b4676..9f81bb16d0a 100644 --- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -138,7 +138,7 @@ public: OptimizationRemarkEmitter &ORE, DominatorTree *DT) : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) { ValueDataArray = - llvm::make_unique(MemOPMaxVersion + 2); + std::make_unique(MemOPMaxVersion + 2); // Get the MemOPSize range information from option MemOPSizeRange, getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, PreciseRangeLast); @@ -374,8 +374,8 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); Instruction *NewInst = MI->clone(); // Fix the argument. 
- MemIntrinsic * MemI = dyn_cast(NewInst); - IntegerType *SizeType = dyn_cast(MemI->getLength()->getType()); + auto *MemI = cast(NewInst); + auto *SizeType = dyn_cast(MemI->getLength()->getType()); assert(SizeType && "Expected integer type size argument."); ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); MemI->setLength(CaseSizeId); diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index ca0cb4bdbe8..f8fa9cad03b 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/EHPersonalities.h" @@ -176,24 +177,21 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { return Options; } -class SanitizerCoverageModule : public ModulePass { -public: - SanitizerCoverageModule( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) - : ModulePass(ID), Options(OverrideFromCL(Options)) { - initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override; - bool runOnFunction(Function &F); - static char ID; // Pass identification, replacement for typeid - StringRef getPassName() const override { return "SanitizerCoverageModule"; } +using DomTreeCallback = function_ref; +using PostDomTreeCallback = + function_ref; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - } +class ModuleSanitizerCoverage { +public: + ModuleSanitizerCoverage( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : Options(OverrideFromCL(Options)) {} + bool instrumentModule(Module &M, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); 
private: + void instrumentFunction(Function &F, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); @@ -252,10 +250,57 @@ private: SanitizerCoverageOptions Options; }; +class ModuleSanitizerCoverageLegacyPass : public ModulePass { +public: + ModuleSanitizerCoverageLegacyPass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : ModulePass(ID), Options(Options) { + initializeModuleSanitizerCoverageLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override { + ModuleSanitizerCoverage ModuleSancov(Options); + auto DTCallback = [this](Function &F) -> const DominatorTree * { + return &this->getAnalysis(F).getDomTree(); + }; + auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { + return &this->getAnalysis(F) + .getPostDomTree(); + }; + return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); + } + + static char ID; // Pass identification, replacement for typeid + StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + +private: + SanitizerCoverageOptions Options; +}; + } // namespace +PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, + ModuleAnalysisManager &MAM) { + ModuleSanitizerCoverage ModuleSancov(Options); + auto &FAM = MAM.getResult(M).getManager(); + auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { + return &FAM.getResult(F); + }; + auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * { + return &FAM.getResult(F); + }; + if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + std::pair -SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, 
+ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, Type *Ty) { GlobalVariable *SecStart = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, @@ -278,7 +323,7 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr); } -Function *SanitizerCoverageModule::CreateInitCallsForSections( +Function *ModuleSanitizerCoverage::CreateInitCallsForSections( Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, const char *Section) { auto SecStartEnd = CreateSecStartEnd(M, Section, Ty); @@ -310,7 +355,8 @@ Function *SanitizerCoverageModule::CreateInitCallsForSections( return CtorFunc; } -bool SanitizerCoverageModule::runOnModule(Module &M) { +bool ModuleSanitizerCoverage::instrumentModule( + Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; C = &(M.getContext()); @@ -403,7 +449,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); for (auto &F : M) - runOnFunction(F); + instrumentFunction(F, DTCallback, PDTCallback); Function *Ctor = nullptr; @@ -518,29 +564,30 @@ static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, return true; } -bool SanitizerCoverageModule::runOnFunction(Function &F) { +void ModuleSanitizerCoverage::instrumentFunction( + Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { if (F.empty()) - return false; + return; if (F.getName().find(".module_ctor") != std::string::npos) - return false; // Should not instrument sanitizer init functions. + return; // Should not instrument sanitizer init functions. if (F.getName().startswith("__sanitizer_")) - return false; // Don't instrument __sanitizer_* callbacks. + return; // Don't instrument __sanitizer_* callbacks. 
// Don't touch available_externally functions, their actual body is elewhere. if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) - return false; + return; // Don't instrument MSVC CRT configuration helpers. They may run before normal // initialization. if (F.getName() == "__local_stdio_printf_options" || F.getName() == "__local_stdio_scanf_options") - return false; + return; if (isa(F.getEntryBlock().getTerminator())) - return false; + return; // Don't instrument functions using SEH for now. Splitting basic blocks like // we do for coverage breaks WinEHPrepare. // FIXME: Remove this when SEH no longer uses landingpad pattern matching. if (F.hasPersonalityFn() && isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) - return false; + return; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); SmallVector IndirCalls; @@ -550,10 +597,8 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { SmallVector DivTraceTargets; SmallVector GepTraceTargets; - const DominatorTree *DT = - &getAnalysis(F).getDomTree(); - const PostDominatorTree *PDT = - &getAnalysis(F).getPostDomTree(); + const DominatorTree *DT = DTCallback(F); + const PostDominatorTree *PDT = PDTCallback(F); bool IsLeafFunc = true; for (auto &BB : F) { @@ -593,10 +638,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForSwitch(F, SwitchTraceTargets); InjectTraceForDiv(F, DivTraceTargets); InjectTraceForGep(F, GepTraceTargets); - return true; } -GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( +GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( size_t NumElements, Function &F, Type *Ty, const char *Section) { ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); auto Array = new GlobalVariable( @@ -608,8 +652,9 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( 
GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) Array->setComdat(Comdat); Array->setSection(getSectionName(Section)); - Array->setAlignment(Ty->isPointerTy() ? DL->getPointerSize() - : Ty->getPrimitiveSizeInBits() / 8); + Array->setAlignment(Align(Ty->isPointerTy() + ? DL->getPointerSize() + : Ty->getPrimitiveSizeInBits() / 8)); GlobalsToAppendToUsed.push_back(Array); GlobalsToAppendToCompilerUsed.push_back(Array); MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F)); @@ -619,7 +664,7 @@ GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( } GlobalVariable * -SanitizerCoverageModule::CreatePCArray(Function &F, +ModuleSanitizerCoverage::CreatePCArray(Function &F, ArrayRef AllBlocks) { size_t N = AllBlocks.size(); assert(N); @@ -646,7 +691,7 @@ SanitizerCoverageModule::CreatePCArray(Function &F, return PCArray; } -void SanitizerCoverageModule::CreateFunctionLocalArrays( +void ModuleSanitizerCoverage::CreateFunctionLocalArrays( Function &F, ArrayRef AllBlocks) { if (Options.TracePCGuard) FunctionGuardArray = CreateFunctionLocalArrayInSection( @@ -660,7 +705,7 @@ void SanitizerCoverageModule::CreateFunctionLocalArrays( FunctionPCsArray = CreatePCArray(F, AllBlocks); } -bool SanitizerCoverageModule::InjectCoverage(Function &F, +bool ModuleSanitizerCoverage::InjectCoverage(Function &F, ArrayRef AllBlocks, bool IsLeafFunc) { if (AllBlocks.empty()) return false; @@ -677,7 +722,7 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, // The cache is used to speed up recording the caller-callee pairs. // The address of the caller is passed implicitly via caller PC. // CacheSize is encoded in the name of the run-time function. 
-void SanitizerCoverageModule::InjectCoverageForIndirectCalls( +void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( Function &F, ArrayRef IndirCalls) { if (IndirCalls.empty()) return; @@ -696,7 +741,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( // __sanitizer_cov_trace_switch(CondValue, // {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... }) -void SanitizerCoverageModule::InjectTraceForSwitch( +void ModuleSanitizerCoverage::InjectTraceForSwitch( Function &, ArrayRef SwitchTraceTargets) { for (auto I : SwitchTraceTargets) { if (SwitchInst *SI = dyn_cast(I)) { @@ -735,7 +780,7 @@ void SanitizerCoverageModule::InjectTraceForSwitch( } } -void SanitizerCoverageModule::InjectTraceForDiv( +void ModuleSanitizerCoverage::InjectTraceForDiv( Function &, ArrayRef DivTraceTargets) { for (auto BO : DivTraceTargets) { IRBuilder<> IRB(BO); @@ -753,7 +798,7 @@ void SanitizerCoverageModule::InjectTraceForDiv( } } -void SanitizerCoverageModule::InjectTraceForGep( +void ModuleSanitizerCoverage::InjectTraceForGep( Function &, ArrayRef GepTraceTargets) { for (auto GEP : GepTraceTargets) { IRBuilder<> IRB(GEP); @@ -764,7 +809,7 @@ void SanitizerCoverageModule::InjectTraceForGep( } } -void SanitizerCoverageModule::InjectTraceForCmp( +void ModuleSanitizerCoverage::InjectTraceForCmp( Function &, ArrayRef CmpTraceTargets) { for (auto I : CmpTraceTargets) { if (ICmpInst *ICMP = dyn_cast(I)) { @@ -799,7 +844,7 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, +void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, bool IsLeafFunc) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -842,8 +887,10 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { // Check stack depth. If it's the deepest so far, record it. 
- Function *GetFrameAddr = - Intrinsic::getDeclaration(F.getParent(), Intrinsic::frameaddress); + Module *M = F.getParent(); + Function *GetFrameAddr = Intrinsic::getDeclaration( + M, Intrinsic::frameaddress, + IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); auto FrameAddrPtr = IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)}); auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy); @@ -858,7 +905,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } std::string -SanitizerCoverageModule::getSectionName(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionName(const std::string &Section) const { if (TargetTriple.isOSBinFormatCOFF()) { if (Section == SanCovCountersSectionName) return ".SCOV$CM"; @@ -872,32 +919,29 @@ SanitizerCoverageModule::getSectionName(const std::string &Section) const { } std::string -SanitizerCoverageModule::getSectionStart(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) return "\1section$start$__DATA$__" + Section; return "__start___" + Section; } std::string -SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { +ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) return "\1section$end$__DATA$__" + Section; return "__stop___" + Section; } - -char SanitizerCoverageModule::ID = 0; -INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov", - "SanitizerCoverage: TODO." - "ModulePass", - false, false) +char ModuleSanitizerCoverageLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov", - "SanitizerCoverage: TODO." 
- "ModulePass", - false, false) -ModulePass *llvm::createSanitizerCoverageModulePass( +INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) +ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( const SanitizerCoverageOptions &Options) { - return new SanitizerCoverageModule(Options); + return new ModuleSanitizerCoverageLegacyPass(Options); } diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 5be13fa745c..ac274a155a8 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -92,11 +92,10 @@ namespace { /// ensures the __tsan_init function is in the list of global constructors for /// the module. struct ThreadSanitizer { - ThreadSanitizer(Module &M); bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); private: - void initializeCallbacks(Module &M); + void initialize(Module &M); bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); @@ -108,8 +107,6 @@ private: void InsertRuntimeIgnores(Function &F); Type *IntptrTy; - IntegerType *OrdTy; - // Callbacks to run-time library are computed in doInitialization. 
FunctionCallee TsanFuncEntry; FunctionCallee TsanFuncExit; FunctionCallee TsanIgnoreBegin; @@ -130,7 +127,6 @@ private: FunctionCallee TsanVptrUpdate; FunctionCallee TsanVptrLoad; FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; - Function *TsanCtorFunction; }; struct ThreadSanitizerLegacyPass : FunctionPass { @@ -143,16 +139,32 @@ struct ThreadSanitizerLegacyPass : FunctionPass { private: Optional TSan; }; + +void insertModuleCtor(Module &M) { + getOrCreateSanitizerCtorAndInitFunctions( + M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, + /*InitArgs=*/{}, + // This callback is invoked when the functions are created the first + // time. Hook them into the global ctors list in that case: + [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); +} + } // namespace PreservedAnalyses ThreadSanitizerPass::run(Function &F, FunctionAnalysisManager &FAM) { - ThreadSanitizer TSan(*F.getParent()); + ThreadSanitizer TSan; if (TSan.sanitizeFunction(F, FAM.getResult(F))) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } +PreservedAnalyses ThreadSanitizerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + insertModuleCtor(M); + return PreservedAnalyses::none(); +} + char ThreadSanitizerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan", "ThreadSanitizer: detects data races.", false, false) @@ -169,12 +181,13 @@ void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool ThreadSanitizerLegacyPass::doInitialization(Module &M) { - TSan.emplace(M); + insertModuleCtor(M); + TSan.emplace(); return true; } bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); TSan->sanitizeFunction(F, TLI); return true; } @@ -183,7 +196,10 @@ FunctionPass *llvm::createThreadSanitizerLegacyPassPass() { return new ThreadSanitizerLegacyPass(); } -void ThreadSanitizer::initializeCallbacks(Module &M) { +void 
ThreadSanitizer::initialize(Module &M) { + const DataLayout &DL = M.getDataLayout(); + IntptrTy = DL.getIntPtrType(M.getContext()); + IRBuilder<> IRB(M.getContext()); AttributeList Attr; Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex, @@ -197,7 +213,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getVoidTy()); TsanIgnoreEnd = M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy()); - OrdTy = IRB.getInt32Ty(); + IntegerType *OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const unsigned ByteSize = 1U << i; const unsigned BitSize = ByteSize * 8; @@ -280,20 +296,6 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); } -ThreadSanitizer::ThreadSanitizer(Module &M) { - const DataLayout &DL = M.getDataLayout(); - IntptrTy = DL.getIntPtrType(M.getContext()); - std::tie(TsanCtorFunction, std::ignore) = - getOrCreateSanitizerCtorAndInitFunctions( - M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, - /*InitArgs=*/{}, - // This callback is invoked when the functions are created the first - // time. Hook them into the global ctors list in that case: - [&](Function *Ctor, FunctionCallee) { - appendToGlobalCtors(M, Ctor, 0); - }); -} - static bool isVtableAccess(Instruction *I) { if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) return Tag->isTBAAVtableAccess(); @@ -436,9 +438,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, const TargetLibraryInfo &TLI) { // This is required to prevent instrumenting call to __tsan_init from within // the module constructor. 
- if (&F == TsanCtorFunction) + if (F.getName() == kTsanModuleCtorName) return false; - initializeCallbacks(*F.getParent()); + initialize(*F.getParent()); SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; SmallVector AtomicAccesses; diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp new file mode 100644 index 00000000000..604726d4f40 --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfileCollector.cpp @@ -0,0 +1,78 @@ +//===- ValueProfileCollector.cpp - determine what to value profile --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl +// +//===----------------------------------------------------------------------===// + +#include "ValueProfilePlugins.inc" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" + +#include + +using namespace llvm; + +namespace { + +/// A plugin-based class that takes an arbitrary number of Plugin types. +/// Each plugin type must satisfy the following API: +/// 1) the constructor must take a `Function &f`. Typically, the plugin would +/// scan the function looking for candidates. +/// 2) contain a member function with the following signature and name: +/// void run(std::vector &Candidates); +/// such that the plugin would append its result into the vector parameter. +/// +/// Plugins are defined in ValueProfilePlugins.inc +template class PluginChain; + +/// The type PluginChainFinal is the final chain of plugins that will be used by +/// ValueProfileCollectorImpl. 
+using PluginChainFinal = PluginChain; + +template <> class PluginChain<> { +public: + PluginChain(Function &F) {} + void get(InstrProfValueKind K, std::vector &Candidates) {} +}; + +template +class PluginChain : public PluginChain { + PluginT Plugin; + using Base = PluginChain; + +public: + PluginChain(Function &F) : PluginChain(F), Plugin(F) {} + + void get(InstrProfValueKind K, std::vector &Candidates) { + if (K == PluginT::Kind) + Plugin.run(Candidates); + Base::get(K, Candidates); + } +}; + +} // end anonymous namespace + +/// ValueProfileCollectorImpl inherits the API of PluginChainFinal. +class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal { +public: + using PluginChainFinal::PluginChainFinal; +}; + +ValueProfileCollector::ValueProfileCollector(Function &F) + : PImpl(new ValueProfileCollectorImpl(F)) {} + +ValueProfileCollector::~ValueProfileCollector() = default; + +std::vector +ValueProfileCollector::get(InstrProfValueKind Kind) const { + std::vector Result; + PImpl->get(Kind, Result); + return Result; +} diff --git a/lib/Transforms/Instrumentation/ValueProfileCollector.h b/lib/Transforms/Instrumentation/ValueProfileCollector.h new file mode 100644 index 00000000000..ff883c8d0c7 --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfileCollector.h @@ -0,0 +1,79 @@ +//===- ValueProfileCollector.h - determine what to value profile ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a utility class, ValueProfileCollector, that is used to +// determine what kind of llvm::Value's are worth value-profiling, at which +// point in the program, and which instruction holds the Value Profile metadata. 
+// Currently, the only users of this utility is the PGOInstrumentation[Gen|Use] +// passes. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H +#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" + +namespace llvm { + +/// Utility analysis that determines what values are worth profiling. +/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to +/// populate the Candidates vector. +/// +/// Value profiling an expression means to track the values that this expression +/// takes at runtime and the frequency of each value. +/// It is important to distinguish between two sets of value profiles for a +/// particular expression: +/// 1) The set of values at the point of evaluation. +/// 2) The set of values at the point of use. +/// In some cases, the two sets are identical, but it's not unusual for the two +/// to differ. +/// +/// To elaborate more, consider this C code, and focus on the expression `nn`: +/// void foo(int nn, bool b) { +/// if (b) memcpy(x, y, nn); +/// } +/// The point of evaluation can be as early as the start of the function, and +/// let's say the value profile for `nn` is: +/// total=100; (value,freq) set = {(8,10), (32,50)} +/// The point of use is right before we call memcpy, and since we execute the +/// memcpy conditionally, the value profile of `nn` can be: +/// total=15; (value,freq) set = {(8,10), (4,5)} +/// +/// For this reason, a plugin is responsible for computing the insertion point +/// for each value to be profiled. The `CandidateInfo` structure encapsulates +/// all the information needed for each value profile site. +class ValueProfileCollector { +public: + struct CandidateInfo { + Value *V; // The value to profile. + Instruction *InsertPt; // Insert the VP lib call before this instr. 
+ Instruction *AnnotatedInst; // Where metadata is attached. + }; + + ValueProfileCollector(Function &Fn); + ValueProfileCollector(ValueProfileCollector &&) = delete; + ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; + + ValueProfileCollector(const ValueProfileCollector &) = delete; + ValueProfileCollector &operator=(const ValueProfileCollector &) = delete; + ~ValueProfileCollector(); + + /// returns a list of value profiling candidates of the given kind + std::vector get(InstrProfValueKind Kind) const; + +private: + class ValueProfileCollectorImpl; + std::unique_ptr PImpl; +}; + +} // namespace llvm + +#endif diff --git a/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc new file mode 100644 index 00000000000..4cc4c6c848c --- /dev/null +++ b/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -0,0 +1,75 @@ +//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a set of plugin classes used in ValueProfileCollectorImpl. +// Each plugin is responsible for collecting Value Profiling candidates for a +// particular optimization. 
+// Each plugin must satisfy the interface described in ValueProfileCollector.cpp +// +//===----------------------------------------------------------------------===// + +#include "ValueProfileCollector.h" +#include "llvm/Analysis/IndirectCallVisitor.h" +#include "llvm/IR/InstVisitor.h" + +using namespace llvm; +using CandidateInfo = ValueProfileCollector::CandidateInfo; + +///--------------------------- MemIntrinsicPlugin ------------------------------ +class MemIntrinsicPlugin : public InstVisitor { + Function &F; + std::vector *Candidates; + +public: + static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; + + MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {} + + void run(std::vector &Cs) { + Candidates = &Cs; + visit(F); + Candidates = nullptr; + } + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Not instrument constant length calls. + if (dyn_cast(Length)) + return; + + Instruction *InsertPt = &MI; + Instruction *AnnotatedInst = &MI; + Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); + } +}; + +///------------------------ IndirectCallPromotionPlugin ------------------------ +class IndirectCallPromotionPlugin { + Function &F; + +public: + static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget; + + IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {} + + void run(std::vector &Candidates) { + std::vector Result = findIndirectCalls(F); + for (Instruction *I : Result) { + Value *Callee = CallSite(I).getCalledValue(); + Instruction *InsertPt = I; + Instruction *AnnotatedInst = I; + Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst}); + } + } +}; + +///----------------------- Registration of the plugins ------------------------- +/// For now, registering a plugin with the ValueProfileCollector is done by +/// adding the plugin type to the VP_PLUGIN_LIST macro. 
+#define VP_PLUGIN_LIST \ + MemIntrinsicPlugin, \ + IndirectCallPromotionPlugin diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index 3243481dee0..26dd416d618 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -275,6 +275,10 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, } else { InsertAfter = std::next(Inst->getIterator()); } + + if (InsertAfter != BB->end()) + InsertAfter = skipDebugIntrinsics(InsertAfter); + InsertReverseInsertPt(&*InsertAfter); }; diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index de9a62e88c2..0e9f03a0606 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -93,9 +93,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, const SCEV *AlignSCEV, ScalarEvolution *SE) { // DiffUnits = Diff % int64_t(Alignment) - const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV); - const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV); - const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV); + const SCEV *DiffUnitsSCEV = SE->getURemExpr(DiffSCEV, AlignSCEV); LLVM_DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " << *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n"); @@ -323,7 +321,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlignment()) { - LI->setAlignment(NewAlignment); + LI->setAlignment(MaybeAlign(NewAlignment)); ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast(J)) { @@ -331,7 +329,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlignment()) { - SI->setAlignment(NewAlignment); + SI->setAlignment(MaybeAlign(NewAlignment)); ++NumStoreAlignChanged; } } else if 
(MemIntrinsic *MI = dyn_cast(J)) { diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp index 3519b000a33..c3fba923104 100644 --- a/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -562,7 +562,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass { if (skipFunction(F)) return false; - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); return doCallSiteSplitting(F, TLI, TTI, DT); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 98243a23f1e..9f340afbf7c 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -204,7 +204,7 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, /// set found in \p BBs. static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, BasicBlock *Entry, - SmallPtrSet &BBs) { + SetVector &BBs) { assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); // Nodes on the current path to the root. SmallPtrSet Path; @@ -257,7 +257,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Visit Orders in bottom-up order. using InsertPtsCostPair = - std::pair, BlockFrequency>; + std::pair, BlockFrequency>; // InsertPtsMap is a map from a BB to the best insertion points for the // subtree of BB (subtree not including the BB itself). @@ -266,7 +266,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { BasicBlock *Node = *RIt; bool NodeInBBs = BBs.count(Node); - SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + auto &InsertPts = InsertPtsMap[Node].first; BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; // Return the optimal insert points in BBs. 
@@ -283,7 +283,7 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child // will update its parent's ParentInsertPts and ParentPtsFreq. - SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + auto &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. // Don't hoist to EHPad because we may not find a proper place to insert @@ -305,12 +305,12 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, } /// Find an insertion point that dominates all uses. -SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( +SetVector ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. - SmallPtrSet BBs; - SmallPtrSet InsertPts; + SetVector BBs; + SetVector InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); @@ -333,15 +333,13 @@ SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; - BB1 = *BBs.begin(); - BB2 = *std::next(BBs.begin()); + BB1 = BBs.pop_back_val(); + BB2 = BBs.pop_back_val(); BB = DT->findNearestCommonDominator(BB1, BB2); if (BB == Entry) { InsertPts.insert(&Entry->front()); return InsertPts; } - BBs.erase(BB1); - BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); @@ -403,7 +401,7 @@ void ConstantHoistingPass::collectConstantCandidates( return; // Get offset from the base GV. 
- PointerType *GVPtrTy = dyn_cast(BaseGV->getType()); + PointerType *GVPtrTy = cast(BaseGV->getType()); IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); auto *GEPO = cast(ConstExpr); @@ -830,7 +828,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { SmallVectorImpl &ConstInfoVec = BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec; for (auto const &ConstInfo : ConstInfoVec) { - SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + SetVector IPSet = findConstantInsertionPoint(ConstInfo); // We can have an empty set if the function contains unreachable blocks. if (IPSet.empty()) continue; diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 770321c740a..e9e6afe3fdd 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -82,7 +82,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { bool Changed = false; const DataLayout &DL = F.getParent()->getDataLayout(); TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); while (!WorkList.empty()) { SmallVector NewWorkListVec; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 89497177524..2ef85268df4 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -62,6 +62,23 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); +STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumAnd, "Number of ands removed"); +STATISTIC(NumNW, "Number of no-wrap deductions"); +STATISTIC(NumNSW, "Number of no-signed-wrap deductions"); +STATISTIC(NumNUW, 
"Number of no-unsigned-wrap deductions"); +STATISTIC(NumAddNW, "Number of no-wrap deductions for add"); +STATISTIC(NumAddNSW, "Number of no-signed-wrap deductions for add"); +STATISTIC(NumAddNUW, "Number of no-unsigned-wrap deductions for add"); +STATISTIC(NumSubNW, "Number of no-wrap deductions for sub"); +STATISTIC(NumSubNSW, "Number of no-signed-wrap deductions for sub"); +STATISTIC(NumSubNUW, "Number of no-unsigned-wrap deductions for sub"); +STATISTIC(NumMulNW, "Number of no-wrap deductions for mul"); +STATISTIC(NumMulNSW, "Number of no-signed-wrap deductions for mul"); +STATISTIC(NumMulNUW, "Number of no-unsigned-wrap deductions for mul"); +STATISTIC(NumShlNW, "Number of no-wrap deductions for shl"); +STATISTIC(NumShlNSW, "Number of no-signed-wrap deductions for shl"); +STATISTIC(NumShlNUW, "Number of no-unsigned-wrap deductions for shl"); STATISTIC(NumOverflows, "Number of overflow checks removed"); STATISTIC(NumSaturating, "Number of saturating arithmetics converted to normal arithmetics"); @@ -85,6 +102,7 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } }; @@ -416,37 +434,96 @@ static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) { return NWRegion.contains(LRange); } -static void processOverflowIntrinsic(WithOverflowInst *WO) { - IRBuilder<> B(WO); - Value *NewOp = B.CreateBinOp( - WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName()); - // Constant-folding could have happened. 
- if (auto *Inst = dyn_cast(NewOp)) { - if (WO->isSigned()) - Inst->setHasNoSignedWrap(); - else - Inst->setHasNoUnsignedWrap(); +static void setDeducedOverflowingFlags(Value *V, Instruction::BinaryOps Opcode, + bool NewNSW, bool NewNUW) { + Statistic *OpcNW, *OpcNSW, *OpcNUW; + switch (Opcode) { + case Instruction::Add: + OpcNW = &NumAddNW; + OpcNSW = &NumAddNSW; + OpcNUW = &NumAddNUW; + break; + case Instruction::Sub: + OpcNW = &NumSubNW; + OpcNSW = &NumSubNSW; + OpcNUW = &NumSubNUW; + break; + case Instruction::Mul: + OpcNW = &NumMulNW; + OpcNSW = &NumMulNSW; + OpcNUW = &NumMulNUW; + break; + case Instruction::Shl: + OpcNW = &NumShlNW; + OpcNSW = &NumShlNSW; + OpcNUW = &NumShlNUW; + break; + default: + llvm_unreachable("Will not be called with other binops"); } - Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0); - NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1); + auto *Inst = dyn_cast(V); + if (NewNSW) { + ++NumNW; + ++*OpcNW; + ++NumNSW; + ++*OpcNSW; + if (Inst) + Inst->setHasNoSignedWrap(); + } + if (NewNUW) { + ++NumNW; + ++*OpcNW; + ++NumNUW; + ++*OpcNUW; + if (Inst) + Inst->setHasNoUnsignedWrap(); + } +} + +static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI); + +// Rewrite this with.overflow intrinsic as non-overflowing. 
+static void processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) { + IRBuilder<> B(WO); + Instruction::BinaryOps Opcode = WO->getBinaryOp(); + bool NSW = WO->isSigned(); + bool NUW = !WO->isSigned(); + + Value *NewOp = + B.CreateBinOp(Opcode, WO->getLHS(), WO->getRHS(), WO->getName()); + setDeducedOverflowingFlags(NewOp, Opcode, NSW, NUW); + + StructType *ST = cast(WO->getType()); + Constant *Struct = ConstantStruct::get(ST, + { UndefValue::get(ST->getElementType(0)), + ConstantInt::getFalse(ST->getElementType(1)) }); + Value *NewI = B.CreateInsertValue(Struct, NewOp, 0); WO->replaceAllUsesWith(NewI); WO->eraseFromParent(); ++NumOverflows; + + // See if we can infer the other no-wrap too. + if (auto *BO = dyn_cast(NewOp)) + processBinOp(BO, LVI); } -static void processSaturatingInst(SaturatingInst *SI) { +static void processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) { + Instruction::BinaryOps Opcode = SI->getBinaryOp(); + bool NSW = SI->isSigned(); + bool NUW = !SI->isSigned(); BinaryOperator *BinOp = BinaryOperator::Create( - SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI); + Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI); BinOp->setDebugLoc(SI->getDebugLoc()); - if (SI->isSigned()) - BinOp->setHasNoSignedWrap(); - else - BinOp->setHasNoUnsignedWrap(); + setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW); SI->replaceAllUsesWith(BinOp); SI->eraseFromParent(); ++NumSaturating; + + // See if we can infer the other no-wrap too. + if (auto *BO = dyn_cast(BinOp)) + processBinOp(BO, LVI); } /// Infer nonnull attributes for the arguments at the specified callsite. 
@@ -456,14 +533,14 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { if (auto *WO = dyn_cast(CS.getInstruction())) { if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) { - processOverflowIntrinsic(WO); + processOverflowIntrinsic(WO, LVI); return true; } } if (auto *SI = dyn_cast(CS.getInstruction())) { if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) { - processSaturatingInst(SI); + processSaturatingInst(SI, LVI); return true; } } @@ -632,6 +709,27 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { return true; } +static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { + if (SDI->getType()->isVectorTy()) + return false; + + Value *Base = SDI->getOperand(0); + + Constant *Zero = ConstantInt::get(Base->getType(), 0); + if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) != + LazyValueInfo::True) + return false; + + ++NumSExt; + auto *ZExt = + CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI); + ZExt->setDebugLoc(SDI->getDebugLoc()); + SDI->replaceAllUsesWith(ZExt); + SDI->eraseFromParent(); + + return true; +} + static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { using OBO = OverflowingBinaryOperator; @@ -648,6 +746,7 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { BasicBlock *BB = BinOp->getParent(); + Instruction::BinaryOps Opcode = BinOp->getOpcode(); Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); @@ -655,24 +754,48 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp); bool Changed = false; + bool NewNUW = false, NewNSW = false; if (!NUW) { ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( - BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap); - bool NewNUW = NUWRange.contains(LRange); - BinOp->setHasNoUnsignedWrap(NewNUW); + Opcode, RRange, OBO::NoUnsignedWrap); + NewNUW = NUWRange.contains(LRange); 
Changed |= NewNUW; } if (!NSW) { ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( - BinOp->getOpcode(), RRange, OBO::NoSignedWrap); - bool NewNSW = NSWRange.contains(LRange); - BinOp->setHasNoSignedWrap(NewNSW); + Opcode, RRange, OBO::NoSignedWrap); + NewNSW = NSWRange.contains(LRange); Changed |= NewNSW; } + setDeducedOverflowingFlags(BinOp, Opcode, NewNSW, NewNUW); + return Changed; } +static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { + if (BinOp->getType()->isVectorTy()) + return false; + + // Pattern match (and lhs, C) where C includes a superset of bits which might + // be set in lhs. This is a common truncation idiom created by instcombine. + BasicBlock *BB = BinOp->getParent(); + Value *LHS = BinOp->getOperand(0); + ConstantInt *RHS = dyn_cast(BinOp->getOperand(1)); + if (!RHS || !RHS->getValue().isMask()) + return false; + + ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp); + if (!LRange.getUnsignedMax().ule(RHS->getValue())) + return false; + + BinOp->replaceAllUsesWith(LHS); + BinOp->eraseFromParent(); + NumAnd++; + return true; +} + + static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { if (Constant *C = LVI->getConstant(V, At->getParent(), At)) return C; @@ -740,10 +863,18 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; + case Instruction::SExt: + BBChanged |= processSExt(cast(II), LVI); + break; case Instruction::Add: case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: BBChanged |= processBinOp(cast(II), LVI); break; + case Instruction::And: + BBChanged |= processAnd(cast(II), LVI); + break; } } @@ -796,5 +927,6 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 
479e0ed7407..a79d775aa7f 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -38,17 +38,19 @@ namespace { //===--------------------------------------------------------------------===// // DeadInstElimination pass implementation // - struct DeadInstElimination : public BasicBlockPass { - static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(ID) { - initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); - } - bool runOnBasicBlock(BasicBlock &BB) override { - if (skipBasicBlock(BB)) - return false; - auto *TLIP = getAnalysisIfAvailable(); - TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - bool Changed = false; +struct DeadInstElimination : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DeadInstElimination() : FunctionPass(ID) { + initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + auto *TLIP = getAnalysisIfAvailable(); + TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; + + bool Changed = false; + for (auto &BB : F) { for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = &*DI++; if (isInstructionTriviallyDead(Inst, TLI)) { @@ -60,13 +62,14 @@ namespace { ++DIEEliminated; } } - return Changed; } + return Changed; + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } - }; +}; } char DeadInstElimination::ID = 0; @@ -154,7 +157,7 @@ struct DCELegacyPass : public FunctionPass { return false; auto *TLIP = getAnalysisIfAvailable(); - TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; + TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; return eliminateDeadCode(F, TLI); } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index a81645745b4..685de82810e 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1254,8 +1254,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, auto *SI = new StoreInst( ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), - Earlier->getPointerOperand(), false, Earlier->getAlignment(), - Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); + Earlier->getPointerOperand(), false, + MaybeAlign(Earlier->getAlignment()), Earlier->getOrdering(), + Earlier->getSyncScopeID(), DepWrite); unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, @@ -1361,7 +1362,7 @@ public: MemoryDependenceResults *MD = &getAnalysis().getMemDep(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); return eliminateDeadStores(F, AA, MD, DT, TLI); } diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp index 876681b4f9d..93485350747 100644 --- a/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/lib/Transforms/Scalar/DivRemPairs.cpp @@ -1,4 +1,4 @@ -//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===// +//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This pass hoists and/or decomposes integer division and remainder +// This pass hoists and/or decomposes/recomposes integer division and remainder // instructions to enable CFG improvements and better codegen. 
// //===----------------------------------------------------------------------===// @@ -19,19 +19,149 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" + using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "div-rem-pairs" STATISTIC(NumPairs, "Number of div/rem pairs"); +STATISTIC(NumRecomposed, "Number of instructions recomposed"); STATISTIC(NumHoisted, "Number of instructions hoisted"); STATISTIC(NumDecomposed, "Number of instructions decomposed"); DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", "Controls transformations in div-rem-pairs pass"); +namespace { +struct ExpandedMatch { + DivRemMapKey Key; + Instruction *Value; +}; +} // namespace + +/// See if we can match: (which is the form we expand into) +/// X - ((X ?/ Y) * Y) +/// which is equivalent to: +/// X ?% Y +static llvm::Optional matchExpandedRem(Instruction &I) { + Value *Dividend, *XroundedDownToMultipleOfY; + if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY)))) + return llvm::None; + + Value *Divisor; + Instruction *Div; + // Look for ((X / Y) * Y) + if (!match( + XroundedDownToMultipleOfY, + m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)), + m_Instruction(Div)), + m_Deferred(Divisor)))) + return llvm::None; + + ExpandedMatch M; + M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv; + M.Key.Dividend = Dividend; + M.Key.Divisor = Divisor; + M.Value = &I; + return M; +} + +/// A thin wrapper to store two values that we matched as div-rem pair. +/// We want this extra indirection to avoid dealing with RAUW'ing the map keys. +struct DivRemPairWorklistEntry { + /// The actual udiv/sdiv instruction. Source of truth. 
+ AssertingVH DivInst; + + /// The instruction that we have matched as a remainder instruction. + /// Should only be used as Value, don't introspect it. + AssertingVH RemInst; + + DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) + : DivInst(DivInst_), RemInst(RemInst_) { + assert((DivInst->getOpcode() == Instruction::UDiv || + DivInst->getOpcode() == Instruction::SDiv) && + "Not a division."); + assert(DivInst->getType() == RemInst->getType() && "Types should match."); + // We can't check anything else about remainder instruction, + // it's not strictly required to be a urem/srem. + } + + /// The type for this pair, identical for both the div and rem. + Type *getType() const { return DivInst->getType(); } + + /// Is this pair signed or unsigned? + bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } + + /// In this pair, what are the divident and divisor? + Value *getDividend() const { return DivInst->getOperand(0); } + Value *getDivisor() const { return DivInst->getOperand(1); } + + bool isRemExpanded() const { + switch (RemInst->getOpcode()) { + case Instruction::SRem: + case Instruction::URem: + return false; // single 'rem' instruction - unexpanded form. + default: + return true; // anything else means we have remainder in expanded form. + } + } +}; +using DivRemWorklistTy = SmallVector; + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). Place those pairs into a worklist for further +/// processing. This indirection is needed because we have to use TrackingVH<> +/// because we will be doing RAUW, and if one of the rem instructions we change +/// happens to be an input to another div/rem in the maps, we'd have problems. +static DivRemWorklistTy getWorklist(Function &F) { + // Insert all divide and remainder instructions into maps keyed by their + // operands and opcode (signed or unsigned). 
+ DenseMap DivMap; + // Use a MapVector for RemMap so that instructions are moved/inserted in a + // deterministic order. + MapVector RemMap; + for (auto &BB : F) { + for (auto &I : BB) { + if (I.getOpcode() == Instruction::SDiv) + DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; + else if (I.getOpcode() == Instruction::UDiv) + DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; + else if (I.getOpcode() == Instruction::SRem) + RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; + else if (I.getOpcode() == Instruction::URem) + RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; + else if (auto Match = matchExpandedRem(I)) + RemMap[Match->Key] = Match->Value; + } + } + + // We'll accumulate the matching pairs of div-rem instructions here. + DivRemWorklistTy Worklist; + + // We can iterate over either map because we are only looking for matched + // pairs. Choose remainders for efficiency because they are usually even more + // rare than division. + for (auto &RemPair : RemMap) { + // Find the matching division instruction from the division map. + Instruction *DivInst = DivMap[RemPair.first]; + if (!DivInst) + continue; + + // We have a matching pair of div/rem instructions. + NumPairs++; + Instruction *RemInst = RemPair.second; + + // Place it in the worklist. + Worklist.emplace_back(DivInst, RemInst); + } + + return Worklist; +} + /// Find matching pairs of integer div/rem ops (they have the same numerator, /// denominator, and signedness). If they exist in different basic blocks, bring /// them together by hoisting or replace the common division operation that is @@ -50,40 +180,48 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, const DominatorTree &DT) { bool Changed = false; - // Insert all divide and remainder instructions into maps keyed by their - // operands and opcode (signed or unsigned). 
- DenseMap DivMap; - // Use a MapVector for RemMap so that instructions are moved/inserted in a - // deterministic order. - MapVector RemMap; - for (auto &BB : F) { - for (auto &I : BB) { - if (I.getOpcode() == Instruction::SDiv) - DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; - else if (I.getOpcode() == Instruction::UDiv) - DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; - else if (I.getOpcode() == Instruction::SRem) - RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; - else if (I.getOpcode() == Instruction::URem) - RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; - } - } + // Get the matching pairs of div-rem instructions. We want this extra + // indirection to avoid dealing with having to RAUW the keys of the maps. + DivRemWorklistTy Worklist = getWorklist(F); - // We can iterate over either map because we are only looking for matched - // pairs. Choose remainders for efficiency because they are usually even more - // rare than division. - for (auto &RemPair : RemMap) { - // Find the matching division instruction from the division map. - Instruction *DivInst = DivMap[RemPair.first]; - if (!DivInst) + // Process each entry in the worklist. + for (DivRemPairWorklistEntry &E : Worklist) { + if (!DebugCounter::shouldExecute(DRPCounter)) continue; - // We have a matching pair of div/rem instructions. If one dominates the - // other, hoist and/or replace one. 
- NumPairs++; - Instruction *RemInst = RemPair.second; - bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; - bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); + bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned()); + + auto &DivInst = E.DivInst; + auto &RemInst = E.RemInst; + + const bool RemOriginallyWasInExpandedForm = E.isRemExpanded(); + (void)RemOriginallyWasInExpandedForm; // suppress unused variable warning + + if (HasDivRemOp && E.isRemExpanded()) { + // The target supports div+rem but the rem is expanded. + // We should recompose it first. + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); + Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y) + : BinaryOperator::CreateURem(X, Y); + // Note that we place it right next to the original expanded instruction, + // and letting further handling to move it if needed. + RealRem->setName(RemInst->getName() + ".recomposed"); + RealRem->insertAfter(RemInst); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. + RemInst = RealRem; + // And replace the original instruction with the new one. + OrigRemInst->replaceAllUsesWith(RealRem); + OrigRemInst->eraseFromParent(); + NumRecomposed++; + // Note that we have left ((X / Y) * Y) around. + // If it had other uses we could rewrite it as X - X % Y + } + + assert((!E.isRemExpanded() || !HasDivRemOp) && + "*If* the target supports div-rem, then by now the RemInst *is* " + "Instruction::[US]Rem."); // If the target supports div+rem and the instructions are in the same block // already, there's nothing to do. The backend should handle this. 
If the @@ -92,10 +230,16 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, continue; bool DivDominates = DT.dominates(DivInst, RemInst); - if (!DivDominates && !DT.dominates(RemInst, DivInst)) + if (!DivDominates && !DT.dominates(RemInst, DivInst)) { + // We have matching div-rem pair, but they are in two different blocks, + // neither of which dominates one another. + // FIXME: We could hoist both ops to the common predecessor block? continue; + } - if (!DebugCounter::shouldExecute(DRPCounter)) + // The target does not have a single div/rem operation, + // and the rem is already in expanded form. Nothing to do. + if (!HasDivRemOp && E.isRemExpanded()) continue; if (HasDivRemOp) { @@ -107,11 +251,17 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, DivInst->moveAfter(RemInst); NumHoisted++; } else { - // The target does not have a single div/rem operation. Decompose the - // remainder calculation as: + // The target does not have a single div/rem operation, + // and the rem is *not* in a already-expanded form. + // Decompose the remainder calculation as: // X % Y --> X - ((X / Y) * Y). - Value *X = RemInst->getOperand(0); - Value *Y = RemInst->getOperand(1); + + assert(!RemOriginallyWasInExpandedForm && + "We should not be expanding if the rem was in expanded form to " + "begin with."); + + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); Instruction *Sub = BinaryOperator::CreateSub(X, Mul); @@ -152,8 +302,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Now kill the explicit remainder. We have replaced it with: // (sub X, (mul (div X, Y), Y) - RemInst->replaceAllUsesWith(Sub); - RemInst->eraseFromParent(); + Sub->setName(RemInst->getName() + ".decomposed"); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. 
+ RemInst = Sub; + // And replace the original instruction with the new one. + OrigRemInst->replaceAllUsesWith(Sub); + OrigRemInst->eraseFromParent(); NumDecomposed++; } Changed = true; @@ -188,7 +343,7 @@ struct DivRemPairsLegacyPass : public FunctionPass { return optimizeDivRem(F, TTI, DT); } }; -} +} // namespace char DivRemPairsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index f1f07525702..ce540683dae 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -108,11 +108,12 @@ struct SimpleValue { // This can only handle non-void readnone functions. if (CallInst *CI = dyn_cast(Inst)) return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); - return isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst) || - isa(Inst) || isa(Inst); + return isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst) || isa(Inst) || + isa(Inst); } }; @@ -240,7 +241,7 @@ static unsigned getHashValueImpl(SimpleValue Val) { assert((isa(Inst) || isa(Inst) || isa(Inst) || isa(Inst) || - isa(Inst)) && + isa(Inst) || isa(Inst)) && "Invalid/unknown instruction"); // Mix in the opcode. 
@@ -526,7 +527,7 @@ public: const TargetTransformInfo &TTI, DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), - MSSAUpdater(llvm::make_unique(MSSA)) {} + MSSAUpdater(std::make_unique(MSSA)) {} bool run(); @@ -651,7 +652,7 @@ private: bool isInvariantLoad() const { if (auto *LI = dyn_cast(Inst)) - return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + return LI->hasMetadata(LLVMContext::MD_invariant_load); return false; } @@ -790,7 +791,7 @@ bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) { // A location loaded from with an invariant_load is assumed to *never* change // within the visible scope of the compilation. if (auto *LI = dyn_cast(I)) - if (LI->getMetadata(LLVMContext::MD_invariant_load)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; auto MemLocOpt = MemoryLocation::getOrNone(I); @@ -1359,7 +1360,7 @@ public: if (skipFunction(F)) return false; - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); @@ -1381,6 +1382,7 @@ public: AU.addPreserved(); } AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } }; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 31670b1464e..e6abf1ceb02 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,10 +11,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + using namespace llvm; #define DEBUG_TYPE "flattencfg" @@ -52,15 +54,23 @@ FunctionPass 
*llvm::createFlattenCFGPass() { return new FlattenCFGPass(); } static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { bool Changed = false; bool LocalChange = true; + + // Use block handles instead of iterating over function blocks directly + // to avoid using iterators invalidated by erasing blocks. + std::vector Blocks; + Blocks.reserve(F.size()); + for (auto &BB : F) + Blocks.push_back(&BB); + while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks and remove them if they are unneeded... - // - for (Function::iterator BBIt = F.begin(); BBIt != F.end();) { - if (FlattenCFG(&*BBIt++, AA)) { - LocalChange = true; - } + // Loop over all of the basic blocks and try to flatten them. + for (WeakVH &BlockHandle : Blocks) { + // Skip blocks erased by FlattenCFG. + if (auto *BB = cast_or_null(BlockHandle)) + if (FlattenCFG(BB, AA)) + LocalChange = true; } Changed |= LocalChange; } diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index 4f83e869b30..4d2eac0451d 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -60,11 +60,13 @@ namespace { if (skipFunction(F)) return false; - return Impl.runImpl(F); + const DominatorTree &DT = getAnalysis().getDomTree(); + return Impl.runImpl(F, DT); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired(); AU.addPreserved(); } @@ -116,21 +118,29 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // Find the roots - instructions that convert from the FP domain to // integer domain. -void Float2IntPass::findRoots(Function &F, SmallPtrSet &Roots) { - for (auto &I : instructions(F)) { - if (isa(I.getType())) +void Float2IntPass::findRoots(Function &F, const DominatorTree &DT, + SmallPtrSet &Roots) { + for (BasicBlock &BB : F) { + // Unreachable code can take on strange forms that we are not prepared to + // handle. 
For example, an instruction may have itself as an operand. + if (!DT.isReachableFromEntry(&BB)) continue; - switch (I.getOpcode()) { - default: break; - case Instruction::FPToUI: - case Instruction::FPToSI: - Roots.insert(&I); - break; - case Instruction::FCmp: - if (mapFCmpPred(cast(&I)->getPredicate()) != - CmpInst::BAD_ICMP_PREDICATE) + + for (Instruction &I : BB) { + if (isa(I.getType())) + continue; + switch (I.getOpcode()) { + default: break; + case Instruction::FPToUI: + case Instruction::FPToSI: Roots.insert(&I); - break; + break; + case Instruction::FCmp: + if (mapFCmpPred(cast(&I)->getPredicate()) != + CmpInst::BAD_ICMP_PREDICATE) + Roots.insert(&I); + break; + } } } } @@ -503,7 +513,7 @@ void Float2IntPass::cleanup() { I.first->eraseFromParent(); } -bool Float2IntPass::runImpl(Function &F) { +bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); // Clear out all state. ECs = EquivalenceClasses(); @@ -513,7 +523,7 @@ bool Float2IntPass::runImpl(Function &F) { Ctx = &F.getParent()->getContext(); - findRoots(F, Roots); + findRoots(F, DT, Roots); walkBackwards(Roots); walkForwards(); @@ -527,8 +537,9 @@ bool Float2IntPass::runImpl(Function &F) { namespace llvm { FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); } -PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) { - if (!runImpl(F)) +PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) { + const DominatorTree &DT = AM.getResult(F); + if (!runImpl(F, DT)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 1a02e9d33f4..743353eaea2 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -70,6 +70,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" 
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -626,6 +627,8 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve(); PA.preserve(); PA.preserve(); + if (LI) + PA.preserve(); return PA; } @@ -1161,15 +1164,30 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + // We do the translation for each edge we skipped by going from LI's block + // to LoadBB, otherwise we might miss pieces needing translation. // If all preds have a single successor, then we know it is safe to insert // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. - PHITransAddr Address(LI->getPointerOperand(), DL, AC); - Value *LoadPtr = nullptr; - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); + Value *LoadPtr = LI->getPointerOperand(); + BasicBlock *Cur = LI->getParent(); + while (Cur != LoadBB) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion( + Cur, Cur->getSinglePredecessor(), *DT, NewInsts); + if (!LoadPtr) { + CanDoPRE = false; + break; + } + Cur = Cur->getSinglePredecessor(); + } + if (LoadPtr) { + PHITransAddr Address(LoadPtr, DL, AC); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, + NewInsts); + } // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (!LoadPtr) { @@ -1184,8 +1202,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (!CanDoPRE) { while (!NewInsts.empty()) { - Instruction *I = NewInsts.pop_back_val(); - markInstructionForDeletion(I); + // Erase instructions generated by the failed PHI translation before + // trying to number them. 
PHI translation might insert instructions + // in basic blocks other than the current one, and we delete them + // directly, as markInstructionForDeletion only allows removing from the + // current basic block. + NewInsts.pop_back_val()->eraseFromParent(); } // HINT: Don't revert the edge-splitting as following transformation may // also need to split these critical edges. @@ -1219,10 +1241,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, BasicBlock *UnavailablePred = PredLoad.first; Value *LoadPtr = PredLoad.second; - auto *NewLoad = - new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre", - LI->isVolatile(), LI->getAlignment(), LI->getOrdering(), - LI->getSyncScopeID(), UnavailablePred->getTerminator()); + auto *NewLoad = new LoadInst( + LI->getType(), LoadPtr, LI->getName() + ".pre", LI->isVolatile(), + MaybeAlign(LI->getAlignment()), LI->getOrdering(), LI->getSyncScopeID(), + UnavailablePred->getTerminator()); NewLoad->setDebugLoc(LI->getDebugLoc()); // Transfer the old load's AA tags to the new load. 
@@ -1365,6 +1387,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); } +static bool hasUsersIn(Value *V, BasicBlock *BB) { + for (User *U : V->users()) + if (isa(U) && + cast(U)->getParent() == BB) + return true; + return false; +} + bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && "This function can only be called with llvm.assume intrinsic"); @@ -1403,12 +1433,23 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { // We can replace assume value with true, which covers cases like this: // call void @llvm.assume(i1 %cmp) // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true - ReplaceWithConstMap[V] = True; + ReplaceOperandsWithMap[V] = True; - // If one of *cmp *eq operand is const, adding it to map will cover this: + // If we find an equality fact, canonicalize all dominated uses in this block + // to one of the two values. We heuristically choice the "oldest" of the + // two where age is determined by value number. (Note that propagateEquality + // above handles the cross block case.) 
+ // + // Key case to cover are: + // 1) // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen // call void @llvm.assume(i1 %cmp) // ret float %0 ; will change it to ret float 3.000000e+00 + // 2) + // %load = load float, float* %addr + // %cmp = fcmp oeq float %load, %0 + // call void @llvm.assume(i1 %cmp) + // ret float %load ; will change it to ret float %0 if (auto *CmpI = dyn_cast(V)) { if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || @@ -1416,13 +1457,50 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { CmpI->getFastMathFlags().noNaNs())) { Value *CmpLHS = CmpI->getOperand(0); Value *CmpRHS = CmpI->getOperand(1); - if (isa(CmpLHS)) + // Heuristically pick the better replacement -- the choice of heuristic + // isn't terribly important here, but the fact we canonicalize on some + // replacement is for exposing other simplifications. + // TODO: pull this out as a helper function and reuse w/existing + // (slightly different) logic. + if (isa(CmpLHS) && !isa(CmpRHS)) std::swap(CmpLHS, CmpRHS); - auto *RHSConst = dyn_cast(CmpRHS); + if (!isa(CmpLHS) && isa(CmpRHS)) + std::swap(CmpLHS, CmpRHS); + if ((isa(CmpLHS) && isa(CmpRHS)) || + (isa(CmpLHS) && isa(CmpRHS))) { + // Move the 'oldest' value to the right-hand side, using the value + // number as a proxy for age. + uint32_t LVN = VN.lookupOrAdd(CmpLHS); + uint32_t RVN = VN.lookupOrAdd(CmpRHS); + if (LVN < RVN) + std::swap(CmpLHS, CmpRHS); + } - // If only one operand is constant. - if (RHSConst != nullptr && !isa(CmpLHS)) - ReplaceWithConstMap[CmpLHS] = RHSConst; + // Handle degenerate case where we either haven't pruned a dead path or a + // removed a trivial assume yet. + if (isa(CmpLHS) && isa(CmpRHS)) + return Changed; + + // +0.0 and -0.0 compare equal, but do not imply equivalence. Unless we + // can prove equivalence, bail. 
+ if (CmpRHS->getType()->isFloatTy() && + (!isa(CmpRHS) || cast(CmpRHS)->isZero())) + return Changed; + + LLVM_DEBUG(dbgs() << "Replacing dominated uses of " + << *CmpLHS << " with " + << *CmpRHS << " in block " + << IntrinsicI->getParent()->getName() << "\n"); + + + // Setup the replacement map - this handles uses within the same block + if (hasUsersIn(CmpLHS, IntrinsicI->getParent())) + ReplaceOperandsWithMap[CmpLHS] = CmpRHS; + + // NOTE: The non-block local cases are handled by the call to + // propagateEquality above; this block is just about handling the block + // local cases. TODO: There's a bunch of logic in propagateEqualiy which + // isn't duplicated for the block local case, can we share it somehow? } } return Changed; @@ -1522,6 +1600,41 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, return NewNum; } +// Return true if the value number \p Num and NewNum have equal value. +// Return false if the result is unknown. +bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, + const BasicBlock *Pred, + const BasicBlock *PhiBlock, GVN &Gvn) { + CallInst *Call = nullptr; + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals) { + Call = dyn_cast(Vals->Val); + if (Call && Call->getParent() == PhiBlock) + break; + Vals = Vals->Next; + } + + if (AA->doesNotAccessMemory(Call)) + return true; + + if (!MD || !AA->onlyReadsMemory(Call)) + return false; + + MemDepResult local_dep = MD->getDependency(Call); + if (!local_dep.isNonLocal()) + return false; + + const MemoryDependenceResults::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(Call); + + // Check to see if the Call has no function local clobber. + for (unsigned i = 0; i < deps.size(); i++) { + if (deps[i].getResult().isNonFuncLocal()) + return true; + } + return false; +} + /// Translate value number \p Num using phis, so that it has the values of /// the phis in BB. 
uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, @@ -1568,8 +1681,11 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, } } - if (uint32_t NewNum = expressionNumbering[Exp]) + if (uint32_t NewNum = expressionNumbering[Exp]) { + if (Exp.opcode == Instruction::Call && NewNum != Num) + return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num; return NewNum; + } return Num; } @@ -1637,16 +1753,12 @@ void GVN::assignBlockRPONumber(Function &F) { InvalidBlockRPONumbers = false; } -// Tries to replace instruction with const, using information from -// ReplaceWithConstMap. -bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { +bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const { bool Changed = false; for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { - Value *Operand = Instr->getOperand(OpNum); - auto it = ReplaceWithConstMap.find(Operand); - if (it != ReplaceWithConstMap.end()) { - assert(!isa(Operand) && - "Replacing constants with constants is invalid"); + Value *Operand = Instr->getOperand(OpNum); + auto it = ReplaceOperandsWithMap.find(Operand); + if (it != ReplaceOperandsWithMap.end()) { LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second << " in instruction " << *Instr << '\n'); Instr->setOperand(OpNum, it->second); @@ -1976,6 +2088,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, MD = RunMD; ImplicitControlFlowTracking ImplicitCFT(DT); ICF = &ImplicitCFT; + this->LI = LI; VN.setMemDep(MD); ORE = RunORE; InvalidBlockRPONumbers = true; @@ -2037,13 +2150,13 @@ bool GVN::processBlock(BasicBlock *BB) { return false; // Clearing map before every BB because it can be used only for single BB. 
- ReplaceWithConstMap.clear(); + ReplaceOperandsWithMap.clear(); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - if (!ReplaceWithConstMap.empty()) - ChangedFunction |= replaceOperandsWithConsts(&*BI); + if (!ReplaceOperandsWithMap.empty()) + ChangedFunction |= replaceOperandsForInBlockEquality(&*BI); ChangedFunction |= processInstruction(&*BI); if (InstrsToErase.empty()) { @@ -2335,7 +2448,7 @@ bool GVN::performPRE(Function &F) { /// the block inserted to the critical edge. BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { BasicBlock *BB = - SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); + SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT, LI)); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2350,7 +2463,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(DT)); + CriticalEdgeSplittingOptions(DT, LI)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); InvalidBlockRPONumbers = true; @@ -2456,18 +2569,26 @@ void GVN::addDeadBlock(BasicBlock *BB) { if (DeadBlocks.count(B)) continue; + // First, split the critical edges. This might also create additional blocks + // to preserve LoopSimplify form and adjust edges accordingly. SmallVector Preds(pred_begin(B), pred_end(B)); for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; - if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { + if (llvm::any_of(successors(P), + [B](BasicBlock *Succ) { return Succ == B; }) && + isCriticalEdge(P->getTerminator(), B)) { if (BasicBlock *S = splitCriticalEdges(P, B)) DeadBlocks.insert(P = S); } + } - for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { - PHINode &Phi = cast(*II); + // Now undef the incoming values from the dead predecessors. 
+ for (BasicBlock *P : predecessors(B)) { + if (!DeadBlocks.count(P)) + continue; + for (PHINode &Phi : B->phis()) { Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType())); if (MD) MD->invalidateCachedPointerInfo(&Phi); @@ -2544,10 +2665,11 @@ public: return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), getAnalysis().getDomTree(), - getAnalysis().getTLI(), + getAnalysis().getTLI(F), getAnalysis().getAAResults(), - NoMemDepAnalysis ? nullptr - : &getAnalysis().getMemDep(), + NoMemDepAnalysis + ? nullptr + : &getAnalysis().getMemDep(), LIWP ? &LIWP->getLoopInfo() : nullptr, &getAnalysis().getORE()); } @@ -2556,6 +2678,7 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); if (!NoMemDepAnalysis) AU.addRequired(); AU.addRequired(); @@ -2563,6 +2686,8 @@ public: AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); AU.addRequired(); } diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 7614599653c..c87e41484b1 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -257,7 +257,7 @@ public: GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), - MSSAUpdater(llvm::make_unique(MSSA)) {} + MSSAUpdater(std::make_unique(MSSA)) {} bool run(Function &F) { NumFuncArgs = F.arg_size(); @@ -539,7 +539,7 @@ private: // Check for unsafe hoistings due to side effects. 
if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) + if (hasEHOrLoadsOnPath(NewPt, cast(U), NBBsOnAllPaths)) return false; } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) return false; @@ -889,19 +889,18 @@ private: void updateAlignment(Instruction *I, Instruction *Repl) { if (auto *ReplacementLoad = dyn_cast(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlignment(), - cast(I)->getAlignment())); + ReplacementLoad->setAlignment(MaybeAlign(std::min( + ReplacementLoad->getAlignment(), cast(I)->getAlignment()))); ++NumLoadsRemoved; } else if (auto *ReplacementStore = dyn_cast(Repl)) { ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlignment(), - cast(I)->getAlignment())); + MaybeAlign(std::min(ReplacementStore->getAlignment(), + cast(I)->getAlignment()))); ++NumStoresRemoved; } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { ReplacementAlloca->setAlignment( - std::max(ReplacementAlloca->getAlignment(), - cast(I)->getAlignment())); + MaybeAlign(std::max(ReplacementAlloca->getAlignment(), + cast(I)->getAlignment()))); } else if (isa(Repl)) { ++NumCallsRemoved; } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index e14f44bb706..2697d780956 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -591,7 +591,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, else Result = RC.getCheckInst(); } - + assert(Result && "Failed to find result value"); Result->setName("wide.chk"); } return true; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index f9fc698a4a9..5519a00c12c 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -124,6 +124,11 @@ static cl::opt DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization")); 
+static cl::opt +LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), + cl::desc("Predicate conditions in read only loops")); + + namespace { struct RewritePhi; @@ -144,7 +149,11 @@ class IndVarSimplify { bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - bool optimizeLoopExits(Loop *L); + /// Try to eliminate loop exits based on analyzeable exit counts + bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); + /// Try to form loop invariant tests for loop exits by changing how many + /// iterations of the loop run when that is unobservable. + bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); @@ -628,12 +637,30 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (!SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? 
+ const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa(ExitCount)) + continue; + if (auto *AddRec = dyn_cast(SE->getSCEV(Inst))) + if (AddRec->getLoop() == L) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) + continue; + } + // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. Avoid doing so unless we know we have a value which computes @@ -804,7 +831,7 @@ bool IndVarSimplify::canLoopBeDeleted( L->getExitingBlocks(ExitingBlocks); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) return false; BasicBlock *ExitBlock = ExitBlocks[0]; @@ -1654,6 +1681,10 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { return nullptr; } + // if we reached this point then we are going to replace + // DU.NarrowUse with WideUse. Reattach DbgValue then. + replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); + ExtendKindMap[DU.NarrowUse] = WideAddRec.second; // Returning WideUse pushes it on the worklist. return WideUse; @@ -1779,14 +1810,9 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { DeadInsts.emplace_back(DU.NarrowDef); } - // Attach any debug information to the new PHI. Since OrigPhi and WidePHI - // evaluate the same recurrence, we can just copy the debug info over. - SmallVector DbgValues; - llvm::findDbgValues(DbgValues, OrigPhi); - auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), - ValueAsMetadata::get(WidePhi)); - for (auto &DbgValue : DbgValues) - DbgValue->setOperand(0, MDPhi); + // Attach any debug information to the new PHI. 
+ replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); + return WidePhi; } @@ -1817,8 +1843,8 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef, auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); auto CmpConstrainedLHSRange = ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); - auto NarrowDefRange = - CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); + auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( + *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); }; @@ -2242,8 +2268,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) continue; - const auto *AR = dyn_cast(SE->getSCEV(Phi)); - + const auto *AR = cast(SE->getSCEV(Phi)); + // AR may be a pointer type, while BECount is an integer type. // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. @@ -2624,74 +2650,125 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { return MadeAnyChanges; } -bool IndVarSimplify::optimizeLoopExits(Loop *L) { +/// Return a symbolic upper bound for the backedge taken count of the loop. +/// This is more general than getConstantMaxBackedgeTakenCount as it returns +/// an arbitrary expression as opposed to only constants. +/// TODO: Move into the ScalarEvolution class. +static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, + DominatorTree &DT, Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // Form an expression for the maximum exit count possible for this loop. We // merge the max and exact information to approximate a version of - // getMaxBackedgeTakenInfo which isn't restricted to just constants. - // TODO: factor this out as a version of getMaxBackedgeTakenCount which - // isn't guaranteed to return a constant. 
+ // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. SmallVector ExitCounts; - const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L); + const SCEV *MaxConstEC = SE.getConstantMaxBackedgeTakenCount(L); if (!isa(MaxConstEC)) ExitCounts.push_back(MaxConstEC); for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); if (!isa(ExitCount)) { - assert(DT->dominates(ExitingBB, L->getLoopLatch()) && + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && "We should only have known counts for exiting blocks that " "dominate latch!"); ExitCounts.push_back(ExitCount); } } if (ExitCounts.empty()) - return false; - const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); + return SE.getCouldNotCompute(); + return SE.getUMinFromMismatchedTypes(ExitCounts); +} - bool Changed = false; - for (BasicBlock *ExitingBB : ExitingBlocks) { +bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Remove all exits which aren't both rewriteable and analyzeable. + auto NewEnd = llvm::remove_if(ExitingBlocks, + [&](BasicBlock *ExitingBB) { // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) - continue; + return true; // Can't rewrite non-branch yet. BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); if (!BI) - continue; + return true; // If already constant, nothing to do. 
if (isa(BI->getCondition())) - continue; + return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa(ExitCount)) - continue; + return true; + return false; + }); + ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); + if (ExitingBlocks.empty()) + return false; + + // Get a symbolic upper bound on the loop backedge taken count. + const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); + if (isa(MaxExitCount)) + return false; + + // Visit our exit blocks in order of dominance. We know from the fact that + // all exits (left) are analyzeable that the must be a total dominance order + // between them as each must dominate the latch. The visit order only + // matters for the provably equal case. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + llvm_unreachable("expected total dominance order!"); + }); +#ifdef ASSERT + for (unsigned i = 1; i < ExitingBlocks.size(); i++) { + assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); + } +#endif + + auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { + BranchInst *BI = cast(ExitingBB->getTerminator()); + bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); + auto *OldCond = BI->getCondition(); + auto *NewCond = ConstantInt::get(OldCond->getType(), + IsTaken ? ExitIfTrue : !ExitIfTrue); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + }; + + bool Changed = false; + SmallSet DominatingExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + assert(!isa(ExitCount) && "checked above"); + // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. 
This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. // TODO: Given we know the backedge can't be taken, we should go ahead // and break it. Or at least, kill all the header phis and simplify. if (ExitCount->isZero()) { - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : - ConstantInt::getFalse(OldCond->getType()); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + FoldExit(ExitingBB, true); Changed = true; continue; } - // If we end up with a pointer exit count, bail. + // If we end up with a pointer exit count, bail. Note that we can end up + // with a pointer exit count for one exiting block, and not for another in + // the same loop. if (!ExitCount->getType()->isIntegerTy() || !MaxExitCount->getType()->isIntegerTy()) - return false; + continue; Type *WiderType = SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); @@ -2700,35 +2777,198 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L) { assert(MaxExitCount->getType() == ExitCount->getType()); // Can we prove that some other exit must be taken strictly before this - // one? TODO: handle cases where ule is known, and equality is covered - // by a dominating exit + // one? if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxExitCount, ExitCount)) { - bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); - auto *OldCond = BI->getCondition(); - auto *NewCond = ExitIfTrue ? 
ConstantInt::getFalse(OldCond->getType()) : - ConstantInt::getTrue(OldCond->getType()); - BI->setCondition(NewCond); - if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + FoldExit(ExitingBB, false); Changed = true; continue; } - // TODO: If we can prove that the exiting iteration is equal to the exit - // count for this exit and that no previous exit oppurtunities exist within - // the loop, then we can discharge all other exits. (May fall out of - // previous TODO.) - - // TODO: If we can't prove any relation between our exit count and the - // loops exit count, but taking this exit doesn't require actually running - // the loop (i.e. no side effects, no computed values used in exit), then - // we can replace the exit test with a loop invariant test which exits on - // the first iteration. + // As we run, keep track of which exit counts we've encountered. If we + // find a duplicate, we've found an exit which would have exited on the + // exiting iteration, but (from the visit order) strictly follows another + // which does the same and is thus dead. + if (!DominatingExitCounts.insert(ExitCount).second) { + FoldExit(ExitingBB, false); + Changed = true; + continue; + } + + // TODO: There might be another oppurtunity to leverage SCEV's reasoning + // here. If we kept track of the min of dominanting exits so far, we could + // discharge exits with EC >= MDEC. This is less powerful than the existing + // transform (since later exits aren't considered), but potentially more + // powerful for any case where SCEV can prove a >=u b, but neither a == b + // or a >u b. Such a case is not currently known. } return Changed; } +bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + bool Changed = false; + + // Finally, see if we can rewrite our exit conditions into a loop invariant + // form. 
If we have a read-only loop, and we can tell that we must exit down + // a path which does not need any of the values computed within the loop, we + // can rewrite the loop to exit on the first iteration. Note that this + // doesn't either a) tell us the loop exits on the first iteration (unless + // *all* exits are predicateable) or b) tell us *which* exit might be taken. + // This transformation looks a lot like a restricted form of dead loop + // elimination, but restricted to read-only loops and without neccesssarily + // needing to kill the loop entirely. + if (!LoopPredication) + return Changed; + + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return Changed; + + // Note: ExactBTC is the exact backedge taken count *iff* the loop exits + // through *explicit* control flow. We have to eliminate the possibility of + // implicit exits (see below) before we know it's truly exact. + const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); + if (isa(ExactBTC) || + !SE->isLoopInvariant(ExactBTC, L) || + !isSafeToExpand(ExactBTC, *SE)) + return Changed; + + auto BadExit = [&](BasicBlock *ExitingBB) { + // If our exiting block exits multiple loops, we can only rewrite the + // innermost one. Otherwise, we're changing how many times the innermost + // loop runs before it exits. + if (LI->getLoopFor(ExitingBB) != L) + return true; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + return true; + + // If already constant, nothing to do. + if (isa(BI->getCondition())) + return true; + + // If the exit block has phis, we need to be able to compute the values + // within the loop which contains them. This assumes trivially lcssa phis + // have already been removed; TODO: generalize + BasicBlock *ExitBlock = + BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 
1 : 0); + if (!ExitBlock->phis().empty()) + return true; + + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + assert(!isa(ExactBTC) && "implied by having exact trip count"); + if (!SE->isLoopInvariant(ExitCount, L) || + !isSafeToExpand(ExitCount, *SE)) + return true; + + return false; + }; + + // If we have any exits which can't be predicated themselves, than we can't + // predicate any exit which isn't guaranteed to execute before it. Consider + // two exits (a) and (b) which would both exit on the same iteration. If we + // can predicate (b), but not (a), and (a) preceeds (b) along some path, then + // we could convert a loop from exiting through (a) to one exiting through + // (b). Note that this problem exists only for exits with the same exit + // count, and we could be more aggressive when exit counts are known inequal. + llvm::sort(ExitingBlocks, + [&](BasicBlock *A, BasicBlock *B) { + // std::sort sorts in ascending order, so we want the inverse of + // the normal dominance relation, plus a tie breaker for blocks + // unordered by dominance. + if (DT->properlyDominates(A, B)) return true; + if (DT->properlyDominates(B, A)) return false; + return A->getName() < B->getName(); + }); + // Check to see if our exit blocks are a total order (i.e. a linear chain of + // exits before the backedge). If they aren't, reasoning about reachability + // is complicated and we choose not to for now. + for (unsigned i = 1; i < ExitingBlocks.size(); i++) + if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) + return Changed; + + // Given our sorted total order, we know that exit[j] must be evaluated + // after all exit[i] such j > i. + for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) + if (BadExit(ExitingBlocks[i])) { + ExitingBlocks.resize(i); + break; + } + + if (ExitingBlocks.empty()) + return Changed; + + // We rely on not being able to reach an exiting block on a later iteration + // then it's statically compute exit count. 
The implementaton of + // getExitCount currently has this invariant, but assert it here so that + // breakage is obvious if this ever changes.. + assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { + return DT->dominates(ExitingBB, L->getLoopLatch()); + })); + + // At this point, ExitingBlocks consists of only those blocks which are + // predicatable. Given that, we know we have at least one exit we can + // predicate if the loop is doesn't have side effects and doesn't have any + // implicit exits (because then our exact BTC isn't actually exact). + // @Reviewers - As structured, this is O(I^2) for loop nests. Any + // suggestions on how to improve this? I can obviously bail out for outer + // loops, but that seems less than ideal. MemorySSA can find memory writes, + // is that enough for *all* side effects? + for (BasicBlock *BB : L->blocks()) + for (auto &I : *BB) + // TODO:isGuaranteedToTransfer + if (I.mayHaveSideEffects() || I.mayThrow()) + return Changed; + + // Finally, do the actual predication for all predicatable blocks. A couple + // of notes here: + // 1) We don't bother to constant fold dominated exits with identical exit + // counts; that's simply a form of CSE/equality propagation and we leave + // it for dedicated passes. + // 2) We insert the comparison at the branch. Hoisting introduces additional + // legality constraints and we leave that to dedicated logic. We want to + // predicate even if we can't insert a loop invariant expression as + // peeling or unrolling will likely reduce the cost of the otherwise loop + // varying check. 
+ Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); + IRBuilder<> B(L->getLoopPreheader()->getTerminator()); + Value *ExactBTCV = nullptr; //lazy generated if needed + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); + + auto *BI = cast(ExitingBB->getTerminator()); + Value *NewCond; + if (ExitCount == ExactBTC) { + NewCond = L->contains(BI->getSuccessor(0)) ? + B.getFalse() : B.getTrue(); + } else { + Value *ECV = Rewriter.expandCodeFor(ExitCount); + if (!ExactBTCV) + ExactBTCV = Rewriter.expandCodeFor(ExactBTC); + Value *RHS = ExactBTCV; + if (ECV->getType() != RHS->getType()) { + Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); + ECV = B.CreateZExt(ECV, WiderTy); + RHS = B.CreateZExt(RHS, WiderTy); + } + auto Pred = L->contains(BI->getSuccessor(0)) ? + ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; + NewCond = B.CreateICmp(Pred, ECV, RHS); + } + Value *OldCond = BI->getCondition(); + BI->setCondition(NewCond); + if (OldCond->use_empty()) + DeadInsts.push_back(OldCond); + Changed = true; + } + + return Changed; +} + //===----------------------------------------------------------------------===// // IndVarSimplify driver. Manage several subpasses of IV simplification. //===----------------------------------------------------------------------===// @@ -2755,7 +2995,10 @@ bool IndVarSimplify::run(Loop *L) { // transform them to use integer recurrences. Changed |= rewriteNonIntegerIVs(L); +#ifndef NDEBUG + // Used below for a consistency check only const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); +#endif // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE, DL, "indvars"); @@ -2772,20 +3015,22 @@ bool IndVarSimplify::run(Loop *L) { Rewriter.disableCanonicalMode(); Changed |= simplifyAndExtend(L, Rewriter, LI); - // Check to see if this loop has a computable loop-invariant execution count. 
- // If so, this means that we can compute the final value of any expressions + // Check to see if we can compute the final value of any expressions // that are recurrent in the loop, and substitute the exit values from the - // loop into any instructions outside of the loop that use the final values of - // the current expressions. - // - if (ReplaceExitValue != NeverRepl && - !isa(BackedgeTakenCount)) + // loop into any instructions outside of the loop that use the final values + // of the current expressions. + if (ReplaceExitValue != NeverRepl) Changed |= rewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - Changed |= optimizeLoopExits(L); + // Try to eliminate loop exits based on analyzeable exit counts + Changed |= optimizeLoopExits(L, Rewriter); + + // Try to form loop invariant tests for loop exits by changing how many + // iterations of the loop run when that is unobservable. + Changed |= predicateLoopExits(L, Rewriter); // If we have a trip count expression, rewrite the loop's exit condition // using it. @@ -2825,7 +3070,7 @@ bool IndVarSimplify::run(Loop *L) { // that our definition of "high cost" is not exactly principled. if (Rewriter.isHighCostExpansion(ExitCount, L)) continue; - + // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. Instead // any pass that uses the SCEVExpander must do it. This does not work @@ -2924,7 +3169,7 @@ struct IndVarSimplifyLegacyPass : public LoopPass { auto *SE = &getAnalysis().getSE(); auto *DT = &getAnalysis().getDomTree(); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; auto *TTIP = getAnalysisIfAvailable(); auto *TTI = TTIP ? 
&TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5f0e2001c73..e7e73a132fb 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -141,6 +141,8 @@ using ValueToAddrSpaceMapTy = DenseMap; /// InferAddressSpaces class InferAddressSpaces : public FunctionPass { + const TargetTransformInfo *TTI; + /// Target specific address space which uses of should be replaced if /// possible. unsigned FlatAddrSpace; @@ -264,17 +266,6 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, Module *M = II->getParent()->getParent()->getParent(); switch (II->getIntrinsicID()) { - case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: { - const ConstantInt *IsVolatile = dyn_cast(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isZero()) - return false; - - LLVM_FALLTHROUGH; - } case Intrinsic::objectsize: { Type *DestTy = II->getType(); Type *SrcTy = NewV->getType(); @@ -285,25 +276,27 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, return true; } default: - return false; + return TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); } } -// TODO: Move logic to TTI? 
void InferAddressSpaces::collectRewritableIntrinsicOperands( IntrinsicInst *II, std::vector> &PostorderStack, DenseSet &Visited) const { - switch (II->getIntrinsicID()) { + auto IID = II->getIntrinsicID(); + switch (IID) { case Intrinsic::objectsize: - case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: - case Intrinsic::amdgcn_ds_fadd: - case Intrinsic::amdgcn_ds_fmin: - case Intrinsic::amdgcn_ds_fmax: appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; default: + SmallVector OpIndexes; + if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { + for (int Idx : OpIndexes) { + appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(Idx), + PostorderStack, Visited); + } + } break; } } @@ -631,11 +624,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { if (skipFunction(F)) return false; - const TargetTransformInfo &TTI = - getAnalysis().getTTI(F); + TTI = &getAnalysis().getTTI(F); if (FlatAddrSpace == UninitializedAddressSpace) { - FlatAddrSpace = TTI.getFlatAddressSpace(); + FlatAddrSpace = TTI->getFlatAddressSpace(); if (FlatAddrSpace == UninitializedAddressSpace) return false; } @@ -650,7 +642,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { // Changes the address spaces of the flat address expressions who are inferred // to point to a specific address space. 
- return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F); + return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F); } // Constants need to be tracked through RAUW to handle cases with nested diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp index 6616364ab20..ec28f790f25 100644 --- a/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -33,37 +33,39 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, bool Changed = false; do { - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - // Here be subtlety: the iterator must be incremented before the loop - // body (not sure why), so a range-for loop won't work here. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = &*BI++; - // The first time through the loop ToSimplify is empty and we try to - // simplify all instructions. On later iterations ToSimplify is not + for (BasicBlock &BB : F) { + // Unreachable code can take on strange forms that we are not prepared to + // handle. For example, an instruction may have itself as an operand. + if (!SQ.DT->isReachableFromEntry(&BB)) + continue; + + SmallVector DeadInstsInBB; + for (Instruction &I : BB) { + // The first time through the loop, ToSimplify is empty and we try to + // simplify all instructions. On later iterations, ToSimplify is not // empty and we only bother simplifying instructions that are in it. - if (!ToSimplify->empty() && !ToSimplify->count(I)) + if (!ToSimplify->empty() && !ToSimplify->count(&I)) continue; - // Don't waste time simplifying unused instructions. - if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, SQ, ORE)) { + // Don't waste time simplifying dead/unused instructions. 
+ if (isInstructionTriviallyDead(&I)) { + DeadInstsInBB.push_back(&I); + Changed = true; + } else if (!I.use_empty()) { + if (Value *V = SimplifyInstruction(&I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. - for (User *U : I->users()) + for (User *U : I.users()) Next->insert(cast(U)); - I->replaceAllUsesWith(V); + I.replaceAllUsesWith(V); ++NumSimplified; Changed = true; + // A call can get simplified, but it may not be trivially dead. + if (isInstructionTriviallyDead(&I)) + DeadInstsInBB.push_back(&I); } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { - // RecursivelyDeleteTriviallyDeadInstruction can remove more than one - // instruction, so simply incrementing the iterator does not work. - // When instructions get deleted re-iterate instead. - BI = BB->begin(); - BE = BB->end(); - Changed = true; - } } + RecursivelyDeleteTriviallyDeadInstructions(DeadInstsInBB, SQ.TLI); } // Place the list of instructions to simplify on the next loop iteration @@ -90,7 +92,7 @@ struct InstSimplifyLegacyPass : public FunctionPass { AU.addRequired(); } - /// runOnFunction - Remove instructions that simplify. + /// Remove instructions that simplify. 
bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -98,7 +100,7 @@ struct InstSimplifyLegacyPass : public FunctionPass { const DominatorTree *DT = &getAnalysis().getDomTree(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); AssumptionCache *AC = &getAnalysis().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index b86bf2fefbe..0cf00baaa24 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -224,13 +224,21 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { BasicBlock *PhiBB) -> std::pair { auto *PredBB = IncomingBB; auto *SuccBB = PhiBB; + SmallPtrSet Visited; while (true) { BranchInst *PredBr = dyn_cast(PredBB->getTerminator()); if (PredBr && PredBr->isConditional()) return {PredBB, SuccBB}; + Visited.insert(PredBB); auto *SinglePredBB = PredBB->getSinglePredecessor(); if (!SinglePredBB) return {nullptr, nullptr}; + + // Stop searching when SinglePredBB has been visited. It means we see + // an unreachable loop. + if (Visited.count(SinglePredBB)) + return {nullptr, nullptr}; + SuccBB = PredBB; PredBB = SinglePredBB; } @@ -253,7 +261,9 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { return; BasicBlock *PredBB = PredOutEdge.first; - BranchInst *PredBr = cast(PredBB->getTerminator()); + BranchInst *PredBr = dyn_cast(PredBB->getTerminator()); + if (!PredBr) + return; uint64_t PredTrueWeight, PredFalseWeight; // FIXME: We currently only set the profile data when it is missing. @@ -286,7 +296,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; - auto TLI = &getAnalysis().getTLI(); + auto TLI = &getAnalysis().getTLI(F); // Get DT analysis before LVI. 
When LVI is initialized it conditionally adds // DT if it's available. auto DT = &getAnalysis().getDomTree(); @@ -1461,7 +1471,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { "Can't handle critical edge here!"); LoadInst *NewVal = new LoadInst( LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), - LoadI->getName() + ".pr", false, LoadI->getAlignment(), + LoadI->getName() + ".pr", false, MaybeAlign(LoadI->getAlignment()), LoadI->getOrdering(), LoadI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LoadI->getDebugLoc()); @@ -2423,7 +2433,7 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, // |----- // v // BB - BranchInst *PredTerm = dyn_cast(Pred->getTerminator()); + BranchInst *PredTerm = cast(Pred->getTerminator()); BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", BB->getParent(), BB); // Move the unconditional branch to NewBB. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d9dda4cef2d..6ce4831a735 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -220,7 +220,8 @@ struct LegacyLICMPass : public LoopPass { &getAnalysis().getAAResults(), &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), - &getAnalysis().getTLI(), + &getAnalysis().getTLI( + *L->getHeader()->getParent()), &getAnalysis().getTTI( *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE, false); @@ -294,7 +295,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, PA.preserve(); PA.preserve(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; @@ -330,6 +331,12 @@ bool LoopInvariantCodeMotion::runOnLoop( assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); + // If this loop has metadata indicating that LICM is not to be performed then + // just exit. 
+ if (hasDisableLICMTransformsHint(L)) { + return false; + } + std::unique_ptr CurAST; std::unique_ptr MSSAU; bool NoOfMemAccTooLarge = false; @@ -340,7 +347,7 @@ bool LoopInvariantCodeMotion::runOnLoop( CurAST = collectAliasInfoForLoop(L, LI, AA); } else { LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); unsigned AccessCapCount = 0; for (auto *BB : L->getBlocks()) { @@ -956,7 +963,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Now that we've finished hoisting make sure that LI and DT are still // valid. -#ifndef NDEBUG +#ifdef EXPENSIVE_CHECKS if (Changed) { assert(DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed"); @@ -1026,7 +1033,8 @@ namespace { bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. return (isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || @@ -1092,7 +1100,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - if (LI->getMetadata(LLVMContext::MD_invariant_load)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; if (LI->isAtomic() && !TargetExecutesOncePerLoop) @@ -1240,12 +1248,22 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // FIXME: More precise: no Uses that alias SI. if (!Flags->IsSink && !MSSA->dominates(SIMD, MU)) return false; - } else if (const auto *MD = dyn_cast(&MA)) + } else if (const auto *MD = dyn_cast(&MA)) { if (auto *LI = dyn_cast(MD->getMemoryInst())) { (void)LI; // Silence warning. 
assert(!LI->isUnordered() && "Expected unordered load"); return false; } + // Any call, while it may not be clobbering SI, it may be a use. + if (auto *CI = dyn_cast(MD->getMemoryInst())) { + // Check if the call may read from the memory locattion written + // to by SI. Check CI's attributes and arguments; the number of + // such checks performed is limited above by NoOfMemAccTooLarge. + ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); + if (isModOrRefSet(MRI)) + return false; + } + } } auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); @@ -1375,8 +1393,7 @@ static Instruction *CloneInstructionInExitBlock( if (!I.getName().empty()) New->setName(I.getName() + ".le"); - MemoryAccess *OldMemAcc; - if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) { + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( New, nullptr, New->getParent(), MemorySSA::Beginning); @@ -1385,7 +1402,7 @@ static Instruction *CloneInstructionInExitBlock( MSSAU->insertDef(MemDef, /*RenameUses=*/true); else { auto *MemUse = cast(NewMemAcc); - MSSAU->insertUse(MemUse); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); } } } @@ -1783,7 +1800,7 @@ public: StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); - NewSI->setAlignment(Alignment); + NewSI->setAlignment(MaybeAlign(Alignment)); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); @@ -2016,7 +2033,8 @@ bool llvm::promoteLoopAccessesToScalars( if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getValueOperand()->getType(), - Store->getAlignment(), MDL, Preheader->getTerminator(), DT); + MaybeAlign(Store->getAlignment()), MDL, + Preheader->getTerminator(), DT); } } else return false; // Not a load 
or store. @@ -2101,20 +2119,21 @@ bool llvm::promoteLoopAccessesToScalars( SomePtr->getName() + ".promoted", Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); - PreheaderLoad->setAlignment(Alignment); + PreheaderLoad->setAlignment(MaybeAlign(Alignment)); PreheaderLoad->setDebugLoc(DL); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); - MemoryAccess *PreheaderLoadMemoryAccess; if (MSSAU) { - PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( + MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); - MSSAU->insertUse(NewMemUse); + MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); @@ -2161,7 +2180,7 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI, LoopToAliasSetMap.erase(MapI); } if (!CurAST) - CurAST = make_unique(*AA); + CurAST = std::make_unique(*AA); // Add everything from the sub loops that are no longer directly available. 
for (Loop *InnerL : RecomputeLoops) @@ -2180,7 +2199,7 @@ std::unique_ptr LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) { auto *MSSA = MSSAU->getMemorySSA(); - auto CurAST = make_unique(*AA, MSSA, L); + auto CurAST = std::make_unique(*AA, MSSA, L); CurAST->addAllInstructionsInLoopUsingMSSA(); return CurAST; } diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 1fcf1315a17..a972d6fa2fc 100644 --- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -312,8 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { IRBuilder<> Builder(MemI); Module *M = BB->getParent()->getParent(); Type *I32 = Type::getInt32Ty(BB->getContext()); - Function *PrefetchFunc = - Intrinsic::getDeclaration(M, Intrinsic::prefetch); + Function *PrefetchFunc = Intrinsic::getDeclaration( + M, Intrinsic::prefetch, PrefPtrValue->getType()); Builder.CreateCall( PrefetchFunc, {PrefPtrValue, diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 8371367e24e..cee197cf835 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -191,7 +191,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - const SCEV *S = SE.getMaxBackedgeTakenCount(L); + const SCEV *S = SE.getConstantMaxBackedgeTakenCount(L); if (isa(S)) { LLVM_DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed ? 
LoopDeletionResult::Modified diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp index 0bc2bcff2ae..9f93c68e612 100644 --- a/lib/Transforms/Scalar/LoopFuse.cpp +++ b/lib/Transforms/Scalar/LoopFuse.cpp @@ -66,7 +66,7 @@ using namespace llvm; #define DEBUG_TYPE "loop-fusion" -STATISTIC(FuseCounter, "Count number of loop fusions performed"); +STATISTIC(FuseCounter, "Loops fused"); STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion"); STATISTIC(InvalidPreheader, "Loop has invalid preheader"); STATISTIC(InvalidHeader, "Loop has invalid header"); @@ -79,12 +79,15 @@ STATISTIC(MayThrowException, "Loop may throw an exception"); STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access"); STATISTIC(NotSimplifiedForm, "Loop is not in simplified form"); STATISTIC(InvalidDependencies, "Dependencies prevent fusion"); -STATISTIC(InvalidTripCount, - "Loop does not have invariant backedge taken count"); +STATISTIC(UnknownTripCount, "Loop has unknown trip count"); STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop"); -STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same"); -STATISTIC(NonAdjacent, "Candidates are not adjacent"); -STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader"); +STATISTIC(NonEqualTripCount, "Loop trip counts are not the same"); +STATISTIC(NonAdjacent, "Loops are not adjacent"); +STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader"); +STATISTIC(FusionNotBeneficial, "Fusion is not beneficial"); +STATISTIC(NonIdenticalGuards, "Candidates have different guards"); +STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block"); +STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block"); enum FusionDependenceAnalysisChoice { FUSION_DEPENDENCE_ANALYSIS_SCEV, @@ -110,6 +113,7 @@ static cl::opt cl::Hidden, cl::init(false), cl::ZeroOrMore); #endif +namespace { /// This class is used to represent a candidate for loop fusion. 
When it is /// constructed, it checks the conditions for loop fusion to ensure that it /// represents a valid candidate. It caches several parts of a loop that are @@ -143,6 +147,8 @@ struct FusionCandidate { SmallVector MemWrites; /// Are all of the members of this fusion candidate still valid bool Valid; + /// Guard branch of the loop, if it exists + BranchInst *GuardBranch; /// Dominator and PostDominator trees are needed for the /// FusionCandidateCompare function, required by FusionCandidateSet to @@ -151,11 +157,20 @@ struct FusionCandidate { const DominatorTree *DT; const PostDominatorTree *PDT; + OptimizationRemarkEmitter &ORE; + FusionCandidate(Loop *L, const DominatorTree *DT, - const PostDominatorTree *PDT) + const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE) : Preheader(L->getLoopPreheader()), Header(L->getHeader()), ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()), - Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) { + Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr), + DT(DT), PDT(PDT), ORE(ORE) { + + // TODO: This is temporary while we fuse both rotated and non-rotated + // loops. Once we switch to only fusing rotated loops, the initialization of + // GuardBranch can be moved into the initialization list above. + if (isRotated()) + GuardBranch = L->getLoopGuardBranch(); // Walk over all blocks in the loop and check for conditions that may // prevent fusion. For each block, walk over all instructions and collect @@ -163,28 +178,28 @@ struct FusionCandidate { // found, invalidate this object and return. 
for (BasicBlock *BB : L->blocks()) { if (BB->hasAddressTaken()) { - AddressTakenBB++; invalidate(); + reportInvalidCandidate(AddressTakenBB); return; } for (Instruction &I : *BB) { if (I.mayThrow()) { - MayThrowException++; invalidate(); + reportInvalidCandidate(MayThrowException); return; } if (StoreInst *SI = dyn_cast(&I)) { if (SI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } if (LoadInst *LI = dyn_cast(&I)) { if (LI->isVolatile()) { - ContainsVolatileAccess++; invalidate(); + reportInvalidCandidate(ContainsVolatileAccess); return; } } @@ -214,19 +229,96 @@ struct FusionCandidate { assert(Latch == L->getLoopLatch() && "Latch is out of sync"); } + /// Get the entry block for this fusion candidate. + /// + /// If this fusion candidate represents a guarded loop, the entry block is the + /// loop guard block. If it represents an unguarded loop, the entry block is + /// the preheader of the loop. + BasicBlock *getEntryBlock() const { + if (GuardBranch) + return GuardBranch->getParent(); + else + return Preheader; + } + + /// Given a guarded loop, get the successor of the guard that is not in the + /// loop. + /// + /// This method returns the successor of the loop guard that is not located + /// within the loop (i.e., the successor of the guard that is not the + /// preheader). + /// This method is only valid for guarded loops. + BasicBlock *getNonLoopBlock() const { + assert(GuardBranch && "Only valid on guarded loops."); + assert(GuardBranch->isConditional() && + "Expecting guard to be a conditional branch."); + return (GuardBranch->getSuccessor(0) == Preheader) + ? 
GuardBranch->getSuccessor(1) + : GuardBranch->getSuccessor(0); + } + + bool isRotated() const { + assert(L && "Expecting loop to be valid."); + assert(Latch && "Expecting latch to be valid."); + return L->isLoopExiting(Latch); + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { - dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr") + dbgs() << "\tGuardBranch: " + << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n" + << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr") << "\n" << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n" << "\tExitingBB: " << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n" << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr") << "\n" - << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"; + << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n" + << "\tEntryBlock: " + << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr") + << "\n"; } #endif + /// Determine if a fusion candidate (representing a loop) is eligible for + /// fusion. Note that this only checks whether a single loop can be fused - it + /// does not check whether it is *legal* to fuse two loops together. + bool isEligibleForFusion(ScalarEvolution &SE) const { + if (!isValid()) { + LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n"); + if (!Preheader) + ++InvalidPreheader; + if (!Header) + ++InvalidHeader; + if (!ExitingBlock) + ++InvalidExitingBlock; + if (!ExitBlock) + ++InvalidExitBlock; + if (!Latch) + ++InvalidLatch; + if (L->isInvalid()) + ++InvalidLoop; + + return false; + } + + // Require ScalarEvolution to be able to determine a trip count. 
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " trip count not computable!\n"); + return reportInvalidCandidate(UnknownTripCount); + } + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() + << " is not in simplified form!\n"); + return reportInvalidCandidate(NotSimplifiedForm); + } + + return true; + } + private: // This is only used internally for now, to clear the MemWrites and MemReads // list and setting Valid to false. I can't envision other uses of this right @@ -239,18 +331,19 @@ private: MemReads.clear(); Valid = false; } + + bool reportInvalidCandidate(llvm::Statistic &Stat) const { + using namespace ore; + assert(L && Preheader && "Fusion candidate not initialized properly!"); + ++Stat; + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(), + L->getStartLoc(), Preheader) + << "[" << Preheader->getParent()->getName() << "]: " + << "Loop is not a candidate for fusion: " << Stat.getDesc()); + return false; + } }; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FusionCandidate &FC) { - if (FC.isValid()) - OS << FC.Preheader->getName(); - else - OS << ""; - - return OS; -} - struct FusionCandidateCompare { /// Comparison functor to sort two Control Flow Equivalent fusion candidates /// into dominance order. @@ -260,21 +353,24 @@ struct FusionCandidateCompare { const FusionCandidate &RHS) const { const DominatorTree *DT = LHS.DT; + BasicBlock *LHSEntryBlock = LHS.getEntryBlock(); + BasicBlock *RHSEntryBlock = RHS.getEntryBlock(); + // Do not save PDT to local variable as it is only used in asserts and thus // will trigger an unused variable warning if building without asserts. assert(DT && LHS.PDT && "Expecting valid dominator tree"); // Do this compare first so if LHS == RHS, function returns false. 
- if (DT->dominates(RHS.Preheader, LHS.Preheader)) { + if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) { // RHS dominates LHS // Verify LHS post-dominates RHS - assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader)); + assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock)); return false; } - if (DT->dominates(LHS.Preheader, RHS.Preheader)) { + if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) { // Verify RHS Postdominates LHS - assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader)); + assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock)); return true; } @@ -286,7 +382,6 @@ struct FusionCandidateCompare { } }; -namespace { using LoopVector = SmallVector; // Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance @@ -301,17 +396,26 @@ using LoopVector = SmallVector; // keeps the FusionCandidateSet sorted will also simplify the implementation. using FusionCandidateSet = std::set; using FusionCandidateCollection = SmallVector; -} // namespace -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const FusionCandidateSet &CandSet) { - for (auto IT : CandSet) - OS << IT << "\n"; +#if !defined(NDEBUG) +static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const FusionCandidate &FC) { + if (FC.isValid()) + OS << FC.Preheader->getName(); + else + OS << ""; + + return OS; +} + +static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const FusionCandidateSet &CandSet) { + for (const FusionCandidate &FC : CandSet) + OS << FC << '\n'; return OS; } -#if !defined(NDEBUG) static void printFusionCandidates(const FusionCandidateCollection &FusionCandidates) { dbgs() << "Fusion Candidates: \n"; @@ -391,16 +495,6 @@ static void printLoopVector(const LoopVector &LV) { } #endif -static void reportLoopFusion(const FusionCandidate &FC0, - const FusionCandidate &FC1, - OptimizationRemarkEmitter &ORE) { - using namespace ore; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent()) - << "Fused " << NV("Cand1", 
StringRef(FC0.Preheader->getName())) - << " with " << NV("Cand2", StringRef(FC1.Preheader->getName()))); -} - struct LoopFuser { private: // Sets of control flow equivalent fusion candidates for a given nest level. @@ -497,62 +591,25 @@ private: const FusionCandidate &FC1) const { assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders"); - if (DT.dominates(FC0.Preheader, FC1.Preheader)) - return PDT.dominates(FC1.Preheader, FC0.Preheader); + BasicBlock *FC0EntryBlock = FC0.getEntryBlock(); + BasicBlock *FC1EntryBlock = FC1.getEntryBlock(); - if (DT.dominates(FC1.Preheader, FC0.Preheader)) - return PDT.dominates(FC0.Preheader, FC1.Preheader); + if (DT.dominates(FC0EntryBlock, FC1EntryBlock)) + return PDT.dominates(FC1EntryBlock, FC0EntryBlock); + + if (DT.dominates(FC1EntryBlock, FC0EntryBlock)) + return PDT.dominates(FC0EntryBlock, FC1EntryBlock); return false; } - /// Determine if a fusion candidate (representing a loop) is eligible for - /// fusion. Note that this only checks whether a single loop can be fused - it - /// does not check whether it is *legal* to fuse two loops together. - bool eligibleForFusion(const FusionCandidate &FC) const { - if (!FC.isValid()) { - LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n"); - if (!FC.Preheader) - InvalidPreheader++; - if (!FC.Header) - InvalidHeader++; - if (!FC.ExitingBlock) - InvalidExitingBlock++; - if (!FC.ExitBlock) - InvalidExitBlock++; - if (!FC.Latch) - InvalidLatch++; - if (FC.L->isInvalid()) - InvalidLoop++; - - return false; - } - - // Require ScalarEvolution to be able to determine a trip count. 
- if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " trip count not computable!\n"); - InvalidTripCount++; - return false; - } - - if (!FC.L->isLoopSimplifyForm()) { - LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName() - << " is not in simplified form!\n"); - NotSimplifiedForm++; - return false; - } - - return true; - } - /// Iterate over all loops in the given loop set and identify the loops that /// are eligible for fusion. Place all eligible fusion candidates into Control /// Flow Equivalent sets, sorted by dominance. void collectFusionCandidates(const LoopVector &LV) { for (Loop *L : LV) { - FusionCandidate CurrCand(L, &DT, &PDT); - if (!eligibleForFusion(CurrCand)) + FusionCandidate CurrCand(L, &DT, &PDT, ORE); + if (!CurrCand.isEligibleForFusion(SE)) continue; // Go through each list in FusionCandidates and determine if L is control @@ -664,31 +721,64 @@ private: if (!identicalTripCounts(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip " "counts. Not fusing.\n"); - NonEqualTripCount++; + reportLoopFusion(*FC0, *FC1, + NonEqualTripCount); continue; } if (!isAdjacent(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidates are not adjacent. Not fusing.\n"); - NonAdjacent++; + reportLoopFusion(*FC0, *FC1, NonAdjacent); continue; } - // For now we skip fusing if the second candidate has any instructions - // in the preheader. This is done because we currently do not have the - // safety checks to determine if it is save to move the preheader of - // the second candidate past the body of the first candidate. Once - // these checks are added, this condition can be removed. + // Ensure that FC0 and FC1 have identical guards. + // If one (or both) are not guarded, this check is not necessary. + if (FC0->GuardBranch && FC1->GuardBranch && + !haveIdenticalGuards(*FC0, *FC1)) { + LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical " + "guards. 
Not Fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonIdenticalGuards); + continue; + } + + // The following three checks look for empty blocks in FC0 and FC1. If + // any of these blocks are non-empty, we do not fuse. This is done + // because we currently do not have the safety checks to determine if + // it is safe to move the blocks past other blocks in the loop. Once + // these checks are added, these conditions can be relaxed. if (!isEmptyPreheader(*FC1)) { LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty " "preheader. Not fusing.\n"); - NonEmptyPreheader++; + reportLoopFusion(*FC0, *FC1, + NonEmptyPreheader); continue; } + if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) { + LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit " + "block. Not fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonEmptyExitBlock); + continue; + } + + if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) { + LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard " + "block. Not fusing.\n"); + reportLoopFusion(*FC0, *FC1, + NonEmptyGuardBlock); + continue; + } + + // Check the dependencies across the loops and do not fuse if it would + // violate them. if (!dependencesAllowFusion(*FC0, *FC1)) { LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n"); + reportLoopFusion(*FC0, *FC1, + InvalidDependencies); continue; } @@ -696,9 +786,11 @@ private: LLVM_DEBUG(dbgs() << "\tFusion appears to be " << (BeneficialToFuse ? "" : "un") << "profitable!\n"); - if (!BeneficialToFuse) + if (!BeneficialToFuse) { + reportLoopFusion(*FC0, *FC1, + FusionNotBeneficial); continue; - + } // All analysis has completed and has determined that fusion is legal // and profitable. At this point, start transforming the code and // perform fusion. @@ -710,15 +802,14 @@ private: // Note this needs to be done *before* performFusion because // performFusion will change the original loops, making it not // possible to identify them after fusion is complete. 
- reportLoopFusion(*FC0, *FC1, ORE); + reportLoopFusion(*FC0, *FC1, FuseCounter); - FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT); + FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE); FusedCand.verify(); - assert(eligibleForFusion(FusedCand) && + assert(FusedCand.isEligibleForFusion(SE) && "Fused candidate should be eligible for fusion!"); // Notify the loop-depth-tree that these loops are not valid objects - // anymore. LDT.removeLoop(FC1->L); CandidateSet.erase(FC0); @@ -889,7 +980,7 @@ private: LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1 << "\n"); assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth()); - assert(DT.dominates(FC0.Preheader, FC1.Preheader)); + assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock())); for (Instruction *WriteL0 : FC0.MemWrites) { for (Instruction *WriteL1 : FC1.MemWrites) @@ -939,18 +1030,89 @@ private: return true; } - /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this - /// case, there is no code in between the two fusion candidates, thus making - /// them adjacent. + /// Determine if two fusion candidates are adjacent in the CFG. + /// + /// This method will determine if there are additional basic blocks in the CFG + /// between the exit of \p FC0 and the entry of \p FC1. + /// If the two candidates are guarded loops, then it checks whether the + /// non-loop successor of the \p FC0 guard branch is the entry block of \p + /// FC1. If not, then the loops are not adjacent. If the two candidates are + /// not guarded loops, then it checks whether the exit block of \p FC0 is the + /// preheader of \p FC1. 
bool isAdjacent(const FusionCandidate &FC0, const FusionCandidate &FC1) const { - return FC0.ExitBlock == FC1.Preheader; + // If the successor of the guard branch is FC1, then the loops are adjacent + if (FC0.GuardBranch) + return FC0.getNonLoopBlock() == FC1.getEntryBlock(); + else + return FC0.ExitBlock == FC1.getEntryBlock(); + } + + /// Determine if two fusion candidates have identical guards + /// + /// This method will determine if two fusion candidates have the same guards. + /// The guards are considered the same if: + /// 1. The instructions to compute the condition used in the compare are + /// identical. + /// 2. The successors of the guard have the same flow into/around the loop. + /// If the compare instructions are identical, then the first successor of the + /// guard must go to the same place (either the preheader of the loop or the + /// NonLoopBlock). In other words, the the first successor of both loops must + /// both go into the loop (i.e., the preheader) or go around the loop (i.e., + /// the NonLoopBlock). The same must be true for the second successor. + bool haveIdenticalGuards(const FusionCandidate &FC0, + const FusionCandidate &FC1) const { + assert(FC0.GuardBranch && FC1.GuardBranch && + "Expecting FC0 and FC1 to be guarded loops."); + + if (auto FC0CmpInst = + dyn_cast(FC0.GuardBranch->getCondition())) + if (auto FC1CmpInst = + dyn_cast(FC1.GuardBranch->getCondition())) + if (!FC0CmpInst->isIdenticalTo(FC1CmpInst)) + return false; + + // The compare instructions are identical. + // Now make sure the successor of the guards have the same flow into/around + // the loop + if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader) + return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader); + else + return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader); + } + + /// Check that the guard for \p FC *only* contains the cmp/branch for the + /// guard. 
+ /// Once we are able to handle intervening code, any code in the guard block + /// for FC1 will need to be treated as intervening code and checked whether + /// it can safely move around the loops. + bool isEmptyGuardBlock(const FusionCandidate &FC) const { + assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch."); + if (auto *CmpInst = dyn_cast(FC.GuardBranch->getCondition())) { + auto *GuardBlock = FC.GuardBranch->getParent(); + // If the generation of the cmp value is in GuardBlock, then the size of + // the guard block should be 2 (cmp + branch). If the generation of the + // cmp value is in a different block, then the size of the guard block + // should only be 1. + if (CmpInst->getParent() == GuardBlock) + return GuardBlock->size() == 2; + else + return GuardBlock->size() == 1; + } + + return false; } bool isEmptyPreheader(const FusionCandidate &FC) const { + assert(FC.Preheader && "Expecting a valid preheader"); return FC.Preheader->size() == 1; } + bool isEmptyExitBlock(const FusionCandidate &FC) const { + assert(FC.ExitBlock && "Expecting a valid exit block"); + return FC.ExitBlock->size() == 1; + } + /// Fuse two fusion candidates, creating a new fused loop. /// /// This method contains the mechanics of fusing two loops, represented by \p @@ -987,6 +1149,12 @@ private: LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump(); dbgs() << "Fusion Candidate 1: \n"; FC1.dump();); + // Fusing guarded loops is handled slightly differently than non-guarded + // loops and has been broken out into a separate method instead of trying to + // intersperse the logic within a single method. 
+ if (FC0.GuardBranch) + return fuseGuardedLoops(FC0, FC1); + assert(FC1.Preheader == FC0.ExitBlock); assert(FC1.Preheader->size() == 1 && FC1.Preheader->getSingleSuccessor() == FC1.Header); @@ -1131,7 +1299,258 @@ private: SE.verify(); #endif - FuseCounter++; + LLVM_DEBUG(dbgs() << "Fusion done:\n"); + + return FC0.L; + } + + /// Report details on loop fusion opportunities. + /// + /// This template function can be used to report both successful and missed + /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should + /// be one of: + /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful + /// given two valid fusion candidates. + /// - OptimizationRemark to report successful fusion of two fusion + /// candidates. + /// The remarks will be printed using the form: + /// ::: []: + /// and : + template + void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1, + llvm::Statistic &Stat) { + assert(FC0.Preheader && FC1.Preheader && + "Expecting valid fusion candidates"); + using namespace ore; + ++Stat; + ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(), + FC0.Preheader) + << "[" << FC0.Preheader->getParent()->getName() + << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName())) + << " and " << NV("Cand2", StringRef(FC1.Preheader->getName())) + << ": " << Stat.getDesc()); + } + + /// Fuse two guarded fusion candidates, creating a new fused loop. + /// + /// Fusing guarded loops is handled much the same way as fusing non-guarded + /// loops. The rewiring of the CFG is slightly different though, because of + /// the presence of the guards around the loops and the exit blocks after the + /// loop body. As such, the new loop is rewired as follows: + /// 1. Keep the guard branch from FC0 and use the non-loop block target + /// from the FC1 guard branch. + /// 2. Remove the exit block from FC0 (this exit block should be empty + /// right now). + /// 3. Remove the guard branch for FC1 + /// 4. 
Remove the preheader for FC1. + /// The exit block successor for the latch of FC0 is updated to be the header + /// of FC1 and the non-exit block successor of the latch of FC1 is updated to + /// be the header of FC0, thus creating the fused loop. + Loop *fuseGuardedLoops(const FusionCandidate &FC0, + const FusionCandidate &FC1) { + assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops"); + + BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent(); + BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent(); + BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock(); + BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock(); + + assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent"); + + SmallVector TreeUpdates; + + //////////////////////////////////////////////////////////////////////////// + // Update the Loop Guard + //////////////////////////////////////////////////////////////////////////// + // The guard for FC0 is updated to guard both FC0 and FC1. This is done by + // changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1. + // Thus, one path from the guard goes to the preheader for FC0 (and thus + // executes the new fused loop) and the other path goes to the NonLoopBlock + // for FC1 (where FC1 guard would have gone if FC1 was not executed). + FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock); + FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock, + FC1.Header); + + // The guard of FC1 is not necessary anymore. 
+ FC1.GuardBranch->eraseFromParent(); + new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock); + + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1GuardBlock, FC1.Preheader)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock)); + + assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) && + "Expecting guard block to have no predecessors"); + assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) && + "Expecting guard block to have no successors"); + + // Remember the phi nodes originally in the header of FC0 in order to rewire + // them later. However, this is only necessary if the new loop carried + // values might not dominate the exiting branch. While we do not generally + // test if this is the case but simply insert intermediate phi nodes, we + // need to make sure these intermediate phi nodes have different + // predecessors. To this end, we filter the special case where the exiting + // block is the latch block of the first loop. Nothing needs to be done + // anyway as all loop carried values dominate the latch and thereby also the + // exiting branch. + // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch + // (because the loops are rotated. Thus, nothing will ever be added to + // OriginalFC0PHIs. + SmallVector OriginalFC0PHIs; + if (FC0.ExitingBlock != FC0.Latch) + for (PHINode &PHI : FC0.Header->phis()) + OriginalFC0PHIs.push_back(&PHI); + + assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!"); + + // Replace incoming blocks for header PHIs first. 
+ FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader); + FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch); + + // The old exiting block of the first loop (FC0) has to jump to the header + // of the second as we need to execute the code in the second header block + // regardless of the trip count. That is, if the trip count is 0, so the + // back edge is never taken, we still have to execute both loop headers, + // especially (but not only!) if the second is a do-while style loop. + // However, doing so might invalidate the phi nodes of the first loop as + // the new values do only need to dominate their latch and not the exiting + // predicate. To remedy this potential problem we always introduce phi + // nodes in the header of the second loop later that select the loop carried + // value, if the second header was reached through an old latch of the + // first, or undef otherwise. This is sound as exiting the first implies the + // second will exit too, __without__ taking the back-edge (their + // trip-counts are equal after all). + FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock, + FC1.Header); + + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock)); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.ExitingBlock, FC1.Header)); + + // Remove FC0 Exit Block + // The exit block for FC0 is no longer needed since control will flow + // directly to the header of FC1. Since it is an empty block, it can be + // removed at this point. + // TODO: In the future, we can handle non-empty exit blocks my merging any + // instructions from FC0 exit block into FC1 exit block prior to removing + // the block. 
+ assert(pred_begin(FC0.ExitBlock) == pred_end(FC0.ExitBlock) && + "Expecting exit block to be empty"); + FC0.ExitBlock->getTerminator()->eraseFromParent(); + new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock); + + // Remove FC1 Preheader + // The pre-header of L1 is not necessary anymore. + assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader)); + FC1.Preheader->getTerminator()->eraseFromParent(); + new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader); + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Delete, FC1.Preheader, FC1.Header)); + + // Moves the phi nodes from the second to the first loops header block. + while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { + if (SE.isSCEVable(PHI->getType())) + SE.forgetValue(PHI); + if (PHI->hasNUsesOrMore(1)) + PHI->moveBefore(&*FC0.Header->getFirstInsertionPt()); + else + PHI->eraseFromParent(); + } + + // Introduce new phi nodes in the second loop header to ensure + // exiting the first and jumping to the header of the second does not break + // the SSA property of the phis originally in the first loop. See also the + // comment above. + Instruction *L1HeaderIP = &FC1.Header->front(); + for (PHINode *LCPHI : OriginalFC0PHIs) { + int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); + assert(L1LatchBBIdx >= 0 && + "Expected loop carried value to be rewired at this point!"); + + Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); + + PHINode *L1HeaderPHI = PHINode::Create( + LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP); + L1HeaderPHI->addIncoming(LCV, FC0.Latch); + L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()), + FC0.ExitingBlock); + + LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); + } + + // Update the latches + + // Replace latch terminator destinations. 
+ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); + FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + + // If FC0.Latch and FC0.ExitingBlock are the same then we have already + // performed the updates above. + if (FC0.Latch != FC0.ExitingBlock) + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.Latch, FC1.Header)); + + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC0.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, + FC1.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC1.Latch, FC1.Header)); + + // All done + // Apply the updates to the Dominator Tree and cleanup. + + assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) && + "FC1GuardBlock has successors!!"); + assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) && + "FC1GuardBlock has predecessors!!"); + + // Update DT/PDT + DTU.applyUpdates(TreeUpdates); + + LI.removeBlock(FC1.Preheader); + DTU.deleteBB(FC1.Preheader); + DTU.deleteBB(FC0.ExitBlock); + DTU.flush(); + + // Is there a way to keep SE up-to-date so we don't need to forget the loops + // and rebuild the information in subsequent passes of fusion? + SE.forgetLoop(FC1.L); + SE.forgetLoop(FC0.L); + + // Merge the loops. + SmallVector Blocks(FC1.L->block_begin(), + FC1.L->block_end()); + for (BasicBlock *BB : Blocks) { + FC0.L->addBlockEntry(BB); + FC1.L->removeBlockFromLoop(BB); + if (LI.getLoopFor(BB) != FC1.L) + continue; + LI.changeLoopFor(BB, FC0.L); + } + while (!FC1.L->empty()) { + const auto &ChildLoopIt = FC1.L->begin(); + Loop *ChildLoop = *ChildLoopIt; + FC1.L->removeChildLoop(ChildLoopIt); + FC0.L->addChildLoop(ChildLoop); + } + + // Delete the now empty loop L1. 
+ LI.erase(FC1.L); + +#ifndef NDEBUG + assert(!verifyFunction(*FC0.Header->getParent(), &errs())); + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); + assert(PDT.verify()); + LI.verify(DT); + SE.verify(); +#endif LLVM_DEBUG(dbgs() << "Fusion done:\n"); @@ -1177,6 +1596,7 @@ struct LoopFuseLegacy : public FunctionPass { return LF.fuseLoops(F); } }; +} // namespace PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) { auto &LI = AM.getResult(F); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e561494f19c..dd477e80069 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,6 +41,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -77,16 +78,20 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -102,6 +107,7 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare"); static cl::opt 
UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -111,6 +117,26 @@ static cl::opt UseLIRCodeSizeHeurs( namespace { +// FIXME: reinventing the wheel much? Is there a cleaner solution? +struct PMAbstraction { + virtual void markLoopAsDeleted(Loop *L) = 0; + virtual ~PMAbstraction() = default; +}; +struct LegacyPMAbstraction : PMAbstraction { + LPPassManager &LPM; + LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {} + virtual ~LegacyPMAbstraction() = default; + void markLoopAsDeleted(Loop *L) override { LPM.markLoopAsDeleted(*L); } +}; +struct NewPMAbstraction : PMAbstraction { + LPMUpdater &Updater; + NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {} + virtual ~NewPMAbstraction() = default; + void markLoopAsDeleted(Loop *L) override { + Updater.markLoopAsDeleted(*L, L->getName()); + } +}; + class LoopIdiomRecognize { Loop *CurLoop = nullptr; AliasAnalysis *AA; @@ -120,6 +146,7 @@ class LoopIdiomRecognize { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; + PMAbstraction &LoopDeleter; OptimizationRemarkEmitter &ORE; bool ApplyCodeSizeHeuristics; @@ -128,9 +155,10 @@ public: LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const DataLayout *DL, + const DataLayout *DL, PMAbstraction &LoopDeleter, OptimizationRemarkEmitter &ORE) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), + LoopDeleter(LoopDeleter), ORE(ORE) {} bool runOnLoop(Loop *L); @@ -144,6 +172,8 @@ private: bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + bool HasMemCmp; + bool HasBCmp; /// Return code for isLegalStore() enum LegalStoreKind { @@ -186,6 +216,32 @@ private: bool runOnNoncountableLoop(); + struct CmpLoopStructure { + Value *BCmpValue, *LatchCmpValue; + BasicBlock *HeaderBrEqualBB, *HeaderBrUnequalBB; + BasicBlock *LatchBrFinishBB, *LatchBrContinueBB; + }; + bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) 
const; + struct CmpOfLoads { + ICmpInst::Predicate BCmpPred; + Value *LoadSrcA, *LoadSrcB; + Value *LoadA, *LoadB; + }; + bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const; + bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads, + CmpLoopStructure &CmpLoop) const; + bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads, + const SCEV *&SrcA, const SCEV *&SrcB, + const SCEV *&Iterations) const; + bool detectBCmpIdiom(ICmpInst *&BCmpInst, CmpInst *&LatchCmpInst, + LoadInst *&LoadA, LoadInst *&LoadB, const SCEV *&SrcA, + const SCEV *&SrcB, const SCEV *&NBytes) const; + BasicBlock *transformBCmpControlFlow(ICmpInst *ComparedEqual); + void transformLoopToBCmp(ICmpInst *BCmpInst, CmpInst *LatchCmpInst, + LoadInst *LoadA, LoadInst *LoadB, const SCEV *SrcA, + const SCEV *SrcB, const SCEV *NBytes); + bool recognizeBCmp(); + bool recognizePopcount(); void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var); @@ -217,18 +273,20 @@ public: LoopInfo *LI = &getAnalysis().getLoopInfo(); ScalarEvolution *SE = &getAnalysis().getSE(); TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI( + *L->getHeader()->getParent()); const TargetTransformInfo *TTI = &getAnalysis().getTTI( *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); + LegacyPMAbstraction LoopDeleter(LPM); // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). 
OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE); + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE); return LIR.runOnLoop(L); } @@ -247,7 +305,7 @@ char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, - LPMUpdater &) { + LPMUpdater &Updater) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); const auto &FAM = @@ -261,8 +319,9 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached " "at a higher level"); + NewPMAbstraction LoopDeleter(Updater); LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, - *ORE); + LoopDeleter, *ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -299,7 +358,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { // Disable loop idiom recognition if the function's name is a common idiom. StringRef Name = L->getHeader()->getParent()->getName(); - if (Name == "memset" || Name == "memcpy") + if (Name == "memset" || Name == "memcpy" || Name == "memcmp" || + Name == "bcmp") return false; // Determine if code size heuristics need to be applied. 
@@ -309,8 +369,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { HasMemset = TLI->has(LibFunc_memset); HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); HasMemcpy = TLI->has(LibFunc_memcpy); + HasMemCmp = TLI->has(LibFunc_memcmp); + HasBCmp = TLI->has(LibFunc_bcmp); - if (HasMemset || HasMemsetPattern || HasMemcpy) + if (HasMemset || HasMemsetPattern || HasMemcpy || HasMemCmp || HasBCmp) if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); @@ -961,7 +1023,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. - GV->setAlignment(16); + GV->setAlignment(Align(16)); Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); } @@ -1149,7 +1211,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizeBCmp() || recognizePopcount() || recognizeAndInsertFFS(); } /// Check if the given conditional branch is based on the comparison between @@ -1823,3 +1885,811 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); } + +bool LoopIdiomRecognize::matchBCmpLoopStructure( + CmpLoopStructure &CmpLoop) const { + ICmpInst::Predicate BCmpPred; + + // We are looking for the following basic layout: + // PreheaderBB: ; preds = ??? 
+ // <...> + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PreheaderBB,%LoopLatchBB + // <...> + // %BCmpValue = icmp <...> + // br i1 %BCmpValue, label %LoopLatchBB, label %Successor0 + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // %LatchCmpValue = + // br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB + // Successor0: ; preds = %LoopHeaderBB + // <...> + // Successor1: ; preds = %LoopLatchBB + // <...> + // + // Successor0 and Successor1 may or may not be the same basic block. + + // Match basic frame-work of this supposedly-comparison loop. + using namespace PatternMatch; + if (!match(CurLoop->getHeader()->getTerminator(), + m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()), + m_Value(CmpLoop.BCmpValue)), + CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) || + !match(CurLoop->getLoopLatch()->getTerminator(), + m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)), + CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) { + LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n"); + return true; +} + +bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue, + CmpOfLoads &CmpOfLoads) const { + using namespace PatternMatch; + LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue + << " as bcmp pattern.\n"); + + // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example: + // %v0 = load <...>, <...>* %LoadSrcA + // %v1 = load <...>, <...>* %LoadSrcB + // %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1 + // There won't be any no-op bitcasts between load and icmp, + // they would have been transformed into a load of bitcast. + // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too. 
+ if (!match(BCmpValue, + m_ICmp(CmpOfLoads.BCmpPred, + m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)), + m_Value(CmpOfLoads.LoadA)), + m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)), + m_Value(CmpOfLoads.LoadB)))) || + !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) { + LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t" + << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB + << "\n"); + // FIXME: handle memcmp pattern? + return true; +} + +bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow( + const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const { + BasicBlock *LoopHeaderBB = CurLoop->getHeader(); + BasicBlock *LoopLatchBB = CurLoop->getLoopLatch(); + + // Be wary, comparisons can be inverted, canonicalize order. + // If this 'element' comparison passed, we expect to proceed to the next elt. + if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ) + std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB); + // The predicate on loop latch does not matter, just canonicalize some order. + if (CmpLoop.LatchBrContinueBB != LoopHeaderBB) + std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB); + + SmallVector ExitBlocks; + + CurLoop->getUniqueExitBlocks(ExitBlocks); + assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks."); + + // Check that control-flow between blocks is as expected. 
+ if (CmpLoop.HeaderBrEqualBB != LoopLatchBB || + CmpLoop.LatchBrContinueBB != LoopHeaderBB || + !is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) || + !is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB)) { + LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n"); + return false; + } + + assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) && + !is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) && + "Unexpected exit edges."); + + LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n"); + + LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n"); + assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here."); + // No loop instructions must be used outside of the loop. Since we are in + // LCSSA form, we only need to check successor block's PHI nodes's incoming + // values for incoming blocks that are the loop basic blocks. + for (const BasicBlock *ExitBB : ExitBlocks) { + for (const PHINode &PHI : ExitBB->phis()) { + for (const BasicBlock *LoopBB : + make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) { + return CurLoop->contains(PredecessorBB); + })) { + const auto *I = + dyn_cast(PHI.getIncomingValueForBlock(LoopBB)); + if (I && CurLoop->contains(I)) { + LLVM_DEBUG(dbgs() + << "Loop contains instruction " << *I + << " which is used outside of the loop in basic block " + << ExitBB->getName() << " in phi node " << PHI << "\n"); + return false; + } + } + } + } + // Similarly, the loop should not have any other observable side-effects + // other than the final comparison result. + for (BasicBlock *LoopBB : CurLoop->blocks()) { + for (Instruction &I : *LoopBB) { + if (isa(I)) // Ignore dbginfo. + continue; // FIXME: anything else? lifetime info? 
+ if ((I.mayHaveSideEffects() || I.isAtomic() || I.isFenceLike()) && + &I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) { + LLVM_DEBUG( + dbgs() << "Loop contains instruction with potential side-effects: " + << I << "\n"); + return false; + } + } + } + LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n"); + return true; +} + +bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, + CmpOfLoads &CmpOfLoads, + const SCEV *&SrcA, + const SCEV *&SrcB, + const SCEV *&Iterations) const { + // Try to compute SCEV of the loads, for this loop's scope. + const auto *ScevForSrcA = dyn_cast( + SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop)); + const auto *ScevForSrcB = dyn_cast( + SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop)); + if (!ScevForSrcA || !ScevForSrcB) { + LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t" + << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n"); + + // Loads must have folloving SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB> + const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE); + const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE); + if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() || + ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop || + RecStepForA != RecStepForB || !isa(RecStepForA) || + cast(RecStepForA)->getAPInt() != BCmpTyBytes) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support " + "affine SCEV expressions originating in the loop we " + "are analysing with identical constant positive step, " + "equal to the count of bytes compared. Got:\n\t" + << *RecStepForA << "\n\t" << *RecStepForB << "\n"); + return false; + // FIXME: can support BCmpTyBytes > Step. + // But will need to account for the extra bytes compared at the end. 
+ } + + SrcA = ScevForSrcA->getStart(); + SrcB = ScevForSrcB->getStart(); + LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA + << "\n\t" << *SrcB << "\n"); + + // The load sources must be loop-invants that dominate the loop header. + if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() || + !SE->isAvailableAtLoopEntry(SrcA, CurLoop) || + !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable " + "prior to loop header.\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n"); + + // bcmp / memcmp take length argument as size_t, so let's conservatively + // assume that the iteration count should be not wider than that. + Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext()); + + // For how many iterations is loop guaranteed not to exit via LoopLatch? + // This is one less than the maximal number of comparisons,and is: n + -1 + const SCEV *LoopExitCount = + SE->getExitCount(CurLoop, CurLoop->getLoopLatch()); + LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: " + << *LoopExitCount << "\n"); + // Exit count, similarly, must be loop-invant that dominates the loop header. + if (LoopExitCount == SE->getCouldNotCompute() || + !LoopExitCount->getType()->isIntOrPtrTy() || + LoopExitCount->getType()->getScalarSizeInBits() > + CmpFuncSizeTy->getScalarSizeInBits() || + !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) { + LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n"); + return false; + } + + // LoopExitCount is always one less than the actual count of iterations. 
+ // Do this before cast, else we will be stuck with 1 + zext(-1 + n) + Iterations = SE->getAddExpr( + LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW); + assert(Iterations != SE->getCouldNotCompute() && + "Shouldn't fail to increment by one."); + + LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n"); + return true; +} + +/// Return true iff the bcmp idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p BCmpInst is set to the root byte-comparison instruction. +/// 2) \p LatchCmpInst is set to the comparison that controls the latch. +/// 3) \p LoadA is set to the first LoadInst. +/// 4) \p LoadB is set to the second LoadInst. +/// 5) \p SrcA is set to the first source location that is being compared. +/// 6) \p SrcB is set to the second source location that is being compared. +/// 7) \p NBytes is set to the number of bytes to compare. +bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst, + CmpInst *&LatchCmpInst, + LoadInst *&LoadA, LoadInst *&LoadB, + const SCEV *&SrcA, const SCEV *&SrcB, + const SCEV *&NBytes) const { + LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n"); + + // Give up if the loop is not in normal form, or has more than 2 blocks. + if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) { + LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n"); + + CmpLoopStructure CmpLoop; + if (!matchBCmpLoopStructure(CmpLoop)) + return false; + + CmpOfLoads CmpOfLoads; + if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads)) + return false; + + if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop)) + return false; + + BCmpInst = cast(CmpLoop.BCmpValue); // FIXME: is there no + LatchCmpInst = cast(CmpLoop.LatchCmpValue); // way to combine + LoadA = cast(CmpOfLoads.LoadA); // these cast with + LoadB = cast(CmpOfLoads.LoadB); // m_Value() matcher? 
+ + Type *BCmpValTy = BCmpInst->getOperand(0)->getType(); + LLVMContext &Context = BCmpValTy->getContext(); + uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy); + static constexpr uint64_t ByteTyBits = 8; + + LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy + << " of size " << BCmpTyBits + << " bits (while byte = " << ByteTyBits << " bits).\n"); + // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check + // that we are dealing with a multiple of a byte here. + if (BCmpTyBits % ByteTyBits != 0) { + LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n"); + return false; + // FIXME: could still be done under a run-time check that the total bit + // count is a multiple of a byte i guess? Or handle remainder separately? + } + + // Each comparison is done on this many bytes. + uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits; + LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes + << " bytes, eligible for bcmp conversion.\n"); + + const SCEV *Iterations; + if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations)) + return false; + + // bcmp / memcmp take length argument as size_t, do promotion now. + Type *CmpFuncSizeTy = DL->getIntPtrType(Context); + Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy); + assert(Iterations != SE->getCouldNotCompute() && "Promotion failed."); + // Note that it didn't do ptrtoint cast, we will need to do it manually. + + // We will be comparing *bytes*, not BCmpTy, we need to recalculate size. + // It's a multiplication, and it *could* overflow. But for it to overflow + // we'd want to compare more bytes than could be represented by size_t, But + // allocation functions also take size_t. So how'd you produce such buffer? + // FIXME: we likely need to actually check that we know this won't overflow, + // via llvm::computeOverflowForUnsignedMul(). 
+ NBytes = SE->getMulExpr( + Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW); + // This is the byte-count multiplication (Iterations * BCmpTyBytes), so the + // assertion message must talk about multiplying, not incrementing. + assert(NBytes != SE->getCouldNotCompute() && + "Shouldn't fail to multiply by element size."); + + LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n"); + + if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() || + LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() || + !LoadB->isSimple()) { + StringLiteral L("Unsupported loads in idiom - only support identical, " + "simple loads from address space 0.\n"); + LLVM_DEBUG(dbgs() << L); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads", + BCmpInst->getDebugLoc(), + CurLoop->getHeader()) + << L; + }); + return false; // FIXME: support non-simple loads. + } + + LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n"); + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom", + CurLoop->getStartLoc(), + CurLoop->getHeader()) + << "Loop recognized as a bcmp idiom"; + }); + + return true; +} + +BasicBlock * +LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) { + LLVM_DEBUG(dbgs() << "Transforming control-flow.\n"); + SmallVector DTUpdates; + + BasicBlock *PreheaderBB = CurLoop->getLoopPreheader(); + BasicBlock *HeaderBB = CurLoop->getHeader(); + BasicBlock *LoopLatchBB = CurLoop->getLoopLatch(); + SmallString<32> LoopName = CurLoop->getName(); + Function *Func = PreheaderBB->getParent(); + LLVMContext &Context = Func->getContext(); + + // Before doing anything, drop SCEV info. + SE->forgetLoop(CurLoop); + + // Here we start with: (0/6) + // PreheaderBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PreheaderBB,%LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %Successor0BB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB + // Successor0BB: ; preds = %LoopHeaderBB + // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ] + // <...> + // Successor1BB: ; preds = %LoopLatchBB + // %S1PHI = phi <...> [ <...>, %LoopLatchBB ] + // <...> + // + // Successor0 and Successor1 may or may not be the same basic block. + + // Decouple the edge between loop preheader basic block and loop header basic + // block. Thus the loop has become unreachable. + assert(cast(PreheaderBB->getTerminator())->isUnconditional() && + PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB && + "Preheader bb must end with an unconditional branch to header bb."); + PreheaderBB->getTerminator()->eraseFromParent(); + DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB}); + + // Create a new preheader basic block before loop header basic block. + auto *PhonyPreheaderBB = BasicBlock::Create( + Context, LoopName + ".phonypreheaderbb", Func, HeaderBB); + // And insert an unconditional branch from phony preheader basic block to + // loop header basic block. + IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB); + DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB}); + + // Create a *single* new empty block that we will substitute as a + // successor basic block for the loop's exits. This one is temporary. + // Much like phony preheader basic block, it is not connected. 
+ auto *PhonySuccessorBB = + BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func, + LoopLatchBB->getNextNode()); + // That block must have *some* non-PHI instruction, or else deleteDeadLoop() + // will mess up cleanup of dbginfo, and verifier will complain. + IRBuilder<>(PhonySuccessorBB).CreateUnreachable(); + + // Create two new empty blocks that we will use to preserve the original + // loop exit control-flow, and preserve the incoming values in the PHI nodes + // in loop's successor exit blocks. These will live one. + auto *ComparedUnequalBB = + BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func, + PhonySuccessorBB->getNextNode()); + auto *ComparedEqualBB = + BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func, + PhonySuccessorBB->getNextNode()); + + // By now we have: (1/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PhonyPreheaderBB, %LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %Successor0BB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %Successor1BB, label %LoopHeaderBB + // PhonySuccessorBB: ; No preds, UNREACHABLE! + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // [no terminator instruction!] + // UnequalBB: ; No preds, UNREACHABLE! + // [no terminator instruction!] + // Successor0BB: ; preds = %LoopHeaderBB + // %S0PHI = phi <...> [ <...>, %LoopHeaderBB ] + // <...> + // Successor1BB: ; preds = %LoopLatchBB + // %S1PHI = phi <...> [ <...>, %LoopLatchBB ] + // <...> + + // What is the mapping/replacement basic block for exiting out of the loop + // from either of old's loop basic blocks? 
+ auto GetReplacementBB = [this, ComparedEqualBB, + ComparedUnequalBB](const BasicBlock *OldBB) { + assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks."); + if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal". + return ComparedEqualBB; + if (OldBB == CurLoop->getHeader()) // "element compared unequal". + return ComparedUnequalBB; + llvm_unreachable("Only had two basic blocks in loop."); + }; + + // What are the exits out of this loop? + SmallVector LoopExitEdges; + CurLoop->getExitEdges(LoopExitEdges); + assert(LoopExitEdges.size() == 2 && "Should have only to two exit edges."); + + // Populate new basic blocks, update the exiting control-flow, PHI nodes. + for (const Loop::Edge &Edge : LoopExitEdges) { + auto *OldLoopBB = const_cast(Edge.first); + auto *SuccessorBB = const_cast(Edge.second); + assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) && + "Unexpected edge."); + + // If we would exit the loop from this loop's basic block, + // what semantically would that mean? Did comparison succeed or fail? + BasicBlock *NewBB = GetReplacementBB(OldLoopBB); + assert(NewBB->empty() && "Should not get same new basic block here twice."); + IRBuilder<> Builder(NewBB); + Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc()); + Builder.CreateBr(SuccessorBB); + DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB}); + // Also, be *REALLY* careful with PHI nodes in successor basic block, + // update them to recieve the same input value, but not from current loop's + // basic block, but from new basic block instead. + SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB); + // Also, change loop control-flow. This loop's basic block shall no longer + // exit from the loop to it's original successor basic block, but to our new + // phony successor basic block. Note that new successor will be unique exit. 
+ OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB, + PhonySuccessorBB); + DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB}); + } + + // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date. + assert(DTUpdates.size() == 8 && "Update count prediction failed."); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (2/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %LoopHeaderBB + // LoopHeaderBB: ; preds = %PhonyPreheaderBB, %LoopLatchBB + // <...> + // br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB + // LoopLatchBB: ; preds = %LoopHeaderBB + // <...> + // br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB + // PhonySuccessorBB: ; preds = %LoopHeaderBB, %LoopLatchBB + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // br label %Successor1BB + // UnequalBB: ; No preds, UNREACHABLE! + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // *Finally*, zap the original loop. Record it's parent loop though. + Loop *ParentLoop = CurLoop->getParentLoop(); + LLVM_DEBUG(dbgs() << "Deleting old loop.\n"); + LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted *BEFORE* deleting! + deleteDeadLoop(CurLoop, DT, SE, LI); // And actually delete the loop. + CurLoop = nullptr; + + // By now we have: (3/6) + // PreheaderBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // [no terminator instruction!] + // PhonyPreheaderBB: ; No preds, UNREACHABLE! + // br label %PhonySuccessorBB + // PhonySuccessorBB: ; preds = %PhonyPreheaderBB + // unreachable + // EqualBB: ; No preds, UNREACHABLE! + // br label %Successor1BB + // UnequalBB: ; No preds, UNREACHABLE! + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Now, actually restore the CFG. + + // Insert an unconditional branch from an actual preheader basic block to + // phony preheader basic block. + IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB); + // Record the edge that was just created: PreheaderBB -> PhonyPreheaderBB. + // HeaderBB must not be named here: it belonged to the loop that + // deleteDeadLoop() removed above, and no such edge exists any more. + DTUpdates.push_back({DominatorTree::Insert, PreheaderBB, PhonyPreheaderBB}); + // Insert proper conditional branch from phony successor basic block to the + // "dispatch" basic blocks, which were used to preserve incoming values in + // original loop's successor basic blocks. + assert(isa(PhonySuccessorBB->getTerminator()) && + "Yep, that's the one we created to keep deleteDeadLoop() happy."); + PhonySuccessorBB->getTerminator()->eraseFromParent(); + { + IRBuilder<> Builder(PhonySuccessorBB); + Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc()); + Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB); + } + DTUpdates.push_back( + {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB}); + DTUpdates.push_back( + {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB}); + + BasicBlock *DispatchBB = PhonySuccessorBB; + DispatchBB->setName(LoopName + ".bcmpdispatchbb"); + + assert(DTUpdates.size() == 3 && "Update count prediction failed."); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (4/6) + // PreheaderBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %PhonyPreheaderBB + // PhonyPreheaderBB: ; preds = %PreheaderBB + // br label %DispatchBB + // DispatchBB: ; preds = %PhonyPreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // The basic CFG has been restored! Now let's merge redundant basic blocks. + + // Merge phony successor basic block into it's only predecessor, + // phony preheader basic block. It is fully pointlessly redundant. + MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU); + + // By now we have: (5/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %DispatchBB + // DispatchBB: ; preds = %PreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Was this loop nested? + if (!ParentLoop) { + // If the loop was *NOT* nested, then let's also merge phony successor + // basic block into it's only predecessor, preheader basic block. + // Also, here we need to update LoopInfo. + LI->removeBlock(PreheaderBB); + MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU); + + // By now we have: (6/6) + // DispatchBB: ; preds = ??? 
+ // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %DispatchBB + // br label %Successor1BB + // UnequalBB: ; preds = %DispatchBB + // br label %Successor0BB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + return DispatchBB; + } + + // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop. + // To achieve that, we shall keep the preheader basic block (mainly so that + // the loop header block will be guaranteed to have a predecessor outside of + // the loop), and create a phony loop with all these new three basic blocks. + Loop *PhonyLoop = LI->AllocateLoop(); + ParentLoop->addChildLoop(PhonyLoop); + PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI); + PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI); + PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI); + + // But we only have a preheader basic block, a header basic block block and + // two exiting basic blocks. For a proper loop we also need a backedge from + // non-header basic block to header bb. + // Let's just add a never-taken branch from both of the exiting basic blocks. + for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) { + BranchInst *OldTerminator = cast(BB->getTerminator()); + assert(OldTerminator->isUnconditional() && "That's the one we created."); + BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0); + + IRBuilder<> Builder(OldTerminator); + Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc()); + Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB, + DispatchBB); + OldTerminator->eraseFromParent(); + // Yes, the backedge will never be taken. The control-flow is redundant. 
+ // If it can be simplified further, other passes will take care. + DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB}); + DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB}); + } + assert(DTUpdates.size() == 6 && "Update count prediction failed."); + DTU.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // By now we have: (6/6) + // PreheaderBB: ; preds = ??? + // <...> + // %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes) + // %ComparedEqual = icmp eq <...> %memcmp, 0 + // br label %BCmpDispatchBB + // BCmpDispatchBB:

; preds = %PreheaderBB + // br i1 %ComparedEqual, label %EqualBB, label %UnequalBB + // EqualBB: ; preds = %BCmpDispatchBB + // br i1 %true, label %Successor1BB, label %BCmpDispatchBB + // UnequalBB: ; preds = %BCmpDispatchBB + // br i1 %true, label %Successor0BB, label %BCmpDispatchBB + // Successor0BB: ; preds = %UnequalBB + // %S0PHI = phi <...> [ <...>, %UnequalBB ] + // <...> + // Successor1BB: ; preds = %EqualBB + // %S0PHI = phi <...> [ <...>, %EqualBB ] + // <...> + + // Finally fully DONE! + return DispatchBB; +} + +void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst, + CmpInst *LatchCmpInst, + LoadInst *LoadA, LoadInst *LoadB, + const SCEV *SrcA, const SCEV *SrcB, + const SCEV *NBytes) { + // We will be inserting before the terminator instruction of preheader block. + IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator()); + + LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n"); + LLVM_DEBUG(dbgs() << "Emitting new instructions.\n"); + + // Expand the SCEV expressions for both sources to compare, and produce value + // for the byte len (beware of Iterations potentially being a pointer, and + // account for element size being BCmpTyBytes bytes, which may be not 1 byte) + Value *PtrA, *PtrB, *Len; + { + SCEVExpander SExp(*SE, *DL, "LoopToBCmp"); + SExp.setInsertPoint(&*Builder.GetInsertPoint()); + + auto HandlePtr = [&SExp](LoadInst *Load, const SCEV *Src) { + SExp.SetCurrentDebugLocation(DebugLoc()); + // If the pointer operand of original load had dbgloc - use it. + if (const auto *I = dyn_cast(Load->getPointerOperand())) + SExp.SetCurrentDebugLocation(I->getDebugLoc()); + return SExp.expandCodeFor(Src); + }; + PtrA = HandlePtr(LoadA, SrcA); + PtrB = HandlePtr(LoadB, SrcB); + + // For len calculation let's use dbgloc for the loop's latch condition. 
+ Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc()); + SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc()); + Len = SExp.expandCodeFor(NBytes); + + Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext()); + assert(SE->getTypeSizeInBits(Len->getType()) == + DL->getTypeSizeInBits(CmpFuncSizeTy) && + "Len should already have the correct size."); + + // Make sure that iteration count is a number, insert ptrtoint cast if not. + if (Len->getType()->isPointerTy()) + Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy); + assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now."); + + Len->setName(Len->getName() + ".bytecount"); + + // There is no legality check needed. We want to compare that the memory + // regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal. + // For them to be fully equal, they must match bit-by-bit. And likewise, + // for them to *NOT* be fully equal, they have to differ just by one bit. + // The step of comparison (bits compared at once) simply does not matter. + } + + // For the rest of new instructions, dbgloc should point at the value cmp. + Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc()); + + // Emit the comparison itself. + auto *CmpCall = + cast(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI) + : emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI)); + // FIXME: add {B,Mem}CmpInst with MemoryCompareInst + // (based on MemIntrinsicBase) as base? + // FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...) + + // {b,mem}cmp returned 0 if they were equal, or non-zero if not equal. + auto *ComparedEqual = cast(Builder.CreateICmpEQ( + CmpCall, ConstantInt::get(CmpCall->getType(), 0), + PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp")); + + BasicBlock *BB = transformBCmpControlFlow(ComparedEqual); + Builder.ClearInsertionPoint(); + + // We're done. 
+ LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall", + CmpCall->getDebugLoc(), BB) + << "Transformed bcmp idiom into a call to " + << ore::NV("NewFunction", CmpCall->getCalledFunction()) + << "() function"; + }); + ++NumBCmp; +} + +/// Recognizes a bcmp idiom in a non-countable loop. +/// +/// If detected, transforms the relevant code to issue the bcmp (or memcmp) +/// intrinsic function call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeBCmp() { + if (!HasMemCmp && !HasBCmp) + return false; + + ICmpInst *BCmpInst; + CmpInst *LatchCmpInst; + LoadInst *LoadA, *LoadB; + const SCEV *SrcA, *SrcB, *NBytes; + if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, + NBytes)) { + LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n"); + return false; + } + + transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes); + return true; +} diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 31191b52895..368b9d4e8df 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -192,7 +192,8 @@ public: getAnalysis().getAssumptionCache( *L->getHeader()->getParent()); const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); + getAnalysis().getTLI( + *L->getHeader()->getParent()); MemorySSA *MSSA = nullptr; Optional MSSAU; if (EnableMSSALoopDependency) { @@ -233,7 +234,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9a42365adc1..1af4b21b432 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ 
b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -410,8 +410,6 @@ public: void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); private: - void splitInnerLoopLatch(Instruction *); - void splitInnerLoopHeader(); bool adjustLoopLinks(); void adjustLoopPreheaders(); bool adjustLoopBranches(); @@ -1226,7 +1224,7 @@ bool LoopInterchangeTransform::transform() { if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n"); + LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); if (!InductionPHI) { LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); @@ -1242,11 +1240,55 @@ bool LoopInterchangeTransform::transform() { if (&InductionPHI->getParent()->front() != InductionPHI) InductionPHI->moveBefore(&InductionPHI->getParent()->front()); - // Split at the place were the induction variable is - // incremented/decremented. - // TODO: This splitting logic may not work always. Fix this. - splitInnerLoopLatch(InnerIndexVar); - LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n"); + // Create a new latch block for the inner loop. We split at the + // current latch's terminator and then move the condition and all + // operands that are not either loop-invariant or the induction PHI into the + // new latch block. + BasicBlock *NewLatch = + SplitBlock(InnerLoop->getLoopLatch(), + InnerLoop->getLoopLatch()->getTerminator(), DT, LI); + + SmallSetVector WorkList; + unsigned i = 0; + auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() { + for (; i < WorkList.size(); i++) { + // Duplicate instruction and move it the new latch. Update uses that + // have been moved. 
+ Instruction *NewI = WorkList[i]->clone(); + NewI->insertBefore(NewLatch->getFirstNonPHI()); + assert(!NewI->mayHaveSideEffects() && + "Moving instructions with side-effects may change behavior of " + "the loop nest!"); + for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end(); + UI != UE;) { + Use &U = *UI++; + Instruction *UserI = cast(U.getUser()); + if (!InnerLoop->contains(UserI->getParent()) || + UserI->getParent() == NewLatch || UserI == InductionPHI) + U.set(NewI); + } + // Add operands of moved instruction to the worklist, except if they are + // outside the inner loop or are the induction PHI. + for (Value *Op : WorkList[i]->operands()) { + Instruction *OpI = dyn_cast(Op); + if (!OpI || + this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || + OpI == InductionPHI) + continue; + WorkList.insert(OpI); + } + } + }; + + // FIXME: Should we interchange when we have a constant condition? + Instruction *CondI = dyn_cast( + cast(InnerLoop->getLoopLatch()->getTerminator()) + ->getCondition()); + if (CondI) + WorkList.insert(CondI); + MoveInstructions(); + WorkList.insert(cast(InnerIndexVar)); + MoveInstructions(); // Splits the inner loops phi nodes out into a separate basic block. 
BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1263,10 +1305,6 @@ bool LoopInterchangeTransform::transform() { return true; } -void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { - SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI); -} - /// \brief Move all instructions except the terminator from FromBB right before /// InsertBefore static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 2b3d5e0ce9b..e8dc879a184 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -435,7 +435,8 @@ public: PH->getTerminator()); Value *Initial = new LoadInst( Cand.Load->getType(), InitialPtr, "load_initial", - /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator()); + /* isVolatile */ false, MaybeAlign(Cand.Load->getAlignment()), + PH->getTerminator()); PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 507a1e251ca..885c0e8f4b8 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -543,7 +543,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) { if (const auto *LI = dyn_cast(U->getValue())) if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) if (AA->pointsToConstantMemory(LI->getOperand(0)) || - LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; return false; } diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 166b57f20b4..96e2c2a3ac6 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1644,7 +1644,8 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager 
&LPM) { AA = &getAnalysis().getAAResults(); LI = &getAnalysis().getLoopInfo(); SE = &getAnalysis().getSE(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI( + *L->getHeader()->getParent()); DT = &getAnalysis().getDomTree(); PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index e009947690a..94517996df3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -55,7 +55,7 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } @@ -94,17 +94,15 @@ public: auto *LI = &getAnalysis().getLoopInfo(); const auto *TTI = &getAnalysis().getTTI(F); auto *AC = &getAnalysis().getAssumptionCache(F); - auto *DTWP = getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *SEWP = getAnalysisIfAvailable(); - auto *SE = SEWP ? &SEWP->getSE() : nullptr; + auto &DT = getAnalysis().getDomTree(); + auto &SE = getAnalysis().getSE(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); Optional MSSAU; if (EnableMSSALoopDependency) { MemorySSA *MSSA = &getAnalysis().getMSSA(); MSSAU = MemorySSAUpdater(MSSA); } - return LoopRotation(L, LI, TTI, AC, DT, SE, + return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, SQ, false, MaxHeaderSize, false); } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 046f4c8af49..299f3fc5fb1 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -690,7 +690,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &LPMU) { Optional MSSAU; - if (EnableMSSALoopDependency && AR.MSSA) + if (AR.MSSA) MSSAU = MemorySSAUpdater(AR.MSSA); bool DeleteCurrentLoop = false; if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE, @@ -702,7 +702,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, LPMU.markLoopAsDeleted(L, "loop-simplifycfg"); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp index 975452e13f0..65e0dee0225 100644 --- a/lib/Transforms/Scalar/LoopSink.cpp +++ b/lib/Transforms/Scalar/LoopSink.cpp @@ -230,12 +230,9 @@ static bool sinkInstruction(Loop &L, Instruction &I, IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); // Replaces uses of I with IC in N - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) { - Use &U = *UI++; - auto *I = cast(U.getUser()); - if (I->getParent() == N) - U.set(IC); - } + I.replaceUsesWithIf(IC, [N](Use &U) { + return cast(U.getUser())->getParent() == N; + }); // Replaces uses of I with IC in blocks dominated by N replaceDominatedUsesWith(&I, IC, DT, N); LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName() diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 59a387a186b..7f119175c4a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1386,7 +1386,9 @@ void 
Cost::RateFormula(const Formula &F, // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as // additional instruction (at least fill). - unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1; + // TODO: Need distinguish register class? + unsigned TTIRegNum = TTI->getNumberOfRegisters( + TTI->getRegisterClassForType(false, F.getType())) - 1; if (C.NumRegs > TTIRegNum) { // Cost already exceeded TTIRegNum, then only newly added register can add // new instructions. @@ -3165,6 +3167,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); return; } + assert(IVSrc && "Failed to find IV chain source"); LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); Type *IVTy = IVSrc->getType(); @@ -3265,12 +3268,12 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // requirements for both N and i at the same time. Limiting this code to // equality icmps is not a problem because all interesting loops use // equality icmps, thanks to IndVarSimplify. - if (ICmpInst *CI = dyn_cast(UserInst)) + if (ICmpInst *CI = dyn_cast(UserInst)) { + // If CI can be saved in some target, like replaced inside hardware loop + // in PowerPC, no need to generate initial formulae for it. + if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) + continue; if (CI->isEquality()) { - // If CI can be saved in some target, like replaced inside hardware loop - // in PowerPC, no need to generate initial formulae for it. - if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) - continue; // Swap the operands if needed to put the OperandValToReplace on the // left, for consistency. Value *NV = CI->getOperand(1); @@ -3298,6 +3301,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { Factors.insert(-(uint64_t)Factors[i]); Factors.insert(-1); } + } // Get or create an LSRUse. 
std::pair P = getUse(S, Kind, AccessTy); @@ -4834,6 +4838,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { } } } + assert(Best && "Failed to find best LSRUse candidate"); LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best << " will yield profitable reuse.\n"); @@ -5740,7 +5745,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { *L->getHeader()->getParent()); auto &AC = getAnalysis().getAssumptionCache( *L->getHeader()->getParent()); - auto &LibInfo = getAnalysis().getTLI(); + auto &LibInfo = getAnalysis().getTLI( + *L->getHeader()->getParent()); return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo); } diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 86891eb451b..8d88be42031 100644 --- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -166,7 +166,7 @@ static bool computeUnrollAndJamCount( bool UseUpperBound = false; bool ExplicitUnroll = computeUnrollCount( L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, - OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); + /*MaxOrZero*/ false, OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); if (ExplicitUnroll || UseUpperBound) { // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it // for the unroller instead. 
@@ -293,9 +293,9 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, if (Latch != Exit || SubLoopLatch != SubLoopExit) return LoopUnrollResult::Unmodified; - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, nullptr, nullptr, OptLevel, - None, None, None, None, None, None); + TargetTransformInfo::UnrollingPreferences UP = + gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, + None, None, None, None, None, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2fa7436213d..a6d4164c364 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -178,7 +178,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, Optional UserRuntime, - Optional UserUpperBound, Optional UserAllowPeeling) { + Optional UserUpperBound, Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + Optional UserFullUnrollMaxCount) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -202,6 +204,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = false; UP.AllowPeeling = true; UP.UnrollAndJam = false; + UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings @@ -257,6 +260,10 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = *UserUpperBound; if (UserAllowPeeling.hasValue()) UP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + UP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + if 
(UserFullUnrollMaxCount.hasValue()) + UP.FullUnrollMaxCount = *UserFullUnrollMaxCount; return UP; } @@ -730,7 +737,7 @@ bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, - unsigned &TripMultiple, unsigned LoopSize, + bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { // Check for explicit Count. @@ -781,18 +788,34 @@ bool llvm::computeUnrollCount( // Also we need to check if we exceed FullUnrollMaxCount. // If using the upper bound to unroll, TripMultiple should be set to 1 because // we do not know when loop may exit. - // MaxTripCount and ExactTripCount cannot both be non zero since we only + + // We can unroll by the upper bound amount if it's generally allowed or if + // we know that the loop is executed either the upper bound or zero times. + // (MaxOrZero unrolling keeps only the first loop test, so the number of + // loop tests remains the same compared to the non-unrolled version, whereas + // the generic upper bound unrolling keeps all but the last loop test so the + // number of loop tests goes up which may end up being worse on targets with + // constrained branch predictor resources so is controlled by an option.) + // In addition we only unroll small upper bounds. + unsigned FullUnrollMaxTripCount = MaxTripCount; + if (!(UP.UpperBound || MaxOrZero) || + FullUnrollMaxTripCount > UnrollMaxUpperBound) + FullUnrollMaxTripCount = 0; + + // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only // compute the former when the latter is zero. unsigned ExactTripCount = TripCount; - assert((ExactTripCount == 0 || MaxTripCount == 0) && - "ExtractTripCount and MaxTripCount cannot both be non zero."); - unsigned FullUnrollTripCount = ExactTripCount ? 
ExactTripCount : MaxTripCount; + assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && + "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); + + unsigned FullUnrollTripCount = + ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { // When computing the unrolled size, note that BEInsns are not replicated // like the rest of the loop body. if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; return ExplicitUnroll; @@ -806,7 +829,7 @@ bool llvm::computeUnrollCount( unsigned Boost = getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (MaxTripCount == FullUnrollTripCount); + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; return ExplicitUnroll; @@ -882,6 +905,8 @@ bool llvm::computeUnrollCount( "because " "unrolled size is too large."; }); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -903,6 +928,12 @@ bool llvm::computeUnrollCount( return false; } + // Don't unroll a small upper bound loop unless user or TTI asked to do so. + if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { + UP.Count = 0; + return false; + } + // Check if the runtime trip count is too small when profile is available. 
if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { @@ -966,7 +997,11 @@ bool llvm::computeUnrollCount( if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + + if (MaxTripCount && UP.Count > MaxTripCount) + UP.Count = MaxTripCount; + + LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count << "\n"); if (UP.Count < 2) UP.Count = 0; @@ -976,13 +1011,14 @@ bool llvm::computeUnrollCount( static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional ProvidedCount, Optional ProvidedThreshold, Optional ProvidedAllowPartial, Optional ProvidedRuntime, Optional ProvidedUpperBound, - Optional ProvidedAllowPeeling) { + Optional ProvidedAllowPeeling, + Optional ProvidedAllowProfileBasedPeeling, + Optional ProvidedFullUnrollMaxCount) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); @@ -1007,7 +1043,8 @@ static LoopUnrollResult tryToUnrollLoop( TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling); + ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. 
@@ -1028,10 +1065,10 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; } - // When optimizing for size, use LoopSize as threshold, to (fully) unroll - // loops, if it does not increase code size. + // When optimizing for size, use LoopSize + 1 as threshold (we use < Threshold + // later), to (fully) unroll loops, if it does not increase code size. if (OptForSize) - UP.Threshold = std::max(UP.Threshold, LoopSize); + UP.Threshold = std::max(UP.Threshold, LoopSize + 1); if (NumInlineCandidates != 0) { LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); @@ -1040,7 +1077,6 @@ static LoopUnrollResult tryToUnrollLoop( // Find trip count and trip multiple if count is not available unsigned TripCount = 0; - unsigned MaxTripCount = 0; unsigned TripMultiple = 1; // If there are multiple exiting blocks but one of them is the latch, use the // latch for the trip count estimation. Otherwise insist on a single exiting @@ -1070,28 +1106,18 @@ static LoopUnrollResult tryToUnrollLoop( // Try to find the trip count upper bound if we cannot find the exact trip // count. + unsigned MaxTripCount = 0; bool MaxOrZero = false; if (!TripCount) { MaxTripCount = SE.getSmallConstantMaxTripCount(L); MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L); - // We can unroll by the upper bound amount if it's generally allowed or if - // we know that the loop is executed either the upper bound or zero times. - // (MaxOrZero unrolling keeps only the first loop test, so the number of - // loop tests remains the same compared to the non-unrolled version, whereas - // the generic upper bound unrolling keeps all but the last loop test so the - // number of loop tests goes up which may end up being worse on targets with - // constrained branch predictor resources so is controlled by an option.) - // In addition we only unroll small upper bounds. 
- if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) { - MaxTripCount = 0; - } } // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; bool IsCountSetExplicitly = computeUnrollCount( - L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, + L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; @@ -1139,7 +1165,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || UP.PeelCount)) + (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; @@ -1169,18 +1195,24 @@ public: Optional ProvidedRuntime; Optional ProvidedUpperBound; Optional ProvidedAllowPeeling; + Optional ProvidedAllowProfileBasedPeeling; + Optional ProvidedFullUnrollMaxCount; LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false, Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, Optional UpperBound = None, - Optional AllowPeeling = None) + Optional AllowPeeling = None, + Optional AllowProfileBasedPeeling = None, + Optional ProvidedFullUnrollMaxCount = None) : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + ProvidedAllowPeeling(AllowPeeling), + ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), + ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { 
initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1203,10 +1235,11 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, - PreserveLCSSA, OptLevel, OnlyWhenForced, - ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, - ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, + OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, + ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, + ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, + ProvidedFullUnrollMaxCount); if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1283,14 +1316,16 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = L.getName(); - bool Changed = - tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*BFI*/ nullptr, /*PSI*/ nullptr, - /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, - ForgetSCEV, /*Count*/ None, - /*Threshold*/ None, /*AllowPartial*/ false, - /*Runtime*/ false, /*UpperBound*/ false, - /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, + /*PreserveLCSSA*/ true, OptLevel, + OnlyWhenForced, ForgetSCEV, /*Count*/ None, + /*Threshold*/ None, /*AllowPartial*/ false, + /*Runtime*/ false, /*UpperBound*/ false, + /*AllowPeeling*/ false, + /*AllowProfileBasedPeeling*/ false, + /*FullUnrollMaxCount*/ None) != + LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1430,7 +1465,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, - 
UnrollOpts.AllowUpperBound, LocalAllowPeeling); + UnrollOpts.AllowUpperBound, LocalAllowPeeling, + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index b5b8e720069..b410df0c5f6 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -420,7 +420,8 @@ enum OperatorChain { /// cost of creating an entirely new loop. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, OperatorChain &ParentChain, - DenseMap &Cache) { + DenseMap &Cache, + MemorySSAUpdater *MSSAU) { auto CacheIt = Cache.find(Cond); if (CacheIt != Cache.end()) return CacheIt->second; @@ -438,7 +439,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // TODO: Handle: br (VARIANT|INVARIANT). // Hoist simple values out. - if (L->makeLoopInvariant(Cond, Changed)) { + if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { Cache[Cond] = Cond; return Cond; } @@ -478,7 +479,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // which will cause the branch to go away in one loop and the condition to // simplify in the other one. if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed, - ParentChain, Cache)) { + ParentChain, Cache, MSSAU)) { Cache[Cond] = LHS; return LHS; } @@ -486,7 +487,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, // operand(1). ParentChain = NewChain; if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed, - ParentChain, Cache)) { + ParentChain, Cache, MSSAU)) { Cache[Cond] = RHS; return RHS; } @@ -500,12 +501,12 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, /// Cond is a condition that occurs in L. 
If it is invariant in the loop, or has /// an invariant piece, return the invariant along with the operator chain type. /// Otherwise, return null. -static std::pair FindLIVLoopCondition(Value *Cond, - Loop *L, - bool &Changed) { +static std::pair +FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, + MemorySSAUpdater *MSSAU) { DenseMap Cache; OperatorChain OpChain = OC_OpChainNone; - Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache); + Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); // In case we do find a LIV, it can not be obtained by walking up a mixed // operator chain. @@ -525,7 +526,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { DT = &getAnalysis().getDomTree(); if (EnableMSSALoopDependency) { MSSA = &getAnalysis().getMSSA(); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); assert(DT && "Cannot update MemorySSA without a valid DomTree."); } currentLoop = L; @@ -694,8 +695,9 @@ bool LoopUnswitch::processCurrentLoop() { } for (IntrinsicInst *Guard : Guards) { - Value *LoopCond = - FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(Guard->getOperand(0), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { // NB! Unswitching (if successful) could have erased some of the @@ -735,8 +737,9 @@ bool LoopUnswitch::processCurrentLoop() { if (BI->isConditional()) { // See if this, or some part of it, is loop invariant. If so, we can // unswitch on it if we desire. 
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && !EqualityPropUnSafe(*LoopCond) && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; @@ -748,7 +751,7 @@ bool LoopUnswitch::processCurrentLoop() { Value *LoopCond; OperatorChain OpChain; std::tie(LoopCond, OpChain) = - FindLIVLoopCondition(SC, currentLoop, Changed); + FindLIVLoopCondition(SC, currentLoop, Changed, MSSAU.get()); unsigned NumCases = SI->getNumCases(); if (LoopCond && NumCases) { @@ -808,8 +811,9 @@ bool LoopUnswitch::processCurrentLoop() { for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); BBI != E; ++BBI) if (SelectInst *SI = dyn_cast(BBI)) { - Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { ++NumSelects; @@ -1123,8 +1127,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { if (!BI->isConditional()) return false; - Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; // Unswitch only if the trivial condition itself is an LIV (not // partial LIV which could occur in and/or) @@ -1157,8 +1162,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { return true; } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { // If this isn't switching on an invariant condition, we can't unswitch it. 
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), - currentLoop, Changed).first; + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, + Changed, MSSAU.get()) + .first; // Unswitch only if the trivial condition itself is an LIV (not // partial LIV which could occur in and/or) @@ -1240,6 +1246,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopBlocks.clear(); NewBlocks.clear(); + if (MSSAU && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + // First step, split the preheader and exit blocks, and add these blocks to // the LoopBlocks list. BasicBlock *NewPreheader = @@ -1607,36 +1616,30 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // If BI's parent is the only pred of the successor, fold the two blocks // together. BasicBlock *Pred = BI->getParent(); + (void)Pred; BasicBlock *Succ = BI->getSuccessor(0); BasicBlock *SinglePred = Succ->getSinglePredecessor(); if (!SinglePred) continue; // Nothing to do. assert(SinglePred == Pred && "CFG broken"); - LLVM_DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " - << Succ->getName() << "\n"); - - // Resolve any single entry PHI nodes in Succ. - while (PHINode *PN = dyn_cast(Succ->begin())) - ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM, - MSSAU.get()); - - // If Succ has any successors with PHI nodes, update them to have - // entries coming from Pred instead of Succ. - Succ->replaceAllUsesWith(Pred); - - // Move all of the successor contents from Succ to Pred. - Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), - Succ->begin(), Succ->end()); - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(Succ, Pred, BI); + // Make the LPM and Worklist updates specific to LoopUnswitch. LPM->deleteSimpleAnalysisValue(BI, L); RemoveFromWorklist(BI, Worklist); - BI->eraseFromParent(); - - // Remove Succ from the loop tree. 
- LI->removeBlock(Succ); LPM->deleteSimpleAnalysisValue(Succ, L); - Succ->eraseFromParent(); + auto SuccIt = Succ->begin(); + while (PHINode *PN = dyn_cast(SuccIt++)) { + for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) + if (Instruction *Use = dyn_cast(PN->getOperand(It))) + Worklist.push_back(Use); + for (User *U : PN->users()) + Worklist.push_back(cast(U)); + LPM->deleteSimpleAnalysisValue(PN, L); + RemoveFromWorklist(PN, Worklist); + ++NumSimplify; + } + // Merge the block and make the remaining analyses updates. + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); ++NumSimplify; continue; } diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 896dd8bcb92..2ccb7cae307 100644 --- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -112,37 +112,6 @@ static cl::opt LVLoopDepthThreshold( "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"), cl::init(2), cl::Hidden); -/// Create MDNode for input string. -static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { - LLVMContext &Context = TheLoop->getHeader()->getContext(); - Metadata *MDs[] = { - MDString::get(Context, Name), - ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; - return MDNode::get(Context, MDs); -} - -/// Set input string into loop metadata by keeping other values intact. -void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *MDString, - unsigned V) { - SmallVector MDs(1); - // If the loop already has metadata, retain it. - MDNode *LoopID = TheLoop->getLoopID(); - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *Node = cast(LoopID->getOperand(i)); - MDs.push_back(Node); - } - } - // Add new metadata. 
- MDs.push_back(createStringMetadata(TheLoop, MDString, V)); - // Replace current metadata node with new one. - LLVMContext &Context = TheLoop->getHeader()->getContext(); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - TheLoop->setLoopID(NewLoopID); -} - namespace { struct LoopVersioningLICM : public LoopPass { diff --git a/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp new file mode 100644 index 00000000000..d0fcf38b5a7 --- /dev/null +++ b/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -0,0 +1,170 @@ +//===- LowerConstantIntrinsics.cpp - Lower constant intrinsic calls -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers all remaining 'objectsize' 'is.constant' intrinsic calls +// and provides constant propagation and basic CFG cleanup on the result. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "lower-is-constant-intrinsic" + +STATISTIC(IsConstantIntrinsicsHandled, + "Number of 'is.constant' intrinsic calls handled"); +STATISTIC(ObjectSizeIntrinsicsHandled, + "Number of 'objectsize' intrinsic calls handled"); + +static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { + Value *Op = II->getOperand(0); + + return isa(Op) ? 
ConstantInt::getTrue(II->getType()) + : ConstantInt::getFalse(II->getType()); +} + +static bool replaceConditionalBranchesOnConstant(Instruction *II, + Value *NewValue) { + bool HasDeadBlocks = false; + SmallSetVector Worklist; + replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr, + &Worklist); + for (auto I : Worklist) { + BranchInst *BI = dyn_cast(I); + if (!BI) + continue; + if (BI->isUnconditional()) + continue; + + BasicBlock *Target, *Other; + if (match(BI->getOperand(0), m_Zero())) { + Target = BI->getSuccessor(1); + Other = BI->getSuccessor(0); + } else if (match(BI->getOperand(0), m_One())) { + Target = BI->getSuccessor(0); + Other = BI->getSuccessor(1); + } else { + Target = nullptr; + Other = nullptr; + } + if (Target && Target != Other) { + BasicBlock *Source = BI->getParent(); + Other->removePredecessor(Source); + BI->eraseFromParent(); + BranchInst::Create(Target, Source); + if (pred_begin(Other) == pred_end(Other)) + HasDeadBlocks = true; + } + } + return HasDeadBlocks; +} + +static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo *TLI) { + bool HasDeadBlocks = false; + const auto &DL = F.getParent()->getDataLayout(); + SmallVector Worklist; + + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) { + for (Instruction &I: *BB) { + IntrinsicInst *II = dyn_cast(&I); + if (!II) + continue; + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::is_constant: + case Intrinsic::objectsize: + Worklist.push_back(WeakTrackingVH(&I)); + break; + } + } + } + for (WeakTrackingVH &VH: Worklist) { + // Items on the worklist can be mutated by earlier recursive replaces. + // This can remove the intrinsic as dead (VH == null), but also replace + // the intrinsic in place. 
+ if (!VH) + continue; + IntrinsicInst *II = dyn_cast(&*VH); + if (!II) + continue; + Value *NewValue; + switch (II->getIntrinsicID()) { + default: + continue; + case Intrinsic::is_constant: + NewValue = lowerIsConstantIntrinsic(II); + IsConstantIntrinsicsHandled++; + break; + case Intrinsic::objectsize: + NewValue = lowerObjectSizeCall(II, DL, TLI, true); + ObjectSizeIntrinsicsHandled++; + break; + } + HasDeadBlocks |= replaceConditionalBranchesOnConstant(II, NewValue); + } + if (HasDeadBlocks) + removeUnreachableBlocks(F); + return !Worklist.empty(); +} + +PreservedAnalyses +LowerConstantIntrinsicsPass::run(Function &F, FunctionAnalysisManager &AM) { + if (lowerConstantIntrinsics(F, AM.getCachedResult(F))) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +namespace { +/// Legacy pass for lowering is.constant intrinsics out of the IR. +/// +/// When this pass is run over a function it converts is.constant intrinsics +/// into 'true' or 'false'. This is completements the normal constand folding +/// to 'true' as part of Instruction Simplify passes. +class LowerConstantIntrinsics : public FunctionPass { +public: + static char ID; + LowerConstantIntrinsics() : FunctionPass(ID) { + initializeLowerConstantIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + auto *TLIP = getAnalysisIfAvailable(); + const TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI(F) : nullptr; + return lowerConstantIntrinsics(F, TLI); + } +}; +} // namespace + +char LowerConstantIntrinsics::ID = 0; +INITIALIZE_PASS(LowerConstantIntrinsics, "lower-constant-intrinsics", + "Lower constant intrinsics", false, false) + +FunctionPass *llvm::createLowerConstantIntrinsicsPass() { + return new LowerConstantIntrinsics(); +} diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 0d67c0d740e..d85f20b3f80 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/MisExpect.h" using namespace llvm; @@ -71,15 +72,20 @@ static bool handleSwitchExpect(SwitchInst &SI) { unsigned n = SI.getNumCases(); // +1 for default case. SmallVector Weights(n + 1, UnlikelyBranchWeight); - if (Case == *SI.case_default()) - Weights[0] = LikelyBranchWeight; - else - Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight; + uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1; + Weights[Index] = LikelyBranchWeight; + + SI.setMetadata( + LLVMContext::MD_misexpect, + MDBuilder(CI->getContext()) + .createMisExpect(Index, LikelyBranchWeight, UnlikelyBranchWeight)); + + SI.setCondition(ArgValue); + misexpect::checkFrontendInstrumentation(SI); SI.setMetadata(LLVMContext::MD_prof, MDBuilder(CI->getContext()).createBranchWeights(Weights)); - SI.setCondition(ArgValue); return true; } @@ -155,7 +161,7 @@ static void handlePhiDef(CallInst *Expect) { return Result; }; - auto *PhiDef = dyn_cast(V); + auto *PhiDef = cast(V); // Get the first dominating conditional branch of the operand // i's incoming block. 
@@ -280,19 +286,28 @@ template static bool handleBrSelExpect(BrSelInst &BSI) { MDBuilder MDB(CI->getContext()); MDNode *Node; + MDNode *ExpNode; if ((ExpectedValue->getZExtValue() == ValueComparedTo) == - (Predicate == CmpInst::ICMP_EQ)) + (Predicate == CmpInst::ICMP_EQ)) { Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); - else + ExpNode = MDB.createMisExpect(0, LikelyBranchWeight, UnlikelyBranchWeight); + } else { Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); + ExpNode = MDB.createMisExpect(1, LikelyBranchWeight, UnlikelyBranchWeight); + } - BSI.setMetadata(LLVMContext::MD_prof, Node); + BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode); if (CmpI) CmpI->setOperand(0, ArgValue); else BSI.setCondition(ArgValue); + + misexpect::checkFrontendInstrumentation(BSI); + + BSI.setMetadata(LLVMContext::MD_prof, Node); + return true; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 5a055139be4..2364748efb0 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,90 +69,6 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); -static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, - bool &VariableIdxFound, - const DataLayout &DL) { - // Skip over the first indices. - gep_type_iterator GTI = gep_type_begin(GEP); - for (unsigned i = 1; i != Idx; ++i, ++GTI) - /*skip along*/; - - // Compute the offset implied by the rest of the indices. - int64_t Offset = 0; - for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { - ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); - if (!OpC) - return VariableIdxFound = true; - if (OpC->isZero()) continue; // No offset. - - // Handle struct indices, which add their field offset to the pointer. 
- if (StructType *STy = GTI.getStructTypeOrNull()) { - Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - continue; - } - - // Otherwise, we have a sequential type like an array or vector. Multiply - // the index by the ElementSize. - uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); - Offset += Size*OpC->getSExtValue(); - } - - return Offset; -} - -/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and -/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2 -/// might be &A[40]. In this case offset would be -8. -static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - const DataLayout &DL) { - Ptr1 = Ptr1->stripPointerCasts(); - Ptr2 = Ptr2->stripPointerCasts(); - - // Handle the trivial case first. - if (Ptr1 == Ptr2) { - Offset = 0; - return true; - } - - GEPOperator *GEP1 = dyn_cast(Ptr1); - GEPOperator *GEP2 = dyn_cast(Ptr2); - - bool VariableIdxFound = false; - - // If one pointer is a GEP and the other isn't, then see if the GEP is a - // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); - return !VariableIdxFound; - } - - if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); - return !VariableIdxFound; - } - - // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical - // base. After that base, they may have some number of common (and - // potentially variable) indices. After that they handle some constant - // offset, which determines their offset from each other. At this point, we - // handle no other case. - if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) - return false; - - // Skip any common indices and track the GEP types. 
- unsigned Idx = 1; - for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) - if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) - break; - - int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); - int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); - if (VariableIdxFound) return false; - - Offset = Offset2-Offset1; - return true; -} - namespace { /// Represents a range of memset'd bytes with the ByteVal value. @@ -419,12 +335,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, break; // Check to see if this store is to a constant offset from the start ptr. - int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, - DL)) + Optional Offset = + isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL); + if (!Offset) break; - Ranges.addStore(Offset, NextStore); + Ranges.addStore(*Offset, NextStore); } else { MemSetInst *MSI = cast(BI); @@ -433,11 +349,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, break; // Check to see if this store is to a constant offset from the start ptr. 
- int64_t Offset; - if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) + Optional Offset = isPointerOffset(StartPtr, MSI->getDest(), DL); + if (!Offset) break; - Ranges.addMemSet(Offset, MSI); + Ranges.addMemSet(*Offset, MSI); } } @@ -597,9 +513,13 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, ToLift.push_back(C); for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k) - if (auto *A = dyn_cast(C->getOperand(k))) - if (A->getParent() == SI->getParent()) + if (auto *A = dyn_cast(C->getOperand(k))) { + if (A->getParent() == SI->getParent()) { + // Cannot hoist user of P above P + if(A == P) return false; Args.insert(A); + } + } } // We made it, we need to lift @@ -979,7 +899,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, // If the destination wasn't sufficiently aligned then increase its alignment. if (!isDestSufficientlyAligned) { assert(isa(cpyDest) && "Can only increase alloca alignment!"); - cast(cpyDest)->setAlignment(srcAlign); + cast(cpyDest)->setAlignment(MaybeAlign(srcAlign)); } // Drop any cached information about the call, because we may have changed @@ -1516,7 +1436,7 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) { return false; auto *MD = &getAnalysis().getMemDep(); - auto *TLI = &getAnalysis().getTLI(); + auto *TLI = &getAnalysis().getTLI(F); auto LookupAliasAnalysis = [this]() -> AliasAnalysis & { return getAnalysis().getAAResults(); diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index 3d047a19326..98a45b39131 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -897,7 +897,7 @@ public: bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - const auto &TLI = getAnalysis().getTLI(); + const auto &TLI = getAnalysis().getTLI(F); const auto &TTI = getAnalysis().getTTI(F); // MergeICmps does not need the DominatorTree, but we update it if it's // already available. 
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 30645f4400e..9799ea7960e 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -14,9 +14,11 @@ // diamond (hammock) and merges them into a single load in the header. Similar // it sinks and merges two stores to the tail block (footer). The algorithm // iterates over the instructions of one side of the diamond and attempts to -// find a matching load/store on the other side. It hoists / sinks when it -// thinks it safe to do so. This optimization helps with eg. hiding load -// latencies, triggering if-conversion, and reducing static code size. +// find a matching load/store on the other side. New tail/footer block may be +// insterted if the tail/footer block has more predecessors (not only the two +// predecessors that are forming the diamond). It hoists / sinks when it thinks +// it safe to do so. This optimization helps with eg. hiding load latencies, +// triggering if-conversion, and reducing static code size. // // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. // @@ -103,7 +105,9 @@ class MergedLoadStoreMotion { // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. 
const int MagicCompileTimeControl = 250; + const bool SplitFooterBB; public: + MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {} bool run(Function &F, AliasAnalysis &AA); private: @@ -114,7 +118,9 @@ private: PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); bool isStoreSinkBarrierInRange(const Instruction &Start, const Instruction &End, MemoryLocation Loc); - bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); + bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; + void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, + StoreInst *ElseInst); bool mergeStores(BasicBlock *BB); }; } // end anonymous namespace @@ -216,75 +222,83 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, return NewPN; } +/// +/// Check if 2 stores can be sunk together with corresponding GEPs +/// +bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0, + StoreInst *S1) const { + auto *A0 = dyn_cast(S0->getPointerOperand()); + auto *A1 = dyn_cast(S1->getPointerOperand()); + return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && + (A0->getParent() == S0->getParent()) && A1->hasOneUse() && + (A1->getParent() == S1->getParent()) && isa(A0); +} + /// /// Merge two stores to same address and sink into \p BB /// /// Also sinks GEP instruction computing the store address /// -bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, - StoreInst *S1) { +void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0, + StoreInst *S1) { // Only one definition? 
auto *A0 = dyn_cast(S0->getPointerOperand()); auto *A1 = dyn_cast(S1->getPointerOperand()); - if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && - (A0->getParent() == S0->getParent()) && A1->hasOneUse() && - (A1->getParent() == S1->getParent()) && isa(A0)) { - LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); - dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; - dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); - // Hoist the instruction. - BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); - // Intersect optional metadata. - S0->andIRFlags(S1); - S0->dropUnknownNonDebugMetadata(); + LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); + dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; + dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); + // Hoist the instruction. + BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); + // Intersect optional metadata. + S0->andIRFlags(S1); + S0->dropUnknownNonDebugMetadata(); - // Create the new store to be inserted at the join point. - StoreInst *SNew = cast(S0->clone()); - Instruction *ANew = A0->clone(); - SNew->insertBefore(&*InsertPt); - ANew->insertBefore(SNew); + // Create the new store to be inserted at the join point. + StoreInst *SNew = cast(S0->clone()); + Instruction *ANew = A0->clone(); + SNew->insertBefore(&*InsertPt); + ANew->insertBefore(SNew); - assert(S0->getParent() == A0->getParent()); - assert(S1->getParent() == A1->getParent()); + assert(S0->getParent() == A0->getParent()); + assert(S1->getParent() == A1->getParent()); - // New PHI operand? Use it. - if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) - SNew->setOperand(0, NewPN); - S0->eraseFromParent(); - S1->eraseFromParent(); - A0->replaceAllUsesWith(ANew); - A0->eraseFromParent(); - A1->replaceAllUsesWith(ANew); - A1->eraseFromParent(); - return true; - } - return false; + // New PHI operand? Use it. 
+ if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) + SNew->setOperand(0, NewPN); + S0->eraseFromParent(); + S1->eraseFromParent(); + A0->replaceAllUsesWith(ANew); + A0->eraseFromParent(); + A1->replaceAllUsesWith(ANew); + A1->eraseFromParent(); } /// /// True when two stores are equivalent and can sink into the footer /// -/// Starting from a diamond tail block, iterate over the instructions in one -/// predecessor block and try to match a store in the second predecessor. +/// Starting from a diamond head block, iterate over the instructions in one +/// successor block and try to match a store in the second successor. /// -bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { +bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { bool MergedStores = false; - assert(T && "Footer of a diamond cannot be empty"); + BasicBlock *TailBB = getDiamondTail(HeadBB); + BasicBlock *SinkBB = TailBB; + assert(SinkBB && "Footer of a diamond cannot be empty"); - pred_iterator PI = pred_begin(T), E = pred_end(T); - assert(PI != E); - BasicBlock *Pred0 = *PI; - ++PI; - BasicBlock *Pred1 = *PI; - ++PI; + succ_iterator SI = succ_begin(HeadBB); + assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors"); + BasicBlock *Pred0 = *SI; + ++SI; + assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor"); + BasicBlock *Pred1 = *SI; // tail block of a diamond/hammock? if (Pred0 == Pred1) return false; // No. - if (PI != E) - return false; // No. More than 2 predecessors. 
- - // #Instructions in Succ1 for Compile Time Control + // bail out early if we can not merge into the footer BB + if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3)) + return false; + // #Instructions in Pred1 for Compile Time Control auto InstsNoDbg = Pred1->instructionsWithoutDebug(); int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end()); int NStores = 0; @@ -304,14 +318,23 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { if (NStores * Size1 >= MagicCompileTimeControl) break; if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - bool Res = sinkStore(T, S0, S1); - MergedStores |= Res; - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - if (!Res) + if (!canSinkStoresAndGEPs(S0, S1)) + // Don't attempt to sink below stores that had to stick around + // But after removal of a store and some of its feeding + // instruction search again from the beginning since the iterator + // is likely stale at this point. break; + + if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) { + // We have more than 2 predecessors. Insert a new block + // postdominating 2 predecessors we're going to sink from. + SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split"); + if (!SinkBB) + break; + } + + MergedStores = true; + sinkStoresAndGEPs(SinkBB, S0, S1); RBI = Pred0->rbegin(); RBE = Pred0->rend(); LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); @@ -328,13 +351,15 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. + // This loop doesn't care about newly inserted/split blocks + // since they never will be diamond heads. 
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { BasicBlock *BB = &*FI++; // Hoist equivalent loads and sink stores // outside diamonds when possible if (isDiamondHead(BB)) { - Changed |= mergeStores(getDiamondTail(BB)); + Changed |= mergeStores(BB); } } return Changed; @@ -342,9 +367,11 @@ bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { namespace { class MergedLoadStoreMotionLegacyPass : public FunctionPass { + const bool SplitFooterBB; public: static char ID; // Pass identification, replacement for typeid - MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) { + MergedLoadStoreMotionLegacyPass(bool SplitFooterBB = false) + : FunctionPass(ID), SplitFooterBB(SplitFooterBB) { initializeMergedLoadStoreMotionLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -355,13 +382,14 @@ public: bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; - MergedLoadStoreMotion Impl; + MergedLoadStoreMotion Impl(SplitFooterBB); return Impl.run(F, getAnalysis().getAAResults()); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); + if (!SplitFooterBB) + AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); } @@ -373,8 +401,8 @@ char MergedLoadStoreMotionLegacyPass::ID = 0; /// /// createMergedLoadStoreMotionPass - The public interface to this file. 
/// -FunctionPass *llvm::createMergedLoadStoreMotionPass() { - return new MergedLoadStoreMotionLegacyPass(); +FunctionPass *llvm::createMergedLoadStoreMotionPass(bool SplitFooterBB) { + return new MergedLoadStoreMotionLegacyPass(SplitFooterBB); } INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion", @@ -385,13 +413,14 @@ INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion", PreservedAnalyses MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { - MergedLoadStoreMotion Impl; + MergedLoadStoreMotion Impl(Options.SplitFooterBB); auto &AA = AM.getResult(F); if (!Impl.run(F, AA)) return PreservedAnalyses::all(); PreservedAnalyses PA; - PA.preserveSet(); + if (!Options.SplitFooterBB) + PA.preserveSet(); PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index 94436b55752..1260bd39cde 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -170,7 +170,7 @@ bool NaryReassociateLegacyPass::runOnFunction(Function &F) { auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); auto *SE = &getAnalysis().getSE(); - auto *TLI = &getAnalysis().getTLI(); + auto *TLI = &getAnalysis().getTLI(F); auto *TTI = &getAnalysis().getTTI(F); return Impl.runImpl(F, AC, DT, SE, TLI, TTI); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 08ac2b666fc..b213264de55 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -89,6 +89,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -122,6 +123,7 @@ using namespace llvm; using namespace llvm::GVNExpression; using namespace llvm::VNCoercion; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "newgvn" @@ 
-656,7 +658,7 @@ public: TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique(F, *DT, *AC)), + PredInfo(std::make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC, /*CtxI=*/nullptr, /*UseInstrInfo=*/false) {} bool runGVN(); @@ -1332,7 +1334,7 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, E->setOpcode(0); E->op_push_back(PointerOp); if (LI) - E->setAlignment(LI->getAlignment()); + E->setAlignment(MaybeAlign(LI->getAlignment())); // TODO: Value number heap versions. We may be able to discover // things alias analysis can't on it's own (IE that a store and a @@ -1637,8 +1639,11 @@ const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { if (AA->doesNotAccessMemory(CI)) { return createCallExpression(CI, TOPClass->getMemoryLeader()); } else if (AA->onlyReadsMemory(CI)) { - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI); - return createCallExpression(CI, DefiningAccess); + if (auto *MA = MSSA->getMemoryAccess(CI)) { + auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA); + return createCallExpression(CI, DefiningAccess); + } else // MSSA determined that CI does not access memory. + return createCallExpression(CI, TOPClass->getMemoryLeader()); } return nullptr; } @@ -1754,7 +1759,7 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, return true; }); // If we are left with no operands, it's dead. - if (empty(Filtered)) { + if (Filtered.empty()) { // If it has undef at this point, it means there are no-non-undef arguments, // and thus, the value of the phi node must be undef. if (HasUndef) { @@ -2464,9 +2469,9 @@ Value *NewGVN::findConditionEquivalence(Value *Cond) const { // Process the outgoing edges of a block for reachability. void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) { // Evaluate reachability of terminator instruction. 
- BranchInst *BR; - if ((BR = dyn_cast(TI)) && BR->isConditional()) { - Value *Cond = BR->getCondition(); + Value *Cond; + BasicBlock *TrueSucc, *FalseSucc; + if (match(TI, m_Br(m_Value(Cond), TrueSucc, FalseSucc))) { Value *CondEvaluated = findConditionEquivalence(Cond); if (!CondEvaluated) { if (auto *I = dyn_cast(Cond)) { @@ -2479,8 +2484,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) { } } ConstantInt *CI; - BasicBlock *TrueSucc = BR->getSuccessor(0); - BasicBlock *FalseSucc = BR->getSuccessor(1); if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) { if (CI->isOne()) { LLVM_DEBUG(dbgs() << "Condition for Terminator " << *TI @@ -4196,7 +4199,7 @@ bool NewGVNLegacyPass::runOnFunction(Function &F) { return false; return NewGVN(F, &getAnalysis().getDomTree(), &getAnalysis().getAssumptionCache(F), - &getAnalysis().getTLI(), + &getAnalysis().getTLI(F), &getAnalysis().getAAResults(), &getAnalysis().getMSSA(), F.getParent()->getDataLayout()) diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 03912321854..68a0f5151ad 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -161,7 +161,7 @@ public: return false; TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis().getTTI(F); return runPartiallyInlineLibCalls(F, TLI, TTI); diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index b544f0a39ea..beb299272ed 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -131,7 +131,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass { SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); for (Loop *I : *LI) { runOnLoopAndSubLoops(I); } @@ 
-240,7 +240,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE, BasicBlock *Pred) { // A conservative bound on the loop as a whole. - const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L); + const SCEV *MaxTrips = SE->getConstantMaxBackedgeTakenCount(L); if (MaxTrips != SE->getCouldNotCompute() && SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN( CountedLoopTripWidth)) @@ -478,7 +478,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) { return false; const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); + getAnalysis().getTLI(F); bool Modified = false; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index fa8c9e2a5fe..124f625ef7b 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -861,7 +861,7 @@ static Value *NegateValue(Value *V, Instruction *BI, // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. - BinaryOperator *TheNeg = cast(U); + Instruction *TheNeg = cast(U); // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) @@ -1938,88 +1938,132 @@ void ReassociatePass::EraseInst(Instruction *I) { MadeChange = true; } -// Canonicalize expressions of the following form: -// x + (-Constant * y) -> x - (Constant * y) -// x - (-Constant * y) -> x + (Constant * y) -Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { - if (!I->hasOneUse() || I->getType()->isVectorTy()) - return nullptr; +/// Recursively analyze an expression to build a list of instructions that have +/// negative floating-point constant operands. 
The caller can then transform +/// the list to create positive constants for better reassociation and CSE. +static void getNegatibleInsts(Value *V, + SmallVectorImpl &Candidates) { + // Handle only one-use instructions. Combining negations does not justify + // replicating instructions. + Instruction *I; + if (!match(V, m_OneUse(m_Instruction(I)))) + return; - // Must be a fmul or fdiv instruction. - unsigned Opcode = I->getOpcode(); - if (Opcode != Instruction::FMul && Opcode != Instruction::FDiv) - return nullptr; + // Handle expressions of multiplications and divisions. + // TODO: This could look through floating-point casts. + const APFloat *C; + switch (I->getOpcode()) { + case Instruction::FMul: + // Not expecting non-canonical code here. Bail out and wait. + if (match(I->getOperand(0), m_Constant())) + break; - auto *C0 = dyn_cast(I->getOperand(0)); - auto *C1 = dyn_cast(I->getOperand(1)); + if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) { + Candidates.push_back(I); + LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n'); + } + getNegatibleInsts(I->getOperand(0), Candidates); + getNegatibleInsts(I->getOperand(1), Candidates); + break; + case Instruction::FDiv: + // Not expecting non-canonical code here. Bail out and wait. + if (match(I->getOperand(0), m_Constant()) && + match(I->getOperand(1), m_Constant())) + break; - // Both operands are constant, let it get constant folded away. - if (C0 && C1) - return nullptr; + if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) || + (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) { + Candidates.push_back(I); + LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n'); + } + getNegatibleInsts(I->getOperand(0), Candidates); + getNegatibleInsts(I->getOperand(1), Candidates); + break; + default: + break; + } +} - ConstantFP *CF = C0 ? 
C0 : C1; +/// Given an fadd/fsub with an operand that is a one-use instruction +/// (the fadd/fsub), try to change negative floating-point constants into +/// positive constants to increase potential for reassociation and CSE. +Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I, + Instruction *Op, + Value *OtherOp) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub"); - // Must have one constant operand. - if (!CF) - return nullptr; - - // Must be a negative ConstantFP. - if (!CF->isNegative()) - return nullptr; - - // User must be a binary operator with one or more uses. - Instruction *User = I->user_back(); - if (!isa(User) || User->use_empty()) - return nullptr; - - unsigned UserOpcode = User->getOpcode(); - if (UserOpcode != Instruction::FAdd && UserOpcode != Instruction::FSub) - return nullptr; - - // Subtraction is not commutative. Explicitly, the following transform is - // not valid: (-Constant * y) - x -> x + (Constant * y) - if (!User->isCommutative() && User->getOperand(1) != I) + // Collect instructions with negative FP constants from the subtree that ends + // in Op. + SmallVector Candidates; + getNegatibleInsts(Op, Candidates); + if (Candidates.empty()) return nullptr; // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the // resulting subtract will be broken up later. This can get us into an // infinite loop during reassociation. - if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User)) + bool IsFSub = I->getOpcode() == Instruction::FSub; + bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1; + if (NeedsSubtract && ShouldBreakUpSubtract(I)) return nullptr; - // Change the sign of the constant. - APFloat Val = CF->getValueAPF(); - Val.changeSign(); - I->setOperand(C0 ? 0 : 1, ConstantFP::get(CF->getContext(), Val)); - - // Canonicalize I to RHS to simplify the next bit of logic. E.g., - // ((-Const*y) + x) -> (x + (-Const*y)). 
- if (User->getOperand(0) == I && User->isCommutative()) - cast(User)->swapOperands(); - - Value *Op0 = User->getOperand(0); - Value *Op1 = User->getOperand(1); - BinaryOperator *NI; - switch (UserOpcode) { - default: - llvm_unreachable("Unexpected Opcode!"); - case Instruction::FAdd: - NI = BinaryOperator::CreateFSub(Op0, Op1); - NI->setFastMathFlags(cast(User)->getFastMathFlags()); - break; - case Instruction::FSub: - NI = BinaryOperator::CreateFAdd(Op0, Op1); - NI->setFastMathFlags(cast(User)->getFastMathFlags()); - break; + for (Instruction *Negatible : Candidates) { + const APFloat *C; + if (match(Negatible->getOperand(0), m_APFloat(C))) { + assert(!match(Negatible->getOperand(1), m_Constant()) && + "Expecting only 1 constant operand"); + assert(C->isNegative() && "Expected negative FP constant"); + Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C))); + MadeChange = true; + } + if (match(Negatible->getOperand(1), m_APFloat(C))) { + assert(!match(Negatible->getOperand(0), m_Constant()) && + "Expecting only 1 constant operand"); + assert(C->isNegative() && "Expected negative FP constant"); + Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C))); + MadeChange = true; + } } + assert(MadeChange == true && "Negative constant candidate was not changed"); - NI->insertBefore(User); - NI->setName(User->getName()); - User->replaceAllUsesWith(NI); - NI->setDebugLoc(I->getDebugLoc()); + // Negations cancelled out. + if (Candidates.size() % 2 == 0) + return I; + + // Negate the final operand in the expression by flipping the opcode of this + // fadd/fsub. + assert(Candidates.size() % 2 == 1 && "Expected odd number"); + IRBuilder<> Builder(I); + Value *NewInst = IsFSub ? 
Builder.CreateFAddFMF(OtherOp, Op, I) + : Builder.CreateFSubFMF(OtherOp, Op, I); + I->replaceAllUsesWith(NewInst); RedoInsts.insert(I); - MadeChange = true; - return NI; + return dyn_cast(NewInst); +} + +/// Canonicalize expressions that contain a negative floating-point constant +/// of the following form: +/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree) +/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree) +/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree) +/// +/// The fadd/fsub opcode may be switched to allow folding a negation into the +/// input instruction. +Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) { + LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n'); + Value *X; + Instruction *Op; + if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op))))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X)))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op))))) + if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) + I = R; + return I; } /// Inspect and optimize the given instruction. Note that erasing @@ -2042,16 +2086,16 @@ void ReassociatePass::OptimizeInst(Instruction *I) { I = NI; } - // Canonicalize negative constants out of expressions. - if (Instruction *Res = canonicalizeNegConstExpr(I)) - I = Res; - // Commute binary operators, to canonicalize the order of their operands. // This can potentially expose more CSE opportunities, and makes writing other // transformations simpler. if (I->isCommutative()) canonicalizeOperands(I); + // Canonicalize negative constants out of expressions. + if (Instruction *Res = canonicalizeNegFPConstants(I)) + I = Res; + // Don't optimize floating-point instructions unless they are 'fast'. 
if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) return; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c358258d24c..48bbdd8d1b3 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -172,8 +172,6 @@ public: bool runOnModule(Module &M) override { bool Changed = false; - const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); for (Function &F : M) { // Nothing to do for declarations. if (F.isDeclaration() || F.empty()) @@ -186,6 +184,8 @@ public: TargetTransformInfo &TTI = getAnalysis().getTTI(F); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(F); auto &DT = getAnalysis(F).getDomTree(); Changed |= Impl.runOnFunction(F, DT, TTI, TLI); @@ -2530,7 +2530,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, // statepoints surviving this pass. This makes testing easier and the // resulting IR less confusing to human readers. DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - bool MadeChange = removeUnreachableBlocks(F, nullptr, &DTU); + bool MadeChange = removeUnreachableBlocks(F, &DTU); // Flush the Dominator Tree. DTU.getDomTree(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4093e50ce89..10fbdc8aacd 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -191,7 +191,7 @@ public: /// class SCCPSolver : public InstVisitor { const DataLayout &DL; - const TargetLibraryInfo *TLI; + std::function GetTLI; SmallPtrSet BBExecutable; // The BBs that are executable. DenseMap ValueState; // The state each value is in. // The state each parameter is in. 
@@ -268,8 +268,9 @@ public: return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy}; } - SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli) - : DL(DL), TLI(tli) {} + SCCPSolver(const DataLayout &DL, + std::function GetTLI) + : DL(DL), GetTLI(std::move(GetTLI)) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1290,7 +1291,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. if (Constant *C = ConstantFoldCall(cast(CS.getInstruction()), F, - Operands, TLI)) { + Operands, &GetTLI(*F))) { // call -> undef. if (isa(C)) return; @@ -1465,7 +1466,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } LatticeVal &LV = getValueState(&I); - if (!LV.isUnknown()) continue; + if (!LV.isUnknown()) + continue; + + // There are two reasons a call can have an undef result + // 1. It could be tracked. + // 2. It could be constant-foldable. + // Because of the way we solve return values, tracked calls must + // never be marked overdefined in ResolvedUndefsIn. + if (CallSite CS = CallSite(&I)) { + if (Function *F = CS.getCalledFunction()) + if (TrackedRetVals.count(F)) + continue; + + // If the call is constant-foldable, we mark it overdefined because + // we do not know what return values are valid. + markOverdefined(&I); + return true; + } // extractvalue is safe; check here because the argument is a struct. if (isa(I)) @@ -1638,19 +1656,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::Call: case Instruction::Invoke: case Instruction::CallBr: - // There are two reasons a call can have an undef result - // 1. It could be tracked. - // 2. It could be constant-foldable. - // Because of the way we solve return values, tracked calls must - // never be marked overdefined in ResolvedUndefsIn. 
- if (Function *F = CallSite(&I).getCalledFunction()) - if (TrackedRetVals.count(F)) - break; - - // If the call is constant-foldable, we mark it overdefined because - // we do not know what return values are valid. - markOverdefined(&I); - return true; + llvm_unreachable("Call-like instructions should have be handled early"); default: // If we don't know what should happen here, conservatively mark it // overdefined. @@ -1751,7 +1757,7 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { [](const LatticeVal &LV) { return LV.isOverdefined(); })) return false; std::vector ConstVals; - auto *ST = dyn_cast(V->getType()); + auto *ST = cast(V->getType()); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { LatticeVal V = IVs[i]; ConstVals.push_back(V.isConstant() @@ -1796,7 +1802,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { static bool runSCCP(Function &F, const DataLayout &DL, const TargetLibraryInfo *TLI) { LLVM_DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - SCCPSolver Solver(DL, TLI); + SCCPSolver Solver( + DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; }); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(&F.front()); @@ -1891,7 +1898,7 @@ public: return false; const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); + &getAnalysis().getTLI(F); return runSCCP(F, DL, TLI); } }; @@ -1924,6 +1931,27 @@ static void findReturnsToZap(Function &F, return; } + assert( + all_of(F.users(), + [&Solver](User *U) { + if (isa(U) && + !Solver.isBlockExecutable(cast(U)->getParent())) + return true; + // Non-callsite uses are not impacted by zapping. Also, constant + // uses (like blockaddresses) could stuck around, without being + // used in the underlying IR, meaning we do not have lattice + // values for them. 
+ if (!CallSite(U)) + return true; + if (U->getType()->isStructTy()) { + return all_of( + Solver.getStructLatticeValueFor(U), + [](const LatticeVal &LV) { return !LV.isOverdefined(); }); + } + return !Solver.getLatticeValueFor(U).isOverdefined(); + }) && + "We can only zap functions where all live users have a concrete value"); + for (BasicBlock &BB : F) { if (CallInst *CI = BB.getTerminatingMustTailCall()) { LLVM_DEBUG(dbgs() << "Can't zap return of the block due to present " @@ -1974,9 +2002,10 @@ static void forceIndeterminateEdge(Instruction* I, SCCPSolver &Solver) { } bool llvm::runIPSCCP( - Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI, + Module &M, const DataLayout &DL, + std::function GetTLI, function_ref getAnalysis) { - SCCPSolver Solver(DL, TLI); + SCCPSolver Solver(DL, GetTLI); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 33f90d0b01e..74b8ff91305 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -959,14 +959,16 @@ private: std::tie(UsedI, I) = Uses.pop_back_val(); if (LoadInst *LI = dyn_cast(I)) { - Size = std::max(Size, DL.getTypeStoreSize(LI->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(LI->getType()).getFixedSize()); continue; } if (StoreInst *SI = dyn_cast(I)) { Value *Op = SI->getOperand(0); if (Op == UsedI) return SI; - Size = std::max(Size, DL.getTypeStoreSize(Op->getType())); + Size = std::max(Size, + DL.getTypeStoreSize(Op->getType()).getFixedSize()); continue; } @@ -1197,7 +1199,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // TODO: Allow recursive phi users. // TODO: Allow stores. 
BasicBlock *BB = PN.getParent(); - unsigned MaxAlign = 0; + MaybeAlign MaxAlign; uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); APInt MaxSize(APWidth, 0); bool HaveLoad = false; @@ -1218,8 +1220,8 @@ static bool isSafePHIToSpeculate(PHINode &PN) { if (BBI->mayWriteToMemory()) return false; - uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType()); - MaxAlign = std::max(MaxAlign, LI->getAlignment()); + uint64_t Size = DL.getTypeStoreSize(LI->getType()); + MaxAlign = std::max(MaxAlign, MaybeAlign(LI->getAlignment())); MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; HaveLoad = true; } @@ -1266,11 +1268,11 @@ static void speculatePHINodeLoads(PHINode &PN) { PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); - // Get the AA tags and alignment to use from one of the loads. It doesn't + // Get the AA tags and alignment to use from one of the loads. It does not // matter which one we get and if any differ. AAMDNodes AATags; SomeLoad->getAAMetadata(AATags); - unsigned Align = SomeLoad->getAlignment(); + const MaybeAlign Align = MaybeAlign(SomeLoad->getAlignment()); // Rewrite all loads of the PN to use the new PHI. while (!PN.use_empty()) { @@ -1338,11 +1340,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. 
- if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(TValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; - if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(), - DL, LI)) + if (!isSafeToLoadUnconditionally(FValue, LI->getType(), + MaybeAlign(LI->getAlignment()), DL, LI)) return false; } @@ -1368,8 +1370,8 @@ static void speculateSelectInstLoads(SelectInst &SI) { NumLoadsSpeculated += 2; // Transfer alignment and AA info if present. - TL->setAlignment(LI->getAlignment()); - FL->setAlignment(LI->getAlignment()); + TL->setAlignment(MaybeAlign(LI->getAlignment())); + FL->setAlignment(MaybeAlign(LI->getAlignment())); AAMDNodes Tags; LI->getAAMetadata(Tags); @@ -1888,6 +1890,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { bool HaveCommonEltTy = true; auto CheckCandidateType = [&](Type *Ty) { if (auto *VTy = dyn_cast(Ty)) { + // Return if bitcast to vectors is different for total size in bits. 
+ if (!CandidateTys.empty()) { + VectorType *V = CandidateTys[0]; + if (DL.getTypeSizeInBits(VTy) != DL.getTypeSizeInBits(V)) { + CandidateTys.clear(); + return; + } + } CandidateTys.push_back(VTy); if (!CommonEltTy) CommonEltTy = VTy->getElementType(); @@ -3110,7 +3120,7 @@ private: unsigned LoadAlign = LI->getAlignment(); if (!LoadAlign) LoadAlign = DL.getABITypeAlignment(LI->getType()); - LI->setAlignment(std::min(LoadAlign, getSliceAlign())); + LI->setAlignment(MaybeAlign(std::min(LoadAlign, getSliceAlign()))); continue; } if (StoreInst *SI = dyn_cast(I)) { @@ -3119,7 +3129,7 @@ private: Value *Op = SI->getOperand(0); StoreAlign = DL.getABITypeAlignment(Op->getType()); } - SI->setAlignment(std::min(StoreAlign, getSliceAlign())); + SI->setAlignment(MaybeAlign(std::min(StoreAlign, getSliceAlign()))); continue; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 869cf00e0a8..1d2e40bf62b 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -79,6 +79,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); + initializeLowerConstantIntrinsicsPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicLegacyPassPass(Registry); initializeLowerWidenableConditionLegacyPassPass(Registry); @@ -123,6 +124,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAggressiveDCEPass()); } +void LLVMAddDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createDeadCodeEliminationPass()); +} + void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBitTrackingDCEPass()); } @@ -280,6 +285,10 @@ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createBasicAAWrapperPass()); } +void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM) { + 
unwrap(PM)->add(createLowerConstantIntrinsicsPass()); +} + void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLowerExpectIntrinsicPass()); } diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index f6a12fb1314..41554fccdf0 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1121,7 +1121,7 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); + TLI = &getAnalysis().getTLI(F); bool Changed = false; for (BasicBlock &B : F) { for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index aeac6f548b3..ac832b9b456 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1909,7 +1909,7 @@ static void unswitchNontrivialInvariants( // We can only unswitch switches, conditional branches with an invariant // condition, or combining invariant conditions with an instruction. - assert((SI || BI->isConditional()) && + assert((SI || (BI && BI->isConditional())) && "Can only unswitch switches and conditional branch!"); bool FullUnswitch = SI || BI->getCondition() == Invariants[0]; if (FullUnswitch) @@ -2141,17 +2141,21 @@ static void unswitchNontrivialInvariants( buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH); DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); + + if (MSSAU) { + DT.applyUpdates(DTUpdates); + DTUpdates.clear(); + + // Perform MSSA cloning updates. 
+ for (auto &VMap : VMaps) + MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, + /*IgnoreIncomingWithNoClones=*/true); + MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); + } } // Apply the updates accumulated above to get an up-to-date dominator tree. DT.applyUpdates(DTUpdates); - if (!FullUnswitch && MSSAU) { - // Update MSSA for partial unswitch, after DT update. - SmallVector Updates; - Updates.push_back( - {cfg::UpdateKind::Insert, SplitBB, ClonedPHs.begin()->second}); - MSSAU->applyInsertUpdates(Updates, DT); - } // Now that we have an accurate dominator tree, first delete the dead cloned // blocks so that we can accurately build any cloned loops. It is important to @@ -2720,7 +2724,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, return Cost * (SuccessorsCount - 1); }; Instruction *BestUnswitchTI = nullptr; - int BestUnswitchCost; + int BestUnswitchCost = 0; ArrayRef BestUnswitchInvariants; for (auto &TerminatorAndInvariants : UnswitchCandidates) { Instruction &TI = *TerminatorAndInvariants.first; @@ -2752,6 +2756,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, BestUnswitchInvariants = Invariants; } } + assert(BestUnswitchTI && "Failed to find loop unswitch candidate"); if (BestUnswitchCost >= UnswitchThreshold) { LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " @@ -2880,7 +2885,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast)); auto PA = getLoopPassPreservedAnalyses(); - if (EnableMSSALoopDependency) + if (AR.MSSA) PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp index c13fb3e0451..e6db11f47ea 100644 --- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl &PNs, // speculation if the predecessor is an 
invoke. This doesn't seem // fundamental and we should probably be splitting critical edges // differently. - if (isa(PredBB->getTerminator()) || - isa(PredBB->getTerminator())) { + const auto *TermInst = PredBB->getTerminator(); + if (isa(TermInst) || + isa(TermInst) || + isa(TermInst)) { LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " << PredBB->getName() << "\n"); return false; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index e5400676c7e..9791cf41f62 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -65,7 +65,7 @@ static cl::opt ForceSkipUniformRegions( static cl::opt RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, cl::desc("Allow relaxed uniform region checks"), - cl::init(false)); + cl::init(true)); // Definition of the complex types used in this pass. diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index f0b79079d81..b27a36b67d6 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -341,7 +341,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { const DataLayout &DL = L->getModule()->getDataLayout(); if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(), - L->getAlignment(), DL, L)) + MaybeAlign(L->getAlignment()), DL, L)) return false; } } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5fa371377c8..d85cc40c372 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -170,7 +170,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, - 
MemoryDependenceResults *MemDep) { + MemoryDependenceResults *MemDep, + bool PredecessorWithTwoSuccessors) { if (BB->hasAddressTaken()) return false; @@ -185,9 +186,24 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, return false; // Can't merge if there are multiple distinct successors. - if (PredBB->getUniqueSuccessor() != BB) + if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB) return false; + // Currently only allow PredBB to have two predecessors, one being BB. + // Update BI to branch to BB's only successor instead of BB. + BranchInst *PredBB_BI; + BasicBlock *NewSucc = nullptr; + unsigned FallThruPath; + if (PredecessorWithTwoSuccessors) { + if (!(PredBB_BI = dyn_cast(PredBB->getTerminator()))) + return false; + BranchInst *BB_JmpI = dyn_cast(BB->getTerminator()); + if (!BB_JmpI || !BB_JmpI->isUnconditional()) + return false; + NewSucc = BB_JmpI->getSuccessor(0); + FallThruPath = PredBB_BI->getSuccessor(0) == BB ? 0 : 1; + } + // Can't merge if there is PHI loop. for (PHINode &PN : BB->phis()) for (Value *IncValue : PN.incoming_values()) @@ -227,18 +243,39 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Delete, PredBB, BB}); } - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(BB, PredBB, &*(BB->begin())); + Instruction *PTI = PredBB->getTerminator(); + Instruction *STI = BB->getTerminator(); + Instruction *Start = &*BB->begin(); + // If there's nothing to move, mark the starting instruction as the last + // instruction in the block. + if (Start == STI) + Start = PTI; - // Delete the unconditional branch from the predecessor... - PredBB->getInstList().pop_back(); + // Move all definitions in the successor to the predecessor... 
+ PredBB->getInstList().splice(PTI->getIterator(), BB->getInstList(), + BB->begin(), STI->getIterator()); + + if (MSSAU) + MSSAU->moveAllAfterMergeBlocks(BB, PredBB, Start); // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(PredBB); - // Move all definitions in the successor to the predecessor... - PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + if (PredecessorWithTwoSuccessors) { + // Delete the unconditional branch from BB. + BB->getInstList().pop_back(); + + // Update branch in the predecessor. + PredBB_BI->setSuccessor(FallThruPath, NewSucc); + } else { + // Delete the unconditional branch from the predecessor. + PredBB->getInstList().pop_back(); + + // Move terminator instruction. + PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + } + // Add unreachable to now empty BB. new UnreachableInst(BB->getContext(), BB); // Eliminate duplicate dbg.values describing the entry PHI node post-splice. @@ -274,11 +311,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, "applying corresponding DTU updates."); DTU->applyUpdatesPermissive(Updates); DTU->deleteBB(BB); - } - - else { + } else { BB->eraseFromParent(); // Nuke BB if DTU is nullptr. } + return true; } @@ -365,11 +401,13 @@ llvm::SplitAllCriticalEdges(Function &F, BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, const Twine &BBName) { BasicBlock::iterator SplitIt = SplitPt->getIterator(); while (isa(SplitIt) || SplitIt->isEHPad()) ++SplitIt; - BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); + std::string Name = BBName.str(); + BasicBlock *New = Old->splitBasicBlock( + SplitIt, Name.empty() ? Old->getName() + ".split" : Name); // The new block lives in whichever loop the old one did. 
This preserves // LCSSA as well, because we force the split point to be after any PHI nodes. diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 27f110e24f9..71316ce8f75 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -88,6 +88,14 @@ static bool setDoesNotCapture(Function &F, unsigned ArgNo) { return true; } +static bool setDoesNotAlias(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoAlias)) + return false; + F.addParamAttr(ArgNo, Attribute::NoAlias); + ++NumNoAlias; + return true; +} + static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; @@ -175,6 +183,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_strcpy: case LibFunc_strncpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + LLVM_FALLTHROUGH; case LibFunc_strcat: case LibFunc_strncat: Changed |= setReturnedArg(F, 0); @@ -249,12 +260,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_sprintf: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); return Changed; @@ -291,11 +304,23 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); + Changed |= setReturnedArg(F, 0); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + 
return Changed; case LibFunc_memmove: Changed |= setReturnedArg(F, 0); - LLVM_FALLTHROUGH; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; case LibFunc_mempcpy: case LibFunc_memccpy: + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); @@ -760,9 +785,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { } } -bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { +bool llvm::hasFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) { switch (Ty->getTypeID()) { case Type::HalfTyID: return false; @@ -775,10 +799,10 @@ bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } -StringRef llvm::getUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - assert(hasUnaryFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && +StringRef llvm::getFloatFnName(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn) { + assert(hasFloatFn(TLI, Ty, DoubleFn, FloatFn, LongDoubleFn) && "Cannot get name for unavailable function!"); switch (Ty->getTypeID()) { @@ -827,6 +851,12 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI); } +Value *llvm::emitStrDup(Value *Ptr, IRBuilder<> &B, + const TargetLibraryInfo *TLI) { + return emitLibCall(LibFunc_strdup, B.getInt8PtrTy(), B.getInt8PtrTy(), + castToCStr(Ptr, B), B, TLI); +} + Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetLibraryInfo *TLI) { Type *I8Ptr = B.getInt8PtrTy(); @@ -1045,12 +1075,35 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, const TargetLibraryInfo *TLI, 
LibFunc LongDoubleFn, IRBuilder<> &B, const AttributeList &Attrs) { // Get the name of the function according to TLI. - StringRef Name = getUnaryFloatFn(TLI, Op->getType(), - DoubleFn, FloatFn, LongDoubleFn); + StringRef Name = getFloatFnName(TLI, Op->getType(), + DoubleFn, FloatFn, LongDoubleFn); return emitUnaryFloatFnCallHelper(Op, Name, B, Attrs); } +static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2, + StringRef Name, IRBuilder<> &B, + const AttributeList &Attrs) { + assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); + + Module *M = B.GetInsertBlock()->getModule(); + FunctionCallee Callee = M->getOrInsertFunction(Name, Op1->getType(), + Op1->getType(), Op2->getType()); + CallInst *CI = B.CreateCall(Callee, { Op1, Op2 }, Name); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. + CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); + if (const Function *F = + dyn_cast(Callee.getCallee()->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, IRBuilder<> &B, const AttributeList &Attrs) { assert((Name != "") && "Must specify Name to emitBinaryFloatFnCall"); @@ -1058,16 +1111,19 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, SmallString<20> NameBuffer; appendTypeSuffix(Op1, Name, NameBuffer); - Module *M = B.GetInsertBlock()->getModule(); - FunctionCallee Callee = M->getOrInsertFunction( - Name, Op1->getType(), Op1->getType(), Op2->getType()); - CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name); - CI->setAttributes(Attrs); - if (const Function *F = - dyn_cast(Callee.getCallee()->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); +} - 
return CI; +Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, + const TargetLibraryInfo *TLI, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn, IRBuilder<> &B, + const AttributeList &Attrs) { + // Get the name of the function according to TLI. + StringRef Name = getFloatFnName(TLI, Op1->getType(), + DoubleFn, FloatFn, LongDoubleFn); + + return emitBinaryFloatFnCallHelper(Op1, Op2, Name, B, Attrs); } Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B, diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index df299f673f6..9a6761040bd 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB, DivCacheTy PerBBDivCache; bool MadeChange = false; - Instruction* Next = &*BB->begin(); + Instruction *Next = &*BB->begin(); while (Next != nullptr) { // We may add instructions immediately after I, but we want to skip over // them. - Instruction* I = Next; + Instruction *I = Next; Next = Next->getNextNode(); + // Ignore dead code to save time and avoid bugs. 
+ if (I->hasNUses(0)) + continue; + FastDivInsertionTask Task(I, BypassWidths); if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { I->replaceAllUsesWith(Replacement); diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp index 455fcbb1cf9..3c7c8d87259 100644 --- a/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1026c9d3703..75e8963303c 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -210,6 +210,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, RemapInstruction(&II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); + + // Register all DICompileUnits of the old parent module in the new parent module + auto* OldModule = OldFunc->getParent(); + auto* NewModule = NewFunc->getParent(); + if (OldModule && NewModule && OldModule != NewModule && DIFinder.compile_unit_count()) { + auto* NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + // Avoid multiple insertions of the same DICompileUnit to NMD. 
+ SmallPtrSet Visited; + for (auto* Operand : NMD->operands()) + Visited.insert(Operand); + for (auto* Unit : DIFinder.compile_units()) + // VMap.MD()[Unit] == Unit + if (Visited.insert(Unit).second) + NMD->addOperand(Unit); + } } /// Return a copy of the specified function and add it to that function's diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 7ddf59becba..2c8c3abb292 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -48,7 +48,7 @@ std::unique_ptr llvm::CloneModule( function_ref ShouldCloneDefinition) { // First off, we need to create the new module. std::unique_ptr New = - llvm::make_unique(M.getModuleIdentifier(), M.getContext()); + std::make_unique(M.getModuleIdentifier(), M.getContext()); New->setSourceFileName(M.getSourceFileName()); New->setDataLayout(M.getDataLayout()); New->setTargetTriple(M.getTargetTriple()); @@ -181,13 +181,25 @@ std::unique_ptr llvm::CloneModule( } // And named metadata.... + const auto* LLVM_DBG_CU = M.getNamedMetadata("llvm.dbg.cu"); for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; ++I) { const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); - for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); + if (&NMD == LLVM_DBG_CU) { + // Do not insert duplicate operands. 
+ SmallPtrSet Visited; + for (const auto* Operand : NewNMD->operands()) + Visited.insert(Operand); + for (const auto* Operand : NMD.operands()) { + auto* MappedOperand = MapMetadata(Operand, VMap); + if (Visited.insert(MappedOperand).second) + NewNMD->addOperand(MappedOperand); + } + } else + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + NewNMD->addOperand(MapMetadata(NMD.getOperand(i), VMap)); } return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index fa6d3f8ae87..0298ff9a395 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -293,10 +293,8 @@ static BasicBlock *getCommonExitBlock(const SetVector &Blocks) { CommonExitBlock = Succ; continue; } - if (CommonExitBlock == Succ) - continue; - - return true; + if (CommonExitBlock != Succ) + return true; } return false; }; @@ -307,52 +305,79 @@ static BasicBlock *getCommonExitBlock(const SetVector &Blocks) { return CommonExitBlock; } +CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &II : BB.instructionsWithoutDebug()) + if (auto *AI = dyn_cast(&II)) + Allocas.push_back(AI); + + findSideEffectInfoForBlock(BB); + } +} + +void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) { + for (Instruction &II : BB.instructionsWithoutDebug()) { + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. 
+ if (dyn_cast(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!isa(Base)) { + SideEffectingBlocks.insert(&BB); + return; + } + BaseMemAddrs[&BB].insert(Base); + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast(&II); + if (IntrInst) { + if (IntrInst->isLifetimeStartOrEnd()) + break; + SideEffectingBlocks.insert(&BB); + return; + } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) { + SideEffectingBlocks.insert(&BB); + return; + } + } + } + } +} + +bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr( + BasicBlock &BB, AllocaInst *Addr) const { + if (SideEffectingBlocks.count(&BB)) + return true; + auto It = BaseMemAddrs.find(&BB); + if (It != BaseMemAddrs.end()) + return It->second.count(Addr); + return false; +} + bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( - Instruction *Addr) const { + const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const { AllocaInst *AI = cast(Addr->stripInBoundsConstantOffsets()); Function *Func = (*Blocks.begin())->getParent(); for (BasicBlock &BB : *Func) { if (Blocks.count(&BB)) continue; - for (Instruction &II : BB) { - if (isa(II)) - continue; - - unsigned Opcode = II.getOpcode(); - Value *MemAddr = nullptr; - switch (Opcode) { - case Instruction::Store: - case Instruction::Load: { - if (Opcode == Instruction::Store) { - StoreInst *SI = cast(&II); - MemAddr = SI->getPointerOperand(); - } else { - LoadInst *LI = cast(&II); - MemAddr = LI->getPointerOperand(); - } - // Global variable can not be aliased with locals. - if (dyn_cast(MemAddr)) - break; - Value *Base = MemAddr->stripInBoundsConstantOffsets(); - if (!isa(Base) || Base == AI) - return false; - break; - } - default: { - IntrinsicInst *IntrInst = dyn_cast(&II); - if (IntrInst) { - if (IntrInst->isLifetimeStartOrEnd()) - break; - return false; - } - // Treat all the other cases conservatively if it has side effects. 
- if (II.mayHaveSideEffects()) - return false; - } - } - } + if (CEAC.doesBlockContainClobberOfAddr(BB, AI)) + return false; } - return true; } @@ -415,7 +440,8 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { // outline region. If there are not other untracked uses of the address, return // the pair of markers if found; otherwise return a pair of nullptr. CodeExtractor::LifetimeMarkerInfo -CodeExtractor::getLifetimeMarkers(Instruction *Addr, +CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const { LifetimeMarkerInfo Info; @@ -447,7 +473,7 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr, Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd); // Do legality check. if ((Info.SinkLifeStart || Info.HoistLifeEnd) && - !isLegalToShrinkwrapLifetimeMarkers(Addr)) + !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr)) return {}; // Check to see if we have a place to do hoisting, if not, bail. @@ -457,7 +483,8 @@ CodeExtractor::getLifetimeMarkers(Instruction *Addr, return Info; } -void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, +void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); @@ -478,74 +505,104 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, return true; }; - for (BasicBlock &BB : *Func) { - if (Blocks.count(&BB)) + // Look up allocas in the original function in CodeExtractorAnalysisCache, as + // this is much faster than walking all the instructions. 
+ for (AllocaInst *AI : CEAC.getAllocas()) { + BasicBlock *BB = AI->getParent(); + if (Blocks.count(BB)) continue; - for (Instruction &II : BB) { - auto *AI = dyn_cast(&II); - if (!AI) - continue; - LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock); - bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); - if (Moved) { - LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); - SinkCands.insert(AI); - continue; - } + // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca, + // check whether it is actually still in the original function. + Function *AIFunc = BB->getParent(); + if (AIFunc != Func) + continue; - // Follow any bitcasts. - SmallVector Bitcasts; - SmallVector BitcastLifetimeInfo; - for (User *U : AI->users()) { - if (U->stripInBoundsConstantOffsets() == AI) { - Instruction *Bitcast = cast(U); - LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock); - if (LMI.LifeStart) { - Bitcasts.push_back(Bitcast); - BitcastLifetimeInfo.push_back(LMI); - continue; - } - } - - // Found unknown use of AI. - if (!definedInRegion(Blocks, U)) { - Bitcasts.clear(); - break; - } - } - - // Either no bitcasts reference the alloca or there are unknown uses. 
- if (Bitcasts.empty()) - continue; - - LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock); + bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); + if (Moved) { + LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); SinkCands.insert(AI); - for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { - Instruction *BitcastAddr = Bitcasts[I]; - const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; - assert(LMI.LifeStart && - "Unsafe to sink bitcast without lifetime markers"); - moveOrIgnoreLifetimeMarkers(LMI); - if (!definedInRegion(Blocks, BitcastAddr)) { - LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr - << "\n"); - SinkCands.insert(BitcastAddr); + continue; + } + + // Follow any bitcasts. + SmallVector Bitcasts; + SmallVector BitcastLifetimeInfo; + for (User *U : AI->users()) { + if (U->stripInBoundsConstantOffsets() == AI) { + Instruction *Bitcast = cast(U); + LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock); + if (LMI.LifeStart) { + Bitcasts.push_back(Bitcast); + BitcastLifetimeInfo.push_back(LMI); + continue; } } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + Bitcasts.clear(); + break; + } + } + + // Either no bitcasts reference the alloca or there are unknown uses. 
+ if (Bitcasts.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); + } } } } +bool CodeExtractor::isEligible() const { + if (Blocks.empty()) + return false; + BasicBlock *Header = *Blocks.begin(); + Function *F = Header->getParent(); + + // For functions with varargs, check that varargs handling is only done in the + // outlined function, i.e vastart and vaend are only used in outlined blocks. + if (AllowVarArgs && F->getFunctionType()->isVarArg()) { + auto containsVarArgIntrinsic = [](const Instruction &I) { + if (const CallInst *CI = dyn_cast(&I)) + if (const Function *Callee = CI->getCalledFunction()) + return Callee->getIntrinsicID() == Intrinsic::vastart || + Callee->getIntrinsicID() == Intrinsic::vaend; + return false; + }; + + for (auto &BB : *F) { + if (Blocks.count(&BB)) + continue; + if (llvm::any_of(BB, containsVarArgIntrinsic)) + return false; + } + } + return true; +} + void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &SinkCands) const { for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
for (Instruction &II : *BB) { - for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) { - Value *V = *OI; + for (auto &OI : II.operands()) { + Value *V = OI; if (!SinkCands.count(V) && definedInCaller(Blocks, V)) Inputs.insert(V); } @@ -904,12 +961,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // within the new function. This must be done before we lose track of which // blocks were originally in the code region. std::vector Users(header->user_begin(), header->user_end()); - for (unsigned i = 0, e = Users.size(); i != e; ++i) + for (auto &U : Users) // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block - if (Instruction *I = dyn_cast(Users[i])) - if (I->isTerminator() && !Blocks.count(I->getParent()) && - I->getParent()->getParent() == oldFunction) + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction && + !Blocks.count(I->getParent())) I->replaceUsesOfWith(header, newHeader); return newFunction; @@ -1277,13 +1334,6 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { // Insert this basic block into the new function newBlocks.push_back(Block); - - // Remove @llvm.assume calls that were moved to the new function from the - // old function's assumption cache. 
- if (AC) - for (auto &I : *Block) - if (match(&I, m_Intrinsic())) - AC->unregisterAssumption(cast(&I)); } } @@ -1332,7 +1382,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights( MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } -Function *CodeExtractor::extractCodeRegion() { +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { if (!isEligible()) return nullptr; @@ -1341,27 +1392,6 @@ Function *CodeExtractor::extractCodeRegion() { BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); - // For functions with varargs, check that varargs handling is only done in the - // outlined function, i.e vastart and vaend are only used in outlined blocks. - if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) { - auto containsVarArgIntrinsic = [](Instruction &I) { - if (const CallInst *CI = dyn_cast(&I)) - if (const Function *F = CI->getCalledFunction()) - return F->getIntrinsicID() == Intrinsic::vastart || - F->getIntrinsicID() == Intrinsic::vaend; - return false; - }; - - for (auto &BB : *oldFunction) { - if (Blocks.count(&BB)) - continue; - if (llvm::any_of(BB, containsVarArgIntrinsic)) - return nullptr; - } - } - ValueSet inputs, outputs, SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; - // Calculate the entry frequency of the new function before we change the root // block. BlockFrequency EntryFreq; @@ -1375,6 +1405,15 @@ Function *CodeExtractor::extractCodeRegion() { } } + if (AC) { + // Remove @llvm.assume calls that were moved to the new function from the + // old function's assumption cache. + for (BasicBlock *Block : Blocks) + for (auto &I : *Block) + if (match(&I, m_Intrinsic())) + AC->unregisterAssumption(cast(&I)); + } + // If we have any return instructions in the region, split those blocks so // that the return is not in the region. 
splitReturnBlocks(); @@ -1428,7 +1467,9 @@ Function *CodeExtractor::extractCodeRegion() { } newFuncRoot->getInstList().push_back(BranchI); - findAllocas(SinkingCands, HoistingCands, CommonExit); + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); // Find inputs to, outputs from the code region. @@ -1563,5 +1604,17 @@ Function *CodeExtractor::extractCodeRegion() { }); LLVM_DEBUG(if (verifyFunction(*oldFunction)) report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; } + +bool CodeExtractor::verifyAssumptionCache(const Function& F, + AssumptionCache *AC) { + for (auto AssumeVH : AC->assumptions()) { + CallInst *I = cast(AssumeVH); + if (I->getFunction() != &F) + return true; + } + return false; +} diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 4aa40eeadda..57e2ff0251a 100644 --- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -24,7 +24,7 @@ static void insertCall(Function &CurFn, StringRef Func, if (Func == "mcount" || Func == ".mcount" || - Func == "\01__gnu_mcount_nc" || + Func == "llvm.arm.gnu.eabi.mcount" || Func == "\01_mcount" || Func == "\01mcount" || Func == "__mcount" || diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 0e203f4e075..ad36790b8c6 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -469,7 +469,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. 
} Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back(llvm::make_unique( + AllocaTmps.push_back(std::make_unique( Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, AI->getType()->getPointerAddressSpace())); diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 0c52e6f3703..893f23eb604 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -67,7 +67,7 @@ public: /// Before: /// ...... /// %cmp10 = fcmp une float %tmp1, %tmp2 -/// br i1 %cmp1, label %if.then, label %lor.rhs +/// br i1 %cmp10, label %if.then, label %lor.rhs /// /// lor.rhs: /// ...... @@ -251,8 +251,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { bool EverChanged = false; for (; CurrBlock != FirstCondBlock; CurrBlock = CurrBlock->getSinglePredecessor()) { - BranchInst *BI = dyn_cast(CurrBlock->getTerminator()); - CmpInst *CI = dyn_cast(BI->getCondition()); + auto *BI = cast(CurrBlock->getTerminator()); + auto *CI = dyn_cast(BI->getCondition()); if (!CI) continue; @@ -278,7 +278,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // Do the transformation. 
BasicBlock *CB; - BranchInst *PBI = dyn_cast(FirstCondBlock->getTerminator()); + BranchInst *PBI = cast(FirstCondBlock->getTerminator()); bool Iteration = true; IRBuilder<>::InsertPointGuard Guard(Builder); Value *PC = PBI->getCondition(); @@ -444,7 +444,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { FirstEntryBlock->getInstList().pop_back(); FirstEntryBlock->getInstList() .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); - BranchInst *PBI = dyn_cast(FirstEntryBlock->getTerminator()); + BranchInst *PBI = cast(FirstEntryBlock->getTerminator()); Value *CC = PBI->getCondition(); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); @@ -453,6 +453,16 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { PBI->replaceUsesOfWith(CC, NC); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + // Handle PHI node to replace its predecessors to FirstEntryBlock. + for (BasicBlock *Succ : successors(PBI)) { + for (PHINode &Phi : Succ->phis()) { + for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) { + if (Phi.getIncomingBlock(i) == SecondEntryBlock) + Phi.setIncomingBlock(i, FirstEntryBlock); + } + } + } + // Remove IfTrue1 if (IfTrue1 != FirstEntryBlock) { IfTrue1->dropAllReferences(); diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index c9cc0990f23..76b4635ad50 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -210,7 +210,7 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { if (Function *F = dyn_cast(&GV)) { if (!F->isDeclaration()) { for (auto &S : VI.getSummaryList()) { - FunctionSummary *FS = dyn_cast(S->getBaseObject()); + auto *FS = cast(S->getBaseObject()); if (FS->modulePath() == M.getModuleIdentifier()) { F->setEntryCount(Function::ProfileCount(FS->entryCount(), 
Function::PCT_Synthetic)); diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index 8041e66e6c4..ea93f99d69e 100644 --- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -25,8 +25,8 @@ ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) { auto &ValueLookup = NodesMap[F.getName()]; if (!ValueLookup) { - ValueLookup = llvm::make_unique(); - ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr; + ValueLookup = std::make_unique(); + ValueLookup->Imported = F.hasMetadata("thinlto_src_module"); } return *ValueLookup; } @@ -64,7 +64,7 @@ void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) { if (F.isDeclaration()) continue; AllFunctions++; - ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr); + ImportedFunctions += int(F.hasMetadata("thinlto_src_module")); } } static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All, diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 8c67d1dc6eb..ed28fffc22b 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -533,7 +533,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI, } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { - auto &TLI = getAnalysis().getTLI(); + auto &TLI = getAnalysis().getTLI(F); auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; return runImpl(F, TLI, DT); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 39b6b889f91..5bcd05757ec 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -324,8 +324,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) + // Delete pointer cast instructions. RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); + // Also zap the blockaddress constant if there are no users remaining, + // otherwise the destination is still marked as having its address taken. + if (BA->use_empty()) + BA->destroyConstant(); + // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an // 'unreachable' instruction. @@ -633,17 +639,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // -/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this -/// method is called when we're about to delete Pred as a predecessor of BB. If -/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. -/// -/// Unlike the removePredecessor method, this attempts to simplify uses of PHI -/// nodes that collapse into identity values. For example, if we have: -/// x = phi(1, 0, 0, 0) -/// y = and x, z -/// -/// .. and delete the predecessor corresponding to the '1', this will attempt to -/// recursively fold the and to 0. void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU) { // This only adjusts blocks with PHI nodes. 
@@ -672,10 +667,6 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}}); } -/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its -/// predecessor is known to have one successor (DestBB!). Eliminate the edge -/// between them, moving the instructions in the predecessor into DestBB and -/// deleting the predecessor block. void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DomTreeUpdater *DTU) { @@ -755,15 +746,14 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, } } -/// CanMergeValues - Return true if we can choose one of these values to use -/// in place of the other. Note that we will always choose the non-undef -/// value to keep. +/// Return true if we can choose one of these values to use in place of the +/// other. Note that we will always choose the non-undef value to keep. static bool CanMergeValues(Value *First, Value *Second) { return First == Second || isa(First) || isa(Second); } -/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into Succ. +/// Return true if we can fold BB, an almost-empty BB ending in an unconditional +/// branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { @@ -956,11 +946,6 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, replaceUndefValuesInPhi(PN, IncomingValues); } -/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an -/// unconditional branch, and contains no instructions other than PHI nodes, -/// potential side-effect free intrinsics and the branch. If possible, -/// eliminate BB by rewriting all the predecessors to branch to the successor -/// block and return true. If we can't transform, return false. 
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU) { assert(BB != &BB->getParent()->getEntryBlock() && @@ -1088,10 +1073,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, return true; } -/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI -/// nodes in this block. This doesn't try to be clever about PHI nodes -/// which differ only in the order of the incoming values, but instcombine -/// orders them so it usually won't matter. bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for @@ -1151,10 +1132,10 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// often possible though. If alignment is important, a more reliable approach /// is to simply align all global variables and allocation instructions to /// their preferred alignment from the beginning. -static unsigned enforceKnownAlignment(Value *V, unsigned Align, +static unsigned enforceKnownAlignment(Value *V, unsigned Alignment, unsigned PrefAlign, const DataLayout &DL) { - assert(PrefAlign > Align); + assert(PrefAlign > Alignment); V = V->stripPointerCasts(); @@ -1165,36 +1146,36 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // stripPointerCasts recurses through infinite layers of bitcasts, // while computeKnownBits is not allowed to traverse more than 6 // levels. - Align = std::max(AI->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; + Alignment = std::max(AI->getAlignment(), Alignment); + if (PrefAlign <= Alignment) + return Alignment; // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. 
- if (DL.exceedsNaturalStackAlignment(PrefAlign)) - return Align; - AI->setAlignment(PrefAlign); + if (DL.exceedsNaturalStackAlignment(Align(PrefAlign))) + return Alignment; + AI->setAlignment(MaybeAlign(PrefAlign)); return PrefAlign; } if (auto *GO = dyn_cast(V)) { // TODO: as above, this shouldn't be necessary. - Align = std::max(GO->getAlignment(), Align); - if (PrefAlign <= Align) - return Align; + Alignment = std::max(GO->getAlignment(), Alignment); + if (PrefAlign <= Alignment) + return Alignment; // If there is a large requested alignment and we can, bump up the alignment // of the global. If the memory we set aside for the global may not be the // memory used by the final program then it is impossible for us to reliably // enforce the preferred alignment. if (!GO->canIncreaseAlignment()) - return Align; + return Alignment; - GO->setAlignment(PrefAlign); + GO->setAlignment(MaybeAlign(PrefAlign)); return PrefAlign; } - return Align; + return Alignment; } unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, @@ -1397,7 +1378,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, /// Determine whether this alloca is either a VLA or an array. static bool isArray(AllocaInst *AI) { return AI->isArrayAllocation() || - AI->getType()->getElementType()->isArrayTy(); + (AI->getAllocatedType() && AI->getAllocatedType()->isArrayTy()); +} + +/// Determine whether this alloca is a structure. +static bool isStructure(AllocaInst *AI) { + return AI->getAllocatedType() && AI->getAllocatedType()->isStructTy(); } /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set @@ -1422,7 +1408,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // stored on the stack, while the dbg.declare can only describe // the stack slot (and at a lexical-scope granularity). Later // passes will attempt to elide the stack slot. 
- if (!AI || isArray(AI)) + if (!AI || isArray(AI) || isStructure(AI)) continue; // A volatile load/store means that the alloca can't be elided anyway. @@ -1591,15 +1577,10 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, DIExpr->getElement(0) != dwarf::DW_OP_deref) return; - // Insert the offset immediately after the first deref. + // Insert the offset before the first deref. // We could just change the offset argument of dbg.value, but it's unsigned... - if (Offset) { - SmallVector Ops; - Ops.push_back(dwarf::DW_OP_deref); - DIExpression::appendOffset(Ops, Offset); - Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); - DIExpr = Builder.createExpression(Ops); - } + if (Offset) + DIExpr = DIExpression::prepend(DIExpr, 0, Offset); Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); DVI->eraseFromParent(); @@ -1957,18 +1938,24 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, return NumInstrsRemoved; } -/// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) { - SmallVector Args(II->arg_begin(), II->arg_end()); +CallInst *llvm::createCallMatchingInvoke(InvokeInst *II) { + SmallVector Args(II->arg_begin(), II->arg_end()); SmallVector OpBundles; II->getOperandBundlesAsDefs(OpBundles); - CallInst *NewCall = CallInst::Create( - II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II); - NewCall->takeName(II); + CallInst *NewCall = CallInst::Create(II->getFunctionType(), + II->getCalledValue(), Args, OpBundles); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); NewCall->setDebugLoc(II->getDebugLoc()); NewCall->copyMetadata(*II); + return NewCall; +} + +/// changeToCall - Convert the specified invoke into a normal call. 
+void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { + CallInst *NewCall = createCallMatchingInvoke(II); + NewCall->takeName(II); + NewCall->insertBefore(II); II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. @@ -2223,12 +2210,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { /// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo -/// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, - DomTreeUpdater *DTU, +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU) { - SmallPtrSet Reachable; + SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable, DTU); // If there are unreachable blocks in the CFG... @@ -2236,21 +2221,21 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, return Changed; assert(Reachable.size() < F.size()); - NumRemoved += F.size()-Reachable.size(); + NumRemoved += F.size() - Reachable.size(); SmallSetVector DeadBlockSet; - for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { - auto *BB = &*I; - if (Reachable.count(BB)) + for (BasicBlock &BB : F) { + // Skip reachable basic blocks + if (Reachable.find(&BB) != Reachable.end()) continue; - DeadBlockSet.insert(BB); + DeadBlockSet.insert(&BB); } if (MSSAU) MSSAU->removeBlocks(DeadBlockSet); // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references. Update DTU and LVI if available. + // their internal references. Update DTU if available. 
std::vector Updates; for (auto *BB : DeadBlockSet) { for (BasicBlock *Successor : successors(BB)) { @@ -2259,26 +2244,18 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, if (DTU) Updates.push_back({DominatorTree::Delete, BB, Successor}); } - if (LVI) - LVI->eraseBlock(BB); BB->dropAllReferences(); - } - for (Function::iterator I = ++F.begin(); I != F.end();) { - auto *BB = &*I; - if (Reachable.count(BB)) { - ++I; - continue; - } if (DTU) { - // Remove the terminator of BB to clear the successor list of BB. - if (BB->getTerminator()) - BB->getInstList().pop_back(); + Instruction *TI = BB->getTerminator(); + assert(TI && "Basic block should have a terminator"); + // Terminators like invoke can have users. We have to replace their users, + // before removing them. + if (!TI->use_empty()) + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); assert(succ_empty(BB) && "The successor list of BB isn't empty before " "applying corresponding DTU updates."); - ++I; - } else { - I = F.getBasicBlockList().erase(I); } } @@ -2294,7 +2271,11 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, } if (!Deleted) return false; + } else { + for (auto *BB : DeadBlockSet) + BB->eraseFromParent(); } + return true; } @@ -2363,6 +2344,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; + case LLVMContext::MD_preserve_access_index: + // Preserve !preserve.access.index in K. + break; } } // Set !invariant.group from J if J has it. 
If both instructions have it @@ -2385,10 +2369,61 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(K, J, KnownIDs, KDominatesJ); } +void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) { + SmallVector, 8> MD; + Source.getAllMetadata(MD); + MDBuilder MDB(Dest.getContext()); + Type *NewType = Dest.getType(); + const DataLayout &DL = Source.getModule()->getDataLayout(); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a load instruction changing *only its type*. + // The only metadata it makes sense to drop is metadata which is invalidated + // when the pointer type changes. This should essentially never be the case + // in LLVM, but we explicitly switch over only known metadata to be + // conservatively correct. If you are adding metadata to LLVM which pertains + // to loads, you almost certainly want to add it here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_access_group: + // All of these directly apply. 
+ Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_nonnull: + copyNonnullMetadata(Source, N, Dest); + break; + + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewType->isPointerTy()) + Dest.setMetadata(ID, N); + break; + + case LLVMContext::MD_range: + copyRangeMetadata(DL, Source, N, Dest); + break; + } + } +} + void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { auto *ReplInst = dyn_cast(Repl); if (!ReplInst) @@ -2417,7 +2452,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { LLVMContext::MD_noalias, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(ReplInst, I, KnownIDs, false); } diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp index 37389a695b4..889ea5ca997 100644 --- a/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -615,30 +615,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " << LastExit->getName() << "\n"); - // Hoist the instructions from Latch into LastExit. - Instruction *FirstLatchInst = &*(Latch->begin()); - LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), - Latch->begin(), Jmp->getIterator()); - - // Update MemorySSA - if (MSSAU) - MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst); - - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); - - // Remove Latch from the CFG so that LastExit becomes the new Latch. 
- BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); - - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr, + /*PredecessorWithTwoSuccessors=*/true); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 7e6da02d570..d0f89dc54bf 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -808,7 +808,7 @@ bool LoopSimplify::runOnFunction(Function &F) { auto *MSSAAnalysis = getAnalysisIfAvailable(); if (MSSAAnalysis) { MSSA = &MSSAAnalysis->getMSSA(); - MSSAU = make_unique(MSSA); + MSSAU = std::make_unique(MSSA); } } @@ -835,12 +835,19 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, DominatorTree *DT = &AM.getResult(F); ScalarEvolution *SE = AM.getCachedResult(F); AssumptionCache *AC = &AM.getResult(F); + auto *MSSAAnalysis = AM.getCachedResult(F); + std::unique_ptr MSSAU; + if (MSSAAnalysis) { + auto *MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = std::make_unique(MSSA); + } + // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA - // after simplifying the loops. MemorySSA is not preserved either. + // after simplifying the loops. MemorySSA is preserved if it exists. 
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) Changed |= - simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false); + simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); @@ -853,6 +860,8 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, PA.preserve(); PA.preserve(); PA.preserve(); + if (MSSAAnalysis) + PA.preserve(); // BPI maps conditional terminators to probabilities, LoopSimplify can insert // blocks, but it does so only by splitting existing blocks and edges. This // results in the interesting property that all new terminators inserted are diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e39ade52371..a7590fc3254 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -711,7 +711,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, ArrayRef NextBlocks, - BasicBlock *CurrentHeader, + BasicBlock *BlockInLoop, bool NeedConditional) { auto *Term = cast(Src->getTerminator()); if (NeedConditional) { @@ -723,7 +723,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (Dest != LoopExit) { BasicBlock *BB = Src; for (BasicBlock *Succ : successors(BB)) { - if (Succ == CurrentHeader) + // Preserve the incoming value from BB if we are jumping to the block + // in the current loop. + if (Succ == BlockInLoop) continue; for (PHINode &Phi : Succ->phis()) Phi.removeIncomingValue(BB, false); @@ -794,7 +796,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // unconditional branch for some iterations. 
NeedConditional = false; - setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional); + setDest(Headers[i], Dest, Headers, HeaderSucc[i], NeedConditional); } // Set up latches to branch to the new header in the unrolled iterations or @@ -868,7 +870,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, assert(!DT || !UnrollVerifyDomtree || DT->verify(DominatorTree::VerificationLevel::Fast)); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { BranchInst *Term = dyn_cast(Latch->getTerminator()); @@ -888,6 +890,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp index ff49d83f25c..bf2e87b0d49 100644 --- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -517,6 +517,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); } + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the // new ones required. if (Count != 1) { @@ -530,7 +531,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop( ForeBlocksLast.back(), SubLoopBlocksFirst[0]); DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, SubLoopBlocksLast.back(), AftBlocksFirst[0]); - DT->applyUpdates(DTUpdates); + DTU.applyUpdatesPermissive(DTUpdates); } // Merge adjacent basic blocks, if possible. 
@@ -538,7 +539,6 @@ LoopUnrollResult llvm::UnrollAndJamLoop( MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); while (!MergeBlocks.empty()) { BasicBlock *BB = *MergeBlocks.begin(); BranchInst *Term = dyn_cast(BB->getTerminator()); @@ -555,6 +555,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop( } else MergeBlocks.erase(BB); } + // Apply updates to the DomTree. + DT = &DTU.getDomTree(); // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 005306cf189..58e42074f96 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -62,9 +62,11 @@ static cl::opt UnrollForcePeelCount( cl::desc("Force a peel count regardless of profiling information.")); static cl::opt UnrollPeelMultiDeoptExit( - "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden, + "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits.")); +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). 
@@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); UP.PeelCount = UnrollForcePeelCount; + UP.PeelProfiledIterations = true; return; } @@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!UP.AllowPeeling) return; + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number for iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; + UP.PeelProfiledIterations = false; + return; + } } } @@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; + // Do not apply profile base peeling if it is disabled. + if (!UP.PeelProfiledIterations) + return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section. 
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << "\n"); if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && + if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); @@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); @@ -364,88 +381,77 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, /// iteration. /// This sets the branch weights for the latch of the recently peeled off loop /// iteration correctly. -/// Our goal is to make sure that: -/// a) The total weight of all the copies of the loop body is preserved. -/// b) The total weight of the loop exit is preserved. -/// c) The body weight is reasonably distributed between the peeled iterations. +/// Let F be the weight of the edge from latch to header. +/// Let E be the weight of the edge from latch to exit. +/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to +/// go to exit. +/// Then, Estimated TripCount = F / E. +/// For I-th (counting from 0) peeled off iteration we set the weights for +/// the peeled latch as (TC - I, 1). It gives us reasonable distribution, +/// The probability to go to exit 1/(TC-I) increases. At the same time +/// the estimated trip count of remaining loop reduces by I. +/// To avoid dealing with division rounding we can just multiply both parts +/// of the weights by E and use weight as (F - I * E, E). /// /// \param Header The copy of the header block that belongs to next iteration. /// \param LatchBR The copy of the latch branch that belongs to this iteration.
-/// \param IterNumber The serial number of the iteration that was just -/// peeled off. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop -/// iterations that are unaccounted for. As an input, it represents the number -/// of times we expect to enter the header of the iteration currently being -/// peeled off. The output is the number of times we expect to enter the -/// header of the next iteration. +/// \param[in,out] FallThroughWeight The weight of the edge from latch to +/// header before peeling (in) and after peeled off one iteration (out). static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned IterNumber, unsigned AvgIters, - uint64_t &PeeledHeaderWeight) { - if (!PeeledHeaderWeight) + uint64_t ExitWeight, + uint64_t &FallThroughWeight) { + // FallThroughWeight is 0 means that there is no branch weights on original + // latch block or estimated trip count is zero. + if (!FallThroughWeight) return; - // FIXME: Pick a more realistic distribution. - // Currently the proportion of weight we assign to the fall-through - // side of the branch drops linearly with the iteration number, and we use - // a 0.9 fudge factor to make the drop-off less sharp... - uint64_t FallThruWeight = - PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9); - uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight; - PeeledHeaderWeight -= ExitWeight; unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight) - : MDB.createBranchWeights(FallThruWeight, ExitWeight); + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + FallThroughWeight = + FallThroughWeight > ExitWeight ? 
FallThroughWeight - ExitWeight : 1; } /// Initialize the weights. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param AvgIters The average number of iterations we expect the loop to have. -/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken. -/// \param[out] CurHeaderWeight The # of times the header is executed. +/// \param[out] ExitWeight The weight of the edge from Latch to Exit. +/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - unsigned AvgIters, uint64_t &ExitWeight, - uint64_t &CurHeaderWeight) { + uint64_t &ExitWeight, + uint64_t &FallThroughWeight) { uint64_t TrueWeight, FalseWeight; if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) return; unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - // The # of times the loop body executes is the sum of the exit block - // is taken and the # of times the backedges are taken. - CurHeaderWeight = TrueWeight + FalseWeight; + FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight; } /// Update the weights of original Latch block after peeling off all iterations. /// /// \param Header The header block. /// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit block. -/// \param CurHeaderWeight The # of time the header is executed. +/// \param ExitWeight The weight of the edge from Latch to Exit. +/// \param FallThroughWeight The weight of the edge from Latch to Header. static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, uint64_t CurHeaderWeight) { - // Adjust the branch weights on the loop exit. - if (!ExitWeight) + uint64_t ExitWeight, + uint64_t FallThroughWeight) { + // FallThroughWeight is 0 means that there is no branch weights on original + // latch block or estimated trip count is zero. 
+ if (!FallThroughWeight) return; - // The backedge count is the difference of current header weight and - // current loop exit weight. If the current header weight is smaller than - // the current loop exit weight, we mark the loop backedge weight as 1. - uint64_t BackEdgeWeight = 0; - if (ExitWeight < CurHeaderWeight) - BackEdgeWeight = CurHeaderWeight - ExitWeight; - else - BackEdgeWeight = 1; + // Sets the branch weights on the loop exit. MDBuilder MDB(LatchBR->getContext()); unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) - : MDB.createBranchWeights(BackEdgeWeight, ExitWeight); + HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) + : MDB.createBranchWeights(FallThroughWeight, ExitWeight); LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } @@ -586,11 +592,30 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, DenseMap ExitIDom; if (DT) { + // We'd like to determine the idom of exit block after peeling one + // iteration. + // Let Exit be the exit block. + // Let ExitingSet - is a set of predecessors of Exit block. They are exiting + // blocks. + // Let Latch' and ExitingSet' be the copies after a peeling. + // We'd like to find an idom'(Exit) - idom of Exit after peeling. + // It is evident that idom'(Exit) will be the nearest common dominator + // of ExitingSet and ExitingSet'. + // idom(Exit) is a nearest common dominator of ExitingSet. + // idom(Exit)' is a nearest common dominator of ExitingSet'. + // Taking into account that we have a single Latch, Latch' will dominate + // Header and idom(Exit). + // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'. + // All these basic blocks are in the same loop, so what we find is + // (nearest common dominator of idom(Exit) and Latch)'. + // In the loop below we remember nearest common dominator of idom(Exit) and + // Latch to update idom of Exit later.
assert(L->hasDedicatedExits() && "No dedicated exits?"); for (auto Edge : ExitEdges) { if (ExitIDom.count(Edge.second)) continue; - BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock(); + BasicBlock *BB = DT->findNearestCommonDominator( + DT->getNode(Edge.second)->getIDom()->getBlock(), Latch); assert(L->contains(BB) && "IDom is not in a loop"); ExitIDom[Edge.second] = BB; } @@ -659,23 +684,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // newly created branches. BranchInst *LatchBR = cast(cast(Latch)->getTerminator()); - uint64_t ExitWeight = 0, CurHeaderWeight = 0; - initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight); + uint64_t ExitWeight = 0, FallThroughWeight = 0; + initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); // For each peeled-off iteration, make a copy of the loop. for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; ValueToValueMapTy VMap; - // Subtract the exit weight from the current header weight -- the exit - // weight is exactly the weight of the previous iteration's header. - // FIXME: due to the way the distribution is constructed, we need a - // guard here to make sure we don't end up with non-positive weights. - if (ExitWeight < CurHeaderWeight) - CurHeaderWeight -= ExitWeight; - else - CurHeaderWeight = 1; - cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); @@ -697,8 +713,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, } auto *LatchBRCopy = cast(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, Iter, - PeelCount, ExitWeight); + updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. 
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); @@ -724,7 +739,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight); + fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); + + // Update Metadata for count of peeled off iterations. + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); if (Loop *ParentLoop = L->getParentLoop()) L = ParentLoop; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index ec226e65f65..b4d7f35d2d9 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -45,6 +46,7 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; +static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, @@ -169,6 +171,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + // FIXME: When all loop passes preserve MemorySSA, it can be required and + // preserved here instead of the individual handling in each pass. } /// Manually defined generic "LoopPass" dependency initialization. 
This is used @@ -189,6 +193,54 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) + INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +} + +/// Create MDNode for input string. +static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// Set input string into loop metadata by keeping other values intact. +/// If the string is already in loop metadata update value if it is +/// different. +void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, + unsigned V) { + SmallVector MDs(1); + // If the loop already has metadata, retain it. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If it is of form key = value, try to parse it. + if (Node->getNumOperands() == 2) { + MDString *S = dyn_cast(Node->getOperand(0)); + if (S && S->getString().equals(StringMD)) { + ConstantInt *IntMD = + mdconst::extract_or_null(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == V) + // It is already in place. Do nothing. + return; + // We need to update the value, so just skip it here and it will + // be added after copying other existing nodes. + continue; + } + } + MDs.push_back(Node); + } + } + // Add new metadata. + MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID); + TheLoop->setLoopID(NewLoopID); } /// Find string metadata for loop @@ -332,6 +384,10 @@ bool llvm::hasDisableAllTransformsHint(const Loop *L) { return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced); } +bool llvm::hasDisableLICMTransformsHint(const Loop *L) { + return getBooleanLoopAttribute(L, LLVMLoopDisableLICM); +} + TransformationMode llvm::hasUnrollTransformation(Loop *L) { if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) return TM_SuppressedByUser; diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index a9a480a4b7f..5d7759056c7 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -92,8 +92,8 @@ void LoopVersioning::versionLoop( // Create empty preheader for the loop (and after cloning for the // non-versioned loop). BasicBlock *PH = - SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI, + nullptr, VersionedLoop->getHeader()->getName() + ".ph"); // Clone the loop including the preheader. // diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index c0b7edc547f..60bb2775a19 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -121,15 +121,14 @@ namespace { } // Rename all functions - const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); for (auto &F : M) { StringRef Name = F.getName(); LibFunc Tmp; // Leave library functions alone because their presence or absence could // affect the behavior of other passes. if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) || - TLI.getLibFunc(F, Tmp)) + getAnalysis().getTLI(F).getLibFunc( + F, Tmp)) continue; // Leave @main alone. 
The output of -metarenamer might be passed to diff --git a/lib/Transforms/Utils/MisExpect.cpp b/lib/Transforms/Utils/MisExpect.cpp new file mode 100644 index 00000000000..26d3402bd27 --- /dev/null +++ b/lib/Transforms/Utils/MisExpect.cpp @@ -0,0 +1,177 @@ +//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit warnings for potentially incorrect usage of the +// llvm.expect intrinsic. This utility extracts the threshold values from +// metadata associated with the instrumented Branch or Switch instruction. The +// threshold values are then used to determine if a warning should be emitted. +// +// MisExpect metadata is generated when llvm.expect intrinsics are lowered see +// LowerExpectIntrinsic.cpp +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/MisExpect.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include +#include +#include + +#define DEBUG_TYPE "misexpect" + +using namespace llvm; +using namespace misexpect; + +namespace llvm { + +// Command line option to enable/disable the warning when profile data suggests +// a mismatch with the use of the llvm.expect intrinsic +static cl::opt PGOWarnMisExpect( + "pgo-warn-misexpect", cl::init(false), cl::Hidden, + cl::desc("Use this option to turn on/off " + "warnings about
incorrect usage of llvm.expect intrinsics.")); + +} // namespace llvm + +namespace { + +Instruction *getOprndOrInst(Instruction *I) { + assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); + Instruction *Ret = nullptr; + if (auto *B = dyn_cast(I)) { + Ret = dyn_cast(B->getCondition()); + } + // TODO: Find a way to resolve condition location for switches + // Using the condition of the switch seems to often resolve to an earlier + // point in the program, i.e. the calculation of the switch condition, rather + // than the switches location in the source code. Thus, we should use the + // instruction to get source code locations rather than the condition to + // improve diagnostic output, such as the caret. If the same problem exists + // for branch instructions, then we should remove this function and directly + // use the instruction + // + // else if (auto S = dyn_cast(I)) { + // Ret = I; + //} + return Ret ? Ret : I; +} + +void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, + uint64_t ProfCount, uint64_t TotalCount) { + double PercentageCorrect = (double)ProfCount / TotalCount; + auto PerString = + formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); + auto RemStr = formatv( + "Potential performance regression from use of the llvm.expect intrinsic: " + "Annotation was correct on {0} of profiled executions.", + PerString); + Twine Msg(PerString); + Instruction *Cond = getOprndOrInst(I); + if (PGOWarnMisExpect) + Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); + OptimizationRemarkEmitter ORE(I->getParent()->getParent()); + ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); +} + +} // namespace + +namespace llvm { +namespace misexpect { + +void verifyMisExpect(Instruction *I, const SmallVector &Weights, + LLVMContext &Ctx) { + if (auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect)) { + auto *MisExpectDataName = dyn_cast(MisExpectData->getOperand(0)); + if (MisExpectDataName && 
+ MisExpectDataName->getString().equals("misexpect")) { + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + LLVM_DEBUG(llvm::dbgs() + << "Function: " << I->getFunction()->getName() << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n"); + LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) { + llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n"; + }); + + // extract values from misexpect metadata + const auto *IndexCint = + mdconst::dyn_extract(MisExpectData->getOperand(1)); + const auto *LikelyCInt = + mdconst::dyn_extract(MisExpectData->getOperand(2)); + const auto *UnlikelyCInt = + mdconst::dyn_extract(MisExpectData->getOperand(3)); + + if (!IndexCint || !LikelyCInt || !UnlikelyCInt) + return; + + const uint64_t Index = IndexCint->getZExtValue(); + const uint64_t LikelyBranchWeight = LikelyCInt->getZExtValue(); + const uint64_t UnlikelyBranchWeight = UnlikelyCInt->getZExtValue(); + const uint64_t ProfileCount = Weights[Index]; + const uint64_t CaseTotal = std::accumulate( + Weights.begin(), Weights.end(), (uint64_t)0, std::plus()); + const uint64_t NumUnlikelyTargets = Weights.size() - 1; + + const uint64_t TotalBranchWeight = + LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); + + const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight, + TotalBranchWeight); + uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal); + + LLVM_DEBUG(llvm::dbgs() + << "Unlikely Targets: " << NumUnlikelyTargets << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "Profile Count: " << ProfileCount << ":\n"); + LLVM_DEBUG(llvm::dbgs() + << "Scaled Threshold: " << ScaledThreshold << ":\n"); + LLVM_DEBUG(llvm::dbgs() << "------------------\n"); + if (ProfileCount < ScaledThreshold) + emitMisexpectDiagnostic(I, Ctx, ProfileCount, CaseTotal); + } + } +} + +void checkFrontendInstrumentation(Instruction &I) { + if (auto *MD = I.getMetadata(LLVMContext::MD_prof)) { + unsigned NOps = MD->getNumOperands(); + + // Only emit 
misexpect diagnostics if at least 2 branch weights are present. + // Less than 2 branch weights means that the profiling metadata is: + // 1) incorrect/corrupted + // 2) not branch weight metadata + // 3) completely deterministic + // In these cases we should not emit any diagnostic related to misexpect. + if (NOps < 3) + return; + + // Operand 0 is a string tag "branch_weights" + if (MDString *Tag = cast(MD->getOperand(0))) { + if (Tag->getString().equals("branch_weights")) { + SmallVector RealWeights(NOps - 1); + for (unsigned i = 1; i < NOps; i++) { + ConstantInt *Value = + mdconst::dyn_extract(MD->getOperand(i)); + RealWeights[i - 1] = Value->getZExtValue(); + } + verifyMisExpect(&I, RealWeights, I.getContext()); + } + } + } +} + +} // namespace misexpect +} // namespace llvm +#undef DEBUG_TYPE diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index c84beceee19..1ef3757017a 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -73,7 +73,7 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef SmallPtrSet InitAsSet; SmallVector Init; if (GV) { - ConstantArray *CA = dyn_cast(GV->getInitializer()); + auto *CA = cast(GV->getInitializer()); for (auto &Op : CA->operands()) { Constant *C = cast_or_null(Op); if (InitAsSet.insert(C).second) diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index bdf24d80bd1..44859eafb9c 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -125,8 +125,10 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A, // necessary to compare uses/defs in the same block. Doing so allows us to walk // the minimum number of instructions necessary to compute our def/use ordering. 
struct ValueDFS_Compare { + DominatorTree &DT; OrderedInstructions &OI; - ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} + ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI) + : DT(DT), OI(OI) {} bool operator()(const ValueDFS &A, const ValueDFS &B) const { if (&A == &B) @@ -136,7 +138,9 @@ struct ValueDFS_Compare { // comesbefore to see what the real ordering is, because they are in the // same basic block. - bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut); + assert((A.DFSIn != B.DFSIn || A.DFSOut == B.DFSOut) && + "Equal DFS-in numbers imply equal out numbers"); + bool SameBlock = A.DFSIn == B.DFSIn; // We want to put the def that will get used for a given set of phi uses, // before those phi uses. @@ -145,9 +149,11 @@ struct ValueDFS_Compare { if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) return comparePHIRelated(A, B); + bool isADef = A.Def; + bool isBDef = B.Def; if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) - return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) < - std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U); + return std::tie(A.DFSIn, A.LocalNum, isADef) < + std::tie(B.DFSIn, B.LocalNum, isBDef); return localComesBefore(A, B); } @@ -164,10 +170,35 @@ struct ValueDFS_Compare { // For two phi related values, return the ordering. bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { - auto &ABlockEdge = getBlockEdge(A); - auto &BBlockEdge = getBlockEdge(B); - // Now sort by block edge and then defs before uses. - return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U); + BasicBlock *ASrc, *ADest, *BSrc, *BDest; + std::tie(ASrc, ADest) = getBlockEdge(A); + std::tie(BSrc, BDest) = getBlockEdge(B); + +#ifndef NDEBUG + // This function should only be used for values in the same BB, check that. 
+ DomTreeNode *DomASrc = DT.getNode(ASrc); + DomTreeNode *DomBSrc = DT.getNode(BSrc); + assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn && + "DFS numbers for A should match the ones of the source block"); + assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn && + "DFS numbers for B should match the ones of the source block"); + assert(A.DFSIn == B.DFSIn && "Values must be in the same block"); +#endif + (void)ASrc; + (void)BSrc; + + // Use DFS numbers to compare destination blocks, to guarantee a + // deterministic order. + DomTreeNode *DomADest = DT.getNode(ADest); + DomTreeNode *DomBDest = DT.getNode(BDest); + unsigned AIn = DomADest->getDFSNumIn(); + unsigned BIn = DomBDest->getDFSNumIn(); + bool isADef = A.Def; + bool isBDef = B.Def; + assert((!A.Def || !A.U) && (!B.Def || !B.U) && + "Def and U cannot be set at the same time"); + // Now sort by edge destination and then defs before uses. + return std::tie(AIn, isADef) < std::tie(BIn, isBDef); } // Get the definition of an instruction that occurs in the middle of a block. @@ -306,10 +337,11 @@ void collectCmpOps(CmpInst *Comparison, SmallVectorImpl &CmpOperands) { } // Add Op, PB to the list of value infos for Op, and mark Op to be renamed. -void PredicateInfo::addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, +void PredicateInfo::addInfoFor(SmallVectorImpl &OpsToRename, Value *Op, PredicateBase *PB) { - OpsToRename.insert(Op); auto &OperandInfo = getOrCreateValueInfo(Op); + if (OperandInfo.Infos.empty()) + OpsToRename.push_back(Op); AllInfos.push_back(PB); OperandInfo.Infos.push_back(PB); } @@ -317,7 +349,7 @@ void PredicateInfo::addInfoFor(SmallPtrSetImpl &OpsToRename, Value *Op, // Process an assume instruction and place relevant operations we want to rename // into OpsToRename. 
void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { // See if we have a comparison we support SmallVector CmpOperands; SmallVector ConditionsToProcess; @@ -357,7 +389,7 @@ void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB, // Process a block terminating branch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { BasicBlock *FirstBB = BI->getSuccessor(0); BasicBlock *SecondBB = BI->getSuccessor(1); SmallVector SuccsToProcess; @@ -427,7 +459,7 @@ void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB, // Process a block terminating switch, and place relevant operations to be // renamed into OpsToRename. void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB, - SmallPtrSetImpl &OpsToRename) { + SmallVectorImpl &OpsToRename) { Value *Op = SI->getCondition(); if ((!isa(Op) && !isa(Op)) || Op->hasOneUse()) return; @@ -457,7 +489,7 @@ void PredicateInfo::buildPredicateInfo() { DT.updateDFSNumbers(); // Collect operands to rename from all conditional branch terminators, as well // as assume statements. - SmallPtrSet OpsToRename; + SmallVector OpsToRename; for (auto DTN : depth_first(DT.getRootNode())) { BasicBlock *BranchBB = DTN->getBlock(); if (auto *BI = dyn_cast(BranchBB->getTerminator())) { @@ -524,7 +556,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, if (isa(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." 
+ Twine(Counter++)); @@ -536,7 +568,7 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, "Should not have gotten here without it being an assume"); IRBuilder<> B(PAssume->AssumeInst); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); - if (empty(IF->users())) + if (IF->users().empty()) CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PredicateMap.insert({PIC, ValInfo}); @@ -565,14 +597,8 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl &OpSet) { - // Sort OpsToRename since we are going to iterate it. - SmallVector OpsToRename(OpSet.begin(), OpSet.end()); - auto Comparator = [&](const Value *A, const Value *B) { - return valueComesBefore(OI, A, B); - }; - llvm::sort(OpsToRename, Comparator); - ValueDFS_Compare Compare(OI); +void PredicateInfo::renameUses(SmallVectorImpl &OpsToRename) { + ValueDFS_Compare Compare(DT, OI); // Compute liveness, and rename in O(uses) per Op. 
for (auto *Op : OpsToRename) { LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); @@ -772,7 +798,7 @@ static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); - auto PredInfo = make_unique(F, DT, AC); + auto PredInfo = std::make_unique(F, DT, AC); PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); @@ -786,7 +812,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); OS << "PredicateInfo for function: " << F.getName() << "\n"; - auto PredInfo = make_unique(F, DT, AC); + auto PredInfo = std::make_unique(F, DT, AC); PredInfo->print(OS); replaceCreatedSSACopys(*PredInfo, F); @@ -845,7 +871,7 @@ PreservedAnalyses PredicateInfoVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); - make_unique(F, DT, AC)->verifyPredicateInfo(); + std::make_unique(F, DT, AC)->verifyPredicateInfo(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 11651d040dc..3a5e3293ed4 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -94,6 +94,12 @@ static cl::opt PHINodeFoldingThreshold( cl::desc( "Control the amount of phi node folding to perform (default = 2)")); +static cl::opt TwoEntryPHINodeFoldingThreshold( + "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), + cl::desc("Control the maximal total instruction cost that we are willing " + "to speculatively execute to fold a 2-entry PHI node into a " + "select (default = 4)")); + static cl::opt DupRet( "simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -332,7 +338,7 @@ static unsigned 
ComputeSpeculationCost(const User *I, /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl &AggressiveInsts, - unsigned &CostRemaining, + int &BudgetRemaining, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), @@ -375,7 +381,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, TTI); + BudgetRemaining -= ComputeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). @@ -383,17 +389,14 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. - if (Cost > CostRemaining && + if (BudgetRemaining < 0 && (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0)) return false; - // Avoid unsigned wrap. - CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; - // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. @@ -629,8 +632,7 @@ private: /// vector. /// One "Extra" case is allowed to differ from the other. 
void gather(Value *V) { - Instruction *I = dyn_cast(V); - bool isEQ = (I->getOpcode() == Instruction::Or); + bool isEQ = (cast(V)->getOpcode() == Instruction::Or); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector DFT; @@ -1313,7 +1315,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, - LLVMContext::MD_access_group}; + LLVMContext::MD_access_group, + LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug @@ -1420,6 +1423,20 @@ HoistTerminator: return true; } +// Check lifetime markers. +static bool isLifeTimeMarker(const Instruction *I) { + if (auto II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + return false; +} + // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -1474,20 +1491,25 @@ static bool canSinkInstructions( return false; } - // Because SROA can't handle speculating stores of selects, try not - // to sink loads or stores of allocas when we'd have to create a PHI for - // the address operand. Also, because it is likely that loads or stores - // of allocas will disappear when Mem2Reg/SROA is run, don't sink them. + // Because SROA can't handle speculating stores of selects, try not to sink + // loads, stores or lifetime markers of allocas when we'd have to create a + // PHI for the address operand. Also, because it is likely that loads or + // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink + // them. 
// This can cause code churn which can have unintended consequences down // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. // FIXME: This is a workaround for a deficiency in SROA - see // https://llvm.org/bugs/show_bug.cgi?id=30188 if (isa(I0) && any_of(Insts, [](const Instruction *I) { - return isa(I->getOperand(1)); + return isa(I->getOperand(1)->stripPointerCasts()); })) return false; if (isa(I0) && any_of(Insts, [](const Instruction *I) { - return isa(I->getOperand(0)); + return isa(I->getOperand(0)->stripPointerCasts()); + })) + return false; + if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) { + return isa(I->getOperand(1)->stripPointerCasts()); })) return false; @@ -1959,7 +1981,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SmallVector SpeculatedDbgIntrinsics; - unsigned SpeculationCost = 0; + unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), @@ -1974,8 +1996,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Only speculatively execute a single instruction (not counting the // terminator) for now. - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. @@ -2012,8 +2034,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->hasNUses(I->second)) { - ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2053,8 +2075,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the // constant expression. 
- ++SpeculationCost; - if (SpeculationCost > 1) + ++SpeculatedInstructions; + if (SpeculatedInstructions > 1) return false; } @@ -2302,10 +2324,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet AggressiveInsts; - unsigned MaxCostVal0 = PHINodeFoldingThreshold, - MaxCostVal1 = PHINodeFoldingThreshold; - MaxCostVal0 *= TargetTransformInfo::TCC_Basic; - MaxCostVal1 *= TargetTransformInfo::TCC_Basic; + int BudgetRemaining = + TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); @@ -2316,9 +2336,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, - MaxCostVal0, TTI) || + BudgetRemaining, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, - MaxCostVal1, TTI)) + BudgetRemaining, TTI)) return false; } @@ -2328,12 +2348,24 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, if (!PN) return true; - // Don't fold i1 branches on PHIs which contain binary operators. These can - // often be turned into switches and other things. + // Return true if at least one of these is a 'not', and another is either + // a 'not' too, or a constant. + auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { + if (!match(V0, m_Not(m_Value()))) + std::swap(V0, V1); + auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant()); + return match(V0, m_Not(m_Value())) && match(V1, Invertible); + }; + + // Don't fold i1 branches on PHIs which contain binary operators, unless one + // of the incoming values is an 'not' and another one is freely invertible. + // These can often be turned into switches and other things. 
if (PN->getType()->isIntegerTy(1) && (isa(PN->getIncomingValue(0)) || isa(PN->getIncomingValue(1)) || - isa(IfCond))) + isa(IfCond)) && + !CanHoistNotFromBothValues(PN->getIncomingValue(0), + PN->getIncomingValue(1))) return false; // If all PHI nodes are promotable, check to make sure that all instructions @@ -2368,6 +2400,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, return false; } } + assert(DomBlock && "Failed to find root DomBlock"); LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() @@ -2913,42 +2946,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, - const DataLayout &DL) { - auto IsaBitcastOfPointerType = [](const Instruction &I) { - return Operator::getOpcode(&I) == Instruction::BitCast && - I.getType()->isPointerTy(); - }; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - unsigned N = 0; - for (auto &I : BB->instructionsWithoutDebug()) { - // Cheap instructions viable for folding. - if (isa(I) || isa(I) || - isa(I)) - ++N; - // Free instructions. - else if (I.isTerminator() || IsaBitcastOfPointerType(I)) - continue; - else - return false; - } - // The store we want to merge is counted in N, so add 1 to make sure - // we're counting the instructions that would be left. 
- return N <= (PHINodeFoldingThreshold + 1); - }; - - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) - return false; - + const DataLayout &DL, + const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -2989,6 +2988,46 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, if (&*I != PStore && I->mayReadOrWriteMemory()) return false; + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug()) { + // Consider terminator instruction to be free. + if (I.isTerminator()) + continue; + // If this is one the stores that we want to speculate out of this BB, + // then don't count it's cost, consider it to be free. + if (auto *S = dyn_cast(&I)) + if (llvm::find(FreeStores, S)) + continue; + // Else, we have a white-list of instructions that we are ak speculating. + if (!isa(I) && !isa(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + BudgetRemaining -= TTI.getUserCost(&I); + if (BudgetRemaining < 0) + return false; // Eagerly refuse to fold as soon as we're out of budget. 
+ } + assert(BudgetRemaining >= 0 && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + const SmallVector FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + // If PostBB has more than two predecessors, we need to split it so we can // sink the store. if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { @@ -3048,15 +3087,15 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, // store that doesn't execute. if (MinAlignment != 0) { // Choose the minimum of all non-zero alignments. - SI->setAlignment(MinAlignment); + SI->setAlignment(Align(MinAlignment)); } else if (MaxAlignment != 0) { // Choose the minimal alignment between the non-zero alignment and the ABI // default alignment for the type of the stored value. - SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + SI->setAlignment(Align(std::min(MaxAlignment, TypeAlignment))); } else { // If both alignments are zero, use ABI default alignment for the type of // the stored value. - SI->setAlignment(TypeAlignment); + SI->setAlignment(Align(TypeAlignment)); } QStore->eraseFromParent(); @@ -3066,7 +3105,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. 
But it may @@ -3168,7 +3208,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); return Changed; } @@ -3177,7 +3217,8 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -3233,7 +3274,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -3697,12 +3738,17 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, BasicBlock *BB = BI->getParent(); + // MSAN does not like undefs as branch condition which can be introduced + // with "explicit branch". + if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) + return false; + LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); // If there are any extra values that couldn't be folded into the switch - // then we evaluate them with an explicit branch first. Split the block + // then we evaluate them with an explicit branch first. 
Split the block // right before the condbr to handle it. if (ExtraCase) { BasicBlock *NewBB = @@ -3851,7 +3897,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); - LandingPadInst *LPInst = dyn_cast(BB->getFirstNonPHI()); + auto *LPInst = cast(BB->getFirstNonPHI()); assert(RI->getValue() == LPInst && "Resume must unwind the exception that caused control to here"); @@ -4178,23 +4224,22 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { IRBuilder<> Builder(TI); if (auto *BI = dyn_cast(TI)) { if (BI->isUnconditional()) { - if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI->getContext(), TI); - TI->eraseFromParent(); - Changed = true; - } + assert(BI->getSuccessor(0) == BB && "Incorrect CFG"); + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; } else { Value* Cond = BI->getCondition(); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); - EraseTerminatorAndDCECond(BI); - } else if (BI->getSuccessor(1) == BB) { + } else { + assert(BI->getSuccessor(1) == BB && "Incorrect CFG"); Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); - EraseTerminatorAndDCECond(BI); - Changed = true; } + EraseTerminatorAndDCECond(BI); + Changed = true; } } else if (auto *SI = dyn_cast(TI)) { SwitchInstProfUpdateWrapper SU(*SI); @@ -4276,6 +4321,17 @@ static bool CasesAreContiguous(SmallVectorImpl &Cases) { return true; } +static void createUnreachableSwitchDefault(SwitchInst *Switch) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefaultBlock = + SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); + Switch->setDefaultDest(&*NewDefaultBlock); + SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); + auto 
*NewTerminator = NewDefaultBlock->getTerminator(); + new UnreachableInst(Switch->getContext(), NewTerminator); + EraseTerminatorAndDCECond(NewTerminator); +} + /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { @@ -4384,6 +4440,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { cast(BBI)->removeIncomingValue(SI->getParent()); } + // Clean up the default block - it may have phis or other instructions before + // the unreachable terminator. + if (!HasDefault) + createUnreachableSwitchDefault(SI); + // Drop the switch. SI->eraseFromParent(); @@ -4428,14 +4489,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - BasicBlock *NewDefault = - SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), ""); - SI->setDefaultDest(&*NewDefault); - SplitBlock(&*NewDefault, &NewDefault->front()); - auto *OldTI = NewDefault->getTerminator(); - new UnreachableInst(SI->getContext(), OldTI); - EraseTerminatorAndDCECond(OldTI); + createUnreachableSwitchDefault(SI); return true; } @@ -5031,7 +5085,7 @@ SwitchLookupTable::SwitchLookupTable( Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Set the alignment to that of an array items. We will be only loading one // value out of it. - Array->setAlignment(DL.getPrefTypeAlignment(ValueType)); + Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType))); Kind = ArrayKind; } @@ -5260,7 +5314,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Figure out the corresponding result for each case value and phi node in the // common destination, as well as the min and max case values. 
- assert(!empty(SI->cases())); + assert(!SI->cases().empty()); SwitchInst::CaseIt CI = SI->case_begin(); ConstantInt *MinCaseVal = CI->getCaseValue(); ConstantInt *MaxCaseVal = CI->getCaseValue(); @@ -5892,7 +5946,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) return requestResimplify(); // Look for diamond patterns. @@ -5900,7 +5954,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL)) + if (mergeConditionalStores(PBI, BI, DL, TTI)) return requestResimplify(); return false; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index e0def81d5ee..0324993a820 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -47,7 +48,6 @@ static cl::opt cl::desc("Enable unsafe double to float " "shrinking for math lib calls")); - //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// @@ -177,7 +177,8 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, if (!isOnlyUsedInComparisonWithZero(CI)) return false; - if 
(!isDereferenceableAndAlignedPointer(Str, 1, APInt(64, Len), DL)) + if (!isDereferenceableAndAlignedPointer(Str, Align::None(), APInt(64, Len), + DL)) return false; if (CI->getFunction()->hasFnAttribute(Attribute::SanitizeMemory)) @@ -186,6 +187,67 @@ static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len, return true; } +static void annotateDereferenceableBytes(CallInst *CI, + ArrayRef ArgNos, + uint64_t DereferenceableBytes) { + const Function *F = CI->getCaller(); + if (!F) + return; + for (unsigned ArgNo : ArgNos) { + uint64_t DerefBytes = DereferenceableBytes; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + DerefBytes = std::max(CI->getDereferenceableOrNullBytes( + ArgNo + AttributeList::FirstArgIndex), + DereferenceableBytes); + + if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) < + DerefBytes) { + CI->removeParamAttr(ArgNo, Attribute::Dereferenceable); + if (!llvm::NullPointerIsDefined(F, AS) || + CI->paramHasAttr(ArgNo, Attribute::NonNull)) + CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull); + CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes( + CI->getContext(), DerefBytes)); + } + } +} + +static void annotateNonNullBasedOnAccess(CallInst *CI, + ArrayRef ArgNos) { + Function *F = CI->getCaller(); + if (!F) + return; + + for (unsigned ArgNo : ArgNos) { + if (CI->paramHasAttr(ArgNo, Attribute::NonNull)) + continue; + unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace(); + if (llvm::NullPointerIsDefined(F, AS)) + continue; + + CI->addParamAttr(ArgNo, Attribute::NonNull); + annotateDereferenceableBytes(CI, ArgNo, 1); + } +} + +static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef ArgNos, + Value *Size, const DataLayout &DL) { + if (ConstantInt *LenC = dyn_cast(Size)) { + annotateNonNullBasedOnAccess(CI, ArgNos); + 
annotateDereferenceableBytes(CI, ArgNos, LenC->getZExtValue()); + } else if (isKnownNonZero(Size, DL)) { + annotateNonNullBasedOnAccess(CI, ArgNos); + const APInt *X, *Y; + uint64_t DerefMin = 1; + if (match(Size, m_Select(m_Value(), m_APInt(X), m_APInt(Y)))) { + DerefMin = std::min(X->getZExtValue(), Y->getZExtValue()); + annotateDereferenceableBytes(CI, ArgNos, DerefMin); + } + } +} + //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -194,10 +256,13 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); + annotateNonNullBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; --Len; // Unbias length. @@ -232,24 +297,34 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { // Extract some information from the instruction. Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); uint64_t Len; + annotateNonNullBasedOnAccess(CI, 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, 1); // We don't do anything if length is not constant. - if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) + ConstantInt *LengthArg = dyn_cast(Size); + if (LengthArg) { Len = LengthArg->getZExtValue(); - else + // strncat(x, c, 0) -> x + if (!Len) + return Dst; + } else { return nullptr; + } // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) + if (SrcLen) { + annotateDereferenceableBytes(CI, 1, SrcLen); + --SrcLen; // Unbias length. 
+ } else { return nullptr; - --SrcLen; // Unbias length. + } - // Handle the simple, do-nothing cases: // strncat(x, "", c) -> x - // strncat(x, c, 0) -> x - if (SrcLen == 0 || Len == 0) + if (SrcLen == 0) return Dst; // We don't optimize this case. @@ -265,13 +340,18 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); Value *SrcStr = CI->getArgOperand(0); + annotateNonNullBasedOnAccess(CI, 0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); if (!CharC) { uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. + if (Len) + annotateDereferenceableBytes(CI, 0, Len); + else + return nullptr; + if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; return emitMemChr(SrcStr, CI->getArgOperand(1), // include nul. @@ -304,6 +384,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); + annotateNonNullBasedOnAccess(CI, 0); // Cannot fold anything if we're not looking for a constant. 
if (!CharC) @@ -351,7 +432,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); + if (Len1 && Len2) { return emitMemCmp(Str1P, Str2P, ConstantInt::get(DL.getIntPtrType(CI->getContext()), @@ -374,17 +460,22 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { TLI); } + annotateNonNullBasedOnAccess(CI, {0, 1}); return nullptr; } Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { - Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + Value *Str1P = CI->getArgOperand(0); + Value *Str2P = CI->getArgOperand(1); + Value *Size = CI->getArgOperand(2); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, {0, 1}); // Get the length argument if it is constant. 
uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) + if (ConstantInt *LengthArg = dyn_cast(Size)) Length = LengthArg->getZExtValue(); else return nullptr; @@ -393,7 +484,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { return ConstantInt::get(CI->getType(), 0); if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return emitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); + return emitMemCmp(Str1P, Str2P, Size, B, DL, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -415,7 +506,11 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { CI->getType()); uint64_t Len1 = GetStringLength(Str1P); + if (Len1) + annotateDereferenceableBytes(CI, 0, Len1); uint64_t Len2 = GetStringLength(Str2P); + if (Len2) + annotateDereferenceableBytes(CI, 1, Len2); // strncmp to memcmp if (!HasStr1 && HasStr2) { @@ -437,20 +532,38 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilder<> &B) { + Value *Src = CI->getArgOperand(0); + ConstantInt *Size = dyn_cast(CI->getArgOperand(1)); + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen && Size) { + annotateDereferenceableBytes(CI, 0, SrcLen); + if (SrcLen <= Size->getZExtValue() + 1) + return emitStrDup(Src, B, TLI); + } + + return nullptr; +} + Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - + + annotateNonNullBasedOnAccess(CI, {0, 1}); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. 
- B.CreateMemCpy(Dst, 1, Src, 1, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); + CallInst *NewCI = + B.CreateMemCpy(Dst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); + NewCI->setAttributes(CI->getAttributes()); return Dst; } @@ -464,7 +577,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; Type *PT = Callee->getFunctionType()->getParamType(0); @@ -474,7 +589,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, 1, Src, 1, LenV); + CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV); + NewCI->setAttributes(CI->getAttributes()); return DstEnd; } @@ -482,28 +598,38 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); - Value *LenOp = CI->getArgOperand(2); - - // See if we can get the length of the input string. - uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) - return nullptr; - --SrcLen; - - if (SrcLen == 0) { - // strncpy(x, "", y) -> memset(align 1 x, '\0', y) - B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); - return Dst; - } + Value *Size = CI->getArgOperand(2); + annotateNonNullBasedOnAccess(CI, 0); + if (isKnownNonZero(Size, DL)) + annotateNonNullBasedOnAccess(CI, 1); uint64_t Len; - if (ConstantInt *LengthArg = dyn_cast(LenOp)) + if (ConstantInt *LengthArg = dyn_cast(Size)) Len = LengthArg->getZExtValue(); else return nullptr; + // strncpy(x, y, 0) -> x if (Len == 0) - return Dst; // strncpy(x, y, 0) -> x + return Dst; + + // See if we can get the length of the input string. 
+ uint64_t SrcLen = GetStringLength(Src); + if (SrcLen) { + annotateDereferenceableBytes(CI, 1, SrcLen); + --SrcLen; // Unbias length. + } else { + return nullptr; + } + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(align 1 x, '\0', y) + CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1); + AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0)); + NewCI->setAttributes(NewCI->getAttributes().addParamAttributes( + CI->getContext(), 0, ArgAttrs)); + return Dst; + } // Let strncpy handle the zero padding if (Len > SrcLen + 1) @@ -511,8 +637,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] - B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); - + CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); + NewCI->setAttributes(CI->getAttributes()); return Dst; } @@ -608,7 +734,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, } Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { - return optimizeStringLength(CI, B, 8); + if (Value *V = optimizeStringLength(CI, B, 8)) + return V; + annotateNonNullBasedOnAccess(CI, 0); + return nullptr; } Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { @@ -756,21 +885,35 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { Value *StrChr = emitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? 
B.CreateBitCast(StrChr, CI->getType()) : nullptr; } + + annotateNonNullBasedOnAccess(CI, {0, 1}); + return nullptr; +} + +Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilder<> &B) { + if (isKnownNonZero(CI->getOperand(2), DL)) + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { Value *SrcStr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); - ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); + ConstantInt *LenC = dyn_cast(Size); // memchr(x, y, 0) -> null - if (LenC && LenC->isZero()) - return Constant::getNullValue(CI->getType()); + if (LenC) { + if (LenC->isZero()) + return Constant::getNullValue(CI->getType()); + } else { + // From now on we need at least constant length and string. + return nullptr; + } - // From now on we need at least constant length and string. StringRef Str; - if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) return nullptr; // Truncate the string to LenC. If Str is smaller than LenC we will still only @@ -913,6 +1056,7 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS, Ret = 1; return ConstantInt::get(CI->getType(), Ret); } + return nullptr; } @@ -925,12 +1069,19 @@ Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI, if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); // Handle constant lengths. 
- if (ConstantInt *LenC = dyn_cast(Size)) - if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS, - LenC->getZExtValue(), B, DL)) - return Res; + ConstantInt *LenC = dyn_cast(Size); + if (!LenC) + return nullptr; + // memcmp(d,s,0) -> 0 + if (LenC->getZExtValue() == 0) + return Constant::getNullValue(CI->getType()); + + if (Value *Res = + optimizeMemCmpConstantSize(CI, LHS, RHS, LenC->getZExtValue(), B, DL)) + return Res; return nullptr; } @@ -939,9 +1090,9 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { return V; // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0 - // `bcmp` can be more efficient than memcmp because it only has to know that - // there is a difference, not where it is. - if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) { + // bcmp can be more efficient than memcmp because it only has to know that + // there is a difference, not how different one is to the other. + if (TLI->has(LibFunc_bcmp) && isOnlyUsedInZeroEqualityComparison(CI)) { Value *LHS = CI->getArgOperand(0); Value *RHS = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); @@ -956,16 +1107,37 @@ Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa(CI)) + return nullptr; + // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } +Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) { + Value *Dst = CI->getArgOperand(0); + Value *N = CI->getArgOperand(2); + // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n + CallInst *NewCI = 
B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N); + NewCI->setAttributes(CI->getAttributes()); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); +} + Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); + if (isa(CI)) + return nullptr; + // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = + B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } @@ -1003,25 +1175,29 @@ Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilder<> &B) { B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); - Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), - Malloc->getArgOperand(0), Malloc->getAttributes(), - B, *TLI); - if (!Calloc) - return nullptr; + if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), + Malloc->getArgOperand(0), + Malloc->getAttributes(), B, *TLI)) { + substituteInParent(Malloc, Calloc); + return Calloc; + } - Malloc->replaceAllUsesWith(Calloc); - eraseFromParent(Malloc); - - return Calloc; + return nullptr; } Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { + Value *Size = CI->getArgOperand(2); + annotateNonNullAndDereferenceable(CI, 0, Size, DL); + if (isa(CI)) + return nullptr; + if (auto *Calloc = foldMallocMemset(CI, B)) return Calloc; // memset(p, v, n) -> llvm.memset(align 1 p, v, n) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1); + 
NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } @@ -1096,21 +1272,18 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, if (!V[0] || (isBinary && !V[1])) return nullptr; - StringRef CalleeNm = CalleeFn->getName(); - AttributeList CalleeAt = CalleeFn->getAttributes(); - bool CalleeIn = CalleeFn->isIntrinsic(); - // If call isn't an intrinsic, check that it isn't within a function with the // same name as the float version of this call, otherwise the result is an // infinite loop. For example, from MinGW-w64: // // float expf(float val) { return (float) exp((double) val); } - if (!CalleeIn) { - const Function *Fn = CI->getFunction(); - StringRef FnName = Fn->getName(); - if (FnName.back() == 'f' && - FnName.size() == (CalleeNm.size() + 1) && - FnName.startswith(CalleeNm)) + StringRef CalleeName = CalleeFn->getName(); + bool IsIntrinsic = CalleeFn->isIntrinsic(); + if (!IsIntrinsic) { + StringRef CallerName = CI->getFunction()->getName(); + if (!CallerName.empty() && CallerName.back() == 'f' && + CallerName.size() == (CalleeName.size() + 1) && + CallerName.startswith(CalleeName)) return nullptr; } @@ -1120,16 +1293,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B, // g((double) float) -> (double) gf(float) Value *R; - if (CalleeIn) { + if (IsIntrinsic) { Module *M = CI->getModule(); Intrinsic::ID IID = CalleeFn->getIntrinsicID(); Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); R = isBinary ? B.CreateCall(Fn, V) : B.CreateCall(Fn, V[0]); + } else { + AttributeList CalleeAttrs = CalleeFn->getAttributes(); + R = isBinary ? emitBinaryFloatFnCall(V[0], V[1], CalleeName, B, CalleeAttrs) + : emitUnaryFloatFnCall(V[0], CalleeName, B, CalleeAttrs); } - else - R = isBinary ? 
emitBinaryFloatFnCall(V[0], V[1], CalleeNm, B, CalleeAt) - : emitUnaryFloatFnCall(V[0], CalleeNm, B, CalleeAt); - return B.CreateFPExt(R, B.getDoubleTy()); } @@ -1234,9 +1407,25 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { return InnerChain[Exp]; } +// Return a properly extended 32-bit integer if the operation is an itofp. +static Value *getIntToFPVal(Value *I2F, IRBuilder<> &B) { + if (isa(I2F) || isa(I2F)) { + Value *Op = cast(I2F)->getOperand(0); + // Make sure that the exponent fits inside an int32_t, + // thus avoiding any range issues that FP has not. + unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits(); + if (BitWidth < 32 || + (BitWidth == 32 && isa(I2F))) + return isa(I2F) ? B.CreateSExt(Op, B.getInt32Ty()) + : B.CreateZExt(Op, B.getInt32Ty()); + } + + return nullptr; +} + /// Use exp{,2}(x * y) for pow(exp{,2}(x), y); -/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x); -/// exp2(log2(n) * x) for pow(n, x). +/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x); +/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); @@ -1269,9 +1458,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { StringRef ExpName; Intrinsic::ID ID; Value *ExpFn; - LibFunc LibFnFloat; - LibFunc LibFnDouble; - LibFunc LibFnLongDouble; + LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble; switch (LibFn) { default: @@ -1305,9 +1492,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { // elimination cannot be trusted to remove it, since it may have side // effects (e.g., errno). When the only consumer for the original // exp{,2}() is pow(), then it has to be explicitly erased. 
- BaseFn->replaceAllUsesWith(ExpFn); - eraseFromParent(BaseFn); - + substituteInParent(BaseFn, ExpFn); return ExpFn; } } @@ -1318,8 +1503,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) return nullptr; + // pow(2.0, itofp(x)) -> ldexp(1.0, x) + if (match(Base, m_SpecificFP(2.0)) && + (isa(Expo) || isa(Expo)) && + hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *ExpoI = getIntToFPVal(Expo, B)) + return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, + LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, + B, Attrs); + } + // pow(2.0 ** n, x) -> exp2(n * x) - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { + if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) { APFloat BaseR = APFloat(1.0); BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored); BaseR = BaseR / *BaseF; @@ -1344,7 +1539,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { // pow(10.0, x) -> exp10(x) // TODO: There is no exp10() intrinsic yet, but some day there shall be one. 
if (match(Base, m_SpecificFP(10.0)) && - hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) + hasFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, B, Attrs); @@ -1359,17 +1554,15 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) { if (Log) { Value *FMul = B.CreateFMul(Log, Expo, "mul"); - if (Pow->doesNotAccessMemory()) { + if (Pow->doesNotAccessMemory()) return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty), FMul, "exp2"); - } else { - if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l)) - return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs); - } + else if (hasFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f, LibFunc_exp2l)) + return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, + LibFunc_exp2l, B, Attrs); } } + return nullptr; } @@ -1384,8 +1577,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, } // Otherwise, use the libcall for sqrt(). - if (hasUnaryFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) + if (hasFloatFn(TLI, V->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) // TODO: We also should check that the target can in fact lower the sqrt() // libcall. We currently have no way to ask this question, so we ask if // the target has a sqrt() libcall, which is not exactly the same. @@ -1452,7 +1644,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { bool Ignored; // Bail out if simplifying libcalls to pow() is disabled. - if (!hasUnaryFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) + if (!hasFloatFn(TLI, Ty, LibFunc_pow, LibFunc_powf, LibFunc_powl)) return nullptr; // Propagate the math semantics from the call to any created instructions. 
@@ -1480,8 +1672,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { if (match(Expo, m_SpecificFP(-1.0))) return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); - // pow(x, 0.0) -> 1.0 - if (match(Expo, m_SpecificFP(0.0))) + // pow(x, +/-0.0) -> 1.0 + if (match(Expo, m_AnyZeroFP())) return ConstantFP::get(Ty, 1.0); // pow(x, 1.0) -> x @@ -1558,16 +1750,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { // powf(x, itofp(y)) -> powi(x, y) if (AllowApprox && (isa(Expo) || isa(Expo))) { - Value *IntExpo = cast(Expo)->getOperand(0); - Value *NewExpo = nullptr; - unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits(); - if (isa(Expo) && BitWidth == 32) - NewExpo = IntExpo; - else if (BitWidth < 32) - NewExpo = isa(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty()) - : B.CreateZExt(IntExpo, B.getInt32Ty()); - if (NewExpo) - return createPowWithIntegerExponent(Base, NewExpo, M, B); + if (Value *ExpoI = getIntToFPVal(Expo, B)) + return createPowWithIntegerExponent(Base, ExpoI, M, B); } return Shrunk; @@ -1575,45 +1759,25 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; StringRef Name = Callee->getName(); - if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) + Value *Ret = nullptr; + if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && + hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); + Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 - LibFunc LdExp = LibFunc_ldexpl; - if (Op->getType()->isFloatTy()) - LdExp = LibFunc_ldexpf; - else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc_ldexp; - - if (TLI->has(LdExp)) { - Value *LdExpArg = nullptr; - if (SIToFPInst *OpC = 
dyn_cast(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) - LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); - } else if (UIToFPInst *OpC = dyn_cast(Op)) { - if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) - LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); - } - - if (LdExpArg) { - Constant *One = ConstantFP::get(CI->getContext(), APFloat(1.0f)); - if (!Op->getType()->isFloatTy()) - One = ConstantExpr::getFPExtend(One, Op->getType()); - - Module *M = CI->getModule(); - FunctionCallee NewCallee = M->getOrInsertFunction( - TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty()); - CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg}); - if (const Function *F = dyn_cast(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; - } + if ((isa(Op) || isa(Op)) && + hasFloatFn(TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { + if (Value *Exp = getIntToFPVal(Op, B)) + return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, + LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, + B, CI->getCalledFunction()->getAttributes()); } + return Ret; } @@ -1644,48 +1808,155 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); } -Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); +Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilder<> &B) { + Function *LogFn = Log->getCalledFunction(); + AttributeList Attrs = LogFn->getAttributes(); + StringRef LogNm = LogFn->getName(); + Intrinsic::ID LogID = LogFn->getIntrinsicID(); + Module *Mod = Log->getModule(); + Type *Ty = Log->getType(); Value *Ret = nullptr; - StringRef Name = Callee->getName(); - if (UnsafeFPShrink && hasFloatVersion(Name)) - Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->isFast()) - return Ret; - Value *Op1 = 
CI->getArgOperand(0); - auto *OpC = dyn_cast(Op1); + if (UnsafeFPShrink && hasFloatVersion(LogNm)) + Ret = optimizeUnaryDoubleFP(Log, B, true); // The earlier call must also be 'fast' in order to do these transforms. - if (!OpC || !OpC->isFast()) + CallInst *Arg = dyn_cast(Log->getArgOperand(0)); + if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse()) return Ret; - // log(pow(x,y)) -> y*log(x) - // This is only applicable to log, log2, log10. - if (Name != "log" && Name != "log2" && Name != "log10") + LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb; + + // This is only applicable to log(), log2(), log10(). + if (TLI->getLibFunc(LogNm, LogLb)) + switch (LogLb) { + case LibFunc_logf: + LogID = Intrinsic::log; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log: + LogID = Intrinsic::log; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_logl: + LogID = Intrinsic::log; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log2f: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log2: + LogID = Intrinsic::log2; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log2l: + LogID = Intrinsic::log2; + ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + case LibFunc_log10f: + LogID = Intrinsic::log10; + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + break; + case LibFunc_log10: + LogID = Intrinsic::log10; + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + break; + case LibFunc_log10l: + LogID = Intrinsic::log10; + 
ExpLb = LibFunc_expl; + Exp2Lb = LibFunc_exp2l; + Exp10Lb = LibFunc_exp10l; + PowLb = LibFunc_powl; + break; + default: + return Ret; + } + else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 || + LogID == Intrinsic::log10) { + if (Ty->getScalarType()->isFloatTy()) { + ExpLb = LibFunc_expf; + Exp2Lb = LibFunc_exp2f; + Exp10Lb = LibFunc_exp10f; + PowLb = LibFunc_powf; + } else if (Ty->getScalarType()->isDoubleTy()) { + ExpLb = LibFunc_exp; + Exp2Lb = LibFunc_exp2; + Exp10Lb = LibFunc_exp10; + PowLb = LibFunc_pow; + } else + return Ret; + } else return Ret; IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - FMF.setFast(); - B.setFastMathFlags(FMF); + B.setFastMathFlags(FastMathFlags::getFast()); - LibFunc Func; - Function *F = OpC->getCalledFunction(); - if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && - Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow)) - return B.CreateFMul(OpC->getArgOperand(1), - emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, - Callee->getAttributes()), "mul"); + Intrinsic::ID ArgID = Arg->getIntrinsicID(); + LibFunc ArgLb = NotLibFunc; + TLI->getLibFunc(Arg, ArgLb); + + // log(pow(x,y)) -> y*log(x) + if (ArgLb == PowLb || ArgID == Intrinsic::pow) { + Value *LogX = + Log->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Arg->getOperand(0), "log") + : emitUnaryFloatFnCall(Arg->getOperand(0), LogNm, B, Attrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); + // Since pow() may have side effects, e.g. errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } + + // log(exp{,2,10}(y)) -> y*log({e,2,10}) + // TODO: There is no exp10() intrinsic yet. 
+ if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb || + ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) { + Constant *Eul; + if (ArgLb == ExpLb || ArgID == Intrinsic::exp) + // FIXME: Add more precise value of e for long double. + Eul = ConstantFP::get(Log->getType(), numbers::e); + else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2) + Eul = ConstantFP::get(Log->getType(), 2.0); + else + Eul = ConstantFP::get(Log->getType(), 10.0); + Value *LogE = Log->doesNotAccessMemory() + ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), + Eul, "log") + : emitUnaryFloatFnCall(Eul, LogNm, B, Attrs); + Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul"); + // Since exp() may have side effects, e.g. errno, + // dead code elimination may not be trusted to remove it. + substituteInParent(Arg, MulY); + return MulY; + } - // log(exp2(y)) -> y*log(2) - if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && - TLI->has(Func) && Func == LibFunc_exp2) - return B.CreateFMul( - OpC->getArgOperand(0), - emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), - Callee->getName(), B, Callee->getAttributes()), - "logmul"); return Ret; } @@ -2137,6 +2408,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) { return New; } + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } @@ -2231,21 +2503,21 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) { return New; } + annotateNonNullBasedOnAccess(CI, {0, 1}); return nullptr; } Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { - // Check for a fixed format string. - StringRef FormatStr; - if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) - return nullptr; - // Check for size ConstantInt *Size = dyn_cast(CI->getArgOperand(1)); if (!Size) return nullptr; uint64_t N = Size->getZExtValue(); + // Check for a fixed format string. 
+ StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) + return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->getNumArgOperands() == 3) { @@ -2318,6 +2590,8 @@ Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) { return V; } + if (isKnownNonZero(CI->getOperand(1), DL)) + annotateNonNullBasedOnAccess(CI, 0); return nullptr; } @@ -2503,6 +2777,7 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { + annotateNonNullBasedOnAccess(CI, 0); if (!CI->use_empty()) return nullptr; @@ -2515,6 +2790,12 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) { + // bcopy(src, dst, n) -> llvm.memmove(dst, src, n) + return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1, + CI->getArgOperand(2)); +} + bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) { LibFunc Func; SmallString<20> FloatFuncName = FuncName; @@ -2557,6 +2838,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrLen(CI, Builder); case LibFunc_strpbrk: return optimizeStrPBrk(CI, Builder); + case LibFunc_strndup: + return optimizeStrNDup(CI, Builder); case LibFunc_strtol: case LibFunc_strtod: case LibFunc_strtof: @@ -2573,12 +2856,16 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrStr(CI, Builder); case LibFunc_memchr: return optimizeMemChr(CI, Builder); + case LibFunc_memrchr: + return optimizeMemRChr(CI, Builder); case LibFunc_bcmp: return optimizeBCmp(CI, Builder); case LibFunc_memcmp: return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: return optimizeMemCpy(CI, Builder); + case LibFunc_mempcpy: + return optimizeMemPCpy(CI, Builder); case LibFunc_memmove: return optimizeMemMove(CI, Builder); case LibFunc_memset: @@ 
-2587,6 +2874,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeRealloc(CI, Builder); case LibFunc_wcslen: return optimizeWcslen(CI, Builder); + case LibFunc_bcopy: + return optimizeBCopy(CI, Builder); default: break; } @@ -2626,11 +2915,21 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, case LibFunc_sqrt: case LibFunc_sqrtl: return optimizeSqrt(CI, Builder); + case LibFunc_logf: case LibFunc_log: + case LibFunc_logl: + case LibFunc_log10f: case LibFunc_log10: + case LibFunc_log10l: + case LibFunc_log1pf: case LibFunc_log1p: + case LibFunc_log1pl: + case LibFunc_log2f: case LibFunc_log2: + case LibFunc_log2l: + case LibFunc_logbf: case LibFunc_logb: + case LibFunc_logbl: return optimizeLog(CI, Builder); case LibFunc_tan: case LibFunc_tanf: @@ -2721,10 +3020,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { case Intrinsic::exp2: return optimizeExp2(CI, Builder); case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); // TODO: Use foldMallocMemset() with memset intrinsic. + case Intrinsic::memset: + return optimizeMemSet(CI, Builder); + case Intrinsic::memcpy: + return optimizeMemCpy(CI, Builder); + case Intrinsic::memmove: + return optimizeMemMove(CI, Builder); default: return nullptr; } @@ -2740,8 +3047,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { IRBuilder<> TmpBuilder(SimplifiedCI); if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { // If we were able to further simplify, remove the now redundant call. - SimplifiedCI->replaceAllUsesWith(V); - eraseFromParent(SimplifiedCI); + substituteInParent(SimplifiedCI, V); return V; } } @@ -2898,7 +3204,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp)); // If the length is 0 we don't know how long it is and so we can't // remove the check. 
- if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, *StrOp, Len); + else return false; return ObjSizeCI->getZExtValue() >= Len; } @@ -2915,8 +3223,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = B.CreateMemCpy( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2925,8 +3234,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2)) { - B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, - CI->getArgOperand(2)); + CallInst *NewCI = B.CreateMemMove( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2)); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2938,7 +3248,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, if (isFortifiedCallFoldable(CI, 3, 2)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + CallInst *NewCI = + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + NewCI->setAttributes(CI->getAttributes()); return CI->getArgOperand(0); } return nullptr; @@ -2974,7 +3286,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. 
uint64_t Len = GetStringLength(Src); - if (Len == 0) + if (Len) + annotateDereferenceableBytes(CI, 1, Len); + else return nullptr; Type *SizeTTy = DL.getIntPtrType(CI->getContext()); diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 456724779b4..5d380dcf231 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -380,11 +380,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, // TODO see if there is a more elegant solution to selecting the rewrite // descriptor type if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, Naked)); else DL->push_back( - llvm::make_unique(Source, Transform)); + std::make_unique(Source, Transform)); return true; } @@ -442,11 +442,11 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, } if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, /*Naked*/ false)); else - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Transform)); return true; @@ -505,11 +505,11 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, } if (!Target.empty()) - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Target, /*Naked*/ false)); else - DL->push_back(llvm::make_unique( + DL->push_back(std::make_unique( Source, Transform)); return true; diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index a77bf50fe10..591e1fd2dbe 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -431,7 +431,7 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal); NewLoad->takeName(SrcVal); - NewLoad->setAlignment(SrcVal->getAlignment()); + 
NewLoad->setAlignment(MaybeAlign(SrcVal->getAlignment())); LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fbc3407c301..da68d3713b4 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -27,8 +27,8 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" @@ -66,7 +66,7 @@ struct WorklistEntry { enum EntryKind { MapGlobalInit, MapAppendingVar, - MapGlobalAliasee, + MapGlobalIndirectSymbol, RemapFunction }; struct GVInitTy { @@ -77,9 +77,9 @@ struct WorklistEntry { GlobalVariable *GV; Constant *InitPrefix; }; - struct GlobalAliaseeTy { - GlobalAlias *GA; - Constant *Aliasee; + struct GlobalIndirectSymbolTy { + GlobalIndirectSymbol *GIS; + Constant *Target; }; unsigned Kind : 2; @@ -89,7 +89,7 @@ struct WorklistEntry { union { GVInitTy GVInit; AppendingGVTy AppendingGV; - GlobalAliaseeTy GlobalAliasee; + GlobalIndirectSymbolTy GlobalIndirectSymbol; Function *RemapF; } Data; }; @@ -161,8 +161,8 @@ public: bool IsOldCtorDtor, ArrayRef NewMembers, unsigned MCID); - void scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID); + void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target, + unsigned MCID); void scheduleRemapFunction(Function &F, unsigned MCID); void flush(); @@ -172,7 +172,7 @@ private: void mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, bool IsOldCtorDtor, ArrayRef NewMembers); - void mapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee); + void mapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target); void remapFunction(Function 
&F, ValueToValueMapTy &VM); ValueToValueMapTy &getVM() { return *MCs[CurrentMCID].VM; } @@ -774,20 +774,6 @@ Metadata *MDNodeMapper::mapTopLevelUniquedNode(const MDNode &FirstN) { return *getMappedOp(&FirstN); } -namespace { - -struct MapMetadataDisabler { - ValueToValueMapTy &VM; - - MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { - VM.disableMapMetadata(); - } - - ~MapMetadataDisabler() { VM.enableMapMetadata(); } -}; - -} // end anonymous namespace - Optional Mapper::mapSimpleMetadata(const Metadata *MD) { // If the value already exists in the map, use it. if (Optional NewMD = getVM().getMappedMD(MD)) @@ -802,9 +788,6 @@ Optional Mapper::mapSimpleMetadata(const Metadata *MD) { return const_cast(MD); if (auto *CMD = dyn_cast(MD)) { - // Disallow recursion into metadata mapping through mapValue. - MapMetadataDisabler MMD(getVM()); - // Don't memoize ConstantAsMetadata. Instead of lasting until the // LLVMContext is destroyed, they can be deleted when the GlobalValue they // reference is destructed. 
These aren't super common, so the extra @@ -846,9 +829,9 @@ void Mapper::flush() { AppendingInits.resize(PrefixSize); break; } - case WorklistEntry::MapGlobalAliasee: - E.Data.GlobalAliasee.GA->setAliasee( - mapConstant(E.Data.GlobalAliasee.Aliasee)); + case WorklistEntry::MapGlobalIndirectSymbol: + E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol( + mapConstant(E.Data.GlobalIndirectSymbol.Target)); break; case WorklistEntry::RemapFunction: remapFunction(*E.Data.RemapF); @@ -1041,16 +1024,16 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV, AppendingInits.append(NewMembers.begin(), NewMembers.end()); } -void Mapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - assert(AlreadyScheduled.insert(&GA).second && "Should not reschedule"); +void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, unsigned MCID) { + assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule"); assert(MCID < MCs.size() && "Invalid mapping context"); WorklistEntry WE; - WE.Kind = WorklistEntry::MapGlobalAliasee; + WE.Kind = WorklistEntry::MapGlobalIndirectSymbol; WE.MCID = MCID; - WE.Data.GlobalAliasee.GA = &GA; - WE.Data.GlobalAliasee.Aliasee = &Aliasee; + WE.Data.GlobalIndirectSymbol.GIS = &GIS; + WE.Data.GlobalIndirectSymbol.Target = &Target; Worklist.push_back(WE); } @@ -1147,9 +1130,10 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV, GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID); } -void ValueMapper::scheduleMapGlobalAliasee(GlobalAlias &GA, Constant &Aliasee, - unsigned MCID) { - getAsMapper(pImpl)->scheduleMapGlobalAliasee(GA, Aliasee, MCID); +void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, + Constant &Target, + unsigned MCID) { + getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID); } void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) { diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp 
b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 4273080ddd9..f44976c723e 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -147,7 +147,7 @@ private: static const unsigned MaxDepth = 3; bool isConsecutiveAccess(Value *A, Value *B); - bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta, + bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth = 0) const; bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth) const; @@ -336,14 +336,29 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { } bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, - const APInt &PtrDelta, - unsigned Depth) const { + APInt PtrDelta, unsigned Depth) const { unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); APInt OffsetA(PtrBitWidth, 0); APInt OffsetB(PtrBitWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType()); + + if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType())) + return false; + + // In case if we have to shrink the pointer + // stripAndAccumulateInBoundsConstantOffsets should properly handle a + // possible overflow and the value should fit into a smallest data type + // used in the cast/gep chain. + assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth && + OffsetB.getMinSignedBits() <= NewPtrBitWidth); + + OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth); + OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth); + PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth); + APInt OffsetDelta = OffsetB - OffsetA; // Check if they are based on the same pointer. 
That makes the offsets @@ -650,7 +665,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef Chain) { // We can ignore the alias if the we have a load store pair and the load // is known to be invariant. The load cannot be clobbered by the store. auto IsInvariantLoad = [](const LoadInst *LI) -> bool { - return LI->getMetadata(LLVMContext::MD_invariant_load); + return LI->hasMetadata(LLVMContext::MD_invariant_load); }; // We can ignore the alias as long as the load comes before the store, @@ -1077,7 +1092,7 @@ bool Vectorizer::vectorizeLoadChain( LoadInst *L0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. - Type *LoadTy; + Type *LoadTy = nullptr; for (const auto &V : Chain) { LoadTy = cast(V)->getType(); if (LoadTy->isIntOrIntVectorTy()) @@ -1089,6 +1104,7 @@ bool Vectorizer::vectorizeLoadChain( break; } } + assert(LoadTy && "Can't determine LoadInst type from chain"); unsigned Sz = DL.getTypeSizeInBits(LoadTy); unsigned AS = L0->getPointerAddressSpace(); diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 6ef8dc2d3cd..f43842be535 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -13,7 +13,10 @@ // pass. It should be easy to create an analysis pass around it if there // is a need (but D45420 needs to happen first). 
// +#include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" @@ -47,38 +50,6 @@ static const unsigned MaxInterleaveFactor = 16; namespace llvm { -#ifndef NDEBUG -static void debugVectorizationFailure(const StringRef DebugMsg, - Instruction *I) { - dbgs() << "LV: Not vectorizing: " << DebugMsg; - if (I != nullptr) - dbgs() << " " << *I; - else - dbgs() << '.'; - dbgs() << '\n'; -} -#endif - -OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, - StringRef RemarkName, - Loop *TheLoop, - Instruction *I) { - Value *CodeRegion = TheLoop->getHeader(); - DebugLoc DL = TheLoop->getStartLoc(); - - if (I) { - CodeRegion = I->getParent(); - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - } - - OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); - R << "loop not vectorized: "; - return R; -} - bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: @@ -88,6 +59,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: + case HK_PREDICATE: return (Val == 0 || Val == 1); } return false; @@ -99,7 +71,9 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), - IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { + IsVectorized("isvectorized", 0, HK_ISVECTORIZED), + Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), + ORE(ORE) { // Populate values with existing loop metadata. 
getHintsFromMetadata(); @@ -250,7 +224,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -435,7 +409,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; return 0; @@ -445,14 +420,6 @@ bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } -void LoopVectorizationLegality::reportVectorizationFailure( - const StringRef DebugMsg, const StringRef OREMsg, - const StringRef ORETag, Instruction *I) const { - LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); - ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), - ORETag, TheLoop, I) << OREMsg); -} - bool LoopVectorizationLegality::canVectorizeOuterLoop() { assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); // Store the result and return it at the end instead of exiting early, in case @@ -467,7 +434,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!Br) { reportVectorizationFailure("Unsupported basic block terminator", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -486,7 +453,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { !LI->isLoopHeader(Br->getSuccessor(1))) { reportVectorizationFailure("Unsupported conditional branch", "loop control flow is not understood by 
vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -500,7 +467,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { TheLoop /*context outer loop*/)) { reportVectorizationFailure("Outer loop contains divergent loops", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -511,7 +478,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { if (!setupOuterLoopInductions()) { reportVectorizationFailure("Unsupported outer loop Phi(s)", "Unsupported outer loop Phi(s)", - "UnsupportedPhi"); + "UnsupportedPhi", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -618,7 +585,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { !PhiTy->isPointerTy()) { reportVectorizationFailure("Found a non-int non-pointer PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); return false; } @@ -631,6 +598,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Unsafe cyclic dependencies with header phis are identified during // legalization for reduction, induction and first order // recurrences. 
+ AllowedExit.insert(&I); continue; } @@ -638,7 +606,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Phi->getNumIncomingValues() != 2) { reportVectorizationFailure("Found an invalid PHI", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood", Phi); + "CFGNotUnderstood", ORE, TheLoop, Phi); return false; } @@ -690,7 +658,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { reportVectorizationFailure("Found an unidentified PHI", "value that could not be identified as " "reduction is used outside the loop", - "NonReductionValueUsedOutsideLoop", Phi); + "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); return false; } // end of PHI handling @@ -721,11 +689,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { "library call cannot be vectorized. " "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } else { reportVectorizationFailure("Found a non-intrinsic callsite", "call instruction cannot be vectorized", - "CantVectorizeLibcall", CI); + "CantVectorizeLibcall", ORE, TheLoop, CI); } return false; } @@ -740,7 +708,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { reportVectorizationFailure("Found unvectorizable intrinsic", "intrinsic instruction cannot be vectorized", - "CantVectorizeIntrinsic", CI); + "CantVectorizeIntrinsic", ORE, TheLoop, CI); return false; } } @@ -753,7 +721,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { isa(I)) { reportVectorizationFailure("Found unvectorizable type", "instruction return type cannot be vectorized", - "CantVectorizeInstructionReturnType", &I); + "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); return false; } @@ -763,7 +731,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!VectorType::isValidElementType(T)) { reportVectorizationFailure("Store instruction cannot be 
vectorized", "store instruction cannot be vectorized", - "CantVectorizeStore", ST); + "CantVectorizeStore", ORE, TheLoop, ST); return false; } @@ -773,12 +741,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Arbitrarily try a vector of 2 elements. Type *VecTy = VectorType::get(T, /*NumElements=*/2); assert(VecTy && "did not find vectorized version of stored type"); - unsigned Alignment = getLoadStoreAlignment(ST); - if (!TTI->isLegalNTStore(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(ST); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTStore(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal store instruction cannot be vectorized", "nontemporal store instruction cannot be vectorized", - "CantVectorizeNontemporalStore", ST); + "CantVectorizeNontemporalStore", ORE, TheLoop, ST); return false; } } @@ -789,12 +758,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // supported on the target (arbitrarily try a vector of 2 elements). Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); assert(VecTy && "did not find vectorized version of load type"); - unsigned Alignment = getLoadStoreAlignment(LD); - if (!TTI->isLegalNTLoad(VecTy, Alignment)) { + const MaybeAlign Alignment = getLoadStoreAlignment(LD); + assert(Alignment && "Alignment should be set"); + if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { reportVectorizationFailure( "nontemporal load instruction cannot be vectorized", "nontemporal load instruction cannot be vectorized", - "CantVectorizeNontemporalLoad", LD); + "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); return false; } } @@ -823,7 +793,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } reportVectorizationFailure("Value cannot be used outside the loop", "value cannot be used outside the loop", - "ValueUsedOutsideLoop", &I); + "ValueUsedOutsideLoop", ORE, TheLoop, &I); return false; } } // next instr. 
@@ -833,12 +803,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Inductions.empty()) { reportVectorizationFailure("Did not find one integer induction var", "loop induction variable could not be identified", - "NoInductionVariable"); + "NoInductionVariable", ORE, TheLoop); return false; } else if (!WidestIndTy) { reportVectorizationFailure("Did not find one integer induction var", "integer loop induction variable could not be identified", - "NoIntegerInductionVariable"); + "NoIntegerInductionVariable", ORE, TheLoop); return false; } else { LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); @@ -869,7 +839,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("Stores to a uniform address", "write to a loop invariant address could not be vectorized", - "CantVectorizeStoreToLoopInvariantAddress"); + "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); @@ -905,7 +875,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { } bool LoopVectorizationLegality::blockCanBePredicated( - BasicBlock *BB, SmallPtrSetImpl &SafePtrs) { + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, bool PreserveGuards) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { @@ -924,7 +894,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( // !llvm.mem.parallel_loop_access implies if-conversion safety. // Otherwise, record that the load needs (real or emulated) masking // and let the cost model decide. 
- if (!IsAnnotatedParallel) + if (!IsAnnotatedParallel || PreserveGuards) MaskedOp.insert(LI); continue; } @@ -953,23 +923,41 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { reportVectorizationFailure("If-conversion is disabled", "if-conversion is disabled", - "IfConversionDisabled"); + "IfConversionDisabled", + ORE, TheLoop); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); - // A list of pointers that we can safely read and write to. + // A list of pointers which are known to be dereferenceable within scope of + // the loop body for each iteration of the loop which executes. That is, + // the memory pointed to can be dereferenced (with the access size implied by + // the value's type) unconditionally within the loop header without + // introducing a new fault. SmallPtrSet SafePointes; // Collect safe addresses. for (BasicBlock *BB : TheLoop->blocks()) { - if (blockNeedsPredication(BB)) + if (!blockNeedsPredication(BB)) { + for (Instruction &I : *BB) + if (auto *Ptr = getLoadStorePointerOperand(&I)) + SafePointes.insert(Ptr); continue; + } - for (Instruction &I : *BB) - if (auto *Ptr = getLoadStorePointerOperand(&I)) - SafePointes.insert(Ptr); + // For a block which requires predication, a address may be safe to access + // in the loop w/o predication if we can prove dereferenceability facts + // sufficient to ensure it'll never fault within the loop. For the moment, + // we restrict this to loads; stores are more complicated due to + // concurrency restrictions. + ScalarEvolution &SE = *PSE.getSE(); + for (Instruction &I : *BB) { + LoadInst *LI = dyn_cast(&I); + if (LI && !mustSuppressSpeculation(*LI) && + isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) + SafePointes.insert(LI->getPointerOperand()); + } } // Collect the blocks that need predication. 
@@ -979,7 +967,8 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa(BB->getTerminator())) { reportVectorizationFailure("Loop contains a switch statement", "loop contains a switch statement", - "LoopContainsSwitch", BB->getTerminator()); + "LoopContainsSwitch", ORE, TheLoop, + BB->getTerminator()); return false; } @@ -989,14 +978,16 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } @@ -1026,7 +1017,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getLoopPreheader()) { reportVectorizationFailure("Loop doesn't have a legal pre-header", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1037,7 +1028,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getNumBackEdges() != 1) { reportVectorizationFailure("The loop must have a single backedge", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1048,7 +1039,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (!Lp->getExitingBlock()) { reportVectorizationFailure("The loop must have an exiting block", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1061,7 
+1052,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, if (Lp->getExitingBlock() != Lp->getLoopLatch()) { reportVectorizationFailure("The exiting block is not the loop latch", "loop control flow is not understood by vectorizer", - "CFGNotUnderstood"); + "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1124,7 +1115,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (!canVectorizeOuterLoop()) { reportVectorizationFailure("Unsupported outer loop", "unsupported outer loop", - "UnsupportedOuterLoop"); + "UnsupportedOuterLoop", + ORE, TheLoop); // TODO: Implement DoExtraAnalysis when subsequent legal checks support // outer loops. return false; @@ -1176,7 +1168,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { reportVectorizationFailure("Too many SCEV checks needed", "Too many SCEV assumptions need to be made and checked at runtime", - "TooManySCEVRunTimeChecks"); + "TooManySCEVRunTimeChecks", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else @@ -1190,7 +1182,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return Result; } -bool LoopVectorizationLegality::canFoldTailByMasking() { +bool LoopVectorizationLegality::prepareToFoldTailByMasking() { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); @@ -1199,22 +1191,21 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { "No primary induction, cannot fold tail by masking", "Missing a primary induction variable in the loop, which is " "needed in order to fold tail by masking as required.", - "NoPrimaryInduction"); + "NoPrimaryInduction", ORE, TheLoop); return false; } - // TODO: handle reductions when tail is folded by masking. 
- if (!Reductions.empty()) { - reportVectorizationFailure( - "Loop has reductions, cannot fold tail by masking", - "Cannot fold tail by masking in the presence of reductions.", - "ReductionFoldingTailByMasking"); - return false; - } + SmallPtrSet ReductionLiveOuts; - // TODO: handle outside users when tail is folded by masking. + for (auto &Reduction : *getReductionVars()) + ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); + + // TODO: handle non-reduction outside users when tail is folded by masking. for (auto *AE : AllowedExit) { - // Check that all users of allowed exit values are inside the loop. + // Check that all users of allowed exit values are inside the loop or + // are the live-out of a reduction. + if (ReductionLiveOuts.count(AE)) + continue; for (User *U : AE->users()) { Instruction *UI = cast(U); if (TheLoop->contains(UI)) @@ -1222,7 +1213,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { reportVectorizationFailure( "Cannot fold tail by masking, loop has an outside user for", "Cannot fold tail by masking in the presence of live outs.", - "LiveOutFoldingTailByMasking", UI); + "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); return false; } } @@ -1233,11 +1224,12 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. 
for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockCanBePredicated(BB, SafePointers)) { + if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { reportVectorizationFailure( "Cannot fold tail by masking as required", "control flow cannot be substituted for a select", - "NoCFGForSelect", BB->getTerminator()); + "NoCFGForSelect", ORE, TheLoop, + BB->getTerminator()); return false; } } diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 97077cce83e..a5e85f27fab 100644 --- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -228,11 +228,11 @@ public: /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. - Optional plan(bool OptForSize, unsigned UserVF); + Optional plan(unsigned UserVF); /// Use the VPlan-native path to plan how to best vectorize, return the best /// VF and its cost. - VectorizationFactor planInVPlanNativePath(bool OptForSize, unsigned UserVF); + VectorizationFactor planInVPlanNativePath(unsigned UserVF); /// Finalize the best decision and dispose of all other VPlans. void setBestPlan(unsigned VF, unsigned UF); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 46265e3f3e1..8f0bf70f873 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -177,6 +177,14 @@ static cl::opt TinyTripCountVectorThreshold( "value are vectorized only if no scalar iteration overheads " "are incurred.")); +// Indicates that an epilogue is undesired, predication is preferred. +// This means that the vectorizer will try to fold the loop-tail (epilogue) +// into the loop and predicate the loop body accordingly. 
+static cl::opt PreferPredicateOverEpilog( + "prefer-predicate-over-epilog", cl::init(false), cl::Hidden, + cl::desc("Indicate that an epilogue is undesired, predication should be " + "used instead.")); + static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, cl::desc("Maximize bandwidth when selecting vectorization factor which " @@ -347,6 +355,29 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } +/// Returns "best known" trip count for the specified loop \p L as defined by +/// the following procedure: +/// 1) Returns exact trip count if it is known. +/// 2) Returns expected trip count according to profile data if any. +/// 3) Returns upper bound estimate if it is known. +/// 4) Returns None if all of the above failed. +static Optional getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) { + // Check if exact trip count is known. + if (unsigned ExpectedTC = SE.getSmallConstantTripCount(L)) + return ExpectedTC; + + // Check if there is an expected trip count available from profile data. + if (LoopVectorizeWithBlockFrequency) + if (auto EstimatedTC = getLoopEstimatedTripCount(L)) + return EstimatedTC; + + // Check if upper bound estimate is known. + if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L)) + return ExpectedTC; + + return None; +} + namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic @@ -795,6 +826,59 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) B.SetCurrentDebugLocation(DebugLoc()); } +/// Write a record \p DebugMsg about vectorization failure to the debug +/// output stream. If \p I is passed, it is an instruction that prevents +/// vectorization. 
+#ifndef NDEBUG +static void debugVectorizationFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "LV: Not vectorizing: " << DebugMsg; + if (I != nullptr) + dbgs() << " " << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +/// Create an analysis remark that explains why vectorization failed +/// +/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p +/// RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be +/// streamed to. +static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, + StringRef RemarkName, Loop *TheLoop, Instruction *I) { + Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); + R << "loop not vectorized: "; + return R; +} + +namespace llvm { + +void reportVectorizationFailure(const StringRef DebugMsg, + const StringRef OREMsg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I) { + LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I)); + LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE); + ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(), + ORETag, TheLoop, I) << OREMsg); +} + +} // end namespace llvm + #ifndef NDEBUG /// \return string containing a file name and a line # for the given loop. static std::string getDebugLocString(const Loop *L) { @@ -836,6 +920,26 @@ void InnerLoopVectorizer::addMetadata(ArrayRef To, namespace llvm { +// Loop vectorization cost-model hints how the scalar epilogue loop should be +// lowered. 
+enum ScalarEpilogueLowering { + + // The default: allowing scalar epilogues. + CM_ScalarEpilogueAllowed, + + // Vectorization with OptForSize: don't allow epilogues. + CM_ScalarEpilogueNotAllowedOptSize, + + // A special case of vectorisation with OptForSize: loops with a very small + // trip count are considered for vectorization under OptForSize, thereby + // making sure the cost of their loop body is dominant, free of runtime + // guards and scalar iteration overheads. + CM_ScalarEpilogueNotAllowedLowTripLoop, + + // Loop hint predicate indicating an epilogue is undesired. + CM_ScalarEpilogueNotNeededUsePredicate +}; + /// LoopVectorizationCostModel - estimates the expected speedups due to /// vectorization. /// In many cases vectorization is not profitable. This can happen because of @@ -845,20 +949,26 @@ namespace llvm { /// different operations. class LoopVectorizationCostModel { public: - LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE, - LoopInfo *LI, LoopVectorizationLegality *Legal, + LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L, + PredicatedScalarEvolution &PSE, LoopInfo *LI, + LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints, InterleavedAccessInfo &IAI) - : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {} + : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), + TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), + Hints(Hints), InterleaveInfo(IAI) {} /// \return An upper bound for the vectorization factor, or None if /// vectorization and interleaving should be avoided up front. 
- Optional computeMaxVF(bool OptForSize); + Optional computeMaxVF(); + + /// \return True if runtime checks are required for vectorization, and false + /// otherwise. + bool runtimeChecksRequired(); /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to MaxVF. If UserVF is not ZERO @@ -881,8 +991,7 @@ public: /// If interleave count has been specified by metadata it will be returned. /// Otherwise, the interleave count is computed and returned. VF and LoopCost /// are the selected vectorization factor and the cost of the selected VF. - unsigned selectInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); + unsigned selectInterleaveCount(unsigned VF, unsigned LoopCost); /// Memory access instruction may be vectorized in more than one way. /// Form of instruction after vectorization depends on cost. @@ -897,10 +1006,11 @@ public: /// of a loop. struct RegisterUsage { /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - + /// The key is ClassID of target-provided register class. + SmallMapVector LoopInvariantRegs; /// Holds the maximum number of concurrent live intervals in the loop. - unsigned MaxLocalUsers; + /// The key is ClassID of target-provided register class. + SmallMapVector MaxLocalUsers; }; /// \return Returns information about the register usages of the loop for the @@ -1080,14 +1190,16 @@ public: /// Returns true if the target machine supports masked store operation /// for the given \p DataType and kind of access to \p Ptr. 
- bool isLegalMaskedStore(Type *DataType, Value *Ptr) { - return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedStore(DataType); + bool isLegalMaskedStore(Type *DataType, Value *Ptr, MaybeAlign Alignment) { + return Legal->isConsecutivePtr(Ptr) && + TTI.isLegalMaskedStore(DataType, Alignment); } /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { - return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedLoad(DataType); + bool isLegalMaskedLoad(Type *DataType, Value *Ptr, MaybeAlign Alignment) { + return Legal->isConsecutivePtr(Ptr) && + TTI.isLegalMaskedLoad(DataType, Alignment); } /// Returns true if the target machine supports masked scatter operation @@ -1157,11 +1269,14 @@ public: /// to handle accesses with gaps, and there is nothing preventing us from /// creating a scalar epilogue. bool requiresScalarEpilogue() const { - return IsScalarEpilogueAllowed && InterleaveInfo.requiresScalarEpilogue(); + return isScalarEpilogueAllowed() && InterleaveInfo.requiresScalarEpilogue(); } - /// Returns true if a scalar epilogue is not allowed due to optsize. - bool isScalarEpilogueAllowed() const { return IsScalarEpilogueAllowed; } + /// Returns true if a scalar epilogue is not allowed due to optsize or a + /// loop hint annotation. + bool isScalarEpilogueAllowed() const { + return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed; + } /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { return FoldTailByMasking; } @@ -1187,7 +1302,7 @@ private: /// \return An upper bound for the vectorization factor, larger than zero. /// One is returned if vectorization should best be avoided due to cost. 
- unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount); + unsigned computeFeasibleMaxVF(unsigned ConstTripCount); /// The vectorization cost is a combination of the cost itself and a boolean /// indicating whether any of the contributing operations will actually @@ -1246,15 +1361,6 @@ private: /// should be used. bool useEmulatedMaskMemRefHack(Instruction *I); - /// Create an analysis remark that explains why vectorization failed - /// - /// \p RemarkName is the identifier for the remark. \return the remark object - /// that can be streamed to. - OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) { - return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), - RemarkName, TheLoop); - } - /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. @@ -1270,13 +1376,13 @@ private: SmallPtrSet PredicatedBBsAfterVectorization; /// Records whether it is allowed to have the original scalar loop execute at - /// least once. This may be needed as a fallback loop in case runtime + /// least once. This may be needed as a fallback loop in case runtime /// aliasing/dependence checks fail, or to handle the tail/remainder /// iterations when the trip count is unknown or doesn't divide by the VF, /// or as a peel-loop to handle gaps in interleave-groups. /// Under optsize and when the trip count is very small we don't allow any /// iterations to execute in the scalar loop. - bool IsScalarEpilogueAllowed = true; + ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; /// All blocks of loop are to be masked to fold tail of scalar iterations. bool FoldTailByMasking = false; @@ -1496,7 +1602,7 @@ struct LoopVectorize : public FunctionPass { auto *DT = &getAnalysis().getDomTree(); auto *BFI = &getAnalysis().getBFI(); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? 
&TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; auto *AA = &getAnalysis().getAAResults(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *LAA = &getAnalysis(); @@ -2253,12 +2359,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Type *ScalarDataTy = getMemInstValueType(Instr); Type *DataTy = VectorType::get(ScalarDataTy, VF); Value *Ptr = getLoadStorePointerOperand(Instr); - unsigned Alignment = getLoadStoreAlignment(Instr); // An alignment of 0 means target abi alignment. We need to use the scalar's // target abi alignment in such a case. const DataLayout &DL = Instr->getModule()->getDataLayout(); - if (!Alignment) - Alignment = DL.getABITypeAlignment(ScalarDataTy); + const Align Alignment = + DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy); unsigned AddressSpace = getLoadStoreAddressSpace(Instr); // Determine if the pointer operand of the access is either consecutive or @@ -2322,8 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? 
Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, + Alignment.value(), MaskPart); } else { if (Reverse) { // If we store to reverse consecutive memory locations, then we need @@ -2334,10 +2439,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, } auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, - Mask[Part]); + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, + Alignment.value(), Mask[Part]); else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + NewSI = + Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment.value()); } addMetadata(NewSI, SI); } @@ -2352,18 +2458,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr; Value *VectorGep = getOrCreateVectorValue(Ptr, Part); - NewLI = Builder.CreateMaskedGather(VectorGep, Alignment, MaskPart, + NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart, nullptr, "wide.masked.gather"); addMetadata(NewLI, LI); } else { auto *VecPtr = CreateVecPtr(Part, Ptr); if (isMaskRequired) - NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part], + NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part], UndefValue::get(DataTy), "wide.masked.load"); else - NewLI = - Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); + NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment.value(), + "wide.load"); // Add metadata to the load, but setVectorValue to the reverse shuffle. 
addMetadata(NewLI, LI); @@ -2615,8 +2721,9 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { if (C->isZero()) return; - assert(!Cost->foldTailByMasking() && - "Cannot SCEV check stride or overflow when folding tail"); + assert(!BB->getParent()->hasOptSize() && + "Cannot SCEV check stride or overflow when optimizing for size"); + // Create a new block containing the stride check. BB->setName("vector.scevcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); @@ -2649,7 +2756,20 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { if (!MemRuntimeCheck) return; - assert(!Cost->foldTailByMasking() && "Cannot check memory when folding tail"); + if (BB->getParent()->hasOptSize()) { + assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled && + "Cannot emit memory checks when optimizing for size, unless forced " + "to vectorize."); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize", + L->getStartLoc(), L->getHeader()) + << "Code-size may be reduced by not forcing " + "vectorization, or by source-code modifications " + "eliminating the need for runtime checks " + "(e.g., adding 'restrict')."; + }); + } + // Create a new block containing the memory check. BB->setName("vector.memcheck"); auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); @@ -2666,7 +2786,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { // We currently don't use LoopVersioning for the actual loop cloning but we // still use it to add the noalias metadata. 
- LVer = llvm::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, + LVer = std::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, PSE.getSE()); LVer->prepareNoAliasMetadata(); } @@ -3598,6 +3718,26 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { setDebugLocFromInst(Builder, LoopExitInst); + // If tail is folded by masking, the vector value to leave the loop should be + // a Select choosing between the vectorized LoopExitInst and vectorized Phi, + // instead of the former. + if (Cost->foldTailByMasking()) { + for (unsigned Part = 0; Part < UF; ++Part) { + Value *VecLoopExitInst = + VectorLoopValueMap.getVectorValue(LoopExitInst, Part); + Value *Sel = nullptr; + for (User *U : VecLoopExitInst->users()) { + if (isa(U)) { + assert(!Sel && "Reduction exit feeding two selects"); + Sel = U; + } else + assert(isa(U) && "Reduction exit must feed Phi's or select"); + } + assert(Sel && "Reduction exit feeds no select"); + VectorLoopValueMap.resetVectorValue(LoopExitInst, Part, Sel); + } + } + // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. @@ -4064,7 +4204,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { case Instruction::FCmp: { // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = dyn_cast(&I); + auto *Cmp = cast(&I); setDebugLocFromInst(Builder, Cmp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); @@ -4097,7 +4237,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - auto *CI = dyn_cast(&I); + auto *CI = cast(&I); setDebugLocFromInst(Builder, CI); /// Vectorize casts. 
@@ -4421,9 +4561,10 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne "Widening decision should be ready at this moment"); return WideningDecision == CM_Scalarize; } + const MaybeAlign Alignment = getLoadStoreAlignment(I); return isa(I) ? - !(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty)) - : !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty)); + !(isLegalMaskedLoad(Ty, Ptr, Alignment) || isLegalMaskedGather(Ty)) + : !(isLegalMaskedStore(Ty, Ptr, Alignment) || isLegalMaskedScatter(Ty)); } case Instruction::UDiv: case Instruction::SDiv: @@ -4452,10 +4593,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I, // Check if masking is required. // A Group may need masking for one of two reasons: it resides in a block that // needs predication, or it was decided to use masking to deal with gaps. - bool PredicatedAccessRequiresMasking = + bool PredicatedAccessRequiresMasking = Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I); - bool AccessWithGapsRequiresMasking = - Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed; + bool AccessWithGapsRequiresMasking = + Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking) return true; @@ -4466,8 +4607,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I, "Masked interleave-groups for predicated accesses are not enabled."); auto *Ty = getMemInstValueType(I); - return isa(I) ? TTI.isLegalMaskedLoad(Ty) - : TTI.isLegalMaskedStore(Ty); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + return isa(I) ? 
TTI.isLegalMaskedLoad(Ty, Alignment) + : TTI.isLegalMaskedStore(Ty, Alignment); } bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I, @@ -4675,82 +4817,96 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { Uniforms[VF].insert(Worklist.begin(), Worklist.end()); } -Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { - if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { - // TODO: It may by useful to do since it's still likely to be dynamically - // uniform if the target can skip. - LLVM_DEBUG( - dbgs() << "LV: Not inserting runtime ptr check for divergent target"); - - ORE->emit( - createMissedAnalysis("CantVersionLoopWithDivergentTarget") - << "runtime pointer checks needed. Not enabled for divergent target"); - - return None; - } - - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); - if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize. - return computeFeasibleMaxVF(OptForSize, TC); +bool LoopVectorizationCostModel::runtimeChecksRequired() { + LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n"); if (Legal->getRuntimePointerChecking()->Need) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime pointer checks needed. Enable vectorization of this " - "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); - return None; + reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz", + "runtime pointer checks needed. Enable vectorization of this " + "loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; } if (!PSE.getUnionPredicate().getPredicates().empty()) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime SCEV checks needed. 
Enable vectorization of this " - "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n"); - return None; + reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz", + "runtime SCEV checks needed. Enable vectorization of this " + "loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; } // FIXME: Avoid specializing for stride==1 instead of bailing out. if (!Legal->getLAI()->getSymbolicStrides().empty()) { - ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize") - << "runtime stride == 1 checks needed. Enable vectorization of " - "this loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os/-Oz"); - LLVM_DEBUG( - dbgs() - << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n"); + reportVectorizationFailure("Runtime stride check is required with -Os/-Oz", + "runtime stride == 1 checks needed. Enable vectorization of " + "this loop with '#pragma clang loop vectorize(enable)' when " + "compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; + } + + return false; +} + +Optional LoopVectorizationCostModel::computeMaxVF() { + if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { + // TODO: It may by useful to do since it's still likely to be dynamically + // uniform if the target can skip. + reportVectorizationFailure( + "Not inserting runtime ptr check for divergent target", + "runtime pointer checks needed. Not enabled for divergent target", + "CantVersionLoopWithDivergentTarget", ORE, TheLoop); return None; } - // If we optimize the program for size, avoid creating the tail loop. 
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); - if (TC == 1) { - ORE->emit(createMissedAnalysis("SingleIterationLoop") - << "loop trip count is one, irrelevant for vectorization"); - LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n"); + reportVectorizationFailure("Single iteration (non) loop", + "loop trip count is one, irrelevant for vectorization", + "SingleIterationLoop", ORE, TheLoop); return None; } - // Record that scalar epilogue is not allowed. - LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n"); + switch (ScalarEpilogueStatus) { + case CM_ScalarEpilogueAllowed: + return computeFeasibleMaxVF(TC); + case CM_ScalarEpilogueNotNeededUsePredicate: + LLVM_DEBUG( + dbgs() << "LV: vector predicate hint/switch found.\n" + << "LV: Not allowing scalar epilogue, creating predicated " + << "vector loop.\n"); + break; + case CM_ScalarEpilogueNotAllowedLowTripLoop: + // fallthrough as a special case of OptForSize + case CM_ScalarEpilogueNotAllowedOptSize: + if (ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedOptSize) + LLVM_DEBUG( + dbgs() << "LV: Not allowing scalar epilogue due to -Os/-Oz.\n"); + else + LLVM_DEBUG(dbgs() << "LV: Not allowing scalar epilogue due to low trip " + << "count.\n"); - IsScalarEpilogueAllowed = !OptForSize; + // Bail if runtime checks are required, which are not good when optimising + // for size. + if (runtimeChecksRequired()) + return None; + break; + } + + // Now try the tail folding - // We don't create an epilogue when optimizing for size. // Invalidate interleave groups that require an epilogue if we can't mask // the interleave-group. 
- if (!useMaskedInterleavedAccesses(TTI)) + if (!useMaskedInterleavedAccesses(TTI)) InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); - unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); - + unsigned MaxVF = computeFeasibleMaxVF(TC); if (TC > 0 && TC % MaxVF == 0) { + // Accept MaxVF if we do not have a tail. LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); return MaxVF; } @@ -4759,28 +4915,30 @@ Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - if (Legal->canFoldTailByMasking()) { + if (Legal->prepareToFoldTailByMasking()) { FoldTailByMasking = true; return MaxVF; } if (TC == 0) { - ORE->emit( - createMissedAnalysis("UnknownLoopCountComplexCFG") - << "unable to calculate the loop count due to complex control flow"); + reportVectorizationFailure( + "Unable to calculate the loop count due to complex control flow", + "unable to calculate the loop count due to complex control flow", + "UnknownLoopCountComplexCFG", ORE, TheLoop); return None; } - ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize") - << "cannot optimize for size and vectorize at the same time. " - "Enable vectorization of this loop with '#pragma clang loop " - "vectorize(enable)' when compiling with -Os/-Oz"); + reportVectorizationFailure( + "Cannot optimize for size and vectorize at the same time.", + "cannot optimize for size and vectorize at the same time. 
" + "Enable vectorization of this loop with '#pragma clang loop " + "vectorize(enable)' when compiling with -Os/-Oz", + "NoTailLoopWithOptForSize", ORE, TheLoop); return None; } unsigned -LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, - unsigned ConstTripCount) { +LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) { MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); unsigned SmallestType, WidestType; std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); @@ -4818,8 +4976,8 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, } unsigned MaxVF = MaxVectorSize; - if (TTI.shouldMaximizeVectorBandwidth(OptForSize) || - (MaximizeBandwidth && !OptForSize)) { + if (TTI.shouldMaximizeVectorBandwidth(!isScalarEpilogueAllowed()) || + (MaximizeBandwidth && isScalarEpilogueAllowed())) { // Collect all viable vectorization factors larger than the default MaxVF // (i.e. MaxVectorSize). SmallVector VFs; @@ -4832,9 +4990,14 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, // Select the largest VF which doesn't require more registers than existing // ones. - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); for (int i = RUs.size() - 1; i >= 0; --i) { - if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { + bool Selected = true; + for (auto& pair : RUs[i].MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + if (pair.second > TargetNumRegisters) + Selected = false; + } + if (Selected) { MaxVF = VFs[i]; break; } @@ -4886,10 +5049,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { } if (!EnableCondStoresVectorization && NumPredStores) { - ORE->emit(createMissedAnalysis("ConditionalStore") - << "store that is conditionally executed prevents vectorization"); - LLVM_DEBUG( - dbgs() << "LV: No vectorization. 
There are conditional stores.\n"); + reportVectorizationFailure("There are conditional stores.", + "store that is conditionally executed prevents vectorization", + "ConditionalStore", ORE, TheLoop); Width = 1; Cost = ScalarCost; } @@ -4958,8 +5120,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { return {MinWidth, MaxWidth}; } -unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, - unsigned VF, +unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF, unsigned LoopCost) { // -- The interleave heuristics -- // We interleave the loop in order to expose ILP and reduce the loop overhead. @@ -4975,8 +5136,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. - // When we optimize for size, we don't interleave. - if (OptForSize) + if (!isScalarEpilogueAllowed()) return 1; // We used the distance for the interleave count. @@ -4988,22 +5148,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; - unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers\n"); - - if (VF == 1) { - if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumScalarRegs; - } else { - if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) - TargetNumRegisters = ForceTargetNumVectorRegs; - } - RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. - R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); + for (auto& pair : R.MaxLocalUsers) { + pair.second = std::max(pair.second, 1U); + } // We calculate the interleave count using the following formula. 
// Subtract the number of loop invariants from the number of available @@ -5016,13 +5166,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // We also want power of two interleave counts to ensure that the induction // variable of the vector loop wraps to zero, when tail is folded by masking; // this currently happens when OptForSize, in which case IC is set to 1 above. - unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / - R.MaxLocalUsers); + unsigned IC = UINT_MAX; - // Don't count the induction variable as interleaved. - if (EnableIndVarRegisterHeur) - IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / - std::max(1U, (R.MaxLocalUsers - 1))); + for (auto& pair : R.MaxLocalUsers) { + unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers of " + << TTI.getRegisterClassName(pair.first) << " register class\n"); + if (VF == 1) { + if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumScalarRegs; + } else { + if (ForceTargetNumVectorRegs.getNumOccurrences() > 0) + TargetNumRegisters = ForceTargetNumVectorRegs; + } + unsigned MaxLocalUsers = pair.second; + unsigned LoopInvariantRegs = 0; + if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end()) + LoopInvariantRegs = R.LoopInvariantRegs[pair.first]; + + unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers); + // Don't count the induction variable as interleaved. + if (EnableIndVarRegisterHeur) { + TmpIC = + PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) / + std::max(1U, (MaxLocalUsers - 1))); + } + + IC = std::min(IC, TmpIC); + } // Clamp the interleave ranges to reasonable counts. 
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); @@ -5036,6 +5208,14 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } + // If the trip count is constant, limit the interleave count to be less than + // the trip count divided by VF. + if (TC > 0) { + assert(TC >= VF && "VF exceeds trip count?"); + if ((TC / VF) < MaxInterleaveCount) + MaxInterleaveCount = (TC / VF); + } + // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. if (LoopCost == 0) @@ -5044,7 +5224,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, assert(LoopCost && "Non-zero loop cost expected"); // Clamp the calculated IC to be between the 1 and the max interleave count - // that the target allows. + // that the target and trip count allows. if (IC > MaxInterleaveCount) IC = MaxInterleaveCount; else if (IC < 1) @@ -5196,7 +5376,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { const DataLayout &DL = TheFunction->getParent()->getDataLayout(); SmallVector RUs(VFs.size()); - SmallVector MaxUsages(VFs.size(), 0); + SmallVector, 8> MaxUsages(VFs.size()); LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); @@ -5226,21 +5406,45 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { - if (VFs[j] == 1) { - MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); - continue; - } - collectUniformsAndScalars(VFs[j]); // Count the number of live intervals. - unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. 
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() || - isScalarAfterVectorization(Inst, VFs[j])) - continue; - RegUsage += GetRegUsage(Inst->getType(), VFs[j]); + SmallMapVector RegUsage; + + if (VFs[j] == 1) { + for (auto Inst : OpenIntervals) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } + } else { + collectUniformsAndScalars(VFs[j]); + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end()) + continue; + if (isScalarAfterVectorization(Inst, VFs[j])) { + unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = 1; + else + RegUsage[ClassID] += 1; + } else { + unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType()); + if (RegUsage.find(ClassID) == RegUsage.end()) + RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]); + else + RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]); + } + } + } + + for (auto& pair : RegUsage) { + if (MaxUsages[j].find(pair.first) != MaxUsages[j].end()) + MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second); + else + MaxUsages[j][pair.first] = pair.second; } - MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " @@ -5251,18 +5455,34 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } for (unsigned i = 0, e = VFs.size(); i < e; ++i) { - unsigned Invariant = 0; - if (VFs[i] == 1) - Invariant = LoopInvariants.size(); - else { - for (auto Inst : LoopInvariants) - Invariant += GetRegUsage(Inst->getType(), VFs[i]); + SmallMapVector Invariant; + + for (auto Inst : LoopInvariants) { + unsigned Usage = VFs[i] == 1 ? 
1 : GetRegUsage(Inst->getType(), VFs[i]); + unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType()); + if (Invariant.find(ClassID) == Invariant.end()) + Invariant[ClassID] = Usage; + else + Invariant[ClassID] += Usage; } - LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant - << '\n'); + LLVM_DEBUG({ + dbgs() << "LV(REG): VF = " << VFs[i] << '\n'; + dbgs() << "LV(REG): Found max usage: " << MaxUsages[i].size() + << " item\n"; + for (const auto &pair : MaxUsages[i]) { + dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) << ", " << pair.second + << " registers\n"; + } + dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() + << " item\n"; + for (const auto &pair : Invariant) { + dbgs() << "LV(REG): RegisterClass: " + << TTI.getRegisterClassName(pair.first) << ", " << pair.second + << " registers\n"; + } + }); RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; @@ -5511,7 +5731,6 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, Type *ValTy = getMemInstValueType(I); auto SE = PSE.getSE(); - unsigned Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); Value *Ptr = getLoadStorePointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); @@ -5525,9 +5744,9 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Don't pass *I here, since it is scalar but will actually be part of a // vectorized loop where the user of it is a vectorized instruction. - Cost += VF * - TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, - AS); + const MaybeAlign Alignment = getLoadStoreAlignment(I); + Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), + Alignment ? 
Alignment->value() : 0, AS); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -5552,18 +5771,20 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned Cost = 0; if (Legal->isMaskRequired(I)) - Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? Alignment->value() : 0, AS); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, + Alignment ? Alignment->value() : 0, AS, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -5575,33 +5796,37 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Load, ValTy, + Alignment ? 
Alignment->value() : 0, AS) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } StoreInst *SI = cast(I); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) + - (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost( - Instruction::ExtractElement, - VectorTy, VF - 1)); + TTI.getMemoryOpCost(Instruction::Store, ValTy, + Alignment ? Alignment->value() : 0, AS) + + (isLoopInvariantStoreValue + ? 0 + : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, + VF - 1)); } unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, unsigned VF) { Type *ValTy = getMemInstValueType(I); Type *VectorTy = ToVectorTy(ValTy, VF); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); Value *Ptr = getLoadStorePointerOperand(I); return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, - Legal->isMaskRequired(I), Alignment); + Legal->isMaskRequired(I), + Alignment ? Alignment->value() : 0); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, @@ -5626,8 +5851,8 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, } // Calculate the cost of the whole interleaved group. - bool UseMaskForGaps = - Group->requiresScalarEpilogue() && !IsScalarEpilogueAllowed; + bool UseMaskForGaps = + Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); unsigned Cost = TTI.getInterleavedMemoryOpCost( I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlignment(), AS, Legal->isMaskRequired(I), UseMaskForGaps); @@ -5648,11 +5873,12 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, // moment. 
if (VF == 1) { Type *ValTy = getMemInstValueType(I); - unsigned Alignment = getLoadStoreAlignment(I); + const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, + Alignment ? Alignment->value() : 0, AS, I); } return getWideningCost(I, VF); } @@ -6167,8 +6393,7 @@ static unsigned determineVPlanVF(const unsigned WidestVectorRegBits, } VectorizationFactor -LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, - unsigned UserVF) { +LoopVectorizationPlanner::planInVPlanNativePath(unsigned UserVF) { unsigned VF = UserVF; // Outer loop handling: They may require CFG and instruction level // transformations before even evaluating whether vectorization is profitable. @@ -6207,10 +6432,9 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, return VectorizationFactor::Disabled(); } -Optional LoopVectorizationPlanner::plan(bool OptForSize, - unsigned UserVF) { +Optional LoopVectorizationPlanner::plan(unsigned UserVF) { assert(OrigLoop->empty() && "Inner loop expected."); - Optional MaybeMaxVF = CM.computeMaxVF(OptForSize); + Optional MaybeMaxVF = CM.computeMaxVF(); if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved. return None; @@ -6840,8 +7064,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, // If the tail is to be folded by masking, the primary induction variable // needs to be represented in VPlan for it to model early-exit masking. - if (CM.foldTailByMasking()) + // Also, both the Phi and the live-out instruction of each reduction are + // required in order to introduce a select between them in VPlan. 
+ if (CM.foldTailByMasking()) { NeedDef.insert(Legal->getPrimaryInduction()); + for (auto &Reduction : *Legal->getReductionVars()) { + NeedDef.insert(Reduction.first); + NeedDef.insert(Reduction.second.getLoopExitInstr()); + } + } // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For @@ -6873,7 +7104,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); - auto Plan = llvm::make_unique(VPBB); + auto Plan = std::make_unique(VPBB); VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. @@ -6968,6 +7199,18 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; + // Finally, if tail is folded by masking, introduce selects between the phi + // and the live-out instruction of each reduction, at the end of the latch. 
+ if (CM.foldTailByMasking()) { + Builder.setInsertPoint(VPBB); + auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan); + for (auto &Reduction : *Legal->getReductionVars()) { + VPValue *Phi = Plan->getVPValue(Reduction.first); + VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr()); + Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi}); + } + } + std::string PlanName; raw_string_ostream RSO(PlanName); unsigned VF = Range.Start; @@ -6993,7 +7236,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = llvm::make_unique(); + auto Plan = std::make_unique(); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); @@ -7199,6 +7442,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); } +static ScalarEpilogueLowering +getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { + ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed; + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + (F->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI))) + SEL = CM_ScalarEpilogueNotAllowedOptSize; + else if (PreferPredicateOverEpilog || Hints.getPredicate()) + SEL = CM_ScalarEpilogueNotNeededUsePredicate; + + return SEL; +} + // Process the loop in the VPlan-native vectorization path. 
This path builds // VPlan upfront in the vectorization pipeline, which allows to apply // VPlan-to-VPlan transformations from the very beginning without modifying the @@ -7213,7 +7470,9 @@ static bool processLoopInVPlanNativePath( assert(EnableVPlanNativePath && "VPlan-native path is disabled."); Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - LoopVectorizationCostModel CM(L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, + ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI); + + LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an @@ -7223,15 +7482,8 @@ static bool processLoopInVPlanNativePath( // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); - // Check the function attributes and profiles to find out if this function - // should be optimized for size. - bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && - (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); - // Plan how to best vectorize, return the best VF and its cost. - const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF); + const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF); // If we are stress testing VPlan builds, do not attempt to generate vector // code. Masked vector code generation support will follow soon. @@ -7310,10 +7562,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the function attributes and profiles to find out if this function // should be optimized for size. 
- bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && - (F->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); + ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI); // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before @@ -7325,36 +7574,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { ORE, BFI, PSI, Hints); assert(L->empty() && "Inner loop expected."); + // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads. - // Prefer constant trip counts over profile data, over upper bound estimate. - unsigned ExpectedTC = 0; - bool HasExpectedTC = false; - if (const SCEVConstant *ConstExits = - dyn_cast(SE->getBackedgeTakenCount(L))) { - const APInt &ExitsCount = ConstExits->getAPInt(); - // We are interested in small values for ExpectedTC. Skip over those that - // can't fit an unsigned. - if (ExitsCount.ult(std::numeric_limits::max())) { - ExpectedTC = static_cast(ExitsCount.getZExtValue()) + 1; - HasExpectedTC = true; - } - } - // ExpectedTC may be large because it's bound by a variable. Check - // profiling information to validate we should vectorize. - if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) { - auto EstimatedTC = getLoopEstimatedTripCount(L); - if (EstimatedTC) { - ExpectedTC = *EstimatedTC; - HasExpectedTC = true; - } - } - if (!HasExpectedTC) { - ExpectedTC = SE->getSmallConstantMaxTripCount(L); - HasExpectedTC = (ExpectedTC > 0); - } - - if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { + auto ExpectedTC = getSmallBestKnownTC(*SE, L); + if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. 
" << "This loop is worth vectorizing only if no scalar " << "iteration overheads are incurred."); @@ -7362,10 +7586,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { LLVM_DEBUG(dbgs() << "\n"); - // Loops with a very small trip count are considered for vectorization - // under OptForSize, thereby making sure the cost of their loop body is - // dominant, free of runtime guards and scalar iteration overheads. - OptForSize = true; + SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; } } @@ -7374,11 +7595,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); - ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), - "NoImplicitFloat", L) - << "loop not vectorized due to NoImplicitFloat attribute"); + reportVectorizationFailure( + "Can't vectorize when the NoImplicitFloat attribute is used", + "loop not vectorized due to NoImplicitFloat attribute", + "NoImplicitFloat", ORE, L); Hints.emitRemarkWithHints(); return false; } @@ -7389,11 +7609,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { // additional fp-math flags can help. if (Hints.isPotentiallyUnsafe() && TTI->isFPVectorizationPotentiallyUnsafe()) { - LLVM_DEBUG( - dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); - ORE->emit( - createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) - << "loop not vectorized due to unsafe FP support."); + reportVectorizationFailure( + "Potentially unsafe FP op prevents vectorization", + "loop not vectorized due to unsafe FP support.", + "UnsafeFP", ORE, L); Hints.emitRemarkWithHints(); return false; } @@ -7411,8 +7630,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { } // Use the cost model. 
- LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, - &Hints, IAI); + LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, + F, &Hints, IAI); CM.collectValuesToIgnore(); // Use the planner for vectorization. @@ -7422,7 +7641,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned UserVF = Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. - Optional MaybeVF = LVP.plan(OptForSize, UserVF); + Optional MaybeVF = LVP.plan(UserVF); VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; @@ -7431,7 +7650,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (MaybeVF) { VF = *MaybeVF; // Select the interleave count. - IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); + IC = CM.selectInterleaveCount(VF.Width, VF.Cost); } // Identify the diagnostic messages that should be produced. @@ -7609,7 +7828,8 @@ bool LoopVectorizePass::runImpl( // The second condition is necessary because, even if the target has no // vector registers, loop vectorization may still enable scalar // interleaving. - if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) && + TTI->getMaxInterleaveFactor(1) < 2) return false; bool Changed = false; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 27a86c0bca9..974eff9974d 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -194,10 +194,13 @@ static bool allSameBlock(ArrayRef VL) { return true; } -/// \returns True if all of the values in \p VL are constants. +/// \returns True if all of the values in \p VL are constants (but not +/// globals/constant expressions). static bool allConstant(ArrayRef VL) { + // Constant expressions and globals can't be vectorized like normal integer/FP + // constants. 
for (Value *i : VL) - if (!isa(i)) + if (!isa(i) || isa(i) || isa(i)) return false; return true; } @@ -486,6 +489,7 @@ namespace slpvectorizer { /// Bottom Up SLP Vectorizer. class BoUpSLP { struct TreeEntry; + struct ScheduleData; public: using ValueList = SmallVector; @@ -614,6 +618,15 @@ public: /// vectorizable. We do not vectorize such trees. bool isTreeTinyAndNotFullyVectorizable() const; + /// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values + /// can be load combined in the backend. Load combining may not be allowed in + /// the IR optimizer, so we do not want to alter the pattern. For example, + /// partially transforming a scalar bswap() pattern into vector code is + /// effectively impossible for the backend to undo. + /// TODO: If load combining is allowed in the IR optimizer, this analysis + /// may not be necessary. + bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const; + OptimizationRemarkEmitter *getORE() { return ORE; } /// This structure holds any data we need about the edges being traversed @@ -1117,6 +1130,14 @@ public: #endif }; + /// Checks if the instruction is marked for deletion. + bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } + + /// Marks values operands for later deletion by replacing them with Undefs. + void eraseInstructions(ArrayRef AV); + + ~BoUpSLP(); + private: /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1153,8 +1174,7 @@ private: /// Set the Builder insert point to one after the last instruction in /// the bundle - void setInsertPointAfterBundle(ArrayRef VL, - const InstructionsState &S); + void setInsertPointAfterBundle(TreeEntry *E); /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); @@ -1220,27 +1240,37 @@ private: /// reordering of operands during buildTree_rec() and vectorizeTree(). 
SmallVector Operands; + /// The main/alternate instruction. + Instruction *MainOp = nullptr; + Instruction *AltOp = nullptr; + public: /// Set this bundle's \p OpIdx'th operand to \p OpVL. - void setOperand(unsigned OpIdx, ArrayRef OpVL, - ArrayRef ReuseShuffleIndices) { + void setOperand(unsigned OpIdx, ArrayRef OpVL) { if (Operands.size() < OpIdx + 1) Operands.resize(OpIdx + 1); assert(Operands[OpIdx].size() == 0 && "Already resized?"); Operands[OpIdx].resize(Scalars.size()); for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane) - Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty()) - ? OpVL[ReuseShuffleIndices[Lane]] - : OpVL[Lane]; + Operands[OpIdx][Lane] = OpVL[Lane]; } - /// If there is a user TreeEntry, then set its operand. - void trySetUserTEOperand(const EdgeInfo &UserTreeIdx, - ArrayRef OpVL, - ArrayRef ReuseShuffleIndices) { - if (UserTreeIdx.UserTE) - UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL, - ReuseShuffleIndices); + /// Set the operands of this bundle in their original order. + void setOperandsInOrder() { + assert(Operands.empty() && "Already initialized?"); + auto *I0 = cast(Scalars[0]); + Operands.resize(I0->getNumOperands()); + unsigned NumLanes = Scalars.size(); + for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands(); + OpIdx != NumOperands; ++OpIdx) { + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + assert(I->getNumOperands() == NumOperands && + "Expected same number of operands"); + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } + } } /// \returns the \p OpIdx operand of this TreeEntry. @@ -1249,6 +1279,9 @@ private: return Operands[OpIdx]; } + /// \returns the number of operands. + unsigned getNumOperands() const { return Operands.size(); } + /// \return the single \p OpIdx operand. 
Value *getSingleOperand(unsigned OpIdx) const { assert(OpIdx < Operands.size() && "Off bounds"); @@ -1256,6 +1289,58 @@ private: return Operands[OpIdx][0]; } + /// Some of the instructions in the list have alternate opcodes. + bool isAltShuffle() const { + return getOpcode() != getAltOpcode(); + } + + bool isOpcodeOrAlt(Instruction *I) const { + unsigned CheckedOpcode = I->getOpcode(); + return (getOpcode() == CheckedOpcode || + getAltOpcode() == CheckedOpcode); + } + + /// Chooses the correct key for scheduling data. If \p Op has the same (or + /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is + /// \p OpValue. + Value *isOneOf(Value *Op) const { + auto *I = dyn_cast(Op); + if (I && isOpcodeOrAlt(I)) + return Op; + return MainOp; + } + + void setOperations(const InstructionsState &S) { + MainOp = S.MainOp; + AltOp = S.AltOp; + } + + Instruction *getMainOp() const { + return MainOp; + } + + Instruction *getAltOp() const { + return AltOp; + } + + /// The main/alternate opcodes for the list of instructions. + unsigned getOpcode() const { + return MainOp ? MainOp->getOpcode() : 0; + } + + unsigned getAltOpcode() const { + return AltOp ? AltOp->getOpcode() : 0; + } + + /// Update operations state of this entry if reorder occurred. + bool updateStateIfReorder() { + if (ReorderIndices.empty()) + return false; + InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front()); + setOperations(S); + return true; + } + #ifndef NDEBUG /// Debug printer. 
LLVM_DUMP_METHOD void dump() const { @@ -1269,6 +1354,8 @@ private: for (Value *V : Scalars) dbgs().indent(2) << *V << "\n"; dbgs() << "NeedToGather: " << NeedToGather << "\n"; + dbgs() << "MainOp: " << *MainOp << "\n"; + dbgs() << "AltOp: " << *AltOp << "\n"; dbgs() << "VectorizedValue: "; if (VectorizedValue) dbgs() << *VectorizedValue; @@ -1279,12 +1366,12 @@ private: if (ReuseShuffleIndices.empty()) dbgs() << "Emtpy"; else - for (unsigned Idx : ReuseShuffleIndices) - dbgs() << Idx << ", "; + for (unsigned ReuseIdx : ReuseShuffleIndices) + dbgs() << ReuseIdx << ", "; dbgs() << "\n"; dbgs() << "ReorderIndices: "; - for (unsigned Idx : ReorderIndices) - dbgs() << Idx << ", "; + for (unsigned ReorderIdx : ReorderIndices) + dbgs() << ReorderIdx << ", "; dbgs() << "\n"; dbgs() << "UserTreeIndices: "; for (const auto &EInfo : UserTreeIndices) @@ -1295,11 +1382,13 @@ private: }; /// Create a new VectorizableTree entry. - TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, + TreeEntry *newTreeEntry(ArrayRef VL, Optional Bundle, + const InstructionsState &S, const EdgeInfo &UserTreeIdx, ArrayRef ReuseShuffleIndices = None, ArrayRef ReorderIndices = None) { - VectorizableTree.push_back(llvm::make_unique(VectorizableTree)); + bool Vectorized = (bool)Bundle; + VectorizableTree.push_back(std::make_unique(VectorizableTree)); TreeEntry *Last = VectorizableTree.back().get(); Last->Idx = VectorizableTree.size() - 1; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); @@ -1307,11 +1396,22 @@ private: Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); Last->ReorderIndices = ReorderIndices; + Last->setOperations(S); if (Vectorized) { for (int i = 0, e = VL.size(); i != e; ++i) { assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); - ScalarToTreeEntry[VL[i]] = Last->Idx; + ScalarToTreeEntry[VL[i]] = Last; } + // Update the scheduler bundle to point to this TreeEntry. 
+ unsigned Lane = 0; + for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember; + BundleMember = BundleMember->NextInBundle) { + BundleMember->TE = Last; + BundleMember->Lane = Lane; + ++Lane; + } + assert((!Bundle.getValue() || Lane == VL.size()) && + "Bundle and VL out of sync"); } else { MustGather.insert(VL.begin(), VL.end()); } @@ -1319,7 +1419,6 @@ private: if (UserTreeIdx.UserTE) Last->UserTreeIndices.push_back(UserTreeIdx); - Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices); return Last; } @@ -1340,19 +1439,19 @@ private: TreeEntry *getTreeEntry(Value *V) { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + return I->second; return nullptr; } const TreeEntry *getTreeEntry(Value *V) const { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + return I->second; return nullptr; } /// Maps a specific scalar to its tree entry. - SmallDenseMap ScalarToTreeEntry; + SmallDenseMap ScalarToTreeEntry; /// A list of scalars that we found that we need to keep as scalars. ValueSet MustGather; @@ -1408,15 +1507,14 @@ private: /// This is required to ensure that there are no incorrect collisions in the /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. - void eraseInstruction(Instruction *I) { - I->removeFromParent(); - I->dropAllReferences(); - DeletedInstructions.emplace_back(I); + void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) { + auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first; + It->getSecond() = It->getSecond() && ReplaceOpsWithUndef; } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector DeletedInstructions; + DenseMap DeletedInstructions; /// A list of values that need to extracted out of the tree. 
/// This list holds pairs of (Internal Scalar : External User). External User @@ -1453,6 +1551,8 @@ private: UnscheduledDepsInBundle = UnscheduledDeps; clearDependencies(); OpValue = OpVal; + TE = nullptr; + Lane = -1; } /// Returns true if the dependency information has been calculated. @@ -1559,6 +1659,12 @@ private: /// Opcode of the current instruction in the schedule data. Value *OpValue = nullptr; + + /// The TreeEntry that this instruction corresponds to. + TreeEntry *TE = nullptr; + + /// The lane of this node in the TreeEntry. + int Lane = -1; }; #ifndef NDEBUG @@ -1633,10 +1739,9 @@ private: continue; } // Handle the def-use chain dependencies. - for (Use &U : BundleMember->Inst->operands()) { - auto *I = dyn_cast(U.get()); - if (!I) - continue; + + // Decrement the unscheduled counter and insert to ready list if ready. + auto &&DecrUnsched = [this, &ReadyList](Instruction *I) { doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) { if (OpDef && OpDef->hasValidDependencies() && OpDef->incrementUnscheduledDeps(-1) == 0) { @@ -1651,6 +1756,24 @@ private: << "SLP: gets ready (def): " << *DepBundle << "\n"); } }); + }; + + // If BundleMember is a vector bundle, its operands may have been + // reordered duiring buildTree(). We therefore need to get its operands + // through the TreeEntry. + if (TreeEntry *TE = BundleMember->TE) { + int Lane = BundleMember->Lane; + assert(Lane >= 0 && "Lane not set"); + for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands(); + OpIdx != NumOperands; ++OpIdx) + if (auto *I = dyn_cast(TE->getOperand(OpIdx)[Lane])) + DecrUnsched(I); + } else { + // If BundleMember is a stand-alone instruction, no operand reordering + // has taken place, so we directly access its operands. + for (Use &U : BundleMember->Inst->operands()) + if (auto *I = dyn_cast(U.get())) + DecrUnsched(I); } // Handle the memory dependencies. 
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) { @@ -1697,8 +1820,11 @@ private: /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, - const InstructionsState &S); + /// \returns the scheduling bundle. The returned Optional value is non-None + /// if \p VL is allowed to be scheduled. + Optional + tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, + const InstructionsState &S); /// Un-bundles a group of instructions. void cancelScheduling(ArrayRef VL, Value *OpValue); @@ -1945,6 +2071,30 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { } // end namespace llvm +BoUpSLP::~BoUpSLP() { + for (const auto &Pair : DeletedInstructions) { + // Replace operands of ignored instructions with Undefs in case if they were + // marked for deletion. + if (Pair.getSecond()) { + Value *Undef = UndefValue::get(Pair.getFirst()->getType()); + Pair.getFirst()->replaceAllUsesWith(Undef); + } + Pair.getFirst()->dropAllReferences(); + } + for (const auto &Pair : DeletedInstructions) { + assert(Pair.getFirst()->use_empty() && + "trying to erase instruction with users."); + Pair.getFirst()->eraseFromParent(); + } +} + +void BoUpSLP::eraseInstructions(ArrayRef AV) { + for (auto *V : AV) { + if (auto *I = dyn_cast(V)) + eraseInstruction(I, /*ReplaceWithUndef=*/true); + }; +} + void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -2026,28 +2176,28 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // Don't handle vectors. 
if (S.OpValue->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } @@ -2055,11 +2205,11 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // the same block. // Don't vectorize ephemeral values. - for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (EphValues.count(VL[i])) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + for (Value *V : VL) { + if (EphValues.count(V)) { + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is ephemeral.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2069,7 +2219,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); if (!E->isSame(VL)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } // Record the reuse of the tree node. 
FIXME, currently this is only used to @@ -2077,19 +2227,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, E->UserTreeIndices.push_back(UserTreeIdx); LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue << ".\n"); - E->trySetUserTEOperand(UserTreeIdx, VL, None); return; } // Check that none of the instructions in the bundle are already in the tree. - for (unsigned i = 0, e = VL.size(); i != e; ++i) { - auto *I = dyn_cast(VL[i]); + for (Value *V : VL) { + auto *I = dyn_cast(V); if (!I) continue; if (getTreeEntry(I)) { - LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2097,10 +2246,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If any of the scalars is marked as a value that needs to stay scalar, then // we need to gather the scalars. // The reduction nodes (stored in UserIgnoreList) also should stay scalar. - for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { + for (Value *V : VL) { + if (MustGather.count(V) || is_contained(UserIgnoreList, V)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } } @@ -2114,7 +2263,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. 
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } @@ -2128,13 +2277,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Res.second) UniqueValues.emplace_back(V); } - if (UniqueValues.size() == VL.size()) { + size_t NumUniqueScalarValues = UniqueValues.size(); + if (NumUniqueScalarValues == VL.size()) { ReuseShuffleIndicies.clear(); } else { LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) { + if (NumUniqueScalarValues <= 1 || + !llvm::isPowerOf2_32(NumUniqueScalarValues)) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; } VL = UniqueValues; @@ -2142,16 +2293,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto &BSRef = BlocksSchedules[BB]; if (!BSRef) - BSRef = llvm::make_unique(BB); + BSRef = std::make_unique(BB); BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this, S)) { + Optional Bundle = BS.tryScheduleBundle(VL, this, S); + if (!Bundle) { LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); @@ -2160,7 +2313,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, (unsigned) Instruction::ShuffleVector : S.getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: { - PHINode *PH = dyn_cast(VL0); + auto *PH = cast(VL0); // Check for terminator values (e.g. invoke). 
for (unsigned j = 0; j < VL.size(); ++j) @@ -2172,23 +2325,29 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (terminator use).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = + newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); + // Keeps the reordered operands to avoid code duplication. + SmallVector OperandsVec; for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. for (Value *j : VL) Operands.push_back(cast(j)->getIncomingValueForBlock( PH->getIncomingBlock(i))); - - buildTree_rec(Operands, Depth + 1, {TE, i}); + TE->setOperand(i, Operands); + OperandsVec.push_back(Operands); } + for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx) + buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx}); return; } case Instruction::ExtractValue: @@ -2198,13 +2357,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Reuse) { LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n"); ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. 
ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0); return; } if (!CurrentOrder.empty()) { @@ -2220,17 +2379,19 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto StoredCurrentOrderAndNum = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++StoredCurrentOrderAndNum->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies, + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies, StoredCurrentOrderAndNum->getFirst()); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0); return; } LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); - newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); BS.cancelScheduling(VL, VL0); return; } @@ -2246,7 +2407,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -2259,7 +2421,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto *L = cast(V); if (!L->isSimple()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } @@ -2289,15 +2452,18 @@ 
void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (CurrentOrder.empty()) { // Original loads are consecutive and does not require reordering. ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, - ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, + UserTreeIdx, ReuseShuffleIndicies); + TE->setOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++I->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, - ReuseShuffleIndicies, I->getFirst()); + TreeEntry *TE = + newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies, I->getFirst()); + TE->setOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } return; @@ -2306,7 +2472,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } case Instruction::ZExt: @@ -2322,24 +2489,27 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); - for (unsigned i = 0; i < VL.size(); ++i) { - Type *Ty = cast(VL[i])->getOperand(0)->getType(); + for (Value *V : VL) { + Type *Ty = cast(V)->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle 
/*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } @@ -2351,19 +2521,21 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CmpInst::Predicate P0 = cast(VL0)->getPredicate(); CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0); Type *ComparedTy = VL0->getOperand(0)->getType(); - for (unsigned i = 1, e = VL.size(); i < e; ++i) { - CmpInst *Cmp = cast(VL[i]); + for (Value *V : VL) { + CmpInst *Cmp = cast(V); if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; @@ -2384,7 +2556,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Right.push_back(RHS); } } - + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; @@ -2409,7 +2582,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::And: case Instruction::Or: case Instruction::Xor: { - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + 
ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n"); // Sort operands of the instructions so that each side is more likely to @@ -2417,11 +2591,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; } + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -2434,11 +2611,12 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. - for (unsigned j = 0; j < VL.size(); ++j) { - if (cast(VL[j])->getNumOperands() != 2) { + for (Value *V : VL) { + if (cast(V)->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } @@ -2446,58 +2624,64 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // We can't combine several GEPs into one vector if they operate on // different types. Type *Ty0 = VL0->getOperand(0)->getType(); - for (unsigned j = 0; j < VL.size(); ++j) { - Type *CurTy = cast(VL[j])->getOperand(0)->getType(); + for (Value *V : VL) { + Type *CurTy = cast(V)->getOperand(0)->getType(); if (Ty0 != CurTy) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } // We don't combine GEPs with non-constant indexes. 
- for (unsigned j = 0; j < VL.size(); ++j) { - auto Op = cast(VL[j])->getOperand(1); + for (Value *V : VL) { + auto Op = cast(V)->getOperand(1); if (!isa(Op)) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); + TE->setOperandsInOrder(); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } case Instruction::Store: { - // Check if the stores are consecutive or of we need to swizzle them. + // Check if the stores are consecutive or if we need to swizzle them. 
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(0)); - + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(0)); + TE->setOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); return; } @@ -2509,7 +2693,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } @@ -2519,14 +2704,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned j = 0; j != NumArgs; ++j) if (hasVectorInstrinsicScalarOpd(ID, j)) ScalarArgs[j] = CI->getArgOperand(j); - for (unsigned i = 1, e = VL.size(); i != e; ++i) { - CallInst *CI2 = dyn_cast(VL[i]); + for (Value *V : VL) { + CallInst *CI2 = dyn_cast(V); if (!CI2 || CI2->getCalledFunction() != Int || getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); - LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + LLVM_DEBUG(dbgs() << "SLP: 
mismatched calls:" << *CI << "!=" << *V << "\n"); return; } @@ -2537,7 +2723,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Value *A1J = CI2->getArgOperand(j); if (ScalarArgs[j] != A1J) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument " << ScalarArgs[j] << "!=" << A1J << "\n"); @@ -2551,19 +2738,22 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" - << *CI << "!=" << *VL[i] << '\n'); + << *CI << "!=" << *V << '\n'); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + TE->setOperandsInOrder(); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) { - CallInst *CI2 = dyn_cast(j); + for (Value *V : VL) { + auto *CI2 = cast(V); Operands.push_back(CI2->getArgOperand(i)); } buildTree_rec(Operands, Depth + 1, {TE, i}); @@ -2575,27 +2765,32 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // then do not vectorize this instruction. 
if (!S.isAltShuffle()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); + TE->setOperand(0, Left); + TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; } + TE->setOperandsInOrder(); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { ValueList Operands; // Prepare the operand vector. - for (Value *j : VL) - Operands.push_back(cast(j)->getOperand(i)); + for (Value *V : VL) + Operands.push_back(cast(V)->getOperand(i)); buildTree_rec(Operands, Depth + 1, {TE, i}); } @@ -2603,7 +2798,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } default: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; } @@ -2738,7 +2934,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return ReuseShuffleCost + TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); } - if (getSameOpcode(VL).getOpcode() == Instruction::ExtractElement && + if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) && allSameBlock(VL)) { Optional ShuffleKind = isShuffle(VL); if (ShuffleKind.hasValue()) { @@ -2761,11 +2957,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } return ReuseShuffleCost + getGatherCost(VL); } - 
InstructionsState S = getSameOpcode(VL); - assert(S.getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); - Instruction *VL0 = cast(S.OpValue); - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); + Instruction *VL0 = E->getMainOp(); + unsigned ShuffleOrOp = + E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: return 0; @@ -2851,7 +3046,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); int ScalarEltCost = - TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -2864,7 +3059,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Check if the values are candidates to demote. if (!MinBWs.count(VL0) || VecTy != SrcVecTy) { VecCost = ReuseShuffleCost + - TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0); } return VecCost - ScalarCost; } @@ -2872,14 +3067,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { case Instruction::ICmp: case Instruction::Select: { // Calculate the cost of this instruction. 
- int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy, + int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(), VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0); + int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::FNeg: @@ -2940,12 +3135,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { SmallVector Operands(VL0->operand_values()); int ScalarEltCost = TTI->getArithmeticInstrCost( - S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); + E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK, + int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands); return ReuseShuffleCost + VecCost - ScalarCost; } @@ -3027,11 +3222,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return ReuseShuffleCost + VecCallCost - ScalarCallCost; } case Instruction::ShuffleVector: { - assert(S.isAltShuffle() && - ((Instruction::isBinaryOp(S.getOpcode()) && - Instruction::isBinaryOp(S.getAltOpcode())) || - (Instruction::isCast(S.getOpcode()) && - Instruction::isCast(S.getAltOpcode()))) && + assert(E->isAltShuffle() && + ((Instruction::isBinaryOp(E->getOpcode()) && + Instruction::isBinaryOp(E->getAltOpcode())) || + (Instruction::isCast(E->getOpcode()) && + Instruction::isCast(E->getAltOpcode()))) && "Invalid Shuffle Vector Operand"); int ScalarCost = 0; if (NeedToShuffleReuses) { @@ -3046,25 +3241,25 @@ int 
BoUpSLP::getEntryCost(TreeEntry *E) { I, TargetTransformInfo::TCK_RecipThroughput); } } - for (Value *i : VL) { - Instruction *I = cast(i); - assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); + for (Value *V : VL) { + Instruction *I = cast(V); + assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); ScalarCost += TTI->getInstructionCost( I, TargetTransformInfo::TCK_RecipThroughput); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. int VecCost = 0; - if (Instruction::isBinaryOp(S.getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy); - VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy); + if (Instruction::isBinaryOp(E->getOpcode())) { + VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy); + VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy); } else { - Type *Src0SclTy = S.MainOp->getOperand(0)->getType(); - Type *Src1SclTy = S.AltOp->getOperand(0)->getType(); + Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); + Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size()); VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty); - VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty); + VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty); + VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty); } VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; @@ -3098,6 +3293,43 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const { return true; } +bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { + if (RdxOpcode != Instruction::Or) + return false; + + unsigned NumElts = VectorizableTree[0]->Scalars.size(); + Value *FirstReduced = VectorizableTree[0]->Scalars[0]; + + // Look past 
the reduction to find a source value. Arbitrarily follow the + // path through operand 0 of any 'or'. Also, peek through optional + // shift-left-by-constant. + Value *ZextLoad = FirstReduced; + while (match(ZextLoad, m_Or(m_Value(), m_Value())) || + match(ZextLoad, m_Shl(m_Value(), m_Constant()))) + ZextLoad = cast(ZextLoad)->getOperand(0); + + // Check if the input to the reduction is an extended load. + Value *LoadPtr; + if (!match(ZextLoad, m_ZExt(m_Load(m_Value(LoadPtr))))) + return false; + + // Require that the total load bit width is a legal integer type. + // For example, <8 x i8> --> i64 is a legal integer on a 64-bit target. + // But <16 x i8> --> i128 is not, so the backend probably can't reduce it. + Type *SrcTy = LoadPtr->getType()->getPointerElementType(); + unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts; + LLVMContext &Context = FirstReduced->getContext(); + if (!TTI->isTypeLegal(IntegerType::get(Context, LoadBitWidth))) + return false; + + // Everything matched - assume that we can fold the whole sequence using + // load combining. + LLVM_DEBUG(dbgs() << "SLP: Assume load combining for scalar reduction of " + << *(cast(FirstReduced)) << "\n"); + + return true; +} + bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const { // We can vectorize the tree if its size is greater than or equal to the // minimum size specified by the MinTreeSize command line option. @@ -3319,16 +3551,16 @@ void BoUpSLP::reorderInputsAccordingToOpcode( Right = Ops.getVL(1); } -void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, - const InstructionsState &S) { +void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) { // Get the basic block this bundle is in. All instructions in the bundle // should be in this block. 
- auto *Front = cast(S.OpValue); + auto *Front = E->getMainOp(); auto *BB = Front->getParent(); - assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { - auto *I = cast(V); - return !S.isOpcodeOrAlt(I) || I->getParent() == BB; - })); + assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()), + [=](Value *V) -> bool { + auto *I = cast(V); + return !E->isOpcodeOrAlt(I) || I->getParent() == BB; + })); // The last instruction in the bundle in program order. Instruction *LastInst = nullptr; @@ -3339,7 +3571,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, // bundle. The end of the bundle is marked by null ScheduleData. if (BlocksSchedules.count(BB)) { auto *Bundle = - BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back())); + BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back())); if (Bundle && Bundle->isPartOfBundle()) for (; Bundle; Bundle = Bundle->NextInBundle) if (Bundle->OpValue == Bundle->Inst) @@ -3365,14 +3597,15 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, // we both exit early from buildTree_rec and that the bundle be out-of-order // (causing us to iterate all the way to the end of the block). if (!LastInst) { - SmallPtrSet Bundle(VL.begin(), VL.end()); + SmallPtrSet Bundle(E->Scalars.begin(), E->Scalars.end()); for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) { - if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I)) + if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I)) LastInst = &I; if (Bundle.empty()) break; } } + assert(LastInst && "Failed to find last instruction in bundle"); // Set the insertion point after the last instruction in the bundle. Set the // debug location to Front. @@ -3385,7 +3618,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { // Generate the 'InsertElement' instruction. 
for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (Instruction *Insrt = dyn_cast(Vec)) { + if (auto *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -3494,8 +3727,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return E->VectorizedValue; } - InstructionsState S = getSameOpcode(E->Scalars); - Instruction *VL0 = cast(S.OpValue); + Instruction *VL0 = E->getMainOp(); Type *ScalarTy = VL0->getType(); if (StoreInst *SI = dyn_cast(VL0)) ScalarTy = SI->getValueOperand()->getType(); @@ -3504,7 +3736,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->NeedToGather) { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3518,11 +3750,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + unsigned ShuffleOrOp = + E->isAltShuffle() ? 
(unsigned)Instruction::ShuffleVector : E->getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: { - PHINode *PH = dyn_cast(VL0); + auto *PH = cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); @@ -3577,7 +3809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = V; return V; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3612,7 +3844,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { E->VectorizedValue = NewV; return NewV; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3637,7 +3869,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *InVec = vectorizeTree(E->getOperand(0)); @@ -3646,7 +3878,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return E->VectorizedValue; } - CastInst *CI = dyn_cast(VL0); + auto *CI = cast(VL0); Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3658,7 +3890,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::FCmp: case Instruction::ICmp: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *L = vectorizeTree(E->getOperand(0)); Value *R = vectorizeTree(E->getOperand(1)); @@ -3670,7 +3902,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { CmpInst::Predicate P0 = cast(VL0)->getPredicate(); Value *V; - if (S.getOpcode() == Instruction::FCmp) + if 
(E->getOpcode() == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); else V = Builder.CreateICmp(P0, L, R); @@ -3685,7 +3917,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::Select: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Cond = vectorizeTree(E->getOperand(0)); Value *True = vectorizeTree(E->getOperand(1)); @@ -3706,7 +3938,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::FNeg: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op = vectorizeTree(E->getOperand(0)); @@ -3716,7 +3948,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V = Builder.CreateUnOp( - static_cast(S.getOpcode()), Op); + static_cast(E->getOpcode()), Op); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); @@ -3748,7 +3980,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *LHS = vectorizeTree(E->getOperand(0)); Value *RHS = vectorizeTree(E->getOperand(1)); @@ -3759,7 +3991,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V = Builder.CreateBinOp( - static_cast(S.getOpcode()), LHS, RHS); + static_cast(E->getOpcode()), LHS, + RHS); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); @@ -3776,12 +4009,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Load: { // Loads are inserted at the head of the tree because we don't want to // sink them all the way down past store instructions. 
- bool IsReorder = !E->ReorderIndices.empty(); - if (IsReorder) { - S = getSameOpcode(E->Scalars, E->ReorderIndices.front()); - VL0 = cast(S.OpValue); - } - setInsertPointAfterBundle(E->Scalars, S); + bool IsReorder = E->updateStateIfReorder(); + if (IsReorder) + VL0 = E->getMainOp(); + setInsertPointAfterBundle(E); LoadInst *LI = cast(VL0); Type *ScalarLoadTy = LI->getType(); @@ -3797,11 +4028,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (getTreeEntry(PO)) ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); - unsigned Alignment = LI->getAlignment(); + MaybeAlign Alignment = MaybeAlign(LI->getAlignment()); LI = Builder.CreateLoad(VecTy, VecPtr); - if (!Alignment) { - Alignment = DL->getABITypeAlignment(ScalarLoadTy); - } + if (!Alignment) + Alignment = MaybeAlign(DL->getABITypeAlignment(ScalarLoadTy)); LI->setAlignment(Alignment); Value *V = propagateMetadata(LI, E->Scalars); if (IsReorder) { @@ -3824,7 +4054,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned Alignment = SI->getAlignment(); unsigned AS = SI->getPointerAddressSpace(); - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *VecValue = vectorizeTree(E->getOperand(0)); Value *ScalarPtr = SI->getPointerOperand(); @@ -3840,7 +4070,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (!Alignment) Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); - ST->setAlignment(Alignment); + ST->setAlignment(Align(Alignment)); Value *V = propagateMetadata(ST, E->Scalars); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3851,7 +4081,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::GetElementPtr: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op0 = vectorizeTree(E->getOperand(0)); @@ -3878,13 +4108,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::Call: { CallInst *CI = cast(VL0); - 
setInsertPointAfterBundle(E->Scalars, S); - Function *FI; + setInsertPointAfterBundle(E); + Intrinsic::ID IID = Intrinsic::not_intrinsic; - Value *ScalarArg = nullptr; - if (CI && (FI = CI->getCalledFunction())) { + if (Function *FI = CI->getCalledFunction()) IID = FI->getIntrinsicID(); - } + + Value *ScalarArg = nullptr; std::vector OpVecs; for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { ValueList OpVL; @@ -3926,20 +4156,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return V; } case Instruction::ShuffleVector: { - assert(S.isAltShuffle() && - ((Instruction::isBinaryOp(S.getOpcode()) && - Instruction::isBinaryOp(S.getAltOpcode())) || - (Instruction::isCast(S.getOpcode()) && - Instruction::isCast(S.getAltOpcode()))) && + assert(E->isAltShuffle() && + ((Instruction::isBinaryOp(E->getOpcode()) && + Instruction::isBinaryOp(E->getAltOpcode())) || + (Instruction::isCast(E->getOpcode()) && + Instruction::isCast(E->getAltOpcode()))) && "Invalid Shuffle Vector Operand"); - Value *LHS, *RHS; - if (Instruction::isBinaryOp(S.getOpcode())) { - setInsertPointAfterBundle(E->Scalars, S); + Value *LHS = nullptr, *RHS = nullptr; + if (Instruction::isBinaryOp(E->getOpcode())) { + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); RHS = vectorizeTree(E->getOperand(1)); } else { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); } @@ -3949,16 +4179,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Value *V0, *V1; - if (Instruction::isBinaryOp(S.getOpcode())) { + if (Instruction::isBinaryOp(E->getOpcode())) { V0 = Builder.CreateBinOp( - static_cast(S.getOpcode()), LHS, RHS); + static_cast(E->getOpcode()), LHS, RHS); V1 = Builder.CreateBinOp( - static_cast(S.getAltOpcode()), LHS, RHS); + static_cast(E->getAltOpcode()), LHS, RHS); } else { V0 = Builder.CreateCast( - static_cast(S.getOpcode()), LHS, VecTy); + static_cast(E->getOpcode()), LHS, VecTy); V1 = Builder.CreateCast( - 
static_cast(S.getAltOpcode()), LHS, VecTy); + static_cast(E->getAltOpcode()), LHS, VecTy); } // Create shuffle to take alternate operations from the vector. @@ -3969,8 +4199,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { SmallVector Mask(e); for (unsigned i = 0; i < e; ++i) { auto *OpInst = cast(E->Scalars[i]); - assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); - if (OpInst->getOpcode() == S.getAltOpcode()) { + assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); + if (OpInst->getOpcode() == E->getAltOpcode()) { Mask[i] = Builder.getInt32(e + i); AltScalars.push_back(E->Scalars[i]); } else { @@ -4136,20 +4366,18 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; +#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { -#ifndef NDEBUG for (User *U : Scalar->users()) { LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to replace users in the ignorelist by undef. + // It is legal to delete users in the ignorelist. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Replacing out-of-tree value with undef"); + "Deleting out-of-tree value"); } -#endif - Value *Undef = UndefValue::get(Ty); - Scalar->replaceAllUsesWith(Undef); } +#endif LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast(Scalar)); } @@ -4165,7 +4393,7 @@ void BoUpSLP::optimizeGatherSequence() { << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. for (Instruction *I : GatherSeq) { - if (!isa(I) && !isa(I)) + if (isDeleted(I)) continue; // Check if this block is inside a loop. 
@@ -4219,6 +4447,8 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; + if (isDeleted(In)) + continue; if (!isa(In) && !isa(In)) continue; @@ -4245,11 +4475,11 @@ void BoUpSLP::optimizeGatherSequence() { // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. -bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - BoUpSLP *SLP, - const InstructionsState &S) { +Optional +BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, + const InstructionsState &S) { if (isa(S.OpValue)) - return true; + return nullptr; // Initialize the instruction bundle. Instruction *OldScheduleEnd = ScheduleEnd; @@ -4262,7 +4492,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, // instructions of the bundle. for (Value *V : VL) { if (!extendSchedulingRegion(V, S)) - return false; + return None; } for (Value *V : VL) { @@ -4308,6 +4538,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, resetSchedule(); initialFillReadyList(ReadyInsts); } + assert(Bundle && "Failed to find schedule bundle"); LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " << BB->getName() << "\n"); @@ -4329,9 +4560,9 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, } if (!Bundle->isReady()) { cancelScheduling(VL, S.OpValue); - return false; + return None; } - return true; + return Bundle; } void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, @@ -4364,7 +4595,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() { // Allocate a new ScheduleData for the instruction. 
if (ChunkPos >= ChunkSize) { - ScheduleDataChunks.push_back(llvm::make_unique(ChunkSize)); + ScheduleDataChunks.push_back(std::make_unique(ChunkSize)); ChunkPos = 0; } return &(ScheduleDataChunks.back()[ChunkPos++]); @@ -4977,7 +5208,7 @@ struct SLPVectorizer : public FunctionPass { auto *SE = &getAnalysis().getSE(); auto *TTI = &getAnalysis().getTTI(F); auto *TLIP = getAnalysisIfAvailable(); - auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; + auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; auto *AA = &getAnalysis().getAAResults(); auto *LI = &getAnalysis().getLoopInfo(); auto *DT = &getAnalysis().getDomTree(); @@ -5052,7 +5283,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // If the target claims to have no vector registers don't attempt // vectorization. - if (!TTI->getNumberOfRegisters(true)) + if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) return false; // Don't vectorize when the attribute NoImplicitFloat is used. @@ -5100,19 +5331,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, return Changed; } -/// Check that the Values in the slice in VL array are still existent in -/// the WeakTrackingVH array. -/// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakTrackingVH -/// array. -static bool hasValueBeenRAUWed(ArrayRef VL, - ArrayRef VH, unsigned SliceBegin, - unsigned SliceSize) { - VL = VL.slice(SliceBegin, SliceSize); - VH = VH.slice(SliceBegin, SliceSize); - return !std::equal(VL.begin(), VL.end(), VH.begin()); -} - bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); @@ -5124,20 +5342,20 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, if (!isPowerOf2_32(Sz) || VF < 2) return false; - // Keep track of values that were deleted by vectorizing in the loop below. 
- const SmallVector TrackValues(Chain.begin(), Chain.end()); - bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) { + ArrayRef Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) + if (llvm::any_of(Operands, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); - ArrayRef Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -5329,12 +5547,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool CandidateFound = false; int MinCost = SLPCostThreshold; - // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); - for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; - VF /= 2) { + for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { // No actual vectorization should happen, if number of parts is the same as // provided vectorization factor (i.e. the scalar type is used for vector // code during codegen). @@ -5352,13 +5566,16 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; + ArrayRef Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. 
- if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) + if (llvm::any_of(Ops, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); - ArrayRef Ops = VL.slice(I, OpsWidth); R.buildTree(Ops); Optional> Order = R.bestOrder(); @@ -5571,7 +5788,7 @@ class HorizontalReduction { Value *createOp(IRBuilder<> &Builder, const Twine &Name) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - Value *Cmp; + Value *Cmp = nullptr; switch (Kind) { case RK_Arithmetic: return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, @@ -5579,23 +5796,23 @@ class HorizontalReduction { case RK_Min: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) : Builder.CreateFCmpOLT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_Max: Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) : Builder.CreateFCmpOGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMin: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpULT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_UMax: assert(Opcode == Instruction::ICmp && "Expected integer types."); Cmp = Builder.CreateICmpUGT(LHS, RHS); - break; + return Builder.CreateSelect(Cmp, LHS, RHS, Name); case RK_None: - llvm_unreachable("Unknown reduction operation."); + break; } - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + llvm_unreachable("Unknown reduction operation."); } public: @@ -6203,6 +6420,8 @@ public: } if (V.isTreeTinyAndNotFullyVectorizable()) break; + if (V.isLoadCombineReductionCandidate(ReductionData.getOpcode())) + break; V.computeMinimumValueSizes(); @@ -6275,6 +6494,9 @@ public: } // Update users. 
ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the + // vector reductions. + V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -6323,7 +6545,7 @@ private: IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; - int ScalarReduxCost; + int ScalarReduxCost = 0; switch (ReductionData.getKind()) { case RK_Arithmetic: ScalarReduxCost = @@ -6429,10 +6651,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, /// \return true if it matches. static bool findBuildAggregate(InsertValueInst *IV, SmallVectorImpl &BuildVectorOpds) { - Value *V; do { BuildVectorOpds.push_back(IV->getInsertedValueOperand()); - V = IV->getAggregateOperand(); + Value *V = IV->getAggregateOperand(); if (isa(V)) break; IV = dyn_cast(V); @@ -6530,18 +6751,13 @@ static bool tryToVectorizeHorReductionOrInstOperands( // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. 
- SmallVector, 8> Stack(1, {Root, 0}); + SmallVector, 8> Stack(1, {Root, 0}); SmallPtrSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V; + Instruction *Inst; unsigned Level; - std::tie(V, Level) = Stack.pop_back_val(); - if (!V) - continue; - auto *Inst = dyn_cast(V); - if (!Inst) - continue; + std::tie(Inst, Level) = Stack.pop_back_val(); auto *BI = dyn_cast(Inst); auto *SI = dyn_cast(Inst); if (BI || SI) { @@ -6582,8 +6798,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast(Op)) - if (!isa(I) && I->getParent() == BB) - Stack.emplace_back(Op, Level); + if (!isa(I) && !R.isDeleted(I) && I->getParent() == BB) + Stack.emplace_back(I, Level); } return Res; } @@ -6652,11 +6868,10 @@ bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto &VH : reverse(Instructions)) { - auto *I = dyn_cast_or_null(VH); - if (!I) + for (auto *I : reverse(Instructions)) { + if (R.isDeleted(I)) continue; if (auto *LastInsertValue = dyn_cast(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -6685,7 +6900,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; - if (!VisitedInstrs.count(P)) + if (!VisitedInstrs.count(P) && !R.isDeleted(P)) Incoming.push_back(P); } @@ -6729,9 +6944,12 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); - SmallVector PostProcessInstructions; + SmallVector PostProcessInstructions; SmallDenseSet KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip instructions marked for the deletion. 
+ if (R.isDeleted(&*it)) + continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -6811,10 +7029,16 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length " << Entry.second.size() << ".\n"); - // We process the getelementptr list in chunks of 16 (like we do for - // stores) to minimize compile-time. - for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += 16) { - auto Len = std::min(BE - BI, 16); + // Process the GEP list in chunks suitable for the target's supported + // vector size. If a vector register can't hold 1 element, we are done. + unsigned MaxVecRegSize = R.getMaxVecRegSize(); + unsigned EltSize = R.getVectorElementSize(Entry.second[0]); + if (MaxVecRegSize < EltSize) + continue; + + unsigned MaxElts = MaxVecRegSize / EltSize; + for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) { + auto Len = std::min(BE - BI, MaxElts); auto GEPList = makeArrayRef(&Entry.second[BI], Len); // Initialize a set a candidate getelementptrs. Note that we use a @@ -6824,10 +7048,10 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakTrackingVHs will have - // nullified the - // values, so remove them from the set of candidates. - Candidates.remove(nullptr); + // initially collected them. If so, they are marked as deleted, so remove + // them from the set of candidates. + Candidates.remove_if( + [&R](Value *I) { return R.isDeleted(cast(I)); }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. 
Such getelementptrs are likely not good @@ -6835,18 +7059,18 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 517d759d7bf..4b80d1fb20a 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -283,6 +283,12 @@ iplist::iterator VPRecipeBase::eraseFromParent() { return getParent()->getRecipeList().erase(getIterator()); } +void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { + InsertPos->getParent()->getRecipeList().splice( + std::next(InsertPos->getIterator()), getParent()->getRecipeList(), + getIterator()); +} + void VPInstruction::generateInstruction(VPTransformState &State, unsigned Part) { IRBuilder<> &Builder = State.Builder; @@ -309,6 +315,14 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.set(this, V, Part); break; } + case Instruction::Select: { + Value *Cond = State.get(getOperand(0), Part); + Value *Op1 = State.get(getOperand(1), Part); + Value *Op2 = State.get(getOperand(2), Part); + Value *V = Builder.CreateSelect(Cond, Op1, Op2); + State.set(this, V, Part); + break; + } default: llvm_unreachable("Unsupported opcode for 
instruction"); } @@ -728,7 +742,7 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, auto NewIGIter = Old2New.find(IG); if (NewIGIter == Old2New.end()) Old2New[IG] = new InterleaveGroup( - IG->getFactor(), IG->isReverse(), IG->getAlignment()); + IG->getFactor(), IG->isReverse(), Align(IG->getAlignment())); if (Inst == IG->getInsertPos()) Old2New[IG]->setInsertPos(VPInst); @@ -736,7 +750,8 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, InterleaveGroupMap[VPInst] = Old2New[IG]; InterleaveGroupMap[VPInst]->insertMember( VPInst, IG->getIndex(Inst), - IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + Align(IG->isReverse() ? (-1) * int(IG->getFactor()) + : IG->getFactor())); } } else if (VPRegionBlock *Region = dyn_cast(Block)) visitRegion(Region, Old2New, IAI); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 8a06412ad59..44d8a198f27 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -615,6 +615,10 @@ public: /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); + /// Unlink this recipe from its current VPBasicBlock and insert it into + /// the VPBasicBlock that MovePos lives in, right after MovePos. + void moveAfter(VPRecipeBase *MovePos); + /// This method unlinks 'this' from the containing basic block and deletes it. 
/// /// \returns an iterator pointing to the element after the erased one diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp index 7ed7d21b6ca..b22d3190d65 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp @@ -21,7 +21,7 @@ void VPlanHCFGTransforms::VPInstructionsToVPRecipes( LoopVectorizationLegality::InductionList *Inductions, SmallPtrSetImpl &DeadInstructions) { - VPRegionBlock *TopRegion = dyn_cast(Plan->getEntry()); + auto *TopRegion = cast(Plan->getEntry()); ReversePostOrderTraversal RPOT(TopRegion->getEntry()); // Condition bit VPValues get deleted during transformation to VPRecipes. diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp index e5ab24e52df..9019ed15ec5 100644 --- a/lib/Transforms/Vectorize/VPlanSLP.cpp +++ b/lib/Transforms/Vectorize/VPlanSLP.cpp @@ -346,11 +346,14 @@ SmallVector VPlanSlp::reorderMultiNodeOps() { void VPlanSlp::dumpBundle(ArrayRef Values) { dbgs() << " Ops: "; - for (auto Op : Values) - if (auto *Instr = cast_or_null(Op)->getUnderlyingInstr()) - dbgs() << *Instr << " | "; - else - dbgs() << " nullptr | "; + for (auto Op : Values) { + if (auto *VPInstr = cast_or_null(Op)) + if (auto *Instr = VPInstr->getUnderlyingInstr()) { + dbgs() << *Instr << " | "; + continue; + } + dbgs() << " nullptr | "; + } dbgs() << "\n"; } diff --git a/lib/WindowsManifest/WindowsManifestMerger.cpp b/lib/WindowsManifest/WindowsManifestMerger.cpp index d092ab493c9..031a963cd3b 100644 --- a/lib/WindowsManifest/WindowsManifestMerger.cpp +++ b/lib/WindowsManifest/WindowsManifestMerger.cpp @@ -58,7 +58,7 @@ private: #if LLVM_LIBXML2_ENABLED -static const std::pair MtNsHrefsPrefixes[] = { +static constexpr std::pair MtNsHrefsPrefixes[] = { {"urn:schemas-microsoft-com:asm.v1", "ms_asmv1"}, {"urn:schemas-microsoft-com:asm.v2", "ms_asmv2"}, {"urn:schemas-microsoft-com:asm.v3", "ms_asmv3"}, @@ -704,7 
+704,7 @@ bool windows_manifest::isAvailable() { return false; } #endif WindowsManifestMerger::WindowsManifestMerger() - : Impl(make_unique()) {} + : Impl(std::make_unique()) {} WindowsManifestMerger::~WindowsManifestMerger() {} diff --git a/lib/XRay/FDRRecordProducer.cpp b/lib/XRay/FDRRecordProducer.cpp index 452bc6c55fb..479b710444b 100644 --- a/lib/XRay/FDRRecordProducer.cpp +++ b/lib/XRay/FDRRecordProducer.cpp @@ -40,32 +40,32 @@ metadataRecordType(const XRayFileHeader &Header, uint8_t T) { "Invalid metadata record type: %d", T); switch (T) { case MetadataRecordKinds::NewBufferKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::EndOfBufferKind: if (Header.Version >= 2) return createStringError( std::make_error_code(std::errc::executable_format_error), "End of buffer records are no longer supported starting version " "2 of the log."); - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::NewCPUIdKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::TSCWrapKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::WalltimeMarkerKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::CustomEventMarkerKind: if (Header.Version >= 5) - return make_unique(); - return make_unique(); + return std::make_unique(); + return std::make_unique(); case MetadataRecordKinds::CallArgumentKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::BufferExtentsKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::TypedEventMarkerKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::PidKind: - return make_unique(); + return std::make_unique(); case MetadataRecordKinds::EnumEndMarker: llvm_unreachable("Invalid MetadataRecordKind"); } @@ -89,7 +89,7 @@ FileBasedRecordProducer::findNextBufferExtent() { if (OffsetPtr == PreReadOffset) return 
createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading one byte from offset %d.", OffsetPtr); + "Failed reading one byte from offset %" PRId64 ".", OffsetPtr); if (isMetadataIntroducer(FirstByte)) { auto LoadedType = FirstByte >> 1; @@ -130,7 +130,7 @@ Expected> FileBasedRecordProducer::produce() { R = std::move(BufferExtentsOrError.get()); assert(R != nullptr); assert(isa(R.get())); - auto BE = dyn_cast(R.get()); + auto BE = cast(R.get()); CurrentBufferBytes = BE->size(); return std::move(R); } @@ -151,7 +151,7 @@ Expected> FileBasedRecordProducer::produce() { if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading one byte from offset %d.", OffsetPtr); + "Failed reading one byte from offset %" PRId64 ".", OffsetPtr); // For metadata records, handle especially here. if (isMetadataIntroducer(FirstByte)) { @@ -162,11 +162,12 @@ Expected> FileBasedRecordProducer::produce() { MetadataRecordOrErr.takeError(), createStringError( std::make_error_code(std::errc::executable_format_error), - "Encountered an unsupported metadata record (%d) at offset %d.", + "Encountered an unsupported metadata record (%d) " + "at offset %" PRId64 ".", LoadedType, PreReadOffset)); R = std::move(MetadataRecordOrErr.get()); } else { - R = llvm::make_unique(); + R = std::make_unique(); } RecordInitializer RI(E, OffsetPtr); @@ -182,8 +183,8 @@ Expected> FileBasedRecordProducer::produce() { if (OffsetPtr - PreReadOffset > CurrentBufferBytes) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Buffer over-read at offset %d (over-read by %d bytes); Record Type " - "= %s.", + "Buffer over-read at offset %" PRId64 " (over-read by %" PRId64 + " bytes); Record Type = %s.", OffsetPtr, (OffsetPtr - PreReadOffset) - CurrentBufferBytes, Record::kindToString(R->getRecordType()).data()); diff --git a/lib/XRay/FileHeaderReader.cpp 
b/lib/XRay/FileHeaderReader.cpp index 3fb021906a6..6b6daf9deba 100644 --- a/lib/XRay/FileHeaderReader.cpp +++ b/lib/XRay/FileHeaderReader.cpp @@ -12,7 +12,7 @@ namespace xray { // Populates the FileHeader reference by reading the first 32 bytes of the file. Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, - uint32_t &OffsetPtr) { + uint64_t &OffsetPtr) { // FIXME: Maybe deduce whether the data is little or big-endian using some // magic bytes in the beginning of the file? @@ -30,21 +30,24 @@ Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading version from file header at offset %d.", OffsetPtr); + "Failed reading version from file header at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading file type from file header at offset %d.", OffsetPtr); + "Failed reading file type from file header at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading flag bits from file header at offset %d.", OffsetPtr); + "Failed reading flag bits from file header at offset %" PRId64 ".", + OffsetPtr); FileHeader.ConstantTSC = Bitfield & 1uL; FileHeader.NonstopTSC = Bitfield & 1uL << 1; @@ -53,7 +56,8 @@ Expected readBinaryFormatHeader(DataExtractor &HeaderExtractor, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading cycle frequency from file header at offset %d.", + "Failed reading cycle frequency from file header at offset %" PRId64 + ".", OffsetPtr); 
std::memcpy(&FileHeader.FreeFormData, diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp index fe5e941f7ea..7453613c703 100644 --- a/lib/XRay/InstrumentationMap.cpp +++ b/lib/XRay/InstrumentationMap.cpp @@ -67,10 +67,11 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, StringRef Contents = ""; const auto &Sections = ObjFile.getBinary()->sections(); auto I = llvm::find_if(Sections, [&](object::SectionRef Section) { - StringRef Name = ""; - if (Section.getName(Name)) - return false; - return Name == "xray_instr_map"; + Expected NameOrErr = Section.getName(); + if (NameOrErr) + return *NameOrErr == "xray_instr_map"; + consumeError(NameOrErr.takeError()); + return false; }); if (I == Sections.end()) @@ -118,7 +119,7 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, "an XRay sled entry in ELF64."), std::make_error_code(std::errc::executable_format_error)); - auto RelocateOrElse = [&](uint32_t Offset, uint64_t Address) { + auto RelocateOrElse = [&](uint64_t Offset, uint64_t Address) { if (!Address) { uint64_t A = I->getAddress() + C - Contents.bytes_begin() + Offset; RelocMap::const_iterator R = Relocs.find(A); @@ -136,10 +137,10 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, 8); Sleds.push_back({}); auto &Entry = Sleds.back(); - uint32_t OffsetPtr = 0; - uint32_t AddrOff = OffsetPtr; + uint64_t OffsetPtr = 0; + uint64_t AddrOff = OffsetPtr; Entry.Address = RelocateOrElse(AddrOff, Extractor.getU64(&OffsetPtr)); - uint32_t FuncOff = OffsetPtr; + uint64_t FuncOff = OffsetPtr; Entry.Function = RelocateOrElse(FuncOff, Extractor.getU64(&OffsetPtr)); auto Kind = Extractor.getU8(&OffsetPtr); static constexpr SledEntry::FunctionKinds Kinds[] = { diff --git a/lib/XRay/Profile.cpp b/lib/XRay/Profile.cpp index e34b182f2e0..c1a43632b60 100644 --- a/lib/XRay/Profile.cpp +++ b/lib/XRay/Profile.cpp @@ -49,9 +49,9 @@ struct BlockHeader { }; static Expected readBlockHeader(DataExtractor &Extractor, - uint32_t &Offset) { + 
uint64_t &Offset) { BlockHeader H; - uint32_t CurrentOffset = Offset; + uint64_t CurrentOffset = Offset; H.Size = Extractor.getU32(&Offset); if (Offset == CurrentOffset) return make_error( @@ -76,7 +76,7 @@ static Expected readBlockHeader(DataExtractor &Extractor, } static Expected> readPath(DataExtractor &Extractor, - uint32_t &Offset) { + uint64_t &Offset) { // We're reading a sequence of int32_t's until we find a 0. std::vector Path; auto CurrentOffset = Offset; @@ -94,7 +94,7 @@ static Expected> readPath(DataExtractor &Extractor, } static Expected readData(DataExtractor &Extractor, - uint32_t &Offset) { + uint64_t &Offset) { // We expect a certain number of elements for Data: // - A 64-bit CallCount // - A 64-bit CumulativeLocalTime counter @@ -280,7 +280,7 @@ Expected loadProfile(StringRef Filename) { StringRef Data(MappedFile.data(), MappedFile.size()); Profile P; - uint32_t Offset = 0; + uint64_t Offset = 0; DataExtractor Extractor(Data, true, 8); // For each block we get from the file: diff --git a/lib/XRay/RecordInitializer.cpp b/lib/XRay/RecordInitializer.cpp index 78163031a8c..68ab3db0620 100644 --- a/lib/XRay/RecordInitializer.cpp +++ b/lib/XRay/RecordInitializer.cpp @@ -12,15 +12,15 @@ namespace xray { Error RecordInitializer::visit(BufferExtents &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, sizeof(uint64_t))) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a buffer extent (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a buffer extent (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.Size = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read buffer extent at offset %d.", + "Cannot read buffer extent at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); @@ -30,23 
+30,25 @@ Error RecordInitializer::visit(BufferExtents &R) { Error RecordInitializer::visit(WallclockRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a wallclock record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a wallclock record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; R.Seconds = E.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read wall clock 'seconds' field at offset %d.", OffsetPtr); + "Cannot read wall clock 'seconds' field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; R.Nanos = E.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read wall clock 'nanos' field at offset %d.", OffsetPtr); + "Cannot read wall clock 'nanos' field at offset %" PRId64 ".", + OffsetPtr); // Align to metadata record size boundary. 
assert(OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize); @@ -57,21 +59,23 @@ Error RecordInitializer::visit(WallclockRecord &R) { Error RecordInitializer::visit(NewCPUIDRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new cpu id record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new cpu id record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; R.CPUId = E.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read CPU id at offset %d.", OffsetPtr); + "Cannot read CPU id at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; R.TSC = E.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read CPU TSC at offset %d.", OffsetPtr); + "Cannot read CPU TSC at offset %" PRId64 ".", + OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - BeginOffset); return Error::success(); @@ -80,16 +84,16 @@ Error RecordInitializer::visit(NewCPUIDRecord &R) { Error RecordInitializer::visit(TSCWrapRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new TSC wrap record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new TSC wrap record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.BaseTSC = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read TSC wrap record at offset %d.", - OffsetPtr); + return 
createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read TSC wrap record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -98,9 +102,9 @@ Error RecordInitializer::visit(TSCWrapRecord &R) { Error RecordInitializer::visit(CustomEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a custom event record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -108,20 +112,22 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record size field offset %d.", OffsetPtr); + "Cannot read a custom event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for custom event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for custom event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.TSC = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event TSC field at offset %d.", OffsetPtr); + "Cannot read a custom event TSC field at offset %" PRId64 ".", + OffsetPtr); // For version 4 onwards, of the FDR log, we want to also capture the CPU ID // of the custom event. 
@@ -131,7 +137,7 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Missing CPU field at offset %d", OffsetPtr); + "Missing CPU field at offset %" PRId64 ".", OffsetPtr); } assert(OffsetPtr > BeginOffset && @@ -142,8 +148,8 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -151,15 +157,15 @@ Error RecordInitializer::visit(CustomEventRecord &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the custom event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the custom event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -169,9 +175,9 @@ Error RecordInitializer::visit(CustomEventRecord &R) { Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a custom event record (%d).", - OffsetPtr); 
+ return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a custom event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -180,20 +186,22 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record size field offset %d.", OffsetPtr); + "Cannot read a custom event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for custom event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for custom event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a custom event record TSC delta field at offset %d.", + "Cannot read a custom event record TSC delta field at offset " + "%" PRId64 ".", OffsetPtr); assert(OffsetPtr > BeginOffset && @@ -204,8 +212,8 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -213,15 +221,15 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" 
PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the custom event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the custom event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -231,9 +239,9 @@ Error RecordInitializer::visit(CustomEventRecordV5 &R) { Error RecordInitializer::visit(TypedEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a typed event record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a typed event record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = OffsetPtr; @@ -242,20 +250,22 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record size field offset %d.", OffsetPtr); + "Cannot read a typed event record size field offset %" PRId64 ".", + OffsetPtr); if (R.Size <= 0) return createStringError( std::make_error_code(std::errc::bad_address), - "Invalid size for typed event (size = %d) at offset %d.", R.Size, - OffsetPtr); + "Invalid size for typed event (size = %d) at offset %" PRId64 ".", + R.Size, OffsetPtr); PreReadOffset = OffsetPtr; R.Delta = E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record TSC delta field at offset %d.", + "Cannot read a typed event record TSC 
delta field at offset " + "%" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; @@ -263,7 +273,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (PreReadOffset == OffsetPtr) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Cannot read a typed event record type field at offset %d.", OffsetPtr); + "Cannot read a typed event record type field at offset %" PRId64 ".", + OffsetPtr); assert(OffsetPtr > BeginOffset && OffsetPtr - BeginOffset <= MetadataRecord::kMetadataBodySize); @@ -273,8 +284,8 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, R.Size)) return createStringError( std::make_error_code(std::errc::bad_address), - "Cannot read %d bytes of custom event data from offset %d.", R.Size, - OffsetPtr); + "Cannot read %d bytes of custom event data from offset %" PRId64 ".", + R.Size, OffsetPtr); std::vector Buffer; Buffer.resize(R.Size); @@ -282,15 +293,15 @@ Error RecordInitializer::visit(TypedEventRecord &R) { if (E.getU8(&OffsetPtr, Buffer.data(), R.Size) != Buffer.data()) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading data into buffer of size %d at offset %d.", R.Size, - OffsetPtr); + "Failed reading data into buffer of size %d at offset %" PRId64 ".", + R.Size, OffsetPtr); assert(OffsetPtr >= PreReadOffset); if (OffsetPtr - PreReadOffset != static_cast(R.Size)) return createStringError( std::make_error_code(std::errc::invalid_argument), - "Failed reading enough bytes for the typed event payload -- read %d " - "expecting %d bytes at offset %d.", + "Failed reading enough bytes for the typed event payload -- read " + "%" PRId64 " expecting %d bytes at offset %" PRId64 ".", OffsetPtr - PreReadOffset, R.Size, PreReadOffset); R.Data.assign(Buffer.begin(), Buffer.end()); @@ -300,16 +311,17 @@ Error RecordInitializer::visit(TypedEventRecord &R) { Error RecordInitializer::visit(CallArgRecord &R) { if 
(!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a call argument record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a call argument record (%" PRId64 ").", + OffsetPtr); auto PreReadOffset = OffsetPtr; R.Arg = E.getU64(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a call arg record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a call arg record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -318,16 +330,16 @@ Error RecordInitializer::visit(CallArgRecord &R) { Error RecordInitializer::visit(PIDRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a process ID record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a process ID record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.PID = E.getSigned(&OffsetPtr, 4); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a process ID record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a process ID record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -336,16 +348,16 @@ Error RecordInitializer::visit(PIDRecord &R) { Error RecordInitializer::visit(NewBufferRecord &R) { if 
(!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a new buffer record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a new buffer record (%" PRId64 ").", OffsetPtr); auto PreReadOffset = OffsetPtr; R.TID = E.getSigned(&OffsetPtr, sizeof(int32_t)); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Cannot read a new buffer record at offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Cannot read a new buffer record at offset %" PRId64 ".", OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize - (OffsetPtr - PreReadOffset); return Error::success(); @@ -354,9 +366,10 @@ Error RecordInitializer::visit(NewBufferRecord &R) { Error RecordInitializer::visit(EndBufferRecord &R) { if (!E.isValidOffsetForDataOfSize(OffsetPtr, MetadataRecord::kMetadataBodySize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for an end-of-buffer record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for an end-of-buffer record (%" PRId64 ").", + OffsetPtr); OffsetPtr += MetadataRecord::kMetadataBodySize; return Error::success(); @@ -373,17 +386,17 @@ Error RecordInitializer::visit(FunctionRecord &R) { // if (OffsetPtr == 0 || !E.isValidOffsetForDataOfSize( --OffsetPtr, FunctionRecord::kFunctionRecordSize)) - return createStringError(std::make_error_code(std::errc::bad_address), - "Invalid offset for a function record (%d).", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Invalid offset for a function record (%" PRId64 ").", OffsetPtr); auto BeginOffset = OffsetPtr; auto PreReadOffset = BeginOffset; uint32_t Buffer = 
E.getU32(&OffsetPtr); if (PreReadOffset == OffsetPtr) - return createStringError(std::make_error_code(std::errc::bad_address), - "Cannot read function id field from offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::bad_address), + "Cannot read function id field from offset %" PRId64 ".", OffsetPtr); // To get the function record type, we shift the buffer one to the right // (truncating the function record indicator) then take the three bits @@ -397,18 +410,19 @@ Error RecordInitializer::visit(FunctionRecord &R) { R.Kind = static_cast(FunctionType); break; default: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown function record type '%d' at offset %d.", - FunctionType, BeginOffset); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Unknown function record type '%d' at offset %" PRId64 ".", + FunctionType, BeginOffset); } R.FuncId = Buffer >> 4; PreReadOffset = OffsetPtr; R.Delta = E.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Failed reading TSC delta from offset %d.", - OffsetPtr); + return createStringError( + std::make_error_code(std::errc::invalid_argument), + "Failed reading TSC delta from offset %" PRId64 ".", OffsetPtr); assert(FunctionRecord::kFunctionRecordSize == (OffsetPtr - BeginOffset)); return Error::success(); } diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp index b9b67c561c6..4f107e1059c 100644 --- a/lib/XRay/Trace.cpp +++ b/lib/XRay/Trace.cpp @@ -47,7 +47,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, std::make_error_code(std::errc::invalid_argument)); DataExtractor Reader(Data, IsLittleEndian, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(Reader, OffsetPtr); if (!FileHeaderOrError) return FileHeaderOrError.takeError(); @@ -67,13 +67,14 @@ Error 
loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32)) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Not enough bytes to read a full record at offset %d.", OffsetPtr); + "Not enough bytes to read a full record at offset %" PRId64 ".", + OffsetPtr); auto PreReadOffset = OffsetPtr; auto RecordType = Reader.getU16(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading record type at offset %d.", OffsetPtr); + "Failed reading record type at offset %" PRId64 ".", OffsetPtr); switch (RecordType) { case 0: { // Normal records. @@ -86,14 +87,15 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading CPU field at offset %d.", OffsetPtr); + "Failed reading CPU field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; auto Type = Reader.getU8(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading record type field at offset %d.", OffsetPtr); + "Failed reading record type field at offset %" PRId64 ".", + OffsetPtr); switch (Type) { case 0: @@ -111,7 +113,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, default: return createStringError( std::make_error_code(std::errc::executable_format_error), - "Unknown record type '%d' at offset %d.", Type, OffsetPtr); + "Unknown record type '%d' at offset %" PRId64 ".", Type, OffsetPtr); } PreReadOffset = OffsetPtr; @@ -119,28 +121,29 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading function id field at offset %d.", OffsetPtr); + "Failed reading 
function id field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; Record.TSC = Reader.getU64(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading TSC field at offset %d.", OffsetPtr); + "Failed reading TSC field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; Record.TId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading thread id field at offset %d.", OffsetPtr); + "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; Record.PId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading process id at offset %d.", OffsetPtr); + "Failed reading process id at offset %" PRId64 ".", OffsetPtr); break; } @@ -155,21 +158,23 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading function id field at offset %d.", OffsetPtr); + "Failed reading function id field at offset %" PRId64 ".", + OffsetPtr); PreReadOffset = OffsetPtr; auto TId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading thread id field at offset %d.", OffsetPtr); + "Failed reading thread id field at offset %" PRId64 ".", OffsetPtr); PreReadOffset = OffsetPtr; auto PId = Reader.getU32(&OffsetPtr); if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading process id field at offset %d.", OffsetPtr); + "Failed reading process id field at offset %" PRId64 ".", + OffsetPtr); // Make a check for versions above 3 
for the Pid field if (Record.FuncId != FuncId || Record.TId != TId || @@ -178,7 +183,7 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, std::make_error_code(std::errc::executable_format_error), "Corrupted log, found arg payload following non-matching " "function+thread record. Record for function %d != %d at offset " - "%d", + "%" PRId64 ".", Record.FuncId, FuncId, OffsetPtr); PreReadOffset = OffsetPtr; @@ -186,7 +191,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, if (OffsetPtr == PreReadOffset) return createStringError( std::make_error_code(std::errc::executable_format_error), - "Failed reading argument payload at offset %d.", OffsetPtr); + "Failed reading argument payload at offset %" PRId64 ".", + OffsetPtr); Record.CallArgs.push_back(Arg); break; @@ -194,7 +200,8 @@ Error loadNaiveFormatLog(StringRef Data, bool IsLittleEndian, default: return createStringError( std::make_error_code(std::errc::executable_format_error), - "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr); + "Unknown record type '%d' at offset %" PRId64 ".", RecordType, + OffsetPtr); } // Advance the offset pointer enough bytes to align to 32-byte records for // basic mode logs. @@ -265,7 +272,7 @@ Error loadFDRLog(StringRef Data, bool IsLittleEndian, "Not enough bytes for an XRay FDR log."); DataExtractor DE(Data, IsLittleEndian, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr); if (!FileHeaderOrError) return FileHeaderOrError.takeError(); @@ -424,7 +431,7 @@ Expected llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) { // Only if we can't load either the binary or the YAML format will we yield an // error. 
DataExtractor HeaderExtractor(DE.getData(), DE.isLittleEndian(), 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; uint16_t Version = HeaderExtractor.getU16(&OffsetPtr); uint16_t Type = HeaderExtractor.getU16(&OffsetPtr); diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h index 75f166b21b2..fe5201eb2e6 100644 --- a/tools/bugpoint/BugDriver.h +++ b/tools/bugpoint/BugDriver.h @@ -217,8 +217,7 @@ public: /// returning the transformed module on success, or a null pointer on failure. std::unique_ptr runPassesOn(Module *M, const std::vector &Passes, - unsigned NumExtraArgs = 0, - const char *const *ExtraArgs = nullptr); + ArrayRef ExtraArgs = {}); /// runPasses - Run the specified passes on Program, outputting a bitcode /// file and writting the filename into OutputFile if successful. If the @@ -231,8 +230,8 @@ public: /// bool runPasses(Module &Program, const std::vector &PassesToRun, std::string &OutputFilename, bool DeleteOutput = false, - bool Quiet = false, unsigned NumExtraArgs = 0, - const char *const *ExtraArgs = nullptr) const; + bool Quiet = false, + ArrayRef ExtraArgs = {}) const; /// runPasses - Just like the method above, but this just returns true or /// false indicating whether or not the optimizer crashed on the specified diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp index 105702de3f1..d9047acd30e 100644 --- a/tools/bugpoint/ExtractFunction.cpp +++ b/tools/bugpoint/ExtractFunction.cpp @@ -407,11 +407,10 @@ BugDriver::extractMappedBlocksFromModule(const std::vector &BBs, std::string uniqueFN = "--extract-blocks-file="; uniqueFN += Temp->TmpName; - const char *ExtraArg = uniqueFN.c_str(); std::vector PI; PI.push_back("extract-blocks"); - std::unique_ptr Ret = runPassesOn(M, PI, 1, &ExtraArg); + std::unique_ptr Ret = runPassesOn(M, PI, {uniqueFN}); if (!Ret) { outs() << "*** Basic Block extraction failed, please report a bug!\n"; diff --git a/tools/bugpoint/OptimizerDriver.cpp 
b/tools/bugpoint/OptimizerDriver.cpp index 562de795238..64af81fcc8a 100644 --- a/tools/bugpoint/OptimizerDriver.cpp +++ b/tools/bugpoint/OptimizerDriver.cpp @@ -79,7 +79,7 @@ bool BugDriver::writeProgramToFile(int FD, const Module &M) const { bool BugDriver::writeProgramToFile(const std::string &Filename, const Module &M) const { std::error_code EC; - ToolOutputFile Out(Filename, EC, sys::fs::F_None); + ToolOutputFile Out(Filename, EC, sys::fs::OF_None); if (!EC) return writeProgramToFileAux(Out, M); return true; @@ -130,8 +130,7 @@ static cl::list OptArgs("opt-args", cl::Positional, bool BugDriver::runPasses(Module &Program, const std::vector &Passes, std::string &OutputFilename, bool DeleteOutput, - bool Quiet, unsigned NumExtraArgs, - const char *const *ExtraArgs) const { + bool Quiet, ArrayRef ExtraArgs) const { // setup the output file name outs().flush(); SmallString<128> UniqueFilename; @@ -223,8 +222,7 @@ bool BugDriver::runPasses(Module &Program, I != E; ++I) Args.push_back(I->c_str()); Args.push_back(Temp->TmpName.c_str()); - for (unsigned i = 0; i < NumExtraArgs; ++i) - Args.push_back(*ExtraArgs); + Args.append(ExtraArgs.begin(), ExtraArgs.end()); LLVM_DEBUG(errs() << "\nAbout to run:\t"; for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) errs() @@ -268,10 +266,10 @@ bool BugDriver::runPasses(Module &Program, std::unique_ptr BugDriver::runPassesOn(Module *M, const std::vector &Passes, - unsigned NumExtraArgs, const char *const *ExtraArgs) { + ArrayRef ExtraArgs) { std::string BitcodeResult; if (runPasses(*M, Passes, BitcodeResult, false /*delete*/, true /*quiet*/, - NumExtraArgs, ExtraArgs)) { + ExtraArgs)) { return nullptr; } diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp index da4244345e3..19b2ea2c018 100644 --- a/tools/bugpoint/ToolRunner.cpp +++ b/tools/bugpoint/ToolRunner.cpp @@ -170,7 +170,7 @@ Expected LLI::ExecuteProgram(const std::string &Bitcode, const std::vector &SharedLibs, unsigned Timeout, unsigned 
MemoryLimit) { std::vector LLIArgs; - LLIArgs.push_back(LLIPath.c_str()); + LLIArgs.push_back(LLIPath); LLIArgs.push_back("-force-interpreter=true"); for (std::vector::const_iterator i = SharedLibs.begin(), @@ -266,15 +266,15 @@ Error CustomCompiler::compileProgram(const std::string &Bitcode, unsigned Timeout, unsigned MemoryLimit) { std::vector ProgramArgs; - ProgramArgs.push_back(CompilerCommand.c_str()); + ProgramArgs.push_back(CompilerCommand); - for (std::size_t i = 0; i < CompilerArgs.size(); ++i) - ProgramArgs.push_back(CompilerArgs.at(i).c_str()); + for (const auto &Arg : CompilerArgs) + ProgramArgs.push_back(Arg); ProgramArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = CompilerArgs.size(); i != e; ++i) - ProgramArgs.push_back(CompilerArgs[i].c_str()); + for (const auto &Arg : CompilerArgs) + ProgramArgs.push_back(Arg); if (RunProgramWithTimeout(CompilerCommand, ProgramArgs, "", "", "", Timeout, MemoryLimit)) @@ -559,7 +559,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, unsigned Timeout, unsigned MemoryLimit) { // Construct a vector of parameters, incorporating those from the command-line std::vector JITArgs; - JITArgs.push_back(LLIPath.c_str()); + JITArgs.push_back(LLIPath); JITArgs.push_back("-force-interpreter=false"); // Add any extra LLI args. @@ -570,7 +570,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, JITArgs.push_back("-load"); JITArgs.push_back(SharedLibs[i]); } - JITArgs.push_back(Bitcode.c_str()); + JITArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv for (unsigned i = 0, e = Args.size(); i != e; ++i) JITArgs.push_back(Args[i]); diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp index 2d5322a351a..c7644e75ae4 100644 --- a/tools/bugpoint/bugpoint.cpp +++ b/tools/bugpoint/bugpoint.cpp @@ -80,6 +80,10 @@ static cl::opt OptLevelOs( cl::desc( "Like -O2 with extra optimizations for size. 
Similar to clang -Os")); +static cl::opt +OptLevelOz("Oz", + cl::desc("Like -Os but reduces code size further. Similar to clang -Oz")); + static cl::opt OptLevelO3("O3", cl::desc("Optimization level 3. Identical to 'opt -O3'")); @@ -109,6 +113,26 @@ public: }; } +// This routine adds optimization passes based on selected optimization level, +// OptLevel. +// +// OptLevel - Optimization Level +static void AddOptimizationPasses(legacy::FunctionPassManager &FPM, + unsigned OptLevel, + unsigned SizeLevel) { + PassManagerBuilder Builder; + Builder.OptLevel = OptLevel; + Builder.SizeLevel = SizeLevel; + + if (OptLevel > 1) + Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false); + else + Builder.Inliner = createAlwaysInlinerLegacyPass(); + + Builder.populateFunctionPassManager(FPM); + Builder.populateModulePassManager(FPM); +} + #ifdef LINK_POLLY_INTO_TOOLS namespace polly { void initializePollyPasses(llvm::PassRegistry &Registry); @@ -189,18 +213,16 @@ int main(int argc, char **argv) { Builder.populateLTOPassManager(PM); } - if (OptLevelO1 || OptLevelO2 || OptLevelO3) { - PassManagerBuilder Builder; - if (OptLevelO1) - Builder.Inliner = createAlwaysInlinerLegacyPass(); - else if (OptLevelOs || OptLevelO2) - Builder.Inliner = createFunctionInliningPass( - 2, OptLevelOs ? 
1 : 0, false); - else - Builder.Inliner = createFunctionInliningPass(275); - Builder.populateFunctionPassManager(PM); - Builder.populateModulePassManager(PM); - } + if (OptLevelO1) + AddOptimizationPasses(PM, 1, 0); + else if (OptLevelO2) + AddOptimizationPasses(PM, 2, 0); + else if (OptLevelO3) + AddOptimizationPasses(PM, 3, 0); + else if (OptLevelOs) + AddOptimizationPasses(PM, 2, 1); + else if (OptLevelOz) + AddOptimizationPasses(PM, 2, 2); for (const PassInfo *PI : PassList) D.addPass(PI->getPassArgument()); diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 76da843f065..574b15b399c 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -239,10 +239,10 @@ static std::unique_ptr GetOutputStream(const char *TargetName, // Open the file. std::error_code EC; - sys::fs::OpenFlags OpenFlags = sys::fs::F_None; + sys::fs::OpenFlags OpenFlags = sys::fs::OF_None; if (!Binary) - OpenFlags |= sys::fs::F_Text; - auto FDOut = llvm::make_unique(OutputFilename, EC, OpenFlags); + OpenFlags |= sys::fs::OF_Text; + auto FDOut = std::make_unique(OutputFilename, EC, OpenFlags); if (EC) { WithColor::error() << EC.message() << '\n'; return nullptr; @@ -329,7 +329,7 @@ int main(int argc, char **argv) { // Set a diagnostic handler that doesn't exit on the first error bool HasError = false; Context.setDiagnosticHandler( - llvm::make_unique(&HasError)); + std::make_unique(&HasError)); Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError); Expected> RemarksFileOrErr = @@ -479,8 +479,8 @@ static int compileModule(char **argv, LLVMContext &Context) { std::unique_ptr DwoOut; if (!SplitDwarfOutputFile.empty()) { std::error_code EC; - DwoOut = llvm::make_unique(SplitDwarfOutputFile, EC, - sys::fs::F_None); + DwoOut = std::make_unique(SplitDwarfOutputFile, EC, + sys::fs::OF_None); if (EC) { WithColor::error(errs(), argv[0]) << EC.message() << '\n'; return 1; @@ -533,13 +533,14 @@ static int compileModule(char **argv, LLVMContext &Context) { if ((FileType != 
TargetMachine::CGFT_AssemblyFile && !Out->os().supportsSeeking()) || CompileTwice) { - BOS = make_unique(Buffer); + BOS = std::make_unique(Buffer); OS = BOS.get(); } const char *argv0 = argv[0]; - LLVMTargetMachine &LLVMTM = static_cast(*Target); - MachineModuleInfo *MMI = new MachineModuleInfo(&LLVMTM); + LLVMTargetMachine &LLVMTM = static_cast(*Target); + MachineModuleInfoWrapperPass *MMIWP = + new MachineModuleInfoWrapperPass(&LLVMTM); // Construct a custom pass pipeline that starts after instruction // selection. @@ -559,7 +560,7 @@ static int compileModule(char **argv, LLVMContext &Context) { TPC.setDisableVerify(NoVerify); PM.add(&TPC); - PM.add(MMI); + PM.add(MMIWP); TPC.printAndVerify(""); for (const std::string &RunPassName : *RunPassNames) { if (addPass(PM, argv0, RunPassName, TPC)) @@ -570,7 +571,7 @@ static int compileModule(char **argv, LLVMContext &Context) { PM.add(createFreeMachineFunctionPass()); } else if (Target->addPassesToEmitFile(PM, *OS, DwoOut ? &DwoOut->os() : nullptr, - FileType, NoVerify, MMI)) { + FileType, NoVerify, MMIWP)) { WithColor::warning(errs(), argv[0]) << "target does not support generation of this" << " file type!\n"; @@ -578,8 +579,8 @@ static int compileModule(char **argv, LLVMContext &Context) { } if (MIR) { - assert(MMI && "Forgot to create MMI?"); - if (MIR->parseMachineFunctions(*M, *MMI)) + assert(MMIWP && "Forgot to create MMIWP?"); + if (MIR->parseMachineFunctions(*M, MMIWP->getMMI())) return 1; } diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp index 8c8cd88c971..ccad0672141 100644 --- a/tools/lli/lli.cpp +++ b/tools/lli/lli.cpp @@ -251,7 +251,7 @@ public: sys::fs::create_directories(Twine(dir)); } std::error_code EC; - raw_fd_ostream outfile(CacheName, EC, sys::fs::F_None); + raw_fd_ostream outfile(CacheName, EC, sys::fs::OF_None); outfile.write(Obj.getBufferStart(), Obj.getBufferSize()); outfile.close(); } @@ -308,7 +308,7 @@ static void addCygMingExtraModule(ExecutionEngine &EE, LLVMContext &Context, Triple 
TargetTriple(TargetTripleStr); // Create a new module. - std::unique_ptr M = make_unique("CygMingHelper", Context); + std::unique_ptr M = std::make_unique("CygMingHelper", Context); M->setTargetTriple(TargetTripleStr); // Create an empty function named "__main". @@ -695,18 +695,16 @@ int main(int argc, char **argv, char * const *envp) { return Result; } -static orc::IRTransformLayer::TransformFunction createDebugDumper() { +static std::function createDebugDumper() { switch (OrcDumpKind) { case DumpKind::NoDump: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { return TSM; }; + return [](Module &M) {}; case DumpKind::DumpFuncsToStdOut: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { + return [](Module &M) { printf("[ "); - for (const auto &F : *TSM.getModule()) { + for (const auto &F : M) { if (F.isDeclaration()) continue; @@ -718,31 +716,23 @@ static orc::IRTransformLayer::TransformFunction createDebugDumper() { } printf("]\n"); - return TSM; }; case DumpKind::DumpModsToStdOut: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { - outs() << "----- Module Start -----\n" - << *TSM.getModule() << "----- Module End -----\n"; - - return TSM; + return [](Module &M) { + outs() << "----- Module Start -----\n" << M << "----- Module End -----\n"; }; case DumpKind::DumpModsToDisk: - return [](orc::ThreadSafeModule TSM, - const orc::MaterializationResponsibility &R) { + return [](Module &M) { std::error_code EC; - raw_fd_ostream Out(TSM.getModule()->getModuleIdentifier() + ".ll", EC, - sys::fs::F_Text); + raw_fd_ostream Out(M.getModuleIdentifier() + ".ll", EC, sys::fs::OF_Text); if (EC) { - errs() << "Couldn't open " << TSM.getModule()->getModuleIdentifier() + errs() << "Couldn't open " << M.getModuleIdentifier() << " for dumping.\nError:" << EC.message() << "\n"; exit(1); } - Out << *TSM.getModule(); - return TSM; + Out << M; }; } llvm_unreachable("Unknown 
DumpKind"); @@ -754,14 +744,13 @@ int runOrcLazyJIT(const char *ProgName) { // Start setting up the JIT environment. // Parse the main module. - orc::ThreadSafeContext TSCtx(llvm::make_unique()); + orc::ThreadSafeContext TSCtx(std::make_unique()); SMDiagnostic Err; - auto MainModule = orc::ThreadSafeModule( - parseIRFile(InputFile, Err, *TSCtx.getContext()), TSCtx); + auto MainModule = parseIRFile(InputFile, Err, *TSCtx.getContext()); if (!MainModule) reportError(Err, ProgName); - const auto &TT = MainModule.getModule()->getTargetTriple(); + const auto &TT = MainModule->getTargetTriple(); orc::LLLazyJITBuilder Builder; Builder.setJITTargetMachineBuilder( @@ -794,13 +783,16 @@ int runOrcLazyJIT(const char *ProgName) { J->setLazyCompileTransform([&](orc::ThreadSafeModule TSM, const orc::MaterializationResponsibility &R) { - if (verifyModule(*TSM.getModule(), &dbgs())) { - dbgs() << "Bad module: " << *TSM.getModule() << "\n"; - exit(1); - } - return Dump(std::move(TSM), R); + TSM.withModuleDo([&](Module &M) { + if (verifyModule(M, &dbgs())) { + dbgs() << "Bad module: " << &M << "\n"; + exit(1); + } + Dump(M); + }); + return TSM; }); - J->getMainJITDylib().setGenerator( + J->getMainJITDylib().addGenerator( ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( J->getDataLayout().getGlobalPrefix()))); @@ -809,7 +801,8 @@ int runOrcLazyJIT(const char *ProgName) { ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle)); // Add the main module. - ExitOnErr(J->addLazyIRModule(std::move(MainModule))); + ExitOnErr( + J->addLazyIRModule(orc::ThreadSafeModule(std::move(MainModule), TSCtx))); // Create JITDylibs and add any extra modules. 
{ @@ -839,6 +832,16 @@ int runOrcLazyJIT(const char *ProgName) { ExitOnErr( J->addLazyIRModule(JD, orc::ThreadSafeModule(std::move(M), TSCtx))); } + + for (auto EAItr = ExtraArchives.begin(), EAEnd = ExtraArchives.end(); + EAItr != EAEnd; ++EAItr) { + auto EAIdx = ExtraArchives.getPosition(EAItr - ExtraArchives.begin()); + assert(EAIdx != 0 && "ExtraArchive should have index > 0"); + auto JDItr = std::prev(IdxToDylib.lower_bound(EAIdx)); + auto &JD = *JDItr->second; + JD.addGenerator(ExitOnErr(orc::StaticLibraryDefinitionGenerator::Load( + J->getObjLinkingLayer(), EAItr->c_str()))); + } } // Add the objects. @@ -959,6 +962,6 @@ std::unique_ptr launchRemote() { close(PipeFD[1][1]); // Return an RPC channel connected to our end of the pipes. - return llvm::make_unique(PipeFD[1][0], PipeFD[0][1]); + return std::make_unique(PipeFD[1][0], PipeFD[0][1]); #endif } diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 91746d0fab3..c9cf217f768 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -43,6 +43,11 @@ #include #endif +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + using namespace llvm; // The name this program was invoked as. 
@@ -70,14 +75,14 @@ USAGE: llvm-ar [options] [-][modifiers] [relpos] [count] [f llvm-ar -M [ - Ignored for compatibility - --help - Display available options - --version - Display the version of this program + --plugin= - ignored for compatibility + -h --help - display this help and exit + --version - print the version and exit @ - read options from OPERATIONS: @@ -95,11 +100,13 @@ MODIFIERS: [b] - put [files] before [relpos] (same as [i]) [c] - do not warn if archive had to be created [D] - use zero for timestamps and uids/gids (default) + [h] - display this help and exit [i] - put [files] before [relpos] (same as [b]) [l] - ignored for compatibility [L] - add archive's contents [N] - use instance [count] of name [o] - preserve original dates + [O] - display member offsets [P] - use full names when matching (implied for thin archives) [s] - create an archive index (cf. ranlib) [S] - do not build a symbol table @@ -107,6 +114,7 @@ MODIFIERS: [u] - update only [files] newer than archive contents [U] - use actual timestamps and uids/gids [v] - be verbose about actions taken + [V] - display the version and exit )"; void printHelpMessage() { @@ -116,10 +124,19 @@ void printHelpMessage() { outs() << ArHelp; } +static unsigned MRILineNumber; +static bool ParsingMRIScript; + // Show the error message and exit. 
LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) { - WithColor::error(errs(), ToolName) << Error << ".\n"; - printHelpMessage(); + if (ParsingMRIScript) { + WithColor::error(errs(), ToolName) + << "script line " << MRILineNumber << ": " << Error << "\n"; + } else { + WithColor::error(errs(), ToolName) << Error << "\n"; + printHelpMessage(); + } + exit(1); } @@ -171,17 +188,18 @@ enum ArchiveOperation { }; // Modifiers to follow operation to vary behavior -static bool AddAfter = false; ///< 'a' modifier -static bool AddBefore = false; ///< 'b' modifier -static bool Create = false; ///< 'c' modifier -static bool OriginalDates = false; ///< 'o' modifier -static bool CompareFullPath = false; ///< 'P' modifier -static bool OnlyUpdate = false; ///< 'u' modifier -static bool Verbose = false; ///< 'v' modifier -static bool Symtab = true; ///< 's' modifier -static bool Deterministic = true; ///< 'D' and 'U' modifiers -static bool Thin = false; ///< 'T' modifier -static bool AddLibrary = false; ///< 'L' modifier +static bool AddAfter = false; ///< 'a' modifier +static bool AddBefore = false; ///< 'b' modifier +static bool Create = false; ///< 'c' modifier +static bool OriginalDates = false; ///< 'o' modifier +static bool DisplayMemberOffsets = false; ///< 'O' modifier +static bool CompareFullPath = false; ///< 'P' modifier +static bool OnlyUpdate = false; ///< 'u' modifier +static bool Verbose = false; ///< 'v' modifier +static bool Symtab = true; ///< 's' modifier +static bool Deterministic = true; ///< 'D' and 'U' modifiers +static bool Thin = false; ///< 'T' modifier +static bool AddLibrary = false; ///< 'L' modifier // Relative Positional Argument (for insert/move). This variable holds // the name of the archive member to which the 'a', 'b' or 'i' modifier @@ -198,6 +216,9 @@ static int CountParam = 0; // command line. 
static std::string ArchiveName; +static std::vector> ArchiveBuffers; +static std::vector> Archives; + // This variable holds the list of member files to proecess, as given // on the command line. static std::vector Members; @@ -209,7 +230,7 @@ static BumpPtrAllocator Alloc; // associated with a, b, and i modifiers static void getRelPos() { if (PositionalArgs.empty()) - fail("Expected [relpos] for a, b, or i modifier"); + fail("expected [relpos] for 'a', 'b', or 'i' modifier"); RelPos = PositionalArgs[0]; PositionalArgs.erase(PositionalArgs.begin()); } @@ -218,40 +239,31 @@ static void getRelPos() { // associated with the N modifier static void getCountParam() { if (PositionalArgs.empty()) - fail("Expected [count] for N modifier"); + fail("expected [count] for 'N' modifier"); auto CountParamArg = StringRef(PositionalArgs[0]); if (CountParamArg.getAsInteger(10, CountParam)) - fail("Value for [count] must be numeric, got: " + CountParamArg); + fail("value for [count] must be numeric, got: " + CountParamArg); if (CountParam < 1) - fail("Value for [count] must be positive, got: " + CountParamArg); + fail("value for [count] must be positive, got: " + CountParamArg); PositionalArgs.erase(PositionalArgs.begin()); } // Get the archive file name from the command line static void getArchive() { if (PositionalArgs.empty()) - fail("An archive name must be specified"); + fail("an archive name must be specified"); ArchiveName = PositionalArgs[0]; PositionalArgs.erase(PositionalArgs.begin()); } -// Copy over remaining items in PositionalArgs to our Members vector -static void getMembers() { - for (auto &Arg : PositionalArgs) - Members.push_back(Arg); -} - -std::vector> ArchiveBuffers; -std::vector> Archives; - static object::Archive &readLibrary(const Twine &Library) { auto BufOrErr = MemoryBuffer::getFile(Library, -1, false); - failIfError(BufOrErr.getError(), "Could not open library " + Library); + failIfError(BufOrErr.getError(), "could not open library " + Library); 
ArchiveBuffers.push_back(std::move(*BufOrErr)); auto LibOrErr = object::Archive::create(ArchiveBuffers.back()->getMemBufferRef()); failIfError(errorToErrorCode(LibOrErr.takeError()), - "Could not parse library"); + "could not parse library"); Archives.push_back(std::move(*LibOrErr)); return *Archives.back(); } @@ -264,7 +276,7 @@ static void runMRIScript(); static ArchiveOperation parseCommandLine() { if (MRI) { if (!PositionalArgs.empty() || !Options.empty()) - fail("Cannot mix -M and other options"); + fail("cannot mix -M and other options"); runMRIScript(); } @@ -319,6 +331,9 @@ static ArchiveOperation parseCommandLine() { case 'o': OriginalDates = true; break; + case 'O': + DisplayMemberOffsets = true; + break; case 'P': CompareFullPath = true; break; @@ -367,6 +382,12 @@ static ArchiveOperation parseCommandLine() { case 'L': AddLibrary = true; break; + case 'V': + cl::PrintVersionMessage(); + exit(0); + case 'h': + printHelpMessage(); + exit(0); default: fail(std::string("unknown option ") + Options[i]); } @@ -377,37 +398,37 @@ static ArchiveOperation parseCommandLine() { getArchive(); // Everything on the command line at this point is a member. - getMembers(); + Members.assign(PositionalArgs.begin(), PositionalArgs.end()); if (NumOperations == 0 && MaybeJustCreateSymTab) { NumOperations = 1; Operation = CreateSymTab; if (!Members.empty()) - fail("The s operation takes only an archive as argument"); + fail("the 's' operation takes only an archive as argument"); } // Perform various checks on the operation/modifier specification // to make sure we are dealing with a legal request. 
if (NumOperations == 0) - fail("You must specify at least one of the operations"); + fail("you must specify at least one of the operations"); if (NumOperations > 1) - fail("Only one operation may be specified"); + fail("only one operation may be specified"); if (NumPositional > 1) - fail("You may only specify one of a, b, and i modifiers"); + fail("you may only specify one of 'a', 'b', and 'i' modifiers"); if (AddAfter || AddBefore) if (Operation != Move && Operation != ReplaceOrInsert) - fail("The 'a', 'b' and 'i' modifiers can only be specified with " + fail("the 'a', 'b' and 'i' modifiers can only be specified with " "the 'm' or 'r' operations"); if (CountParam) if (Operation != Extract && Operation != Delete) - fail("The 'N' modifier can only be specified with the 'x' or 'd' " + fail("the 'N' modifier can only be specified with the 'x' or 'd' " "operations"); if (OriginalDates && Operation != Extract) - fail("The 'o' modifier is only applicable to the 'x' operation"); + fail("the 'o' modifier is only applicable to the 'x' operation"); if (OnlyUpdate && Operation != ReplaceOrInsert) - fail("The 'u' modifier is only applicable to the 'r' operation"); + fail("the 'u' modifier is only applicable to the 'r' operation"); if (AddLibrary && Operation != QuickAppend) - fail("The 'L' modifier is only applicable to the 'q' operation"); + fail("the 'L' modifier is only applicable to the 'q' operation"); // Return the parsed operation to the caller return Operation; @@ -470,12 +491,35 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) { if (!ParentDir.empty()) outs() << sys::path::convert_to_slash(ParentDir) << '/'; } + outs() << Name; + } else { + outs() << Name; + if (DisplayMemberOffsets) + outs() << " 0x" << utohexstr(C.getDataOffset(), true); } - outs() << Name << "\n"; + outs() << '\n'; } -static StringRef normalizePath(StringRef Path) { - return CompareFullPath ? 
Path : sys::path::filename(Path); +static std::string normalizePath(StringRef Path) { + return CompareFullPath ? sys::path::convert_to_slash(Path) + : std::string(sys::path::filename(Path)); +} + +static bool comparePaths(StringRef Path1, StringRef Path2) { +// When on Windows this function calls CompareStringOrdinal +// as Windows file paths are case-insensitive. +// CompareStringOrdinal compares two Unicode strings for +// binary equivalence and allows for case insensitivity. +#ifdef _WIN32 + SmallVector WPath1, WPath2; + failIfError(sys::path::widenPath(normalizePath(Path1), WPath1)); + failIfError(sys::path::widenPath(normalizePath(Path2), WPath2)); + + return CompareStringOrdinal(WPath1.data(), WPath1.size(), WPath2.data(), + WPath2.size(), true) == CSTR_EQUAL; +#else + return normalizePath(Path1) == normalizePath(Path2); +#endif } // Implement the 'x' operation. This function extracts files back to the file @@ -489,7 +533,7 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) { int FD; failIfError(sys::fs::openFileForWrite(sys::path::filename(Name), FD, sys::fs::CD_CreateAlways, - sys::fs::F_None, Mode), + sys::fs::OF_None, Mode), Name); { @@ -551,7 +595,7 @@ static void performReadOperation(ArchiveOperation Operation, if (Filter) { auto I = find_if(Members, [Name](StringRef Path) { - return Name == normalizePath(Path); + return comparePaths(Name, Path); }); if (I == Members.end()) continue; @@ -588,7 +632,7 @@ static void addChildMember(std::vector &Members, const object::Archive::Child &M, bool FlattenArchive = false) { if (Thin && !M.getParent()->isThin()) - fail("Cannot convert a regular archive to a thin one"); + fail("cannot convert a regular archive to a thin one"); Expected NMOrErr = NewArchiveMember::getOldMember(M, Deterministic); failIfError(NMOrErr.takeError()); @@ -681,7 +725,7 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, if (Operation == QuickAppend || Members.empty()) return IA_AddOldMember; auto 
MI = find_if( - Members, [Name](StringRef Path) { return Name == normalizePath(Path); }); + Members, [Name](StringRef Path) { return comparePaths(Name, Path); }); if (MI == Members.end()) return IA_AddOldMember; @@ -698,9 +742,8 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, return IA_MoveOldMember; if (Operation == ReplaceOrInsert) { - StringRef PosName = normalizePath(RelPos); if (!OnlyUpdate) { - if (PosName.empty()) + if (RelPos.empty()) return IA_AddNewMember; return IA_MoveNewMember; } @@ -712,12 +755,12 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, auto ModTimeOrErr = Member.getLastModified(); failIfError(ModTimeOrErr.takeError()); if (Status.getLastModificationTime() < ModTimeOrErr.get()) { - if (PosName.empty()) + if (RelPos.empty()) return IA_AddOldMember; return IA_MoveOldMember; } - if (PosName.empty()) + if (RelPos.empty()) return IA_AddNewMember; return IA_MoveNewMember; } @@ -732,7 +775,6 @@ computeNewArchiveMembers(ArchiveOperation Operation, std::vector Ret; std::vector Moved; int InsertPos = -1; - StringRef PosName = normalizePath(RelPos); if (OldArchive) { Error Err = Error::success(); StringMap MemberCount; @@ -740,8 +782,8 @@ computeNewArchiveMembers(ArchiveOperation Operation, int Pos = Ret.size(); Expected NameOrErr = Child.getName(); failIfError(NameOrErr.takeError()); - StringRef Name = NameOrErr.get(); - if (Name == PosName) { + std::string Name = NameOrErr.get(); + if (comparePaths(Name, RelPos)) { assert(AddAfter || AddBefore); if (AddBefore) InsertPos = Pos; @@ -783,7 +825,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, return Ret; if (!RelPos.empty() && InsertPos == -1) - fail("Insertion point not found"); + fail("insertion point not found"); if (RelPos.empty()) InsertPos = Ret.size(); @@ -859,12 +901,12 @@ static void performWriteOperation(ArchiveOperation Operation, break; case BSD: if (Thin) - fail("Only the gnu format has a thin mode"); + fail("only the gnu format has a 
thin mode"); Kind = object::Archive::K_BSD; break; case DARWIN: if (Thin) - fail("Only the gnu format has a thin mode"); + fail("only the gnu format has a thin mode"); Kind = object::Archive::K_DARWIN; break; case Unknown: @@ -922,14 +964,12 @@ static int performOperation(ArchiveOperation Operation, MemoryBuffer::getFile(ArchiveName, -1, false); std::error_code EC = Buf.getError(); if (EC && EC != errc::no_such_file_or_directory) - fail("error opening '" + ArchiveName + "': " + EC.message() + "!"); + fail("error opening '" + ArchiveName + "': " + EC.message()); if (!EC) { Error Err = Error::success(); object::Archive Archive(Buf.get()->getMemBufferRef(), Err); - EC = errorToErrorCode(std::move(Err)); - failIfError(EC, - "error loading '" + ArchiveName + "': " + EC.message() + "!"); + failIfError(std::move(Err), "unable to load '" + ArchiveName + "'"); if (Archive.isThin()) CompareFullPath = true; performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers); @@ -960,8 +1000,10 @@ static void runMRIScript() { const MemoryBuffer &Ref = *Buf.get(); bool Saved = false; std::vector NewMembers; + ParsingMRIScript = true; for (line_iterator I(Ref, /*SkipBlanks*/ false), E; I != E; ++I) { + ++MRILineNumber; StringRef Line = *I; Line = Line.split(';').first; Line = Line.split('*').first; @@ -1003,15 +1045,15 @@ static void runMRIScript() { case MRICommand::Create: Create = true; if (!ArchiveName.empty()) - fail("Editing multiple archives not supported"); + fail("editing multiple archives not supported"); if (Saved) - fail("File already saved"); + fail("file already saved"); ArchiveName = Rest; break; case MRICommand::Delete: { - StringRef Name = normalizePath(Rest); - llvm::erase_if(NewMembers, - [=](NewArchiveMember &M) { return M.MemberName == Name; }); + llvm::erase_if(NewMembers, [=](NewArchiveMember &M) { + return comparePaths(M.MemberName, Rest); + }); break; } case MRICommand::Save: @@ -1020,10 +1062,12 @@ static void runMRIScript() { case MRICommand::End: 
break; case MRICommand::Invalid: - fail("Unknown command: " + CommandStr); + fail("unknown command: " + CommandStr); } } - + + ParsingMRIScript = false; + // Nothing to do if not saved. if (Saved) performOperation(ReplaceOrInsert, &NewMembers); @@ -1108,7 +1152,7 @@ static int ranlib_main(int argc, char **argv) { return 0; } else { if (ArchiveSpecified) - fail("Exactly one archive should be specified"); + fail("exactly one archive should be specified"); ArchiveSpecified = true; ArchiveName = argv[i]; } @@ -1136,5 +1180,5 @@ int main(int argc, char **argv) { if (Stem.contains_lower("ar")) return ar_main(argc, argv); - fail("Not ranlib, ar, lib or dlltool!"); + fail("not ranlib, ar, lib or dlltool"); } diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index 234fef907a3..c9f50e38fc6 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -82,7 +82,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; exit(1); diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp index f707e3c7ab5..7151cfb032f 100644 --- a/tools/llvm-cov/CodeCoverage.cpp +++ b/tools/llvm-cov/CodeCoverage.cpp @@ -712,15 +712,15 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { // Create the function filters if (!NameFilters.empty() || NameWhitelist || !NameRegexFilters.empty()) { - auto NameFilterer = llvm::make_unique(); + auto NameFilterer = std::make_unique(); for (const auto &Name : NameFilters) - NameFilterer->push_back(llvm::make_unique(Name)); + NameFilterer->push_back(std::make_unique(Name)); if (NameWhitelist) NameFilterer->push_back( - llvm::make_unique(*NameWhitelist)); + std::make_unique(*NameWhitelist)); for (const auto &Regex : NameRegexFilters) NameFilterer->push_back( - 
llvm::make_unique(Regex)); + std::make_unique(Regex)); Filters.push_back(std::move(NameFilterer)); } @@ -728,18 +728,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { RegionCoverageGtFilter.getNumOccurrences() || LineCoverageLtFilter.getNumOccurrences() || LineCoverageGtFilter.getNumOccurrences()) { - auto StatFilterer = llvm::make_unique(); + auto StatFilterer = std::make_unique(); if (RegionCoverageLtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::LessThan, RegionCoverageLtFilter)); if (RegionCoverageGtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::GreaterThan, RegionCoverageGtFilter)); if (LineCoverageLtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( LineCoverageFilter::LessThan, LineCoverageLtFilter)); if (LineCoverageGtFilter.getNumOccurrences()) - StatFilterer->push_back(llvm::make_unique( + StatFilterer->push_back(std::make_unique( RegionCoverageFilter::GreaterThan, LineCoverageGtFilter)); Filters.push_back(std::move(StatFilterer)); } @@ -747,7 +747,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { // Create the ignore filename filters. for (const auto &RE : IgnoreFilenameRegexFilters) IgnoreFilenameFilters.push_back( - llvm::make_unique(RE)); + std::make_unique(RE)); if (!Arches.empty()) { for (const std::string &Arch : Arches) { @@ -1040,7 +1040,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv, switch (ViewOpts.Format) { case CoverageViewOptions::OutputFormat::Text: - Exporter = llvm::make_unique(*Coverage.get(), + Exporter = std::make_unique(*Coverage.get(), ViewOpts, outs()); break; case CoverageViewOptions::OutputFormat::HTML: @@ -1048,7 +1048,7 @@ int CodeCoverageTool::doExport(int argc, const char **argv, // above. 
llvm_unreachable("Export in HTML is not supported!"); case CoverageViewOptions::OutputFormat::Lcov: - Exporter = llvm::make_unique(*Coverage.get(), + Exporter = std::make_unique(*Coverage.get(), ViewOpts, outs()); break; } diff --git a/tools/llvm-cov/SourceCoverageView.cpp b/tools/llvm-cov/SourceCoverageView.cpp index 616f667e2c8..0e20ea63cd6 100644 --- a/tools/llvm-cov/SourceCoverageView.cpp +++ b/tools/llvm-cov/SourceCoverageView.cpp @@ -76,9 +76,9 @@ std::unique_ptr CoveragePrinter::create(const CoverageViewOptions &Opts) { switch (Opts.Format) { case CoverageViewOptions::OutputFormat::Text: - return llvm::make_unique(Opts); + return std::make_unique(Opts); case CoverageViewOptions::OutputFormat::HTML: - return llvm::make_unique(Opts); + return std::make_unique(Opts); case CoverageViewOptions::OutputFormat::Lcov: // Unreachable because CodeCoverage.cpp should terminate with an error // before we get here. @@ -141,10 +141,10 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File, CoverageData &&CoverageInfo) { switch (Options.Format) { case CoverageViewOptions::OutputFormat::Text: - return llvm::make_unique( + return std::make_unique( SourceName, File, Options, std::move(CoverageInfo)); case CoverageViewOptions::OutputFormat::HTML: - return llvm::make_unique( + return std::make_unique( SourceName, File, Options, std::move(CoverageInfo)); case CoverageViewOptions::OutputFormat::Lcov: // Unreachable because CodeCoverage.cpp should terminate with an error diff --git a/tools/llvm-cov/TestingSupport.cpp b/tools/llvm-cov/TestingSupport.cpp index 3ee318c9c64..b99bd83157d 100644 --- a/tools/llvm-cov/TestingSupport.cpp +++ b/tools/llvm-cov/TestingSupport.cpp @@ -8,6 +8,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" @@ -50,8 +51,13 @@ int convertForTestingMain(int argc, 
const char *argv[]) { auto ObjFormat = OF->getTripleObjectFormat(); for (const auto &Section : OF->sections()) { StringRef Name; - if (Section.getName(Name)) + if (Expected NameOrErr = Section.getName()) { + Name = *NameOrErr; + } else { + consumeError(NameOrErr.takeError()); return 1; + } + if (Name == llvm::getInstrProfSectionName(IPSK_name, ObjFormat, /*AddSegmentInfo=*/false)) { ProfileNames = Section; @@ -94,7 +100,7 @@ int convertForTestingMain(int argc, const char *argv[]) { encodeULEB128(ProfileNamesAddress, OS); OS << ProfileNamesData; // Coverage mapping data is expected to have an alignment of 8. - for (unsigned Pad = OffsetToAlignment(OS.tell(), 8); Pad; --Pad) + for (unsigned Pad = offsetToAlignment(OS.tell(), Align(8)); Pad; --Pad) OS.write(uint8_t(0)); OS << CoverageMappingData; diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp index 83331265578..03e1bab9417 100644 --- a/tools/llvm-cxxdump/llvm-cxxdump.cpp +++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp @@ -174,7 +174,11 @@ static void dumpCXXData(const ObjectFile *Obj) { SectionRelocMap.clear(); for (const SectionRef &Section : Obj->sections()) { - section_iterator Sec2 = Section.getRelocatedSection(); + Expected ErrOrSec = Section.getRelocatedSection(); + if (!ErrOrSec) + error(ErrOrSec.takeError()); + + section_iterator Sec2 = *ErrOrSec; if (Sec2 != Obj->section_end()) SectionRelocMap[*Sec2].push_back(Section); } diff --git a/tools/llvm-cxxmap/llvm-cxxmap.cpp b/tools/llvm-cxxmap/llvm-cxxmap.cpp index 87d4d06bbc9..b53a6364c89 100644 --- a/tools/llvm-cxxmap/llvm-cxxmap.cpp +++ b/tools/llvm-cxxmap/llvm-cxxmap.cpp @@ -145,7 +145,7 @@ int main(int argc, const char *argv[]) { exitWithErrorCode(RemappingBufOrError.getError(), RemappingFile); std::error_code EC; - raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, OutputFilename); diff --git 
a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 3f337b874b1..d66299cbf76 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -153,7 +153,7 @@ int main(int argc, char **argv) { LLVMContext Context; Context.setDiagnosticHandler( - llvm::make_unique(argv[0])); + std::make_unique(argv[0])); cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); std::unique_ptr MB = @@ -186,7 +186,7 @@ int main(int argc, char **argv) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_Text)); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp index f26369b935c..c29ad783a9e 100644 --- a/tools/llvm-dwarfdump/Statistics.cpp +++ b/tools/llvm-dwarfdump/Statistics.cpp @@ -5,11 +5,18 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/JSON.h" #define DEBUG_TYPE "dwarfdump" using namespace llvm; using namespace object; +/// This represents the number of categories of debug location coverage being +/// calculated. The first category is the number of variables with 0% location +/// coverage, but the last category is the number of variables with 100% +/// location coverage. +constexpr int NumOfCoverageCategories = 12; + /// Holds statistics for one function (or other entity that has a PC range and /// contains variables, such as a compile unit). struct PerFunctionStats { @@ -43,9 +50,9 @@ struct PerFunctionStats { unsigned NumVars = 0; /// Number of variables with source location. unsigned NumVarSourceLocations = 0; - /// Number of variables wtih type. + /// Number of variables with type. unsigned NumVarTypes = 0; - /// Number of variables wtih DW_AT_location. + /// Number of variables with DW_AT_location. 
unsigned NumVarLocations = 0; }; @@ -56,16 +63,74 @@ struct GlobalStats { /// Total number of PC range bytes in each variable's enclosing scope, /// starting from the first definition of the variable. unsigned ScopeBytesFromFirstDefinition = 0; - /// Total number of call site entries (DW_TAG_call_site) or - /// (DW_AT_call_file & DW_AT_call_line). + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value). + unsigned ScopeEntryValueBytesCovered = 0; + /// Total number of PC range bytes covered by DW_AT_locations of + /// formal parameters. + unsigned ParamScopeBytesCovered = 0; + /// Total number of PC range bytes in each variable's enclosing scope, + /// starting from the first definition of the variable (only for parameters). + unsigned ParamScopeBytesFromFirstDefinition = 0; + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value) (only for parameters). + unsigned ParamScopeEntryValueBytesCovered = 0; + /// Total number of PC range bytes covered by DW_AT_locations (only for local + /// variables). + unsigned VarScopeBytesCovered = 0; + /// Total number of PC range bytes in each variable's enclosing scope, + /// starting from the first definition of the variable (only for local + /// variables). + unsigned VarScopeBytesFromFirstDefinition = 0; + /// Total number of PC range bytes covered by DW_AT_locations with + /// the debug entry values (DW_OP_entry_value) (only for local variables). + unsigned VarScopeEntryValueBytesCovered = 0; + /// Total number of call site entries (DW_AT_call_file & DW_AT_call_line). unsigned CallSiteEntries = 0; + /// Total number of call site DIEs (DW_TAG_call_site). + unsigned CallSiteDIEs = 0; + /// Total number of call site parameter DIEs (DW_TAG_call_site_parameter). + unsigned CallSiteParamDIEs = 0; /// Total byte size of concrete functions. 
This byte size includes /// inline functions contained in the concrete functions. - uint64_t FunctionSize = 0; + unsigned FunctionSize = 0; /// Total byte size of inlined functions. This is the total number of bytes /// for the top inline functions within concrete functions. This can help /// tune the inline settings when compiling to match user expectations. - uint64_t InlineFunctionSize = 0; + unsigned InlineFunctionSize = 0; +}; + +/// Holds accumulated debug location statistics about local variables and +/// formal parameters. +struct LocationStats { + /// Map the scope coverage decile to the number of variables in the decile. + /// The first element of the array (at the index zero) represents the number + /// of variables with the no debug location at all, but the last element + /// in the vector represents the number of fully covered variables within + /// its scope. + std::vector VarParamLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage. + std::vector VarParamNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// The debug location statistics for formal parameters. + std::vector ParamLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage for formal parameters. + std::vector ParamNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// The debug location statistics for local variables. + std::vector VarLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Map non debug entry values coverage for local variables. + std::vector VarNonEntryValLocStats{ + std::vector(NumOfCoverageCategories, 0)}; + /// Total number of local variables and function parameters processed. + unsigned NumVarParam = 0; + /// Total number of formal parameters processed. + unsigned NumParam = 0; + /// Total number of local variables processed. + unsigned NumVar = 0; }; /// Extract the low pc from a Die. 
@@ -81,27 +146,66 @@ static uint64_t getLowPC(DWARFDie Die) { return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0); } +/// Collect debug location statistics for one DIE. +static void collectLocStats(uint64_t BytesCovered, uint64_t BytesInScope, + std::vector &VarParamLocStats, + std::vector &ParamLocStats, + std::vector &VarLocStats, bool IsParam, + bool IsLocalVar) { + auto getCoverageBucket = [BytesCovered, BytesInScope]() -> unsigned { + unsigned LocBucket = 100 * (double)BytesCovered / BytesInScope; + if (LocBucket == 0) { + // No debug location at all for the variable. + return 0; + } else if (LocBucket == 100 || BytesCovered > BytesInScope) { + // Fully covered variable within its scope. + return NumOfCoverageCategories - 1; + } else { + // Get covered range (e.g. 20%-29%). + LocBucket /= 10; + return LocBucket + 1; + } + }; + + unsigned CoverageBucket = getCoverageBucket(); + VarParamLocStats[CoverageBucket]++; + if (IsParam) + ParamLocStats[CoverageBucket]++; + else if (IsLocalVar) + VarLocStats[CoverageBucket]++; +} + /// Collect debug info quality metrics for one DIE. 
-static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, +static void collectStatsForDie(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix, std::string VarPrefix, uint64_t ScopeLowPC, uint64_t BytesInScope, uint32_t InlineDepth, StringMap &FnStatMap, - GlobalStats &GlobalStats) { + GlobalStats &GlobalStats, + LocationStats &LocStats) { bool HasLoc = false; bool HasSrcLoc = false; bool HasType = false; bool IsArtificial = false; uint64_t BytesCovered = 0; + uint64_t BytesEntryValuesCovered = 0; uint64_t OffsetToFirstDefinition = 0; + auto &FnStats = FnStatMap[FnPrefix]; + bool IsParam = Die.getTag() == dwarf::DW_TAG_formal_parameter; + bool IsLocalVar = Die.getTag() == dwarf::DW_TAG_variable; - if (Die.getTag() == dwarf::DW_TAG_call_site) { - GlobalStats.CallSiteEntries++; + if (Die.getTag() == dwarf::DW_TAG_call_site || + Die.getTag() == dwarf::DW_TAG_GNU_call_site) { + GlobalStats.CallSiteDIEs++; return; } - if (Die.getTag() != dwarf::DW_TAG_formal_parameter && - Die.getTag() != dwarf::DW_TAG_variable && - Die.getTag() != dwarf::DW_TAG_member) { + if (Die.getTag() == dwarf::DW_TAG_call_site_parameter || + Die.getTag() == dwarf::DW_TAG_GNU_call_site_parameter) { + GlobalStats.CallSiteParamDIEs++; + return; + } + + if (!IsParam && !IsLocalVar && Die.getTag() != dwarf::DW_TAG_member) { // Not a variable or constant member. return; } @@ -116,6 +220,19 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, if (Die.find(dwarf::DW_AT_artificial)) IsArtificial = true; + auto IsEntryValue = [&](ArrayRef D) -> bool { + DWARFUnit *U = Die.getDwarfUnit(); + DataExtractor Data(toStringRef(D), + Die.getDwarfUnit()->getContext().isLittleEndian(), 0); + DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); + // Consider the expression containing the DW_OP_entry_value as + // an entry value. 
+ return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) { + return Op.getCode() == dwarf::DW_OP_entry_value || + Op.getCode() == dwarf::DW_OP_GNU_entry_value; + }); + }; + if (Die.find(dwarf::DW_AT_const_value)) { // This catches constant members *and* variables. HasLoc = true; @@ -133,11 +250,15 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, if (auto DebugLocOffset = FormValue->getAsSectionOffset()) { auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc(); if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) { - for (auto Entry : List->Entries) - BytesCovered += Entry.End - Entry.Begin; + for (auto Entry : List->Entries) { + uint64_t BytesEntryCovered = Entry.End - Entry.Begin; + BytesCovered += BytesEntryCovered; + if (IsEntryValue(Entry.Loc)) + BytesEntryValuesCovered += BytesEntryCovered; + } if (List->Entries.size()) { uint64_t FirstDef = List->Entries[0].Begin; - uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE()); + uint64_t UnitOfs = UnitLowPC; // Ranges sometimes start before the lexical scope. if (UnitOfs + FirstDef >= ScopeLowPC) OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC; @@ -154,8 +275,25 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, } } + // Calculate the debug location statistics. + if (BytesInScope) { + LocStats.NumVarParam++; + if (IsParam) + LocStats.NumParam++; + else if (IsLocalVar) + LocStats.NumVar++; + + collectLocStats(BytesCovered, BytesInScope, LocStats.VarParamLocStats, + LocStats.ParamLocStats, LocStats.VarLocStats, IsParam, + IsLocalVar); + // Non debug entry values coverage statistics. + collectLocStats(BytesCovered - BytesEntryValuesCovered, BytesInScope, + LocStats.VarParamNonEntryValLocStats, + LocStats.ParamNonEntryValLocStats, + LocStats.VarNonEntryValLocStats, IsParam, IsLocalVar); + } + // Collect PC range coverage data. 
- auto &FnStats = FnStatMap[FnPrefix]; if (DWARFDie D = Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) Die = D; @@ -171,6 +309,17 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, // Turns out we have a lot of ranges that extend past the lexical scope. GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered); GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.ScopeEntryValueBytesCovered += BytesEntryValuesCovered; + if (IsParam) { + GlobalStats.ParamScopeBytesCovered += + std::min(BytesInScope, BytesCovered); + GlobalStats.ParamScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.ParamScopeEntryValueBytesCovered += BytesEntryValuesCovered; + } else if (IsLocalVar) { + GlobalStats.VarScopeBytesCovered += std::min(BytesInScope, BytesCovered); + GlobalStats.VarScopeBytesFromFirstDefinition += BytesInScope; + GlobalStats.VarScopeEntryValueBytesCovered += BytesEntryValuesCovered; + } assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytesFromFirstDefinition); } else if (Die.getTag() == dwarf::DW_TAG_member) { @@ -179,7 +328,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, FnStats.TotalVarWithLoc += (unsigned)HasLoc; } if (!IsArtificial) { - if (Die.getTag() == dwarf::DW_TAG_formal_parameter) { + if (IsParam) { FnStats.NumParams++; if (HasType) FnStats.NumParamTypes++; @@ -187,7 +336,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, FnStats.NumParamSourceLocations++; if (HasLoc) FnStats.NumParamLocations++; - } else if (Die.getTag() == dwarf::DW_TAG_variable) { + } else if (IsLocalVar) { FnStats.NumVars++; if (HasType) FnStats.NumVarTypes++; @@ -200,11 +349,12 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, } /// Recursively collect debug info quality metrics. 
-static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, +static void collectStatsRecursive(DWARFDie Die, uint64_t UnitLowPC, std::string FnPrefix, std::string VarPrefix, uint64_t ScopeLowPC, uint64_t BytesInScope, uint32_t InlineDepth, StringMap &FnStatMap, - GlobalStats &GlobalStats) { + GlobalStats &GlobalStats, + LocationStats &LocStats) { // Handle any kind of lexical scope. const dwarf::Tag Tag = Die.getTag(); const bool IsFunction = Tag == dwarf::DW_TAG_subprogram; @@ -272,8 +422,8 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, } } else { // Not a scope, visit the Die itself. It could be a variable. - collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, - InlineDepth, FnStatMap, GlobalStats); + collectStatsForDie(Die, UnitLowPC, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, + InlineDepth, FnStatMap, GlobalStats, LocStats); } // Set InlineDepth correctly for child recursion @@ -290,8 +440,9 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, if (Child.getTag() == dwarf::DW_TAG_lexical_block) ChildVarPrefix += toHex(LexicalBlockIndex++) + '.'; - collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC, - BytesInScope, InlineDepth, FnStatMap, GlobalStats); + collectStatsRecursive(Child, UnitLowPC, FnPrefix, ChildVarPrefix, ScopeLowPC, + BytesInScope, InlineDepth, FnStatMap, GlobalStats, + LocStats); Child = Child.getSibling(); } } @@ -299,14 +450,33 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, /// Print machine-readable output. /// The machine-readable format is single-line JSON output. 
/// \{ -static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) { - OS << ",\"" << Key << "\":\"" << Value << '"'; - LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); -} -static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) { +static void printDatum(raw_ostream &OS, const char *Key, json::Value Value) { OS << ",\"" << Key << "\":" << Value; LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); } +static void printLocationStats(raw_ostream &OS, + const char *Key, + std::vector &LocationStats) { + OS << ",\"" << Key << " with 0% of its scope covered\":" + << LocationStats[0]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 0% of its scope covered: " + << LocationStats[0] << '\n'); + OS << ",\"" << Key << " with 1-9% of its scope covered\":" + << LocationStats[1]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 1-9% of its scope covered: " + << LocationStats[1] << '\n'); + for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) { + OS << ",\"" << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1 + << "% of its scope covered\":" << LocationStats[i]; + LLVM_DEBUG(llvm::dbgs() + << Key << " with " << (i - 1) * 10 << "-" << i * 10 - 1 + << "% of its scope covered: " << LocationStats[i]); + } + OS << ",\"" << Key << " with 100% of its scope covered\":" + << LocationStats[NumOfCoverageCategories - 1]; + LLVM_DEBUG(llvm::dbgs() << Key << " with 100% of its scope covered: " + << LocationStats[NumOfCoverageCategories - 1]); +} /// \} /// Collect debug info quality metrics for an entire DIContext. 
@@ -321,10 +491,12 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename, raw_ostream &OS) { StringRef FormatName = Obj.getFileFormatName(); GlobalStats GlobalStats; + LocationStats LocStats; StringMap Statistics; for (const auto &CU : static_cast(&DICtx)->compile_units()) if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) - collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats); + collectStatsRecursive(CUDie, getLowPC(CUDie), "/", "g", 0, 0, 0, + Statistics, GlobalStats, LocStats); /// The version number should be increased every time the algorithm is changed /// (including bug fixes). New metrics may be added without increasing the @@ -387,9 +559,24 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, printDatum(OS, "source variables", VarParamTotal); printDatum(OS, "variables with location", VarParamWithLoc); printDatum(OS, "call site entries", GlobalStats.CallSiteEntries); + printDatum(OS, "call site DIEs", GlobalStats.CallSiteDIEs); + printDatum(OS, "call site parameter DIEs", GlobalStats.CallSiteParamDIEs); printDatum(OS, "scope bytes total", GlobalStats.ScopeBytesFromFirstDefinition); printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); + printDatum(OS, "entry value scope bytes covered", + GlobalStats.ScopeEntryValueBytesCovered); + printDatum(OS, "formal params scope bytes total", + GlobalStats.ParamScopeBytesFromFirstDefinition); + printDatum(OS, "formal params scope bytes covered", + GlobalStats.ParamScopeBytesCovered); + printDatum(OS, "formal params entry value scope bytes covered", + GlobalStats.ParamScopeEntryValueBytesCovered); + printDatum(OS, "vars scope bytes total", + GlobalStats.VarScopeBytesFromFirstDefinition); + printDatum(OS, "vars scope bytes covered", GlobalStats.VarScopeBytesCovered); + printDatum(OS, "vars entry value scope bytes covered", + GlobalStats.VarScopeEntryValueBytesCovered); printDatum(OS, "total function size", 
GlobalStats.FunctionSize); printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize); printDatum(OS, "total formal params", ParamTotal); @@ -400,6 +587,20 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, printDatum(OS, "vars with source location", VarWithSrcLoc); printDatum(OS, "vars with type", VarWithType); printDatum(OS, "vars with binary location", VarWithLoc); + printDatum(OS, "total variables procesed by location statistics", + LocStats.NumVarParam); + printLocationStats(OS, "variables", LocStats.VarParamLocStats); + printLocationStats(OS, "variables (excluding the debug entry values)", + LocStats.VarParamNonEntryValLocStats); + printDatum(OS, "total params procesed by location statistics", + LocStats.NumParam); + printLocationStats(OS, "params", LocStats.ParamLocStats); + printLocationStats(OS, "params (excluding the debug entry values)", + LocStats.ParamNonEntryValLocStats); + printDatum(OS, "total vars procesed by location statistics", LocStats.NumVar); + printLocationStats(OS, "vars", LocStats.VarLocStats); + printLocationStats(OS, "vars (excluding the debug entry values)", + LocStats.VarNonEntryValLocStats); OS << "}\n"; LLVM_DEBUG( llvm::dbgs() << "Total Availability: " diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index 05a7aef67ec..e20f6041f98 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -584,7 +584,7 @@ int main(int argc, char **argv) { } std::error_code EC; - ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None); + ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_Text); error("Unable to open output file" + OutputFilename, EC); // Don't remove output file if we exit with an error. 
OutputFile.keep(); diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 300bc0b4bd5..dddc0d9baa0 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -74,8 +74,18 @@ static cl::list // ExtractBlocks - The blocks to extract from the module. static cl::list ExtractBlocks( - "bb", cl::desc("Specify pairs to extract"), - cl::ZeroOrMore, cl::value_desc("function:bb"), cl::cat(ExtractCat)); + "bb", + cl::desc( + "Specify pairs to extract.\n" + "Each pair will create a function.\n" + "If multiple basic blocks are specified in one pair,\n" + "the first block in the sequence should dominate the rest.\n" + "eg:\n" + " --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n" + " --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one " + "with bb2."), + cl::ZeroOrMore, cl::value_desc("function:bb1[;bb2...]"), + cl::cat(ExtractCat)); // ExtractAlias - The alias to extract from the module. static cl::list @@ -350,7 +360,7 @@ int main(int argc, char **argv) { Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls std::error_code EC; - ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); + ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-ifs/CMakeLists.txt b/tools/llvm-ifs/CMakeLists.txt new file mode 100644 index 00000000000..544b0e41a5e --- /dev/null +++ b/tools/llvm-ifs/CMakeLists.txt @@ -0,0 +1,10 @@ +set(LLVM_LINK_COMPONENTS + Object + Support + TextAPI + ObjectYAML + ) + +add_llvm_tool(llvm-ifs + llvm-ifs.cpp + ) diff --git a/tools/llvm-ifs/LLVMBuild.txt b/tools/llvm-ifs/LLVMBuild.txt new file mode 100644 index 00000000000..10dc6bd8f55 --- /dev/null +++ b/tools/llvm-ifs/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./tools/llvm-ifs/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-ifs +parent = Tools +required_libraries = Object Support TextAPI diff --git a/tools/llvm-ifs/llvm-ifs.cpp b/tools/llvm-ifs/llvm-ifs.cpp new file mode 100644 index 00000000000..f329b463363 --- /dev/null +++ b/tools/llvm-ifs/llvm-ifs.cpp @@ -0,0 +1,532 @@ +//===- llvm-ifs.cpp -------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===/ + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TextAPI/MachO/InterfaceFile.h" +#include "llvm/TextAPI/MachO/TextAPIReader.h" +#include "llvm/TextAPI/MachO/TextAPIWriter.h" +#include +#include + +using namespace llvm; +using namespace llvm::yaml; +using namespace llvm::MachO; + +#define DEBUG_TYPE "llvm-ifs" + +namespace { +const VersionTuple IFSVersionCurrent(1, 2); +} + +static cl::opt Action("action", cl::desc(""), + cl::value_desc("write-ifs | write-bin"), + cl::init("write-ifs")); + +static cl::opt ForceFormat("force-format", + cl::desc(""), + cl::value_desc("ELF | TBD"), + cl::init("")); + +static cl::list InputFilenames(cl::Positional, + cl::desc(""), + cl::ZeroOrMore); + +static cl::opt OutputFilename("o", cl::desc(""), + cl::value_desc("path")); + +enum class IFSSymbolType { + NoType = 0, + Object, + Func, + // Type information is 4 bits, so 16 is safely out of range. 
+ Unknown = 16, +}; + +std::string getTypeName(IFSSymbolType Type) { + switch (Type) { + case IFSSymbolType::NoType: + return "NoType"; + case IFSSymbolType::Func: + return "Func"; + case IFSSymbolType::Object: + return "Object"; + case IFSSymbolType::Unknown: + return "Unknown"; + } + llvm_unreachable("Unexpected ifs symbol type."); +} + +struct IFSSymbol { + IFSSymbol(std::string SymbolName) : Name(SymbolName) {} + std::string Name; + uint64_t Size; + IFSSymbolType Type; + bool Weak; + Optional Warning; + bool operator<(const IFSSymbol &RHS) const { return Name < RHS.Name; } +}; + +namespace llvm { +namespace yaml { +/// YAML traits for IFSSymbolType. +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, IFSSymbolType &SymbolType) { + IO.enumCase(SymbolType, "NoType", IFSSymbolType::NoType); + IO.enumCase(SymbolType, "Func", IFSSymbolType::Func); + IO.enumCase(SymbolType, "Object", IFSSymbolType::Object); + IO.enumCase(SymbolType, "Unknown", IFSSymbolType::Unknown); + // Treat other symbol types as noise, and map to Unknown. + if (!IO.outputting() && IO.matchEnumFallback()) + SymbolType = IFSSymbolType::Unknown; + } +}; + +template <> struct ScalarTraits { + static void output(const VersionTuple &Value, void *, + llvm::raw_ostream &Out) { + Out << Value.getAsString(); + } + + static StringRef input(StringRef Scalar, void *, VersionTuple &Value) { + if (Value.tryParse(Scalar)) + return StringRef("Can't parse version: invalid version format."); + + if (Value > IFSVersionCurrent) + return StringRef("Unsupported IFS version."); + + // Returning empty StringRef indicates successful parse. + return StringRef(); + } + + // Don't place quotation marks around version value. + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + +/// YAML traits for IFSSymbol. 
+template <> struct MappingTraits { + static void mapping(IO &IO, IFSSymbol &Symbol) { + IO.mapRequired("Type", Symbol.Type); + // The need for symbol size depends on the symbol type. + if (Symbol.Type == IFSSymbolType::NoType) + IO.mapOptional("Size", Symbol.Size, (uint64_t)0); + else if (Symbol.Type == IFSSymbolType::Func) + Symbol.Size = 0; + else + IO.mapRequired("Size", Symbol.Size); + IO.mapOptional("Weak", Symbol.Weak, false); + IO.mapOptional("Warning", Symbol.Warning); + } + + // Compacts symbol information into a single line. + static const bool flow = true; +}; + +/// YAML traits for set of IFSSymbols. +template <> struct CustomMappingTraits> { + static void inputOne(IO &IO, StringRef Key, std::set &Set) { + std::string Name = Key.str(); + IFSSymbol Sym(Name); + IO.mapRequired(Name.c_str(), Sym); + Set.insert(Sym); + } + + static void output(IO &IO, std::set &Set) { + for (auto &Sym : Set) + IO.mapRequired(Sym.Name.c_str(), const_cast(Sym)); + } +}; +} // namespace yaml +} // namespace llvm + +// A cumulative representation of ELF stubs. +// Both textual and binary stubs will read into and write from this object. +class IFSStub { + // TODO: Add support for symbol versioning. +public: + VersionTuple IfsVersion; + std::string Triple; + std::string ObjectFileFormat; + Optional SOName; + std::vector NeededLibs; + std::set Symbols; + + IFSStub() = default; + IFSStub(const IFSStub &Stub) + : IfsVersion(Stub.IfsVersion), Triple(Stub.Triple), + ObjectFileFormat(Stub.ObjectFileFormat), SOName(Stub.SOName), + NeededLibs(Stub.NeededLibs), Symbols(Stub.Symbols) {} + IFSStub(IFSStub &&Stub) + : IfsVersion(std::move(Stub.IfsVersion)), Triple(std::move(Stub.Triple)), + ObjectFileFormat(std::move(Stub.ObjectFileFormat)), + SOName(std::move(Stub.SOName)), NeededLibs(std::move(Stub.NeededLibs)), + Symbols(std::move(Stub.Symbols)) {} +}; + +namespace llvm { +namespace yaml { +/// YAML traits for IFSStub objects. 
+template <> struct MappingTraits { + static void mapping(IO &IO, IFSStub &Stub) { + if (!IO.mapTag("!experimental-ifs-v1", true)) + IO.setError("Not a .ifs YAML file."); + IO.mapRequired("IfsVersion", Stub.IfsVersion); + IO.mapOptional("Triple", Stub.Triple); + IO.mapOptional("ObjectFileFormat", Stub.ObjectFileFormat); + IO.mapOptional("SOName", Stub.SOName); + IO.mapOptional("NeededLibs", Stub.NeededLibs); + IO.mapRequired("Symbols", Stub.Symbols); + } +}; +} // namespace yaml +} // namespace llvm + +static Expected> readInputFile(StringRef FilePath) { + // Read in file. + ErrorOr> BufOrError = + MemoryBuffer::getFileOrSTDIN(FilePath); + if (!BufOrError) + return createStringError(BufOrError.getError(), "Could not open `%s`", + FilePath.data()); + + std::unique_ptr FileReadBuffer = std::move(*BufOrError); + yaml::Input YamlIn(FileReadBuffer->getBuffer()); + std::unique_ptr Stub(new IFSStub()); + YamlIn >> *Stub; + + if (std::error_code Err = YamlIn.error()) + return createStringError(Err, "Failed reading Interface Stub File."); + + return std::move(Stub); +} + +int writeTbdStub(const llvm::Triple &T, const std::set &Symbols, + const StringRef Format, raw_ostream &Out) { + + auto PlatformKindOrError = + [](const llvm::Triple &T) -> llvm::Expected { + if (T.isMacOSX()) + return llvm::MachO::PlatformKind::macOS; + if (T.isTvOS()) + return llvm::MachO::PlatformKind::tvOS; + if (T.isWatchOS()) + return llvm::MachO::PlatformKind::watchOS; + // Note: put isiOS last because tvOS and watchOS are also iOS according + // to the Triple. + if (T.isiOS()) + return llvm::MachO::PlatformKind::iOS; + + // TODO: Add an option for ForceTriple, but keep ForceFormat for now. 
+ if (ForceFormat == "TBD") + return llvm::MachO::PlatformKind::macOS; + + return createStringError(errc::not_supported, "Invalid Platform.\n"); + }(T); + + if (!PlatformKindOrError) + return -1; + + PlatformKind Plat = PlatformKindOrError.get(); + TargetList Targets({Target(llvm::MachO::mapToArchitecture(T), Plat)}); + + InterfaceFile File; + File.setFileType(FileType::TBD_V3); // Only supporting v3 for now. + File.addTargets(Targets); + + for (const auto &Symbol : Symbols) { + auto Name = Symbol.Name; + auto Kind = SymbolKind::GlobalSymbol; + switch (Symbol.Type) { + default: + case IFSSymbolType::NoType: + Kind = SymbolKind::GlobalSymbol; + break; + case IFSSymbolType::Object: + Kind = SymbolKind::GlobalSymbol; + break; + case IFSSymbolType::Func: + Kind = SymbolKind::GlobalSymbol; + break; + } + if (Symbol.Weak) + File.addSymbol(Kind, Name, Targets, SymbolFlags::WeakDefined); + else + File.addSymbol(Kind, Name, Targets); + } + + SmallString<4096> Buffer; + raw_svector_ostream OS(Buffer); + if (Error Result = TextAPIWriter::writeToStream(OS, File)) + return -1; + Out << OS.str(); + return 0; +} + +int writeElfStub(const llvm::Triple &T, const std::set &Symbols, + const StringRef Format, raw_ostream &Out) { + SmallString<0> Storage; + Storage.clear(); + raw_svector_ostream OS(Storage); + + OS << "--- !ELF\n"; + OS << "FileHeader:\n"; + OS << " Class: ELFCLASS"; + OS << (T.isArch64Bit() ? "64" : "32"); + OS << "\n"; + OS << " Data: ELFDATA2"; + OS << (T.isLittleEndian() ? 
"LSB" : "MSB"); + OS << "\n"; + OS << " Type: ET_DYN\n"; + OS << " Machine: " + << llvm::StringSwitch(T.getArchName()) + .Case("x86_64", "EM_X86_64") + .Case("i386", "EM_386") + .Case("i686", "EM_386") + .Case("aarch64", "EM_AARCH64") + .Case("amdgcn", "EM_AMDGPU") + .Case("r600", "EM_AMDGPU") + .Case("arm", "EM_ARM") + .Case("thumb", "EM_ARM") + .Case("avr", "EM_AVR") + .Case("mips", "EM_MIPS") + .Case("mipsel", "EM_MIPS") + .Case("mips64", "EM_MIPS") + .Case("mips64el", "EM_MIPS") + .Case("msp430", "EM_MSP430") + .Case("ppc", "EM_PPC") + .Case("ppc64", "EM_PPC64") + .Case("ppc64le", "EM_PPC64") + .Case("x86", T.isOSIAMCU() ? "EM_IAMCU" : "EM_386") + .Case("x86_64", "EM_X86_64") + .Default("EM_NONE") + << "\nSections:" + << "\n - Name: .text" + << "\n Type: SHT_PROGBITS" + << "\n - Name: .data" + << "\n Type: SHT_PROGBITS" + << "\n - Name: .rodata" + << "\n Type: SHT_PROGBITS" + << "\nSymbols:\n"; + for (const auto &Symbol : Symbols) { + OS << " - Name: " << Symbol.Name << "\n" + << " Type: STT_"; + switch (Symbol.Type) { + default: + case IFSSymbolType::NoType: + OS << "NOTYPE"; + break; + case IFSSymbolType::Object: + OS << "OBJECT"; + break; + case IFSSymbolType::Func: + OS << "FUNC"; + break; + } + OS << "\n Section: .text" + << "\n Binding: STB_" << (Symbol.Weak ? "WEAK" : "GLOBAL") + << "\n"; + } + OS << "...\n"; + + std::string YamlStr = OS.str(); + + // Only for debugging. Not an official format. + LLVM_DEBUG({ + if (ForceFormat == "ELFOBJYAML") { + Out << YamlStr; + return 0; + } + }); + + yaml::Input YIn(YamlStr); + auto ErrHandler = [](const Twine &Msg) { + WithColor::error(errs(), "llvm-ifs") << Msg << "\n"; + }; + return convertYAML(YIn, Out, ErrHandler) ? 0 : 1; +} + +int writeIfso(const IFSStub &Stub, bool IsWriteIfs, raw_ostream &Out) { + if (IsWriteIfs) { + yaml::Output YamlOut(Out, NULL, /*WrapColumn =*/0); + YamlOut << const_cast(Stub); + return 0; + } + + std::string ObjectFileFormat = + ForceFormat.empty() ? 
Stub.ObjectFileFormat : ForceFormat; + + if (ObjectFileFormat == "ELF" || ForceFormat == "ELFOBJYAML") + return writeElfStub(llvm::Triple(Stub.Triple), Stub.Symbols, + Stub.ObjectFileFormat, Out); + if (ObjectFileFormat == "TBD") + return writeTbdStub(llvm::Triple(Stub.Triple), Stub.Symbols, + Stub.ObjectFileFormat, Out); + + WithColor::error() + << "Invalid ObjectFileFormat: Only ELF and TBD are supported.\n"; + return -1; +} + +// New Interface Stubs Yaml Format: +// --- !experimental-ifs-v1 +// IfsVersion: 1.0 +// Triple: +// ObjectFileFormat: +// Symbols: +// _ZSymbolName: { Type: } +// ... + +int main(int argc, char *argv[]) { + // Parse arguments. + cl::ParseCommandLineOptions(argc, argv); + + if (InputFilenames.empty()) + InputFilenames.push_back("-"); + + IFSStub Stub; + std::map SymbolMap; + + std::string PreviousInputFilePath = ""; + for (const std::string &InputFilePath : InputFilenames) { + Expected> StubOrErr = readInputFile(InputFilePath); + if (!StubOrErr) { + WithColor::error() << StubOrErr.takeError() << "\n"; + return -1; + } + std::unique_ptr TargetStub = std::move(StubOrErr.get()); + + if (Stub.Triple.empty()) { + PreviousInputFilePath = InputFilePath; + Stub.IfsVersion = TargetStub->IfsVersion; + Stub.Triple = TargetStub->Triple; + Stub.ObjectFileFormat = TargetStub->ObjectFileFormat; + Stub.SOName = TargetStub->SOName; + Stub.NeededLibs = TargetStub->NeededLibs; + } else { + if (Stub.IfsVersion != TargetStub->IfsVersion) { + if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) { + WithColor::error() + << "Interface Stub: IfsVersion Mismatch." 
+ << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\nIfsVersion Values: " << Stub.IfsVersion + << " " << TargetStub->IfsVersion << "\n"; + return -1; + } + if (TargetStub->IfsVersion > Stub.IfsVersion) + Stub.IfsVersion = TargetStub->IfsVersion; + } + if (Stub.ObjectFileFormat != TargetStub->ObjectFileFormat) { + WithColor::error() << "Interface Stub: ObjectFileFormat Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\nObjectFileFormat Values: " + << Stub.ObjectFileFormat << " " + << TargetStub->ObjectFileFormat << "\n"; + return -1; + } + if (Stub.Triple != TargetStub->Triple) { + WithColor::error() << "Interface Stub: Triple Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath + << "\nTriple Values: " << Stub.Triple << " " + << TargetStub->Triple << "\n"; + return -1; + } + if (Stub.SOName != TargetStub->SOName) { + WithColor::error() << "Interface Stub: SOName Mismatch." + << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath + << "\nSOName Values: " << Stub.SOName << " " + << TargetStub->SOName << "\n"; + return -1; + } + if (Stub.NeededLibs != TargetStub->NeededLibs) { + WithColor::error() << "Interface Stub: NeededLibs Mismatch." 
+ << "\nFilenames: " << PreviousInputFilePath << " " + << InputFilePath << "\n"; + return -1; + } + } + + for (auto Symbol : TargetStub->Symbols) { + auto SI = SymbolMap.find(Symbol.Name); + if (SI == SymbolMap.end()) { + SymbolMap.insert( + std::pair(Symbol.Name, Symbol)); + continue; + } + + assert(Symbol.Name == SI->second.Name && "Symbol Names Must Match."); + + // Check conflicts: + if (Symbol.Type != SI->second.Type) { + WithColor::error() << "Interface Stub: Type Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nType Values: " << getTypeName(SI->second.Type) + << " " << getTypeName(Symbol.Type) << "\n"; + + return -1; + } + if (Symbol.Size != SI->second.Size) { + WithColor::error() << "Interface Stub: Size Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nSize Values: " << SI->second.Size << " " + << Symbol.Size << "\n"; + + return -1; + } + if (Symbol.Weak != SI->second.Weak) { + // TODO: Add conflict resolution for Weak vs non-Weak. + WithColor::error() << "Interface Stub: Weak Mismatch for " + << Symbol.Name << ".\nFilename: " << InputFilePath + << "\nWeak Values: " << SI->second.Weak << " " + << Symbol.Weak << "\n"; + + return -1; + } + // TODO: Not checking Warning. Will be dropped. + } + + PreviousInputFilePath = InputFilePath; + } + + if (Stub.IfsVersion != IFSVersionCurrent) + if (Stub.IfsVersion.getMajor() != IFSVersionCurrent.getMajor()) { + WithColor::error() << "Interface Stub: Bad IfsVersion: " + << Stub.IfsVersion << ", llvm-ifs supported version: " + << IFSVersionCurrent << ".\n"; + return -1; + } + + for (auto &Entry : SymbolMap) + Stub.Symbols.insert(Entry.second); + + std::error_code SysErr; + + // Open file for writing. 
+ raw_fd_ostream Out(OutputFilename, SysErr); + if (SysErr) { + WithColor::error() << "Couldn't open " << OutputFilename + << " for writing.\n"; + return -1; + } + + return writeIfso(Stub, (Action == "write-ifs"), Out); +} diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp index 50ba57178d0..fa36e083b6f 100644 --- a/tools/llvm-link/llvm-link.cpp +++ b/tools/llvm-link/llvm-link.cpp @@ -351,13 +351,13 @@ int main(int argc, char **argv) { LLVMContext Context; Context.setDiagnosticHandler( - llvm::make_unique(), true); + std::make_unique(), true); cl::ParseCommandLineOptions(argc, argv, "llvm linker\n"); if (!DisableDITypeMap) Context.enableDebugTypeODRUniquing(); - auto Composite = make_unique("llvm-link", Context); + auto Composite = std::make_unique("llvm-link", Context); Linker L(*Composite); unsigned Flags = Linker::Flags::None; @@ -381,7 +381,7 @@ int main(int argc, char **argv) { errs() << "Here's the assembly:\n" << *Composite; std::error_code EC; - ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); + ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None); if (EC) { WithColor::error() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp index 585207b2518..b47e68e8285 100644 --- a/tools/llvm-lto/llvm-lto.cpp +++ b/tools/llvm-lto/llvm-lto.cpp @@ -315,8 +315,8 @@ getLocalLTOModule(StringRef Path, std::unique_ptr &Buffer, error(BufferOrErr, "error loading file '" + Path + "'"); Buffer = std::move(BufferOrErr.get()); CurrentActivity = ("loading file '" + Path + "'").str(); - std::unique_ptr Context = llvm::make_unique(); - Context->setDiagnosticHandler(llvm::make_unique(), + std::unique_ptr Context = std::make_unique(); + Context->setDiagnosticHandler(std::make_unique(), true); ErrorOr> Ret = LTOModule::createInLocalContext( std::move(Context), Buffer->getBufferStart(), Buffer->getBufferSize(), @@ -420,7 +420,7 @@ static void createCombinedModuleSummaryIndex() { std::error_code EC; 
assert(!OutputFilename.empty()); raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC, - sys::fs::OpenFlags::F_None); + sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'"); WriteIndexToFile(CombinedIndex, OS); OS.close(); @@ -510,7 +510,7 @@ static std::unique_ptr loadModuleFromInput(lto::InputFile &File, static void writeModuleToFile(Module &TheModule, StringRef Filename) { std::error_code EC; - raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + Filename + "'"); maybeVerifyModule(TheModule); WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true); @@ -581,7 +581,7 @@ private: if (!CombinedIndex) report_fatal_error("ThinLink didn't create an index"); std::error_code EC; - raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputFilename + "'"); WriteIndexToFile(*CombinedIndex, OS); } @@ -619,7 +619,7 @@ private: } OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix); std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex); } @@ -802,7 +802,7 @@ private: } std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); OS << std::get<0>(BinName)->getBuffer(); } @@ -848,7 +848,7 @@ private: for (unsigned BufID = 0; BufID < Binaries.size(); ++BufID) { auto OutputName = InputFilenames[BufID] + ".thinlto.o"; std::error_code EC; - raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream 
OS(OutputName, EC, sys::fs::OpenFlags::OF_None); error(EC, "error opening the file '" + OutputName + "'"); OS << Binaries[BufID]->getBuffer(); } @@ -921,7 +921,7 @@ int main(int argc, char **argv) { unsigned BaseArg = 0; LLVMContext Context; - Context.setDiagnosticHandler(llvm::make_unique(), + Context.setDiagnosticHandler(std::make_unique(), true); LTOCodeGenerator CodeGen(Context); @@ -1020,7 +1020,7 @@ int main(int argc, char **argv) { if (Parallelism != 1) PartFilename += "." + utostr(I); std::error_code EC; - OSs.emplace_back(PartFilename, EC, sys::fs::F_None); + OSs.emplace_back(PartFilename, EC, sys::fs::OF_None); if (EC) error("error opening the file '" + PartFilename + "': " + EC.message()); OSPtrs.push_back(&OSs.back().os()); diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp index 0bd9289dc93..5e3b3dcb6c3 100644 --- a/tools/llvm-lto2/llvm-lto2.cpp +++ b/tools/llvm-lto2/llvm-lto2.cpp @@ -291,6 +291,14 @@ static int run(int argc, char **argv) { std::vector Res; for (const InputFile::Symbol &Sym : Input->symbols()) { auto I = CommandLineResolutions.find({F, Sym.getName()}); + // If it isn't found, look for "$", which would have been added + // (followed by a hash) when the symbol was promoted during module + // splitting if it was defined in one part and used in the other. + // Try looking up the symbol name before the "$". + if (I == CommandLineResolutions.end()) { + auto SplitName = Sym.getName().rsplit("$"); + I = CommandLineResolutions.find({F, SplitName.first}); + } if (I == CommandLineResolutions.end()) { llvm::errs() << argv[0] << ": missing symbol resolution for " << F << ',' << Sym.getName() << '\n'; @@ -325,9 +333,9 @@ static int run(int argc, char **argv) { std::string Path = OutputFilename + "." 
+ utostr(Task); std::error_code EC; - auto S = llvm::make_unique(Path, EC, sys::fs::F_None); + auto S = std::make_unique(Path, EC, sys::fs::OF_None); check(EC, Path); - return llvm::make_unique(std::move(S)); + return std::make_unique(std::move(S)); }; auto AddBuffer = [&](size_t Task, std::unique_ptr MB) { diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index e2af2e7f2e3..1ddbddfa184 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -129,13 +130,10 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray, return false; } -int Disassembler::disassemble(const Target &T, - const std::string &Triple, - MCSubtargetInfo &STI, - MCStreamer &Streamer, - MemoryBuffer &Buffer, - SourceMgr &SM, - raw_ostream &Out) { +int Disassembler::disassemble(const Target &T, const std::string &Triple, + MCSubtargetInfo &STI, MCStreamer &Streamer, + MemoryBuffer &Buffer, SourceMgr &SM, + MCContext &Ctx, raw_ostream &Out) { std::unique_ptr MRI(T.createMCRegInfo(Triple)); if (!MRI) { @@ -149,9 +147,6 @@ int Disassembler::disassemble(const Target &T, return -1; } - // Set up the MCContext for creating symbols and MCExpr's. 
- MCContext Ctx(MAI.get(), MRI.get(), nullptr); - std::unique_ptr DisAsm( T.createMCDisassembler(STI, Ctx)); if (!DisAsm) { diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h index 11b685233ab..dcd8c279c91 100644 --- a/tools/llvm-mc/Disassembler.h +++ b/tools/llvm-mc/Disassembler.h @@ -22,17 +22,15 @@ class MemoryBuffer; class Target; class raw_ostream; class SourceMgr; +class MCContext; class MCSubtargetInfo; class MCStreamer; class Disassembler { public: - static int disassemble(const Target &T, - const std::string &Triple, - MCSubtargetInfo &STI, - MCStreamer &Streamer, - MemoryBuffer &Buffer, - SourceMgr &SM, + static int disassemble(const Target &T, const std::string &Triple, + MCSubtargetInfo &STI, MCStreamer &Streamer, + MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx, raw_ostream &Out); }; diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index ec189c29786..c23740a3094 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -209,9 +209,10 @@ static const Target *GetTarget(const char *ProgName) { return TheTarget; } -static std::unique_ptr GetOutputStream(StringRef Path) { +static std::unique_ptr GetOutputStream(StringRef Path, + sys::fs::OpenFlags Flags) { std::error_code EC; - auto Out = llvm::make_unique(Path, EC, sys::fs::F_None); + auto Out = std::make_unique(Path, EC, Flags); if (EC) { WithColor::error() << EC.message() << '\n'; return nullptr; @@ -279,7 +280,7 @@ static int fillCommandLineSymbols(MCAsmParser &Parser) { static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, - MCInstrInfo &MCII, MCTargetOptions &MCOptions) { + MCInstrInfo &MCII, MCTargetOptions const &MCOptions) { std::unique_ptr Parser( createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( @@ -316,7 +317,7 @@ int main(int argc, char **argv) { 
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); + const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); setDwarfDebugFlags(argc, argv); setDwarfDebugProducer(); @@ -368,7 +369,7 @@ int main(int argc, char **argv) { // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and // MCObjectFileInfo needs a MCContext reference in order to initialize itself. MCObjectFileInfo MOFI; - MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions); MOFI.InitMCObjectFileInfo(TheTriple, PIC, Ctx, LargeCodeModel); if (SaveTempLabels) @@ -413,7 +414,9 @@ int main(int argc, char **argv) { FeaturesStr = Features.getString(); } - std::unique_ptr Out = GetOutputStream(OutputFilename); + sys::fs::OpenFlags Flags = (FileType == OFT_AssemblyFile) ? sys::fs::OF_Text + : sys::fs::OF_None; + std::unique_ptr Out = GetOutputStream(OutputFilename, Flags); if (!Out) return 1; @@ -423,7 +426,7 @@ int main(int argc, char **argv) { WithColor::error() << "dwo output only supported with object files\n"; return 1; } - DwoOut = GetOutputStream(SplitDwarfFile); + DwoOut = GetOutputStream(SplitDwarfFile, sys::fs::OF_None); if (!DwoOut) return 1; } @@ -459,7 +462,7 @@ int main(int argc, char **argv) { std::unique_ptr MAB( TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); - auto FOut = llvm::make_unique(*OS); + auto FOut = std::make_unique(*OS); Str.reset( TheTarget->createAsmStreamer(Ctx, std::move(FOut), /*asmverbose*/ true, /*useDwarfDirectory*/ true, IP, @@ -474,7 +477,7 @@ int main(int argc, char **argv) { Ctx.setUseNamesOnTempLabels(false); if (!Out->os().supportsSeeking()) { - BOS = make_unique(Out->os()); + BOS = std::make_unique(Out->os()); OS = BOS.get(); } @@ -506,7 +509,7 @@ int main(int argc, char **argv) { break; case AC_MDisassemble: 
assert(IP && "Expected assembly output"); - IP->setUseMarkup(1); + IP->setUseMarkup(true); disassemble = true; break; case AC_Disassemble: @@ -514,8 +517,8 @@ int main(int argc, char **argv) { break; } if (disassemble) - Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, - *Buffer, SrcMgr, Out->os()); + Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer, + SrcMgr, Ctx, Out->os()); // Keep output if no errors. if (Res == 0) { diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp index bf592f67245..e05517c1ac9 100644 --- a/tools/llvm-mca/CodeRegion.cpp +++ b/tools/llvm-mca/CodeRegion.cpp @@ -18,7 +18,7 @@ namespace mca { CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) { // Create a default region for the input code sequence. - Regions.emplace_back(make_unique("", SMLoc())); + Regions.emplace_back(std::make_unique("", SMLoc())); } bool CodeRegion::isLocInRange(SMLoc Loc) const { @@ -36,7 +36,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) { if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) { ActiveRegions[Description] = 0; - Regions[0] = make_unique(Description, Loc); + Regions[0] = std::make_unique(Description, Loc); return; } } else { @@ -62,7 +62,7 @@ void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) { } ActiveRegions[Description] = Regions.size(); - Regions.emplace_back(make_unique(Description, Loc)); + Regions.emplace_back(std::make_unique(Description, Loc)); return; } diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp index c793169e64e..8ddcd2f4abe 100644 --- a/tools/llvm-mca/CodeRegionGenerator.cpp +++ b/tools/llvm-mca/CodeRegionGenerator.cpp @@ -118,6 +118,8 @@ Expected AsmCodeRegionGenerator::parseCodeRegions() { MCAsmLexer &Lexer = Parser->getLexer(); MCACommentConsumer CC(Regions); Lexer.setCommentConsumer(&CC); + // Enable support for MASM literal 
numbers (example: 05h, 101b). + Lexer.setLexMasmIntegers(true); std::unique_ptr TAP( TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts)); diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp index 560c6c6e8a3..feff0cd6d52 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, "Unsupported dependency type!"); OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; } - OS << " - CYCLES: " << DE.Cost << '\n'; + OS << " - COST: " << DE.Cost << '\n'; } #endif // NDEBUG +void DependencyGraph::pruneEdges(unsigned Iterations) { + for (DGNode &N : Nodes) { + unsigned NumPruned = 0; + const unsigned Size = N.OutgoingEdges.size(); + // Use a cut-off threshold to prune edges with a low frequency. + for (unsigned I = 0, E = Size; I < E; ++I) { + DependencyEdge &Edge = N.OutgoingEdges[I]; + if (Edge.Frequency == Iterations) + continue; + double Factor = (double)Edge.Frequency / Iterations; + if (0.10 < Factor) + continue; + Nodes[Edge.ToIID].NumPredecessors--; + std::swap(Edge, N.OutgoingEdges[E - 1]); + --E; + ++NumPruned; + } + + if (NumPruned) + N.OutgoingEdges.resize(Size - NumPruned); + } +} + void DependencyGraph::initializeRootSet( SmallVectorImpl &RootSet) const { for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { @@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet( } void DependencyGraph::propagateThroughEdges( - SmallVectorImpl &RootSet) { + SmallVectorImpl &RootSet, unsigned Iterations) { SmallVector ToVisit; // A critical sequence is computed as the longest path from a node of the @@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges( // Each node of the graph starts with an initial default cost of zero. The // cost of a node is a measure of criticality: the higher the cost, the bigger // is the performance impact. 
+ // For register and memory dependencies, the cost is a function of the write + // latency as well as the actual delay (in cycles) caused to users. + // For processor resource dependencies, the cost is a function of the resource + // pressure. Resource interferences with low frequency values are ignored. // // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of // the inner loop selects (i.e. visits) a node N from a set of `unvisited @@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS, } void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + // Early exit if no bottlenecks were found during the simulation. + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + SmallVector Seq; DG.getCriticalSequence(Seq); if (Seq.empty()) @@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addRegisterDep(From, To + SourceSize, RegID, Cost); DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); return; @@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addMemoryDep(From, To + SourceSize, Cost); DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); return; @@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addResourceDep(From, To + SourceSize, Mask, Cost); DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); return; @@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { // Check if this is the last simulated instruction. 
if (IID == ((Iterations * Source.size()) - 1)) - DG.finalizeGraph(); + DG.finalizeGraph(Iterations); } void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h index 7564b1a4820..9e3bd5978f0 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -236,8 +236,9 @@ class DependencyGraph { void addDependency(unsigned From, unsigned To, DependencyEdge::Dependency &&DE); + void pruneEdges(unsigned Iterations); void initializeRootSet(SmallVectorImpl &RootSet) const; - void propagateThroughEdges(SmallVectorImpl &RootSet); + void propagateThroughEdges(SmallVectorImpl &RootSet, unsigned Iterations); #ifndef NDEBUG void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, @@ -263,10 +264,11 @@ public: // Called by the bottleneck analysis at the end of simulation to propagate // costs through the edges of the graph, and compute a critical path. 
- void finalizeGraph() { + void finalizeGraph(unsigned Iterations) { SmallVector RootSet; + pruneEdges(Iterations); initializeRootSet(RootSet); - propagateThroughEdges(RootSet); + propagateThroughEdges(RootSet, Iterations); } // Returns a sequence of edges representing the critical sequence based on the diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp index 1fbffa3e5b6..a6f9153b494 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" namespace llvm { namespace mca { @@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const { TempStream << "\n\nInstruction Info:\n"; TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" - << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } - TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; - for (const MCInst &Inst : Source) { + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); // Obtain the scheduling class information from the instruction. @@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const { } TempStream << (MCDesc.mayLoad() ? " * " : " "); TempStream << (MCDesc.mayStore() ? " * " : " "); - TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? 
" U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } MCIP.printInst(&Inst, InstrStream, "", STI); InstrStream.flush(); @@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const { // Consume any tabs or spaces at the beginning of the string. StringRef Str(Instruction); Str = Str.ltrim(); - TempStream << " " << Str << '\n'; + TempStream << Str << '\n'; Instruction = ""; } diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h index 640d8738343..0e948304119 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.h +++ b/tools/llvm-mca/Views/InstructionInfoView.h @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" @@ -51,14 +52,18 @@ namespace mca { class InstructionInfoView : public View { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; llvm::ArrayRef Source; llvm::MCInstPrinter &MCIP; public: - InstructionInfoView(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - llvm::ArrayRef S, llvm::MCInstPrinter &IP) - : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} void printView(llvm::raw_ostream &OS) const override; }; diff --git 
a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp index fe3f16ba344..1e7caa297ac 100644 --- a/tools/llvm-mca/Views/TimelineView.cpp +++ b/tools/llvm-mca/Views/TimelineView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/TimelineView.h" +#include namespace llvm { namespace mca { @@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, const WaitTimeEntry &Entry, unsigned SourceIndex, unsigned Executions) const { - OS << SourceIndex << '.'; + bool PrintingTotals = SourceIndex == Source.size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + OS.PadToColumn(7); double AverageTime1, AverageTime2, AverageTime3; - AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; - AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; + AverageTime1 = + (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; + AverageTime3 = + (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; OS << Executions; OS.PadToColumn(13); - int BufferSize = UsedBuffer[SourceIndex].second; - tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize); + + int BufferSize = PrintingTotals ? 
0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); OS.PadToColumn(20); - tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); OS.PadToColumn(27); - tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions, - STI.getSchedModel().MicroOpBufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize); OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); if (OS.has_colors()) @@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { ++IID; } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (Source.size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "" << '\n'; + InstrStream.flush(); + } } void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h index b63b234293c..9bec3b87db4 100644 --- a/tools/llvm-mca/Views/TimelineView.h +++ b/tools/llvm-mca/Views/TimelineView.h @@ -84,6 +84,7 @@ /// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 /// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 /// 5. 
2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 /// /// By comparing column [2] with column [1], we get an idea about how many /// cycles were spent in the scheduler's queue due to data dependencies. diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index b3590b5910e..99c45eebdd8 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -32,11 +32,17 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/MCA/Context.h" +#include "llvm/MCA/InstrBuilder.h" #include "llvm/MCA/Pipeline.h" #include "llvm/MCA/Stages/EntryStage.h" #include "llvm/MCA/Stages/InstructionTables.h" @@ -83,11 +89,20 @@ static cl::opt cl::desc("Target a specific cpu type (-mcpu=help for details)"), cl::value_desc("cpu-name"), cl::cat(ToolOptions), cl::init("native")); +static cl::opt + MATTR("mattr", + cl::desc("Additional target features."), + cl::cat(ToolOptions)); + static cl::opt OutputAsmVariant("output-asm-variant", cl::desc("Syntax variant to use for output printing"), cl::cat(ToolOptions), cl::init(-1)); +static cl::opt + PrintImmHex("print-imm-hex", cl::cat(ToolOptions), cl::init(false), + cl::desc("Prefer hex format when printing immediate values")); + static cl::opt Iterations("iterations", cl::desc("Number of iterations to run"), cl::cat(ToolOptions), cl::init(0)); @@ -193,6 +208,11 @@ static cl::opt EnableBottleneckAnalysis( cl::desc("Enable bottleneck analysis (disabled by default)"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt ShowEncoding( + "show-encoding", + cl::desc("Print encoding information in the instruction info 
view"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -218,7 +238,7 @@ ErrorOr> getOutputStream() { OutputFilename = "-"; std::error_code EC; auto Out = - llvm::make_unique(OutputFilename, EC, sys::fs::F_None); + std::make_unique(OutputFilename, EC, sys::fs::OF_Text); if (!EC) return std::move(Out); return EC; @@ -303,33 +323,11 @@ int main(int argc, char **argv) { // Apply overrides to llvm-mca specific options. processViewOptions(); - SourceMgr SrcMgr; - - // Tell SrcMgr about this buffer, which is what the parser will pick up. - SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); - - std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); - assert(MRI && "Unable to create target register info!"); - - std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); - assert(MAI && "Unable to create target asm info!"); - - MCObjectFileInfo MOFI; - MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); - MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx); - - std::unique_ptr BOS; - - std::unique_ptr MCII(TheTarget->createMCInstrInfo()); - - std::unique_ptr MCIA( - TheTarget->createMCInstrAnalysis(MCII.get())); - if (!MCPU.compare("native")) MCPU = llvm::sys::getHostCPUName(); std::unique_ptr STI( - TheTarget->createMCSubtargetInfo(TripleName, MCPU, /* FeaturesStr */ "")); + TheTarget->createMCSubtargetInfo(TripleName, MCPU, MATTR)); if (!STI->isCPUStringValid(MCPU)) return 1; @@ -352,6 +350,29 @@ int main(int argc, char **argv) { return 1; } + std::unique_ptr MRI(TheTarget->createMCRegInfo(TripleName)); + assert(MRI && "Unable to create target register info!"); + + std::unique_ptr MAI(TheTarget->createMCAsmInfo(*MRI, TripleName)); + assert(MAI && "Unable to create target asm info!"); + + MCObjectFileInfo MOFI; + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. 
+ SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc()); + + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr); + + MOFI.InitMCObjectFileInfo(TheTriple, /* PIC= */ false, Ctx); + + std::unique_ptr BOS; + + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + + std::unique_ptr MCIA( + TheTarget->createMCInstrAnalysis(MCII.get())); + // Parse the input and create CodeRegions that llvm-mca can analyze. mca::AsmCodeRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI, *MCII); Expected RegionsOrErr = CRG.parseCodeRegions(); @@ -396,6 +417,9 @@ int main(int argc, char **argv) { return 1; } + // Set the display preference for hex vs. decimal immediates. + IP->setPrintImmHex(PrintImmHex); + std::unique_ptr TOF = std::move(*OF); const MCSchedModel &SM = STI->getSchedModel(); @@ -413,6 +437,12 @@ int main(int argc, char **argv) { // Number each region in the sequence. unsigned RegionIdx = 0; + std::unique_ptr MCE( + TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); + + std::unique_ptr MAB(TheTarget->createMCAsmBackend( + *STI, *MRI, InitMCTargetOptionsFromFlags())); + for (const std::unique_ptr &Region : Regions) { // Skip empty code regions. if (Region->empty()) @@ -430,6 +460,7 @@ int main(int argc, char **argv) { // Lower the MCInst sequence into an mca::Instruction sequence. ArrayRef Insts = Region->getInstructions(); + mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); std::vector> LoweredSequence; for (const MCInst &MCI : Insts) { Expected> Inst = @@ -459,18 +490,18 @@ int main(int argc, char **argv) { if (PrintInstructionTables) { // Create a pipeline, stages, and a printer. - auto P = llvm::make_unique(); - P->appendStage(llvm::make_unique(S)); - P->appendStage(llvm::make_unique(SM)); + auto P = std::make_unique(); + P->appendStage(std::make_unique(S)); + P->appendStage(std::make_unique(SM)); mca::PipelinePrinter Printer(*P); // Create the views for this pipeline, execute, and emit a report. 
if (PrintInstructionInfoView) { - Printer.addView(llvm::make_unique( - *STI, *MCII, Insts, *IP)); + Printer.addView(std::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); } Printer.addView( - llvm::make_unique(*STI, *IP, Insts)); + std::make_unique(*STI, *IP, Insts)); if (!runPipeline(*P)) return 1; @@ -480,42 +511,42 @@ int main(int argc, char **argv) { } // Create a basic pipeline simulating an out-of-order backend. - auto P = MCA.createDefaultPipeline(PO, IB, S); + auto P = MCA.createDefaultPipeline(PO, S); mca::PipelinePrinter Printer(*P); if (PrintSummaryView) Printer.addView( - llvm::make_unique(SM, Insts, DispatchWidth)); + std::make_unique(SM, Insts, DispatchWidth)); if (EnableBottleneckAnalysis) { - Printer.addView(llvm::make_unique( + Printer.addView(std::make_unique( *STI, *IP, Insts, S.getNumIterations())); } if (PrintInstructionInfoView) - Printer.addView( - llvm::make_unique(*STI, *MCII, Insts, *IP)); + Printer.addView(std::make_unique( + *STI, *MCII, CE, ShowEncoding, Insts, *IP)); if (PrintDispatchStats) - Printer.addView(llvm::make_unique()); + Printer.addView(std::make_unique()); if (PrintSchedulerStats) - Printer.addView(llvm::make_unique(*STI)); + Printer.addView(std::make_unique(*STI)); if (PrintRetireStats) - Printer.addView(llvm::make_unique(SM)); + Printer.addView(std::make_unique(SM)); if (PrintRegisterFileStats) - Printer.addView(llvm::make_unique(*STI)); + Printer.addView(std::make_unique(*STI)); if (PrintResourcePressureView) Printer.addView( - llvm::make_unique(*STI, *IP, Insts)); + std::make_unique(*STI, *IP, Insts)); if (PrintTimelineView) { unsigned TimelineIterations = TimelineMaxIterations ? 
TimelineMaxIterations : 10; - Printer.addView(llvm::make_unique( + Printer.addView(std::make_unique( *STI, *IP, Insts, std::min(TimelineIterations, S.getNumIterations()), TimelineMaxCycles)); } diff --git a/tools/llvm-modextract/llvm-modextract.cpp b/tools/llvm-modextract/llvm-modextract.cpp index 3adefc5f0d3..7c409962584 100644 --- a/tools/llvm-modextract/llvm-modextract.cpp +++ b/tools/llvm-modextract/llvm-modextract.cpp @@ -54,7 +54,7 @@ int main(int argc, char **argv) { std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); ExitOnErr(errorCodeToError(EC)); if (BinaryExtract) { diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index aa62e6f0209..ee55722dc13 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -711,17 +711,21 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName, const std::string &ArchiveName, const std::string &ArchitectureName) { if (!NoSort) { - std::function Cmp; + using Comparator = bool (*)(const NMSymbol &, const NMSymbol &); + Comparator Cmp; if (NumericSort) - Cmp = compareSymbolAddress; + Cmp = &compareSymbolAddress; else if (SizeSort) - Cmp = compareSymbolSize; + Cmp = &compareSymbolSize; else - Cmp = compareSymbolName; + Cmp = &compareSymbolName; if (ReverseSort) - Cmp = [=](const NMSymbol &A, const NMSymbol &B) { return Cmp(B, A); }; - llvm::sort(SymbolList, Cmp); + llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool { + return Cmp(B, A); + }); + else + llvm::sort(SymbolList, Cmp); } if (!PrintFileName) { @@ -913,10 +917,12 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj, if (Flags & ELF::SHF_ALLOC) return Flags & ELF::SHF_WRITE ? 
'd' : 'r'; - StringRef SecName; - if (SecI->getName(SecName)) + auto NameOrErr = SecI->getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); return '?'; - if (SecName.startswith(".debug")) + } + if ((*NameOrErr).startswith(".debug")) return 'N'; if (!(Flags & ELF::SHF_WRITE)) return 'n'; @@ -1076,7 +1082,7 @@ static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) { static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I, StringRef &SecName) { uint32_t Symflags = I->getFlags(); - if (isa(&Obj)) { + if (ELFObjectFileBase *ELFObj = dyn_cast(&Obj)) { if (Symflags & object::SymbolRef::SF_Absolute) SecName = "*ABS*"; else if (Symflags & object::SymbolRef::SF_Common) @@ -1090,8 +1096,16 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I, consumeError(SecIOrErr.takeError()); return '?'; } - elf_section_iterator secT = *SecIOrErr; - secT->getName(SecName); + + if (*SecIOrErr == ELFObj->section_end()) + return '?'; + + Expected NameOrErr = (*SecIOrErr)->getName(); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + return '?'; + } + SecName = *NameOrErr; } } @@ -1347,7 +1361,12 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, StringRef SectionName = StringRef(); for (const SectionRef &Section : MachO->sections()) { S.NSect++; - Section.getName(SectionName); + + if (Expected NameOrErr = Section.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + SegmentName = MachO->getSectionFinalSegmentName( Section.getRawDataRefImpl()); if (S.Address >= Section.getAddress() && @@ -1667,7 +1686,11 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName, StringRef SegmentName = StringRef(); StringRef SectionName = StringRef(); for (const SectionRef &Section : MachO->sections()) { - Section.getName(SectionName); + if (Expected NameOrErr = Section.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + 
SegmentName = MachO->getSectionFinalSegmentName( Section.getRawDataRefImpl()); F.NSect++; diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp index 4ae46851a66..2a8d816e6f3 100644 --- a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp +++ b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp @@ -16,8 +16,8 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/Path.h" #include @@ -40,22 +40,13 @@ static uint64_t getNextRVA(const Object &Obj) { Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1); } -static uint32_t getCRC32(StringRef Data) { - JamCRC CRC; - CRC.update(ArrayRef(Data.data(), Data.size())); - // The CRC32 value needs to be complemented because the JamCRC dosn't - // finalize the CRC32 value. It also dosn't negate the initial CRC32 value - // but it starts by default at 0xFFFFFFFF which is the complement of zero. - return ~CRC.getCRC(); -} - static std::vector createGnuDebugLinkSectionContents(StringRef File) { ErrorOr> LinkTargetOrErr = MemoryBuffer::getFile(File); if (!LinkTargetOrErr) error("'" + File + "': " + LinkTargetOrErr.getError().message()); auto LinkTarget = std::move(*LinkTargetOrErr); - uint32_t CRC32 = getCRC32(LinkTarget->getBuffer()); + uint32_t CRC32 = llvm::crc32(arrayRefFromStringRef(LinkTarget->getBuffer())); StringRef FileName = sys::path::filename(File); size_t CRCPos = alignTo(FileName.size() + 1, 4); @@ -65,26 +56,37 @@ static std::vector createGnuDebugLinkSectionContents(StringRef File) { return Data; } -static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { - uint32_t StartRVA = getNextRVA(Obj); +// Adds named section with given contents to the object. +static void addSection(Object &Obj, StringRef Name, ArrayRef Contents, + uint32_t Characteristics) { + bool NeedVA = Characteristics & (IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE); - std::vector
Sections; Section Sec; - Sec.setOwnedContents(createGnuDebugLinkSectionContents(DebugLinkFile)); - Sec.Name = ".gnu_debuglink"; - Sec.Header.VirtualSize = Sec.getContents().size(); - Sec.Header.VirtualAddress = StartRVA; - Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize, - Obj.IsPE ? Obj.PeHeader.FileAlignment : 1); + Sec.setOwnedContents(Contents); + Sec.Name = Name; + Sec.Header.VirtualSize = NeedVA ? Sec.getContents().size() : 0u; + Sec.Header.VirtualAddress = NeedVA ? getNextRVA(Obj) : 0u; + Sec.Header.SizeOfRawData = + NeedVA ? alignTo(Sec.Header.VirtualSize, + Obj.IsPE ? Obj.PeHeader.FileAlignment : 1) + : Sec.getContents().size(); // Sec.Header.PointerToRawData is filled in by the writer. Sec.Header.PointerToRelocations = 0; Sec.Header.PointerToLinenumbers = 0; // Sec.Header.NumberOfRelocations is filled in by the writer. Sec.Header.NumberOfLinenumbers = 0; - Sec.Header.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | - IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE; - Sections.push_back(Sec); - Obj.addSections(Sections); + Sec.Header.Characteristics = Characteristics; + + Obj.addSections(Sec); +} + +static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) { + std::vector Contents = + createGnuDebugLinkSectionContents(DebugLinkFile); + addSection(Obj, ".gnu_debuglink", Contents, + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_DISCARDABLE); } static Error handleArgs(const CopyConfig &Config, Object &Obj) { @@ -92,8 +94,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { Obj.removeSections([&Config](const Section &Sec) { // Contrary to --only-keep-debug, --only-section fully removes sections that // aren't mentioned. 
- if (!Config.OnlySection.empty() && - !is_contained(Config.OnlySection, Sec.Name)) + if (!Config.OnlySection.empty() && !Config.OnlySection.matches(Sec.Name)) return true; if (Config.StripDebug || Config.StripAll || Config.StripAllGNU || @@ -103,7 +104,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return true; } - if (is_contained(Config.ToRemove, Sec.Name)) + if (Config.ToRemove.matches(Sec.Name)) return true; return false; @@ -137,7 +138,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Config.StripAll || Config.StripAllGNU) return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) { + if (Config.SymbolsToRemove.matches(Sym.Name)) { // Explicitly removing a referenced symbol is an error. if (Sym.Referenced) reportError(Config.OutputFilename, @@ -156,7 +157,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC || Sym.Sym.SectionNumber == 0) if (Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) + Config.UnneededSymbolsToRemove.matches(Sym.Name)) return true; // GNU objcopy keeps referenced local symbols and external symbols @@ -171,21 +172,38 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { return false; }); + for (const auto &Flag : Config.AddSection) { + StringRef SecName, FileName; + std::tie(SecName, FileName) = Flag.split("="); + + auto BufOrErr = MemoryBuffer::getFile(FileName); + if (!BufOrErr) + return createFileError(FileName, errorCodeToError(BufOrErr.getError())); + auto Buf = std::move(*BufOrErr); + + addSection( + Obj, SecName, + makeArrayRef(reinterpret_cast(Buf->getBufferStart()), + Buf->getBufferSize()), + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES); + } + if (!Config.AddGnuDebugLink.empty()) addGnuDebugLink(Obj, Config.AddGnuDebugLink); if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() || Config.BuildIdLinkInput || Config.BuildIdLinkOutput || 
!Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() || - !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() || - !Config.DumpSection.empty() || !Config.KeepSection.empty() || + !Config.AllocSectionsPrefix.empty() || !Config.DumpSection.empty() || + !Config.KeepSection.empty() || Config.NewSymbolVisibility || !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || - !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc || - Config.StripSections || Config.Weaken || Config.DecompressDebugSections || + !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || + !Config.SymbolsToRename.empty() || Config.ExtractDWO || + Config.KeepFileSymbols || Config.LocalizeHidden || Config.PreserveDates || + Config.StripDWO || Config.StripNonAlloc || Config.StripSections || + Config.Weaken || Config.DecompressDebugSections || Config.DiscardMode == DiscardType::Locals || !Config.SymbolsToAdd.empty() || Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, diff --git a/tools/llvm-objcopy/COFF/Reader.cpp b/tools/llvm-objcopy/COFF/Reader.cpp index 1f0ec9fa969..2fcec0057c0 100644 --- a/tools/llvm-objcopy/COFF/Reader.cpp +++ b/tools/llvm-objcopy/COFF/Reader.cpp @@ -36,14 +36,9 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const { DH->AddressOfNewExeHeader - sizeof(*DH)); if (COFFObj.is64()) { - const pe32plus_header *PE32Plus = nullptr; - if (auto EC = COFFObj.getPE32PlusHeader(PE32Plus)) - return errorCodeToError(EC); - Obj.PeHeader = *PE32Plus; + Obj.PeHeader = *COFFObj.getPE32PlusHeader(); } else { - const pe32_header *PE32 = nullptr; - if (auto EC = COFFObj.getPE32Header(PE32)) - return 
errorCodeToError(EC); + const pe32_header *PE32 = COFFObj.getPE32Header(); copyPeHeader(Obj.PeHeader, *PE32); // The pe32plus_header (stored in Object) lacks the BaseOfData field. Obj.BaseOfData = PE32->BaseOfData; @@ -196,16 +191,13 @@ Error COFFReader::setSymbolTargets(Object &Obj) const { } Expected> COFFReader::create() const { - auto Obj = llvm::make_unique(); + auto Obj = std::make_unique(); - const coff_file_header *CFH = nullptr; - const coff_bigobj_file_header *CBFH = nullptr; - COFFObj.getCOFFHeader(CFH); - COFFObj.getCOFFBigObjHeader(CBFH); bool IsBigObj = false; - if (CFH) { + if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) { Obj->CoffFileHeader = *CFH; } else { + const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader(); if (!CBFH) return createStringError(object_error::parse_failed, "no COFF file header returned"); diff --git a/tools/llvm-objcopy/COFF/Writer.cpp b/tools/llvm-objcopy/COFF/Writer.cpp index f3bb1ce331f..6db37435fd9 100644 --- a/tools/llvm-objcopy/COFF/Writer.cpp +++ b/tools/llvm-objcopy/COFF/Writer.cpp @@ -120,12 +120,12 @@ size_t COFFWriter::finalizeStringTable() { StrTabBuilder.finalize(); for (auto &S : Obj.getMutableSections()) { + memset(S.Header.Name, 0, sizeof(S.Header.Name)); if (S.Name.size() > COFF::NameSize) { - memset(S.Header.Name, 0, sizeof(S.Header.Name)); snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d", (int)StrTabBuilder.getOffset(S.Name)); } else { - strncpy(S.Header.Name, S.Name.data(), COFF::NameSize); + memcpy(S.Header.Name, S.Name.data(), S.Name.size()); } } for (auto &S : Obj.getMutableSymbols()) { diff --git a/tools/llvm-objcopy/CommonOpts.td b/tools/llvm-objcopy/CommonOpts.td new file mode 100644 index 00000000000..e8c092b4443 --- /dev/null +++ b/tools/llvm-objcopy/CommonOpts.td @@ -0,0 +1,123 @@ +include "llvm/Option/OptParser.td" + +multiclass Eq { + def NAME : Separate<["--"], name>; + def NAME #_eq : Joined<["--"], name #"=">, + Alias(NAME)>, + HelpText; +} + +def help : Flag<["--"], 
"help">; +def h : Flag<["-"], "h">, Alias; + +def allow_broken_links + : Flag<["--"], "allow-broken-links">, + HelpText<"Allow the tool to remove sections even if it would leave " + "invalid section references. The appropriate sh_link fields " + "will be set to zero.">; + +def enable_deterministic_archives + : Flag<["--"], "enable-deterministic-archives">, + HelpText<"Enable deterministic mode when operating on archives (use " + "zero for UIDs, GIDs, and timestamps).">; +def D : Flag<["-"], "D">, + Alias, + HelpText<"Alias for --enable-deterministic-archives">; + +def disable_deterministic_archives + : Flag<["--"], "disable-deterministic-archives">, + HelpText<"Disable deterministic mode when operating on archives (use " + "real values for UIDs, GIDs, and timestamps).">; +def U : Flag<["-"], "U">, + Alias, + HelpText<"Alias for --disable-deterministic-archives">; + +def preserve_dates : Flag<["--"], "preserve-dates">, + HelpText<"Preserve access and modification timestamps">; +def p : Flag<["-"], "p">, + Alias, + HelpText<"Alias for --preserve-dates">; + +def strip_all : Flag<["--"], "strip-all">, + HelpText<"Remove non-allocated sections outside segments. " + ".gnu.warning* sections are not removed">; + +def strip_all_gnu + : Flag<["--"], "strip-all-gnu">, + HelpText<"Compatible with GNU's --strip-all">; + +def strip_debug : Flag<["--"], "strip-debug">, + HelpText<"Remove all debug sections">; +def g : Flag<["-"], "g">, + Alias, + HelpText<"Alias for --strip-debug">; + +def strip_unneeded : Flag<["--"], "strip-unneeded">, + HelpText<"Remove all symbols not needed by relocations">; + +defm remove_section : Eq<"remove-section", "Remove
">, + MetaVarName<"section">; +def R : JoinedOrSeparate<["-"], "R">, + Alias, + HelpText<"Alias for --remove-section">; + +def strip_sections + : Flag<["--"], "strip-sections">, + HelpText<"Remove all section headers and all sections not in segments">; + +defm strip_symbol : Eq<"strip-symbol", "Strip ">, + MetaVarName<"symbol">; +def N : JoinedOrSeparate<["-"], "N">, + Alias, + HelpText<"Alias for --strip-symbol">; + +defm keep_section : Eq<"keep-section", "Keep
">, + MetaVarName<"section">; + +defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, + MetaVarName<"symbol">; +def K : JoinedOrSeparate<["-"], "K">, + Alias, + HelpText<"Alias for --keep-symbol">; + +def keep_file_symbols : Flag<["--"], "keep-file-symbols">, + HelpText<"Do not remove file symbols">; + +def only_keep_debug + : Flag<["--"], "only-keep-debug">, + HelpText<"Clear sections that would not be stripped by --strip-debug. " + "Currently only implemented for COFF.">; + +def discard_locals : Flag<["--"], "discard-locals">, + HelpText<"Remove compiler-generated local symbols, (e.g. " + "symbols starting with .L)">; +def X : Flag<["-"], "X">, + Alias, + HelpText<"Alias for --discard-locals">; + +def discard_all + : Flag<["--"], "discard-all">, + HelpText<"Remove all local symbols except file and section symbols">; +def x : Flag<["-"], "x">, + Alias, + HelpText<"Alias for --discard-all">; + +def regex + : Flag<["--"], "regex">, + HelpText<"Permit regular expressions in name comparison">; + +def version : Flag<["--"], "version">, + HelpText<"Print the version and exit.">; +def V : Flag<["-"], "V">, + Alias, + HelpText<"Alias for --version">; + +def wildcard + : Flag<["--"], "wildcard">, + HelpText<"Allow wildcard syntax for symbol-related flags. Incompatible " + "with --regex. Allows using '*' to match any number of " + "characters, '?' to match any single character, '\' to escape " + "special characters, and '[]' to define character classes. " + "Wildcards beginning with '!' 
will prevent a match, for example " + "\"-N '*' -N '!x'\" will strip all symbols except for \"x\".">; +def w : Flag<["-"], "w">, Alias, HelpText<"Alias for --wildcard">; diff --git a/tools/llvm-objcopy/CopyConfig.cpp b/tools/llvm-objcopy/CopyConfig.cpp index 8d6431b3044..d707bec20c4 100644 --- a/tools/llvm-objcopy/CopyConfig.cpp +++ b/tools/llvm-objcopy/CopyConfig.cpp @@ -14,10 +14,10 @@ #include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/JamCRC.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/StringSaver.h" #include @@ -155,6 +155,25 @@ static Expected parseRenameSectionValue(StringRef FlagValue) { return SR; } +static Expected> +parseSetSectionAlignment(StringRef FlagValue) { + if (!FlagValue.contains('=')) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing '='"); + auto Split = StringRef(FlagValue).split('='); + if (Split.first.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --set-section-alignment: missing section name"); + uint64_t NewAlign; + if (Split.second.getAsInteger(0, NewAlign)) + return createStringError(errc::invalid_argument, + "invalid alignment for --set-section-alignment: '%s'", + Split.second.str().c_str()); + return std::make_pair(Split.first, NewAlign); +} + static Expected parseSetSectionFlagValue(StringRef FlagValue) { if (!StringRef(FlagValue).contains('=')) @@ -177,106 +196,6 @@ parseSetSectionFlagValue(StringRef FlagValue) { return SFU; } -static Expected parseNewSymbolInfo(StringRef FlagValue) { - // Parse value given with --add-symbol option and create the - // new symbol if possible. The value format for --add-symbol is: - // - // =[
:][,] - // - // where: - // - symbol name, can be empty string - //
- optional section name. If not given ABS symbol is created - // - symbol value, can be decimal or hexadecimal number prefixed - // with 0x. - // - optional flags affecting symbol type, binding or visibility: - // The following are currently supported: - // - // global, local, weak, default, hidden, file, section, object, - // indirect-function. - // - // The following flags are ignored and provided for GNU - // compatibility only: - // - // warning, debug, constructor, indirect, synthetic, - // unique-object, before=. - NewSymbolInfo SI; - StringRef Value; - std::tie(SI.SymbolName, Value) = FlagValue.split('='); - if (Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing '=' after '%s'", - SI.SymbolName.str().c_str()); - - if (Value.contains(':')) { - std::tie(SI.SectionName, Value) = Value.split(':'); - if (SI.SectionName.empty() || Value.empty()) - return createStringError( - errc::invalid_argument, - "bad format for --add-symbol, missing section name or symbol value"); - } - - SmallVector Flags; - Value.split(Flags, ','); - if (Flags[0].getAsInteger(0, SI.Value)) - return createStringError(errc::invalid_argument, "bad symbol value: '%s'", - Flags[0].str().c_str()); - - using Functor = std::function; - SmallVector UnsupportedFlags; - for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) - static_cast( - StringSwitch(Flags[I]) - .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; }) - .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; }) - .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; }) - .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; }) - .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; }) - .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; }) - .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; }) - .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; }) - .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; }) - 
.CaseLower("indirect-function", - [&SI] { SI.Type = ELF::STT_GNU_IFUNC; }) - .CaseLower("debug", [] {}) - .CaseLower("constructor", [] {}) - .CaseLower("warning", [] {}) - .CaseLower("indirect", [] {}) - .CaseLower("synthetic", [] {}) - .CaseLower("unique-object", [] {}) - .StartsWithLower("before", [] {}) - .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); - if (!UnsupportedFlags.empty()) - return createStringError(errc::invalid_argument, - "unsupported flag%s for --add-symbol: '%s'", - UnsupportedFlags.size() > 1 ? "s" : "", - join(UnsupportedFlags, "', '").c_str()); - return SI; -} - -static const StringMap ArchMap{ - // Name, {EMachine, 64bit, LittleEndian} - {"aarch64", {ELF::EM_AARCH64, true, true}}, - {"arm", {ELF::EM_ARM, false, true}}, - {"i386", {ELF::EM_386, false, true}}, - {"i386:x86-64", {ELF::EM_X86_64, true, true}}, - {"mips", {ELF::EM_MIPS, false, false}}, - {"powerpc:common64", {ELF::EM_PPC64, true, true}}, - {"riscv:rv32", {ELF::EM_RISCV, false, true}}, - {"riscv:rv64", {ELF::EM_RISCV, true, true}}, - {"sparc", {ELF::EM_SPARC, false, false}}, - {"sparcel", {ELF::EM_SPARC, false, true}}, - {"x86-64", {ELF::EM_X86_64, true, true}}, -}; - -static Expected getMachineInfo(StringRef Arch) { - auto Iter = ArchMap.find(Arch); - if (Iter == std::end(ArchMap)) - return createStringError(errc::invalid_argument, - "invalid architecture: '%s'", Arch.str().c_str()); - return Iter->getValue(); -} - struct TargetInfo { FileFormat Format; MachineInfo Machine; @@ -341,9 +260,10 @@ getOutputTargetInfoByTargetName(StringRef TargetName) { return {TargetInfo{Format, MI}}; } -static Error addSymbolsFromFile(std::vector &Symbols, - BumpPtrAllocator &Alloc, StringRef Filename, - bool UseRegex) { +static Error +addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc, + StringRef Filename, MatchStyle MS, + llvm::function_ref ErrorCallback) { StringSaver Saver(Alloc); SmallVector Lines; auto BufOrErr = MemoryBuffer::getFile(Filename); @@ -356,21 +276,47 
@@ static Error addSymbolsFromFile(std::vector &Symbols, // it's not empty. auto TrimmedLine = Line.split('#').first.trim(); if (!TrimmedLine.empty()) - Symbols.emplace_back(Saver.save(TrimmedLine), UseRegex); + if (Error E = Symbols.addMatcher(NameOrPattern::create( + Saver.save(TrimmedLine), MS, ErrorCallback))) + return E; } return Error::success(); } -NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) { - if (!IsRegex) { - Name = Pattern; - return; - } +Expected +NameOrPattern::create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback) { + switch (MS) { + case MatchStyle::Literal: + return NameOrPattern(Pattern); + case MatchStyle::Wildcard: { + SmallVector Data; + bool IsPositiveMatch = true; + if (Pattern[0] == '!') { + IsPositiveMatch = false; + Pattern = Pattern.drop_front(); + } + Expected GlobOrErr = GlobPattern::create(Pattern); - SmallVector Data; - R = std::make_shared( - ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data)); + // If we couldn't create it as a glob, report the error, but try again with + // a literal if the error reporting is non-fatal. 
+ if (!GlobOrErr) { + if (Error E = ErrorCallback(GlobOrErr.takeError())) + return std::move(E); + return create(Pattern, MatchStyle::Literal, ErrorCallback); + } + + return NameOrPattern(std::make_shared(*GlobOrErr), + IsPositiveMatch); + } + case MatchStyle::Regex: { + SmallVector Data; + return NameOrPattern(std::make_shared( + ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data))); + } + } + llvm_unreachable("Unhandled llvm.objcopy.MatchStyle enum"); } static Error addSymbolsToRenameFromFile(StringMap &SymbolsToRename, @@ -407,10 +353,22 @@ template static ErrorOr getAsInteger(StringRef Val) { return Result; } +static void printHelp(const opt::OptTable &OptTable, raw_ostream &OS, + StringRef ToolName) { + OptTable.PrintHelp(OS, (ToolName + " input [output]").str().c_str(), + (ToolName + " tool").str().c_str()); + // TODO: Replace this with libOption call once it adds extrahelp support. + // The CommandLine library has a cl::extrahelp class to support this, + // but libOption does not have that yet. + OS << "\nPass @FILE as argument to read options from FILE.\n"; +} + // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and // exit. 
-Expected parseObjcopyOptions(ArrayRef ArgsArr) { +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback) { DriverConfig DC; ObjcopyOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; @@ -418,12 +376,12 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - T.PrintHelp(errs(), "llvm-objcopy input [output]", "objcopy tool"); + printHelp(T, errs(), "llvm-objcopy"); exit(1); } if (InputArgs.hasArg(OBJCOPY_help)) { - T.PrintHelp(outs(), "llvm-objcopy input [output]", "objcopy tool"); + printHelp(T, outs(), "llvm-objcopy"); exit(0); } @@ -459,7 +417,18 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { errc::invalid_argument, "--target cannot be used with --input-target or --output-target"); - bool UseRegex = InputArgs.hasArg(OBJCOPY_regex); + if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + + MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(OBJCOPY_wildcard) + ? 
MatchStyle::Wildcard + : MatchStyle::Literal; StringRef InputFormat, OutputFormat; if (InputArgs.hasArg(OBJCOPY_target)) { InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); @@ -476,28 +445,26 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); - if (Config.InputFormat == FileFormat::Binary) { - auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture); - if (BinaryArch.empty()) - return createStringError( - errc::invalid_argument, - "specified binary input without specifiying an architecture"); - Expected MI = getMachineInfo(BinaryArch); - if (!MI) - return MI.takeError(); - Config.BinaryArch = *MI; - } + + if (InputArgs.hasArg(OBJCOPY_new_symbol_visibility)) + Config.NewSymbolVisibility = + InputArgs.getLastArgValue(OBJCOPY_new_symbol_visibility); Config.OutputFormat = StringSwitch(OutputFormat) .Case("binary", FileFormat::Binary) .Case("ihex", FileFormat::IHex) .Default(FileFormat::Unspecified); - if (Config.OutputFormat == FileFormat::Unspecified && !OutputFormat.empty()) { - Expected Target = getOutputTargetInfoByTargetName(OutputFormat); - if (!Target) - return Target.takeError(); - Config.OutputFormat = Target->Format; - Config.OutputArch = Target->Machine; + if (Config.OutputFormat == FileFormat::Unspecified) { + if (OutputFormat.empty()) { + Config.OutputFormat = Config.InputFormat; + } else { + Expected Target = + getOutputTargetInfoByTargetName(OutputFormat); + if (!Target) + return Target.takeError(); + Config.OutputFormat = Target->Format; + Config.OutputArch = Target->Machine; + } } if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections, @@ -535,12 +502,8 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { if (!DebugOrErr) return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError()); auto Debug = std::move(*DebugOrErr); - JamCRC CRC; - CRC.update( - ArrayRef(Debug->getBuffer().data(), 
Debug->getBuffer().size())); - // The CRC32 value needs to be complemented because the JamCRC doesn't - // finalize the CRC32 value. - Config.GnuDebugLinkCRC32 = ~CRC.getCRC(); + Config.GnuDebugLinkCRC32 = + llvm::crc32(arrayRefFromStringRef(Debug->getBuffer())); } Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir); if (InputArgs.hasArg(OBJCOPY_build_id_link_input)) @@ -582,6 +545,13 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { "multiple renames of section '%s'", SR->OriginalName.str().c_str()); } + for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_alignment)) { + Expected> NameAndAlign = + parseSetSectionAlignment(Arg->getValue()); + if (!NameAndAlign) + return NameAndAlign.takeError(); + Config.SetSectionAlignment[NameAndAlign->first] = NameAndAlign->second; + } for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) { Expected SFU = parseSetSectionFlagValue(Arg->getValue()); @@ -612,13 +582,28 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { } for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) - Config.ToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) - Config.KeepSection.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) - Config.OnlySection.emplace_back(Arg->getValue(), UseRegex); - for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) - Config.AddSection.push_back(Arg->getValue()); + if (Error E = Config.OnlySection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { + StringRef 
ArgValue(Arg->getValue()); + if (!ArgValue.contains('=')) + return createStringError(errc::invalid_argument, + "bad format for --add-section: missing '='"); + if (ArgValue.split("=").second.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-section: missing file name"); + Config.AddSection.push_back(ArgValue); + } for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section)) Config.DumpSection.push_back(Arg->getValue()); Config.StripAll = InputArgs.hasArg(OBJCOPY_strip_all); @@ -645,53 +630,71 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { if (Config.DiscardMode == DiscardType::All) Config.StripDebug = true; for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) - Config.SymbolsToLocalize.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) - Config.SymbolsToKeepGlobal.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToKeepGlobal.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) - Config.SymbolsToGlobalize.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToGlobalize.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg 
: InputArgs.filtered(OBJCOPY_globalize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) - Config.SymbolsToWeaken.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToWeaken.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) - Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) - Config.UnneededSymbolsToRemove.emplace_back(Arg->getValue(), UseRegex); + if (Error E = + Config.UnneededSymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle, + ErrorCallback)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) - Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegex); - for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) - if (Error E = 
addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, - Arg->getValue(), UseRegex)) + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) return std::move(E); - for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { - Expected NSI = parseNewSymbolInfo(Arg->getValue()); - if (!NSI) - return NSI.takeError(); - Config.SymbolsToAdd.push_back(*NSI); - } + for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) + if (Error E = + addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, Arg->getValue(), + SymbolMatchStyle, ErrorCallback)) + return std::move(E); + for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) + Config.SymbolsToAdd.push_back(Arg->getValue()); Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links); @@ -754,19 +757,19 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr) { // exit. Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback) { + llvm::function_ref ErrorCallback) { StripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (InputArgs.size() == 0) { - T.PrintHelp(errs(), "llvm-strip [options] file...", "strip tool"); + printHelp(T, errs(), "llvm-strip"); exit(1); } if (InputArgs.hasArg(STRIP_help)) { - T.PrintHelp(outs(), "llvm-strip [options] file...", "strip tool"); + printHelp(T, outs(), "llvm-strip"); exit(0); } @@ -792,7 +795,17 @@ parseStripOptions(ArrayRef ArgsArr, "multiple input files cannot be used in combination with -o"); CopyConfig Config; - bool UseRegexp = InputArgs.hasArg(STRIP_regex); + + if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + MatchStyle SectionMatchStyle = + InputArgs.hasArg(STRIP_regex) ? 
MatchStyle::Regex : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(STRIP_wildcard) + ? MatchStyle::Wildcard + : MatchStyle::Literal; Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); @@ -801,6 +814,7 @@ parseStripOptions(ArrayRef ArgsArr, InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals) ? DiscardType::All : DiscardType::Locals; + Config.StripSections = InputArgs.hasArg(STRIP_strip_sections); Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded); if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all)) Config.StripAll = Arg->getOption().getID() == STRIP_strip_all; @@ -809,16 +823,24 @@ parseStripOptions(ArrayRef ArgsArr, Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); for (auto Arg : InputArgs.filtered(STRIP_keep_section)) - Config.KeepSection.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_remove_section)) - Config.ToRemove.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.ToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SectionMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) - Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.SymbolsToRemove.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) - Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegexp); + if (Error E = Config.SymbolsToKeep.addMatcher(NameOrPattern::create( + Arg->getValue(), SymbolMatchStyle, ErrorCallback))) + return std::move(E); if (!InputArgs.hasArg(STRIP_no_strip_all) 
&& !Config.StripDebug && !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && diff --git a/tools/llvm-objcopy/CopyConfig.h b/tools/llvm-objcopy/CopyConfig.h index aff3631a487..55a55d3a2bc 100644 --- a/tools/llvm-objcopy/CopyConfig.h +++ b/tools/llvm-objcopy/CopyConfig.h @@ -9,6 +9,7 @@ #ifndef LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H #define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H +#include "ELF/ELFConfig.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/Optional.h" @@ -18,6 +19,7 @@ #include "llvm/Object/ELFTypes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/Regex.h" // Necessary for llvm::DebugCompressionType::None #include "llvm/Target/TargetOptions.h" @@ -87,36 +89,71 @@ enum class DiscardType { Locals, // --discard-locals (-X) }; -class NameOrRegex { +enum class MatchStyle { + Literal, // Default for symbols. + Wildcard, // Default for sections, or enabled with --wildcard (-w). + Regex, // Enabled with --regex. +}; + +class NameOrPattern { StringRef Name; // Regex is shared between multiple CopyConfig instances. std::shared_ptr R; + std::shared_ptr G; + bool IsPositiveMatch = true; + + NameOrPattern(StringRef N) : Name(N) {} + NameOrPattern(std::shared_ptr R) : R(R) {} + NameOrPattern(std::shared_ptr G, bool IsPositiveMatch) + : G(G), IsPositiveMatch(IsPositiveMatch) {} public: - NameOrRegex(StringRef Pattern, bool IsRegex); - bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; } + // ErrorCallback is used to handle recoverable errors. An Error returned + // by the callback aborts the parsing and is then returned by this function. + static Expected + create(StringRef Pattern, MatchStyle MS, + llvm::function_ref ErrorCallback); + + bool isPositiveMatch() const { return IsPositiveMatch; } + bool operator==(StringRef S) const { + return R ? R->match(S) : G ? 
G->match(S) : Name == S; + } bool operator!=(StringRef S) const { return !operator==(S); } }; -struct NewSymbolInfo { - StringRef SymbolName; - StringRef SectionName; - uint64_t Value = 0; - uint8_t Type = ELF::STT_NOTYPE; - uint8_t Bind = ELF::STB_GLOBAL; - uint8_t Visibility = ELF::STV_DEFAULT; +// Matcher that checks symbol or section names against the command line flags +// provided for that option. +class NameMatcher { + std::vector PosMatchers; + std::vector NegMatchers; + +public: + Error addMatcher(Expected Matcher) { + if (!Matcher) + return Matcher.takeError(); + if (Matcher->isPositiveMatch()) + PosMatchers.push_back(std::move(*Matcher)); + else + NegMatchers.push_back(std::move(*Matcher)); + return Error::success(); + } + bool matches(StringRef S) const { + return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S); + } + bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); } }; // Configuration for copying/stripping a single file. struct CopyConfig { + // Format-specific options to be initialized lazily when needed. + Optional ELF; + // Main input/output options StringRef InputFilename; FileFormat InputFormat; StringRef OutputFilename; FileFormat OutputFormat; - // Only applicable for --input-format=binary - MachineInfo BinaryArch; // Only applicable when --output-format!=binary (e.g. elf64-x86-64). 
Optional OutputArch; @@ -132,24 +169,30 @@ struct CopyConfig { StringRef SymbolsPrefix; StringRef AllocSectionsPrefix; DiscardType DiscardMode = DiscardType::None; + Optional NewSymbolVisibility; // Repeated options std::vector AddSection; std::vector DumpSection; - std::vector SymbolsToAdd; - std::vector KeepSection; - std::vector OnlySection; - std::vector SymbolsToGlobalize; - std::vector SymbolsToKeep; - std::vector SymbolsToLocalize; - std::vector SymbolsToRemove; - std::vector UnneededSymbolsToRemove; - std::vector SymbolsToWeaken; - std::vector ToRemove; - std::vector SymbolsToKeepGlobal; + std::vector SymbolsToAdd; + + // Section matchers + NameMatcher KeepSection; + NameMatcher OnlySection; + NameMatcher ToRemove; + + // Symbol matchers + NameMatcher SymbolsToGlobalize; + NameMatcher SymbolsToKeep; + NameMatcher SymbolsToLocalize; + NameMatcher SymbolsToRemove; + NameMatcher UnneededSymbolsToRemove; + NameMatcher SymbolsToWeaken; + NameMatcher SymbolsToKeepGlobal; // Map options StringMap SectionsToRename; + StringMap SetSectionAlignment; StringMap SetSectionFlags; StringMap SymbolsToRename; @@ -178,6 +221,18 @@ struct CopyConfig { bool Weaken = false; bool DecompressDebugSections = false; DebugCompressionType CompressionType = DebugCompressionType::None; + + // parseELFConfig performs ELF-specific command-line parsing. Fills `ELF` on + // success or returns an Error otherwise. + Error parseELFConfig() { + if (!ELF) { + Expected ELFConfig = elf::parseConfig(*this); + if (!ELFConfig) + return ELFConfig.takeError(); + ELF = *ELFConfig; + } + return Error::success(); + } }; // Configuration for the overall invocation of this tool. When invoked as @@ -190,8 +245,11 @@ struct DriverConfig { // ParseObjcopyOptions returns the config and sets the input arguments. If a // help flag is set then ParseObjcopyOptions will print the help messege and -// exit. -Expected parseObjcopyOptions(ArrayRef ArgsArr); +// exit. ErrorCallback is used to handle recoverable errors. 
An Error returned +// by the callback aborts the parsing and is then returned by this function. +Expected +parseObjcopyOptions(ArrayRef ArgsArr, + llvm::function_ref ErrorCallback); // ParseStripOptions returns the config and sets the input arguments. If a // help flag is set then ParseStripOptions will print the help messege and @@ -199,7 +257,7 @@ Expected parseObjcopyOptions(ArrayRef ArgsArr); // by the callback aborts the parsing and is then returned by this function. Expected parseStripOptions(ArrayRef ArgsArr, - std::function ErrorCallback); + llvm::function_ref ErrorCallback); } // namespace objcopy } // namespace llvm diff --git a/tools/llvm-objcopy/ELF/ELFConfig.cpp b/tools/llvm-objcopy/ELF/ELFConfig.cpp new file mode 100644 index 00000000000..40993760add --- /dev/null +++ b/tools/llvm-objcopy/ELF/ELFConfig.cpp @@ -0,0 +1,133 @@ +//===- ELFConfig.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CopyConfig.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace objcopy { +namespace elf { + +static Expected parseNewSymbolInfo(StringRef FlagValue, + uint8_t DefaultVisibility) { + // Parse value given with --add-symbol option and create the + // new symbol if possible. The value format for --add-symbol is: + // + // =[
:][,] + // + // where: + // - symbol name, can be empty string + //
- optional section name. If not given ABS symbol is created + // - symbol value, can be decimal or hexadecimal number prefixed + // with 0x. + // - optional flags affecting symbol type, binding or visibility: + // The following are currently supported: + // + // global, local, weak, default, hidden, file, section, object, + // indirect-function. + // + // The following flags are ignored and provided for GNU + // compatibility only: + // + // warning, debug, constructor, indirect, synthetic, + // unique-object, before=. + NewSymbolInfo SI; + StringRef Value; + std::tie(SI.SymbolName, Value) = FlagValue.split('='); + if (Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing '=' after '%s'", + SI.SymbolName.str().c_str()); + + if (Value.contains(':')) { + std::tie(SI.SectionName, Value) = Value.split(':'); + if (SI.SectionName.empty() || Value.empty()) + return createStringError( + errc::invalid_argument, + "bad format for --add-symbol, missing section name or symbol value"); + } + + SmallVector Flags; + Value.split(Flags, ','); + if (Flags[0].getAsInteger(0, SI.Value)) + return createStringError(errc::invalid_argument, "bad symbol value: '%s'", + Flags[0].str().c_str()); + + SI.Visibility = DefaultVisibility; + + using Functor = std::function; + SmallVector UnsupportedFlags; + for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I) + static_cast( + StringSwitch(Flags[I]) + .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; }) + .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; }) + .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; }) + .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; }) + .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; }) + .CaseLower("protected", + [&SI] { SI.Visibility = ELF::STV_PROTECTED; }) + .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; }) + .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; }) + .CaseLower("object", [&SI] { 
SI.Type = ELF::STT_OBJECT; }) + .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; }) + .CaseLower("indirect-function", + [&SI] { SI.Type = ELF::STT_GNU_IFUNC; }) + .CaseLower("debug", [] {}) + .CaseLower("constructor", [] {}) + .CaseLower("warning", [] {}) + .CaseLower("indirect", [] {}) + .CaseLower("synthetic", [] {}) + .CaseLower("unique-object", [] {}) + .StartsWithLower("before", [] {}) + .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))(); + if (!UnsupportedFlags.empty()) + return createStringError(errc::invalid_argument, + "unsupported flag%s for --add-symbol: '%s'", + UnsupportedFlags.size() > 1 ? "s" : "", + join(UnsupportedFlags, "', '").c_str()); + return SI; +} + +Expected parseConfig(const CopyConfig &Config) { + ELFCopyConfig ELFConfig; + if (Config.NewSymbolVisibility) { + const uint8_t Invalid = 0xff; + ELFConfig.NewSymbolVisibility = + StringSwitch(*Config.NewSymbolVisibility) + .Case("default", ELF::STV_DEFAULT) + .Case("hidden", ELF::STV_HIDDEN) + .Case("internal", ELF::STV_INTERNAL) + .Case("protected", ELF::STV_PROTECTED) + .Default(Invalid); + + if (ELFConfig.NewSymbolVisibility == Invalid) + return createStringError(errc::invalid_argument, + "'%s' is not a valid symbol visibility", + Config.NewSymbolVisibility->str().c_str()); + } + + for (StringRef Arg : Config.SymbolsToAdd) { + Expected NSI = parseNewSymbolInfo( + Arg, + ELFConfig.NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT)); + if (!NSI) + return NSI.takeError(); + ELFConfig.SymbolsToAdd.push_back(*NSI); + } + + return ELFConfig; +} + +} // end namespace elf +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/ELF/ELFConfig.h b/tools/llvm-objcopy/ELF/ELFConfig.h new file mode 100644 index 00000000000..977efbc4166 --- /dev/null +++ b/tools/llvm-objcopy/ELF/ELFConfig.h @@ -0,0 +1,44 @@ +//===- ELFConfig.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_OBJCOPY_ELFCONFIG_H +#define LLVM_TOOLS_OBJCOPY_ELFCONFIG_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace objcopy { +struct CopyConfig; + +namespace elf { + +struct NewSymbolInfo { + StringRef SymbolName; + StringRef SectionName; + uint64_t Value = 0; + uint8_t Type = ELF::STT_NOTYPE; + uint8_t Bind = ELF::STB_GLOBAL; + uint8_t Visibility = ELF::STV_DEFAULT; +}; + +struct ELFCopyConfig { + Optional NewSymbolVisibility; + std::vector SymbolsToAdd; +}; + +Expected parseConfig(const CopyConfig &Config); + +} // namespace elf +} // namespace objcopy +} // namespace llvm + +#endif diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp index b366c6e5598..8bf7e0f8801 100644 --- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp +++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp @@ -136,16 +136,16 @@ static std::unique_ptr createELFWriter(const CopyConfig &Config, // Depending on the initial ELFT and OutputFormat we need a different Writer. 
switch (OutputElfType) { case ELFT_ELF32LE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF64LE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF32BE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); case ELFT_ELF64BE: - return llvm::make_unique>(Obj, Buf, + return std::make_unique>(Obj, Buf, !Config.StripSections); } llvm_unreachable("Invalid output format"); @@ -156,9 +156,9 @@ static std::unique_ptr createWriter(const CopyConfig &Config, ElfType OutputElfType) { switch (Config.OutputFormat) { case FileFormat::Binary: - return llvm::make_unique(Obj, Buf); + return std::make_unique(Obj, Buf); case FileFormat::IHex: - return llvm::make_unique(Obj, Buf); + return std::make_unique(Obj, Buf); default: return createELFWriter(Config, Obj, Buf, OutputElfType); } @@ -263,7 +263,7 @@ static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink, static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader, StringRef File, ElfType OutputElfType) { - auto DWOFile = Reader.create(); + auto DWOFile = Reader.create(false); auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); }; @@ -305,9 +305,9 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename, SecName.str().c_str()); } -static bool isCompressable(const SectionBase &Section) { - return !(Section.Flags & ELF::SHF_COMPRESSED) && - StringRef(Section.Name).startswith(".debug"); +static bool isCompressable(const SectionBase &Sec) { + return !(Sec.Flags & ELF::SHF_COMPRESSED) && + StringRef(Sec.Name).startswith(".debug"); } static void replaceDebugSections( @@ -356,7 +356,7 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF && ((Config.LocalizeHidden && (Sym.Visibility == STV_HIDDEN || 
Sym.Visibility == STV_INTERNAL)) || - is_contained(Config.SymbolsToLocalize, Sym.Name))) + Config.SymbolsToLocalize.matches(Sym.Name))) Sym.Binding = STB_LOCAL; // Note: these two globalize flags have very similar names but different @@ -370,16 +370,15 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // --keep-global-symbol. Because of that, make sure to check // --globalize-symbol second. if (!Config.SymbolsToKeepGlobal.empty() && - !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) && + !Config.SymbolsToKeepGlobal.matches(Sym.Name) && Sym.getShndx() != SHN_UNDEF) Sym.Binding = STB_LOCAL; - if (is_contained(Config.SymbolsToGlobalize, Sym.Name) && + if (Config.SymbolsToGlobalize.matches(Sym.Name) && Sym.getShndx() != SHN_UNDEF) Sym.Binding = STB_GLOBAL; - if (is_contained(Config.SymbolsToWeaken, Sym.Name) && - Sym.Binding == STB_GLOBAL) + if (Config.SymbolsToWeaken.matches(Sym.Name) && Sym.Binding == STB_GLOBAL) Sym.Binding = STB_WEAK; if (Config.Weaken && Sym.Binding == STB_GLOBAL && @@ -399,12 +398,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { // symbols are still 'needed' and which are not. 
if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() || !Config.OnlySection.empty()) { - for (auto &Section : Obj.sections()) - Section.markSymbols(); + for (SectionBase &Sec : Obj.sections()) + Sec.markSymbols(); } auto RemoveSymbolsPred = [&](const Symbol &Sym) { - if (is_contained(Config.SymbolsToKeep, Sym.Name) || + if (Config.SymbolsToKeep.matches(Sym.Name) || (Config.KeepFileSymbols && Sym.Type == STT_FILE)) return false; @@ -418,12 +417,12 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) { if (Config.StripAll || Config.StripAllGNU) return true; - if (is_contained(Config.SymbolsToRemove, Sym.Name)) + if (Config.SymbolsToRemove.matches(Sym.Name)) return true; if ((Config.StripUnneeded || - is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) && - isUnneededSymbol(Sym)) + Config.UnneededSymbolsToRemove.matches(Sym.Name)) && + (!Obj.isRelocatable() || isUnneededSymbol(Sym))) return true; // We want to remove undefined symbols if all references have been stripped. @@ -443,7 +442,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { // Removes: if (!Config.ToRemove.empty()) { RemovePred = [&Config](const SectionBase &Sec) { - return is_contained(Config.ToRemove, Sec.Name); + return Config.ToRemove.matches(Sec.Name); }; } @@ -481,7 +480,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { }; } - if (Config.StripDebug) { + if (Config.StripDebug || Config.StripUnneeded) { RemovePred = [RemovePred](const SectionBase &Sec) { return RemovePred(Sec) || isDebugSection(Sec); }; @@ -523,7 +522,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { if (!Config.OnlySection.empty()) { RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. 
- if (is_contained(Config.OnlySection, Sec.Name)) + if (Config.OnlySection.matches(Sec.Name)) return false; // Allow all implicit removes. @@ -545,7 +544,7 @@ static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) { if (!Config.KeepSection.empty()) { RemovePred = [&Config, RemovePred](const SectionBase &Sec) { // Explicitly keep these sections regardless of previous removes. - if (is_contained(Config.KeepSection, Sec.Name)) + if (Config.KeepSection.matches(Sec.Name)) return false; // Otherwise defer to RemovePred. return RemovePred(Sec); @@ -614,9 +613,8 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, if (Error E = updateAndRemoveSymbols(Config, Obj)) return E; - if (!Config.SectionsToRename.empty() || !Config.AllocSectionsPrefix.empty()) { - DenseSet PrefixedSections; - for (auto &Sec : Obj.sections()) { + if (!Config.SectionsToRename.empty()) { + for (SectionBase &Sec : Obj.sections()) { const auto Iter = Config.SectionsToRename.find(Sec.Name); if (Iter != Config.SectionsToRename.end()) { const SectionRename &SR = Iter->second; @@ -624,63 +622,62 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, if (SR.NewFlags.hasValue()) setSectionFlagsAndType(Sec, SR.NewFlags.getValue()); } + } + } - // Add a prefix to allocated sections and their relocation sections. This - // should be done after renaming the section by Config.SectionToRename to - // imitate the GNU objcopy behavior. - if (!Config.AllocSectionsPrefix.empty()) { - if (Sec.Flags & SHF_ALLOC) { - Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str(); - PrefixedSections.insert(&Sec); - - // Rename relocation sections associated to the allocated sections. - // For example, if we rename .text to .prefix.text, we also rename - // .rel.text to .rel.prefix.text. - // - // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled - // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not - // .rela.prefix.plt since GNU objcopy does so. 
- } else if (auto *RelocSec = dyn_cast(&Sec)) { - auto *TargetSec = RelocSec->getSection(); - if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) { - StringRef prefix; - switch (Sec.Type) { - case SHT_REL: - prefix = ".rel"; - break; - case SHT_RELA: - prefix = ".rela"; - break; - default: - continue; - } - - // If the relocation section comes *after* the target section, we - // don't add Config.AllocSectionsPrefix because we've already added - // the prefix to TargetSec->Name. Otherwise, if the relocation - // section comes *before* the target section, we add the prefix. - if (PrefixedSections.count(TargetSec)) { - Sec.Name = (prefix + TargetSec->Name).str(); - } else { - const auto Iter = Config.SectionsToRename.find(TargetSec->Name); - if (Iter != Config.SectionsToRename.end()) { - // Both `--rename-section` and `--prefix-alloc-sections` are - // given but the target section is not yet renamed. - Sec.Name = - (prefix + Config.AllocSectionsPrefix + Iter->second.NewName) - .str(); - } else { - Sec.Name = - (prefix + Config.AllocSectionsPrefix + TargetSec->Name) - .str(); - } - } + // Add a prefix to allocated sections and their relocation sections. This + // should be done after renaming the section by Config.SectionToRename to + // imitate the GNU objcopy behavior. + if (!Config.AllocSectionsPrefix.empty()) { + DenseSet PrefixedSections; + for (SectionBase &Sec : Obj.sections()) { + if (Sec.Flags & SHF_ALLOC) { + Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str(); + PrefixedSections.insert(&Sec); + } else if (auto *RelocSec = dyn_cast(&Sec)) { + // Rename relocation sections associated to the allocated sections. + // For example, if we rename .text to .prefix.text, we also rename + // .rel.text to .rel.prefix.text. + // + // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled + // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not + // .rela.prefix.plt since GNU objcopy does so. 
+ const SectionBase *TargetSec = RelocSec->getSection(); + if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) { + StringRef prefix; + switch (Sec.Type) { + case SHT_REL: + prefix = ".rel"; + break; + case SHT_RELA: + prefix = ".rela"; + break; + default: + llvm_unreachable("not a relocation section"); } + + // If the relocation section comes *after* the target section, we + // don't add Config.AllocSectionsPrefix because we've already added + // the prefix to TargetSec->Name. Otherwise, if the relocation + // section comes *before* the target section, we add the prefix. + if (PrefixedSections.count(TargetSec)) + Sec.Name = (prefix + TargetSec->Name).str(); + else + Sec.Name = + (prefix + Config.AllocSectionsPrefix + TargetSec->Name).str(); } } } } + if (!Config.SetSectionAlignment.empty()) { + for (SectionBase &Sec : Obj.sections()) { + auto I = Config.SetSectionAlignment.find(Sec.Name); + if (I != Config.SetSectionAlignment.end()) + Sec.Align = I->second; + } + } + if (!Config.SetSectionFlags.empty()) { for (auto &Sec : Obj.sections()) { const auto Iter = Config.SetSectionFlags.find(Sec.Name); @@ -721,7 +718,7 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj, Obj.addSection(Config.AddGnuDebugLink, Config.GnuDebugLinkCRC32); - for (const NewSymbolInfo &SI : Config.SymbolsToAdd) { + for (const NewSymbolInfo &SI : Config.ELF->SymbolsToAdd) { SectionBase *Sec = Obj.findSection(SI.SectionName); uint64_t Value = Sec ? 
Sec->Addr + SI.Value : SI.Value; Obj.SymbolTable->addSymbol( @@ -746,9 +743,9 @@ static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out, Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { IHexReader Reader(&In); - std::unique_ptr Obj = Reader.create(); + std::unique_ptr Obj = Reader.create(true); const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch)); + getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) return E; return writeOutput(Config, *Obj, Out, OutputElfType); @@ -756,13 +753,15 @@ Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { - BinaryReader Reader(Config.BinaryArch, &In); - std::unique_ptr Obj = Reader.create(); + uint8_t NewSymbolVisibility = + Config.ELF->NewSymbolVisibility.getValueOr((uint8_t)ELF::STV_DEFAULT); + BinaryReader Reader(&In, NewSymbolVisibility); + std::unique_ptr Obj = Reader.create(true); // Prefer OutputArch (-O) if set, otherwise fallback to BinaryArch // (-B). const ElfType OutputElfType = - getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch)); + getOutputElfType(Config.OutputArch.getValueOr(MachineInfo())); if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType)) return E; return writeOutput(Config, *Obj, Out, OutputElfType); @@ -771,7 +770,7 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In, Error executeObjcopyOnBinary(const CopyConfig &Config, object::ELFObjectFileBase &In, Buffer &Out) { ELFReader Reader(&In, Config.ExtractPartition); - std::unique_ptr Obj = Reader.create(); + std::unique_ptr Obj = Reader.create(!Config.SymbolsToAdd.empty()); // Prefer OutputArch (-O) if set, otherwise infer it from the input. const ElfType OutputElfType = Config.OutputArch ? 
getOutputElfType(Config.OutputArch.getValue()) diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp index fa696380e17..74145dad6e6 100644 --- a/tools/llvm-objcopy/ELF/Object.cpp +++ b/tools/llvm-objcopy/ELF/Object.cpp @@ -397,7 +397,7 @@ void SectionWriter::visit(const OwnedDataSection &Sec) { llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset); } -static const std::vector ZlibGnuMagic = {'Z', 'L', 'I', 'B'}; +static constexpr std::array ZlibGnuMagic = {{'Z', 'L', 'I', 'B'}}; static bool isDataGnuCompressed(ArrayRef Data) { return Data.size() > ZlibGnuMagic.size() && @@ -665,7 +665,7 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type, Sym.Visibility = Visibility; Sym.Size = SymbolSize; Sym.Index = Symbols.size(); - Symbols.emplace_back(llvm::make_unique(Sym)); + Symbols.emplace_back(std::make_unique(Sym)); Size += this->EntrySize; } @@ -1055,29 +1055,28 @@ void GroupSection::accept(MutableSectionVisitor &Visitor) { } // Returns true IFF a section is wholly inside the range of a segment -static bool sectionWithinSegment(const SectionBase &Section, - const Segment &Segment) { +static bool sectionWithinSegment(const SectionBase &Sec, const Segment &Seg) { // If a section is empty it should be treated like it has a size of 1. This is // to clarify the case when an empty section lies on a boundary between two // segments and ensures that the section "belongs" to the second segment and // not the first. - uint64_t SecSize = Section.Size ? Section.Size : 1; + uint64_t SecSize = Sec.Size ? 
Sec.Size : 1; - if (Section.Type == SHT_NOBITS) { - if (!(Section.Flags & SHF_ALLOC)) + if (Sec.Type == SHT_NOBITS) { + if (!(Sec.Flags & SHF_ALLOC)) return false; - bool SectionIsTLS = Section.Flags & SHF_TLS; - bool SegmentIsTLS = Segment.Type == PT_TLS; + bool SectionIsTLS = Sec.Flags & SHF_TLS; + bool SegmentIsTLS = Seg.Type == PT_TLS; if (SectionIsTLS != SegmentIsTLS) return false; - return Segment.VAddr <= Section.Addr && - Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize; + return Seg.VAddr <= Sec.Addr && + Seg.VAddr + Seg.MemSize >= Sec.Addr + SecSize; } - return Segment.Offset <= Section.OriginalOffset && - Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize; + return Seg.Offset <= Sec.OriginalOffset && + Seg.Offset + Seg.FileSize >= Sec.OriginalOffset + SecSize; } // Returns true IFF a segment's original offset is inside of another segment's @@ -1113,7 +1112,7 @@ void BasicELFBuilder::initFileHeader() { Obj->OSABI = ELFOSABI_NONE; Obj->ABIVersion = 0; Obj->Entry = 0x0; - Obj->Machine = EMachine; + Obj->Machine = EM_NONE; Obj->Version = 1; } @@ -1141,8 +1140,8 @@ SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) { } void BasicELFBuilder::initSections() { - for (auto &Section : Obj->sections()) - Section.initialize(Obj->sections()); + for (SectionBase &Sec : Obj->sections()) + Sec.initialize(Obj->sections()); } void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { @@ -1161,11 +1160,12 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) { Twine Prefix = Twine("_binary_") + SanitizedFilename; SymTab->addSymbol(Prefix + "_start", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/0, STV_DEFAULT, 0, 0); + /*Value=*/0, NewSymbolVisibility, 0, 0); SymTab->addSymbol(Prefix + "_end", STB_GLOBAL, STT_NOTYPE, &DataSection, - /*Value=*/DataSection.Size, STV_DEFAULT, 0, 0); + /*Value=*/DataSection.Size, NewSymbolVisibility, 0, 0); SymTab->addSymbol(Prefix + "_size", STB_GLOBAL, STT_NOTYPE, nullptr, - 
/*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0); + /*Value=*/DataSection.Size, NewSymbolVisibility, SHN_ABS, + 0); } std::unique_ptr BinaryELFBuilder::build() { @@ -1255,10 +1255,9 @@ template void ELFBuilder::findEhdrOffset() { if (!ExtractPartition) return; - for (const SectionBase &Section : Obj.sections()) { - if (Section.Type == SHT_LLVM_PART_EHDR && - Section.Name == *ExtractPartition) { - EhdrOffset = Section.Offset; + for (const SectionBase &Sec : Obj.sections()) { + if (Sec.Type == SHT_LLVM_PART_EHDR && Sec.Name == *ExtractPartition) { + EhdrOffset = Sec.Offset; return; } } @@ -1287,15 +1286,12 @@ void ELFBuilder::readProgramHeaders(const ELFFile &HeadersFile) { Seg.MemSize = Phdr.p_memsz; Seg.Align = Phdr.p_align; Seg.Index = Index++; - for (SectionBase &Section : Obj.sections()) { - if (sectionWithinSegment(Section, Seg)) { - Seg.addSection(&Section); - if (!Section.ParentSegment || - Section.ParentSegment->Offset > Seg.Offset) { - Section.ParentSegment = &Seg; - } + for (SectionBase &Sec : Obj.sections()) + if (sectionWithinSegment(Sec, Seg)) { + Seg.addSection(&Sec); + if (!Sec.ParentSegment || Sec.ParentSegment->Offset > Seg.Offset) + Sec.ParentSegment = &Seg; } - } } auto &ElfHdr = Obj.ElfHdrSegment; @@ -1531,7 +1527,7 @@ template void ELFBuilder::readSectionHeaders() { } } -template void ELFBuilder::readSections() { +template void ELFBuilder::readSections(bool EnsureSymtab) { // If a section index table exists we'll need to initialize it before we // initialize the symbol table because the symbol table might need to // reference it. @@ -1544,16 +1540,37 @@ template void ELFBuilder::readSections() { if (Obj.SymbolTable) { Obj.SymbolTable->initialize(Obj.sections()); initSymbolTable(Obj.SymbolTable); + } else if (EnsureSymtab) { + // Reuse the existing SHT_STRTAB section if exists. 
+ StringTableSection *StrTab = nullptr; + for (auto &Sec : Obj.sections()) { + if (Sec.Type == ELF::SHT_STRTAB && !(Sec.Flags & SHF_ALLOC)) { + StrTab = static_cast(&Sec); + + // Prefer .strtab to .shstrtab. + if (Obj.SectionNames != &Sec) + break; + } + } + if (!StrTab) + StrTab = &Obj.addSection(); + + SymbolTableSection &SymTab = Obj.addSection(); + SymTab.Name = ".symtab"; + SymTab.Link = StrTab->Index; + SymTab.initialize(Obj.sections()); + SymTab.addSymbol("", 0, 0, nullptr, 0, 0, 0, 0); + Obj.SymbolTable = &SymTab; } // Now that all sections and symbols have been added we can add // relocations that reference symbols and set the link and info fields for // relocation sections. - for (auto &Section : Obj.sections()) { - if (&Section == Obj.SymbolTable) + for (auto &Sec : Obj.sections()) { + if (&Sec == Obj.SymbolTable) continue; - Section.initialize(Obj.sections()); - if (auto RelSec = dyn_cast(&Section)) { + Sec.initialize(Obj.sections()); + if (auto RelSec = dyn_cast(&Sec)) { auto Shdr = unwrapOrError(ElfFile.sections()).begin() + RelSec->Index; if (RelSec->Type == SHT_REL) initRelocations(RelSec, Obj.SymbolTable, @@ -1561,7 +1578,7 @@ template void ELFBuilder::readSections() { else initRelocations(RelSec, Obj.SymbolTable, unwrapOrError(ElfFile.relas(Shdr))); - } else if (auto GroupSec = dyn_cast(&Section)) { + } else if (auto GroupSec = dyn_cast(&Sec)) { initGroupSection(GroupSec); } } @@ -1582,7 +1599,7 @@ template void ELFBuilder::readSections() { " is not a string table"); } -template void ELFBuilder::build() { +template void ELFBuilder::build(bool EnsureSymtab) { readSectionHeaders(); findEhdrOffset(); @@ -1601,7 +1618,7 @@ template void ELFBuilder::build() { Obj.Entry = Ehdr.e_entry; Obj.Flags = Ehdr.e_flags; - readSections(); + readSections(EnsureSymtab); readProgramHeaders(HeadersFile); } @@ -1609,8 +1626,8 @@ Writer::~Writer() {} Reader::~Reader() {} -std::unique_ptr BinaryReader::create() const { - return BinaryELFBuilder(MInfo.EMachine, 
MemBuf).build(); +std::unique_ptr BinaryReader::create(bool /*EnsureSymtab*/) const { + return BinaryELFBuilder(MemBuf, NewSymbolVisibility).build(); } Expected> IHexReader::parse() const { @@ -1639,28 +1656,28 @@ Expected> IHexReader::parse() const { return std::move(Records); } -std::unique_ptr IHexReader::create() const { +std::unique_ptr IHexReader::create(bool /*EnsureSymtab*/) const { std::vector Records = unwrapOrError(parse()); return IHexELFBuilder(Records).build(); } -std::unique_ptr ELFReader::create() const { - auto Obj = llvm::make_unique(); +std::unique_ptr ELFReader::create(bool EnsureSymtab) const { + auto Obj = std::make_unique(); if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } else if (auto *O = dyn_cast>(Bin)) { ELFBuilder Builder(*O, *Obj, ExtractPartition); - Builder.build(); + Builder.build(EnsureSymtab); return Obj; } error("invalid file type"); @@ -1693,7 +1710,7 @@ template void ELFWriter::writeEhdr() { Ehdr.e_ehsize = sizeof(Elf_Ehdr); if (WriteSectionHeaders && Obj.sections().size() != 0) { Ehdr.e_shentsize = sizeof(Elf_Shdr); - Ehdr.e_shoff = Obj.SHOffset; + Ehdr.e_shoff = Obj.SHOff; // """ // If the number of sections is greater than or equal to // SHN_LORESERVE (0xff00), this member has the value zero and the actual @@ -1732,7 +1749,7 @@ template void ELFWriter::writeShdrs() { // This reference serves to write the dummy section header at the begining // of the file. 
It is not used for anything else Elf_Shdr &Shdr = - *reinterpret_cast(Buf.getBufferStart() + Obj.SHOffset); + *reinterpret_cast(Buf.getBufferStart() + Obj.SHOff); Shdr.sh_name = 0; Shdr.sh_type = SHT_NULL; Shdr.sh_flags = 0; @@ -1862,26 +1879,13 @@ void Object::sortSections() { }); } -static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) { - // Calculate Diff such that (Offset + Diff) & -Align == Addr & -Align. - if (Align == 0) - Align = 1; - auto Diff = - static_cast(Addr % Align) - static_cast(Offset % Align); - // We only want to add to Offset, however, so if Diff < 0 we can add Align and - // (Offset + Diff) & -Align == Addr & -Align will still hold. - if (Diff < 0) - Diff += Align; - return Offset + Diff; -} - // Orders segments such that if x = y->ParentSegment then y comes before x. static void orderSegments(std::vector &Segments) { llvm::stable_sort(Segments, compareSegmentsByOffset); } // This function finds a consistent layout for a list of segments starting from -// an Offset. It assumes that Segments have been sorted by OrderSegments and +// an Offset. It assumes that Segments have been sorted by orderSegments and // returns an Offset one past the end of the last segment. static uint64_t layoutSegments(std::vector &Segments, uint64_t Offset) { @@ -1902,8 +1906,8 @@ static uint64_t layoutSegments(std::vector &Segments, Seg->Offset = Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset; } else { - Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align); - Seg->Offset = Offset; + Seg->Offset = + alignTo(Offset, std::max(Seg->Align, 1), Seg->VAddr); } Offset = std::max(Offset, Seg->Offset + Seg->FileSize); } @@ -1925,17 +1929,17 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) { // of the segment we can assign a new offset to the section. For sections not // covered by segments we can just bump Offset to the next valid location. 
uint32_t Index = 1; - for (auto &Section : Sections) { - Section.Index = Index++; - if (Section.ParentSegment != nullptr) { - auto Segment = *Section.ParentSegment; - Section.Offset = - Segment.Offset + (Section.OriginalOffset - Segment.OriginalOffset); + for (auto &Sec : Sections) { + Sec.Index = Index++; + if (Sec.ParentSegment != nullptr) { + auto Segment = *Sec.ParentSegment; + Sec.Offset = + Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset); } else { - Offset = alignTo(Offset, Section.Align == 0 ? 1 : Section.Align); - Section.Offset = Offset; - if (Section.Type != SHT_NOBITS) - Offset += Section.Size; + Offset = alignTo(Offset, Sec.Align == 0 ? 1 : Sec.Align); + Sec.Offset = Offset; + if (Sec.Type != SHT_NOBITS) + Offset += Sec.Size; } } return Offset; @@ -1971,16 +1975,16 @@ template void ELFWriter::assignOffsets() { // Offset so that SHOffset is valid. if (WriteSectionHeaders) Offset = alignTo(Offset, sizeof(Elf_Addr)); - Obj.SHOffset = Offset; + Obj.SHOff = Offset; } template size_t ELFWriter::totalSize() const { // We already have the section header offset so we can calculate the total // size by just adding up the size of each section header. if (!WriteSectionHeaders) - return Obj.SHOffset; + return Obj.SHOff; size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr. - return Obj.SHOffset + ShdrCount * sizeof(Elf_Shdr); + return Obj.SHOff + ShdrCount * sizeof(Elf_Shdr); } template Error ELFWriter::write() { @@ -1995,6 +1999,25 @@ template Error ELFWriter::write() { return Buf.commit(); } +static Error removeUnneededSections(Object &Obj) { + // We can remove an empty symbol table from non-relocatable objects. + // Relocatable objects typically have relocation sections whose + // sh_link field points to .symtab, so we can't remove .symtab + // even if it is empty. + if (Obj.isRelocatable() || Obj.SymbolTable == nullptr || + !Obj.SymbolTable->empty()) + return Error::success(); + + // .strtab can be used for section names. 
In such a case we shouldn't + // remove it. + auto *StrTab = Obj.SymbolTable->getStrTab() == Obj.SectionNames + ? nullptr + : Obj.SymbolTable->getStrTab(); + return Obj.removeSections(false, [&](const SectionBase &Sec) { + return &Sec == Obj.SymbolTable || &Sec == StrTab; + }); +} + template Error ELFWriter::finalize() { // It could happen that SectionNames has been removed and yet the user wants // a section header table output. We need to throw an error if a user tries @@ -2004,6 +2027,8 @@ template Error ELFWriter::finalize() { "cannot write section header table because " "section header string table was removed"); + if (Error E = removeUnneededSections(Obj)) + return E; Obj.sortSections(); // We need to assign indexes before we perform layout because we need to know @@ -2045,9 +2070,8 @@ template Error ELFWriter::finalize() { // Make sure we add the names of all the sections. Importantly this must be // done after we decide to add or remove SectionIndexes. if (Obj.SectionNames != nullptr) - for (const auto &Section : Obj.sections()) { - Obj.SectionNames->addString(Section.Name); - } + for (const SectionBase &Sec : Obj.sections()) + Obj.SectionNames->addString(Sec.Name); initEhdrSegment(); @@ -2055,8 +2079,8 @@ template Error ELFWriter::finalize() { // Also, the output arch may not be the same as the input arch, so fix up // size-related fields before doing layout calculations. uint64_t Index = 0; - auto SecSizer = llvm::make_unique>(); - for (auto &Sec : Obj.sections()) { + auto SecSizer = std::make_unique>(); + for (SectionBase &Sec : Obj.sections()) { Sec.Index = Index++; Sec.accept(*SecSizer); } @@ -2082,40 +2106,36 @@ template Error ELFWriter::finalize() { // Finally now that all offsets and indexes have been set we can finalize any // remaining issues. 
- uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr); - for (SectionBase &Section : Obj.sections()) { - Section.HeaderOffset = Offset; + uint64_t Offset = Obj.SHOff + sizeof(Elf_Shdr); + for (SectionBase &Sec : Obj.sections()) { + Sec.HeaderOffset = Offset; Offset += sizeof(Elf_Shdr); if (WriteSectionHeaders) - Section.NameIndex = Obj.SectionNames->findIndex(Section.Name); - Section.finalize(); + Sec.NameIndex = Obj.SectionNames->findIndex(Sec.Name); + Sec.finalize(); } if (Error E = Buf.allocate(totalSize())) return E; - SecWriter = llvm::make_unique>(Buf); + SecWriter = std::make_unique>(Buf); return Error::success(); } Error BinaryWriter::write() { - for (auto &Section : Obj.sections()) - if (Section.Flags & SHF_ALLOC) - Section.accept(*SecWriter); + for (const SectionBase &Sec : Obj.allocSections()) + Sec.accept(*SecWriter); return Buf.commit(); } Error BinaryWriter::finalize() { - // TODO: Create a filter range to construct OrderedSegments from so that this - // code can be deduped with assignOffsets above. This should also solve the - // todo below for LayoutSections. // We need a temporary list of segments that has a special order to it // so that we know that anytime ->ParentSegment is set that segment has // already had it's offset properly set. We only want to consider the segments // that will affect layout of allocated sections so we only add those. 
std::vector OrderedSegments; - for (SectionBase &Section : Obj.sections()) - if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr) - OrderedSegments.push_back(Section.ParentSegment); + for (const SectionBase &Sec : Obj.allocSections()) + if (Sec.ParentSegment != nullptr) + OrderedSegments.push_back(Sec.ParentSegment); // For binary output, we're going to use physical addresses instead of // virtual addresses, since a binary output is used for cases like ROM @@ -2130,7 +2150,7 @@ Error BinaryWriter::finalize() { llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr); // Because we add a ParentSegment for each section we might have duplicate - // segments in OrderedSegments. If there were duplicates then LayoutSegments + // segments in OrderedSegments. If there were duplicates then layoutSegments // would do very strange things. auto End = std::unique(std::begin(OrderedSegments), std::end(OrderedSegments)); @@ -2158,28 +2178,20 @@ Error BinaryWriter::finalize() { } } - // TODO: generalize LayoutSections to take a range. Pass a special range - // constructed from an iterator that skips values for which a predicate does - // not hold. Then pass such a range to LayoutSections instead of constructing - // AllocatedSections here. - std::vector AllocatedSections; - for (SectionBase &Section : Obj.sections()) - if (Section.Flags & SHF_ALLOC) - AllocatedSections.push_back(&Section); - layoutSections(make_pointee_range(AllocatedSections), Offset); + layoutSections(Obj.allocSections(), Offset); // Now that every section has been laid out we just need to compute the total // file size. This might not be the same as the offset returned by - // LayoutSections, because we want to truncate the last segment to the end of + // layoutSections, because we want to truncate the last segment to the end of // its last section, to match GNU objcopy's behaviour. 
TotalSize = 0; - for (SectionBase *Section : AllocatedSections) - if (Section->Type != SHT_NOBITS) - TotalSize = std::max(TotalSize, Section->Offset + Section->Size); + for (const SectionBase &Sec : Obj.allocSections()) + if (Sec.Type != SHT_NOBITS) + TotalSize = std::max(TotalSize, Sec.Offset + Sec.Size); if (Error E = Buf.allocate(TotalSize)) return E; - SecWriter = llvm::make_unique(Buf); + SecWriter = std::make_unique(Buf); return Error::success(); } @@ -2259,17 +2271,17 @@ Error IHexWriter::finalize() { // If any section we're to write has segment then we // switch to using physical addresses. Otherwise we // use section virtual address. - for (auto &Section : Obj.sections()) - if (ShouldWrite(Section) && IsInPtLoad(Section)) { + for (const SectionBase &Sec : Obj.sections()) + if (ShouldWrite(Sec) && IsInPtLoad(Sec)) { UseSegments = true; break; } - for (auto &Section : Obj.sections()) - if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) { - if (Error E = checkSection(Section)) + for (const SectionBase &Sec : Obj.sections()) + if (ShouldWrite(Sec) && (!UseSegments || IsInPtLoad(Sec))) { + if (Error E = checkSection(Sec)) return E; - Sections.insert(&Section); + Sections.insert(&Sec); } IHexSectionWriterBase LengthCalc(Buf); diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h index f3df93b9662..eeacb014e4d 100644 --- a/tools/llvm-objcopy/ELF/Object.h +++ b/tools/llvm-objcopy/ELF/Object.h @@ -57,8 +57,8 @@ public: : Sections(Secs) {} SectionTableRef(const SectionTableRef &) = default; - iterator begin() { return iterator(Sections.data()); } - iterator end() { return iterator(Sections.data() + Sections.size()); } + iterator begin() const { return iterator(Sections.data()); } + iterator end() const { return iterator(Sections.data() + Sections.size()); } size_t size() const { return Sections.size(); } SectionBase *getSection(uint32_t Index, Twine ErrMsg); @@ -863,7 +863,7 @@ public: class Reader { public: virtual 
~Reader(); - virtual std::unique_ptr create() const = 0; + virtual std::unique_ptr create(bool EnsureSymtab) const = 0; }; using object::Binary; @@ -873,7 +873,6 @@ using object::OwningBinary; class BasicELFBuilder { protected: - uint16_t EMachine; std::unique_ptr Obj; void initFileHeader(); @@ -883,17 +882,18 @@ protected: void initSections(); public: - BasicELFBuilder(uint16_t EM) - : EMachine(EM), Obj(llvm::make_unique()) {} + BasicELFBuilder() : Obj(std::make_unique()) {} }; class BinaryELFBuilder : public BasicELFBuilder { MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; void addData(SymbolTableSection *SymTab); public: - BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB) - : BasicELFBuilder(EM), MemBuf(MB) {} + BinaryELFBuilder(MemoryBuffer *MB, uint8_t NewSymbolVisibility) + : BasicELFBuilder(), MemBuf(MB), + NewSymbolVisibility(NewSymbolVisibility) {} std::unique_ptr build(); }; @@ -905,7 +905,7 @@ class IHexELFBuilder : public BasicELFBuilder { public: IHexELFBuilder(const std::vector &Records) - : BasicELFBuilder(ELF::EM_386), Records(Records) {} + : BasicELFBuilder(), Records(Records) {} std::unique_ptr build(); }; @@ -926,7 +926,7 @@ private: void initGroupSection(GroupSection *GroupSec); void initSymbolTable(SymbolTableSection *SymTab); void readSectionHeaders(); - void readSections(); + void readSections(bool EnsureSymtab); void findEhdrOffset(); SectionBase &makeSection(const Elf_Shdr &Shdr); @@ -936,17 +936,17 @@ public: : ElfFile(*ElfObj.getELFFile()), Obj(Obj), ExtractPartition(ExtractPartition) {} - void build(); + void build(bool EnsureSymtab); }; class BinaryReader : public Reader { - const MachineInfo &MInfo; MemoryBuffer *MemBuf; + uint8_t NewSymbolVisibility; public: - BinaryReader(const MachineInfo &MI, MemoryBuffer *MB) - : MInfo(MI), MemBuf(MB) {} - std::unique_ptr create() const override; + BinaryReader(MemoryBuffer *MB, const uint8_t NewSymbolVisibility) + : MemBuf(MB), NewSymbolVisibility(NewSymbolVisibility) {} + std::unique_ptr 
create(bool EnsureSymtab) const override; }; class IHexReader : public Reader { @@ -968,7 +968,7 @@ class IHexReader : public Reader { public: IHexReader(MemoryBuffer *MB) : MemBuf(MB) {} - std::unique_ptr create() const override; + std::unique_ptr create(bool EnsureSymtab) const override; }; class ELFReader : public Reader { @@ -976,7 +976,7 @@ class ELFReader : public Reader { Optional ExtractPartition; public: - std::unique_ptr create() const override; + std::unique_ptr create(bool EnsureSymtab) const override; explicit ELFReader(Binary *B, Optional ExtractPartition) : Bin(B), ExtractPartition(ExtractPartition) {} }; @@ -990,6 +990,10 @@ private: std::vector Segments; std::vector RemovedSections; + static bool sectionIsAlloc(const SectionBase &Sec) { + return Sec.Flags & ELF::SHF_ALLOC; + }; + public: template using Range = iterator_range< @@ -1011,13 +1015,14 @@ public: uint8_t OSABI; uint8_t ABIVersion; uint64_t Entry; - uint64_t SHOffset; + uint64_t SHOff; uint32_t Type; uint32_t Machine; uint32_t Version; uint32_t Flags; bool HadShdrs = true; + bool MustBeRelocatable = false; StringTableSection *SectionNames = nullptr; SymbolTableSection *SymbolTable = nullptr; SectionIndexSection *SectionIndexTable = nullptr; @@ -1027,6 +1032,13 @@ public: ConstRange sections() const { return make_pointee_range(Sections); } + iterator_range< + filter_iterator::const_iterator>, + decltype(&sectionIsAlloc)>> + allocSections() const { + return make_filter_range(make_pointee_range(Sections), sectionIsAlloc); + } + SectionBase *findSection(StringRef Name) { auto SecIt = find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; }); @@ -1041,16 +1053,20 @@ public: std::function ToRemove); Error removeSymbols(function_ref ToRemove); template T &addSection(Ts &&... 
Args) { - auto Sec = llvm::make_unique(std::forward(Args)...); + auto Sec = std::make_unique(std::forward(Args)...); auto Ptr = Sec.get(); + MustBeRelocatable |= isa(*Ptr); Sections.emplace_back(std::move(Sec)); Ptr->Index = Sections.size(); return *Ptr; } Segment &addSegment(ArrayRef Data) { - Segments.emplace_back(llvm::make_unique(Data)); + Segments.emplace_back(std::make_unique(Data)); return *Segments.back(); } + bool isRelocatable() const { + return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable; + } }; } // end namespace elf diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp new file mode 100644 index 00000000000..f621f3aa09c --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -0,0 +1,350 @@ +//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MachOLayoutBuilder.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { + uint32_t Size = 0; + for (const auto &LC : O.LoadCommands) { + const MachO::macho_load_command &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SEGMENT: + Size += sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + continue; + case MachO::LC_SEGMENT_64: + Size += sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + continue; + } + + switch (cmd) { +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ + case MachO::LCName: \ + Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ + break; +#include "llvm/BinaryFormat/MachO.def" +#undef HANDLE_LOAD_COMMAND + } + } + + return Size; +} + +void MachOLayoutBuilder::constructStringTable() { + for (std::unique_ptr &Sym : O.SymTable.Symbols) + StrTableBuilder.add(Sym->Name); + StrTableBuilder.finalize(); +} + +void MachOLayoutBuilder::updateSymbolIndexes() { + uint32_t Index = 0; + for (auto &Symbol : O.SymTable.Symbols) + Symbol->Index = Index++; +} + +// Updates the index and the number of local/external/undefined symbols. +void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { + assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); + // Make sure that nlist entries in the symbol table are sorted by the those + // types. The order is: local < defined external < undefined external. 
+ assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return (A->isLocalSymbol() && !B->isLocalSymbol()) || + (!A->isUndefinedSymbol() && + B->isUndefinedSymbol()); + }) && + "Symbols are not sorted by their types."); + + uint32_t NumLocalSymbols = 0; + auto Iter = O.SymTable.Symbols.begin(); + auto End = O.SymTable.Symbols.end(); + for (; Iter != End; ++Iter) { + if ((*Iter)->isExternalSymbol()) + break; + + ++NumLocalSymbols; + } + + uint32_t NumExtDefSymbols = 0; + for (; Iter != End; ++Iter) { + if ((*Iter)->isUndefinedSymbol()) + break; + + ++NumExtDefSymbols; + } + + MLC.dysymtab_command_data.ilocalsym = 0; + MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; + MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; + MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; + MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; + MLC.dysymtab_command_data.nundefsym = + O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); +} + +// Recomputes and updates offset and size fields in load commands and sections +// since they could be modified. +uint64_t MachOLayoutBuilder::layoutSegments() { + auto HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + const bool IsObjectFile = + O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; + uint64_t Offset = IsObjectFile ? 
(HeaderSize + O.Header.SizeOfCmds) : 0; + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + StringRef Segname; + uint64_t SegmentVmAddr; + uint64_t SegmentVmSize; + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + SegmentVmAddr = MLC.segment_command_data.vmaddr; + SegmentVmSize = MLC.segment_command_data.vmsize; + Segname = StringRef(MLC.segment_command_data.segname, + strnlen(MLC.segment_command_data.segname, + sizeof(MLC.segment_command_data.segname))); + break; + case MachO::LC_SEGMENT_64: + SegmentVmAddr = MLC.segment_command_64_data.vmaddr; + SegmentVmSize = MLC.segment_command_64_data.vmsize; + Segname = StringRef(MLC.segment_command_64_data.segname, + strnlen(MLC.segment_command_64_data.segname, + sizeof(MLC.segment_command_64_data.segname))); + break; + default: + continue; + } + + if (Segname == "__LINKEDIT") { + // We update the __LINKEDIT segment later (in layoutTail). + assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); + LinkEditLoadCommand = &MLC; + continue; + } + + // Update file offsets and sizes of sections. 
+ uint64_t SegOffset = Offset; + uint64_t SegFileSize = 0; + uint64_t VMSize = 0; + for (auto &Sec : LC.Sections) { + if (IsObjectFile) { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + } else { + uint64_t PaddingSize = + offsetToAlignment(SegFileSize, Align(1ull << Sec.Align)); + Sec.Offset = SegOffset + SegFileSize + PaddingSize; + Sec.Size = Sec.Content.size(); + SegFileSize += PaddingSize + Sec.Size; + } + VMSize = std::max(VMSize, Sec.Addr + Sec.Size); + } else { + if (Sec.isVirtualSection()) { + Sec.Offset = 0; + VMSize += Sec.Size; + } else { + uint32_t SectOffset = Sec.Addr - SegmentVmAddr; + Sec.Offset = SegOffset + SectOffset; + Sec.Size = Sec.Content.size(); + SegFileSize = std::max(SegFileSize, SectOffset + Sec.Size); + VMSize = std::max(VMSize, SegFileSize); + } + } + } + + if (IsObjectFile) { + Offset += SegFileSize; + } else { + Offset = alignTo(Offset + SegFileSize, PageSize); + SegFileSize = alignTo(SegFileSize, PageSize); + // Use the original vmsize if the segment is __PAGEZERO. + VMSize = + Segname == "__PAGEZERO" ? 
SegmentVmSize : alignTo(VMSize, PageSize); + } + + switch (MLC.load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC.segment_command_data.cmdsize = + sizeof(MachO::segment_command) + + sizeof(MachO::section) * LC.Sections.size(); + MLC.segment_command_data.nsects = LC.Sections.size(); + MLC.segment_command_data.fileoff = SegOffset; + MLC.segment_command_data.vmsize = VMSize; + MLC.segment_command_data.filesize = SegFileSize; + break; + case MachO::LC_SEGMENT_64: + MLC.segment_command_64_data.cmdsize = + sizeof(MachO::segment_command_64) + + sizeof(MachO::section_64) * LC.Sections.size(); + MLC.segment_command_64_data.nsects = LC.Sections.size(); + MLC.segment_command_64_data.fileoff = SegOffset; + MLC.segment_command_64_data.vmsize = VMSize; + MLC.segment_command_64_data.filesize = SegFileSize; + break; + } + } + + return Offset; +} + +uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { + for (auto &LC : O.LoadCommands) + for (auto &Sec : LC.Sections) { + Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; + Sec.NReloc = Sec.Relocations.size(); + Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; + } + + return Offset; +} + +Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { + // The order of LINKEDIT elements is as follows: + // rebase info, binding info, weak binding info, lazy binding info, export + // trie, data-in-code, symbol table, indirect symbol table, symbol table + // strings. + uint64_t NListSize = Is64Bit ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + uint64_t StartOfLinkEdit = Offset; + uint64_t StartOfRebaseInfo = StartOfLinkEdit; + uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size(); + uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size(); + uint64_t StartOfLazyBindingInfo = + StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size(); + uint64_t StartOfExportTrie = + StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size(); + uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); + uint64_t StartOfDataInCode = + StartOfFunctionStarts + O.FunctionStarts.Data.size(); + uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfIndirectSymbols = + StartOfSymbols + NListSize * O.SymTable.Symbols.size(); + uint64_t StartOfSymbolStrings = + StartOfIndirectSymbols + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size(); + uint64_t LinkEditSize = + (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit; + + // Now we have determined the layout of the contents of the __LINKEDIT + // segment. Update its load command. 
+ if (LinkEditLoadCommand) { + MachO::macho_load_command *MLC = LinkEditLoadCommand; + switch (LinkEditLoadCommand->load_command_data.cmd) { + case MachO::LC_SEGMENT: + MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); + MLC->segment_command_data.fileoff = StartOfLinkEdit; + MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_data.filesize = LinkEditSize; + break; + case MachO::LC_SEGMENT_64: + MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); + MLC->segment_command_64_data.fileoff = StartOfLinkEdit; + MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); + MLC->segment_command_64_data.filesize = LinkEditSize; + break; + } + } + + for (auto &LC : O.LoadCommands) { + auto &MLC = LC.MachOLoadCommand; + auto cmd = MLC.load_command_data.cmd; + switch (cmd) { + case MachO::LC_SYMTAB: + MLC.symtab_command_data.symoff = StartOfSymbols; + MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); + MLC.symtab_command_data.stroff = StartOfSymbolStrings; + MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); + break; + case MachO::LC_DYSYMTAB: { + if (MLC.dysymtab_command_data.ntoc != 0 || + MLC.dysymtab_command_data.nmodtab != 0 || + MLC.dysymtab_command_data.nextrefsyms != 0 || + MLC.dysymtab_command_data.nlocrel != 0 || + MLC.dysymtab_command_data.nextrel != 0) + return createStringError(llvm::errc::not_supported, + "shared library is not yet supported"); + + if (!O.IndirectSymTable.Symbols.empty()) { + MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols; + MLC.dysymtab_command_data.nindirectsyms = + O.IndirectSymTable.Symbols.size(); + } + + updateDySymTab(MLC); + break; + } + case MachO::LC_DATA_IN_CODE: + MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; + MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); + break; + case MachO::LC_FUNCTION_STARTS: + MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; + 
MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); + break; + case MachO::LC_DYLD_INFO: + case MachO::LC_DYLD_INFO_ONLY: + MLC.dyld_info_command_data.rebase_off = + O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; + MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); + MLC.dyld_info_command_data.bind_off = + O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; + MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); + MLC.dyld_info_command_data.weak_bind_off = + O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; + MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); + MLC.dyld_info_command_data.lazy_bind_off = + O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; + MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); + MLC.dyld_info_command_data.export_off = + O.Exports.Trie.empty() ? 0 : StartOfExportTrie; + MLC.dyld_info_command_data.export_size = O.Exports.Trie.size(); + break; + case MachO::LC_LOAD_DYLINKER: + case MachO::LC_MAIN: + case MachO::LC_RPATH: + case MachO::LC_SEGMENT: + case MachO::LC_SEGMENT_64: + case MachO::LC_VERSION_MIN_MACOSX: + case MachO::LC_BUILD_VERSION: + case MachO::LC_ID_DYLIB: + case MachO::LC_LOAD_DYLIB: + case MachO::LC_UUID: + case MachO::LC_SOURCE_VERSION: + // Nothing to update. + break; + default: + // Abort if it's unsupported in order to prevent corrupting the object. 
+ return createStringError(llvm::errc::not_supported, + "unsupported load command (cmd=0x%x)", cmd); + } + } + + return Error::success(); +} + +Error MachOLayoutBuilder::layout() { + O.Header.NCmds = O.LoadCommands.size(); + O.Header.SizeOfCmds = computeSizeOfCmds(); + constructStringTable(); + updateSymbolIndexes(); + uint64_t Offset = layoutSegments(); + Offset = layoutRelocations(Offset); + return layoutTail(Offset); +} + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm diff --git a/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h new file mode 100644 index 00000000000..21cbe56605d --- /dev/null +++ b/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h @@ -0,0 +1,50 @@ +//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H +#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H + +#include "MachOObjcopy.h" +#include "Object.h" + +namespace llvm { +namespace objcopy { +namespace macho { + +class MachOLayoutBuilder { + Object &O; + bool Is64Bit; + uint64_t PageSize; + + // Points to the __LINKEDIT segment if it exists. 
+ MachO::macho_load_command *LinkEditLoadCommand = nullptr; + StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; + + uint32_t computeSizeOfCmds() const; + void constructStringTable(); + void updateSymbolIndexes(); + void updateDySymTab(MachO::macho_load_command &MLC); + uint64_t layoutSegments(); + uint64_t layoutRelocations(uint64_t Offset); + Error layoutTail(uint64_t Offset); + +public: + MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize) + : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {} + + // Recomputes and updates fields in the given object such as file offsets. + Error layout(); + + StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; } +}; + +} // end namespace macho +} // end namespace objcopy +} // end namespace llvm + +#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index 19343b65dd1..6d586e7d73f 100644 --- a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -25,18 +25,20 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) { !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() || !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() || !Config.DumpSection.empty() || !Config.KeepSection.empty() || - !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() || - !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() || - !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() || - !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() || + Config.NewSymbolVisibility || !Config.OnlySection.empty() || + !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() || + !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() || + !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() || + !Config.SymbolsToRename.empty() || !Config.UnneededSymbolsToRemove.empty() || 
- !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() || - Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden || - Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc || - Config.StripSections || Config.Weaken || Config.DecompressDebugSections || - Config.StripDebug || Config.StripNonAlloc || Config.StripSections || - Config.StripUnneeded || Config.DiscardMode != DiscardType::None || - !Config.SymbolsToAdd.empty() || Config.EntryExpr) { + !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() || + !Config.ToRemove.empty() || Config.ExtractDWO || Config.KeepFileSymbols || + Config.LocalizeHidden || Config.PreserveDates || Config.StripDWO || + Config.StripNonAlloc || Config.StripSections || Config.Weaken || + Config.DecompressDebugSections || Config.StripDebug || + Config.StripNonAlloc || Config.StripSections || Config.StripUnneeded || + Config.DiscardMode != DiscardType::None || !Config.SymbolsToAdd.empty() || + Config.EntryExpr) { return createStringError(llvm::errc::invalid_argument, "option not supported by llvm-objcopy for MachO"); } @@ -57,7 +59,11 @@ Error executeObjcopyOnBinary(const CopyConfig &Config, if (Error E = handleArgs(Config, *O)) return createFileError(Config.InputFilename, std::move(E)); - MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out); + // TODO: Support 16KB pages which are employed in iOS arm64 binaries: + // https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb + const uint64_t PageSize = 4096; + + MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out); if (auto E = Writer.finalize()) return E; return Writer.write(); diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp index d3129303460..b48a0d8952d 100644 --- a/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Object &O) const { case 
MachO::LC_SYMTAB: O.SymTabCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DYSYMTAB: + O.DySymTabCommandIndex = O.LoadCommands.size(); + break; case MachO::LC_DYLD_INFO: case MachO::LC_DYLD_INFO_ONLY: O.DyLdInfoCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_DATA_IN_CODE: + O.DataInCodeCommandIndex = O.LoadCommands.size(); + break; + case MachO::LC_FUNCTION_STARTS: + O.FunctionStartsCommandIndex = O.LoadCommands.size(); + break; } #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ case MachO::LCName: \ @@ -188,7 +197,7 @@ void MachOReader::readSymbolTable(Object &O) const { StrTable, MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()))); - O.SymTable.Symbols.push_back(llvm::make_unique(SE)); + O.SymTable.Symbols.push_back(std::make_unique(SE)); } } @@ -222,8 +231,37 @@ void MachOReader::readExportInfo(Object &O) const { O.Exports.Trie = MachOObj.getDyldInfoExportsTrie(); } +void MachOReader::readDataInCodeData(Object &O) const { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.DataInCode.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readFunctionStartsData(Object &O) const { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LDC = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + O.FunctionStarts.Data = arrayRefFromStringRef( + MachOObj.getData().substr(LDC.dataoff, LDC.datasize)); +} + +void MachOReader::readIndirectSymbolTable(Object &O) const { + MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); + for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) + O.IndirectSymTable.Symbols.push_back( + MachOObj.getIndirectSymbolTableEntry(DySymTab, i)); +} + std::unique_ptr MachOReader::create() const { - auto Obj = llvm::make_unique(); 
+ auto Obj = std::make_unique(); readHeader(*Obj); readLoadCommands(*Obj); readSymbolTable(*Obj); @@ -233,6 +271,9 @@ std::unique_ptr MachOReader::create() const { readWeakBindInfo(*Obj); readLazyBindInfo(*Obj); readExportInfo(*Obj); + readDataInCodeData(*Obj); + readFunctionStartsData(*Obj); + readIndirectSymbolTable(*Obj); return Obj; } diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h index 795e5cc2363..00c8f0d55f6 100644 --- a/tools/llvm-objcopy/MachO/MachOReader.h +++ b/tools/llvm-objcopy/MachO/MachOReader.h @@ -36,6 +36,9 @@ class MachOReader : public Reader { void readWeakBindInfo(Object &O) const; void readLazyBindInfo(Object &O) const; void readExportInfo(Object &O) const; + void readDataInCodeData(Object &O) const; + void readFunctionStartsData(Object &O) const; + void readIndirectSymbolTable(Object &O) const; public: explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {} diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp index 74200c5aa62..4ec91cc9eb7 100644 --- a/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "MachOWriter.h" +#include "MachOLayoutBuilder.h" #include "Object.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/MachO.h" @@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const { const MachO::symtab_command &SymTabCommand = O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; - if (SymTabCommand.symoff) { - assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) && - "Incorrect number of symbols"); + if (SymTabCommand.symoff) Ends.push_back(SymTabCommand.symoff + symTableSize()); - } - if (SymTabCommand.stroff) { - assert((SymTabCommand.strsize == StrTableBuilder.getSize()) && - "Incorrect string table size"); + if (SymTabCommand.stroff) 
Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize); - } } if (O.DyLdInfoCommandIndex) { const MachO::dyld_info_command &DyLdInfoCommand = @@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const { } } + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Ends.push_back(DySymTabCommand.indirectsymoff + + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + // Otherwise, use the last section / reloction. 
for (const auto &LC : O.LoadCommands) for (const auto &S : LC.Sections) { @@ -120,14 +145,6 @@ void MachOWriter::writeHeader() { memcpy(B.getBufferStart(), &Header, HeaderSize); } -void MachOWriter::updateSymbolIndexes() { - uint32_t Index = 0; - for (auto &Symbol : O.SymTable.Symbols) { - Symbol->Index = Index; - Index++; - } -} - void MachOWriter::writeLoadCommands() { uint8_t *Begin = B.getBufferStart() + headerSize(); for (const auto &LC : O.LoadCommands) { @@ -253,7 +270,7 @@ void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out, Out += sizeof(NListType); } -void MachOWriter::writeSymbolTable() { +void MachOWriter::writeStringTable() { if (!O.SymTabCommandIndex) return; const MachO::symtab_command &SymTabCommand = @@ -261,10 +278,10 @@ void MachOWriter::writeSymbolTable() { .MachOLoadCommand.symtab_command_data; uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff; - StrTableBuilder.write(StrTable); + LayoutBuilder.getStringTableBuilder().write(StrTable); } -void MachOWriter::writeStringTable() { +void MachOWriter::writeSymbolTable() { if (!O.SymTabCommandIndex) return; const MachO::symtab_command &SymTabCommand = @@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() { for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end(); Iter != End; Iter++) { SymbolEntry *Sym = Iter->get(); - auto Nstrx = StrTableBuilder.getOffset(Sym->Name); + uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); if (Is64Bit) writeNListEntry(*Sym, IsLittleEndian, SymTable, Nstrx); @@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() { memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); } +void MachOWriter::writeIndirectSymbolTable() { + if (!O.DySymTabCommandIndex) + return; + + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff; + 
assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) && + "Incorrect indirect symbol table size"); + memcpy(Out, O.IndirectSymTable.Symbols.data(), + sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); +} + +void MachOWriter::writeDataInCodeData() { + if (!O.DataInCodeCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) && + "Incorrect data in code data size"); + memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); +} + +void MachOWriter::writeFunctionStartsData() { + if (!O.FunctionStartsCommandIndex) + return; + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff; + assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) && + "Incorrect function starts data size"); + memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size()); +} + void MachOWriter::writeTail() { typedef void (MachOWriter::*WriteHandlerType)(void); typedef std::pair WriteOperation; @@ -379,6 +435,36 @@ void MachOWriter::writeTail() { {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo}); } + if (O.DySymTabCommandIndex) { + const MachO::dysymtab_command &DySymTabCommand = + O.LoadCommands[*O.DySymTabCommandIndex] + .MachOLoadCommand.dysymtab_command_data; + + if (DySymTabCommand.indirectsymoff) + Queue.emplace_back(DySymTabCommand.indirectsymoff, + &MachOWriter::writeIndirectSymbolTable); + } + + if (O.DataInCodeCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.DataInCodeCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if 
(LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeDataInCodeData); + } + + if (O.FunctionStartsCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.FunctionStartsCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeFunctionStartsData); + } + llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) { return LHS.first < RHS.first; }); @@ -387,198 +473,13 @@ void MachOWriter::writeTail() { (this->*WriteOp.second)(); } -void MachOWriter::updateSizeOfCmds() { - auto Size = 0; - for (const auto &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - auto cmd = MLC.load_command_data.cmd; - - switch (cmd) { - case MachO::LC_SEGMENT: - Size += sizeof(MachO::segment_command) + - sizeof(MachO::section) * LC.Sections.size(); - continue; - case MachO::LC_SEGMENT_64: - Size += sizeof(MachO::segment_command_64) + - sizeof(MachO::section_64) * LC.Sections.size(); - continue; - } - - switch (cmd) { -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - case MachO::LCName: \ - Size += sizeof(MachO::LCStruct); \ - break; -#include "llvm/BinaryFormat/MachO.def" -#undef HANDLE_LOAD_COMMAND - } - } - - O.Header.SizeOfCmds = Size; -} - -// Updates the index and the number of local/external/undefined symbols. Here we -// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table -// are already sorted by the those types. 
-void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) { - uint32_t NumLocalSymbols = 0; - auto Iter = O.SymTable.Symbols.begin(); - auto End = O.SymTable.Symbols.end(); - for (; Iter != End; Iter++) { - if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT)) - break; - - NumLocalSymbols++; - } - - uint32_t NumExtDefSymbols = 0; - for (; Iter != End; Iter++) { - if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF) - break; - - NumExtDefSymbols++; - } - - MLC.dysymtab_command_data.ilocalsym = 0; - MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; - MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; - MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; - MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; - MLC.dysymtab_command_data.nundefsym = - O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); -} - -// Recomputes and updates offset and size fields in load commands and sections -// since they could be modified. -Error MachOWriter::layout() { - auto SizeOfCmds = loadCommandsSize(); - auto Offset = headerSize() + SizeOfCmds; - O.Header.NCmds = O.LoadCommands.size(); - O.Header.SizeOfCmds = SizeOfCmds; - - // Lay out sections. - for (auto &LC : O.LoadCommands) { - uint64_t FileOff = Offset; - uint64_t VMSize = 0; - uint64_t FileOffsetInSegment = 0; - for (auto &Sec : LC.Sections) { - if (!Sec.isVirtualSection()) { - auto FilePaddingSize = - OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align); - Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize; - Sec.Size = Sec.Content.size(); - FileOffsetInSegment += FilePaddingSize + Sec.Size; - } - - VMSize = std::max(VMSize, Sec.Addr + Sec.Size); - } - - // TODO: Handle the __PAGEZERO segment. 
- auto &MLC = LC.MachOLoadCommand; - switch (MLC.load_command_data.cmd) { - case MachO::LC_SEGMENT: - MLC.segment_command_data.cmdsize = - sizeof(MachO::segment_command) + - sizeof(MachO::section) * LC.Sections.size(); - MLC.segment_command_data.nsects = LC.Sections.size(); - MLC.segment_command_data.fileoff = FileOff; - MLC.segment_command_data.vmsize = VMSize; - MLC.segment_command_data.filesize = FileOffsetInSegment; - break; - case MachO::LC_SEGMENT_64: - MLC.segment_command_64_data.cmdsize = - sizeof(MachO::segment_command_64) + - sizeof(MachO::section_64) * LC.Sections.size(); - MLC.segment_command_64_data.nsects = LC.Sections.size(); - MLC.segment_command_64_data.fileoff = FileOff; - MLC.segment_command_64_data.vmsize = VMSize; - MLC.segment_command_64_data.filesize = FileOffsetInSegment; - break; - } - - Offset += FileOffsetInSegment; - } - - // Lay out relocations. - for (auto &LC : O.LoadCommands) - for (auto &Sec : LC.Sections) { - Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset; - Sec.NReloc = Sec.Relocations.size(); - Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc; - } - - // Lay out tail stuff. - auto NListSize = Is64Bit ? 
sizeof(MachO::nlist_64) : sizeof(MachO::nlist); - for (auto &LC : O.LoadCommands) { - auto &MLC = LC.MachOLoadCommand; - auto cmd = MLC.load_command_data.cmd; - switch (cmd) { - case MachO::LC_SYMTAB: - MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); - MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); - MLC.symtab_command_data.symoff = Offset; - Offset += NListSize * MLC.symtab_command_data.nsyms; - MLC.symtab_command_data.stroff = Offset; - Offset += MLC.symtab_command_data.strsize; - break; - case MachO::LC_DYSYMTAB: { - if (MLC.dysymtab_command_data.ntoc != 0 || - MLC.dysymtab_command_data.nmodtab != 0 || - MLC.dysymtab_command_data.nextrefsyms != 0 || - MLC.dysymtab_command_data.nlocrel != 0 || - MLC.dysymtab_command_data.nextrel != 0) - return createStringError(llvm::errc::not_supported, - "shared library is not yet supported"); - - if (MLC.dysymtab_command_data.nindirectsyms != 0) - return createStringError(llvm::errc::not_supported, - "indirect symbol table is not yet supported"); - - updateDySymTab(MLC); - break; - } - case MachO::LC_SEGMENT: - case MachO::LC_SEGMENT_64: - case MachO::LC_VERSION_MIN_MACOSX: - case MachO::LC_BUILD_VERSION: - case MachO::LC_ID_DYLIB: - case MachO::LC_LOAD_DYLIB: - case MachO::LC_UUID: - case MachO::LC_SOURCE_VERSION: - // Nothing to update. - break; - default: - // Abort if it's unsupported in order to prevent corrupting the object. 
- return createStringError(llvm::errc::not_supported, - "unsupported load command (cmd=0x%x)", cmd); - } - } - - return Error::success(); -} - -void MachOWriter::constructStringTable() { - for (std::unique_ptr &Sym : O.SymTable.Symbols) - StrTableBuilder.add(Sym->Name); - StrTableBuilder.finalize(); -} - -Error MachOWriter::finalize() { - updateSizeOfCmds(); - constructStringTable(); - - if (auto E = layout()) - return E; - - return Error::success(); -} +Error MachOWriter::finalize() { return LayoutBuilder.layout(); } Error MachOWriter::write() { if (Error E = B.allocate(totalSize())) return E; memset(B.getBufferStart(), 0, totalSize()); writeHeader(); - updateSymbolIndexes(); writeLoadCommands(); writeSections(); writeTail(); diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h index ecf12d62de2..22abbad56f4 100644 --- a/tools/llvm-objcopy/MachO/MachOWriter.h +++ b/tools/llvm-objcopy/MachO/MachOWriter.h @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "../Buffer.h" +#include "MachOLayoutBuilder.h" #include "MachOObjcopy.h" #include "Object.h" #include "llvm/BinaryFormat/MachO.h" @@ -22,20 +23,15 @@ class MachOWriter { Object &O; bool Is64Bit; bool IsLittleEndian; + uint64_t PageSize; Buffer &B; - StringTableBuilder StrTableBuilder{StringTableBuilder::MachO}; + MachOLayoutBuilder LayoutBuilder; size_t headerSize() const; size_t loadCommandsSize() const; size_t symTableSize() const; size_t strTableSize() const; - void updateDySymTab(MachO::macho_load_command &MLC); - void updateSizeOfCmds(); - void updateSymbolIndexes(); - void constructStringTable(); - Error layout(); - void writeHeader(); void writeLoadCommands(); template @@ -48,11 +44,16 @@ class MachOWriter { void writeWeakBindInfo(); void writeLazyBindInfo(); void writeExportInfo(); + void writeIndirectSymbolTable(); + void writeDataInCodeData(); + void writeFunctionStartsData(); void writeTail(); public: - 
MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B) - : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {} + MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize, + Buffer &B) + : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), + PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {} size_t totalSize() const; Error finalize(); diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h index ed85fcbc47f..1cebf8253d1 100644 --- a/tools/llvm-objcopy/MachO/Object.h +++ b/tools/llvm-objcopy/MachO/Object.h @@ -90,6 +90,16 @@ struct SymbolEntry { uint8_t n_sect; uint16_t n_desc; uint64_t n_value; + + bool isExternalSymbol() const { + return n_type & ((MachO::N_EXT | MachO::N_PEXT)); + } + + bool isLocalSymbol() const { return !isExternalSymbol(); } + + bool isUndefinedSymbol() const { + return (n_type & MachO::N_TYPE) == MachO::N_UNDF; + } }; /// The location of the symbol table inside the binary is described by LC_SYMTAB @@ -100,6 +110,10 @@ struct SymbolTable { const SymbolEntry *getSymbolByIndex(uint32_t Index) const; }; +struct IndirectSymbolTable { + std::vector Symbols; +}; + /// The location of the string table inside the binary is described by LC_SYMTAB /// load command. struct StringTable { @@ -206,6 +220,10 @@ struct ExportInfo { ArrayRef Trie; }; +struct LinkData { + ArrayRef Data; +}; + struct Object { MachHeader Header; std::vector LoadCommands; @@ -218,11 +236,20 @@ struct Object { WeakBindInfo WeakBinds; LazyBindInfo LazyBinds; ExportInfo Exports; + IndirectSymbolTable IndirectSymTable; + LinkData DataInCode; + LinkData FunctionStarts; /// The index of LC_SYMTAB load command if present. Optional SymTabCommandIndex; /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present. Optional DyLdInfoCommandIndex; + /// The index LC_DYSYMTAB load comamnd if present. + Optional DySymTabCommandIndex; + /// The index LC_DATA_IN_CODE load comamnd if present. 
+ Optional DataInCodeCommandIndex; + /// The index LC_FUNCTION_STARTS load comamnd if present. + Optional FunctionStartsCommandIndex; }; } // end namespace macho diff --git a/tools/llvm-objcopy/ObjcopyOpts.td b/tools/llvm-objcopy/ObjcopyOpts.td index 5fce4fbde53..9e6b6f0005c 100644 --- a/tools/llvm-objcopy/ObjcopyOpts.td +++ b/tools/llvm-objcopy/ObjcopyOpts.td @@ -1,37 +1,33 @@ -include "llvm/Option/OptParser.td" - -multiclass Eq { - def NAME : Separate<["--"], name>; - def NAME #_eq : Joined<["--"], name #"=">, - Alias(NAME)>, - HelpText; -} - -def help : Flag<["--"], "help">; -def h : Flag<["-"], "h">, Alias; - -def allow_broken_links - : Flag<["--"], "allow-broken-links">, - HelpText<"Allow llvm-objcopy to remove sections even if it would leave " - "invalid section references. The appropriate sh_link fields " - "will be set to zero.">; +include "CommonOpts.td" defm binary_architecture - : Eq<"binary-architecture", "Used when transforming an architecture-less " - "format (such as binary) to another format">; -def B : JoinedOrSeparate<["-"], "B">, Alias; + : Eq<"binary-architecture", "Ignored for compatibility">; +def B : JoinedOrSeparate<["-"], "B">, + Alias, + HelpText<"Alias for --binary-architecture">; defm target : Eq<"target", "Format of the input and output file">, Values<"binary">; -def F : JoinedOrSeparate<["-"], "F">, Alias; +def F : JoinedOrSeparate<["-"], "F">, + Alias, + HelpText<"Alias for --target">; defm input_target : Eq<"input-target", "Format of the input file">, Values<"binary">; -def I : JoinedOrSeparate<["-"], "I">, Alias; +def I : JoinedOrSeparate<["-"], "I">, + Alias, + HelpText<"Alias for --input-target">; defm output_target : Eq<"output-target", "Format of the output file">, Values<"binary">; -def O : JoinedOrSeparate<["-"], "O">, Alias; +def O : JoinedOrSeparate<["-"], "O">, + Alias, + HelpText<"Alias for --output-target">; + +defm new_symbol_visibility : Eq<"new-symbol-visibility", "Visibility of " + "symbols generated for binary input 
or added" + " with --add-symbol unless otherwise" + " specified. The default value is 'default'.">; def compress_debug_sections : Flag<["--"], "compress-debug-sections">; def compress_debug_sections_eq @@ -46,34 +42,10 @@ defm split_dwo ", then strip-dwo on the input file">, MetaVarName<"dwo-file">; -def enable_deterministic_archives - : Flag<["--"], "enable-deterministic-archives">, - HelpText<"Enable deterministic mode when copying archives (use zero for " - "UIDs, GIDs, and timestamps).">; -def D : Flag<["-"], "D">, - Alias, - HelpText<"Alias for --enable-deterministic-archives">; - -def disable_deterministic_archives - : Flag<["--"], "disable-deterministic-archives">, - HelpText<"Disable deterministic mode when copying archives (use real " - "values for UIDs, GIDs, and timestamps).">; -def U : Flag<["-"], "U">, - Alias, - HelpText<"Alias for --disable-deterministic-archives">; - -def preserve_dates : Flag<["--"], "preserve-dates">, - HelpText<"Preserve access and modification timestamps">; -def p : Flag<["-"], "p">, Alias; - defm add_gnu_debuglink : Eq<"add-gnu-debuglink", "Add a .gnu_debuglink for ">, MetaVarName<"debug-file">; -defm remove_section : Eq<"remove-section", "Remove
">, - MetaVarName<"section">; -def R : JoinedOrSeparate<["-"], "R">, Alias; - defm rename_section : Eq<"rename-section", "Renames a section from old to new, optionally with specified flags. " @@ -93,16 +65,20 @@ defm redefine_symbols "symbols from many files.">, MetaVarName<"filename">; -defm keep_section : Eq<"keep-section", "Keep
">, - MetaVarName<"section">; defm only_section : Eq<"only-section", "Remove all but
">, MetaVarName<"section">; -def j : JoinedOrSeparate<["-"], "j">, Alias; +def j : JoinedOrSeparate<["-"], "j">, + Alias, + HelpText<"Alias for --only-section">; defm add_section : Eq<"add-section", "Make a section named
with the contents of .">, MetaVarName<"section=file">; +defm set_section_alignment + : Eq<"set-section-alignment", "Set alignment for a given section.">, + MetaVarName<"section=align">; + defm set_section_flags : Eq<"set-section-flags", "Set section flags for a given section. Flags supported for GNU " @@ -110,26 +86,14 @@ defm set_section_flags "rom, share, contents, merge, strings.">, MetaVarName<"section=flag1[,flag2,...]">; -def strip_all : Flag<["--"], "strip-all">, - HelpText<"Remove non-allocated sections outside segments. " - ".gnu.warning* sections are not removed">; -def S : Flag<["-"], "S">, Alias; -def strip_all_gnu : Flag<["--"], "strip-all-gnu">, - HelpText<"Compatible with GNU objcopy's --strip-all">; -def strip_debug : Flag<["--"], "strip-debug">, - HelpText<"Remove all debug information">; -def g : Flag<["-"], "g">, Alias, - HelpText<"Alias for --strip-debug">; +def S : Flag<["-"], "S">, + Alias, + HelpText<"Alias for --strip-all">; def strip_dwo : Flag<["--"], "strip-dwo">, HelpText<"Remove all DWARF .dwo sections from file">; -def strip_sections - : Flag<["--"], "strip-sections">, - HelpText<"Remove all section headers and all sections not in segments">; def strip_non_alloc : Flag<["--"], "strip-non-alloc">, HelpText<"Remove all non-allocated sections outside segments">; -def strip_unneeded : Flag<["--"], "strip-unneeded">, - HelpText<"Remove all symbols not needed by relocations">; defm strip_unneeded_symbol : Eq<"strip-unneeded-symbol", "Remove symbol if it is not needed by relocations">, @@ -163,7 +127,9 @@ defm localize_symbols "Reads a list of symbols from and marks them local.">, MetaVarName<"filename">; -def L : JoinedOrSeparate<["-"], "L">, Alias; +def L : JoinedOrSeparate<["-"], "L">, + Alias, + HelpText<"Alias for --localize-symbol">; defm globalize_symbol : Eq<"globalize-symbol", "Mark as global">, MetaVarName<"symbol">; @@ -178,7 +144,9 @@ defm keep_global_symbol "Convert all symbols except to local. 
May be repeated to " "convert all except a set of symbols to local.">, MetaVarName<"symbol">; -def G : JoinedOrSeparate<["-"], "G">, Alias; +def G : JoinedOrSeparate<["-"], "G">, + Alias, + HelpText<"Alias for --keep-global-symbol">; defm keep_global_symbols : Eq<"keep-global-symbols", @@ -196,31 +164,17 @@ defm weaken_symbols "Reads a list of symbols from and marks them weak.">, MetaVarName<"filename">; -def W : JoinedOrSeparate<["-"], "W">, Alias; +def W : JoinedOrSeparate<["-"], "W">, + Alias, + HelpText<"Alias for --weaken-symbol">; def weaken : Flag<["--"], "weaken">, HelpText<"Mark all global symbols as weak">; -def discard_locals : Flag<["--"], "discard-locals">, - HelpText<"Remove compiler-generated local symbols, (e.g. " - "symbols starting with .L)">; -def X : Flag<["-"], "X">, Alias; - -def discard_all - : Flag<["--"], "discard-all">, - HelpText<"Remove all local symbols except file and section symbols">; -def x : Flag<["-"], "x">, Alias; -defm strip_symbol : Eq<"strip-symbol", "Remove symbol ">, - MetaVarName<"symbol">; defm strip_symbols : Eq<"strip-symbols", "Reads a list of symbols from and removes them.">, MetaVarName<"filename">; -def N : JoinedOrSeparate<["-"], "N">, Alias; -defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, - MetaVarName<"symbol">; -def K : JoinedOrSeparate<["-"], "K">, Alias; - defm keep_symbols : Eq<"keep-symbols", "Reads a list of symbols from and runs as if " @@ -230,13 +184,6 @@ defm keep_symbols "be repeated to read symbols from many files.">, MetaVarName<"filename">; -def only_keep_debug - : Flag<["--"], "only-keep-debug">, - HelpText<"Clear sections that would not be stripped by --strip-debug. " - "Currently only implemented for COFF.">; - -def keep_file_symbols : Flag<["--"], "keep-file-symbols">, - HelpText<"Do not remove file symbols">; defm dump_section : Eq<"dump-section", "Dump contents of section named
into file ">, @@ -249,9 +196,6 @@ defm prefix_alloc_sections : Eq<"prefix-alloc-sections", "Add to the start of every allocated section name">, MetaVarName<"prefix">; -def version : Flag<["--"], "version">, - HelpText<"Print the version and exit.">; -def V : Flag<["-"], "V">, Alias; defm build_id_link_dir : Eq<"build-id-link-dir", "Set directory for --build-id-link-input and " "--build-id-link-output to ">, @@ -265,10 +209,6 @@ defm build_id_link_output "name derived from hex build ID">, MetaVarName<"suffix">; -def regex - : Flag<["--"], "regex">, - HelpText<"Permit regular expressions in name comparison">; - defm set_start : Eq<"set-start", "Set the start address to . Overrides " "any previous --change-start or --adjust-start values.">, MetaVarName<"addr">; @@ -277,11 +217,12 @@ defm change_start : Eq<"change-start", "Add to the start address. Can be "cumulatively.">, MetaVarName<"incr">; def adjust_start : JoinedOrSeparate<["--"], "adjust-start">, - Alias; + Alias, + HelpText<"Alias for --change-start">; defm add_symbol : Eq<"add-symbol", "Add new symbol to .symtab. Accepted flags: " - "global, local, weak, default, hidden, file, section, object, " + "global, local, weak, default, hidden, protected, file, section, object, " "function, indirect-function. 
Accepted but ignored for " "compatibility: debug, constructor, warning, indirect, synthetic, " "unique-object, before.">, diff --git a/tools/llvm-objcopy/StripOpts.td b/tools/llvm-objcopy/StripOpts.td index 1d06bb3dfb3..cd02cffae67 100644 --- a/tools/llvm-objcopy/StripOpts.td +++ b/tools/llvm-objcopy/StripOpts.td @@ -1,96 +1,17 @@ -include "llvm/Option/OptParser.td" +include "CommonOpts.td" -multiclass Eq { - def NAME : Separate<["--"], name>; - def NAME #_eq : Joined<["--"], name #"=">, - Alias(NAME)>, - HelpText; -} +def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to ">, + MetaVarName<"">; -def help : Flag<["--"], "help">; -def h : Flag<["-"], "h">, Alias; - -def allow_broken_links - : Flag<["--"], "allow-broken-links">, - HelpText<"Allow llvm-strip to remove sections even if it would leave " - "invalid section references. The appropriate sh_link fields " - "will be set to zero.">; - -def enable_deterministic_archives - : Flag<["--"], "enable-deterministic-archives">, - HelpText<"Enable deterministic mode when stripping archives (use zero " - "for UIDs, GIDs, and timestamps).">; -def D : Flag<["-"], "D">, - Alias, - HelpText<"Alias for --enable-deterministic-archives">; - -def disable_deterministic_archives - : Flag<["--"], "disable-deterministic-archives">, - HelpText<"Disable deterministic mode when stripping archives (use real " - "values for UIDs, GIDs, and timestamps).">; -def U : Flag<["-"], "U">, - Alias, - HelpText<"Alias for --disable-deterministic-archives">; - -def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to ">; - -def preserve_dates : Flag<["--"], "preserve-dates">, - HelpText<"Preserve access and modification timestamps">; -def p : Flag<["-"], "p">, Alias; - -def strip_all : Flag<["--"], "strip-all">, - HelpText<"Remove non-allocated sections outside segments. 
" - ".gnu.warning* sections are not removed">; -def s : Flag<["-"], "s">, Alias; +def s : Flag<["-"], "s">, + Alias, + HelpText<"Alias for --strip-all">; def no_strip_all : Flag<["--"], "no-strip-all">, HelpText<"Disable --strip-all">; -def strip_all_gnu : Flag<["--"], "strip-all-gnu">, - HelpText<"Compatible with GNU strip's --strip-all">; -def strip_debug : Flag<["--"], "strip-debug">, - HelpText<"Remove debugging symbols only">; -def d : Flag<["-"], "d">, Alias; -def g : Flag<["-"], "g">, Alias; -def S : Flag<["-"], "S">, Alias; -def strip_unneeded : Flag<["--"], "strip-unneeded">, - HelpText<"Remove all symbols not needed by relocations">; - -defm remove_section : Eq<"remove-section", "Remove
">, - MetaVarName<"section">; -def R : JoinedOrSeparate<["-"], "R">, Alias; - -defm strip_symbol : Eq<"strip-symbol", "Strip ">, - MetaVarName<"symbol">; -def N : JoinedOrSeparate<["-"], "N">, Alias; - -defm keep_section : Eq<"keep-section", "Keep
">, - MetaVarName<"section">; -defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol ">, - MetaVarName<"symbol">; -def keep_file_symbols : Flag<["--"], "keep-file-symbols">, - HelpText<"Do not remove file symbols">; - -def K : JoinedOrSeparate<["-"], "K">, Alias; - -def only_keep_debug - : Flag<["--"], "only-keep-debug">, - HelpText<"Clear sections that would not be stripped by --strip-debug. " - "Currently only implemented for COFF.">; - -def discard_locals : Flag<["--"], "discard-locals">, - HelpText<"Remove compiler-generated local symbols, (e.g. " - "symbols starting with .L)">; -def X : Flag<["-"], "X">, Alias; - -def discard_all - : Flag<["--"], "discard-all">, - HelpText<"Remove all local symbols except file and section symbols">; -def x : Flag<["-"], "x">, Alias; - -def regex - : Flag<["--"], "regex">, - HelpText<"Permit regular expressions in name comparison">; - -def version : Flag<["--"], "version">, - HelpText<"Print the version and exit.">; -def V : Flag<["-"], "V">, Alias; +def d : Flag<["-"], "d">, + Alias, + HelpText<"Alias for --strip-debug">; +def S : Flag<["-"], "S">, + Alias, + HelpText<"Alias for --strip-debug">; diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp index e9372176e43..a68210f3fdd 100644 --- a/tools/llvm-objcopy/llvm-objcopy.cpp +++ b/tools/llvm-objcopy/llvm-objcopy.cpp @@ -29,6 +29,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Memory.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include @@ -84,7 +86,7 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) { ErrorSuccess reportWarning(Error E) { 
assert(E); - WithColor::warning(errs(), ToolName) << toString(std::move(E)); + WithColor::warning(errs(), ToolName) << toString(std::move(E)) << '\n'; return Error::success(); } @@ -130,16 +132,18 @@ static Error deepWriteArchive(StringRef ArcName, /// The function executeObjcopyOnIHex does the dispatch based on the format /// of the output specified by the command line options. -static Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In, +static Error executeObjcopyOnIHex(CopyConfig &Config, MemoryBuffer &In, Buffer &Out) { // TODO: support output formats other than ELF. + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnIHex(Config, In, Out); } /// The function executeObjcopyOnRawBinary does the dispatch based on the format /// of the output specified by the command line options. -static Error executeObjcopyOnRawBinary(const CopyConfig &Config, - MemoryBuffer &In, Buffer &Out) { +static Error executeObjcopyOnRawBinary(CopyConfig &Config, MemoryBuffer &In, + Buffer &Out) { switch (Config.OutputFormat) { case FileFormat::ELF: // FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the @@ -148,6 +152,8 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config, case FileFormat::Binary: case FileFormat::IHex: case FileFormat::Unspecified: + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnRawBinary(Config, In, Out); } @@ -156,11 +162,13 @@ static Error executeObjcopyOnRawBinary(const CopyConfig &Config, /// The function executeObjcopyOnBinary does the dispatch based on the format /// of the input binary (ELF, MachO or COFF). 
-static Error executeObjcopyOnBinary(const CopyConfig &Config, - object::Binary &In, Buffer &Out) { - if (auto *ELFBinary = dyn_cast(&In)) +static Error executeObjcopyOnBinary(CopyConfig &Config, object::Binary &In, + Buffer &Out) { + if (auto *ELFBinary = dyn_cast(&In)) { + if (Error E = Config.parseELFConfig()) + return E; return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out); - else if (auto *COFFBinary = dyn_cast(&In)) + } else if (auto *COFFBinary = dyn_cast(&In)) return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out); else if (auto *MachOBinary = dyn_cast(&In)) return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out); @@ -169,8 +177,7 @@ static Error executeObjcopyOnBinary(const CopyConfig &Config, "unsupported object file format"); } -static Error executeObjcopyOnArchive(const CopyConfig &Config, - const Archive &Ar) { +static Error executeObjcopyOnArchive(CopyConfig &Config, const Archive &Ar) { std::vector NewArchiveMembers; Error Err = Error::success(); for (const Archive::Child &Child : Ar.children(Err)) { @@ -246,7 +253,7 @@ static Error restoreStatOnFile(StringRef Filename, /// The function executeObjcopy does the higher level dispatch based on the type /// of input (raw binary, archive or single object file) and takes care of the /// format-agnostic modifications, i.e. preserving dates. 
-static Error executeObjcopy(const CopyConfig &Config) { +static Error executeObjcopy(CopyConfig &Config) { sys::fs::file_status Stat; if (Config.InputFilename != "-") { if (auto EC = sys::fs::status(Config.InputFilename, Stat)) @@ -255,7 +262,7 @@ static Error executeObjcopy(const CopyConfig &Config) { Stat.permissions(static_cast(0777)); } - typedef Error (*ProcessRawFn)(const CopyConfig &, MemoryBuffer &, Buffer &); + using ProcessRawFn = Error (*)(CopyConfig &, MemoryBuffer &, Buffer &); ProcessRawFn ProcessRaw; switch (Config.InputFormat) { case FileFormat::Binary: @@ -310,15 +317,31 @@ int main(int argc, char **argv) { InitLLVM X(argc, argv); ToolName = argv[0]; bool IsStrip = sys::path::stem(ToolName).contains("strip"); + + // Expand response files. + // TODO: Move these lines, which are copied from lib/Support/CommandLine.cpp, + // into a separate function in the CommandLine library and call that function + // here. This is duplicated code. + SmallVector NewArgv(argv, argv + argc); + BumpPtrAllocator A; + StringSaver Saver(A); + cl::ExpandResponseFiles(Saver, + Triple(sys::getProcessTriple()).isOSWindows() + ? cl::TokenizeWindowsCommandLine + : cl::TokenizeGNUCommandLine, + NewArgv); + + auto Args = makeArrayRef(NewArgv).drop_front(); + Expected DriverConfig = - IsStrip ? parseStripOptions(makeArrayRef(argv + 1, argc), reportWarning) - : parseObjcopyOptions(makeArrayRef(argv + 1, argc)); + IsStrip ? 
parseStripOptions(Args, reportWarning) + : parseObjcopyOptions(Args, reportWarning); if (!DriverConfig) { logAllUnhandledErrors(DriverConfig.takeError(), WithColor::error(errs(), ToolName)); return 1; } - for (const CopyConfig &CopyConfig : DriverConfig->CopyConfigs) { + for (CopyConfig &CopyConfig : DriverConfig->CopyConfigs) { if (Error E = executeObjcopy(CopyConfig)) { logAllUnhandledErrors(std::move(E), WithColor::error(errs(), ToolName)); return 1; diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp index 1ba0a68902c..60b0f5a3cbd 100644 --- a/tools/llvm-objdump/COFFDump.cpp +++ b/tools/llvm-objdump/COFFDump.cpp @@ -234,15 +234,14 @@ printSEHTable(const COFFObjectFile *Obj, uint32_t TableVA, int Count) { if (Count == 0) return; - const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - uint32_t ImageBase = PE32Header->ImageBase; uintptr_t IntPtr = 0; - error(Obj->getVaPtr(TableVA, IntPtr)); + if (std::error_code EC = Obj->getVaPtr(TableVA, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); + const support::ulittle32_t *P = (const support::ulittle32_t *)IntPtr; outs() << "SEH Table:"; for (int I = 0; I < Count; ++I) - outs() << format(" 0x%x", P[I] + ImageBase); + outs() << format(" 0x%x", P[I] + Obj->getPE32Header()->ImageBase); outs() << "\n\n"; } @@ -268,22 +267,24 @@ static void printTLSDirectoryT(const coff_tls_directory *TLSDir) { } static void printTLSDirectory(const COFFObjectFile *Obj) { - const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - - const pe32plus_header *PE32PlusHeader; - error(Obj->getPE32PlusHeader(PE32PlusHeader)); + const pe32_header *PE32Header = Obj->getPE32Header(); + const pe32plus_header *PE32PlusHeader = Obj->getPE32PlusHeader(); // Skip if it's not executable. 
if (!PE32Header && !PE32PlusHeader) return; const data_directory *DataDir; - error(Obj->getDataDirectory(COFF::TLS_TABLE, DataDir)); - uintptr_t IntPtr = 0; + if (std::error_code EC = Obj->getDataDirectory(COFF::TLS_TABLE, DataDir)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (DataDir->RelativeVirtualAddress == 0) return; - error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)); + + uintptr_t IntPtr = 0; + if (std::error_code EC = + Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); if (PE32Header) { auto *TLSDir = reinterpret_cast(IntPtr); @@ -298,9 +299,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) { static void printLoadConfiguration(const COFFObjectFile *Obj) { // Skip if it's not executable. - const pe32_header *PE32Header; - error(Obj->getPE32Header(PE32Header)); - if (!PE32Header) + if (!Obj->getPE32Header()) return; // Currently only x86 is supported @@ -308,11 +307,18 @@ static void printLoadConfiguration(const COFFObjectFile *Obj) { return; const data_directory *DataDir; - error(Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir)); + + if (std::error_code EC = + Obj->getDataDirectory(COFF::LOAD_CONFIG_TABLE, DataDir)) + reportError(errorCodeToError(EC), Obj->getFileName()); + uintptr_t IntPtr = 0; if (DataDir->RelativeVirtualAddress == 0) return; - error(Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)); + + if (std::error_code EC = + Obj->getRvaPtr(DataDir->RelativeVirtualAddress, IntPtr)) + reportError(errorCodeToError(EC), Obj->getFileName()); auto *LoadConf = reinterpret_cast(IntPtr); outs() << "Load configuration:" @@ -442,8 +448,7 @@ static bool getPDataSection(const COFFObjectFile *Obj, std::vector &Rels, const RuntimeFunction *&RFStart, int &NumRFs) { for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); if (Name 
!= ".pdata") continue; @@ -455,7 +460,9 @@ static bool getPDataSection(const COFFObjectFile *Obj, llvm::sort(Rels, isRelocAddressLess); ArrayRef Contents; - error(Obj->getSectionContents(Pdata, Contents)); + if (Error E = Obj->getSectionContents(Pdata, Contents)) + reportError(std::move(E), Obj->getFileName()); + if (Contents.empty()) continue; @@ -571,10 +578,12 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj, ArrayRef XContents; uint64_t UnwindInfoOffset = 0; - error(getSectionContents( - Obj, Rels, SectionOffset + - /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8, - XContents, UnwindInfoOffset)); + if (Error E = getSectionContents( + Obj, Rels, + SectionOffset + + /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8, + XContents, UnwindInfoOffset)) + reportError(std::move(E), Obj->getFileName()); if (XContents.empty()) return; @@ -650,9 +659,12 @@ void printCOFFSymbolTable(const object::COFFImportFile *i) { void printCOFFSymbolTable(const COFFObjectFile *coff) { for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) { Expected Symbol = coff->getSymbol(SI); + if (!Symbol) + reportError(Symbol.takeError(), coff->getFileName()); + StringRef Name; - error(Symbol.takeError()); - error(coff->getSymbolName(*Symbol, Name)); + if (std::error_code EC = coff->getSymbolName(*Symbol, Name)) + reportError(errorCodeToError(EC), coff->getFileName()); outs() << "[" << format("%2d", SI) << "]" << "(sec " << format("%2d", int(Symbol->getSectionNumber())) << ")" @@ -682,7 +694,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) { for (unsigned AI = 0, AE = Symbol->getNumberOfAuxSymbols(); AI < AE; ++AI, ++SI) { if (Symbol->isSectionDefinition()) { const coff_aux_section_definition *asd; - error(coff->getAuxSymbol(SI + 1, asd)); + if (std::error_code EC = + coff->getAuxSymbol(SI + 1, asd)) + reportError(errorCodeToError(EC), coff->getFileName()); int32_t AuxNumber = asd->getNumber(Symbol->isBigObj()); @@ -697,7 +711,8 @@ void 
printCOFFSymbolTable(const COFFObjectFile *coff) { , unsigned(asd->Selection)); } else if (Symbol->isFileRecord()) { const char *FileName; - error(coff->getAuxSymbol(SI + 1, FileName)); + if (std::error_code EC = coff->getAuxSymbol(SI + 1, FileName)) + reportError(errorCodeToError(EC), coff->getFileName()); StringRef Name(FileName, Symbol->getNumberOfAuxSymbols() * coff->getSymbolTableEntrySize()); @@ -707,7 +722,9 @@ void printCOFFSymbolTable(const COFFObjectFile *coff) { break; } else if (Symbol->isWeakExternal()) { const coff_aux_weak_external *awe; - error(coff->getAuxSymbol(SI + 1, awe)); + if (std::error_code EC = + coff->getAuxSymbol(SI + 1, awe)) + reportError(errorCodeToError(EC), coff->getFileName()); outs() << "AUX " << format("indx %d srch %d\n", static_cast(awe->TagIndex), diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp index 9c4d67d0f1b..93d070eee16 100644 --- a/tools/llvm-objdump/ELFDump.cpp +++ b/tools/llvm-objdump/ELFDump.cpp @@ -178,7 +178,7 @@ void printDynamicSection(const ELFFile *Elf, StringRef Filename) { outs() << (Data + Dyn.d_un.d_val) << "\n"; continue; } - warn(toString(StrTabOrErr.takeError())); + reportWarning(toString(StrTabOrErr.takeError()), Filename); consumeError(StrTabOrErr.takeError()); } outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val); diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 58ff7be4543..e4684d0f160 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -236,11 +236,11 @@ struct SymbolSorter { bool operator()(const SymbolRef &A, const SymbolRef &B) { Expected ATypeOrErr = A.getType(); if (!ATypeOrErr) - report_error(ATypeOrErr.takeError(), A.getObject()->getFileName()); + reportError(ATypeOrErr.takeError(), A.getObject()->getFileName()); SymbolRef::Type AType = *ATypeOrErr; Expected BTypeOrErr = B.getType(); if (!BTypeOrErr) - report_error(BTypeOrErr.takeError(), B.getObject()->getFileName()); + 
reportError(BTypeOrErr.takeError(), B.getObject()->getFileName()); SymbolRef::Type BType = *BTypeOrErr; uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue(); uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue(); @@ -371,11 +371,8 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj, Symbols.push_back(Symbol); } - for (const SectionRef &Section : MachOObj->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : MachOObj->sections()) Sections.push_back(Section); - } bool BaseSegmentAddressSet = false; for (const auto &Command : MachOObj->load_commands()) { @@ -393,10 +390,40 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj, BaseSegmentAddressSet = true; BaseSegmentAddress = SLC.vmaddr; } + } else if (Command.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 SLC = MachOObj->getSegment64LoadCommand(Command); + StringRef SegName = SLC.segname; + if (!BaseSegmentAddressSet && SegName != "__PAGEZERO") { + BaseSegmentAddressSet = true; + BaseSegmentAddress = SLC.vmaddr; + } } } } +static bool DumpAndSkipDataInCode(uint64_t PC, const uint8_t *bytes, + DiceTable &Dices, uint64_t &InstSize) { + // Check the data in code table here to see if this is data not an + // instruction to be disassembled. 
+ DiceTable Dice; + Dice.push_back(std::make_pair(PC, DiceRef())); + dice_table_iterator DTI = + std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(), + compareDiceTableEntries); + if (DTI != Dices.end()) { + uint16_t Length; + DTI->second.getLength(Length); + uint16_t Kind; + DTI->second.getKind(Kind); + InstSize = DumpDataInCode(bytes, Length, Kind); + if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && + (PC == (DTI->first + Length - 1)) && (Length & 1)) + InstSize++; + return true; + } + return false; +} + static void printRelocationTargetName(const MachOObjectFile *O, const MachO::any_relocation_info &RE, raw_string_ostream &Fmt) { @@ -419,13 +446,11 @@ static void printRelocationTargetName(const MachOObjectFile *O, // If we couldn't find a symbol that this relocation refers to, try // to find a section beginning instead. for (const SectionRef &Section : ToolSectionFilter(*O)) { - StringRef Name; uint64_t Addr = Section.getAddress(); if (Addr != Val) continue; - if (std::error_code EC = Section.getName(Name)) - report_error(errorCodeToError(EC), O->getFileName()); - Fmt << Name; + StringRef NameOrErr = unwrapOrError(Section.getName(), O->getFileName()); + Fmt << NameOrErr; return; } @@ -458,10 +483,14 @@ static void printRelocationTargetName(const MachOObjectFile *O, --I; advance(SI, 1); } - if (SI == O->section_end()) + if (SI == O->section_end()) { Fmt << Val << " (?,?)"; - else - SI->getName(S); + } else { + if (Expected NameOrErr = SI->getName()) + S = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + } } Fmt << S; @@ -504,8 +533,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // NOTE: Scattered relocations don't exist on x86_64. 
unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::X86_64_RELOC_UNSIGNED) - report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after " - "X86_64_RELOC_SUBTRACTOR."); + reportError(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after " + "X86_64_RELOC_SUBTRACTOR."); // The X86_64_RELOC_UNSIGNED contains the minuend symbol; // X86_64_RELOC_SUBTRACTOR contains the subtrahend. @@ -553,8 +582,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_SECTDIFF."); + reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_SECTDIFF."); printRelocationTargetName(Obj, RE, Fmt); Fmt << "-"; @@ -574,8 +603,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // GENERIC_RELOC_PAIR. unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::GENERIC_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_LOCAL_SECTDIFF."); + reportError(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_LOCAL_SECTDIFF."); printRelocationTargetName(Obj, RE, Fmt); Fmt << "-"; @@ -614,8 +643,8 @@ Error getMachORelocationValueString(const MachOObjectFile *Obj, // ARM_RELOC_PAIR. 
unsigned RType = Obj->getAnyRelocationType(RENext); if (RType != MachO::ARM_RELOC_PAIR) - report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after " - "ARM_RELOC_HALF"); + reportError(Obj->getFileName(), "Expected ARM_RELOC_PAIR after " + "ARM_RELOC_HALF"); // NOTE: The half of the target virtual address is stashed in the // address field of the secondary relocation, but we can't reverse @@ -1501,7 +1530,12 @@ static void DumpLiteralPointerSection(MachOObjectFile *O, uint64_t SectSize = Sect->getSize(); StringRef SectName; - Sect->getName(SectName); + Expected SectNameOrErr = Sect->getName(); + if (SectNameOrErr) + SectName = *SectNameOrErr; + else + consumeError(SectNameOrErr.takeError()); + DataRefImpl Ref = Sect->getRawDataRefImpl(); StringRef SegmentName = O->getSectionFinalSegmentName(Ref); outs() << SegmentName << ":" << SectName << ":"; @@ -1713,7 +1747,12 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O, } for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if ((DumpSegName.empty() || SegName == DumpSegName) && @@ -1809,7 +1848,12 @@ static void DumpInfoPlistSectionContents(StringRef Filename, MachOObjectFile *O) { for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if (SegName == "__TEXT" && SectName == "__info_plist") { @@ -1901,12 +1945,16 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF, // the error message. 
if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo) if (Error Err = MachOOF->checkSymbolTable()) - report_error(std::move(Err), ArchiveName, FileName, ArchitectureName); + reportError(std::move(Err), FileName, ArchiveName, ArchitectureName); if (DisassembleAll) { for (const SectionRef &Section : MachOOF->sections()) { StringRef SectName; - Section.getName(SectName); + if (Expected NameOrErr = Section.getName()) + SectName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (SectName.equals("__text")) { DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = MachOOF->getSectionFinalSegmentName(Ref); @@ -2151,7 +2199,7 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB, outs() << " offset " << OFA.getOffset(); if (OFA.getOffset() > size) outs() << " (past end of file)"; - if (OFA.getOffset() % (1 << OFA.getAlign()) != 0) + if (OFA.getOffset() % (1ull << OFA.getAlign()) != 0) outs() << " (not aligned on it's alignment (2^" << OFA.getAlign() << ")"; outs() << "\n"; outs() << " size " << OFA.getSize(); @@ -2165,12 +2213,14 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB, } static void printArchiveChild(StringRef Filename, const Archive::Child &C, - bool verbose, bool print_offset, + size_t ChildIndex, bool verbose, + bool print_offset, StringRef ArchitectureName = StringRef()) { if (print_offset) outs() << C.getChildOffset() << "\t"; sys::fs::perms Mode = - unwrapOrError(C.getAccessMode(), Filename, C, ArchitectureName); + unwrapOrError(C.getAccessMode(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName); if (verbose) { // FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG. // But there is nothing in sys::fs::perms for S_IFMT or S_IFREG. 
@@ -2188,11 +2238,14 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C, outs() << format("0%o ", Mode); } - outs() << format( - "%3d/%-3d %5" PRId64 " ", - unwrapOrError(C.getUID(), Filename, C, ArchitectureName), - unwrapOrError(C.getGID(), Filename, C, ArchitectureName), - unwrapOrError(C.getRawSize(), Filename, C, ArchitectureName)); + outs() << format("%3d/%-3d %5" PRId64 " ", + unwrapOrError(C.getUID(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName), + unwrapOrError(C.getGID(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName), + unwrapOrError(C.getRawSize(), + getFileNameForError(C, ChildIndex), Filename, + ArchitectureName)); StringRef RawLastModified = C.getRawLastModified(); if (verbose) { @@ -2215,14 +2268,17 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C, Expected NameOrErr = C.getName(); if (!NameOrErr) { consumeError(NameOrErr.takeError()); - outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName) + outs() << unwrapOrError(C.getRawName(), + getFileNameForError(C, ChildIndex), Filename, + ArchitectureName) << "\n"; } else { StringRef Name = NameOrErr.get(); outs() << Name << "\n"; } } else { - outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName) + outs() << unwrapOrError(C.getRawName(), getFileNameForError(C, ChildIndex), + Filename, ArchitectureName) << "\n"; } } @@ -2231,11 +2287,13 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose, bool print_offset, StringRef ArchitectureName = StringRef()) { Error Err = Error::success(); + size_t I = 0; for (const auto &C : A->children(Err, false)) - printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName); + printArchiveChild(Filename, C, I++, verbose, print_offset, + ArchitectureName); if (Err) - report_error(std::move(Err), StringRef(), Filename, ArchitectureName); + reportError(std::move(Err), Filename, "", ArchitectureName); } static bool 
ValidateArchFlags() { @@ -2267,7 +2325,7 @@ void parseInputMachO(StringRef Filename) { Expected> BinaryOrErr = createBinary(Filename); if (!BinaryOrErr) { if (Error E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError())) - report_error(std::move(E), Filename); + reportError(std::move(E), Filename); else outs() << Filename << ": is not an object file\n"; return; @@ -2280,11 +2338,13 @@ void parseInputMachO(StringRef Filename) { printArchiveHeaders(Filename, A, !NonVerbose, ArchiveMemberOffsets); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C); + reportError(std::move(E), getFileNameForError(C, I), Filename); continue; } if (MachOObjectFile *O = dyn_cast(&*ChildOrErr.get())) { @@ -2294,7 +2354,7 @@ void parseInputMachO(StringRef Filename) { } } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); return; } if (MachOUniversalBinary *UB = dyn_cast(&Bin)) { @@ -2346,7 +2406,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF, "", ArchitectureName); } else if (Error E = isNotObjectErrorInvalidFileType( ObjOrErr.takeError())) { - report_error(std::move(E), Filename, StringRef(), ArchitectureName); + reportError(std::move(E), "", Filename, ArchitectureName); continue; } else if (Expected> AOrErr = I->getAsArchive()) { @@ -2359,11 +2419,15 @@ void parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets, ArchitectureName); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { - if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C, ArchitectureName); + if (Error E = + 
isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + reportError(std::move(E), getFileNameForError(C, I), Filename, + ArchitectureName); continue; } if (MachOObjectFile *O = @@ -2371,12 +2435,13 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, O, O->getFileName(), ArchitectureName); } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for " + - "architecture " + StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file"); + reportError(Filename, + "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } } } @@ -2406,7 +2471,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF); } else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - report_error(std::move(E), Filename); + reportError(std::move(E), Filename); } else if (Expected> AOrErr = I->getAsArchive()) { std::unique_ptr &A = *AOrErr; @@ -2415,12 +2480,14 @@ void parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C); + reportError(std::move(E), getFileNameForError(C, I), Filename); continue; } if (MachOObjectFile *O = @@ -2428,12 +2495,12 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, O, O->getFileName()); } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not 
a Mach-O file or an archive file"); + reportError(Filename, "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } return; } @@ -2455,7 +2522,7 @@ void parseInputMachO(MachOUniversalBinary *UB) { ProcessMachO(Filename, MachOOF, "", ArchitectureName); } else if (Error E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - report_error(std::move(E), StringRef(), Filename, ArchitectureName); + reportError(std::move(E), Filename, "", ArchitectureName); } else if (Expected> AOrErr = I->getAsArchive()) { std::unique_ptr &A = *AOrErr; outs() << "Archive : " << Filename; @@ -2466,11 +2533,14 @@ void parseInputMachO(MachOUniversalBinary *UB) { printArchiveHeaders(Filename, A.get(), !NonVerbose, ArchiveMemberOffsets, ArchitectureName); Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), Filename, C, ArchitectureName); + reportError(std::move(E), getFileNameForError(C, I), Filename, + ArchitectureName); continue; } if (MachOObjectFile *O = @@ -2481,12 +2551,12 @@ void parseInputMachO(MachOUniversalBinary *UB) { } } if (Err) - report_error(std::move(Err), Filename); + reportError(std::move(Err), Filename); } else { consumeError(AOrErr.takeError()); - error("Mach-O universal file: " + Filename + " for architecture " + - StringRef(I->getArchFlagName()) + - " is not a Mach-O file or an archive file"); + reportError(Filename, "Mach-O universal file for architecture " + + StringRef(I->getArchFlagName()) + + " is not a Mach-O file or an archive file"); } } } @@ -3083,7 +3153,7 @@ static void method_reference(struct DisassembleInfo *info, if (strcmp(*ReferenceName, "_objc_msgSend") == 0) { if (info->selector_name != nullptr) { if (info->class_name != nullptr) { - info->method = llvm::make_unique( + 
info->method = std::make_unique( 5 + strlen(info->class_name) + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { @@ -3097,7 +3167,7 @@ static void method_reference(struct DisassembleInfo *info, } } else { info->method = - llvm::make_unique(9 + strlen(info->selector_name)); + std::make_unique(9 + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { if (Arch == Triple::x86_64) @@ -3117,7 +3187,7 @@ static void method_reference(struct DisassembleInfo *info, } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) { if (info->selector_name != nullptr) { info->method = - llvm::make_unique(17 + strlen(info->selector_name)); + std::make_unique(17 + strlen(info->selector_name)); char *method = info->method.get(); if (method != nullptr) { if (Arch == Triple::x86_64) @@ -3217,7 +3287,13 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset, continue; if (objc_only) { StringRef SectName; - ((*(info->Sections))[SectIdx]).getName(SectName); + Expected SecNameOrErr = + ((*(info->Sections))[SectIdx]).getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); if (SegName != "__OBJC" && SectName != "__cstring") @@ -4009,7 +4085,12 @@ static const SectionRef get_section(MachOObjectFile *O, const char *segname, const char *sectname) { for (const SectionRef &Section : O->sections()) { StringRef SectName; - Section.getName(SectName); + Expected SecNameOrErr = Section.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = Section.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); if (SegName == segname && SectName == sectname) @@ -4026,7 +4107,12 @@ walk_pointer_list_64(const char *listname, const SectionRef 
S, return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -4075,8 +4161,7 @@ walk_pointer_list_32(const char *listname, const SectionRef S, if (S == SectionRef()) return; - StringRef SectName; - S.getName(SectName); + StringRef SectName = unwrapOrError(S.getName(), O->getFileName()); DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5750,7 +5835,12 @@ static void print_message_refs64(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5813,7 +5903,12 @@ static void print_message_refs32(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5859,7 +5954,12 @@ static void print_image_info64(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + 
DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5916,7 +6016,12 @@ static void print_image_info32(SectionRef S, struct DisassembleInfo *info) { return; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -5966,7 +6071,12 @@ static void print_image_info(SectionRef S, struct DisassembleInfo *info) { const char *r; StringRef SectName; - S.getName(SectName); + Expected SecNameOrErr = S.getName(); + if (SecNameOrErr) + SectName = *SecNameOrErr; + else + consumeError(SecNameOrErr.takeError()); + DataRefImpl Ref = S.getRawDataRefImpl(); StringRef SegName = info->O->getSectionFinalSegmentName(Ref); outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; @@ -6001,11 +6111,8 @@ static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6086,11 +6193,8 @@ static void printObjc2_32bit_MetaData(MachOObjectFile *O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6184,11 +6288,8 @@ static bool printObjc1_32bit_MetaData(MachOObjectFile 
*O, bool verbose) { CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, verbose); @@ -6345,11 +6446,8 @@ static void DumpProtocolSection(MachOObjectFile *O, const char *sect, CreateSymbolAddressMap(O, &AddrMap); std::vector Sections; - for (const SectionRef &Section : O->sections()) { - StringRef SectName; - Section.getName(SectName); + for (const SectionRef &Section : O->sections()) Sections.push_back(Section); - } struct DisassembleInfo info(O, &AddrMap, &Sections, true); @@ -7203,7 +7301,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, std::vector Sections; std::vector Symbols; SmallVector FoundFns; - uint64_t BaseSegmentAddress; + uint64_t BaseSegmentAddress = 0; getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns, BaseSegmentAddress); @@ -7242,10 +7340,24 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, // A separate DSym file path was specified, parse it as a macho file, // get the sections and supply it to the section name parsing machinery. if (!DSYMFile.empty()) { + std::string DSYMPath(DSYMFile); + + // If DSYMPath is a .dSYM directory, append the Mach-O file. + if (llvm::sys::fs::is_directory(DSYMPath) && + llvm::sys::path::extension(DSYMPath) == ".dSYM") { + SmallString<128> ShortName(llvm::sys::path::filename(DSYMPath)); + llvm::sys::path::replace_extension(ShortName, ""); + SmallString<1024> FullPath(DSYMPath); + llvm::sys::path::append(FullPath, "Contents", "Resources", "DWARF", + ShortName); + DSYMPath = FullPath.str(); + } + + // Load the file. 
ErrorOr> BufOrErr = - MemoryBuffer::getFileOrSTDIN(DSYMFile); + MemoryBuffer::getFileOrSTDIN(DSYMPath); if (std::error_code EC = BufOrErr.getError()) { - report_error(errorCodeToError(EC), DSYMFile); + reportError(errorCodeToError(EC), DSYMPath); return; } @@ -7255,13 +7367,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, Expected> BinaryOrErr = createBinary(DSYMBuf.get()->getMemBufferRef()); if (!BinaryOrErr) { - report_error(BinaryOrErr.takeError(), DSYMFile); + reportError(BinaryOrErr.takeError(), DSYMPath); return; } - // We need to keep the Binary elive with the buffer + // We need to keep the Binary alive with the buffer DSYMBinary = std::move(BinaryOrErr.get()); - if (ObjectFile *O = dyn_cast(DSYMBinary.get())) { // this is a Mach-O object file, use it if (MachOObjectFile *MachDSYM = dyn_cast(&*O)) { @@ -7269,7 +7380,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, } else { WithColor::error(errs(), "llvm-objdump") - << DSYMFile << " is not a Mach-O file type.\n"; + << DSYMPath << " is not a Mach-O file type.\n"; return; } } @@ -7289,19 +7400,19 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr, &ArchFlag); Expected> MachDSYM = - UB->getObjectForArch(ArchFlag); + UB->getMachOObjectForArch(ArchFlag); if (!MachDSYM) { - report_error(MachDSYM.takeError(), DSYMFile); + reportError(MachDSYM.takeError(), DSYMPath); return; } - // We need to keep the Binary elive with the buffer + // We need to keep the Binary alive with the buffer DbgObj = &*MachDSYM.get(); DSYMBinary = std::move(*MachDSYM); } else { WithColor::error(errs(), "llvm-objdump") - << DSYMFile << " is not a Mach-O or Universal file type.\n"; + << DSYMPath << " is not a Mach-O or Universal file type.\n"; return; } } @@ -7314,8 +7425,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, outs() << "(" << DisSegName << "," 
<< DisSectName << ") section\n"; for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { - StringRef SectName; - if (Sections[SectIdx].getName(SectName) || SectName != DisSectName) + Expected SecNameOrErr = Sections[SectIdx].getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); + continue; + } + if (*SecNameOrErr != DisSectName) continue; DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); @@ -7496,24 +7611,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, if (!NoShowRawInsn || Arch == Triple::arm) outs() << "\t"; - // Check the data in code table here to see if this is data not an - // instruction to be disassembled. - DiceTable Dice; - Dice.push_back(std::make_pair(PC, DiceRef())); - dice_table_iterator DTI = - std::search(Dices.begin(), Dices.end(), Dice.begin(), Dice.end(), - compareDiceTableEntries); - if (DTI != Dices.end()) { - uint16_t Length; - DTI->second.getLength(Length); - uint16_t Kind; - DTI->second.getKind(Kind); - Size = DumpDataInCode(Bytes.data() + Index, Length, Kind); - if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && - (PC == (DTI->first + Length - 1)) && (Length & 1)) - Size++; + if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, Size)) continue; - } SmallVector AnnotationsBytes; raw_svector_ostream Annotations(AnnotationsBytes); @@ -7588,6 +7687,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, MCInst Inst; uint64_t PC = SectAddress + Index; + + if (DumpAndSkipDataInCode(PC, Bytes.data() + Index, Dices, InstSize)) + continue; + SmallVector AnnotationsBytes; raw_svector_ostream Annotations(AnnotationsBytes); if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC, @@ -7724,8 +7827,12 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, auto Sym = Symbols.upper_bound(Addr); if (Sym == Symbols.begin()) { // The first symbol in the object is after this reference, the best we can - // do is section-relative notation. 
- RelocSection.getName(Name); + // do is section-relative notation. + if (Expected NameOrErr = RelocSection.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Addend = Addr - SectionAddr; return; } @@ -7744,7 +7851,11 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, // There is a symbol before this reference, but it's in a different // section. Probably not helpful to mention it, so use the section name. - RelocSection.getName(Name); + if (Expected NameOrErr = RelocSection.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + Addend = Addr - SectionAddr; } @@ -8109,7 +8220,11 @@ void printMachOUnwindInfo(const MachOObjectFile *Obj) { for (const SectionRef &Section : Obj->sections()) { StringRef SectName; - Section.getName(SectName); + if (Expected NameOrErr = Section.getName()) + SectName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (SectName == "__compact_unwind") printMachOCompactUnwindSection(Obj, Symbols, Section); else if (SectName == "__unwind_info") @@ -10191,7 +10306,7 @@ void printMachOExportsTrie(const object::MachOObjectFile *Obj) { outs() << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } //===----------------------------------------------------------------------===// @@ -10212,7 +10327,7 @@ void printMachORebaseTable(object::MachOObjectFile *Obj) { Address, Entry.typeName().str().c_str()); } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) { @@ -10264,7 +10379,7 @@ void printMachOBindTable(object::MachOObjectFile *Obj) { << Entry.symbolName() << Attr << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } 
//===----------------------------------------------------------------------===// @@ -10289,7 +10404,7 @@ void printMachOLazyBindTable(object::MachOObjectFile *Obj) { << Entry.symbolName() << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } //===----------------------------------------------------------------------===// @@ -10321,7 +10436,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) { << "\n"; } if (Err) - report_error(std::move(Err), Obj->getFileName()); + reportError(std::move(Err), Obj->getFileName()); } // get_dyld_bind_info_symbolname() is used for disassembly and passed an @@ -10331,7 +10446,7 @@ void printMachOWeakBindTable(object::MachOObjectFile *Obj) { static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, struct DisassembleInfo *info) { if (info->bindtable == nullptr) { - info->bindtable = llvm::make_unique(); + info->bindtable = std::make_unique(); Error Err = Error::success(); for (const object::MachOBindEntry &Entry : info->O->bindTable(Err)) { uint64_t Address = Entry.address(); @@ -10340,7 +10455,7 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, (*info->bindtable)[Address] = name; } if (Err) - report_error(std::move(Err), info->O->getFileName()); + reportError(std::move(Err), info->O->getFileName()); } auto name = info->bindtable->lookup(ReferenceValue); return !name.empty() ? 
name.data() : nullptr; diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 58981203c59..34a44b3b7fa 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" @@ -341,78 +342,84 @@ static StringRef ToolName; typedef std::vector> SectionSymbolsTy; -static bool shouldKeep(object::SectionRef S) { +namespace { +struct FilterResult { + // True if the section should not be skipped. + bool Keep; + + // True if the index counter should be incremented, even if the section should + // be skipped. For example, sections may be skipped if they are not included + // in the --section flag, but we still want those to count toward the section + // count. + bool IncrementIndex; +}; +} // namespace + +static FilterResult checkSectionFilter(object::SectionRef S) { if (FilterSections.empty()) - return true; - StringRef SecName; - std::error_code error = S.getName(SecName); - if (error) - return false; + return {/*Keep=*/true, /*IncrementIndex=*/true}; + + Expected SecNameOrErr = S.getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); + return {/*Keep=*/false, /*IncrementIndex=*/false}; + } + StringRef SecName = *SecNameOrErr; + // StringSet does not allow empty key so avoid adding sections with // no name (such as the section with index 0) here. if (!SecName.empty()) FoundSectionSet.insert(SecName); - return is_contained(FilterSections, SecName); + + // Only show the section if it's in the FilterSections list, but always + // increment so the indexing is stable. 
+ return {/*Keep=*/is_contained(FilterSections, SecName), + /*IncrementIndex=*/true}; } -SectionFilter ToolSectionFilter(object::ObjectFile const &O) { - return SectionFilter([](object::SectionRef S) { return shouldKeep(S); }, O); +SectionFilter ToolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) { + // Start at UINT64_MAX so that the first index returned after an increment is + // zero (after the unsigned wrap). + if (Idx) + *Idx = UINT64_MAX; + return SectionFilter( + [Idx](object::SectionRef S) { + FilterResult Result = checkSectionFilter(S); + if (Idx != nullptr && Result.IncrementIndex) + *Idx += 1; + return Result.Keep; + }, + O); } -void error(std::error_code EC) { - if (!EC) - return; - WithColor::error(errs(), ToolName) - << "reading file: " << EC.message() << ".\n"; - errs().flush(); - exit(1); +std::string getFileNameForError(const object::Archive::Child &C, + unsigned Index) { + Expected NameOrErr = C.getName(); + if (NameOrErr) + return NameOrErr.get(); + // If we have an error getting the name then we print the index of the archive + // member. Since we are already in an error state, we just ignore this error. + consumeError(NameOrErr.takeError()); + return ""; } -void error(Error E) { - if (!E) - return; - WithColor::error(errs(), ToolName) << toString(std::move(E)); - exit(1); -} - -LLVM_ATTRIBUTE_NORETURN void error(Twine Message) { - WithColor::error(errs(), ToolName) << Message << ".\n"; - errs().flush(); - exit(1); -} - -void warn(StringRef Message) { - WithColor::warning(errs(), ToolName) << Message << ".\n"; - errs().flush(); -} - -static void warn(Twine Message) { +void reportWarning(Twine Message, StringRef File) { // Output order between errs() and outs() matters especially for archive // files where the output is per member object. 
outs().flush(); - WithColor::warning(errs(), ToolName) << Message << "\n"; + WithColor::warning(errs(), ToolName) + << "'" << File << "': " << Message << "\n"; errs().flush(); } -LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message) { - WithColor::error(errs(), ToolName) - << "'" << File << "': " << Message << ".\n"; +LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message) { + WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n"; exit(1); } -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File) { - assert(E); - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(std::move(E), OS); - OS.flush(); - WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf; - exit(1); -} - -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, - StringRef FileName, - StringRef ArchitectureName) { +LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, + StringRef ArchiveName, + StringRef ArchitectureName) { assert(E); WithColor::error(errs(), ToolName); if (ArchiveName != "") @@ -429,18 +436,13 @@ LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, exit(1); } -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName, - const object::Archive::Child &C, - StringRef ArchitectureName) { - Expected NameOrErr = C.getName(); - // TODO: if we have a error getting the name then it would be nice to print - // the index of which archive member this is and or its offset in the - // archive instead of "???" as the name. 
- if (!NameOrErr) { - consumeError(NameOrErr.takeError()); - report_error(std::move(E), ArchiveName, "???", ArchitectureName); - } else - report_error(std::move(E), ArchiveName, NameOrErr.get(), ArchitectureName); +static void reportCmdLineWarning(Twine Message) { + WithColor::warning(errs(), ToolName) << Message << "\n"; +} + +LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(Twine Message) { + WithColor::error(errs(), ToolName) << Message << "\n"; + exit(1); } static void warnOnNoMatchForSections() { @@ -455,37 +457,29 @@ static void warnOnNoMatchForSections() { // Warn only if no section in FilterSections is matched. for (StringRef S : MissingSections) - warn("section '" + S + "' mentioned in a -j/--section option, but not " - "found in any input file"); + reportCmdLineWarning("section '" + S + + "' mentioned in a -j/--section option, but not " + "found in any input file"); } -static const Target *getTarget(const ObjectFile *Obj = nullptr) { +static const Target *getTarget(const ObjectFile *Obj) { // Figure out the target triple. Triple TheTriple("unknown-unknown-unknown"); if (TripleName.empty()) { - if (Obj) - TheTriple = Obj->makeTriple(); + TheTriple = Obj->makeTriple(); } else { TheTriple.setTriple(Triple::normalize(TripleName)); - - // Use the triple, but also try to combine with ARM build attributes. - if (Obj) { - auto Arch = Obj->getArch(); - if (Arch == Triple::arm || Arch == Triple::armeb) - Obj->setARMSubArch(TheTriple); - } + auto Arch = Obj->getArch(); + if (Arch == Triple::arm || Arch == Triple::armeb) + Obj->setARMSubArch(TheTriple); } // Get the target specific parser. 
std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, Error); - if (!TheTarget) { - if (Obj) - report_error(Obj->getFileName(), "can't find target: " + Error); - else - error("can't find target: " + Error); - } + if (!TheTarget) + reportError(Obj->getFileName(), "can't find target: " + Error); // Update the triple name and return the found target. TripleName = TheTriple.getTriple(); @@ -548,17 +542,22 @@ protected: DILineInfo OldLineInfo; const ObjectFile *Obj = nullptr; std::unique_ptr Symbolizer; - // File name to file contents of source + // File name to file contents of source. std::unordered_map> SourceCache; - // Mark the line endings of the cached source + // Mark the line endings of the cached source. std::unordered_map> LineCache; + // Keep track of missing sources. + StringSet<> MissingSources; + // Only emit 'no debug info' warning once. + bool WarnedNoDebugInfo; private: bool cacheSource(const DILineInfo& LineInfoFile); public: SourcePrinter() = default; - SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) { + SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) + : Obj(Obj), WarnedNoDebugInfo(false) { symbolize::LLVMSymbolizer::Options SymbolizerOpts; SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None; SymbolizerOpts.Demangle = false; @@ -568,6 +567,7 @@ public: virtual ~SourcePrinter() = default; virtual void printSourceLine(raw_ostream &OS, object::SectionedAddress Address, + StringRef ObjectFilename, StringRef Delimiter = "; "); }; @@ -577,8 +577,12 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source); } else { auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName); - if (!BufferOrError) + if (!BufferOrError) { + if (MissingSources.insert(LineInfo.FileName).second) + reportWarning("failed to find source " + LineInfo.FileName, + Obj->getFileName()); return false; + } Buffer = 
std::move(*BufferOrError); } // Chomp the file to get lines @@ -599,20 +603,33 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { void SourcePrinter::printSourceLine(raw_ostream &OS, object::SectionedAddress Address, + StringRef ObjectFilename, StringRef Delimiter) { if (!Symbolizer) return; DILineInfo LineInfo = DILineInfo(); auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address); + std::string ErrorMessage; if (!ExpectedLineInfo) - consumeError(ExpectedLineInfo.takeError()); + ErrorMessage = toString(ExpectedLineInfo.takeError()); else LineInfo = *ExpectedLineInfo; - if ((LineInfo.FileName == "") || LineInfo.Line == 0 || - ((OldLineInfo.Line == LineInfo.Line) && - (OldLineInfo.FileName == LineInfo.FileName))) + if (LineInfo.FileName == DILineInfo::BadString) { + if (!WarnedNoDebugInfo) { + std::string Warning = + "failed to parse debug information for " + ObjectFilename.str(); + if (!ErrorMessage.empty()) + Warning += ": " + ErrorMessage; + reportWarning(Warning, ObjectFilename); + WarnedNoDebugInfo = true; + } + return; + } + + if (LineInfo.Line == 0 || ((OldLineInfo.Line == LineInfo.Line) && + (OldLineInfo.FileName == LineInfo.FileName))) return; if (PrintLines) @@ -623,8 +640,14 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, return; auto LineBuffer = LineCache.find(LineInfo.FileName); if (LineBuffer != LineCache.end()) { - if (LineInfo.Line > LineBuffer->second.size()) + if (LineInfo.Line > LineBuffer->second.size()) { + reportWarning( + formatv( + "debug info line number {0} exceeds the number of lines in {1}", + LineInfo.Line, LineInfo.FileName), + ObjectFilename); return; + } // Vector begins at 0, line numbers are non-zero OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n'; } @@ -646,13 +669,14 @@ static bool hasMappingSymbols(const ObjectFile *Obj) { return isArmElf(Obj) || isAArch64Elf(Obj); } -static void printRelocation(const RelocationRef &Rel, uint64_t Address, - bool Is64Bits) { +static void 
printRelocation(StringRef FileName, const RelocationRef &Rel, + uint64_t Address, bool Is64Bits) { StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": "; SmallString<16> Name; SmallString<32> Val; Rel.getTypeName(Name); - error(getRelocationValueString(Rel, Val)); + if (Error E = getRelocationValueString(Rel, Val)) + reportError(std::move(E), FileName); outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n"; } @@ -663,29 +687,25 @@ public: ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, - SourcePrinter *SP, + SourcePrinter *SP, StringRef ObjectFilename, std::vector *Rels = nullptr) { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); - { - formatted_raw_ostream FOS(OS); - if (!NoLeadingAddr) - FOS << format("%8" PRIx64 ":", Address.Address); - if (!NoShowRawInsn) { - FOS << ' '; - dumpBytes(Bytes, FOS); - } - FOS.flush(); - // The output of printInst starts with a tab. Print some spaces so that - // the tab has 1 column and advances to the target tab stop. - unsigned TabStop = NoShowRawInsn ? 16 : 40; - unsigned Column = FOS.getColumn(); - FOS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8); - - // The dtor calls flush() to ensure the indent comes before printInst(). + size_t Start = OS.tell(); + if (!NoLeadingAddr) + OS << format("%8" PRIx64 ":", Address.Address); + if (!NoShowRawInsn) { + OS << ' '; + dumpBytes(Bytes, OS); } + // The output of printInst starts with a tab. Print some spaces so that + // the tab has 1 column and advances to the target tab stop. + unsigned TabStop = NoShowRawInsn ? 16 : 40; + unsigned Column = OS.tell() - Start; + OS.indent(Column < TabStop - 1 ? 
TabStop - 1 - Column : 7 - Column % 8); + if (MI) IP.printInst(MI, OS, "", STI); else @@ -711,9 +731,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ""); + SP->printSourceLine(OS, Address, ObjectFilename, ""); if (!MI) { printLead(Bytes, Address.Address, OS); OS << " "; @@ -739,7 +760,7 @@ public: auto PrintReloc = [&]() -> void { while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) { if (RelCur->getOffset() == Address.Address) { - printRelocation(*RelCur, Address.Address, false); + printRelocation(ObjectFilename, *RelCur, Address.Address, false); return; } ++RelCur; @@ -750,7 +771,7 @@ public: OS << Separator; Separator = "\n"; if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ""); + SP->printSourceLine(OS, Address, ObjectFilename, ""); printLead(Bytes, Address.Address, OS); OS << Preamble; Preamble = " "; @@ -780,9 +801,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); if (MI) { SmallString<40> InstStr; @@ -831,9 +853,10 @@ public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef Bytes, object::SectionedAddress Address, raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector *Rels) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address); + SP->printSourceLine(OS, Address, ObjectFilename); if (!NoLeadingAddr) OS << 
format("%8" PRId64 ":", Address.Address / 8); if (!NoShowRawInsn) { @@ -924,10 +947,12 @@ static void addPltEntries(const ObjectFile *Obj, StringSaver &Saver) { Optional Plt = None; for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - if (Section.getName(Name)) + Expected SecNameOrErr = Section.getName(); + if (!SecNameOrErr) { + consumeError(SecNameOrErr.takeError()); continue; - if (Name == ".plt") + } + if (*SecNameOrErr == ".plt") Plt = Section; } if (!Plt) @@ -968,9 +993,18 @@ static size_t countSkippableZeroBytes(ArrayRef Buf) { static std::map> getRelocsMap(object::ObjectFile const &Obj) { std::map> Ret; + uint64_t I = (uint64_t)-1; for (SectionRef Sec : Obj.sections()) { - section_iterator Relocated = Sec.getRelocatedSection(); - if (Relocated == Obj.section_end() || !shouldKeep(*Relocated)) + ++I; + Expected RelocatedOrErr = Sec.getRelocatedSection(); + if (!RelocatedOrErr) + reportError(Obj.getFileName(), + "section (" + Twine(I) + + "): failed to get a relocated section: " + + toString(RelocatedOrErr.takeError())); + + section_iterator Relocated = *RelocatedOrErr; + if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep) continue; std::vector &V = Ret[*Relocated]; for (const RelocationRef &R : Sec.relocations()) @@ -1137,11 +1171,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (const auto *COFFObj = dyn_cast(Obj)) { for (const auto &ExportEntry : COFFObj->export_directories()) { StringRef Name; - error(ExportEntry.getSymbolName(Name)); + if (std::error_code EC = ExportEntry.getSymbolName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); if (Name.empty()) continue; + uint32_t RVA; - error(ExportEntry.getExportRVA(RVA)); + if (std::error_code EC = ExportEntry.getExportRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); uint64_t VA = COFFObj->getImageBase() + RVA; auto Sec = partition_point( @@ -1210,9 +1247,8 @@ static void disassembleObject(const 
Target *TheTarget, const ObjectFile *Obj, DataRefImpl DR = Section.getRawDataRefImpl(); SegmentName = MachO->getSectionFinalSegmentName(DR); } - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); // If the section has no symbol at the start, just insert a dummy one. if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) { Symbols.insert( @@ -1381,10 +1417,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (Size == 0) Size = 1; - PIP.printInst( - *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), - {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(), - "", *STI, &SP, &Rels); + PIP.printInst(*IP, Disassembled ? &Inst : nullptr, + Bytes.slice(Index, Size), + {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, + outs(), "", *STI, &SP, Obj->getFileName(), &Rels); outs() << CommentStream.str(); Comments.clear(); @@ -1470,7 +1506,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Offset += AdjustVMA; } - printRelocation(*RelCur, SectionAddr + Offset, Is64Bits); + printRelocation(Obj->getFileName(), *RelCur, SectionAddr + Offset, + Is64Bits); ++RelCur; } } @@ -1482,7 +1519,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, StringSet<> MissingDisasmFuncsSet = set_difference(DisasmFuncsSet, FoundDisasmFuncsSet); for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys()) - warn("failed to disassemble missing function " + MissingDisasmFunc); + reportWarning("failed to disassemble missing function " + MissingDisasmFunc, + FileName); } static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { @@ -1497,24 +1535,24 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr MRI( TheTarget->createMCRegInfo(TripleName)); if (!MRI) - report_error(Obj->getFileName(), - "no register info for target " + 
TripleName); + reportError(Obj->getFileName(), + "no register info for target " + TripleName); // Set up disassembler. std::unique_ptr AsmInfo( TheTarget->createMCAsmInfo(*MRI, TripleName)); if (!AsmInfo) - report_error(Obj->getFileName(), - "no assembly info for target " + TripleName); + reportError(Obj->getFileName(), + "no assembly info for target " + TripleName); std::unique_ptr STI( TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); if (!STI) - report_error(Obj->getFileName(), - "no subtarget info for target " + TripleName); + reportError(Obj->getFileName(), + "no subtarget info for target " + TripleName); std::unique_ptr MII(TheTarget->createMCInstrInfo()); if (!MII) - report_error(Obj->getFileName(), - "no instruction info for target " + TripleName); + reportError(Obj->getFileName(), + "no instruction info for target " + TripleName); MCObjectFileInfo MOFI; MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI); // FIXME: for now initialize MCObjectFileInfo with default values @@ -1523,8 +1561,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr DisAsm( TheTarget->createMCDisassembler(*STI, Ctx)); if (!DisAsm) - report_error(Obj->getFileName(), - "no disassembler for target " + TripleName); + reportError(Obj->getFileName(), "no disassembler for target " + TripleName); // If we have an ARM object file, we need a second disassembler, because // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode. 
@@ -1549,8 +1586,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { std::unique_ptr IP(TheTarget->createMCInstPrinter( Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI)); if (!IP) - report_error(Obj->getFileName(), - "no instruction printer for target " + TripleName); + reportError(Obj->getFileName(), + "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); @@ -1558,7 +1595,8 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { for (StringRef Opt : DisassemblerOptions) if (!IP->applyTargetSpecificCLOption(Opt)) - error("Unrecognized disassembler option: " + Opt); + reportError(Obj->getFileName(), + "Unrecognized disassembler option: " + Opt); disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(), MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP, @@ -1577,16 +1615,21 @@ void printRelocations(const ObjectFile *Obj) { // sections. Usually, there is an only one relocation section for // each relocated section. 
MapVector> SecToRelSec; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + uint64_t Ndx; + for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) { if (Section.relocation_begin() == Section.relocation_end()) continue; - const SectionRef TargetSec = *Section.getRelocatedSection(); - SecToRelSec[TargetSec].push_back(Section); + Expected SecOrErr = Section.getRelocatedSection(); + if (!SecOrErr) + reportError(Obj->getFileName(), + "section (" + Twine(Ndx) + + "): unable to get a relocation target: " + + toString(SecOrErr.takeError())); + SecToRelSec[**SecOrErr].push_back(Section); } for (std::pair> &P : SecToRelSec) { - StringRef SecName; - error(P.first.getName(SecName)); + StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName()); outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n"; for (SectionRef Section : P.second) { @@ -1597,7 +1640,9 @@ void printRelocations(const ObjectFile *Obj) { if (Address < StartAddress || Address > StopAddress || getHidden(Reloc)) continue; Reloc.getTypeName(RelocName); - error(getRelocationValueString(Reloc, ValueStr)); + if (Error E = getRelocationValueString(Reloc, ValueStr)) + reportError(std::move(E), Obj->getFileName()); + outs() << format(Fmt.data(), Address) << " " << RelocName << " " << ValueStr << "\n"; } @@ -1613,7 +1658,7 @@ void printDynamicRelocations(const ObjectFile *Obj) { const auto *Elf = dyn_cast(Obj); if (!Elf || Elf->getEType() != ELF::ET_DYN) { - error("not a dynamic object"); + reportError(Obj->getFileName(), "not a dynamic object"); return; } @@ -1629,7 +1674,8 @@ void printDynamicRelocations(const ObjectFile *Obj) { SmallString<32> RelocName; SmallString<32> ValueStr; Reloc.getTypeName(RelocName); - error(getRelocationValueString(Reloc, ValueStr)); + if (Error E = getRelocationValueString(Reloc, ValueStr)) + reportError(std::move(E), Obj->getFileName()); outs() << format(Fmt.data(), Address) << " " << RelocName << " " << ValueStr << "\n"; } @@ -1647,47 +1693,64 @@ 
static bool shouldDisplayLMA(const ObjectFile *Obj) { return ShowLMA; } +static size_t getMaxSectionNameWidth(const ObjectFile *Obj) { + // Default column width for names is 13 even if no names are that long. + size_t MaxWidth = 13; + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); + MaxWidth = std::max(MaxWidth, Name.size()); + } + return MaxWidth; +} + void printSectionHeaders(const ObjectFile *Obj) { + size_t NameWidth = getMaxSectionNameWidth(Obj); + size_t AddressWidth = 2 * Obj->getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); if (HasLMAColumn) outs() << "Sections:\n" - "Idx Name Size VMA LMA " - "Type\n"; + "Idx " + << left_justify("Name", NameWidth) << " Size " + << left_justify("VMA", AddressWidth) << " " + << left_justify("LMA", AddressWidth) << " Type\n"; else outs() << "Sections:\n" - "Idx Name Size VMA Type\n"; + "Idx " + << left_justify("Name", NameWidth) << " Size " + << left_justify("VMA", AddressWidth) << " Type\n"; - for (const SectionRef &Section : ToolSectionFilter(*Obj)) { - StringRef Name; - error(Section.getName(Name)); + uint64_t Idx; + for (const SectionRef &Section : ToolSectionFilter(*Obj, &Idx)) { + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t VMA = Section.getAddress(); if (shouldAdjustVA(Section)) VMA += AdjustVMA; uint64_t Size = Section.getSize(); - bool Text = Section.isText(); - bool Data = Section.isData(); - bool BSS = Section.isBSS(); - std::string Type = (std::string(Text ? "TEXT " : "") + - (Data ? "DATA " : "") + (BSS ? "BSS" : "")); + + std::string Type = Section.isText() ? "TEXT" : ""; + if (Section.isData()) + Type += Type.empty() ? "DATA" : " DATA"; + if (Section.isBSS()) + Type += Type.empty() ? 
"BSS" : " BSS"; if (HasLMAColumn) - outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %016" PRIx64 - " %s\n", - (unsigned)Section.getIndex(), Name.str().c_str(), Size, - VMA, getELFSectionLMA(Section), Type.c_str()); + outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, + Name.str().c_str(), Size) + << format_hex_no_prefix(VMA, AddressWidth) << " " + << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth) + << " " << Type << "\n"; else - outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n", - (unsigned)Section.getIndex(), Name.str().c_str(), Size, - VMA, Type.c_str()); + outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, + Name.str().c_str(), Size) + << format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n"; } outs() << "\n"; } void printSectionContents(const ObjectFile *Obj) { for (const SectionRef &Section : ToolSectionFilter(*Obj)) { - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t BaseAddr = Section.getAddress(); uint64_t Size = Section.getSize(); if (!Size) @@ -1741,21 +1804,26 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName, const StringRef FileName = O->getFileName(); for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) { const SymbolRef &Symbol = *I; - uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName, + uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName, ArchitectureName); if ((Address < StartAddress) || (Address > StopAddress)) continue; - SymbolRef::Type Type = unwrapOrError(Symbol.getType(), ArchiveName, - FileName, ArchitectureName); + SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName, + ArchiveName, ArchitectureName); uint32_t Flags = Symbol.getFlags(); - section_iterator Section = unwrapOrError(Symbol.getSection(), ArchiveName, - FileName, ArchitectureName); + section_iterator Section = 
unwrapOrError(Symbol.getSection(), FileName, + ArchiveName, ArchitectureName); StringRef Name; - if (Type == SymbolRef::ST_Debug && Section != O->section_end()) - Section->getName(Name); - else - Name = unwrapOrError(Symbol.getName(), ArchiveName, FileName, + if (Type == SymbolRef::ST_Debug && Section != O->section_end()) { + if (Expected NameOrErr = Section->getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + + } else { + Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName, ArchitectureName); + } bool Global = Flags & SymbolRef::SF_Global; bool Weak = Flags & SymbolRef::SF_Weak; @@ -1801,8 +1869,8 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName, StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); outs() << SegmentName << ","; } - StringRef SectionName; - error(Section->getName(SectionName)); + StringRef SectionName = + unwrapOrError(Section->getName(), O->getFileName()); outs() << SectionName; } @@ -1875,7 +1943,11 @@ void printRawClangAST(const ObjectFile *Obj) { Optional ClangASTSection; for (auto Sec : ToolSectionFilter(*Obj)) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ClangASTSectionName) { ClangASTSection = Sec; break; @@ -1907,7 +1979,11 @@ static void printFaultMaps(const ObjectFile *Obj) { for (auto Sec : ToolSectionFilter(*Obj)) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == FaultMapSectionName) { FaultMapSection = Sec; break; @@ -1946,12 +2022,12 @@ static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) { printMachOLoadCommands(O); return; } - report_error(O->getFileName(), "Invalid/Unsupported object file format"); + reportError(O->getFileName(), "Invalid/Unsupported object file format"); } static void printFileHeaders(const ObjectFile *O) 
{ if (!O->isELF() && !O->isCOFF()) - report_error(O->getFileName(), "Invalid/Unsupported object file format"); + reportError(O->getFileName(), "Invalid/Unsupported object file format"); Triple::ArchType AT = O->getArch(); outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n"; @@ -2010,6 +2086,43 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) { outs() << Name << "\n"; } +// For ELF only now. +static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) { + if (const auto *Elf = dyn_cast(Obj)) { + if (Elf->getEType() != ELF::ET_REL) + return true; + } + return false; +} + +static void checkForInvalidStartStopAddress(ObjectFile *Obj, + uint64_t Start, uint64_t Stop) { + if (!shouldWarnForInvalidStartStopAddress(Obj)) + return; + + for (const SectionRef &Section : Obj->sections()) + if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) { + uint64_t BaseAddr = Section.getAddress(); + uint64_t Size = Section.getSize(); + if ((Start < BaseAddr + Size) && Stop > BaseAddr) + return; + } + + if (StartAddress.getNumOccurrences() == 0) + reportWarning("no section has address less than 0x" + + Twine::utohexstr(Stop) + " specified by --stop-address", + Obj->getFileName()); + else if (StopAddress.getNumOccurrences() == 0) + reportWarning("no section has address greater than or equal to 0x" + + Twine::utohexstr(Start) + " specified by --start-address", + Obj->getFileName()); + else + reportWarning("no section overlaps the range [0x" + + Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) + + ") specified by --start-address/--stop-address", + Obj->getFileName()); +} + static void dumpObject(ObjectFile *O, const Archive *A = nullptr, const Archive::Child *C = nullptr) { // Avoid other output when using a raw option. 
@@ -2022,27 +2135,40 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, outs() << ":\tfile format " << O->getFileFormatName() << "\n\n"; } + if (StartAddress.getNumOccurrences() || StopAddress.getNumOccurrences()) + checkForInvalidStartStopAddress(O, StartAddress, StopAddress); + + // Note: the order here matches GNU objdump for compatability. StringRef ArchiveName = A ? A->getFileName() : ""; - if (FileHeaders) - printFileHeaders(O); if (ArchiveHeaders && !MachOOpt && C) printArchiveChild(ArchiveName, *C); - if (Disassemble) - disassembleObject(O, Relocations); + if (FileHeaders) + printFileHeaders(O); + if (PrivateHeaders || FirstPrivateHeader) + printPrivateFileHeaders(O, FirstPrivateHeader); + if (SectionHeaders) + printSectionHeaders(O); + if (SymbolTable) + printSymbolTable(O, ArchiveName); + if (DwarfDumpType != DIDT_Null) { + std::unique_ptr DICtx = DWARFContext::create(*O); + // Dump the complete DWARF structure. + DIDumpOptions DumpOpts; + DumpOpts.DumpType = DwarfDumpType; + DICtx->dump(outs(), DumpOpts); + } if (Relocations && !Disassemble) printRelocations(O); if (DynamicRelocations) printDynamicRelocations(O); - if (SectionHeaders) - printSectionHeaders(O); if (SectionContents) printSectionContents(O); - if (SymbolTable) - printSymbolTable(O, ArchiveName); + if (Disassemble) + disassembleObject(O, Relocations); if (UnwindInfo) printUnwindInfo(O); - if (PrivateHeaders || FirstPrivateHeader) - printPrivateFileHeaders(O, FirstPrivateHeader); + + // Mach-O specific options: if (ExportsTrie) printExportsTrie(O); if (Rebase) @@ -2053,17 +2179,12 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, printLazyBindTable(O); if (WeakBind) printWeakBindTable(O); + + // Other special sections: if (RawClangAST) printRawClangAST(O); if (FaultMapSection) printFaultMaps(O); - if (DwarfDumpType != DIDT_Null) { - std::unique_ptr DICtx = DWARFContext::create(*O); - // Dump the complete DWARF structure. 
- DIDumpOptions DumpOpts; - DumpOpts.DumpType = DwarfDumpType; - DICtx->dump(outs(), DumpOpts); - } } static void dumpObject(const COFFImportFile *I, const Archive *A, @@ -2086,11 +2207,13 @@ static void dumpObject(const COFFImportFile *I, const Archive *A, /// Dump each object file in \a a; static void dumpArchive(const Archive *A) { Error Err = Error::success(); + unsigned I = -1; for (auto &C : A->children(Err)) { + ++I; Expected> ChildOrErr = C.getAsBinary(); if (!ChildOrErr) { if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) - report_error(std::move(E), A->getFileName(), C); + reportError(std::move(E), getFileNameForError(C, I), A->getFileName()); continue; } if (ObjectFile *O = dyn_cast(&*ChildOrErr.get())) @@ -2098,11 +2221,11 @@ static void dumpArchive(const Archive *A) { else if (COFFImportFile *I = dyn_cast(&*ChildOrErr.get())) dumpObject(I, A, &C); else - report_error(errorCodeToError(object_error::invalid_file_type), - A->getFileName()); + reportError(errorCodeToError(object_error::invalid_file_type), + A->getFileName()); } if (Err) - report_error(std::move(Err), A->getFileName()); + reportError(std::move(Err), A->getFileName()); } /// Open file and figure out how to dump it. 
@@ -2126,7 +2249,7 @@ static void dumpInput(StringRef file) { else if (MachOUniversalBinary *UB = dyn_cast(&Binary)) parseInputMachO(UB); else - report_error(errorCodeToError(object_error::invalid_file_type), file); + reportError(errorCodeToError(object_error::invalid_file_type), file); } } // namespace llvm @@ -2147,7 +2270,7 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n"); if (StartAddress >= StopAddress) - error("start address should be less than stop address"); + reportCmdLineError("start address should be less than stop address"); ToolName = argv[0]; diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index e58d4a05c2e..43ce02ae0bc 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -31,6 +31,8 @@ extern cl::opt Demangle; typedef std::function FilterPredicate; +/// A filtered iterator for SectionRefs that skips sections based on some given +/// predicate. class SectionFilterIterator { public: SectionFilterIterator(FilterPredicate P, @@ -60,6 +62,8 @@ private: llvm::object::section_iterator End; }; +/// Creates an iterator range of SectionFilterIterators for a given Object and +/// predicate. class SectionFilter { public: SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O) @@ -79,7 +83,15 @@ private: }; // Various helper functions. -SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O); + +/// Creates a SectionFilter with a standard predicate that conditionally skips +/// sections when the --section objdump flag is provided. +/// +/// Idx is an optional output parameter that keeps track of which section index +/// this is. This may be different than the actual section number, as some +/// sections may be filtered (e.g. symbol tables). 
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O, + uint64_t *Idx = nullptr); Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj, const object::RelocationRef &Rel, @@ -96,8 +108,6 @@ Error getMachORelocationValueString(const object::MachOObjectFile *Obj, uint64_t getELFSectionLMA(const object::ELFSectionRef& Sec); -void error(std::error_code ec); -void error(Error E); bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B); void parseInputMachO(StringRef Filename); void parseInputMachO(object::MachOUniversalBinary *UB); @@ -129,24 +139,22 @@ void printSectionHeaders(const object::ObjectFile *O); void printSectionContents(const object::ObjectFile *O); void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName, StringRef ArchitectureName = StringRef()); -void warn(StringRef Message); -LLVM_ATTRIBUTE_NORETURN void error(Twine Message); -LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message); -LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File); -LLVM_ATTRIBUTE_NORETURN void -report_error(Error E, StringRef FileName, StringRef ArchiveName, - StringRef ArchitectureName = StringRef()); -LLVM_ATTRIBUTE_NORETURN void -report_error(Error E, StringRef ArchiveName, const object::Archive::Child &C, - StringRef ArchitectureName = StringRef()); +LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message); +LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, + StringRef ArchiveName = "", + StringRef ArchitectureName = ""); +void reportWarning(Twine Message, StringRef File); template T unwrapOrError(Expected EO, Ts &&... 
Args) { if (EO) return std::move(*EO); - report_error(EO.takeError(), std::forward(Args)...); + reportError(EO.takeError(), std::forward(Args)...); } +std::string getFileNameForError(const object::Archive::Child &C, + unsigned Index); + } // end namespace llvm #endif diff --git a/tools/llvm-pdbutil/BytesOutputStyle.cpp b/tools/llvm-pdbutil/BytesOutputStyle.cpp index 162d12c120b..ffc907e09f1 100644 --- a/tools/llvm-pdbutil/BytesOutputStyle.cpp +++ b/tools/llvm-pdbutil/BytesOutputStyle.cpp @@ -457,7 +457,7 @@ BytesOutputStyle::initializeTypes(uint32_t StreamIdx) { uint32_t Count = Tpi->getNumTypeRecords(); auto Offsets = Tpi->getTypeIndexOffsets(); TypeCollection = - llvm::make_unique(Types, Count, Offsets); + std::make_unique(Types, Count, Offsets); return *TypeCollection; } diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp index 962d4cf88a8..4d82e0fd917 100644 --- a/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1369,9 +1369,10 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() { LazyRandomTypeCollection Types(100); for (const auto &S : getObj().sections()) { - StringRef SectionName; - if (auto EC = S.getName(SectionName)) - return errorCodeToError(EC); + Expected NameOrErr = S.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef SectionName = *NameOrErr; // .debug$T is a standard CodeView type section, while .debug$P is the same // format but used for MSVC precompiled header object files. 
@@ -1551,7 +1552,7 @@ Error DumpOutputStyle::dumpModuleSymsForObj() { Dumper.setSymbolGroup(&Strings); for (auto Symbol : Symbols) { if (auto EC = Visitor.visitSymbolRecord(Symbol)) { - SymbolError = llvm::make_unique(std::move(EC)); + SymbolError = std::make_unique(std::move(EC)); return; } } diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/tools/llvm-pdbutil/ExplainOutputStyle.cpp index 94faa046398..3d2490509c0 100644 --- a/tools/llvm-pdbutil/ExplainOutputStyle.cpp +++ b/tools/llvm-pdbutil/ExplainOutputStyle.cpp @@ -64,7 +64,7 @@ Error ExplainOutputStyle::explainPdbFile() { Error ExplainOutputStyle::explainBinaryFile() { std::unique_ptr Stream = - llvm::make_unique(File.unknown().getBuffer(), + std::make_unique(File.unknown().getBuffer(), llvm::support::little); switch (opts::explain::InputType) { case opts::explain::InputFileType::DBIStream: { diff --git a/tools/llvm-pdbutil/InputFile.cpp b/tools/llvm-pdbutil/InputFile.cpp index bd23bfdbe31..b316882de64 100644 --- a/tools/llvm-pdbutil/InputFile.cpp +++ b/tools/llvm-pdbutil/InputFile.cpp @@ -66,12 +66,13 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { static inline bool isCodeViewDebugSubsection(object::SectionRef Section, StringRef Name, BinaryStreamReader &Reader) { - StringRef SectionName; - if (Section.getName(SectionName)) - return false; - - if (SectionName != Name) + if (Expected NameOrErr = Section.getName()) { + if (*NameOrErr != Name) + return false; + } else { + consumeError(NameOrErr.takeError()); return false; + } Expected ContentsOrErr = Section.getContents(); if (!ContentsOrErr) { @@ -384,7 +385,7 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { uint32_t Count = Stream.getNumTypeRecords(); auto Offsets = Stream.getTypeIndexOffsets(); Collection = - llvm::make_unique(Array, Count, Offsets); + std::make_unique(Array, Count, Offsets); return *Collection; } @@ -397,11 +398,11 @@ InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { if 
(!isDebugTSection(Section, Records)) continue; - Types = llvm::make_unique(Records, 100); + Types = std::make_unique(Records, 100); return *Types; } - Types = llvm::make_unique(100); + Types = std::make_unique(100); return *Types; } diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp index e5ae4705067..ebfa50625e7 100644 --- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp +++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp @@ -569,8 +569,9 @@ Error MinimalSymbolDumper::visitKnownRecord( Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeFramePointerRelSym &Def) { AutoIndent Indent(P, 7); - P.formatLine("offset = {0}, range = {1}", Def.Offset, formatRange(Def.Range)); - P.formatLine("gaps = {2}", Def.Offset, + P.formatLine("offset = {0}, range = {1}", Def.Hdr.Offset, + formatRange(Def.Range)); + P.formatLine("gaps = {2}", Def.Hdr.Offset, formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp index e8f8e5aa62c..2f7a39803ca 100644 --- a/tools/llvm-pdbutil/PrettyTypeDumper.cpp +++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp @@ -117,7 +117,7 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E, continue; } - auto Layout = llvm::make_unique(std::move(Class)); + auto Layout = std::make_unique(std::move(Class)); if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold) { ++Discarded; continue; @@ -259,7 +259,7 @@ void TypeDumper::start(const PDBSymbolExe &Exe) { continue; } - auto Layout = llvm::make_unique(std::move(Class)); + auto Layout = std::make_unique(std::move(Class)); if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold) continue; diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp index 785a9808679..9307300861d 100644 --- a/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -863,8 +863,8 @@ static void 
pdb2Yaml(StringRef Path) { std::unique_ptr Session; auto &File = loadPDB(Path, Session); - auto O = llvm::make_unique(File); - O = llvm::make_unique(File); + auto O = std::make_unique(File); + O = std::make_unique(File); ExitOnErr(O->dump()); } @@ -872,7 +872,7 @@ static void pdb2Yaml(StringRef Path) { static void dumpRaw(StringRef Path) { InputFile IF = ExitOnErr(InputFile::open(Path)); - auto O = llvm::make_unique(IF); + auto O = std::make_unique(IF); ExitOnErr(O->dump()); } @@ -880,7 +880,7 @@ static void dumpBytes(StringRef Path) { std::unique_ptr Session; auto &File = loadPDB(Path, Session); - auto O = llvm::make_unique(File); + auto O = std::make_unique(File); ExitOnErr(O->dump()); } @@ -1347,7 +1347,7 @@ static void explain() { ExitOnErr(InputFile::open(opts::explain::InputFilename.front(), true)); for (uint64_t Off : opts::explain::Offsets) { - auto O = llvm::make_unique(IF, Off); + auto O = std::make_unique(IF, Off); ExitOnErr(O->dump()); } diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp index 16d3ebe3fcb..41e9abb82b1 100644 --- a/tools/llvm-profdata/llvm-profdata.cpp +++ b/tools/llvm-profdata/llvm-profdata.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -37,6 +38,7 @@ enum ProfileFormat { PF_None = 0, PF_Text, PF_Compact_Binary, + PF_Ext_Binary, PF_GCC, PF_Binary }; @@ -84,6 +86,15 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { namespace { enum ProfileKinds { instr, sample }; +enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; +} + +static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC, + StringRef Whence = "") { + if (FailMode == failIfAnyAreInvalid) + exitWithErrorCode(EC, Whence); + else + warn(EC.message(), Whence); } 
static void handleMergeWriterError(Error E, StringRef WhenceFile = "", @@ -136,7 +147,7 @@ public: if (!BufOrError) exitWithErrorCode(BufOrError.getError(), InputFile); - auto Remapper = llvm::make_unique(); + auto Remapper = std::make_unique(); Remapper->File = std::move(BufOrError.get()); for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#'); @@ -173,33 +184,16 @@ typedef SmallVector WeightedFileVector; struct WriterContext { std::mutex Lock; InstrProfWriter Writer; - Error Err; - std::string ErrWhence; + std::vector> Errors; std::mutex &ErrLock; SmallSet &WriterErrorCodes; WriterContext(bool IsSparse, std::mutex &ErrLock, SmallSet &WriterErrorCodes) - : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""), - ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {} + : Lock(), Writer(IsSparse), Errors(), ErrLock(ErrLock), + WriterErrorCodes(WriterErrorCodes) {} }; -/// Determine whether an error is fatal for profile merging. -static bool isFatalError(instrprof_error IPE) { - switch (IPE) { - default: - return true; - case instrprof_error::success: - case instrprof_error::eof: - case instrprof_error::unknown_function: - case instrprof_error::hash_mismatch: - case instrprof_error::count_mismatch: - case instrprof_error::counter_overflow: - case instrprof_error::value_site_count_mismatch: - return false; - } -} - /// Computer the overlap b/w profile BaseFilename and TestFileName, /// and store the program level result to Overlap. static void overlapInput(const std::string &BaseFilename, @@ -212,7 +206,7 @@ static void overlapInput(const std::string &BaseFilename, // Skip the empty profiles by returning sliently. 
instrprof_error IPE = InstrProfError::take(std::move(E)); if (IPE != instrprof_error::empty_raw_profile) - WC->Err = make_error(IPE); + WC->Errors.emplace_back(make_error(IPE), TestFilename); return; } @@ -231,21 +225,17 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, WriterContext *WC) { std::unique_lock CtxGuard{WC->Lock}; - // If there's a pending hard error, don't do more work. - if (WC->Err) - return; - // Copy the filename, because llvm::ThreadPool copied the input "const // WeightedFile &" by value, making a reference to the filename within it // invalid outside of this packaged task. - WC->ErrWhence = Input.Filename; + std::string Filename = Input.Filename; auto ReaderOrErr = InstrProfReader::create(Input.Filename); if (Error E = ReaderOrErr.takeError()) { // Skip the empty profiles by returning sliently. instrprof_error IPE = InstrProfError::take(std::move(E)); if (IPE != instrprof_error::empty_raw_profile) - WC->Err = make_error(IPE); + WC->Errors.emplace_back(make_error(IPE), Filename); return; } @@ -253,9 +243,11 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, bool IsIRProfile = Reader->isIRLevelProfile(); bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { - WC->Err = make_error( - "Merge IR generated profile with Clang generated profile.", - std::error_code()); + WC->Errors.emplace_back( + make_error( + "Merge IR generated profile with Clang generated profile.", + std::error_code()), + Filename); return; } @@ -278,30 +270,23 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, FuncName, firstTime); }); } - if (Reader->hasError()) { - if (Error E = Reader->getError()) { - instrprof_error IPE = InstrProfError::take(std::move(E)); - if (isFatalError(IPE)) - WC->Err = make_error(IPE); - } - } + if (Reader->hasError()) + if (Error E = Reader->getError()) + WC->Errors.emplace_back(std::move(E), Filename); } 
/// Merge the \p Src writer context into \p Dst. static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { - // If we've already seen a hard error, continuing with the merge would - // clobber it. - if (Dst->Err || Src->Err) - return; + for (auto &ErrorPair : Src->Errors) + Dst->Errors.push_back(std::move(ErrorPair)); + Src->Errors.clear(); - bool Reported = false; Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) { - if (Reported) { - consumeError(std::move(E)); - return; - } - Reported = true; - Dst->Err = std::move(E); + instrprof_error IPE = InstrProfError::take(std::move(E)); + std::unique_lock ErrGuard{Dst->ErrLock}; + bool firstTime = Dst->WriterErrorCodes.insert(IPE).second; + if (firstTime) + warn(toString(make_error(IPE))); }); } @@ -309,12 +294,12 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, - unsigned NumThreads) { + unsigned NumThreads, FailureMode FailMode) { if (OutputFilename.compare("-") == 0) exitWithError("Cannot write indexed profdata format to stdout."); if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary && - OutputFormat != PF_Text) + OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text) exitWithError("Unknown format is specified."); std::mutex ErrorLock; @@ -328,7 +313,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, // Initialize the writer contexts. SmallVector, 4> Contexts; for (unsigned I = 0; I < NumThreads; ++I) - Contexts.emplace_back(llvm::make_unique( + Contexts.emplace_back(std::make_unique( OutputSparse, ErrorLock, WriterErrorCodes)); if (NumThreads == 1) { @@ -364,23 +349,21 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, } while (Mid > 0); } - // Handle deferred hard errors encountered during merging. + // Handle deferred errors encountered during merging. 
If the number of errors + // is equal to the number of inputs the merge failed. + unsigned NumErrors = 0; for (std::unique_ptr &WC : Contexts) { - if (!WC->Err) - continue; - if (!WC->Err.isA()) - exitWithError(std::move(WC->Err), WC->ErrWhence); - - instrprof_error IPE = InstrProfError::take(std::move(WC->Err)); - if (isFatalError(IPE)) - exitWithError(make_error(IPE), WC->ErrWhence); - else - warn(toString(make_error(IPE)), - WC->ErrWhence); + for (auto &ErrorPair : WC->Errors) { + ++NumErrors; + warn(toString(std::move(ErrorPair.first)), ErrorPair.second); + } } + if (NumErrors == Inputs.size() || + (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) + exitWithError("No profiles could be merged."); std::error_code EC; - raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None); + raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None); if (EC) exitWithErrorCode(EC, OutputFilename); @@ -425,21 +408,78 @@ remapSamples(const sampleprof::FunctionSamples &Samples, } static sampleprof::SampleProfileFormat FormatMap[] = { - sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Compact_Binary, - sampleprof::SPF_GCC, sampleprof::SPF_Binary}; + sampleprof::SPF_None, + sampleprof::SPF_Text, + sampleprof::SPF_Compact_Binary, + sampleprof::SPF_Ext_Binary, + sampleprof::SPF_GCC, + sampleprof::SPF_Binary}; + +static std::unique_ptr +getInputFileBuf(const StringRef &InputFile) { + if (InputFile == "") + return {}; + + auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile); + if (!BufOrError) + exitWithErrorCode(BufOrError.getError(), InputFile); + + return std::move(*BufOrError); +} + +static void populateProfileSymbolList(MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &PSL) { + if (!Buffer) + return; + + SmallVector SymbolVec; + StringRef Data = Buffer->getBuffer(); + Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); + + for (StringRef symbol : SymbolVec) + PSL.add(symbol); +} + +static void 
handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer, + ProfileFormat OutputFormat, + MemoryBuffer *Buffer, + sampleprof::ProfileSymbolList &WriterList, + bool CompressAllSections) { + populateProfileSymbolList(Buffer, WriterList); + if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary) + warn("Profile Symbol list is not empty but the output format is not " + "ExtBinary format. The list will be lost in the output. "); + + Writer.setProfileSymbolList(&WriterList); + + if (CompressAllSections) { + if (OutputFormat != PF_Ext_Binary) { + warn("-compress-all-section is ignored. Specify -extbinary to enable it"); + } else { + auto ExtBinaryWriter = + static_cast(&Writer); + ExtBinaryWriter->setToCompressAllSections(); + } + } +} static void mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, - ProfileFormat OutputFormat) { + ProfileFormat OutputFormat, + StringRef ProfileSymbolListFile, + bool CompressAllSections, FailureMode FailMode) { using namespace sampleprof; StringMap ProfileMap; SmallVector, 5> Readers; LLVMContext Context; + sampleprof::ProfileSymbolList WriterList; for (const auto &Input : Inputs) { auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context); - if (std::error_code EC = ReaderOrErr.getError()) - exitWithErrorCode(EC, Input.Filename); + if (std::error_code EC = ReaderOrErr.getError()) { + warnOrExitGivenError(FailMode, EC, Input.Filename); + continue; + } // We need to keep the readers around until after all the files are // read so that we do not lose the function names stored in each @@ -447,8 +487,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, // merged profile map. 
Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); - if (std::error_code EC = Reader->read()) - exitWithErrorCode(EC, Input.Filename); + if (std::error_code EC = Reader->read()) { + warnOrExitGivenError(FailMode, EC, Input.Filename); + Readers.pop_back(); + continue; + } StringMap &Profiles = Reader->getProfiles(); for (StringMap::iterator I = Profiles.begin(), @@ -466,6 +509,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName); } } + + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + if (ReaderList) + WriterList.merge(*ReaderList); } auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); @@ -473,6 +521,11 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, exitWithErrorCode(EC, OutputFilename); auto Writer = std::move(WriterOrErr.get()); + // WriterList will have StringRef refering to string in Buffer. + // Make sure Buffer lives as long as WriterList. 
+ auto Buffer = getInputFileBuf(ProfileSymbolListFile); + handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, + CompressAllSections); Writer->write(ProfileMap); } @@ -487,18 +540,6 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { return {FileName, Weight}; } -static std::unique_ptr -getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) { - if (InputFilenamesFile == "") - return {}; - - auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile); - if (!BufOrError) - exitWithErrorCode(BufOrError.getError(), InputFilenamesFile); - - return std::move(*BufOrError); -} - static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) { StringRef Filename = WF.Filename; uint64_t Weight = WF.Weight; @@ -583,12 +624,20 @@ static int merge_main(int argc, const char *argv[]) { clEnumVal(sample, "Sample profile"))); cl::opt OutputFormat( cl::desc("Format of output profile"), cl::init(PF_Binary), - cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), - clEnumValN(PF_Compact_Binary, "compbinary", - "Compact binary encoding"), - clEnumValN(PF_Text, "text", "Text encoding"), - clEnumValN(PF_GCC, "gcc", - "GCC encoding (only meaningful for -sample)"))); + cl::values( + clEnumValN(PF_Binary, "binary", "Binary encoding (default)"), + clEnumValN(PF_Compact_Binary, "compbinary", + "Compact binary encoding"), + clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"), + clEnumValN(PF_Text, "text", "Text encoding"), + clEnumValN(PF_GCC, "gcc", + "GCC encoding (only meaningful for -sample)"))); + cl::opt FailureMode( + "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"), + cl::values(clEnumValN(failIfAnyAreInvalid, "any", + "Fail if any profile is invalid."), + clEnumValN(failIfAllAreInvalid, "all", + "Fail only if all profiles are invalid."))); cl::opt OutputSparse("sparse", cl::init(false), cl::desc("Generate a sparse profile (only meaningful for -instr)")); 
cl::opt NumThreads( @@ -596,6 +645,14 @@ static int merge_main(int argc, const char *argv[]) { cl::desc("Number of merge threads to use (default: autodetect)")); cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"), cl::aliasopt(NumThreads)); + cl::opt ProfileSymbolListFile( + "prof-sym-list", cl::init(""), + cl::desc("Path to file containing the list of function symbols " + "used to populate profile symbol list")); + cl::opt CompressAllSections( + "compress-all-sections", cl::init(false), cl::Hidden, + cl::desc("Compress all sections when writing the profile (only " + "meaningful for -extbinary)")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -607,7 +664,7 @@ static int merge_main(int argc, const char *argv[]) { // Make sure that the file buffer stays alive for the duration of the // weighted input vector's lifetime. - auto Buffer = getInputFilenamesFileBuf(InputFilenamesFile); + auto Buffer = getInputFileBuf(InputFilenamesFile); parseInputFilenamesFile(Buffer.get(), WeightedInputs); if (WeightedInputs.empty()) @@ -626,10 +683,11 @@ static int merge_main(int argc, const char *argv[]) { if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat, OutputSparse, NumThreads); + OutputFormat, OutputSparse, NumThreads, FailureMode); else mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename, - OutputFormat); + OutputFormat, ProfileSymbolListFile, CompressAllSections, + FailureMode); return 0; } @@ -644,7 +702,7 @@ static void overlapInstrProfile(const std::string &BaseFilename, WriterContext Context(false, ErrorLock, WriterErrorCodes); WeightedFile WeightedInput{BaseFilename, 1}; OverlapStats Overlap; - Error E = Overlap.accumuateCounts(BaseFilename, TestFilename, IsCS); + Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS); if (E) exitWithError(std::move(E), "Error in getting profile count sums"); if (Overlap.Base.CountSum < 1.0f) { @@ -682,7 +740,7 @@ 
static int overlap_main(int argc, const char *argv[]) { cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n"); std::error_code EC; - raw_fd_ostream OS(Output.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, Output); @@ -944,10 +1002,21 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, return 0; } +static void showSectionInfo(sampleprof::SampleProfileReader *Reader, + raw_fd_ostream &OS) { + if (!Reader->dumpSectionInfo(OS)) { + WithColor::warning() << "-show-sec-info-only is only supported for " + << "sample profile in extbinary format and is " + << "ignored for other formats.\n"; + return; + } +} + static int showSampleProfile(const std::string &Filename, bool ShowCounts, bool ShowAllFunctions, const std::string &ShowFunction, - raw_fd_ostream &OS) { + bool ShowProfileSymbolList, + bool ShowSectionInfoOnly, raw_fd_ostream &OS) { using namespace sampleprof; LLVMContext Context; auto ReaderOrErr = SampleProfileReader::create(Filename, Context); @@ -955,6 +1024,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, exitWithErrorCode(EC, Filename); auto Reader = std::move(ReaderOrErr.get()); + + if (ShowSectionInfoOnly) { + showSectionInfo(Reader.get(), OS); + return 0; + } + if (std::error_code EC = Reader->read()) exitWithErrorCode(EC, Filename); @@ -963,6 +1038,12 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, else Reader->dumpFunctionProfile(ShowFunction, OS); + if (ShowProfileSymbolList) { + std::unique_ptr ReaderList = + Reader->getProfileSymbolList(); + ReaderList->dump(OS); + } + return 0; } @@ -1015,6 +1096,15 @@ static int show_main(int argc, const char *argv[]) { "list-below-cutoff", cl::init(false), cl::desc("Only output names of functions whose max count values are " "below the cutoff value")); + cl::opt ShowProfileSymbolList( + "show-prof-sym-list", cl::init(false), + cl::desc("Show 
profile symbol list if it exists in the profile. ")); + cl::opt ShowSectionInfoOnly( + "show-sec-info-only", cl::init(false), + cl::desc("Show the information of each section in the sample profile. " + "The flag is only usable when the sample profile is in " + "extbinary format")); + cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); if (OutputFilename.empty()) @@ -1027,7 +1117,7 @@ static int show_main(int argc, const char *argv[]) { } std::error_code EC; - raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text); + raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_Text); if (EC) exitWithErrorCode(EC, OutputFilename); @@ -1042,7 +1132,8 @@ static int show_main(int argc, const char *argv[]) { OnlyListBelow, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, - ShowFunction, OS); + ShowFunction, ShowProfileSymbolList, + ShowSectionInfoOnly, OS); } int main(int argc, const char *argv[]) { diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h index 11f9d6166a5..2c0912038c3 100644 --- a/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/tools/llvm-readobj/ARMEHABIPrinter.h @@ -329,6 +329,7 @@ class PrinterContext { ScopedPrinter &SW; const object::ELFFile *ELF; + StringRef FileName; const Elf_Shdr *Symtab; ArrayRef ShndxTable; @@ -352,8 +353,8 @@ class PrinterContext { public: PrinterContext(ScopedPrinter &SW, const object::ELFFile *ELF, - const Elf_Shdr *Symtab) - : SW(SW), ELF(ELF), Symtab(Symtab) {} + StringRef FileName, const Elf_Shdr *Symtab) + : SW(SW), ELF(ELF), FileName(FileName), Symtab(Symtab) {} void PrintUnwindInformation() const; }; @@ -369,10 +370,10 @@ PrinterContext::FunctionAtAddress(unsigned Section, return readobj_error::unknown_symbol; auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab); if (!StrTableOrErr) - error(StrTableOrErr.takeError()); + reportError(StrTableOrErr.takeError(), FileName); StringRef StrTable = *StrTableOrErr; - for 
(const Elf_Sym &Sym : unwrapOrError(ELF->symbols(Symtab))) + for (const Elf_Sym &Sym : unwrapOrError(FileName, ELF->symbols(Symtab))) if (Sym.st_shndx == Section && Sym.st_value == Address && Sym.getType() == ELF::STT_FUNC) { auto NameOrErr = Sym.getName(StrTable); @@ -398,16 +399,16 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, /// handling table. Use this symbol to recover the actual exception handling /// table. - for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) { if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex) continue; auto SymTabOrErr = ELF->getSection(Sec.sh_link); if (!SymTabOrErr) - error(SymTabOrErr.takeError()); + reportError(SymTabOrErr.takeError(), FileName); const Elf_Shdr *SymTab = *SymTabOrErr; - for (const Elf_Rel &R : unwrapOrError(ELF->rels(&Sec))) { + for (const Elf_Rel &R : unwrapOrError(FileName, ELF->rels(&Sec))) { if (R.r_offset != static_cast(IndexTableOffset)) continue; @@ -417,7 +418,7 @@ PrinterContext::FindExceptionTable(unsigned IndexSectionIndex, RelA.r_addend = 0; const Elf_Sym *Symbol = - unwrapOrError(ELF->getRelocationSymbol(&RelA, SymTab)); + unwrapOrError(FileName, ELF->getRelocationSymbol(&RelA, SymTab)); auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable); if (!Ret) @@ -570,7 +571,7 @@ void PrinterContext::PrintUnwindInformation() const { DictScope UI(SW, "UnwindInformation"); int SectionIndex = 0; - for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, ELF->sections())) { if (Sec.sh_type == ELF::SHT_ARM_EXIDX) { DictScope UIT(SW, "UnwindIndexTable"); diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp index 4de14e2e78d..3e026f58871 100644 --- a/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -842,8 +842,10 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile 
&COFF, if ((int64_t)(Contents.size() - Offset - 4 * HeaderWords(XData) - (XData.E() ? 0 : XData.EpilogueCount() * 4) - - (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength) + (XData.X() ? 8 : 0)) < (int64_t)ByteCodeLength) { + SW.flush(); report_fatal_error("Malformed unwind data"); + } if (XData.E()) { ArrayRef UC = XData.UnwindByteCode(); @@ -1039,10 +1041,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF, } FunctionAddress = *FunctionAddressOrErr; } else { - const pe32_header *PEHeader; - if (COFF.getPE32Header(PEHeader)) - return false; - FunctionAddress = PEHeader->ImageBase + RF.BeginAddress; + FunctionAddress = COFF.getPE32Header()->ImageBase + RF.BeginAddress; } SW.printString("Function", formatSymbol(FunctionName, FunctionAddress)); diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 4c2e39dfa3c..9b2c6adb9d9 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -60,6 +60,10 @@ using namespace llvm::codeview; using namespace llvm::support; using namespace llvm::Win64EH; +static inline Error createError(const Twine &Err) { + return make_error(Err, object_error::parse_failed); +} + namespace { struct LoadConfigTables { @@ -167,9 +171,6 @@ private: void printDelayImportedSymbols( const DelayImportDirectoryEntryRef &I, iterator_range Range); - ErrorOr - getResourceDirectoryTableEntry(const coff_resource_dir_table &Table, - uint32_t Index); typedef DenseMap > RelocMapTy; @@ -627,14 +628,10 @@ void COFFDumper::printFileHeaders() { // Print PE header. This header does not exist if this is an object file and // not an executable. 
- const pe32_header *PEHeader = nullptr; - error(Obj->getPE32Header(PEHeader)); - if (PEHeader) + if (const pe32_header *PEHeader = Obj->getPE32Header()) printPEHeader(PEHeader); - const pe32plus_header *PEPlusHeader = nullptr; - error(Obj->getPE32PlusHeader(PEPlusHeader)); - if (PEPlusHeader) + if (const pe32plus_header *PEPlusHeader = Obj->getPE32PlusHeader()) printPEHeader(PEPlusHeader); if (const dos_header *DH = Obj->getDOSHeader()) @@ -728,7 +725,9 @@ void COFFDumper::printCOFFDebugDirectory() { if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) { const codeview::DebugInfo *DebugInfo; StringRef PDBFileName; - error(Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName)); + if (std::error_code EC = Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName)) + reportError(errorCodeToError(EC), Obj->getFileName()); + DictScope PDBScope(W, "PDBInfo"); W.printHex("PDBSignature", DebugInfo->Signature.CVSignature); if (DebugInfo->Signature.CVSignature == OMF::Signature::PDB70) { @@ -740,8 +739,9 @@ void COFFDumper::printCOFFDebugDirectory() { // FIXME: Type values of 12 and 13 are commonly observed but are not in // the documented type enum. Figure out what they mean. 
ArrayRef RawData; - error( - Obj->getRvaAndSizeAsBytes(D.AddressOfRawData, D.SizeOfData, RawData)); + if (std::error_code EC = Obj->getRvaAndSizeAsBytes(D.AddressOfRawData, + D.SizeOfData, RawData)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printBinaryBlock("RawData", RawData); } } @@ -750,8 +750,11 @@ void COFFDumper::printCOFFDebugDirectory() { void COFFDumper::printRVATable(uint64_t TableVA, uint64_t Count, uint64_t EntrySize, PrintExtraCB PrintExtra) { uintptr_t TableStart, TableEnd; - error(Obj->getVaPtr(TableVA, TableStart)); - error(Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd)); + if (std::error_code EC = Obj->getVaPtr(TableVA, TableStart)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = + Obj->getVaPtr(TableVA + Count * EntrySize - 1, TableEnd)) + reportError(errorCodeToError(EC), Obj->getFileName()); TableEnd++; for (uintptr_t I = TableStart; I < TableEnd; I += EntrySize) { uint32_t RVA = *reinterpret_cast(I); @@ -887,16 +890,14 @@ void COFFDumper::printBaseOfDataField(const pe32plus_header *) {} void COFFDumper::printCodeViewDebugInfo() { // Print types first to build CVUDTNames, then print symbols. for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); // .debug$T is a standard CodeView type section, while .debug$P is the same // format but used for MSVC precompiled header object files. 
if (SectionName == ".debug$T" || SectionName == ".debug$P") printCodeViewTypeSection(SectionName, S); } for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); if (SectionName == ".debug$S") printCodeViewSymbolSection(SectionName, S); } @@ -908,32 +909,40 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) { // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(Reader.readInteger(SubType)); - error(Reader.readInteger(SubSectionSize)); + + if (Error E = Reader.readInteger(SubType)) + reportError(std::move(E), Obj->getFileName()); + if (Error E = Reader.readInteger(SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); StringRef Contents; - error(Reader.readFixedString(Contents, SubSectionSize)); + if (Error E = Reader.readFixedString(Contents, SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); BinaryStreamRef ST(Contents, support::little); switch (DebugSubsectionKind(SubType)) { case DebugSubsectionKind::FileChecksums: - error(CVFileChecksumTable.initialize(ST)); + if (Error E = CVFileChecksumTable.initialize(ST)) + reportError(std::move(E), Obj->getFileName()); break; case DebugSubsectionKind::StringTable: - error(CVStringTable.initialize(ST)); + if (Error E = CVStringTable.initialize(ST)) + reportError(std::move(E), Obj->getFileName()); break; default: break; } uint32_t PaddedSize = alignTo(SubSectionSize, 4); - error(Reader.skip(PaddedSize - SubSectionSize)); + if (Error E = Reader.skip(PaddedSize - SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); } } void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, const SectionRef &Section) { - StringRef SectionContents = unwrapOrError(Section.getContents()); + StringRef SectionContents = + 
unwrapOrError(Obj->getFileName(), Section.getContents()); StringRef Data = SectionContents; SmallVector FunctionNames; @@ -944,10 +953,13 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, W.printNumber("Section", SectionName, Obj->getSectionID(Section)); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) - return error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); BinaryStreamReader FSReader(Data, support::little); initializeFileAndStringTables(FSReader); @@ -957,8 +969,10 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(consume(Data, SubType)); - error(consume(Data, SubSectionSize)); + if (Error E = consume(Data, SubType)) + reportError(std::move(E), Obj->getFileName()); + if (Error E = consume(Data, SubSectionSize)) + reportError(std::move(E), Obj->getFileName()); ListScope S(W, "Subsection"); // Dump the subsection as normal even if the ignore bit is set. @@ -971,7 +985,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, // Get the contents of the subsection. 
if (SubSectionSize > Data.size()) - return error(object_error::parse_failed); + return reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); StringRef Contents = Data.substr(0, SubSectionSize); // Add SubSectionSize to the current offset and align that offset to find @@ -980,7 +995,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, size_t NextOffset = SectionOffset + SubSectionSize; NextOffset = alignTo(NextOffset, 4); if (NextOffset > SectionContents.size()) - return error(object_error::parse_failed); + return reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); Data = SectionContents.drop_front(NextOffset); // Optionally print the subsection bytes in case our parsing gets confused @@ -1010,17 +1026,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, if (SubSectionSize < 12) { // There should be at least three words to store two function // relocations and size of the code. - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } StringRef LinkageName; - error(resolveSymbolName(Obj->getCOFFSection(Section), SectionOffset, - LinkageName)); + if (std::error_code EC = resolveSymbolName(Obj->getCOFFSection(Section), + SectionOffset, LinkageName)) + reportError(errorCodeToError(EC), Obj->getFileName()); + W.printString("LinkageName", LinkageName); if (FunctionLineTables.count(LinkageName) != 0) { // Saw debug info for this function already? 
- error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } @@ -1033,17 +1053,21 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, BinaryStreamReader SR(Contents, llvm::support::little); DebugFrameDataSubsectionRef FrameData; - error(FrameData.initialize(SR)); + if (Error E = FrameData.initialize(SR)) + reportError(std::move(E), Obj->getFileName()); StringRef LinkageName; - error(resolveSymbolName(Obj->getCOFFSection(Section), SectionContents, - FrameData.getRelocPtr(), LinkageName)); + if (std::error_code EC = + resolveSymbolName(Obj->getCOFFSection(Section), SectionContents, + FrameData.getRelocPtr(), LinkageName)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("LinkageName", LinkageName); // To find the active frame description, search this array for the // smallest PC range that includes the current PC. for (const auto &FD : FrameData) { - StringRef FrameFunc = error(CVStringTable.getString(FD.FrameFunc)); + StringRef FrameFunc = unwrapOrError( + Obj->getFileName(), CVStringTable.getString(FD.FrameFunc)); DictScope S(W, "FrameData"); W.printHex("RvaStart", FD.RvaStart); @@ -1094,7 +1118,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, BinaryStreamReader Reader(FunctionLineTables[Name], support::little); DebugLinesSubsectionRef LineInfo; - error(LineInfo.initialize(Reader)); + if (Error E = LineInfo.initialize(Reader)) + reportError(std::move(E), Obj->getFileName()); W.printHex("Flags", LineInfo.header()->Flags); W.printHex("CodeSize", LineInfo.header()->CodeSize); @@ -1105,7 +1130,8 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, uint32_t ColumnIndex = 0; for (const auto &Line : Entry.LineNumbers) { if (Line.Offset >= LineInfo.header()->CodeSize) { - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); return; } @@ -1136,21 +1162,20 @@ void 
COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, StringRef SectionContents) { ArrayRef BinaryData(Subsection.bytes_begin(), Subsection.bytes_end()); - auto CODD = llvm::make_unique(*this, Section, Obj, + auto CODD = std::make_unique(*this, Section, Obj, SectionContents); CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD), CompilationCPUType, opts::CodeViewSubsectionBytes); CVSymbolArray Symbols; BinaryStreamReader Reader(BinaryData, llvm::support::little); - if (auto EC = Reader.readArray(Symbols, Reader.getLength())) { - consumeError(std::move(EC)); + if (Error E = Reader.readArray(Symbols, Reader.getLength())) { W.flush(); - error(object_error::parse_failed); + reportError(std::move(E), Obj->getFileName()); } - if (auto EC = CVSD.dump(Symbols)) { + if (Error E = CVSD.dump(Symbols)) { W.flush(); - error(std::move(EC)); + reportError(std::move(E), Obj->getFileName()); } CompilationCPUType = CVSD.getCompilationCPUType(); W.flush(); @@ -1159,12 +1184,14 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { BinaryStreamRef Stream(Subsection, llvm::support::little); DebugChecksumsSubsectionRef Checksums; - error(Checksums.initialize(Stream)); + if (Error E = Checksums.initialize(Stream)) + reportError(std::move(E), Obj->getFileName()); for (auto &FC : Checksums) { DictScope S(W, "FileChecksum"); - StringRef Filename = error(CVStringTable.getString(FC.FileNameOffset)); + StringRef Filename = unwrapOrError( + Obj->getFileName(), CVStringTable.getString(FC.FileNameOffset)); W.printHex("Filename", Filename, FC.FileNameOffset); W.printHex("ChecksumSize", FC.Checksum.size()); W.printEnum("ChecksumKind", uint8_t(FC.Kind), @@ -1177,7 +1204,8 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { BinaryStreamReader SR(Subsection, llvm::support::little); 
DebugInlineeLinesSubsectionRef Lines; - error(Lines.initialize(SR)); + if (Error E = Lines.initialize(SR)) + reportError(std::move(E), Obj->getFileName()); for (auto &Line : Lines) { DictScope S(W, "InlineeSourceLine"); @@ -1198,15 +1226,18 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) { // The file checksum subsection should precede all references to it. if (!CVFileChecksumTable.valid() || !CVStringTable.valid()) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); auto Iter = CVFileChecksumTable.getArray().at(FileOffset); // Check if the file checksum table offset is valid. if (Iter == CVFileChecksumTable.end()) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); - return error(CVStringTable.getString(Iter->FileNameOffset)); + return unwrapOrError(Obj->getFileName(), + CVStringTable.getString(Iter->FileNameOffset)); } void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) { @@ -1219,35 +1250,38 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, GlobalTypeTableBuilder &GlobalCVTypes, bool GHash) { for (const SectionRef &S : Obj->sections()) { - StringRef SectionName; - error(S.getName(SectionName)); + StringRef SectionName = unwrapOrError(Obj->getFileName(), S.getName()); if (SectionName == ".debug$T") { - StringRef Data = unwrapOrError(S.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), S.getContents()); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + if (Magic != 4) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); CVTypeArray Types; BinaryStreamReader Reader(Data, llvm::support::little); if (auto EC = 
Reader.readArray(Types, Reader.getLength())) { consumeError(std::move(EC)); W.flush(); - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); } SmallVector SourceToDest; Optional PCHSignature; if (GHash) { std::vector Hashes = GloballyHashedType::hashTypes(Types); - if (auto EC = + if (Error E = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types, Hashes, PCHSignature)) - return error(std::move(EC)); + return reportError(std::move(E), Obj->getFileName()); } else { - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, + if (Error E = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, PCHSignature)) - return error(std::move(EC)); + return reportError(std::move(E), Obj->getFileName()); } } } @@ -1258,20 +1292,25 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName, ListScope D(W, "CodeViewTypes"); W.printNumber("Section", SectionName, Obj->getSectionID(Section)); - StringRef Data = unwrapOrError(Section.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), Section.getContents()); if (opts::CodeViewSubsectionBytes) W.printBinaryBlock("Data", Data); uint32_t Magic; - error(consume(Data, Magic)); + if (Error E = consume(Data, Magic)) + reportError(std::move(E), Obj->getFileName()); + W.printHex("Magic", Magic); if (Magic != COFF::DEBUG_SECTION_MAGIC) - return error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); Types.reset(Data, 100); TypeDumpVisitor TDV(Types, &W, opts::CodeViewSubsectionBytes); - error(codeview::visitTypeStream(Types, TDV)); + if (Error E = codeview::visitTypeStream(Types, TDV)) + reportError(std::move(E), Obj->getFileName()); + W.flush(); } @@ -1282,8 +1321,7 @@ void COFFDumper::printSectionHeaders() { ++SectionNumber; const coff_section *Section = Obj->getCOFFSection(Sec); - StringRef Name; - error(Sec.getName(Name)); + StringRef Name = 
unwrapOrError(Obj->getFileName(), Sec.getName()); DictScope D(W, "Section"); W.printNumber("Number", SectionNumber); @@ -1318,7 +1356,7 @@ void COFFDumper::printSectionHeaders() { if (opts::SectionData && !(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) { - StringRef Data = unwrapOrError(Sec.getContents()); + StringRef Data = unwrapOrError(Obj->getFileName(), Sec.getContents()); W.printBinaryBlock("SectionData", Data); } } @@ -1330,8 +1368,7 @@ void COFFDumper::printRelocations() { int SectionNumber = 0; for (const SectionRef &Section : Obj->sections()) { ++SectionNumber; - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); bool PrintedGroup = false; for (const RelocationRef &Reloc : Section.relocations()) { @@ -1362,7 +1399,9 @@ void COFFDumper::printRelocation(const SectionRef &Section, int64_t SymbolIndex = -1; if (Symbol != Obj->symbol_end()) { Expected SymbolNameOrErr = Symbol->getName(); - error(errorToErrorCode(SymbolNameOrErr.takeError())); + if (!SymbolNameOrErr) + reportError(SymbolNameOrErr.takeError(), Obj->getFileName()); + SymbolName = *SymbolNameOrErr; SymbolIndex = Obj->getSymbolIndex(Obj->getCOFFSymbol(*Symbol)); } @@ -1439,7 +1478,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { for (uint8_t I = 0; I < Symbol.getNumberOfAuxSymbols(); ++I) { if (Symbol.isFunctionDefinition()) { const coff_aux_function_definition *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxFunctionDef"); W.printNumber("TagIndex", Aux->TagIndex); @@ -1449,15 +1489,16 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { } else if (Symbol.isAnyUndefined()) { const coff_aux_weak_external *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + 
reportError(errorCodeToError(EC), Obj->getFileName()); Expected Linked = Obj->getSymbol(Aux->TagIndex); + if (!Linked) + reportError(Linked.takeError(), Obj->getFileName()); + StringRef LinkedName; - std::error_code EC = errorToErrorCode(Linked.takeError()); - if (EC || (EC = Obj->getSymbolName(*Linked, LinkedName))) { - LinkedName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*Linked, LinkedName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxWeakExternal"); W.printNumber("Linked", LinkedName, Aux->TagIndex); @@ -1466,8 +1507,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { } else if (Symbol.isFileRecord()) { const char *FileName; - error(getSymbolAuxData(Obj, Symbol, I, FileName)); - + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, FileName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxFileRecord"); StringRef Name(FileName, Symbol.getNumberOfAuxSymbols() * @@ -1476,7 +1517,8 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { break; } else if (Symbol.isSectionDefinition()) { const coff_aux_section_definition *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); int32_t AuxNumber = Aux->getNumber(Symbol.isBigObj()); @@ -1493,26 +1535,27 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) { const coff_section *Assoc; StringRef AssocName = ""; if (std::error_code EC = Obj->getSection(AuxNumber, Assoc)) - error(EC); + reportError(errorCodeToError(EC), Obj->getFileName()); Expected Res = getSectionName(Obj, AuxNumber, Assoc); if (!Res) - error(Res.takeError()); + reportError(Res.takeError(), Obj->getFileName()); AssocName = *Res; W.printNumber("AssocSection", AssocName, AuxNumber); } } else if (Symbol.isCLRToken()) { const coff_aux_clr_token *Aux; - error(getSymbolAuxData(Obj, Symbol, I, Aux)); + if (std::error_code EC = 
getSymbolAuxData(Obj, Symbol, I, Aux)) + reportError(errorCodeToError(EC), Obj->getFileName()); Expected ReferredSym = Obj->getSymbol(Aux->SymbolTableIndex); + if (!ReferredSym) + reportError(ReferredSym.takeError(), Obj->getFileName()); + StringRef ReferredName; - std::error_code EC = errorToErrorCode(ReferredSym.takeError()); - if (EC || (EC = Obj->getSymbolName(*ReferredSym, ReferredName))) { - ReferredName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*ReferredSym, ReferredName)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope AS(W, "AuxCLRToken"); W.printNumber("AuxType", Aux->AuxType); @@ -1578,9 +1621,11 @@ void COFFDumper::printImportedSymbols( iterator_range Range) { for (const ImportedSymbolRef &I : Range) { StringRef Sym; - error(I.getSymbolName(Sym)); + if (std::error_code EC = I.getSymbolName(Sym)) + reportError(errorCodeToError(EC), Obj->getFileName()); uint16_t Ordinal; - error(I.getOrdinal(Ordinal)); + if (std::error_code EC = I.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Symbol", Sym, Ordinal); } } @@ -1592,12 +1637,17 @@ void COFFDumper::printDelayImportedSymbols( for (const ImportedSymbolRef &S : Range) { DictScope Import(W, "Import"); StringRef Sym; - error(S.getSymbolName(Sym)); + if (std::error_code EC = S.getSymbolName(Sym)) + reportError(errorCodeToError(EC), Obj->getFileName()); + uint16_t Ordinal; - error(S.getOrdinal(Ordinal)); + if (std::error_code EC = S.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Symbol", Sym, Ordinal); + uint64_t Addr; - error(I.getImportAddress(Index++, Addr)); + if (std::error_code EC = I.getImportAddress(Index++, Addr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("Address", Addr); } } @@ -1607,13 +1657,16 @@ void COFFDumper::printCOFFImports() { for (const ImportDirectoryEntryRef &I : Obj->import_directories()) { DictScope Import(W, "Import"); StringRef 
Name; - error(I.getName(Name)); + if (std::error_code EC = I.getName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("Name", Name); uint32_t ILTAddr; - error(I.getImportLookupTableRVA(ILTAddr)); + if (std::error_code EC = I.getImportLookupTableRVA(ILTAddr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("ImportLookupTableRVA", ILTAddr); uint32_t IATAddr; - error(I.getImportAddressTableRVA(IATAddr)); + if (std::error_code EC = I.getImportAddressTableRVA(IATAddr)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("ImportAddressTableRVA", IATAddr); // The import lookup table can be missing with certain older linkers, so // fall back to the import address table in that case. @@ -1627,10 +1680,12 @@ void COFFDumper::printCOFFImports() { for (const DelayImportDirectoryEntryRef &I : Obj->delay_import_directories()) { DictScope Import(W, "DelayImport"); StringRef Name; - error(I.getName(Name)); + if (std::error_code EC = I.getName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printString("Name", Name); const delay_import_directory_table_entry *Table; - error(I.getDelayImportTable(Table)); + if (std::error_code EC = I.getDelayImportTable(Table)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printHex("Attributes", Table->Attributes); W.printHex("ModuleHandle", Table->ModuleHandle); W.printHex("ImportAddressTable", Table->DelayImportAddressTable); @@ -1648,9 +1703,12 @@ void COFFDumper::printCOFFExports() { StringRef Name; uint32_t Ordinal, RVA; - error(E.getSymbolName(Name)); - error(E.getOrdinal(Ordinal)); - error(E.getExportRVA(RVA)); + if (std::error_code EC = E.getSymbolName(Name)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = E.getOrdinal(Ordinal)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = E.getExportRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Ordinal", 
Ordinal); W.printString("Name", Name); @@ -1660,13 +1718,12 @@ void COFFDumper::printCOFFExports() { void COFFDumper::printCOFFDirectives() { for (const SectionRef &Section : Obj->sections()) { - StringRef Name; - - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); if (Name != ".drectve") continue; - StringRef Contents = unwrapOrError(Section.getContents()); + StringRef Contents = + unwrapOrError(Obj->getFileName(), Section.getContents()); W.printString("Directive(s)", Contents); } } @@ -1689,8 +1746,10 @@ void COFFDumper::printCOFFBaseReloc() { for (const BaseRelocRef &I : Obj->base_relocs()) { uint8_t Type; uint32_t RVA; - error(I.getRVA(RVA)); - error(I.getType(Type)); + if (std::error_code EC = I.getRVA(RVA)) + reportError(errorCodeToError(EC), Obj->getFileName()); + if (std::error_code EC = I.getType(Type)) + reportError(errorCodeToError(EC), Obj->getFileName()); DictScope Import(W, "Entry"); W.printString("Type", getBaseRelocTypeName(Type)); W.printHex("Address", RVA); @@ -1700,16 +1759,18 @@ void COFFDumper::printCOFFBaseReloc() { void COFFDumper::printCOFFResources() { ListScope ResourcesD(W, "Resources"); for (const SectionRef &S : Obj->sections()) { - StringRef Name; - error(S.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), S.getName()); if (!Name.startswith(".rsrc")) continue; - StringRef Ref = unwrapOrError(S.getContents()); + StringRef Ref = unwrapOrError(Obj->getFileName(), S.getContents()); if ((Name == ".rsrc") || (Name == ".rsrc$01")) { - ResourceSectionRef RSF(Ref); - auto &BaseTable = unwrapOrError(RSF.getBaseTable()); + ResourceSectionRef RSF; + Error E = RSF.load(Obj, S); + if (E) + reportError(std::move(E), Obj->getFileName()); + auto &BaseTable = unwrapOrError(Obj->getFileName(), RSF.getBaseTable()); W.printNumber("Total Number of Resources", countTotalTableEntries(RSF, BaseTable, "Type")); W.printHex("Base Table Address", @@ -1729,14 +1790,15 @@ 
COFFDumper::countTotalTableEntries(ResourceSectionRef RSF, uint32_t TotalEntries = 0; for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; i++) { - auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i)); + auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i)); if (Entry.Offset.isSubDir()) { StringRef NextLevel; if (Level == "Name") NextLevel = "Language"; else NextLevel = "Name"; - auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry)); + auto &NextTable = + unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry)); TotalEntries += countTotalTableEntries(RSF, NextTable, NextLevel); } else { TotalEntries += 1; @@ -1755,13 +1817,13 @@ void COFFDumper::printResourceDirectoryTable( // Iterate through level in resource directory tree. for (int i = 0; i < Table.NumberOfNameEntries + Table.NumberOfIDEntries; i++) { - auto Entry = unwrapOrError(getResourceDirectoryTableEntry(Table, i)); + auto Entry = unwrapOrError(Obj->getFileName(), RSF.getTableEntry(Table, i)); StringRef Name; SmallString<20> IDStr; raw_svector_ostream OS(IDStr); if (i < Table.NumberOfNameEntries) { ArrayRef RawEntryNameString = - unwrapOrError(RSF.getEntryNameString(Entry)); + unwrapOrError(Obj->getFileName(), RSF.getEntryNameString(Entry)); std::vector EndianCorrectedNameString; if (llvm::sys::IsBigEndianHost) { EndianCorrectedNameString.resize(RawEntryNameString.size() + 1); @@ -1772,14 +1834,14 @@ void COFFDumper::printResourceDirectoryTable( } std::string EntryNameString; if (!llvm::convertUTF16ToUTF8String(RawEntryNameString, EntryNameString)) - error(object_error::parse_failed); + reportError(errorCodeToError(object_error::parse_failed), + Obj->getFileName()); OS << ": "; OS << EntryNameString; } else { if (Level == "Type") { OS << ": "; printResourceTypeName(Entry.Identifier.ID, OS); - IDStr = IDStr.slice(0, IDStr.find_first_of(")", 0) + 1); } else { OS << ": (ID " << Entry.Identifier.ID << ")"; } @@ -1793,7 +1855,8 @@ void 
COFFDumper::printResourceDirectoryTable( NextLevel = "Language"; else NextLevel = "Name"; - auto &NextTable = unwrapOrError(RSF.getEntrySubDir(Entry)); + auto &NextTable = + unwrapOrError(Obj->getFileName(), RSF.getEntrySubDir(Entry)); printResourceDirectoryTable(RSF, NextTable, NextLevel); } else { W.printHex("Entry Offset", Entry.Offset.value()); @@ -1804,24 +1867,29 @@ void COFFDumper::printResourceDirectoryTable( W.printNumber("Major Version", Table.MajorVersion); W.printNumber("Minor Version", Table.MinorVersion); W.printNumber("Characteristics", Table.Characteristics); + ListScope DataScope(W, "Data"); + auto &DataEntry = + unwrapOrError(Obj->getFileName(), RSF.getEntryData(Entry)); + W.printHex("DataRVA", DataEntry.DataRVA); + W.printNumber("DataSize", DataEntry.DataSize); + W.printNumber("Codepage", DataEntry.Codepage); + W.printNumber("Reserved", DataEntry.Reserved); + StringRef Contents = + unwrapOrError(Obj->getFileName(), RSF.getContents(DataEntry)); + W.printBinaryBlock("Data", Contents); } } } -ErrorOr -COFFDumper::getResourceDirectoryTableEntry(const coff_resource_dir_table &Table, - uint32_t Index) { - if (Index >= (uint32_t)(Table.NumberOfNameEntries + Table.NumberOfIDEntries)) - return object_error::parse_failed; - auto TablePtr = reinterpret_cast(&Table + 1); - return TablePtr[Index]; -} - void COFFDumper::printStackMap() const { object::SectionRef StackMapSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".llvm_stackmaps") { StackMapSection = Sec; break; @@ -1831,7 +1899,8 @@ void COFFDumper::printStackMap() const { if (StackMapSection == object::SectionRef()) return; - StringRef StackMapContents = unwrapOrError(StackMapSection.getContents()); + StringRef StackMapContents = + unwrapOrError(Obj->getFileName(), StackMapSection.getContents()); ArrayRef StackMapContentsArray = 
arrayRefFromStringRef(StackMapContents); @@ -1847,7 +1916,11 @@ void COFFDumper::printAddrsig() { object::SectionRef AddrsigSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == ".llvm_addrsig") { AddrsigSection = Sec; break; @@ -1857,7 +1930,8 @@ void COFFDumper::printAddrsig() { if (AddrsigSection == object::SectionRef()) return; - StringRef AddrsigContents = unwrapOrError(AddrsigSection.getContents()); + StringRef AddrsigContents = + unwrapOrError(Obj->getFileName(), AddrsigSection.getContents()); ArrayRef AddrsigContentsArray(AddrsigContents.bytes_begin(), AddrsigContents.size()); @@ -1869,15 +1943,15 @@ void COFFDumper::printAddrsig() { const char *Err; uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err); if (Err) - reportError(Err); + reportError(createError(Err), Obj->getFileName()); Expected Sym = Obj->getSymbol(SymIndex); + if (!Sym) + reportError(Sym.takeError(), Obj->getFileName()); + StringRef SymName; - std::error_code EC = errorToErrorCode(Sym.takeError()); - if (EC || (EC = Obj->getSymbolName(*Sym, SymName))) { - SymName = ""; - error(EC); - } + if (std::error_code EC = Obj->getSymbolName(*Sym, SymName)) + reportError(errorCodeToError(EC), Obj->getFileName()); W.printNumber("Sym", SymName, SymIndex); Cur += Size; @@ -1891,7 +1965,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, { ListScope S(Writer, "MergedTypeStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); - error(codeview::visitTypeStream(TpiTypes, TDV)); + if (Error Err = codeview::visitTypeStream(TpiTypes, TDV)) + reportError(std::move(Err), ""); Writer.flush(); } @@ -1902,7 +1977,8 @@ void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, ListScope S(Writer, "MergedIDStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); TDV.setIpiTypes(IpiTypes); - 
error(codeview::visitTypeStream(IpiTypes, TDV)); + if (Error Err = codeview::visitTypeStream(IpiTypes, TDV)) + reportError(std::move(Err), ""); Writer.flush(); } } diff --git a/tools/llvm-readobj/DwarfCFIEHPrinter.h b/tools/llvm-readobj/DwarfCFIEHPrinter.h index 7055510ef2f..0a365d4fe72 100644 --- a/tools/llvm-readobj/DwarfCFIEHPrinter.h +++ b/tools/llvm-readobj/DwarfCFIEHPrinter.h @@ -44,12 +44,12 @@ public: void printUnwindInformation() const; }; -template -static const typename ELFO::Elf_Shdr *findSectionByAddress(const ELFO *Obj, - uint64_t Addr) { - auto Sections = Obj->sections(); +template +static const typename object::ELFObjectFile::Elf_Shdr * +findSectionByAddress(const object::ELFObjectFile *ObjF, uint64_t Addr) { + auto Sections = ObjF->getELFFile()->sections(); if (Error E = Sections.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Shdr : *Sections) if (Shdr.sh_addr == Addr) @@ -64,13 +64,15 @@ void PrinterContext::printUnwindInformation() const { auto PHs = Obj->program_headers(); if (Error E = PHs.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Phdr : *PHs) { if (Phdr.p_type == ELF::PT_GNU_EH_FRAME) { EHFramePhdr = &Phdr; if (Phdr.p_memsz != Phdr.p_filesz) - reportError("p_memsz does not match p_filesz for GNU_EH_FRAME"); + reportError(object::createError( + "p_memsz does not match p_filesz for GNU_EH_FRAME"), + ObjF->getFileName()); break; } } @@ -81,12 +83,12 @@ void PrinterContext::printUnwindInformation() const { auto Sections = Obj->sections(); if (Error E = Sections.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); for (const auto &Shdr : *Sections) { auto SectionName = Obj->getSectionName(&Shdr); if (Error E = SectionName.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); if (*SectionName == ".eh_frame") 
printEHFrame(&Shdr); @@ -97,49 +99,52 @@ template void PrinterContext::printEHFrameHdr(uint64_t EHFrameHdrOffset, uint64_t EHFrameHdrAddress, uint64_t EHFrameHdrSize) const { - ListScope L(W, "EH_FRAME Header"); + DictScope L(W, "EHFrameHeader"); W.startLine() << format("Address: 0x%" PRIx64 "\n", EHFrameHdrAddress); W.startLine() << format("Offset: 0x%" PRIx64 "\n", EHFrameHdrOffset); W.startLine() << format("Size: 0x%" PRIx64 "\n", EHFrameHdrSize); const object::ELFFile *Obj = ObjF->getELFFile(); - const auto *EHFrameHdrShdr = findSectionByAddress(Obj, EHFrameHdrAddress); + const auto *EHFrameHdrShdr = findSectionByAddress(ObjF, EHFrameHdrAddress); if (EHFrameHdrShdr) { auto SectionName = Obj->getSectionName(EHFrameHdrShdr); if (Error E = SectionName.takeError()) - reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); W.printString("Corresponding Section", *SectionName); } - DataExtractor DE( - StringRef(reinterpret_cast(Obj->base()) + EHFrameHdrOffset, - EHFrameHdrSize), - ELFT::TargetEndianness == support::endianness::little, - ELFT::Is64Bits ? 8 : 4); + DataExtractor DE(makeArrayRef(Obj->base() + EHFrameHdrOffset, EHFrameHdrSize), + ELFT::TargetEndianness == support::endianness::little, + ELFT::Is64Bits ? 
8 : 4); DictScope D(W, "Header"); - uint32_t Offset = 0; + uint64_t Offset = 0; auto Version = DE.getU8(&Offset); W.printNumber("version", Version); if (Version != 1) - reportError("only version 1 of .eh_frame_hdr is supported"); + reportError( + object::createError("only version 1 of .eh_frame_hdr is supported"), + ObjF->getFileName()); uint64_t EHFramePtrEnc = DE.getU8(&Offset); W.startLine() << format("eh_frame_ptr_enc: 0x%" PRIx64 "\n", EHFramePtrEnc); if (EHFramePtrEnc != (dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4)) - reportError("unexpected encoding eh_frame_ptr_enc"); + reportError(object::createError("unexpected encoding eh_frame_ptr_enc"), + ObjF->getFileName()); uint64_t FDECountEnc = DE.getU8(&Offset); W.startLine() << format("fde_count_enc: 0x%" PRIx64 "\n", FDECountEnc); if (FDECountEnc != dwarf::DW_EH_PE_udata4) - reportError("unexpected encoding fde_count_enc"); + reportError(object::createError("unexpected encoding fde_count_enc"), + ObjF->getFileName()); uint64_t TableEnc = DE.getU8(&Offset); W.startLine() << format("table_enc: 0x%" PRIx64 "\n", TableEnc); if (TableEnc != (dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4)) - reportError("unexpected encoding table_enc"); + reportError(object::createError("unexpected encoding table_enc"), + ObjF->getFileName()); auto EHFramePtr = DE.getSigned(&Offset, 4) + EHFrameHdrAddress + 4; W.startLine() << format("eh_frame_ptr: 0x%" PRIx64 "\n", EHFramePtr); @@ -158,7 +163,8 @@ void PrinterContext::printEHFrameHdr(uint64_t EHFrameHdrOffset, W.startLine() << format("address: 0x%" PRIx64 "\n", Address); if (InitialPC < PrevPC) - reportError("initial_location is out of order"); + reportError(object::createError("initial_location is out of order"), + ObjF->getFileName()); PrevPC = InitialPC; ++NumEntries; @@ -178,7 +184,7 @@ void PrinterContext::printEHFrame( const object::ELFFile *Obj = ObjF->getELFFile(); auto Result = Obj->getSectionContents(EHFrameShdr); if (Error E = Result.takeError()) - 
reportError(toString(std::move(E))); + reportError(std::move(E), ObjF->getFileName()); auto Contents = Result.get(); DWARFDataExtractor DE( diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 4e1cb7d544e..57144882c4b 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" @@ -36,6 +37,7 @@ #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/RelocationResolver.h" #include "llvm/Object/StackMapParser.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/ARMAttributeParser.h" @@ -61,6 +63,7 @@ #include #include #include +#include #include using namespace llvm; @@ -119,9 +122,9 @@ template class DumpStyle; /// the size, entity size and virtual address are different entries in arbitrary /// order (DT_REL, DT_RELSZ, DT_RELENT for example). struct DynRegionInfo { - DynRegionInfo() = default; - DynRegionInfo(const void *A, uint64_t S, uint64_t ES) - : Addr(A), Size(S), EntSize(ES) {} + DynRegionInfo(StringRef ObjName) : FileName(ObjName) {} + DynRegionInfo(const void *A, uint64_t S, uint64_t ES, StringRef ObjName) + : Addr(A), Size(S), EntSize(ES), FileName(ObjName) {} /// Address in current address space. const void *Addr = nullptr; @@ -130,14 +133,18 @@ struct DynRegionInfo { /// Size of each entity in the region. uint64_t EntSize = 0; + /// Name of the file. Used for error reporting. + StringRef FileName; + template ArrayRef getAsArrayRef() const { const Type *Start = reinterpret_cast(Addr); if (!Start) return {Start, Start}; if (EntSize != sizeof(Type) || Size % EntSize) { // TODO: Add a section index to this warning. 
- reportWarning("invalid section size (" + Twine(Size) + - ") or entity size (" + Twine(EntSize) + ")"); + reportWarning(createError("invalid section size (" + Twine(Size) + + ") or entity size (" + Twine(EntSize) + ")"), + FileName); return {Start, Start}; } return {Start, Start + (Size / EntSize)}; @@ -166,11 +173,7 @@ public: void printVersionInfo() override; void printGroupSections() override; - void printAttributes() override; - void printMipsPLTGOT() override; - void printMipsABIFlags() override; - void printMipsReginfo() override; - void printMipsOptions() override; + void printArchSpecificInfo() override; void printStackMap() const override; @@ -182,6 +185,7 @@ public: void printNotes() override; void printELFLinkerOptions() override; + void printStackSizes() override; const object::ELFObjectFile *getElfObject() const { return ObjF; }; @@ -195,20 +199,27 @@ private: if (DRI.Addr < Obj->base() || reinterpret_cast(DRI.Addr) + DRI.Size > Obj->base() + Obj->getBufSize()) - error(llvm::object::object_error::parse_failed); + reportError(errorCodeToError(llvm::object::object_error::parse_failed), + ObjF->getFileName()); return DRI; } DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) { - return checkDRI( - {ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize}); + return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, + EntSize, ObjF->getFileName()}); } DynRegionInfo createDRIFrom(const Elf_Shdr *S) { - return checkDRI( - {ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize}); + return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, + S->sh_entsize, ObjF->getFileName()}); } + void printAttributes(); + void printMipsReginfo(); + void printMipsOptions(); + + std::pair + findDynamic(const ELFFile *Obj); void loadDynamicTable(const ELFFile *Obj); void parseDynamicTable(); @@ -226,7 +237,7 @@ private: DynRegionInfo DynSymRegion; DynRegionInfo DynamicTable; StringRef DynamicStringTable; - 
StringRef SOName = ""; + std::string SOName = ""; const Elf_Hash *HashTable = nullptr; const Elf_GnuHash *GnuHashTable = nullptr; const Elf_Shdr *DotSymtabSec = nullptr; @@ -291,7 +302,8 @@ public: void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef &SectionName, unsigned &SectionIndex) const; - std::string getStaticSymbolName(uint32_t Index) const; + Expected getStaticSymbolName(uint32_t Index) const; + std::string getDynamicString(uint64_t Value) const; StringRef getSymbolVersionByIndex(StringRef StrTab, uint32_t VersionSymbolIndex, bool &IsDefault) const; @@ -328,16 +340,27 @@ void ELFDumper::printSymbolsHelper(bool IsDynamic) const { } else { if (!DotSymtabSec) return; - StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec)); - Syms = unwrapOrError(Obj->symbols(DotSymtabSec)); - SymtabName = unwrapOrError(Obj->getSectionName(DotSymtabSec)); + StrTable = unwrapOrError(ObjF->getFileName(), + Obj->getStringTableForSymtab(*DotSymtabSec)); + Syms = unwrapOrError(ObjF->getFileName(), Obj->symbols(DotSymtabSec)); + SymtabName = + unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DotSymtabSec)); Entries = DotSymtabSec->getEntityCount(); } if (Syms.begin() == Syms.end()) return; - ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries); + + // The st_other field has 2 logical parts. The first two bits hold the symbol + // visibility (STV_*) and the remainder hold other platform-specific values. 
+ bool NonVisibilityBitsUsed = llvm::find_if(Syms, [](const Elf_Sym &S) { + return S.st_other & ~0x3; + }) != Syms.end(); + + ELFDumperStyle->printSymtabMessage(Obj, SymtabName, Entries, + NonVisibilityBitsUsed); for (const auto &Sym : Syms) - ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic); + ELFDumperStyle->printSymbol(Obj, &Sym, Syms.begin(), StrTable, IsDynamic, + NonVisibilityBitsUsed); } template class MipsGOTParser; @@ -346,8 +369,20 @@ template class DumpStyle { public: using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; + using Elf_Addr = typename ELFT::Addr; + + DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) { + FileName = this->Dumper->getElfObject()->getFileName(); + + // Dumper reports all non-critical errors as warnings. + // It does not print the same warning more than once. + WarningHandler = [this](const Twine &Msg) { + if (Warnings.insert(Msg.str()).second) + reportWarning(createError(Msg), FileName); + return Error::success(); + }; + } - DumpStyle(ELFDumper *Dumper) : Dumper(Dumper) {} virtual ~DumpStyle() = default; virtual void printFileHeaders(const ELFFile *Obj) = 0; @@ -360,10 +395,10 @@ public: virtual void printDynamic(const ELFFile *Obj) {} virtual void printDynamicRelocations(const ELFFile *Obj) = 0; virtual void printSymtabMessage(const ELFFile *Obj, StringRef Name, - size_t Offset) {} + size_t Offset, bool NonVisibilityBitsUsed) {} virtual void printSymbol(const ELFFile *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef StrTable, - bool IsDynamic) = 0; + bool IsDynamic, bool NonVisibilityBitsUsed) = 0; virtual void printProgramHeaders(const ELFFile *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) = 0; @@ -378,11 +413,31 @@ public: virtual void printAddrsig(const ELFFile *Obj) = 0; virtual void printNotes(const ELFFile *Obj) = 0; virtual void printELFLinkerOptions(const ELFFile *Obj) = 0; + virtual void printStackSizes(const ELFObjectFile *Obj) = 0; + void 
printNonRelocatableStackSizes(const ELFObjectFile *Obj, + std::function PrintHeader); + void printRelocatableStackSizes(const ELFObjectFile *Obj, + std::function PrintHeader); + void printFunctionStackSize(const ELFObjectFile *Obj, uint64_t SymValue, + SectionRef FunctionSec, + const StringRef SectionName, DataExtractor Data, + uint64_t *Offset); + void printStackSize(const ELFObjectFile *Obj, RelocationRef Rel, + SectionRef FunctionSec, + const StringRef &StackSizeSectionName, + const RelocationResolver &Resolver, DataExtractor Data); + virtual void printStackSizeEntry(uint64_t Size, StringRef FuncName) = 0; virtual void printMipsGOT(const MipsGOTParser &Parser) = 0; virtual void printMipsPLT(const MipsGOTParser &Parser) = 0; + virtual void printMipsABIFlags(const ELFObjectFile *Obj) = 0; const ELFDumper *dumper() const { return Dumper; } +protected: + std::function WarningHandler; + StringRef FileName; + private: + std::unordered_set Warnings; const ELFDumper *Dumper; }; @@ -407,8 +462,8 @@ public: void printHashSymbols(const ELFO *Obj) override; void printDynamic(const ELFFile *Obj) override; void printDynamicRelocations(const ELFO *Obj) override; - void printSymtabMessage(const ELFO *Obj, StringRef Name, - size_t Offset) override; + void printSymtabMessage(const ELFO *Obj, StringRef Name, size_t Offset, + bool NonVisibilityBitsUsed) override; void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) override; void printVersionSymbolSection(const ELFFile *Obj, @@ -422,8 +477,11 @@ public: void printAddrsig(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; void printELFLinkerOptions(const ELFFile *Obj) override; + void printStackSizes(const ELFObjectFile *Obj) override; + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags(const 
ELFObjectFile *Obj) override; private: struct Field { @@ -484,7 +542,8 @@ private: void printRelocation(const ELFO *Obj, const Elf_Sym *Sym, StringRef SymbolName, const Elf_Rela &R, bool IsRela); void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, - StringRef StrTable, bool IsDynamic) override; + StringRef StrTable, bool IsDynamic, + bool NonVisibilityBitsUsed) override; std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym); void printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela); @@ -525,8 +584,11 @@ public: void printAddrsig(const ELFFile *Obj) override; void printNotes(const ELFFile *Obj) override; void printELFLinkerOptions(const ELFFile *Obj) override; + void printStackSizes(const ELFObjectFile *Obj) override; + void printStackSizeEntry(uint64_t Size, StringRef FuncName) override; void printMipsGOT(const MipsGOTParser &Parser) override; void printMipsPLT(const MipsGOTParser &Parser) override; + void printMipsABIFlags(const ELFObjectFile *Obj) override; private: void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab); @@ -534,7 +596,8 @@ private: void printSymbols(const ELFO *Obj); void printDynamicSymbols(const ELFO *Obj); void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, - StringRef StrTable, bool IsDynamic) override; + StringRef StrTable, bool IsDynamic, + bool /*NonVisibilityBitsUsed*/) override; void printProgramHeaders(const ELFO *Obj); void printSectionMapping(const ELFO *Obj) {} @@ -680,9 +743,9 @@ StringRef ELFDumper::getSymbolVersion(StringRef StrTab, sizeof(Elf_Sym); // Get the corresponding version index entry. 
- const Elf_Versym *Versym = - unwrapOrError(ObjF->getELFFile()->template getEntry( - SymbolVersionSection, EntryIndex)); + const Elf_Versym *Versym = unwrapOrError( + ObjF->getFileName(), ObjF->getELFFile()->template getEntry( + SymbolVersionSection, EntryIndex)); return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault); } @@ -691,15 +754,22 @@ static std::string maybeDemangle(StringRef Name) { } template -std::string ELFDumper::getStaticSymbolName(uint32_t Index) const { +Expected +ELFDumper::getStaticSymbolName(uint32_t Index) const { const ELFFile *Obj = ObjF->getELFFile(); - StringRef StrTable = - unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec)); - Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec)); - if (Index >= Syms.size()) - reportError("Invalid symbol index"); - const Elf_Sym *Sym = &Syms[Index]; - return maybeDemangle(unwrapOrError(Sym->getName(StrTable))); + Expected SymOrErr = + Obj->getSymbol(DotSymtabSec, Index); + if (!SymOrErr) + return SymOrErr.takeError(); + + Expected StrTabOrErr = Obj->getStringTableForSymtab(*DotSymtabSec); + if (!StrTabOrErr) + return StrTabOrErr.takeError(); + + Expected NameOrErr = (*SymOrErr)->getName(*StrTabOrErr); + if (!NameOrErr) + return NameOrErr.takeError(); + return maybeDemangle(*NameOrErr); } template @@ -717,7 +787,7 @@ StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, // Lookup this symbol in the version table. LoadVersionMap(); if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull()) - reportError("Invalid version entry"); + reportError(createError("Invalid version entry"), ObjF->getFileName()); const VersionMapEntry &Entry = VersionMap[VersionIndex]; // Get the version name string. 
@@ -731,7 +801,7 @@ StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, IsDefault = false; } if (NameOffset >= StrTab.size()) - reportError("Invalid string offset"); + reportError(createError("Invalid string offset"), ObjF->getFileName()); return StrTab.data() + NameOffset; } @@ -739,14 +809,14 @@ template std::string ELFDumper::getFullSymbolName(const Elf_Sym *Symbol, StringRef StrTable, bool IsDynamic) const { - std::string SymbolName = - maybeDemangle(unwrapOrError(Symbol->getName(StrTable))); + std::string SymbolName = maybeDemangle( + unwrapOrError(ObjF->getFileName(), Symbol->getName(StrTable))); if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) { unsigned SectionIndex; StringRef SectionName; - Elf_Sym_Range Syms = - unwrapOrError(ObjF->getELFFile()->symbols(DotSymtabSec)); + Elf_Sym_Range Syms = unwrapOrError( + ObjF->getFileName(), ObjF->getELFFile()->symbols(DotSymtabSec)); getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex); return SectionName; } @@ -783,31 +853,32 @@ void ELFDumper::getSectionNameIndex(const Elf_Sym *Symbol, SectionName = "Reserved"; else { if (SectionIndex == SHN_XINDEX) - SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex( - Symbol, FirstSym, ShndxTable)); + SectionIndex = unwrapOrError(ObjF->getFileName(), + object::getExtendedSymbolTableIndex( + Symbol, FirstSym, ShndxTable)); const ELFFile *Obj = ObjF->getELFFile(); const typename ELFT::Shdr *Sec = - unwrapOrError(Obj->getSection(SectionIndex)); - SectionName = unwrapOrError(Obj->getSectionName(Sec)); + unwrapOrError(ObjF->getFileName(), Obj->getSection(SectionIndex)); + SectionName = unwrapOrError(ObjF->getFileName(), Obj->getSectionName(Sec)); } } template static const typename ELFO::Elf_Shdr * -findNotEmptySectionByAddress(const ELFO *Obj, uint64_t Addr) { - for (const auto &Shdr : unwrapOrError(Obj->sections())) +findNotEmptySectionByAddress(const ELFO *Obj, StringRef FileName, + uint64_t Addr) { + for (const auto &Shdr : 
unwrapOrError(FileName, Obj->sections())) if (Shdr.sh_addr == Addr && Shdr.sh_size > 0) return &Shdr; return nullptr; } template -static const typename ELFO::Elf_Shdr *findSectionByName(const ELFO &Obj, - StringRef Name) { - for (const auto &Shdr : unwrapOrError(Obj.sections())) { - if (Name == unwrapOrError(Obj.getSectionName(&Shdr))) +static const typename ELFO::Elf_Shdr * +findSectionByName(const ELFO &Obj, StringRef FileName, StringRef Name) { + for (const auto &Shdr : unwrapOrError(FileName, Obj.sections())) + if (Name == unwrapOrError(FileName, Obj.getSectionName(&Shdr))) return &Shdr; - } return nullptr; } @@ -1356,10 +1427,12 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) { } template -void ELFDumper::loadDynamicTable(const ELFFile *Obj) { +std::pair +ELFDumper::findDynamic(const ELFFile *Obj) { // Try to locate the PT_DYNAMIC header. const Elf_Phdr *DynamicPhdr = nullptr; - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(ObjF->getFileName(), Obj->program_headers())) { if (Phdr.p_type != ELF::PT_DYNAMIC) continue; DynamicPhdr = &Phdr; @@ -1368,61 +1441,132 @@ void ELFDumper::loadDynamicTable(const ELFFile *Obj) { // Try to locate the .dynamic section in the sections header table. const Elf_Shdr *DynamicSec = nullptr; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { if (Sec.sh_type != ELF::SHT_DYNAMIC) continue; DynamicSec = &Sec; break; } - // Information in the section header has priority over the information - // in a PT_DYNAMIC header. + if (DynamicPhdr && DynamicPhdr->p_offset + DynamicPhdr->p_filesz > + ObjF->getMemoryBufferRef().getBufferSize()) { + reportWarning( + createError( + "PT_DYNAMIC segment offset + size exceeds the size of the file"), + ObjF->getFileName()); + // Don't use the broken dynamic header. 
+ DynamicPhdr = nullptr; + } + + if (DynamicPhdr && DynamicSec) { + StringRef Name = + unwrapOrError(ObjF->getFileName(), Obj->getSectionName(DynamicSec)); + if (DynamicSec->sh_addr + DynamicSec->sh_size > + DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz || + DynamicSec->sh_addr < DynamicPhdr->p_vaddr) + reportWarning(createError("The SHT_DYNAMIC section '" + Name + + "' is not contained within the " + "PT_DYNAMIC segment"), + ObjF->getFileName()); + + if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr) + reportWarning(createError("The SHT_DYNAMIC section '" + Name + + "' is not at the start of " + "PT_DYNAMIC segment"), + ObjF->getFileName()); + } + + return std::make_pair(DynamicPhdr, DynamicSec); +} + +template +void ELFDumper::loadDynamicTable(const ELFFile *Obj) { + const Elf_Phdr *DynamicPhdr; + const Elf_Shdr *DynamicSec; + std::tie(DynamicPhdr, DynamicSec) = findDynamic(Obj); + if (!DynamicPhdr && !DynamicSec) + return; + + DynRegionInfo FromPhdr(ObjF->getFileName()); + bool IsPhdrTableValid = false; + if (DynamicPhdr) { + FromPhdr = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn)); + IsPhdrTableValid = !FromPhdr.getAsArrayRef().empty(); + } + + // Locate the dynamic table described in a section header. // Ignore sh_entsize and use the expected value for entry size explicitly. - // This allows us to dump the dynamic sections with a broken sh_entsize + // This allows us to dump dynamic sections with a broken sh_entsize // field. 
+ DynRegionInfo FromSec(ObjF->getFileName()); + bool IsSecTableValid = false; if (DynamicSec) { - DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset, - DynamicSec->sh_size, sizeof(Elf_Dyn)}); - parseDynamicTable(); + FromSec = + checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset, + DynamicSec->sh_size, sizeof(Elf_Dyn), ObjF->getFileName()}); + IsSecTableValid = !FromSec.getAsArrayRef().empty(); } - // If we have a PT_DYNAMIC header, we will either check the found dynamic - // section or take the dynamic table data directly from the header. - if (!DynamicPhdr) - return; - - if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz > - ObjF->getMemoryBufferRef().getBufferSize()) - reportError( - "PT_DYNAMIC segment offset + size exceeds the size of the file"); - - if (!DynamicSec) { - DynamicTable = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn)); - parseDynamicTable(); + // When we only have information from one of the SHT_DYNAMIC section header or + // PT_DYNAMIC program header, just use that. + if (!DynamicPhdr || !DynamicSec) { + if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) { + DynamicTable = DynamicPhdr ? FromPhdr : FromSec; + parseDynamicTable(); + } else { + reportWarning(createError("no valid dynamic table was found"), + ObjF->getFileName()); + } return; } - StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec)); - if (DynamicSec->sh_addr + DynamicSec->sh_size > - DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz || - DynamicSec->sh_addr < DynamicPhdr->p_vaddr) - reportWarning("The SHT_DYNAMIC section '" + Name + - "' is not contained within the " - "PT_DYNAMIC segment"); + // At this point we have tables found from the section header and from the + // dynamic segment. Usually they match, but we have to do sanity checks to + // verify that. 
- if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr) - reportWarning("The SHT_DYNAMIC section '" + Name + - "' is not at the start of " - "PT_DYNAMIC segment"); + if (FromPhdr.Addr != FromSec.Addr) + reportWarning(createError("SHT_DYNAMIC section header and PT_DYNAMIC " + "program header disagree about " + "the location of the dynamic table"), + ObjF->getFileName()); + + if (!IsPhdrTableValid && !IsSecTableValid) { + reportWarning(createError("no valid dynamic table was found"), + ObjF->getFileName()); + return; + } + + // Information in the PT_DYNAMIC program header has priority over the information + // in a section header. + if (IsPhdrTableValid) { + if (!IsSecTableValid) + reportWarning( + createError( + "SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used"), + ObjF->getFileName()); + DynamicTable = FromPhdr; + } else { + reportWarning( + createError( + "PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used"), + ObjF->getFileName()); + DynamicTable = FromSec; + } + + parseDynamicTable(); } template ELFDumper::ELFDumper(const object::ELFObjectFile *ObjF, - ScopedPrinter &Writer) - : ObjDumper(Writer), ObjF(ObjF) { + ScopedPrinter &Writer) + : ObjDumper(Writer), ObjF(ObjF), DynRelRegion(ObjF->getFileName()), + DynRelaRegion(ObjF->getFileName()), DynRelrRegion(ObjF->getFileName()), + DynPLTRelRegion(ObjF->getFileName()), DynSymRegion(ObjF->getFileName()), + DynamicTable(ObjF->getFileName()) { const ELFFile *Obj = ObjF->getELFFile(); - - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { switch (Sec.sh_type) { case ELF::SHT_SYMTAB: if (!DotSymtabSec) @@ -1433,16 +1577,17 @@ ELFDumper::ELFDumper(const object::ELFObjectFile *ObjF, DynSymRegion = createDRIFrom(&Sec); // This is only used (if Elf_Shdr present)for naming section in GNU // style - DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec)); + DynSymtabName = + unwrapOrError(ObjF->getFileName(), 
Obj->getSectionName(&Sec)); if (Expected E = Obj->getStringTableForSymtab(Sec)) DynamicStringTable = *E; else - warn(E.takeError()); + reportWarning(E.takeError(), ObjF->getFileName()); } break; case ELF::SHT_SYMTAB_SHNDX: - ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec)); + ShndxTable = unwrapOrError(ObjF->getFileName(), Obj->getSHNDXTable(Sec)); break; case ELF::SHT_GNU_versym: if (!SymbolVersionSection) @@ -1547,10 +1692,13 @@ template void ELFDumper::parseDynamicTable() { auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * { auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr); if (!MappedAddrOrError) { - reportWarning("Unable to parse DT_" + - Twine(getTypeString( - ObjF->getELFFile()->getHeader()->e_machine, Tag)) + - ": " + llvm::toString(MappedAddrOrError.takeError())); + Error Err = + createError("Unable to parse DT_" + + Twine(getTypeString( + ObjF->getELFFile()->getHeader()->e_machine, Tag)) + + ": " + llvm::toString(MappedAddrOrError.takeError())); + + reportWarning(std::move(Err), ObjF->getFileName()); return nullptr; } return MappedAddrOrError.get(); @@ -1576,10 +1724,29 @@ template void ELFDumper::parseDynamicTable() { case ELF::DT_STRSZ: StringTableSize = Dyn.getVal(); break; - case ELF::DT_SYMTAB: - DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); - DynSymRegion.EntSize = sizeof(Elf_Sym); + case ELF::DT_SYMTAB: { + // Often we find the information about the dynamic symbol table + // location in the SHT_DYNSYM section header. However, the value in + // DT_SYMTAB has priority, because it is used by dynamic loaders to + // locate .dynsym at runtime. The location we find in the section header + // and the location we find here should match. If we can't map the + // DT_SYMTAB value to an address (e.g. when there are no program headers), we + // ignore its value. 
+ if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) { + // EntSize is non-zero if the dynamic symbol table has been found via a + // section header. + if (DynSymRegion.EntSize && VA != DynSymRegion.Addr) + reportWarning( + createError( + "SHT_DYNSYM section header and DT_SYMTAB disagree about " + "the location of the dynamic symbol table"), + ObjF->getFileName()); + + DynSymRegion.Addr = VA; + DynSymRegion.EntSize = sizeof(Elf_Sym); + } break; + } case ELF::DT_RELA: DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); break; @@ -1619,8 +1786,9 @@ template void ELFDumper::parseDynamicTable() { else if (Dyn.getVal() == DT_RELA) DynPLTRelRegion.EntSize = sizeof(Elf_Rela); else - reportError(Twine("unknown DT_PLTREL value of ") + - Twine((uint64_t)Dyn.getVal())); + reportError(createError(Twine("unknown DT_PLTREL value of ") + + Twine((uint64_t)Dyn.getVal())), + ObjF->getFileName()); break; case ELF::DT_JMPREL: DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr()); @@ -1632,8 +1800,7 @@ template void ELFDumper::parseDynamicTable() { } if (StringTableBegin) DynamicStringTable = StringRef(StringTableBegin, StringTableSize); - if (SONameOffset && SONameOffset < DynamicStringTable.size()) - SOName = DynamicStringTable.data() + SONameOffset; + SOName = getDynamicString(SONameOffset); } template @@ -1715,6 +1882,10 @@ template void ELFDumper::printELFLinkerOptions() { ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile()); } +template void ELFDumper::printStackSizes() { + ELFDumperStyle->printStackSizes(ObjF); +} + #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \ { #enum, prefix##_##enum } @@ -1953,13 +2124,7 @@ void ELFDumper::printDynamicEntry(raw_ostream &OS, uint64_t Type, {DT_RPATH, "Library rpath"}, {DT_RUNPATH, "Library runpath"}, }; - OS << TagNames.at(Type) << ": "; - if (DynamicStringTable.empty()) - OS << " "; - else if (Value < DynamicStringTable.size()) - OS << "[" << StringRef(DynamicStringTable.data() + Value) << "]"; - 
else - OS << ""; + OS << TagNames.at(Type) << ": [" << getDynamicString(Value) << "]"; break; } case DT_FLAGS: @@ -1974,6 +2139,15 @@ void ELFDumper::printDynamicEntry(raw_ostream &OS, uint64_t Type, } } +template +std::string ELFDumper::getDynamicString(uint64_t Value) const { + if (DynamicStringTable.empty()) + return ""; + if (Value < DynamicStringTable.size()) + return DynamicStringTable.data() + Value; + return Twine("").str(); +} + template void ELFDumper::printUnwindInfo() { DwarfCFIEH::PrinterContext Ctx(W, ObjF); Ctx.printUnwindInformation(); @@ -1985,7 +2159,8 @@ template <> void ELFDumper::printUnwindInfo() { const ELFFile *Obj = ObjF->getELFFile(); const unsigned Machine = Obj->getHeader()->e_machine; if (Machine == EM_ARM) { - ARM::EHABI::PrinterContext Ctx(W, Obj, DotSymtabSec); + ARM::EHABI::PrinterContext Ctx(W, Obj, ObjF->getFileName(), + DotSymtabSec); Ctx.PrintUnwindInformation(); } DwarfCFIEH::PrinterContext Ctx(W, ObjF); @@ -2001,17 +2176,10 @@ template void ELFDumper::printDynamicTable() { template void ELFDumper::printNeededLibraries() { ListScope D(W, "NeededLibraries"); - using LibsTy = std::vector; - LibsTy Libs; - + std::vector Libs; for (const auto &Entry : dynamic_table()) - if (Entry.d_tag == ELF::DT_NEEDED) { - uint64_t Value = Entry.d_un.d_val; - if (Value < DynamicStringTable.size()) - Libs.push_back(StringRef(DynamicStringTable.data() + Value)); - else - Libs.push_back(""); - } + if (Entry.d_tag == ELF::DT_NEEDED) + Libs.push_back(getDynamicString(Entry.d_un.d_val)); llvm::stable_sort(Libs); @@ -2042,7 +2210,7 @@ template void ELFDumper::printGnuHashTable() { Elf_Sym_Range Syms = dynamic_symbols(); unsigned NumSyms = std::distance(Syms.begin(), Syms.end()); if (!NumSyms) - reportError("No dynamic symbol section"); + reportError(createError("No dynamic symbol section"), ObjF->getFileName()); W.printHexList("Values", GnuHashTable->values(NumSyms)); } @@ -2050,6 +2218,30 @@ template void ELFDumper::printLoadName() { 
W.printString("LoadName", SOName); } +template void ELFDumper::printArchSpecificInfo() { + const ELFFile *Obj = ObjF->getELFFile(); + switch (Obj->getHeader()->e_machine) { + case EM_ARM: + printAttributes(); + break; + case EM_MIPS: { + ELFDumperStyle->printMipsABIFlags(ObjF); + printMipsOptions(); + printMipsReginfo(); + + MipsGOTParser Parser(Obj, ObjF->getFileName(), dynamic_table(), + dynamic_symbols()); + if (Parser.hasGot()) + ELFDumperStyle->printMipsGOT(Parser); + if (Parser.hasPlt()) + ELFDumperStyle->printMipsPLT(Parser); + break; + } + default: + break; + } +} + template void ELFDumper::printAttributes() { W.startLine() << "Attributes not implemented.\n"; } @@ -2064,11 +2256,13 @@ template <> void ELFDumper::printAttributes() { } DictScope BA(W, "BuildAttributes"); - for (const ELFO::Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const ELFO::Elf_Shdr &Sec : + unwrapOrError(ObjF->getFileName(), Obj->sections())) { if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES) continue; - ArrayRef Contents = unwrapOrError(Obj->getSectionContents(&Sec)); + ArrayRef Contents = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(&Sec)); if (Contents[0] != ARMBuildAttrs::Format_Version) { errs() << "unrecognised FormatVersion: 0x" << Twine::utohexstr(Contents[0]) << '\n'; @@ -2092,7 +2286,8 @@ public: const bool IsStatic; const ELFO * const Obj; - MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms); + MipsGOTParser(const ELFO *Obj, StringRef FileName, Elf_Dyn_Range DynTable, + Elf_Sym_Range DynSyms); bool hasGot() const { return !GotEntries.empty(); } bool hasPlt() const { return !PltEntries.empty(); } @@ -2126,6 +2321,8 @@ private: const Elf_Shdr *PltSec; const Elf_Shdr *PltRelSec; const Elf_Shdr *PltSymTable; + StringRef FileName; + Elf_Sym_Range GotDynSyms; StringRef PltStrTable; @@ -2136,21 +2333,24 @@ private: } // end anonymous namespace template -MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, 
+MipsGOTParser::MipsGOTParser(const ELFO *Obj, StringRef FileName, + Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms) : IsStatic(DynTable.empty()), Obj(Obj), GotSec(nullptr), LocalNum(0), - GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr) { + GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr), + FileName(FileName) { // See "Global Offset Table" in Chapter 5 in the following document // for detailed GOT description. // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf // Find static GOT secton. if (IsStatic) { - GotSec = findSectionByName(*Obj, ".got"); + GotSec = findSectionByName(*Obj, FileName, ".got"); if (!GotSec) - reportError("Cannot find .got section"); + return; - ArrayRef Content = unwrapOrError(Obj->getSectionContents(GotSec)); + ArrayRef Content = + unwrapOrError(FileName, Obj->getSectionContents(GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); LocalNum = GotEntries.size(); @@ -2194,17 +2394,21 @@ MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, size_t DynSymTotal = DynSyms.size(); if (*DtGotSym > DynSymTotal) - reportError("MIPS_GOTSYM exceeds a number of dynamic symbols"); + reportError( + createError("MIPS_GOTSYM exceeds a number of dynamic symbols"), + FileName); - GotSec = findNotEmptySectionByAddress(Obj, *DtPltGot); + GotSec = findNotEmptySectionByAddress(Obj, FileName, *DtPltGot); if (!GotSec) - reportError("There is no not empty GOT section at 0x" + - Twine::utohexstr(*DtPltGot)); + reportError(createError("There is no not empty GOT section at 0x" + + Twine::utohexstr(*DtPltGot)), + FileName); LocalNum = *DtLocalGotNum; GlobalNum = DynSymTotal - *DtGotSym; - ArrayRef Content = unwrapOrError(Obj->getSectionContents(GotSec)); + ArrayRef Content = + unwrapOrError(FileName, Obj->getSectionContents(GotSec)); GotEntries = Entries(reinterpret_cast(Content.data()), Content.size() / sizeof(Entry)); GotDynSyms = 
DynSyms.drop_front(*DtGotSym); @@ -2217,23 +2421,24 @@ MipsGOTParser::MipsGOTParser(const ELFO *Obj, Elf_Dyn_Range DynTable, if (!DtJmpRel) report_fatal_error("Cannot find JMPREL dynamic table tag."); - PltSec = findNotEmptySectionByAddress(Obj, *DtMipsPltGot); + PltSec = findNotEmptySectionByAddress(Obj, FileName, * DtMipsPltGot); if (!PltSec) report_fatal_error("There is no not empty PLTGOT section at 0x " + Twine::utohexstr(*DtMipsPltGot)); - PltRelSec = findNotEmptySectionByAddress(Obj, *DtJmpRel); + PltRelSec = findNotEmptySectionByAddress(Obj, FileName, * DtJmpRel); if (!PltRelSec) report_fatal_error("There is no not empty RELPLT section at 0x" + Twine::utohexstr(*DtJmpRel)); ArrayRef PltContent = - unwrapOrError(Obj->getSectionContents(PltSec)); + unwrapOrError(FileName, Obj->getSectionContents(PltSec)); PltEntries = Entries(reinterpret_cast(PltContent.data()), PltContent.size() / sizeof(Entry)); - PltSymTable = unwrapOrError(Obj->getSection(PltRelSec->sh_link)); - PltStrTable = unwrapOrError(Obj->getStringTableForSymtab(*PltSymTable)); + PltSymTable = unwrapOrError(FileName, Obj->getSection(PltRelSec->sh_link)); + PltStrTable = + unwrapOrError(FileName, Obj->getStringTableForSymtab(*PltSymTable)); } } @@ -2334,26 +2539,16 @@ const typename MipsGOTParser::Elf_Sym * MipsGOTParser::getPltSym(const Entry *E) const { int64_t Offset = std::distance(getPltEntries().data(), E); if (PltRelSec->sh_type == ELF::SHT_REL) { - Elf_Rel_Range Rels = unwrapOrError(Obj->rels(PltRelSec)); - return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Elf_Rel_Range Rels = unwrapOrError(FileName, Obj->rels(PltRelSec)); + return unwrapOrError(FileName, + Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); } else { - Elf_Rela_Range Rels = unwrapOrError(Obj->relas(PltRelSec)); - return unwrapOrError(Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); + Elf_Rela_Range Rels = unwrapOrError(FileName, Obj->relas(PltRelSec)); + return unwrapOrError(FileName, + 
Obj->getRelocationSymbol(&Rels[Offset], PltSymTable)); } } -template void ELFDumper::printMipsPLTGOT() { - const ELFFile *Obj = ObjF->getELFFile(); - if (Obj->getHeader()->e_machine != EM_MIPS) - reportError("MIPS PLT GOT is available for MIPS targets only"); - - MipsGOTParser Parser(Obj, dynamic_table(), dynamic_symbols()); - if (Parser.hasGot()) - ELFDumperStyle->printMipsGOT(Parser); - if (Parser.hasPlt()) - ELFDumperStyle->printMipsPLT(Parser); -} - static const EnumEntry ElfMipsISAExtType[] = { {"None", Mips::AFL_EXT_NONE}, {"Broadcom SB-1", Mips::AFL_EXT_SB1}, @@ -2427,41 +2622,6 @@ static int getMipsRegisterSize(uint8_t Flag) { } } -template void ELFDumper::printMipsABIFlags() { - const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.abiflags"); - if (!Shdr) { - W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; - return; - } - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); - if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) { - W.startLine() << "The .MIPS.abiflags section has a wrong size.\n"; - return; - } - - auto *Flags = reinterpret_cast *>(Sec.data()); - - raw_ostream &OS = W.getOStream(); - DictScope GS(W, "MIPS ABI Flags"); - - W.printNumber("Version", Flags->version); - W.startLine() << "ISA: "; - if (Flags->isa_rev <= 1) - OS << format("MIPS%u", Flags->isa_level); - else - OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev); - OS << "\n"; - W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)); - W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags)); - W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)); - W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size)); - W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size)); - W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size)); - W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1)); - W.printHex("Flags 2", Flags->flags2); -} - 
template static void printMipsReginfoData(ScopedPrinter &W, const Elf_Mips_RegInfo &Reginfo) { @@ -2475,12 +2635,13 @@ static void printMipsReginfoData(ScopedPrinter &W, template void ELFDumper::printMipsReginfo() { const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".reginfo"); + const Elf_Shdr *Shdr = findSectionByName(*Obj, ObjF->getFileName(), ".reginfo"); if (!Shdr) { W.startLine() << "There is no .reginfo section in the file.\n"; return; } - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); if (Sec.size() != sizeof(Elf_Mips_RegInfo)) { W.startLine() << "The .reginfo section has a wrong size.\n"; return; @@ -2493,7 +2654,8 @@ template void ELFDumper::printMipsReginfo() { template void ELFDumper::printMipsOptions() { const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *Shdr = findSectionByName(*Obj, ".MIPS.options"); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.options"); if (!Shdr) { W.startLine() << "There is no .MIPS.options section in the file.\n"; return; @@ -2501,7 +2663,8 @@ template void ELFDumper::printMipsOptions() { DictScope GS(W, "MIPS Options"); - ArrayRef Sec = unwrapOrError(Obj->getSectionContents(Shdr)); + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); while (!Sec.empty()) { if (Sec.size() < sizeof(Elf_Mips_Options)) { W.startLine() << "The .MIPS.options section has a wrong size.\n"; @@ -2524,8 +2687,9 @@ template void ELFDumper::printMipsOptions() { template void ELFDumper::printStackMap() const { const ELFFile *Obj = ObjF->getELFFile(); const Elf_Shdr *StackMapSection = nullptr; - for (const auto &Sec : unwrapOrError(Obj->sections())) { - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + for (const auto &Sec : unwrapOrError(ObjF->getFileName(), Obj->sections())) { + StringRef Name = + unwrapOrError(ObjF->getFileName(), 
Obj->getSectionName(&Sec)); if (Name == ".llvm_stackmaps") { StackMapSection = &Sec; break; @@ -2535,8 +2699,8 @@ template void ELFDumper::printStackMap() const { if (!StackMapSection) return; - ArrayRef StackMapContentsArray = - unwrapOrError(Obj->getSectionContents(StackMapSection)); + ArrayRef StackMapContentsArray = unwrapOrError( + ObjF->getFileName(), Obj->getSectionContents(StackMapSection)); prettyPrintStackMap( W, StackMapParser(StackMapContentsArray)); @@ -2560,24 +2724,26 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1, } template -static std::string getSectionHeadersNumString(const ELFFile *Obj) { +static std::string getSectionHeadersNumString(const ELFFile *Obj, + StringRef FileName) { const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); if (ElfHeader->e_shnum != 0) return to_string(ElfHeader->e_shnum); - ArrayRef Arr = unwrapOrError(Obj->sections()); + ArrayRef Arr = unwrapOrError(FileName, Obj->sections()); if (Arr.empty()) return "0"; return "0 (" + to_string(Arr[0].sh_size) + ")"; } template -static std::string getSectionHeaderTableIndexString(const ELFFile *Obj) { +static std::string getSectionHeaderTableIndexString(const ELFFile *Obj, + StringRef FileName) { const typename ELFT::Ehdr *ElfHeader = Obj->getHeader(); if (ElfHeader->e_shstrndx != SHN_XINDEX) return to_string(ElfHeader->e_shstrndx); - ArrayRef Arr = unwrapOrError(Obj->sections()); + ArrayRef Arr = unwrapOrError(FileName, Obj->sections()); if (Arr.empty()) return "65535 (corrupt: out of range)"; return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + @@ -2639,9 +2805,9 @@ template void GNUStyle::printFileHeaders(const ELFO *Obj) { printFields(OS, "Number of program headers:", Str); Str = to_string(e->e_shentsize) + " (bytes)"; printFields(OS, "Size of section headers:", Str); - Str = getSectionHeadersNumString(Obj); + Str = getSectionHeadersNumString(Obj, this->FileName); printFields(OS, "Number of section headers:", Str); - Str = 
getSectionHeaderTableIndexString(Obj); + Str = getSectionHeaderTableIndexString(Obj, this->FileName); printFields(OS, "Section header string table index:", Str); } @@ -2663,26 +2829,29 @@ struct GroupSection { }; template -std::vector getGroups(const ELFFile *Obj) { +std::vector getGroups(const ELFFile *Obj, + StringRef FileName) { using Elf_Shdr = typename ELFT::Shdr; using Elf_Sym = typename ELFT::Sym; using Elf_Word = typename ELFT::Word; std::vector Ret; uint64_t I = 0; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(FileName, Obj->sections())) { ++I; if (Sec.sh_type != ELF::SHT_GROUP) continue; - const Elf_Shdr *Symtab = unwrapOrError(Obj->getSection(Sec.sh_link)); - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab)); - const Elf_Sym *Sym = - unwrapOrError(Obj->template getEntry(Symtab, Sec.sh_info)); - auto Data = - unwrapOrError(Obj->template getSectionContentsAsArray(&Sec)); + const Elf_Shdr *Symtab = + unwrapOrError(FileName, Obj->getSection(Sec.sh_link)); + StringRef StrTable = + unwrapOrError(FileName, Obj->getStringTableForSymtab(*Symtab)); + const Elf_Sym *Sym = unwrapOrError( + FileName, Obj->template getEntry(Symtab, Sec.sh_info)); + auto Data = unwrapOrError( + FileName, Obj->template getSectionContentsAsArray(&Sec)); - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(FileName, Obj->getSectionName(&Sec)); StringRef Signature = StrTable.data() + Sym->st_name; Ret.push_back({Name, maybeDemangle(Signature), @@ -2695,8 +2864,8 @@ std::vector getGroups(const ELFFile *Obj) { std::vector &GM = Ret.back().Members; for (uint32_t Ndx : Data.slice(1)) { - auto Sec = unwrapOrError(Obj->getSection(Ndx)); - const StringRef Name = unwrapOrError(Obj->getSectionName(Sec)); + auto Sec = unwrapOrError(FileName, Obj->getSection(Ndx)); + const StringRef Name = unwrapOrError(FileName, Obj->getSectionName(Sec)); GM.push_back({Name, Ndx}); } } @@ 
-2715,7 +2884,7 @@ mapSectionsToGroups(ArrayRef Groups) { } // namespace template void GNUStyle::printGroupSections(const ELFO *Obj) { - std::vector V = getGroups(Obj); + std::vector V = getGroups(Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { OS << "\n" @@ -2745,14 +2914,17 @@ template void GNUStyle::printGroupSections(const ELFO *Obj) { template void GNUStyle::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab, const Elf_Rela &R, bool IsRela) { - const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab)); + const Elf_Sym *Sym = + unwrapOrError(this->FileName, Obj->getRelocationSymbol(&R, SymTab)); std::string TargetName; if (Sym && Sym->getType() == ELF::STT_SECTION) { const Elf_Shdr *Sec = unwrapOrError( + this->FileName, Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable())); - TargetName = unwrapOrError(Obj->getSectionName(Sec)); + TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); } else if (Sym) { - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab)); TargetName = this->dumper()->getFullSymbolName( Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */); } @@ -2821,21 +2993,21 @@ template void GNUStyle::printRelocHeader(unsigned SType) { template void GNUStyle::printRelocations(const ELFO *Obj) { bool HasRelocSections = false; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL && Sec.sh_type != ELF::SHT_ANDROID_RELA && Sec.sh_type != ELF::SHT_ANDROID_RELR) continue; HasRelocSections = true; - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(this->FileName, 
Obj->getSectionName(&Sec)); unsigned Entries = Sec.getEntityCount(); std::vector AndroidRelas; if (Sec.sh_type == ELF::SHT_ANDROID_REL || Sec.sh_type == ELF::SHT_ANDROID_RELA) { // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. - AndroidRelas = unwrapOrError(Obj->android_relas(&Sec)); + AndroidRelas = unwrapOrError(this->FileName, Obj->android_relas(&Sec)); Entries = AndroidRelas.size(); } std::vector RelrRelas; @@ -2843,8 +3015,8 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { Sec.sh_type == ELF::SHT_ANDROID_RELR)) { // .relr.dyn relative relocation section needs to be unpacked first // to get the actual number of entries. - Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(&Sec)); - RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(&Sec)); + RelrRelas = unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); Entries = RelrRelas.size(); } uintX_t Offset = Sec.sh_offset; @@ -2852,10 +3024,11 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { << to_hexString(Offset, false) << " contains " << Entries << " entries:\n"; printRelocHeader(Sec.sh_type); - const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec.sh_link)); + const Elf_Shdr *SymTab = + unwrapOrError(this->FileName, Obj->getSection(Sec.sh_link)); switch (Sec.sh_type) { case ELF::SHT_REL: - for (const auto &R : unwrapOrError(Obj->rels(&Sec))) { + for (const auto &R : unwrapOrError(this->FileName, Obj->rels(&Sec))) { Elf_Rela Rela; Rela.r_offset = R.r_offset; Rela.r_info = R.r_info; @@ -2864,13 +3037,13 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { } break; case ELF::SHT_RELA: - for (const auto &R : unwrapOrError(Obj->relas(&Sec))) + for (const auto &R : unwrapOrError(this->FileName, Obj->relas(&Sec))) printRelocation(Obj, SymTab, R, true); break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: if (opts::RawRelr) - for (const auto &R : 
unwrapOrError(Obj->relrs(&Sec))) + for (const auto &R : unwrapOrError(this->FileName, Obj->relrs(&Sec))) OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n"; else @@ -2992,6 +3165,12 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { return "LLVM_ADDRSIG"; case SHT_LLVM_DEPENDENT_LIBRARIES: return "LLVM_DEPENDENT_LIBRARIES"; + case SHT_LLVM_SYMPART: + return "LLVM_SYMPART"; + case SHT_LLVM_PART_EHDR: + return "LLVM_PART_EHDR"; + case SHT_LLVM_PART_PHDR: + return "LLVM_PART_PHDR"; // FIXME: Parse processor specific GNU attributes case SHT_GNU_ATTRIBUTES: return "ATTRIBUTES"; @@ -3009,30 +3188,10 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { return ""; } -template -static StringRef getSectionName(const typename ELFT::Shdr &Sec, - const ELFObjectFile &ElfObj, - ArrayRef Sections) { - const ELFFile &Obj = *ElfObj.getELFFile(); - uint32_t Index = Obj.getHeader()->e_shstrndx; - if (Index == ELF::SHN_XINDEX) - Index = Sections[0].sh_link; - if (!Index) // no section string table. - return ""; - // TODO: Test a case when the sh_link of the section with index 0 is broken. - if (Index >= Sections.size()) - reportError(ElfObj.getFileName(), - createError("section header string table index " + - Twine(Index) + " does not exist")); - StringRef Data = toStringRef(unwrapOrError( - Obj.template getSectionContentsAsArray(&Sections[Index]))); - return unwrapOrError(Obj.getSectionName(&Sec, Data)); -} - template void GNUStyle::printSectionHeaders(const ELFO *Obj) { unsigned Bias = ELFT::Is64Bits ? 
0 : 8; - ArrayRef Sections = unwrapOrError(Obj->sections()); + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); OS << "There are " << to_string(Sections.size()) << " section headers, starting at offset " << "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n"; @@ -3050,7 +3209,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { size_t SectionIndex = 0; for (const Elf_Shdr &Sec : Sections) { Fields[0].Str = to_string(SectionIndex); - Fields[1].Str = getSectionName(Sec, *ElfObj, Sections); + Fields[1].Str = unwrapOrError( + ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler)); Fields[2].Str = getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type); Fields[3].Str = @@ -3089,7 +3249,8 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { template void GNUStyle::printSymtabMessage(const ELFO *Obj, StringRef Name, - size_t Entries) { + size_t Entries, + bool NonVisibilityBitsUsed) { if (!Name.empty()) OS << "\nSymbol table '" << Name << "' contains " << Entries << " entries:\n"; @@ -3097,9 +3258,13 @@ void GNUStyle::printSymtabMessage(const ELFO *Obj, StringRef Name, OS << "\n Symbol table for image:\n"; if (ELFT::Is64Bits) - OS << " Num: Value Size Type Bind Vis Ndx Name\n"; + OS << " Num: Value Size Type Bind Vis"; else - OS << " Num: Value Size Type Bind Vis Ndx Name\n"; + OS << " Num: Value Size Type Bind Vis"; + + if (NonVisibilityBitsUsed) + OS << " "; + OS << " Ndx Name\n"; } template @@ -3115,10 +3280,11 @@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, case ELF::SHN_COMMON: return "COM"; case ELF::SHN_XINDEX: - return to_string( - format_decimal(unwrapOrError(object::getExtendedSymbolTableIndex( - Symbol, FirstSym, this->dumper()->getShndxTable())), - 3)); + return to_string(format_decimal( + unwrapOrError(this->FileName, + object::getExtendedSymbolTableIndex( + Symbol, FirstSym, this->dumper()->getShndxTable())), + 3)); default: // Find if: // Processor specific @@ -3142,7 +3308,7 
@@ std::string GNUStyle::getSymbolSectionNdx(const ELFO *Obj, template void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *FirstSym, StringRef StrTable, - bool IsDynamic) { + bool IsDynamic, bool NonVisibilityBitsUsed) { static int Idx = 0; static bool Dynamic = true; @@ -3156,7 +3322,7 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, unsigned Bias = ELFT::Is64Bits ? 8 : 0; Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias, - 31 + Bias, 38 + Bias, 47 + Bias, 51 + Bias}; + 31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias}; Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":"; Fields[1].Str = to_string( format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8)); @@ -3173,7 +3339,13 @@ void GNUStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); Fields[5].Str = printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities)); + if (Symbol->st_other & ~0x3) + Fields[5].Str += + " [st_other, 2)) + ">]"; + + Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0; Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym); + Fields[7].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic); for (auto &Entry : Fields) @@ -3193,7 +3365,7 @@ void GNUStyle::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, const auto Symbol = FirstSym + Sym; Fields[2].Str = to_string( - format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 18 : 8)); + format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 
16 : 8)); Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5)); unsigned char SymbolType = Symbol->getType(); @@ -3246,10 +3418,21 @@ template void GNUStyle::printHashSymbols(const ELFO *Obj) { for (uint32_t Buc = 0; Buc < SysVHash->nbucket; Buc++) { if (Buckets[Buc] == ELF::STN_UNDEF) continue; + std::vector Visited(SysVHash->nchain); for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash->nchain; Ch = Chains[Ch]) { if (Ch == ELF::STN_UNDEF) break; + + if (Visited[Ch]) { + reportWarning( + createError(".hash section is invalid: bucket " + Twine(Ch) + + ": a cycle was detected in the linked chain"), + this->FileName); + break; + } + printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc); + Visited[Ch] = true; } } } @@ -3380,7 +3563,8 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { unsigned Width = ELFT::Is64Bits ? 18 : 10; unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7; - for (const auto &Phdr : unwrapOrError(Obj->program_headers())) { + for (const auto &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type); Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8)); Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width)); @@ -3404,10 +3588,11 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; DenseSet BelongsToSegment; int Phnum = 0; - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { std::string Sections; OS << format(" %2.2d ", Phnum++); - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { // Check if each section is in a segment and then print mapping. 
// readelf additionally makes sure it does not print zero sized sections // at end of segments and for PT_DYNAMIC both start and end of section @@ -3418,7 +3603,9 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) && checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) { - Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " "; + Sections += + unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + + " "; BelongsToSegment.insert(&Sec); } } @@ -3428,9 +3615,10 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { // Display sections that do not belong to a segment. std::string Sections; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) - Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' '; + Sections += + unwrapOrError(this->FileName, Obj->getSectionName(&Sec)).str() + ' '; } if (!Sections.empty()) { OS << " None " << Sections << '\n'; @@ -3438,14 +3626,40 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { } } +namespace { +template struct RelSymbol { + const typename ELFT::Sym *Sym; + std::string Name; +}; + +template +RelSymbol getSymbolForReloc(const ELFFile *Obj, StringRef FileName, + const ELFDumper *Dumper, + const typename ELFT::Rela &Reloc) { + uint32_t SymIndex = Reloc.getSymbol(Obj->isMips64EL()); + const typename ELFT::Sym *Sym = Dumper->dynamic_symbols().begin() + SymIndex; + Expected ErrOrName = Sym->getName(Dumper->getDynamicStringTable()); + + std::string Name; + if (ErrOrName) { + Name = maybeDemangle(*ErrOrName); + } else { + reportWarning( + createError("unable to get name of the dynamic symbol with index " + + Twine(SymIndex) + ": " + toString(ErrOrName.takeError())), + FileName); + Name = ""; + } + + return {Sym, std::move(Name)}; +} +} // 
namespace + template void GNUStyle::printDynamicRelocation(const ELFO *Obj, Elf_Rela R, bool IsRela) { - uint32_t SymIndex = R.getSymbol(Obj->isMips64EL()); - const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex; - std::string SymbolName = maybeDemangle( - unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()))); - printRelocation(Obj, Sym, SymbolName, R, IsRela); + RelSymbol S = getSymbolForReloc(Obj, this->FileName, this->dumper(), R); + printRelocation(Obj, S.Sym, S.Name, R, IsRela); } template void GNUStyle::printDynamic(const ELFO *Obj) { @@ -3518,7 +3732,8 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { << " contains " << DynRelrRegion.Size << " bytes:\n"; printRelocHeader(ELF::SHT_REL); Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &Rela : RelrRelas) { printDynamicRelocation(Obj, Rela, false); } @@ -3550,14 +3765,15 @@ template static void printGNUVersionSectionProlog(formatted_raw_ostream &OS, const Twine &Name, unsigned EntriesNum, const ELFFile *Obj, - const typename ELFT::Shdr *Sec) { - StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); + const typename ELFT::Shdr *Sec, + StringRef FileName) { + StringRef SecName = unwrapOrError(FileName, Obj->getSectionName(Sec)); OS << Name << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; const typename ELFT::Shdr *SymTab = - unwrapOrError(Obj->getSection(Sec->sh_link)); - StringRef SymTabName = unwrapOrError(Obj->getSectionName(SymTab)); + unwrapOrError(FileName, Obj->getSection(Sec->sh_link)); + StringRef SymTabName = unwrapOrError(FileName, Obj->getSectionName(SymTab)); OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) << " Offset: " << format_hex(Sec->sh_offset, 8) << " Link: " << Sec->sh_link << " (" << SymTabName << ")\n"; @@ -3570,7 +3786,8 
@@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, return; unsigned Entries = Sec->sh_size / sizeof(Elf_Versym); - printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec, + this->FileName); const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); @@ -3642,14 +3859,17 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, return; unsigned VerDefsNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec, + this->FileName); - const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTabSec = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); StringRef StringTable( reinterpret_cast(Obj->base() + StrTabSec->sh_offset), (size_t)StrTabSec->sh_size); - const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data(); + const uint8_t *VerdefBuf = + unwrapOrError(this->FileName, Obj->getSectionContents(Sec)).data(); const uint8_t *Begin = VerdefBuf; while (VerDefsNum--) { @@ -3684,11 +3904,14 @@ void GNUStyle::printVersionDependencySection(const ELFFile *Obj, return; unsigned VerneedNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec); + printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec, + this->FileName); - ArrayRef SecData = unwrapOrError(Obj->getSectionContents(Sec)); + ArrayRef SecData = + unwrapOrError(this->FileName, Obj->getSectionContents(Sec)); - const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTabSec = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); StringRef StringTable = { reinterpret_cast(Obj->base() + StrTabSec->sh_offset), (size_t)StrTabSec->sh_size}; @@ -3745,9 +3968,21 @@ void GNUStyle::printHashHistogram(const ELFFile *Obj) { 
// Go over all buckets and and note chain lengths of each bucket (total // unique chain lengths). for (size_t B = 0; B < NBucket; B++) { - for (size_t C = Buckets[B]; C > 0 && C < NChain; C = Chains[C]) + std::vector Visited(NChain); + for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) { + if (C == ELF::STN_UNDEF) + break; + if (Visited[C]) { + reportWarning( + createError(".hash section is invalid: bucket " + Twine(C) + + ": a cycle was detected in the linked chain"), + this->FileName); + break; + } + Visited[C] = true; if (MaxChain <= ++ChainLen[B]) MaxChain++; + } TotalSyms += ChainLen[B]; } @@ -3829,7 +4064,7 @@ void GNUStyle::printCGProfile(const ELFFile *Obj) { template void GNUStyle::printAddrsig(const ELFFile *Obj) { - OS << "GNUStyle::printAddrsig not implemented\n"; + reportError(createError("--addrsig: not implemented"), this->FileName); } static StringRef getGenericNoteTypeName(const uint32_t NT) { @@ -3850,6 +4085,86 @@ static StringRef getGenericNoteTypeName(const uint32_t NT) { return ""; } +static StringRef getCoreNoteTypeName(const uint32_t NT) { + static const struct { + uint32_t ID; + const char *Name; + } Notes[] = { + {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, + {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, + {ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"}, + {ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"}, + {ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"}, + {ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"}, + {ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"}, + {ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"}, + {ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"}, + {ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"}, + {ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"}, + + {ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"}, + {ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"}, + {ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"}, + {ELF::NT_PPC_PPR, 
"NT_PPC_PPR (ppc PPR register)"}, + {ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"}, + {ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"}, + {ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"}, + {ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"}, + {ELF::NT_PPC_TM_CFPR, + "NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"}, + {ELF::NT_PPC_TM_CVMX, + "NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"}, + {ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"}, + {ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"}, + {ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"}, + {ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"}, + {ELF::NT_PPC_TM_CDSCR, + "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"}, + + {ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"}, + {ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"}, + {ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"}, + + {ELF::NT_S390_HIGH_GPRS, + "NT_S390_HIGH_GPRS (s390 upper register halves)"}, + {ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"}, + {ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"}, + {ELF::NT_S390_TODPREG, + "NT_S390_TODPREG (s390 TOD programmable register)"}, + {ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"}, + {ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"}, + {ELF::NT_S390_LAST_BREAK, + "NT_S390_LAST_BREAK (s390 last breaking event address)"}, + {ELF::NT_S390_SYSTEM_CALL, + "NT_S390_SYSTEM_CALL (s390 system call restart data)"}, + {ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"}, + {ELF::NT_S390_VXRS_LOW, + "NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"}, + {ELF::NT_S390_VXRS_HIGH, + "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"}, + {ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"}, + {ELF::NT_S390_GS_BC, + "NT_S390_GS_BC (s390 
guarded-storage broadcast control)"}, + + {ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"}, + {ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"}, + {ELF::NT_ARM_HW_BREAK, + "NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"}, + {ELF::NT_ARM_HW_WATCH, + "NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"}, + + {ELF::NT_FILE, "NT_FILE (mapped files)"}, + {ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"}, + {ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"}, + }; + + for (const auto &Note : Notes) + if (Note.ID == NT) + return Note.Name; + + return ""; +} + static std::string getGNUNoteTypeName(const uint32_t NT) { static const struct { uint32_t ID; @@ -4207,13 +4522,85 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Desc) { } } +struct CoreFileMapping { + uint64_t Start, End, Offset; + StringRef Filename; +}; + +struct CoreNote { + uint64_t PageSize; + std::vector Mappings; +}; + +static Expected readCoreNote(DataExtractor Desc) { + // Expected format of the NT_FILE note description: + // 1. # of file mappings (call it N) + // 2. Page size + // 3. N (start, end, offset) triples + // 4. N packed filenames (null delimited) + // Each field is an Elf_Addr, except for filenames which are char* strings. 
+ + CoreNote Ret; + const int Bytes = Desc.getAddressSize(); + + if (!Desc.isValidOffsetForAddress(2)) + return createStringError(object_error::parse_failed, + "malformed note: header too short"); + if (Desc.getData().back() != 0) + return createStringError(object_error::parse_failed, + "malformed note: not NUL terminated"); + + uint64_t DescOffset = 0; + uint64_t FileCount = Desc.getAddress(&DescOffset); + Ret.PageSize = Desc.getAddress(&DescOffset); + + if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes)) + return createStringError(object_error::parse_failed, + "malformed note: too short for number of files"); + + uint64_t FilenamesOffset = 0; + DataExtractor Filenames( + Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes), + Desc.isLittleEndian(), Desc.getAddressSize()); + + Ret.Mappings.resize(FileCount); + for (CoreFileMapping &Mapping : Ret.Mappings) { + if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1)) + return createStringError(object_error::parse_failed, + "malformed note: too few filenames"); + Mapping.Start = Desc.getAddress(&DescOffset); + Mapping.End = Desc.getAddress(&DescOffset); + Mapping.Offset = Desc.getAddress(&DescOffset); + Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset); + } + + return Ret; +} + +template +static void printCoreNote(raw_ostream &OS, const CoreNote &Note) { + // Length of "0x
" string. + const int FieldWidth = ELFT::Is64Bits ? 18 : 10; + + OS << " Page size: " << format_decimal(Note.PageSize, 0) << '\n'; + OS << " " << right_justify("Start", FieldWidth) << " " + << right_justify("End", FieldWidth) << " " + << right_justify("Page Offset", FieldWidth) << '\n'; + for (const CoreFileMapping &Mapping : Note.Mappings) { + OS << " " << format_hex(Mapping.Start, FieldWidth) << " " + << format_hex(Mapping.End, FieldWidth) << " " + << format_hex(Mapping.Offset, FieldWidth) << "\n " + << Mapping.Filename << '\n'; + } +} + template void GNUStyle::printNotes(const ELFFile *Obj) { auto PrintHeader = [&](const typename ELFT::Off Offset, const typename ELFT::Addr Size) { OS << "Displaying notes found at file offset " << format_hex(Offset, 10) << " with length " << format_hex(Size, 10) << ":\n" - << " Owner Data size\tDescription\n"; + << " Owner Data size \tDescription\n"; }; auto ProcessNote = [&](const Elf_Note &Note) { @@ -4221,47 +4608,61 @@ void GNUStyle::printNotes(const ELFFile *Obj) { ArrayRef Descriptor = Note.getDesc(); Elf_Word Type = Note.getType(); - OS << " " << Name << std::string(22 - Name.size(), ' ') + // Print the note owner/type. + OS << " " << left_justify(Name, 20) << ' ' << format_hex(Descriptor.size(), 10) << '\t'; - if (Name == "GNU") { OS << getGNUNoteTypeName(Type) << '\n'; - printGNUNote(OS, Type, Descriptor); } else if (Name == "FreeBSD") { OS << getFreeBSDNoteTypeName(Type) << '\n'; } else if (Name == "AMD") { OS << getAMDNoteTypeName(Type) << '\n'; + } else if (Name == "AMDGPU") { + OS << getAMDGPUNoteTypeName(Type) << '\n'; + } else { + StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + ? getCoreNoteTypeName(Type) + : getGenericNoteTypeName(Type); + if (!NoteType.empty()) + OS << NoteType << '\n'; + else + OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n"; + } + + // Print the description, or fallback to printing raw bytes for unknown + // owners. 
+ if (Name == "GNU") { + printGNUNote(OS, Type, Descriptor); + } else if (Name == "AMD") { const AMDNote N = getAMDNote(Type, Descriptor); if (!N.Type.empty()) OS << " " << N.Type << ":\n " << N.Value << '\n'; } else if (Name == "AMDGPU") { - OS << getAMDGPUNoteTypeName(Type) << '\n'; const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); if (!N.Type.empty()) OS << " " << N.Type << ":\n " << N.Value << '\n'; - } else { - StringRef NoteType = getGenericNoteTypeName(Type); - if (!NoteType.empty()) - OS << NoteType; - else - OS << "Unknown note type: (" << format_hex(Type, 10) << ')'; + } else if (Name == "CORE") { + if (Type == ELF::NT_FILE) { + DataExtractor DescExtractor(Descriptor, + ELFT::TargetEndianness == support::little, + sizeof(Elf_Addr)); + Expected Note = readCoreNote(DescExtractor); + if (Note) + printCoreNote(OS, *Note); + else + reportWarning(Note.takeError(), this->FileName); + } + } else if (!Descriptor.empty()) { + OS << " description data:"; + for (uint8_t B : Descriptor) + OS << " " << format("%02x", B); + OS << '\n'; } - OS << '\n'; }; - if (Obj->getHeader()->e_type == ELF::ET_CORE) { - for (const auto &P : unwrapOrError(Obj->program_headers())) { - if (P.p_type != PT_NOTE) - continue; - PrintHeader(P.p_offset, P.p_filesz); - Error Err = Error::success(); - for (const auto &Note : Obj->notes(P, Err)) - ProcessNote(Note); - if (Err) - error(std::move(Err)); - } - } else { - for (const auto &S : unwrapOrError(Obj->sections())) { + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); + if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; PrintHeader(S.sh_offset, S.sh_size); @@ -4269,7 +4670,19 @@ void GNUStyle::printNotes(const ELFFile *Obj) { for (const auto &Note : Obj->notes(S, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); + } + } else { + for (const auto &P : + 
unwrapOrError(this->FileName, Obj->program_headers())) { + if (P.p_type != PT_NOTE) + continue; + PrintHeader(P.p_offset, P.p_filesz); + Error Err = Error::success(); + for (const auto &Note : Obj->notes(P, Err)) + ProcessNote(Note); + if (Err) + reportError(std::move(Err), this->FileName); } } } @@ -4279,6 +4692,294 @@ void GNUStyle::printELFLinkerOptions(const ELFFile *Obj) { OS << "printELFLinkerOptions not implemented!\n"; } +// Used for printing section names in places where possible errors can be +// ignored. +static StringRef getSectionName(const SectionRef &Sec) { + Expected NameOrErr = Sec.getName(); + if (NameOrErr) + return *NameOrErr; + consumeError(NameOrErr.takeError()); + return ""; +} + +// Used for printing symbol names in places where possible errors can be +// ignored. +static std::string getSymbolName(const ELFSymbolRef &Sym) { + Expected NameOrErr = Sym.getName(); + if (NameOrErr) + return maybeDemangle(*NameOrErr); + consumeError(NameOrErr.takeError()); + return ""; +} + +template +void DumpStyle::printFunctionStackSize( + const ELFObjectFile *Obj, uint64_t SymValue, SectionRef FunctionSec, + const StringRef SectionName, DataExtractor Data, uint64_t *Offset) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. + SymbolRef FuncSym; + for (const ELFSymbolRef &Symbol : Obj->symbols()) { + Expected SymAddrOrErr = Symbol.getAddress(); + if (!SymAddrOrErr) { + consumeError(SymAddrOrErr.takeError()); + continue; + } + if (Symbol.getELFType() == ELF::STT_FUNC && *SymAddrOrErr == SymValue) { + // Check if the symbol is in the right section. + if (FunctionSec.containsSymbol(Symbol)) { + FuncSym = Symbol; + break; + } + } + } + + std::string FuncName = "?"; + // A valid SymbolRef has a non-null object file pointer. 
+ if (FuncSym.BasicSymbolRef::getObject()) + FuncName = getSymbolName(FuncSym); + else + reportWarning( + createError("could not identify function symbol for stack size entry"), + Obj->getFileName()); + + // Extract the size. The expectation is that Offset is pointing to the right + // place, i.e. past the function address. + uint64_t PrevOffset = *Offset; + uint64_t StackSize = Data.getULEB128(Offset); + // getULEB128() does not advance Offset if it is not able to extract a valid + // integer. + if (*Offset == PrevOffset) + reportError( + createStringError(object_error::parse_failed, + "could not extract a valid stack size in section %s", + SectionName.data()), + Obj->getFileName()); + + printStackSizeEntry(StackSize, FuncName); +} + +template +void GNUStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { + OS.PadToColumn(2); + OS << format_decimal(Size, 11); + OS.PadToColumn(18); + OS << FuncName << "\n"; +} + +template +void DumpStyle::printStackSize(const ELFObjectFile *Obj, + RelocationRef Reloc, + SectionRef FunctionSec, + const StringRef &StackSizeSectionName, + const RelocationResolver &Resolver, + DataExtractor Data) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. + object::symbol_iterator RelocSym = Reloc.getSymbol(); + uint64_t RelocSymValue = 0; + StringRef FileStr = Obj->getFileName(); + if (RelocSym != Obj->symbol_end()) { + // Ensure that the relocation symbol is in the function section, i.e. the + // section where the functions whose stack sizes we are reporting are + // located. 
+ auto SectionOrErr = RelocSym->getSection(); + if (!SectionOrErr) { + reportWarning( + createError("cannot identify the section for relocation symbol '" + + getSymbolName(*RelocSym) + "'"), + FileStr); + consumeError(SectionOrErr.takeError()); + } else if (*SectionOrErr != FunctionSec) { + reportWarning(createError("relocation symbol '" + + getSymbolName(*RelocSym) + + "' is not in the expected section"), + FileStr); + // Pretend that the symbol is in the correct section and report its + // stack size anyway. + FunctionSec = **SectionOrErr; + } + + Expected RelocSymValueOrErr = RelocSym->getValue(); + if (RelocSymValueOrErr) + RelocSymValue = *RelocSymValueOrErr; + else + consumeError(RelocSymValueOrErr.takeError()); + } + + uint64_t Offset = Reloc.getOffset(); + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) + reportError( + createStringError(object_error::parse_failed, + "found invalid relocation offset into section %s " + "while trying to extract a stack size entry", + StackSizeSectionName.data()), + FileStr); + + uint64_t Addend = Data.getAddress(&Offset); + uint64_t SymValue = Resolver(Reloc, RelocSymValue, Addend); + this->printFunctionStackSize(Obj, SymValue, FunctionSec, StackSizeSectionName, + Data, &Offset); +} + +template +void DumpStyle::printNonRelocatableStackSizes( + const ELFObjectFile *Obj, std::function PrintHeader) { + // This function ignores potentially erroneous input, unless it is directly + // related to stack size reporting. 
+ const ELFFile *EF = Obj->getELFFile(); + StringRef FileStr = Obj->getFileName(); + for (const SectionRef &Sec : Obj->sections()) { + StringRef SectionName = getSectionName(Sec); + if (SectionName != ".stack_sizes") + continue; + PrintHeader(); + const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); + ArrayRef Contents = + unwrapOrError(this->FileName, EF->getSectionContents(ElfSec)); + DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); + // A .stack_sizes section header's sh_link field is supposed to point + // to the section that contains the functions whose stack sizes are + // described in it. + const Elf_Shdr *FunctionELFSec = + unwrapOrError(this->FileName, EF->getSection(ElfSec->sh_link)); + uint64_t Offset = 0; + while (Offset < Contents.size()) { + // The function address is followed by a ULEB representing the stack + // size. Check for an extra byte before we try to process the entry. + if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) { + reportError( + createStringError( + object_error::parse_failed, + "section %s ended while trying to extract a stack size entry", + SectionName.data()), + FileStr); + } + uint64_t SymValue = Data.getAddress(&Offset); + printFunctionStackSize(Obj, SymValue, Obj->toSectionRef(FunctionELFSec), + SectionName, Data, &Offset); + } + } +} + +template +void DumpStyle::printRelocatableStackSizes( + const ELFObjectFile *Obj, std::function PrintHeader) { + const ELFFile *EF = Obj->getELFFile(); + + // Build a map between stack size sections and their corresponding relocation + // sections. 
+ llvm::MapVector StackSizeRelocMap; + const SectionRef NullSection{}; + + for (const SectionRef &Sec : Obj->sections()) { + StringRef SectionName; + if (Expected NameOrErr = Sec.getName()) + SectionName = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + + // A stack size section that we haven't encountered yet is mapped to the + // null section until we find its corresponding relocation section. + if (SectionName == ".stack_sizes") + if (StackSizeRelocMap.count(Sec) == 0) { + StackSizeRelocMap[Sec] = NullSection; + continue; + } + + // Check relocation sections if they are relocating contents of a + // stack sizes section. + const Elf_Shdr *ElfSec = Obj->getSection(Sec.getRawDataRefImpl()); + uint32_t SectionType = ElfSec->sh_type; + if (SectionType != ELF::SHT_RELA && SectionType != ELF::SHT_REL) + continue; + + Expected RelSecOrErr = Sec.getRelocatedSection(); + if (!RelSecOrErr) + reportError(createStringError(object_error::parse_failed, + "%s: failed to get a relocated section: %s", + SectionName.data(), + toString(RelSecOrErr.takeError()).c_str()), + Obj->getFileName()); + + const Elf_Shdr *ContentsSec = + Obj->getSection((*RelSecOrErr)->getRawDataRefImpl()); + Expected ContentsSectionNameOrErr = + EF->getSectionName(ContentsSec); + if (!ContentsSectionNameOrErr) { + consumeError(ContentsSectionNameOrErr.takeError()); + continue; + } + if (*ContentsSectionNameOrErr != ".stack_sizes") + continue; + // Insert a mapping from the stack sizes section to its relocation section. + StackSizeRelocMap[Obj->toSectionRef(ContentsSec)] = Sec; + } + + for (const auto &StackSizeMapEntry : StackSizeRelocMap) { + PrintHeader(); + const SectionRef &StackSizesSec = StackSizeMapEntry.first; + const SectionRef &RelocSec = StackSizeMapEntry.second; + + // Warn about stack size sections without a relocation section. 
+ StringRef StackSizeSectionName = getSectionName(StackSizesSec); + if (RelocSec == NullSection) { + reportWarning(createError("section " + StackSizeSectionName + + " does not have a corresponding " + "relocation section"), + Obj->getFileName()); + continue; + } + + // A .stack_sizes section header's sh_link field is supposed to point + // to the section that contains the functions whose stack sizes are + // described in it. + const Elf_Shdr *StackSizesELFSec = + Obj->getSection(StackSizesSec.getRawDataRefImpl()); + const SectionRef FunctionSec = Obj->toSectionRef(unwrapOrError( + this->FileName, EF->getSection(StackSizesELFSec->sh_link))); + + bool (*IsSupportedFn)(uint64_t); + RelocationResolver Resolver; + std::tie(IsSupportedFn, Resolver) = getRelocationResolver(*Obj); + auto Contents = unwrapOrError(this->FileName, StackSizesSec.getContents()); + DataExtractor Data(Contents, Obj->isLittleEndian(), sizeof(Elf_Addr)); + for (const RelocationRef &Reloc : RelocSec.relocations()) { + if (!IsSupportedFn || !IsSupportedFn(Reloc.getType())) + reportError(createStringError( + object_error::parse_failed, + "unsupported relocation type in section %s: %s", + getSectionName(RelocSec).data(), + EF->getRelocationTypeName(Reloc.getType()).data()), + Obj->getFileName()); + this->printStackSize(Obj, Reloc, FunctionSec, StackSizeSectionName, + Resolver, Data); + } + } +} + +template +void GNUStyle::printStackSizes(const ELFObjectFile *Obj) { + bool HeaderHasBeenPrinted = false; + auto PrintHeader = [&]() { + if (HeaderHasBeenPrinted) + return; + OS << "\nStack Sizes:\n"; + OS.PadToColumn(9); + OS << "Size"; + OS.PadToColumn(18); + OS << "Function\n"; + HeaderHasBeenPrinted = true; + }; + + // For non-relocatable objects, look directly for sections whose name starts + // with .stack_sizes and process the contents. 
+ if (Obj->isRelocatableObject()) + this->printRelocatableStackSizes(Obj, PrintHeader); + else + this->printNonRelocatableStackSizes(Obj, PrintHeader); +} + template void GNUStyle::printMipsGOT(const MipsGOTParser &Parser) { size_t Bias = ELFT::Is64Bits ? 8 : 0; @@ -4402,6 +5103,45 @@ void GNUStyle::printMipsPLT(const MipsGOTParser &Parser) { } } +template +void GNUStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { + const ELFFile *Obj = ObjF->getELFFile(); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags"); + if (!Shdr) + return; + + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); + if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) + reportError(createError(".MIPS.abiflags section has a wrong size"), + ObjF->getFileName()); + + auto *Flags = reinterpret_cast *>(Sec.data()); + + OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n"; + OS << "ISA: MIPS" << int(Flags->isa_level); + if (Flags->isa_rev > 1) + OS << "r" << int(Flags->isa_rev); + OS << "\n"; + OS << "GPR size: " << getMipsRegisterSize(Flags->gpr_size) << "\n"; + OS << "CPR1 size: " << getMipsRegisterSize(Flags->cpr1_size) << "\n"; + OS << "CPR2 size: " << getMipsRegisterSize(Flags->cpr2_size) << "\n"; + OS << "FP ABI: " << printEnum(Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)) + << "\n"; + OS << "ISA Extension: " + << printEnum(Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)) << "\n"; + if (Flags->ases == 0) + OS << "ASEs: None\n"; + else + // FIXME: Print each flag on a separate line. 
+ OS << "ASEs: " << printFlags(Flags->ases, makeArrayRef(ElfMipsASEFlags)) + << "\n"; + OS << "FLAGS 1: " << format_hex_no_prefix(Flags->flags1, 8, false) << "\n"; + OS << "FLAGS 2: " << format_hex_no_prefix(Flags->flags2, 8, false) << "\n"; + OS << "\n"; +} + template void LLVMStyle::printFileHeaders(const ELFO *Obj) { const Elf_Ehdr *E = Obj->getHeader(); { @@ -4455,16 +5195,17 @@ template void LLVMStyle::printFileHeaders(const ELFO *Obj) { W.printNumber("ProgramHeaderEntrySize", E->e_phentsize); W.printNumber("ProgramHeaderCount", E->e_phnum); W.printNumber("SectionHeaderEntrySize", E->e_shentsize); - W.printString("SectionHeaderCount", getSectionHeadersNumString(Obj)); + W.printString("SectionHeaderCount", + getSectionHeadersNumString(Obj, this->FileName)); W.printString("StringTableSectionIndex", - getSectionHeaderTableIndexString(Obj)); + getSectionHeaderTableIndexString(Obj, this->FileName)); } } template void LLVMStyle::printGroupSections(const ELFO *Obj) { DictScope Lists(W, "Groups"); - std::vector V = getGroups(Obj); + std::vector V = getGroups(Obj, this->FileName); DenseMap Map = mapSectionsToGroups(V); for (const GroupSection &G : V) { DictScope D(W, "Group"); @@ -4499,7 +5240,7 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { ListScope D(W, "Relocations"); int SectionNumber = -1; - for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Sec : unwrapOrError(this->FileName, Obj->sections())) { ++SectionNumber; if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA && @@ -4508,7 +5249,7 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { Sec.sh_type != ELF::SHT_ANDROID_RELR) continue; - StringRef Name = unwrapOrError(Obj->getSectionName(&Sec)); + StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec)); W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n"; W.indent(); @@ -4522,11 +5263,12 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { 
template void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { - const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *SymTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); switch (Sec->sh_type) { case ELF::SHT_REL: - for (const Elf_Rel &R : unwrapOrError(Obj->rels(Sec))) { + for (const Elf_Rel &R : unwrapOrError(this->FileName, Obj->rels(Sec))) { Elf_Rela Rela; Rela.r_offset = R.r_offset; Rela.r_info = R.r_info; @@ -4535,17 +5277,18 @@ void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { } break; case ELF::SHT_RELA: - for (const Elf_Rela &R : unwrapOrError(Obj->relas(Sec))) + for (const Elf_Rela &R : unwrapOrError(this->FileName, Obj->relas(Sec))) printRelocation(Obj, R, SymTab); break; case ELF::SHT_RELR: case ELF::SHT_ANDROID_RELR: { - Elf_Relr_Range Relrs = unwrapOrError(Obj->relrs(Sec)); + Elf_Relr_Range Relrs = unwrapOrError(this->FileName, Obj->relrs(Sec)); if (opts::RawRelr) { for (const Elf_Relr &R : Relrs) W.startLine() << W.hex(R) << "\n"; } else { - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &R : RelrRelas) printRelocation(Obj, R, SymTab); } @@ -4553,7 +5296,8 @@ void LLVMStyle::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) { } case ELF::SHT_ANDROID_REL: case ELF::SHT_ANDROID_RELA: - for (const Elf_Rela &R : unwrapOrError(Obj->android_relas(Sec))) + for (const Elf_Rela &R : + unwrapOrError(this->FileName, Obj->android_relas(Sec))) printRelocation(Obj, R, SymTab); break; } @@ -4565,13 +5309,16 @@ void LLVMStyle::printRelocation(const ELFO *Obj, Elf_Rela Rel, SmallString<32> RelocName; Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); std::string TargetName; - const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab)); + const Elf_Sym *Sym = + unwrapOrError(this->FileName, 
Obj->getRelocationSymbol(&Rel, SymTab)); if (Sym && Sym->getType() == ELF::STT_SECTION) { const Elf_Shdr *Sec = unwrapOrError( + this->FileName, Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable())); - TargetName = unwrapOrError(Obj->getSectionName(Sec)); + TargetName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); } else if (Sym) { - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*SymTab)); TargetName = this->dumper()->getFullSymbolName( Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */); } @@ -4596,10 +5343,11 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { ListScope SectionsD(W, "Sections"); int SectionIndex = -1; - ArrayRef Sections = unwrapOrError(Obj->sections()); + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); const ELFObjectFile *ElfObj = this->dumper()->getElfObject(); for (const Elf_Shdr &Sec : Sections) { - StringRef Name = getSectionName(Sec, *ElfObj, Sections); + StringRef Name = unwrapOrError( + ElfObj->getFileName(), Obj->getSectionName(&Sec, this->WarningHandler)); DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); W.printNumber("Name", Name, Sec.sh_name); @@ -4652,19 +5400,25 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { if (opts::SectionSymbols) { ListScope D(W, "Symbols"); const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec(); - StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab)); + StringRef StrTable = + unwrapOrError(this->FileName, Obj->getStringTableForSymtab(*Symtab)); - for (const Elf_Sym &Sym : unwrapOrError(Obj->symbols(Symtab))) { + for (const Elf_Sym &Sym : + unwrapOrError(this->FileName, Obj->symbols(Symtab))) { const Elf_Shdr *SymSec = unwrapOrError( + this->FileName, Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable())); if (SymSec == &Sec) - printSymbol(Obj, &Sym, 
unwrapOrError(Obj->symbols(Symtab)).begin(), - StrTable, false); + printSymbol( + Obj, &Sym, + unwrapOrError(this->FileName, Obj->symbols(Symtab)).begin(), + StrTable, false, false); } } if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) { - ArrayRef Data = unwrapOrError(Obj->getSectionContents(&Sec)); + ArrayRef Data = + unwrapOrError(this->FileName, Obj->getSectionContents(&Sec)); W.printBinaryBlock( "SectionData", StringRef(reinterpret_cast(Data.data()), Data.size())); @@ -4675,7 +5429,8 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { template void LLVMStyle::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First, StringRef StrTable, - bool IsDynamic) { + bool IsDynamic, + bool /*NonVisibilityBitsUsed*/) { unsigned SectionIndex = 0; StringRef SectionName; this->dumper()->getSectionNameIndex(Symbol, First, SectionName, SectionIndex); @@ -4786,7 +5541,8 @@ void LLVMStyle::printDynamicRelocations(const ELFO *Obj) { } if (DynRelrRegion.Size > 0) { Elf_Relr_Range Relrs = this->dumper()->dyn_relrs(); - std::vector RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs)); + std::vector RelrRelas = + unwrapOrError(this->FileName, Obj->decode_relrs(Relrs)); for (const Elf_Rela &Rela : RelrRelas) printDynamicRelocation(Obj, Rela); } @@ -4809,11 +5565,9 @@ template void LLVMStyle::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) { SmallString<32> RelocName; Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName); - std::string SymbolName; - uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL()); - const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex; - SymbolName = maybeDemangle( - unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()))); + std::string SymbolName = + getSymbolForReloc(Obj, this->FileName, this->dumper(), Rel).Name; + if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); W.printHex("Offset", Rel.r_offset); @@ -4842,7 +5596,8 @@ template void 
LLVMStyle::printProgramHeaders(const ELFO *Obj) { ListScope L(W, "ProgramHeaders"); - for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { + for (const Elf_Phdr &Phdr : + unwrapOrError(this->FileName, Obj->program_headers())) { DictScope P(W, "ProgramHeader"); W.printHex("Type", getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type), @@ -4860,23 +5615,16 @@ void LLVMStyle::printProgramHeaders(const ELFO *Obj) { template void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SS(W, "Version symbols"); + ListScope SS(W, "VersionSymbols"); if (!Sec) return; - StringRef SecName = unwrapOrError(Obj->getSectionName(Sec)); - W.printNumber("Section Name", SecName, Sec->sh_name); - W.printHex("Address", Sec->sh_addr); - W.printHex("Offset", Sec->sh_offset); - W.printNumber("Link", Sec->sh_link); - const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); const ELFDumper *Dumper = this->dumper(); StringRef StrTable = Dumper->getDynamicStringTable(); // Same number of entries in the dynamic symbol table (DT_SYMTAB). 
- ListScope Syms(W, "Symbols"); for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) { DictScope S(W, "Symbol"); const Elf_Versym *Versym = reinterpret_cast(VersymBuf); @@ -4891,7 +5639,7 @@ void LLVMStyle::printVersionSymbolSection(const ELFFile *Obj, template void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SD(W, "SHT_GNU_verdef"); + ListScope SD(W, "VersionDefinitions"); if (!Sec) return; @@ -4899,7 +5647,8 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, reinterpret_cast(Obj->base() + Sec->sh_offset); const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; const uint8_t *VerdefBuf = SecStartAddress; - const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); unsigned VerDefsNum = Sec->sh_info; while (VerDefsNum--) { @@ -4938,13 +5687,14 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, template void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, const Elf_Shdr *Sec) { - DictScope SD(W, "SHT_GNU_verneed"); + ListScope SD(W, "VersionRequirements"); if (!Sec) return; const uint8_t *SecData = reinterpret_cast(Obj->base() + Sec->sh_offset); - const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link)); + const Elf_Shdr *StrTab = + unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); const uint8_t *VerneedBuf = SecData; unsigned VerneedNum = Sec->sh_info; @@ -4986,37 +5736,62 @@ void LLVMStyle::printCGProfile(const ELFFile *Obj) { ListScope L(W, "CGProfile"); if (!this->dumper()->getDotCGProfileSec()) return; - auto CGProfile = - unwrapOrError(Obj->template getSectionContentsAsArray( - this->dumper()->getDotCGProfileSec())); + auto CGProfile = unwrapOrError( + this->FileName, Obj->template getSectionContentsAsArray( + this->dumper()->getDotCGProfileSec())); for (const Elf_CGProfile &CGPE : CGProfile) { DictScope D(W, "CGProfileEntry"); 
- W.printNumber("From", this->dumper()->getStaticSymbolName(CGPE.cgp_from), - CGPE.cgp_from); - W.printNumber("To", this->dumper()->getStaticSymbolName(CGPE.cgp_to), - CGPE.cgp_to); + W.printNumber( + "From", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_from)), + CGPE.cgp_from); + W.printNumber( + "To", + unwrapOrError(this->FileName, + this->dumper()->getStaticSymbolName(CGPE.cgp_to)), + CGPE.cgp_to); W.printNumber("Weight", CGPE.cgp_weight); } } +static Expected> toULEB128Array(ArrayRef Data) { + std::vector Ret; + const uint8_t *Cur = Data.begin(); + const uint8_t *End = Data.end(); + while (Cur != End) { + unsigned Size; + const char *Err; + Ret.push_back(decodeULEB128(Cur, &Size, End, &Err)); + if (Err) + return createError(Err); + Cur += Size; + } + return Ret; +} + template void LLVMStyle::printAddrsig(const ELFFile *Obj) { ListScope L(W, "Addrsig"); if (!this->dumper()->getDotAddrsigSec()) return; ArrayRef Contents = unwrapOrError( + this->FileName, Obj->getSectionContents(this->dumper()->getDotAddrsigSec())); - const uint8_t *Cur = Contents.begin(); - const uint8_t *End = Contents.end(); - while (Cur != End) { - unsigned Size; - const char *Err; - uint64_t SymIndex = decodeULEB128(Cur, &Size, End, &Err); - if (Err) - reportError(Err); - W.printNumber("Sym", this->dumper()->getStaticSymbolName(SymIndex), - SymIndex); - Cur += Size; + Expected> V = toULEB128Array(Contents); + if (!V) { + reportWarning(V.takeError(), this->FileName); + return; + } + + for (uint64_t Sym : *V) { + Expected NameOrErr = this->dumper()->getStaticSymbolName(Sym); + if (NameOrErr) { + W.printNumber("Sym", *NameOrErr, Sym); + continue; + } + reportWarning(NameOrErr.takeError(), this->FileName); + W.printNumber("Sym", "", Sym); } } @@ -5051,6 +5826,17 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, } } +static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) { + W.printNumber("Page Size", Note.PageSize); + for 
(const CoreFileMapping &Mapping : Note.Mappings) { + ListScope D(W, "Mapping"); + W.printHex("Start", Mapping.Start); + W.printHex("End", Mapping.End); + W.printHex("Offset", Mapping.Offset); + W.printString("Filename", Mapping.Filename); + } +} + template void LLVMStyle::printNotes(const ELFFile *Obj) { ListScope L(W, "Notes"); @@ -5067,47 +5853,59 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { ArrayRef Descriptor = Note.getDesc(); Elf_Word Type = Note.getType(); + // Print the note owner/type. W.printString("Owner", Name); W.printHex("Data size", Descriptor.size()); if (Name == "GNU") { W.printString("Type", getGNUNoteTypeName(Type)); - printGNUNoteLLVMStyle(Type, Descriptor, W); } else if (Name == "FreeBSD") { W.printString("Type", getFreeBSDNoteTypeName(Type)); } else if (Name == "AMD") { W.printString("Type", getAMDNoteTypeName(Type)); - const AMDNote N = getAMDNote(Type, Descriptor); - if (!N.Type.empty()) - W.printString(N.Type, N.Value); } else if (Name == "AMDGPU") { W.printString("Type", getAMDGPUNoteTypeName(Type)); - const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); - if (!N.Type.empty()) - W.printString(N.Type, N.Value); } else { - StringRef NoteType = getGenericNoteTypeName(Type); + StringRef NoteType = Obj->getHeader()->e_type == ELF::ET_CORE + ? getCoreNoteTypeName(Type) + : getGenericNoteTypeName(Type); if (!NoteType.empty()) W.printString("Type", NoteType); else W.printString("Type", "Unknown (" + to_string(format_hex(Type, 10)) + ")"); } + + // Print the description, or fallback to printing raw bytes for unknown + // owners. 
+ if (Name == "GNU") { + printGNUNoteLLVMStyle(Type, Descriptor, W); + } else if (Name == "AMD") { + const AMDNote N = getAMDNote(Type, Descriptor); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); + } else if (Name == "AMDGPU") { + const AMDGPUNote N = getAMDGPUNote(Type, Descriptor); + if (!N.Type.empty()) + W.printString(N.Type, N.Value); + } else if (Name == "CORE") { + if (Type == ELF::NT_FILE) { + DataExtractor DescExtractor(Descriptor, + ELFT::TargetEndianness == support::little, + sizeof(Elf_Addr)); + Expected Note = readCoreNote(DescExtractor); + if (Note) + printCoreNoteLLVMStyle(*Note, W); + else + reportWarning(Note.takeError(), this->FileName); + } + } else if (!Descriptor.empty()) { + W.printBinaryBlock("Description data", Descriptor); + } }; - if (Obj->getHeader()->e_type == ELF::ET_CORE) { - for (const auto &P : unwrapOrError(Obj->program_headers())) { - if (P.p_type != PT_NOTE) - continue; - DictScope D(W, "NoteSection"); - PrintHeader(P.p_offset, P.p_filesz); - Error Err = Error::success(); - for (const auto &Note : Obj->notes(P, Err)) - ProcessNote(Note); - if (Err) - error(std::move(Err)); - } - } else { - for (const auto &S : unwrapOrError(Obj->sections())) { + ArrayRef Sections = unwrapOrError(this->FileName, Obj->sections()); + if (Obj->getHeader()->e_type != ELF::ET_CORE && !Sections.empty()) { + for (const auto &S : Sections) { if (S.sh_type != SHT_NOTE) continue; DictScope D(W, "NoteSection"); @@ -5116,7 +5914,20 @@ void LLVMStyle::printNotes(const ELFFile *Obj) { for (const auto &Note : Obj->notes(S, Err)) ProcessNote(Note); if (Err) - error(std::move(Err)); + reportError(std::move(Err), this->FileName); + } + } else { + for (const auto &P : + unwrapOrError(this->FileName, Obj->program_headers())) { + if (P.p_type != PT_NOTE) + continue; + DictScope D(W, "NoteSection"); + PrintHeader(P.p_offset, P.p_filesz); + Error Err = Error::success(); + for (const auto &Note : Obj->notes(P, Err)) + ProcessNote(Note); + if (Err) + 
reportError(std::move(Err), this->FileName); } } } @@ -5125,11 +5936,12 @@ template void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { ListScope L(W, "LinkerOptions"); - for (const Elf_Shdr &Shdr : unwrapOrError(Obj->sections())) { + for (const Elf_Shdr &Shdr : unwrapOrError(this->FileName, Obj->sections())) { if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS) continue; - ArrayRef Contents = unwrapOrError(Obj->getSectionContents(&Shdr)); + ArrayRef Contents = + unwrapOrError(this->FileName, Obj->getSectionContents(&Shdr)); for (const uint8_t *P = Contents.begin(), *E = Contents.end(); P < E; ) { StringRef Key = StringRef(reinterpret_cast(P)); StringRef Value = @@ -5142,6 +5954,22 @@ void LLVMStyle::printELFLinkerOptions(const ELFFile *Obj) { } } +template +void LLVMStyle::printStackSizes(const ELFObjectFile *Obj) { + ListScope L(W, "StackSizes"); + if (Obj->isRelocatableObject()) + this->printRelocatableStackSizes(Obj, []() {}); + else + this->printNonRelocatableStackSizes(Obj, []() {}); +} + +template +void LLVMStyle::printStackSizeEntry(uint64_t Size, StringRef FuncName) { + DictScope D(W, "Entry"); + W.printString("Function", FuncName); + W.printHex("Size", Size); +} + template void LLVMStyle::printMipsGOT(const MipsGOTParser &Parser) { auto PrintEntry = [&](const Elf_Addr *E) { @@ -5252,3 +6080,41 @@ void LLVMStyle::printMipsPLT(const MipsGOTParser &Parser) { } } } + +template +void LLVMStyle::printMipsABIFlags(const ELFObjectFile *ObjF) { + const ELFFile *Obj = ObjF->getELFFile(); + const Elf_Shdr *Shdr = + findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags"); + if (!Shdr) { + W.startLine() << "There is no .MIPS.abiflags section in the file.\n"; + return; + } + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr)); + if (Sec.size() != sizeof(Elf_Mips_ABIFlags)) { + W.startLine() << "The .MIPS.abiflags section has a wrong size.\n"; + return; + } + + auto *Flags = reinterpret_cast *>(Sec.data()); + + raw_ostream &OS 
= W.getOStream(); + DictScope GS(W, "MIPS ABI Flags"); + + W.printNumber("Version", Flags->version); + W.startLine() << "ISA: "; + if (Flags->isa_rev <= 1) + OS << format("MIPS%u", Flags->isa_level); + else + OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev); + OS << "\n"; + W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)); + W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags)); + W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)); + W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size)); + W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size)); + W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size)); + W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1)); + W.printHex("Flags 2", Flags->flags2); +} diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp index 32a3866eb2f..20a60b3df69 100644 --- a/tools/llvm-readobj/MachODumper.cpp +++ b/tools/llvm-readobj/MachODumper.cpp @@ -214,6 +214,31 @@ static const EnumEntry MachOHeaderFlags[] = { LLVM_READOBJ_ENUM_ENT(MachO, MH_APP_EXTENSION_SAFE), }; +static const EnumEntry MachOSectionTypes[] = { + { "Regular" , MachO::S_REGULAR }, + { "ZeroFill" , MachO::S_ZEROFILL }, + { "CStringLiterals" , MachO::S_CSTRING_LITERALS }, + { "4ByteLiterals" , MachO::S_4BYTE_LITERALS }, + { "8ByteLiterals" , MachO::S_8BYTE_LITERALS }, + { "LiteralPointers" , MachO::S_LITERAL_POINTERS }, + { "NonLazySymbolPointers" , MachO::S_NON_LAZY_SYMBOL_POINTERS }, + { "LazySymbolPointers" , MachO::S_LAZY_SYMBOL_POINTERS }, + { "SymbolStubs" , MachO::S_SYMBOL_STUBS }, + { "ModInitFuncPointers" , MachO::S_MOD_INIT_FUNC_POINTERS }, + { "ModTermFuncPointers" , MachO::S_MOD_TERM_FUNC_POINTERS }, + { "Coalesced" , MachO::S_COALESCED }, + { "GBZeroFill" , MachO::S_GB_ZEROFILL }, + { "Interposing" , MachO::S_INTERPOSING }, + { "16ByteLiterals" , MachO::S_16BYTE_LITERALS }, + { "DTraceDOF" , MachO::S_DTRACE_DOF }, + { 
"LazyDylibSymbolPointers" , MachO::S_LAZY_DYLIB_SYMBOL_POINTERS }, + { "ThreadLocalRegular" , MachO::S_THREAD_LOCAL_REGULAR }, + { "ThreadLocalZerofill" , MachO::S_THREAD_LOCAL_ZEROFILL }, + { "ThreadLocalVariables" , MachO::S_THREAD_LOCAL_VARIABLES }, + { "ThreadLocalVariablePointers" , MachO::S_THREAD_LOCAL_VARIABLE_POINTERS }, + { "ThreadLocalInitFunctionPointers", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS } +}; + static const EnumEntry MachOSectionAttributes[] = { { "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ }, { "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ }, @@ -440,10 +465,7 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { MachOSection MOSection; getSection(Obj, Section.getRawDataRefImpl(), MOSection); DataRefImpl DR = Section.getRawDataRefImpl(); - - StringRef Name; - error(Section.getName(Name)); - + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); ArrayRef RawName = Obj->getSectionRawName(DR); StringRef SegmentName = Obj->getSectionFinalSegmentName(DR); ArrayRef RawSegmentName = Obj->getSectionRawFinalSegmentName(DR); @@ -459,7 +481,7 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { W.printHex("RelocationOffset", MOSection.RelocationTableOffset); W.printNumber("RelocationCount", MOSection.NumRelocationTableEntries); W.printEnum("Type", MOSection.Flags & 0xFF, - makeArrayRef(MachOSectionAttributes)); + makeArrayRef(MachOSectionTypes)); W.printFlags("Attributes", MOSection.Flags >> 8, makeArrayRef(MachOSectionAttributes)); W.printHex("Reserved1", MOSection.Reserved1); @@ -484,7 +506,8 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) { } if (opts::SectionData && !Section.isBSS()) - W.printBinaryBlock("SectionData", unwrapOrError(Section.getContents())); + W.printBinaryBlock("SectionData", unwrapOrError(Obj->getFileName(), + Section.getContents())); } } @@ -493,9 +516,7 @@ void MachODumper::printRelocations() { std::error_code EC; for (const SectionRef &Section : 
Obj->sections()) { - StringRef Name; - error(Section.getName(Name)); - + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); bool PrintedGroup = false; for (const RelocationRef &Reloc : Section.relocations()) { if (!PrintedGroup) { @@ -535,14 +556,13 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj, if (Symbol != Obj->symbol_end()) { Expected TargetNameOrErr = Symbol->getName(); if (!TargetNameOrErr) - error(errorToErrorCode(TargetNameOrErr.takeError())); + reportError(TargetNameOrErr.takeError(), Obj->getFileName()); TargetName = *TargetNameOrErr; } } else if (!IsScattered) { section_iterator SecI = Obj->getRelocationSection(DR); - if (SecI != Obj->section_end()) { - error(SecI->getName(TargetName)); - } + if (SecI != Obj->section_end()) + TargetName = unwrapOrError(Obj->getFileName(), SecI->getName()); } if (TargetName.empty()) TargetName = "-"; @@ -610,10 +630,12 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) { StringRef SectionName = ""; Expected SecIOrErr = Symbol.getSection(); - error(errorToErrorCode(SecIOrErr.takeError())); + if (!SecIOrErr) + reportError(SecIOrErr.takeError(), Obj->getFileName()); + section_iterator SecI = *SecIOrErr; if (SecI != Obj->section_end()) - error(SecI->getName(SectionName)); + SectionName = unwrapOrError(Obj->getFileName(), SecI->getName()); DictScope D(W, "Symbol"); W.printNumber("Name", SymbolName, MOSymbol.StringIndex); @@ -643,7 +665,11 @@ void MachODumper::printStackMap() const { object::SectionRef StackMapSection; for (auto Sec : Obj->sections()) { StringRef Name; - Sec.getName(Name); + if (Expected NameOrErr = Sec.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if (Name == "__llvm_stackmaps") { StackMapSection = Sec; break; @@ -653,7 +679,8 @@ void MachODumper::printStackMap() const { if (StackMapSection == object::SectionRef()) return; - StringRef StackMapContents = unwrapOrError(StackMapSection.getContents()); + StringRef StackMapContents = + 
unwrapOrError(Obj->getFileName(), StackMapSection.getContents()); ArrayRef StackMapContentsArray = arrayRefFromStringRef(StackMapContents); diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp index 0a9e22c8a71..9e5ebd99ac3 100644 --- a/tools/llvm-readobj/ObjDumper.cpp +++ b/tools/llvm-readobj/ObjDumper.cpp @@ -23,6 +23,10 @@ namespace llvm { +static inline Error createError(const Twine &Msg) { + return createStringError(object::object_error::parse_failed, Msg); +} + ObjDumper::ObjDumper(ScopedPrinter &Writer) : W(Writer) {} ObjDumper::~ObjDumper() { @@ -49,8 +53,7 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj, SecIndex = Obj->isELF() ? 0 : 1; for (object::SectionRef SecRef : Obj->sections()) { - StringRef SecName; - error(SecRef.getName(SecName)); + StringRef SecName = unwrapOrError(Obj->getFileName(), SecRef.getName()); auto NameIt = SecNames.find(SecName); if (NameIt != SecNames.end()) NameIt->second = true; @@ -64,10 +67,15 @@ getSectionRefsByNameOrIndex(const object::ObjectFile *Obj, for (const std::pair &S : SecNames) if (!S.second) - reportWarning(formatv("could not find section '{0}'", S.first).str()); + reportWarning( + createError(formatv("could not find section '{0}'", S.first).str()), + Obj->getFileName()); + for (std::pair S : SecIndices) if (!S.second) - reportWarning(formatv("could not find section {0}", S.first).str()); + reportWarning( + createError(formatv("could not find section {0}", S.first).str()), + Obj->getFileName()); return Ret; } @@ -77,14 +85,16 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj, bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = + unwrapOrError(Obj->getFileName(), Section.getName()); + if (!First) W.startLine() << '\n'; First = false; W.startLine() << "String dump of section '" << SectionName << "':\n"; - StringRef 
SectionContent = unwrapOrError(Section.getContents()); + StringRef SectionContent = + unwrapOrError(Obj->getFileName(), Section.getContents()); const uint8_t *SecContent = SectionContent.bytes_begin(); const uint8_t *CurrentWord = SecContent; @@ -110,14 +120,16 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj, bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { - StringRef SectionName; - error(Section.getName(SectionName)); + StringRef SectionName = + unwrapOrError(Obj->getFileName(), Section.getName()); + if (!First) W.startLine() << '\n'; First = false; W.startLine() << "Hex dump of section '" << SectionName << "':\n"; - StringRef SectionContent = unwrapOrError(Section.getContents()); + StringRef SectionContent = + unwrapOrError(Obj->getFileName(), Section.getContents()); const uint8_t *SecContent = SectionContent.bytes_begin(); const uint8_t *SecEnd = SecContent + SectionContent.size(); diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h index aaabfa2ca2e..2ba44134249 100644 --- a/tools/llvm-readobj/ObjDumper.h +++ b/tools/llvm-readobj/ObjDumper.h @@ -68,15 +68,8 @@ public: virtual void printAddrsig() {} virtual void printNotes() {} virtual void printELFLinkerOptions() {} - - // Only implemented for ARM ELF at this time. - virtual void printAttributes() { } - - // Only implemented for MIPS ELF at this time. - virtual void printMipsPLTGOT() { } - virtual void printMipsABIFlags() { } - virtual void printMipsReginfo() { } - virtual void printMipsOptions() { } + virtual void printStackSizes() {} + virtual void printArchSpecificInfo() { } // Only implemented for PE/COFF. 
virtual void printCOFFImports() { } diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp index 041a9a15bdb..dfab9f40d71 100644 --- a/tools/llvm-readobj/WasmDumper.cpp +++ b/tools/llvm-readobj/WasmDumper.cpp @@ -51,6 +51,7 @@ static const EnumEntry WasmSymbolFlags[] = { ENUM_ENTRY(UNDEFINED), ENUM_ENTRY(EXPORTED), ENUM_ENTRY(EXPLICIT_NAME), + ENUM_ENTRY(NO_STRIP), #undef ENUM_ENTRY }; @@ -90,7 +91,7 @@ void WasmDumper::printRelocation(const SectionRef &Section, StringRef SymName; symbol_iterator SI = Reloc.getSymbol(); if (SI != Obj->symbol_end()) - SymName = error(SI->getName()); + SymName = unwrapOrError(Obj->getFileName(), SI->getName()); bool HasAddend = false; switch (RelocType) { @@ -133,8 +134,8 @@ void WasmDumper::printRelocations() { int SectionNumber = 0; for (const SectionRef &Section : Obj->sections()) { bool PrintedGroup = false; - StringRef Name; - error(Section.getName(Name)); + StringRef Name = unwrapOrError(Obj->getFileName(), Section.getName()); + ++SectionNumber; for (const RelocationRef &Reloc : Section.relocations()) { diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp index e64b8f15718..fa268ce9d43 100644 --- a/tools/llvm-readobj/Win64EHDumper.cpp +++ b/tools/llvm-readobj/Win64EHDumper.cpp @@ -289,7 +289,9 @@ void Dumper::printRuntimeFunction(const Context &Ctx, resolveRelocation(Ctx, Section, SectionOffset + 8, XData, Offset); ArrayRef Contents; - error(Ctx.COFF.getSectionContents(XData, Contents)); + if (Error E = Ctx.COFF.getSectionContents(XData, Contents)) + reportError(std::move(E), Ctx.COFF.getFileName()); + if (Contents.empty()) return; @@ -304,14 +306,19 @@ void Dumper::printRuntimeFunction(const Context &Ctx, void Dumper::printData(const Context &Ctx) { for (const auto &Section : Ctx.COFF.sections()) { StringRef Name; - Section.getName(Name); + if (Expected NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); if (Name != 
".pdata" && !Name.startswith(".pdata$")) continue; const coff_section *PData = Ctx.COFF.getCOFFSection(Section); ArrayRef Contents; - error(Ctx.COFF.getSectionContents(PData, Contents)); + + if (Error E = Ctx.COFF.getSectionContents(PData, Contents)) + reportError(std::move(E), Ctx.COFF.getFileName()); if (Contents.empty()) continue; diff --git a/tools/llvm-readobj/WindowsResourceDumper.cpp b/tools/llvm-readobj/WindowsResourceDumper.cpp index 13989f696d9..a2fb6aac3f9 100644 --- a/tools/llvm-readobj/WindowsResourceDumper.cpp +++ b/tools/llvm-readobj/WindowsResourceDumper.cpp @@ -56,8 +56,12 @@ void Dumper::printEntry(const ResourceEntryRef &Ref) { if (Ref.checkTypeString()) { auto NarrowStr = stripUTF16(Ref.getTypeString()); SW.printString("Resource type (string)", NarrowStr); - } else - SW.printNumber("Resource type (int)", Ref.getTypeID()); + } else { + SmallString<20> IDStr; + raw_svector_ostream OS(IDStr); + printResourceTypeName(Ref.getTypeID(), OS); + SW.printString("Resource type (int)", IDStr); + } if (Ref.checkNameString()) { auto NarrowStr = stripUTF16(Ref.getNameString()); diff --git a/tools/llvm-readobj/XCOFFDumper.cpp b/tools/llvm-readobj/XCOFFDumper.cpp index 6f260f91537..fe95b6d1b49 100644 --- a/tools/llvm-readobj/XCOFFDumper.cpp +++ b/tools/llvm-readobj/XCOFFDumper.cpp @@ -22,6 +22,12 @@ using namespace object; namespace { class XCOFFDumper : public ObjDumper { + enum { + SymbolTypeMask = 0x07, + SymbolAlignmentMask = 0xF8, + SymbolAlignmentBitOffset = 3 + }; + public: XCOFFDumper(const XCOFFObjectFile &Obj, ScopedPrinter &Writer) : ObjDumper(Writer), Obj(Obj) {} @@ -37,11 +43,21 @@ public: private: template void printSectionHeaders(ArrayRef Sections); - - const XCOFFObjectFile &Obj; + template void printGenericSectionHeader(T &Sec) const; + template void printOverflowSectionHeader(T &Sec) const; + void printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr); + void printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr); + void 
printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr); + void printSymbol(const SymbolRef &); // Least significant 3 bits are reserved. static constexpr unsigned SectionFlagsReservedMask = 0x7; + + // The low order 16 bits of section flags denotes the section type. + static constexpr unsigned SectionFlagsTypeMask = 0xffffu; + + void printRelocations(ArrayRef Sections); + const XCOFFObjectFile &Obj; }; } // anonymous namespace @@ -100,11 +116,315 @@ void XCOFFDumper::printSectionHeaders() { } void XCOFFDumper::printRelocations() { - llvm_unreachable("Unimplemented functionality for XCOFFDumper"); + if (Obj.is64Bit()) + llvm_unreachable("64-bit relocation output not implemented!"); + else + printRelocations(Obj.sections32()); +} + +static const EnumEntry RelocationTypeNameclass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(R_POS), ECase(R_RL), ECase(R_RLA), ECase(R_NEG), + ECase(R_REL), ECase(R_TOC), ECase(R_TRL), ECase(R_TRLA), + ECase(R_GL), ECase(R_TCL), ECase(R_REF), ECase(R_BA), + ECase(R_BR), ECase(R_RBA), ECase(R_RBR), ECase(R_TLS), + ECase(R_TLS_IE), ECase(R_TLS_LD), ECase(R_TLS_LE), ECase(R_TLSM), + ECase(R_TLSML), ECase(R_TOCU), ECase(R_TOCL) +#undef ECase +}; + +void XCOFFDumper::printRelocations(ArrayRef Sections) { + if (!opts::ExpandRelocs) + report_fatal_error("Unexpanded relocation output not implemented."); + + ListScope LS(W, "Relocations"); + uint16_t Index = 0; + for (const auto &Sec : Sections) { + ++Index; + // Only the .text, .data, .tdata, and STYP_DWARF sections have relocation. 
+ if (Sec.Flags != XCOFF::STYP_TEXT && Sec.Flags != XCOFF::STYP_DATA && + Sec.Flags != XCOFF::STYP_TDATA && Sec.Flags != XCOFF::STYP_DWARF) + continue; + auto Relocations = unwrapOrError(Obj.getFileName(), Obj.relocations(Sec)); + if (Relocations.empty()) + continue; + + W.startLine() << "Section (index: " << Index << ") " << Sec.getName() + << " {\n"; + for (auto Reloc : Relocations) { + StringRef SymbolName = unwrapOrError( + Obj.getFileName(), Obj.getSymbolNameByIndex(Reloc.SymbolIndex)); + + DictScope RelocScope(W, "Relocation"); + W.printHex("Virtual Address", Reloc.VirtualAddress); + W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex); + W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No"); + W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0); + W.printNumber("Length", Reloc.getRelocatedLength()); + W.printEnum("Type", (uint8_t)Reloc.Type, + makeArrayRef(RelocationTypeNameclass)); + } + W.unindent(); + W.startLine() << "}\n"; + } +} + +static const EnumEntry FileStringType[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XFT_FN), ECase(XFT_CT), ECase(XFT_CV), ECase(XFT_CD) +#undef ECase +}; + +void XCOFFDumper::printFileAuxEnt(const XCOFFFileAuxEnt *AuxEntPtr) { + if (Obj.is64Bit()) + report_fatal_error( + "Printing for File Auxiliary Entry in 64-bit is unimplemented."); + StringRef FileName = + unwrapOrError(Obj.getFileName(), Obj.getCFileName(AuxEntPtr)); + DictScope SymDs(W, "File Auxiliary Entry"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + W.printString("Name", FileName); + W.printEnum("Type", static_cast(AuxEntPtr->Type), + makeArrayRef(FileStringType)); +} + +static const EnumEntry CsectStorageMappingClass[] = + { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XMC_PR), ECase(XMC_RO), ECase(XMC_DB), + ECase(XMC_GL), ECase(XMC_XO), ECase(XMC_SV), + ECase(XMC_SV64), ECase(XMC_SV3264), ECase(XMC_TI), + ECase(XMC_TB), ECase(XMC_RW), ECase(XMC_TC0), + ECase(XMC_TC), ECase(XMC_TD), 
ECase(XMC_DS), + ECase(XMC_UA), ECase(XMC_BS), ECase(XMC_UC), + ECase(XMC_TL), ECase(XMC_TE) +#undef ECase +}; + +static const EnumEntry CsectSymbolTypeClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(XTY_ER), ECase(XTY_SD), ECase(XTY_LD), ECase(XTY_CM) +#undef ECase +}; + +void XCOFFDumper::printCsectAuxEnt32(const XCOFFCsectAuxEnt32 *AuxEntPtr) { + assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file."); + + DictScope SymDs(W, "CSECT Auxiliary Entry"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + if ((AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask) == XCOFF::XTY_LD) + W.printNumber("ContainingCsectSymbolIndex", AuxEntPtr->SectionOrLength); + else + W.printNumber("SectionLen", AuxEntPtr->SectionOrLength); + W.printHex("ParameterHashIndex", AuxEntPtr->ParameterHashIndex); + W.printHex("TypeChkSectNum", AuxEntPtr->TypeChkSectNum); + // Print out symbol alignment and type. + W.printNumber("SymbolAlignmentLog2", + (AuxEntPtr->SymbolAlignmentAndType & SymbolAlignmentMask) >> + SymbolAlignmentBitOffset); + W.printEnum("SymbolType", AuxEntPtr->SymbolAlignmentAndType & SymbolTypeMask, + makeArrayRef(CsectSymbolTypeClass)); + W.printEnum("StorageMappingClass", + static_cast(AuxEntPtr->StorageMappingClass), + makeArrayRef(CsectStorageMappingClass)); + W.printHex("StabInfoIndex", AuxEntPtr->StabInfoIndex); + W.printHex("StabSectNum", AuxEntPtr->StabSectNum); +} + +void XCOFFDumper::printSectAuxEntForStat( + const XCOFFSectAuxEntForStat *AuxEntPtr) { + assert(!Obj.is64Bit() && "32-bit interface called on 64-bit object file."); + + DictScope SymDs(W, "Sect Auxiliary Entry For Stat"); + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(AuxEntPtr))); + W.printNumber("SectionLength", AuxEntPtr->SectionLength); + + // Unlike the corresponding fields in the section header, NumberOfRelocEnt + // and NumberOfLineNum do not handle values greater than 65535. 
+ W.printNumber("NumberOfRelocEnt", AuxEntPtr->NumberOfRelocEnt); + W.printNumber("NumberOfLineNum", AuxEntPtr->NumberOfLineNum); +} + +static const EnumEntry SymStorageClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(C_NULL), ECase(C_AUTO), ECase(C_EXT), ECase(C_STAT), + ECase(C_REG), ECase(C_EXTDEF), ECase(C_LABEL), ECase(C_ULABEL), + ECase(C_MOS), ECase(C_ARG), ECase(C_STRTAG), ECase(C_MOU), + ECase(C_UNTAG), ECase(C_TPDEF), ECase(C_USTATIC), ECase(C_ENTAG), + ECase(C_MOE), ECase(C_REGPARM), ECase(C_FIELD), ECase(C_BLOCK), + ECase(C_FCN), ECase(C_EOS), ECase(C_FILE), ECase(C_LINE), + ECase(C_ALIAS), ECase(C_HIDDEN), ECase(C_HIDEXT), ECase(C_BINCL), + ECase(C_EINCL), ECase(C_INFO), ECase(C_WEAKEXT), ECase(C_DWARF), + ECase(C_GSYM), ECase(C_LSYM), ECase(C_PSYM), ECase(C_RSYM), + ECase(C_RPSYM), ECase(C_STSYM), ECase(C_TCSYM), ECase(C_BCOMM), + ECase(C_ECOML), ECase(C_ECOMM), ECase(C_DECL), ECase(C_ENTRY), + ECase(C_FUN), ECase(C_BSTAT), ECase(C_ESTAT), ECase(C_GTLS), + ECase(C_STTLS), ECase(C_EFCN) +#undef ECase +}; + +static StringRef GetSymbolValueName(XCOFF::StorageClass SC) { + switch (SC) { + case XCOFF::C_EXT: + case XCOFF::C_WEAKEXT: + case XCOFF::C_HIDEXT: + case XCOFF::C_STAT: + return "Value (RelocatableAddress)"; + case XCOFF::C_FILE: + return "Value (SymbolTableIndex)"; + case XCOFF::C_FCN: + case XCOFF::C_BLOCK: + case XCOFF::C_FUN: + case XCOFF::C_STSYM: + case XCOFF::C_BINCL: + case XCOFF::C_EINCL: + case XCOFF::C_INFO: + case XCOFF::C_BSTAT: + case XCOFF::C_LSYM: + case XCOFF::C_PSYM: + case XCOFF::C_RPSYM: + case XCOFF::C_RSYM: + case XCOFF::C_ECOML: + case XCOFF::C_DWARF: + assert(false && "This StorageClass for the symbol is not yet implemented."); + return ""; + default: + return "Value"; + } +} + +static const EnumEntry CFileLangIdClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + ECase(TB_C), ECase(TB_CPLUSPLUS) +#undef ECase +}; + +static const EnumEntry CFileCpuIdClass[] = { +#define ECase(X) \ + { #X, XCOFF::X } + 
ECase(TCPU_PPC64), ECase(TCPU_COM), ECase(TCPU_970) +#undef ECase +}; + +void XCOFFDumper::printSymbol(const SymbolRef &S) { + if (Obj.is64Bit()) + report_fatal_error("64-bit support is unimplemented."); + + DataRefImpl SymbolDRI = S.getRawDataRefImpl(); + const XCOFFSymbolEntry *SymbolEntPtr = Obj.toSymbolEntry(SymbolDRI); + + XCOFFSymbolRef XCOFFSymRef(SymbolDRI, &Obj); + uint8_t NumberOfAuxEntries = XCOFFSymRef.getNumberOfAuxEntries(); + + DictScope SymDs(W, "Symbol"); + + StringRef SymbolName = + unwrapOrError(Obj.getFileName(), Obj.getSymbolName(SymbolDRI)); + + W.printNumber("Index", + Obj.getSymbolIndex(reinterpret_cast(SymbolEntPtr))); + W.printString("Name", SymbolName); + W.printHex(GetSymbolValueName(SymbolEntPtr->StorageClass), + SymbolEntPtr->Value); + + StringRef SectionName = + unwrapOrError(Obj.getFileName(), Obj.getSymbolSectionName(SymbolEntPtr)); + + W.printString("Section", SectionName); + if (XCOFFSymRef.getStorageClass() == XCOFF::C_FILE) { + W.printEnum("Source Language ID", + SymbolEntPtr->CFileLanguageIdAndTypeId.LanguageId, + makeArrayRef(CFileLangIdClass)); + W.printEnum("CPU Version ID", + SymbolEntPtr->CFileLanguageIdAndTypeId.CpuTypeId, + makeArrayRef(CFileCpuIdClass)); + } else + W.printHex("Type", SymbolEntPtr->SymbolType); + + W.printEnum("StorageClass", static_cast(SymbolEntPtr->StorageClass), + makeArrayRef(SymStorageClass)); + W.printNumber("NumberOfAuxEntries", SymbolEntPtr->NumberOfAuxEntries); + + if (NumberOfAuxEntries == 0) + return; + + switch (XCOFFSymRef.getStorageClass()) { + case XCOFF::C_FILE: + // If the symbol is C_FILE and has auxiliary entries... 
+ for (int i = 1; i <= NumberOfAuxEntries; i++) { + const XCOFFFileAuxEnt *FileAuxEntPtr = + reinterpret_cast(SymbolEntPtr + i); +#ifndef NDEBUG + Obj.checkSymbolEntryPointer(reinterpret_cast(FileAuxEntPtr)); +#endif + printFileAuxEnt(FileAuxEntPtr); + } + break; + case XCOFF::C_EXT: + case XCOFF::C_WEAKEXT: + case XCOFF::C_HIDEXT: + // If the symbol is for a function, and it has more than 1 auxiliary entry, + // then one of them must be function auxiliary entry which we do not + // support yet. + if (XCOFFSymRef.isFunction() && NumberOfAuxEntries >= 2) + report_fatal_error("Function auxiliary entry printing is unimplemented."); + + // If there is more than 1 auxiliary entry, instead of printing out + // error information, print out the raw Auxiliary entry from 1st till + // the last - 1. The last one must be a CSECT Auxiliary Entry. + for (int i = 1; i < NumberOfAuxEntries; i++) { + W.startLine() << "!Unexpected raw auxiliary entry data:\n"; + W.startLine() << format_bytes( + ArrayRef(reinterpret_cast(SymbolEntPtr + i), + XCOFF::SymbolTableEntrySize)); + } + + // The symbol's last auxiliary entry is a CSECT Auxiliary Entry. 
+ printCsectAuxEnt32(XCOFFSymRef.getXCOFFCsectAuxEnt32()); + break; + case XCOFF::C_STAT: + if (NumberOfAuxEntries > 1) + report_fatal_error( + "C_STAT symbol should not have more than 1 auxiliary entry."); + + const XCOFFSectAuxEntForStat *StatAuxEntPtr; + StatAuxEntPtr = + reinterpret_cast(SymbolEntPtr + 1); +#ifndef NDEBUG + Obj.checkSymbolEntryPointer(reinterpret_cast(StatAuxEntPtr)); +#endif + printSectAuxEntForStat(StatAuxEntPtr); + break; + case XCOFF::C_DWARF: + case XCOFF::C_BLOCK: + case XCOFF::C_FCN: + report_fatal_error("Symbol table entry printing for this storage class " + "type is unimplemented."); + break; + default: + for (int i = 1; i <= NumberOfAuxEntries; i++) { + W.startLine() << "!Unexpected raw auxiliary entry data:\n"; + W.startLine() << format_bytes( + ArrayRef(reinterpret_cast(SymbolEntPtr + i), + XCOFF::SymbolTableEntrySize)); + } + break; + } } void XCOFFDumper::printSymbols() { - llvm_unreachable("Unimplemented functionality for XCOFFDumper"); + ListScope Group(W, "Symbols"); + for (const SymbolRef &S : Obj.symbols()) + printSymbol(S); } void XCOFFDumper::printDynamicSymbols() { @@ -134,6 +454,39 @@ static const EnumEntry SectionTypeFlagsNames[] = { #undef ECase }; +template +void XCOFFDumper::printOverflowSectionHeader(T &Sec) const { + if (Obj.is64Bit()) { + reportWarning(make_error("An 64-bit XCOFF object file may not " + "contain an overflow section header.", + object_error::parse_failed), + Obj.getFileName()); + } + + W.printString("Name", Sec.getName()); + W.printNumber("NumberOfRelocations", Sec.PhysicalAddress); + W.printNumber("NumberOfLineNumbers", Sec.VirtualAddress); + W.printHex("Size", Sec.SectionSize); + W.printHex("RawDataOffset", Sec.FileOffsetToRawData); + W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo); + W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); + W.printNumber("IndexOfSectionOverflowed", Sec.NumberOfRelocations); + W.printNumber("IndexOfSectionOverflowed", 
Sec.NumberOfLineNumbers); +} + +template +void XCOFFDumper::printGenericSectionHeader(T &Sec) const { + W.printString("Name", Sec.getName()); + W.printHex("PhysicalAddress", Sec.PhysicalAddress); + W.printHex("VirtualAddress", Sec.VirtualAddress); + W.printHex("Size", Sec.SectionSize); + W.printHex("RawDataOffset", Sec.FileOffsetToRawData); + W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo); + W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); + W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations); + W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers); +} + template void XCOFFDumper::printSectionHeaders(ArrayRef Sections) { ListScope Group(W, "Sections"); @@ -143,27 +496,28 @@ void XCOFFDumper::printSectionHeaders(ArrayRef Sections) { DictScope SecDS(W, "Section"); W.printNumber("Index", Index++); - W.printString("Name", Sec.getName()); - W.printHex("PhysicalAddress", Sec.PhysicalAddress); - W.printHex("VirtualAddress", Sec.VirtualAddress); - W.printHex("Size", Sec.SectionSize); - W.printHex("RawDataOffset", Sec.FileOffsetToRawData); - W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo); - W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo); - - // TODO Need to add overflow handling when NumberOfX == _OVERFLOW_MARKER - // in 32-bit object files. - W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations); - W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers); - - // The most significant 16-bits represent the DWARF section subtype. For - // now we just dump the section type flags. 
- uint16_t Flags = Sec.Flags & 0xffffu; - if (Flags & SectionFlagsReservedMask) - W.printHex("Flags", "Reserved", Flags); + uint16_t SectionType = Sec.Flags & SectionFlagsTypeMask; + switch (SectionType) { + case XCOFF::STYP_OVRFLO: + printOverflowSectionHeader(Sec); + break; + case XCOFF::STYP_LOADER: + case XCOFF::STYP_EXCEPT: + case XCOFF::STYP_TYPCHK: + // TODO The interpretation of loader, exception and type check section + // headers are different from that of generic section headers. We will + // implement them later. We interpret them as generic section headers for + // now. + default: + printGenericSectionHeader(Sec); + break; + } + // For now we just dump the section type portion of the flags. + if (SectionType & SectionFlagsReservedMask) + W.printHex("Flags", "Reserved", SectionType); else - W.printEnum("Type", Flags, makeArrayRef(SectionTypeFlagsNames)); + W.printEnum("Type", SectionType, makeArrayRef(SectionTypeFlagsNames)); } if (opts::SectionRelocations) diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 1bd5bb74bf2..4db13897879 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -231,26 +231,11 @@ namespace opts { "codeview-subsection-bytes", cl::desc("Dump raw contents of codeview debug sections and records")); - // --arm-attributes - cl::opt ARMAttributes("arm-attributes", - cl::desc("Display the ARM attributes section")); - - // --mips-plt-got - cl::opt - MipsPLTGOT("mips-plt-got", - cl::desc("Display the MIPS GOT and PLT GOT sections")); - - // --mips-abi-flags - cl::opt MipsABIFlags("mips-abi-flags", - cl::desc("Display the MIPS.abiflags section")); - - // --mips-reginfo - cl::opt MipsReginfo("mips-reginfo", - cl::desc("Display the MIPS .reginfo section")); - - // --mips-options - cl::opt MipsOptions("mips-options", - cl::desc("Display the MIPS .MIPS.options section")); + // --arch-specific + cl::opt ArchSpecificInfo("arch-specific", + cl::desc("Displays 
architecture-specific information, if there is any.")); + cl::alias ArchSpecifcInfoShort("A", cl::desc("Alias for --arch-specific"), + cl::aliasopt(ArchSpecificInfo), cl::NotHidden); // --coff-imports cl::opt @@ -324,6 +309,11 @@ namespace opts { PrintStackMap("stackmap", cl::desc("Display contents of stackmap section")); + // --stack-sizes + cl::opt + PrintStackSizes("stack-sizes", + cl::desc("Display contents of all stack sizes sections")); + // --version-info, -V cl::opt VersionInfo("version-info", @@ -368,63 +358,45 @@ namespace opts { HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); } // namespace opts +static StringRef ToolName; + namespace llvm { -LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) { +LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) { + // Flush the standard output to print the error at a + // proper place. fouts().flush(); errs() << "\n"; - WithColor::error(errs()) << Msg << "\n"; + WithColor::error(errs(), ToolName) << Msg << "\n"; exit(1); } -void reportError(StringRef Input, Error Err) { +LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input) { + assert(Err); if (Input == "-") Input = ""; - error(createFileError(Input, std::move(Err))); + handleAllErrors(createFileError(Input, std::move(Err)), + [&](const ErrorInfoBase &EI) { error(EI.message()); }); + llvm_unreachable("error() call should never return"); } -void reportWarning(Twine Msg) { +void reportWarning(Error Err, StringRef Input) { + assert(Err); + if (Input == "-") + Input = ""; + + // Flush the standard output to print the warning at a + // proper place. 
fouts().flush(); - errs() << "\n"; - WithColor::warning(errs()) << Msg << "\n"; -} - -void warn(Error Err) { - handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) { - reportWarning(EI.message()); - }); -} - -void error(Error EC) { - if (!EC) - return; - handleAllErrors(std::move(EC), - [&](const ErrorInfoBase &EI) { reportError(EI.message()); }); -} - -void error(std::error_code EC) { - if (!EC) - return; - reportError(EC.message()); + handleAllErrors( + createFileError(Input, std::move(Err)), [&](const ErrorInfoBase &EI) { + errs() << "\n"; + WithColor::warning(errs(), ToolName) << EI.message() << "\n"; + }); } } // namespace llvm -static void reportError(StringRef Input, std::error_code EC) { - reportError(Input, errorCodeToError(EC)); -} - -static bool isMipsArch(unsigned Arch) { - switch (Arch) { - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - return true; - default: - return false; - } -} namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() @@ -471,19 +443,19 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, std::unique_ptr Dumper; if (std::error_code EC = createDumper(Obj, Writer, Dumper)) - reportError(FileStr, EC); + reportError(errorCodeToError(EC), FileStr); - Writer.startLine() << "\n"; - if (opts::Output == opts::LLVM) { + if (opts::Output == opts::LLVM || opts::InputFilenames.size() > 1 || A) { + Writer.startLine() << "\n"; Writer.printString("File", FileStr); + } + if (opts::Output == opts::LLVM) { Writer.printString("Format", Obj->getFileFormatName()); Writer.printString("Arch", Triple::getArchTypeName( (llvm::Triple::ArchType)Obj->getArch())); Writer.printString("AddressSize", formatv("{0}bit", 8 * Obj->getBytesInAddress())); Dumper->printLoadName(); - } else if (opts::Output == opts::GNU && A) { - Writer.printString("File", FileStr); } if (opts::FileHeaders) @@ -519,19 +491,8 @@ static void dumpObject(const ObjectFile *Obj, 
ScopedPrinter &Writer, if (Obj->isELF()) { if (opts::ELFLinkerOptions) Dumper->printELFLinkerOptions(); - if (Obj->getArch() == llvm::Triple::arm) - if (opts::ARMAttributes) - Dumper->printAttributes(); - if (isMipsArch(Obj->getArch())) { - if (opts::MipsPLTGOT) - Dumper->printMipsPLTGOT(); - if (opts::MipsABIFlags) - Dumper->printMipsABIFlags(); - if (opts::MipsReginfo) - Dumper->printMipsReginfo(); - if (opts::MipsOptions) - Dumper->printMipsOptions(); - } + if (opts::ArchSpecificInfo) + Dumper->printArchSpecificInfo(); if (opts::SectionGroups) Dumper->printGroupSections(); if (opts::HashHistogram) @@ -583,6 +544,8 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, } if (opts::PrintStackMap) Dumper->printStackMap(); + if (opts::PrintStackSizes) + Dumper->printStackSizes(); } /// Dumps each object file in \a Arc; @@ -591,9 +554,8 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) { for (auto &Child : Arc->children(Err)) { Expected> ChildOrErr = Child.getAsBinary(); if (!ChildOrErr) { - if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) { - reportError(Arc->getFileName(), std::move(E)); - } + if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) + reportError(std::move(E), Arc->getFileName()); continue; } if (ObjectFile *Obj = dyn_cast(&*ChildOrErr.get())) @@ -601,10 +563,11 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) { else if (COFFImportFile *Imp = dyn_cast(&*ChildOrErr.get())) dumpCOFFImportFile(Imp, Writer); else - reportError(Arc->getFileName(), readobj_error::unrecognized_file_format); + reportError(errorCodeToError(readobj_error::unrecognized_file_format), + Arc->getFileName()); } if (Err) - reportError(Arc->getFileName(), std::move(Err)); + reportError(std::move(Err), Arc->getFileName()); } /// Dumps each object file in \a MachO Universal Binary; @@ -614,9 +577,8 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary, Expected> ObjOrErr 
= Obj.getAsObjectFile(); if (ObjOrErr) dumpObject(&*ObjOrErr.get(), Writer); - else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) { - reportError(UBinary->getFileName(), ObjOrErr.takeError()); - } + else if (auto E = isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) + reportError(ObjOrErr.takeError(), UBinary->getFileName()); else if (Expected> AOrErr = Obj.getAsArchive()) dumpArchive(&*AOrErr.get(), Writer); } @@ -627,7 +589,7 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes, ScopedPrinter &Printer) { WindowsRes::Dumper Dumper(WinRes, Printer); if (auto Err = Dumper.printData()) - reportError(WinRes->getFileName(), std::move(Err)); + reportError(std::move(Err), WinRes->getFileName()); } @@ -636,7 +598,7 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) { // Attempt to open the binary. Expected> BinaryOrErr = createBinary(File); if (!BinaryOrErr) - reportError(File, BinaryOrErr.takeError()); + reportError(BinaryOrErr.takeError(), File); Binary &Binary = *BinaryOrErr.get().getBinary(); if (Archive *Arc = dyn_cast(&Binary)) @@ -651,7 +613,8 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) { else if (WindowsResource *WinRes = dyn_cast(&Binary)) dumpWindowsResourceFile(WinRes, Writer); else - reportError(File, readobj_error::unrecognized_file_format); + reportError(errorCodeToError(readobj_error::unrecognized_file_format), + File); CVTypes.Binaries.push_back(std::move(*BinaryOrErr)); } @@ -702,6 +665,7 @@ static void registerReadelfAliases() { int main(int argc, const char *argv[]) { InitLLVM X(argc, argv); + ToolName = argv[0]; // Register the target printer for --version. 
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); @@ -727,6 +691,10 @@ int main(int argc, const char *argv[]) { opts::UnwindInfo = true; opts::SectionGroups = true; opts::HashHistogram = true; + if (opts::Output == opts::LLVM) { + opts::Addrsig = true; + opts::PrintStackSizes = true; + } } if (opts::Headers) { diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 0e02da4cb84..d9813f5dea6 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -21,30 +21,13 @@ namespace llvm { } // Various helper functions. - LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg); - void reportError(StringRef Input, Error Err); - void reportWarning(Twine Msg); - void warn(llvm::Error Err); - void error(std::error_code EC); - void error(llvm::Error EC); - template T error(llvm::Expected &&E) { - error(E.takeError()); - return std::move(*E); - } + LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input); + void reportWarning(Error Err, StringRef Input); - template T unwrapOrError(ErrorOr EO) { + template T unwrapOrError(StringRef Input, Expected EO) { if (EO) return *EO; - reportError(EO.getError().message()); - } - template T unwrapOrError(Expected EO) { - if (EO) - return *EO; - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(EO.takeError(), OS); - OS.flush(); - reportError(Buf); + reportError(EO.takeError(), Input); } } // namespace llvm diff --git a/tools/llvm-reduce/CMakeLists.txt b/tools/llvm-reduce/CMakeLists.txt new file mode 100644 index 00000000000..48de0ffa78a --- /dev/null +++ b/tools/llvm-reduce/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos + Core + IRReader + Support + Target + TransformUtils + ) + +add_llvm_tool(llvm-reduce + llvm-reduce.cpp + TestRunner.cpp + deltas/Delta.cpp + deltas/ReduceFunctions.cpp + deltas/ReduceGlobalVars.cpp + 
deltas/ReduceMetadata.cpp + deltas/ReduceArguments.cpp + deltas/ReduceBasicBlocks.cpp + deltas/ReduceInstructions.cpp + + DEPENDS + intrinsics_gen + ) diff --git a/tools/llvm-reduce/DeltaManager.h b/tools/llvm-reduce/DeltaManager.h new file mode 100644 index 00000000000..2309c3adf4e --- /dev/null +++ b/tools/llvm-reduce/DeltaManager.h @@ -0,0 +1,36 @@ +//===- DeltaManager.h - Runs Delta Passes to reduce Input -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file calls each specialized Delta pass in order to reduce the input IR +// file. +// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" +#include "deltas/Delta.h" +#include "deltas/ReduceArguments.h" +#include "deltas/ReduceBasicBlocks.h" +#include "deltas/ReduceFunctions.h" +#include "deltas/ReduceGlobalVars.h" +#include "deltas/ReduceMetadata.h" +#include "deltas/ReduceInstructions.h" + +namespace llvm { + +// TODO: Add CLI option to run only specified Passes (for unit tests) +inline void runDeltaPasses(TestRunner &Tester) { + reduceFunctionsDeltaPass(Tester); + reduceBasicBlocksDeltaPass(Tester); + reduceGlobalsDeltaPass(Tester); + reduceMetadataDeltaPass(Tester); + reduceArgumentsDeltaPass(Tester); + reduceInstructionsDeltaPass(Tester); + // TODO: Implement the remaining Delta Passes +} + +} // namespace llvm diff --git a/tools/llvm-reduce/LLVMBuild.txt b/tools/llvm-reduce/LLVMBuild.txt new file mode 100644 index 00000000000..7928f050328 --- /dev/null +++ b/tools/llvm-reduce/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./tools/llvm-reduce/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-reduce +parent = Tools +required_libraries = + BitReader + IRReader + all-targets diff --git a/tools/llvm-reduce/TestRunner.cpp b/tools/llvm-reduce/TestRunner.cpp new file mode 100644 index 00000000000..d0e195d5697 --- /dev/null +++ b/tools/llvm-reduce/TestRunner.cpp @@ -0,0 +1,42 @@ +//===-- TestRunner.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" + +using namespace llvm; + +TestRunner::TestRunner(StringRef TestName, const std::vector &TestArgs) + : TestName(TestName), TestArgs(TestArgs) { +} + +/// Runs the interestingness test, passes file to be tested as first argument +/// and other specified test arguments after that. 
+int TestRunner::run(StringRef Filename) { + std::vector ProgramArgs; + ProgramArgs.push_back(TestName); + + for (const auto &Arg : TestArgs) + ProgramArgs.push_back(Arg); + + ProgramArgs.push_back(Filename); + + std::string ErrMsg; + int Result = sys::ExecuteAndWait( + TestName, ProgramArgs, /*Env=*/None, /*Redirects=*/None, + /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); + + if (Result < 0) { + Error E = make_error("Error running interesting-ness test: " + + ErrMsg, + inconvertibleErrorCode()); + errs() << toString(std::move(E)); + exit(1); + } + + return !Result; +} diff --git a/tools/llvm-reduce/TestRunner.h b/tools/llvm-reduce/TestRunner.h new file mode 100644 index 00000000000..2270d6bd90b --- /dev/null +++ b/tools/llvm-reduce/TestRunner.h @@ -0,0 +1,46 @@ +//===-- tools/llvm-reduce/TestRunner.h ---------------------------*- C++ -*-===/ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H +#define LLVM_TOOLS_LLVMREDUCE_TESTRUNNER_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include + +namespace llvm { + +// This class contains all the info necessary for running the provided +// interesting-ness test, as well as the most reduced module and its +// respective filename. 
+class TestRunner { +public: + TestRunner(StringRef TestName, const std::vector &TestArgs); + + /// Runs the interesting-ness test for the specified file + /// @returns 0 if test was successful, 1 if otherwise + int run(StringRef Filename); + + /// Returns the most reduced version of the original testcase + Module *getProgram() const { return Program.get(); } + + void setProgram(std::unique_ptr P) { Program = std::move(P); } + +private: + StringRef TestName; + const std::vector &TestArgs; + std::unique_ptr Program; +}; + +} // namespace llvm + +#endif diff --git a/tools/llvm-reduce/deltas/Delta.cpp b/tools/llvm-reduce/deltas/Delta.cpp new file mode 100644 index 00000000000..0642241ddeb --- /dev/null +++ b/tools/llvm-reduce/deltas/Delta.cpp @@ -0,0 +1,162 @@ +//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for the Delta Debugging Algorithm: +// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) +// into chunks and tries to reduce the number chunks that are interesting. 
+// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include +#include + +using namespace llvm; + +bool IsReduced(Module &M, TestRunner &Test, SmallString<128> &CurrentFilepath) { + // Write Module to tmp file + int FD; + std::error_code EC = + sys::fs::createTemporaryFile("llvm-reduce", "ll", FD, CurrentFilepath); + if (EC) { + errs() << "Error making unique filename: " << EC.message() << "!\n"; + exit(1); + } + + ToolOutputFile Out(CurrentFilepath, FD); + M.print(Out.os(), /*AnnotationWriter=*/nullptr); + Out.os().close(); + if (Out.os().has_error()) { + errs() << "Error emitting bitcode to file '" << CurrentFilepath << "'!\n"; + exit(1); + } + + // Current Chunks aren't interesting + return Test.run(CurrentFilepath); +} + +/// Counts the amount of lines for a given file +static int getLines(StringRef Filepath) { + int Lines = 0; + std::string CurrLine; + std::ifstream FileStream(Filepath); + + while (std::getline(FileStream, CurrLine)) + ++Lines; + + return Lines; +} + +/// Splits Chunks in half and prints them. +/// If unable to split (when chunk size is 1) returns false. +static bool increaseGranularity(std::vector &Chunks) { + errs() << "Increasing granularity..."; + std::vector NewChunks; + bool SplitOne = false; + + for (auto &C : Chunks) { + if (C.end - C.begin == 0) + NewChunks.push_back(C); + else { + int Half = (C.begin + C.end) / 2; + NewChunks.push_back({C.begin, Half}); + NewChunks.push_back({Half + 1, C.end}); + SplitOne = true; + } + } + if (SplitOne) { + Chunks = NewChunks; + errs() << "Success! 
New Chunks:\n"; + for (auto C : Chunks) { + errs() << '\t'; + C.print(); + errs() << '\n'; + } + } + return SplitOne; +} + +/// Runs the Delta Debugging algorithm, splits the code into chunks and +/// reduces the amount of chunks that are considered interesting by the +/// given test. +void llvm::runDeltaPass( + TestRunner &Test, int Targets, + std::function &, Module *)> + ExtractChunksFromModule) { + assert(Targets >= 0); + if (!Targets) { + errs() << "\nNothing to reduce\n"; + return; + } + + if (Module *Program = Test.getProgram()) { + SmallString<128> CurrentFilepath; + if (!IsReduced(*Program, Test, CurrentFilepath)) { + errs() << "\nInput isn't interesting! Verify interesting-ness test\n"; + exit(1); + } + } + + std::vector Chunks = {{1, Targets}}; + std::set UninterestingChunks; + std::unique_ptr ReducedProgram; + + if (!increaseGranularity(Chunks)) { + errs() << "\nAlready at minimum size. Cannot reduce anymore.\n"; + return; + } + + do { + UninterestingChunks = {}; + for (int I = Chunks.size() - 1; I >= 0; --I) { + std::vector CurrentChunks; + + for (auto C : Chunks) + if (!UninterestingChunks.count(C) && C != Chunks[I]) + CurrentChunks.push_back(C); + + if (CurrentChunks.empty()) + continue; + + // Clone module before hacking it up.. 
+ std::unique_ptr Clone = CloneModule(*Test.getProgram()); + // Generate Module with only Targets inside Current Chunks + ExtractChunksFromModule(CurrentChunks, Clone.get()); + + errs() << "Ignoring: "; + Chunks[I].print(); + for (auto C : UninterestingChunks) + C.print(); + + + + SmallString<128> CurrentFilepath; + if (!IsReduced(*Clone, Test, CurrentFilepath)) { + errs() << "\n"; + continue; + } + + UninterestingChunks.insert(Chunks[I]); + ReducedProgram = std::move(Clone); + errs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n"; + } + // Delete uninteresting chunks + erase_if(Chunks, [&UninterestingChunks](const Chunk &C) { + return UninterestingChunks.count(C); + }); + + } while (!UninterestingChunks.empty() || increaseGranularity(Chunks)); + + // If we reduced the testcase replace it + if (ReducedProgram) + Test.setProgram(std::move(ReducedProgram)); + errs() << "Couldn't increase anymore.\n"; +} diff --git a/tools/llvm-reduce/deltas/Delta.h b/tools/llvm-reduce/deltas/Delta.h new file mode 100644 index 00000000000..dbb18e4bd07 --- /dev/null +++ b/tools/llvm-reduce/deltas/Delta.h @@ -0,0 +1,76 @@ +//===- Delta.h - Delta Debugging Algorithm Implementation -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for the Delta Debugging Algorithm: +// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.) +// into chunks and tries to reduce the number chunks that are interesting. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H +#define LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H + +#include "TestRunner.h" +#include +#include +#include + +namespace llvm { + +struct Chunk { + int begin; + int end; + + /// Helper function to verify if a given Target-index is inside the Chunk + bool contains(int Index) const { return Index >= begin && Index <= end; } + + void print() const { + errs() << "[" << begin; + if (end - begin != 0) + errs() << "," << end; + errs() << "]"; + } + + /// Operator when populating CurrentChunks in Generic Delta Pass + friend bool operator!=(const Chunk &C1, const Chunk &C2) { + return C1.begin != C2.begin || C1.end != C2.end; + } + + /// Operator used for sets + friend bool operator<(const Chunk &C1, const Chunk &C2) { + return std::tie(C1.begin, C1.end) < std::tie(C2.begin, C2.end); + } +}; + +/// This function implements the Delta Debugging algorithm, it receives a +/// number of Targets (e.g. Functions, Instructions, Basic Blocks, etc.) and +/// splits them in half; these chunks of targets are then tested while ignoring +/// one chunk, if a chunk is proven to be uninteresting (i.e. fails the test) +/// it is removed from consideration. The algorithm will attempt to split the +/// Chunks in half and start the process again until it can't split chunks +/// anymore. +/// +/// This function is intended to be called by each specialized delta pass (e.g. +/// RemoveFunctions) and receives three key parameters: +/// * Test: The main TestRunner instance which is used to run the provided +/// interesting-ness test, as well as to store and access the reduced Program. +/// * Targets: The amount of Targets that are going to be reduced by the +/// algorithm, for example, the RemoveGlobalVars pass would send the amount of +/// initialized GVs. 
+/// * ExtractChunksFromModule: A function used to tailor the main program so it +/// only contains Targets that are inside Chunks of the given iteration. +/// Note: This function is implemented by each specialized Delta pass +/// +/// Other implementations of the Delta Debugging algorithm can also be found in +/// the CReduce, Delta, and Lithium projects. +void runDeltaPass(TestRunner &Test, int Targets, + std::function &, Module *)> + ExtractChunksFromModule); +} // namespace llvm + +#endif diff --git a/tools/llvm-reduce/deltas/ReduceArguments.cpp b/tools/llvm-reduce/deltas/ReduceArguments.cpp new file mode 100644 index 00000000000..f5f14b83f42 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceArguments.cpp @@ -0,0 +1,125 @@ +//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceArguments.h" +#include "Delta.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +using namespace llvm; + +/// Goes over OldF calls and replaces them with a call to NewF +static void replaceFunctionCalls(Function &OldF, Function &NewF, + const std::set &ArgIndexesToKeep) { + const auto &Users = OldF.users(); + for (auto I = Users.begin(), E = Users.end(); I != E; ) + if (auto *CI = dyn_cast(*I++)) { + SmallVector Args; + for (auto ArgI = CI->arg_begin(), E = CI->arg_end(); ArgI != E; ++ArgI) + if (ArgIndexesToKeep.count(ArgI - CI->arg_begin())) + Args.push_back(*ArgI); + + CallInst *NewCI = CallInst::Create(&NewF, Args); + NewCI->setCallingConv(NewF.getCallingConv()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + ReplaceInstWithInst(CI, NewCI); + } +} + +/// Removes out-of-chunk arguments from functions, and modifies their calls +/// accordingly. It also removes allocations of out-of-chunk arguments. 
+static void extractArgumentsFromModule(std::vector ChunksToKeep, + Module *Program) { + int I = 0, ArgCount = 0; + std::set ArgsToKeep; + std::vector Funcs; + // Get inside-chunk arguments, as well as their parent function + for (auto &F : *Program) + if (!F.isDeclaration()) { + Funcs.push_back(&F); + for (auto &A : F.args()) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++ArgCount)) + ArgsToKeep.insert(&A); + if (ChunksToKeep[I].end == ArgCount) + ++I; + } + } + + for (auto *F : Funcs) { + ValueToValueMapTy VMap; + std::vector InstToDelete; + for (auto &A : F->args()) + if (!ArgsToKeep.count(&A)) { + // By adding undesired arguments to the VMap, CloneFunction will remove + // them from the resulting Function + VMap[&A] = UndefValue::get(A.getType()); + for (auto *U : A.users()) + if (auto *I = dyn_cast(*&U)) + InstToDelete.push_back(I); + } + // Delete any instruction that uses the argument + for (auto *I : InstToDelete) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + // No arguments to reduce + if (VMap.empty()) + continue; + + std::set ArgIndexesToKeep; + int ArgI = 0; + for (auto &Arg : F->args()) + if (ArgsToKeep.count(&Arg)) + ArgIndexesToKeep.insert(++ArgI); + + auto *ClonedFunc = CloneFunction(F, VMap); + // In order to preserve function order, we move Clone after old Function + ClonedFunc->removeFromParent(); + Program->getFunctionList().insertAfter(F->getIterator(), ClonedFunc); + + replaceFunctionCalls(*F, *ClonedFunc, ArgIndexesToKeep); + // Rename Cloned Function to Old's name + std::string FName = F->getName(); + F->eraseFromParent(); + ClonedFunc->setName(FName); + } +} + +/// Counts the amount of arguments in non-declaration functions and prints their +/// respective name, index, and parent function name +static int countArguments(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + outs() << "Param Index Reference:\n"; + int 
ArgsCount = 0; + for (auto &F : *Program) + if (!F.isDeclaration() && F.arg_size()) { + outs() << " " << F.getName() << "\n"; + for (auto &A : F.args()) + outs() << "\t" << ++ArgsCount << ": " << A.getName() << "\n"; + + outs() << "----------------------------\n"; + } + + return ArgsCount; +} + +void llvm::reduceArgumentsDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Arguments...\n"; + int ArgCount = countArguments(Test.getProgram()); + runDeltaPass(Test, ArgCount, extractArgumentsFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceArguments.h b/tools/llvm-reduce/deltas/ReduceArguments.h new file mode 100644 index 00000000000..d9682b44f74 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceArguments.h @@ -0,0 +1,21 @@ +//===- ReduceArguments.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. 
+// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/IR/Argument.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceArgumentsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp new file mode 100644 index 00000000000..03c3962d2fd --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp @@ -0,0 +1,146 @@ +//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceBasicBlocks.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +/// Replaces BB Terminator with one that only contains Chunk BBs +static void replaceBranchTerminator(BasicBlock &BB, + std::set BBsToKeep) { + auto Term = BB.getTerminator(); + std::vector ChunkSucessors; + for (auto Succ : successors(&BB)) + if (BBsToKeep.count(Succ)) + ChunkSucessors.push_back(Succ); + + // BB only references Chunk BBs + if (ChunkSucessors.size() == Term->getNumSuccessors()) + return; + + bool IsBranch = isa(Term); + Value *Address = nullptr; + if (auto IndBI = dyn_cast(Term)) + Address = IndBI->getAddress(); + + Term->eraseFromParent(); + + if (ChunkSucessors.empty()) { + ReturnInst::Create(BB.getContext(), nullptr, &BB); + return; + } + + if (IsBranch) + BranchInst::Create(ChunkSucessors[0], &BB); + + if (Address) { + auto NewIndBI = + IndirectBrInst::Create(Address, ChunkSucessors.size(), &BB); + for (auto Dest : ChunkSucessors) + NewIndBI->addDestination(Dest); + } +} + +/// Removes uninteresting BBs from switch, if the default case ends up being +/// uninteresting, the switch is replaced with a void return (since it has to be +/// replace with something) +static void removeUninterestingBBsFromSwitch(SwitchInst &SwInst, + std::set BBsToKeep) { + if (!BBsToKeep.count(SwInst.getDefaultDest())) { + ReturnInst::Create(SwInst.getContext(), nullptr, SwInst.getParent()); + SwInst.eraseFromParent(); + } else + for (int I = 0, E = SwInst.getNumCases(); I != E; ++I) { + auto Case = SwInst.case_begin() + I; + if (!BBsToKeep.count(Case->getCaseSuccessor())) { + SwInst.removeCase(Case); + --I; + --E; + } + } +} + +/// Removes out-of-chunk arguments from functions, 
and modifies their calls +/// accordingly. It also removes allocations of out-of-chunk arguments. +static void extractBasicBlocksFromModule(std::vector ChunksToKeep, + Module *Program) { + int I = 0, BBCount = 0; + std::set BBsToKeep; + + for (auto &F : *Program) + for (auto &BB : F) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++BBCount)) + BBsToKeep.insert(&BB); + if (ChunksToKeep[I].end == BBCount) + ++I; + } + + std::vector BBsToDelete; + for (auto &F : *Program) + for (auto &BB : F) { + if (!BBsToKeep.count(&BB)) { + BBsToDelete.push_back(&BB); + // Remove out-of-chunk BB from successor phi nodes + for (auto *Succ : successors(&BB)) + Succ->removePredecessor(&BB); + } + } + + // Replace terminators that reference out-of-chunk BBs + for (auto &F : *Program) + for (auto &BB : F) { + if (auto *SwInst = dyn_cast(BB.getTerminator())) + removeUninterestingBBsFromSwitch(*SwInst, BBsToKeep); + else + replaceBranchTerminator(BB, BBsToKeep); + } + + // Replace out-of-chunk switch uses + for (auto &BB : BBsToDelete) { + // Instructions might be referenced in other BBs + for (auto &I : *BB) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + BB->eraseFromParent(); + } +} + +/// Counts the amount of basic blocks and prints their name & respective index +static int countBasicBlocks(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + int BBCount = 0; + for (auto &F : *Program) + for (auto &BB : F) { + if (BB.hasName()) + outs() << "\t" << ++BBCount << ": " << BB.getName() << "\n"; + else + outs() << "\t" << ++BBCount << ": Unnamed\n"; + } + + return BBCount; +} + +void llvm::reduceBasicBlocksDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Basic Blocks...\n"; + int BBCount = countBasicBlocks(Test.getProgram()); + runDeltaPass(Test, BBCount, extractBasicBlocksFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceBasicBlocks.h b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h new 
file mode 100644 index 00000000000..cf76a0abbcd --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceBasicBlocks.h @@ -0,0 +1,20 @@ +//===- ReduceArguments.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceBasicBlocksDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/tools/llvm-reduce/deltas/ReduceFunctions.cpp new file mode 100644 index 00000000000..3382f35a945 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceFunctions.cpp @@ -0,0 +1,77 @@ +//===- ReduceFunctions.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce functions (and any instruction that calls it) in the provided +// Module. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceFunctions.h" +#include "Delta.h" +#include "llvm/ADT/SetVector.h" +#include + +using namespace llvm; + +/// Removes all the Defined Functions (as well as their calls) +/// that aren't inside any of the desired Chunks. +static void extractFunctionsFromModule(const std::vector &ChunksToKeep, + Module *Program) { + // Get functions inside desired chunks + std::set FuncsToKeep; + int I = 0, FunctionCount = 0; + for (auto &F : *Program) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++FunctionCount)) + FuncsToKeep.insert(&F); + if (FunctionCount == ChunksToKeep[I].end) + ++I; + } + + // Delete out-of-chunk functions, and replace their calls with undef + std::vector FuncsToRemove; + SetVector CallsToRemove; + for (auto &F : *Program) + if (!FuncsToKeep.count(&F)) { + for (auto U : F.users()) + if (auto *Call = dyn_cast(U)) { + Call->replaceAllUsesWith(UndefValue::get(Call->getType())); + CallsToRemove.insert(Call); + } + F.replaceAllUsesWith(UndefValue::get(F.getType())); + FuncsToRemove.push_back(&F); + } + + for (auto *C : CallsToRemove) + C->eraseFromParent(); + + for (auto *F : FuncsToRemove) + F->eraseFromParent(); +} + +/// Counts the amount of non-declaration functions and prints their +/// respective name & index +static int countFunctions(Module *Program) { + // TODO: Silence index with --quiet flag + errs() << "----------------------------\n"; + errs() << "Function Index Reference:\n"; + int FunctionCount = 0; + for (auto &F : *Program) + errs() << "\t" << ++FunctionCount << ": " << F.getName() << "\n"; + + errs() << "----------------------------\n"; + return FunctionCount; +} + +void llvm::reduceFunctionsDeltaPass(TestRunner &Test) { + errs() << "*** Reducing Functions...\n"; + int Functions = countFunctions(Test.getProgram()); + runDeltaPass(Test, Functions, extractFunctionsFromModule); + errs() << "----------------------------\n"; +} 
diff --git a/tools/llvm-reduce/deltas/ReduceFunctions.h b/tools/llvm-reduce/deltas/ReduceFunctions.h new file mode 100644 index 00000000000..7c2cd3f33e9 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceFunctions.h @@ -0,0 +1,20 @@ +//===- ReduceFunctions.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce functions (and any instruction that calls it) in the provided +// Module. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceFunctionsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp new file mode 100644 index 00000000000..5732208ee0a --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp @@ -0,0 +1,74 @@ +//===- ReduceGlobalVars.cpp - Specialized Delta Pass ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce initialized Global Variables in the provided Module. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceGlobalVars.h" +#include + +using namespace llvm; + +/// Removes all the Initialized GVs that aren't inside the desired Chunks. +static void extractGVsFromModule(std::vector ChunksToKeep, + Module *Program) { + // Get GVs inside desired chunks + std::set GVsToKeep; + int I = 0, GVCount = 0; + for (auto &GV : Program->globals()) + if (GV.hasInitializer() && I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++GVCount)) + GVsToKeep.insert(&GV); + if (GVCount == ChunksToKeep[I].end) + ++I; + } + + // Delete out-of-chunk GVs and their uses + std::vector ToRemove; + std::vector InstToRemove; + for (auto &GV : Program->globals()) + if (GV.hasInitializer() && !GVsToKeep.count(&GV)) { + for (auto U : GV.users()) + if (auto *Inst = dyn_cast(U)) + InstToRemove.push_back(Inst); + + GV.replaceAllUsesWith(UndefValue::get(GV.getType())); + ToRemove.push_back(&GV); + } + + // Delete Instruction uses of unwanted GVs + for (auto *Inst : InstToRemove) { + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->eraseFromParent(); + } + + for (auto *GV : ToRemove) + GV->eraseFromParent(); +} + +/// Counts the amount of initialized GVs and displays their +/// respective name & index +static int countGVs(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + outs() << "GlobalVariable Index Reference:\n"; + int GVCount = 0; + for (auto &GV : Program->globals()) + if (GV.hasInitializer()) + outs() << "\t" << ++GVCount << ": " << GV.getName() << "\n"; + outs() << "----------------------------\n"; + return GVCount; +} + +void llvm::reduceGlobalsDeltaPass(TestRunner &Test) { + outs() << "*** Reducing GVs...\n"; + int GVCount = countGVs(Test.getProgram()); + runDeltaPass(Test, GVCount, extractGVsFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceGlobalVars.h b/tools/llvm-reduce/deltas/ReduceGlobalVars.h 
new file mode 100644 index 00000000000..d4a870aded5 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceGlobalVars.h @@ -0,0 +1,20 @@ +//===- ReduceGlobalVars.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce initialized Global Variables in the provided Module. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceGlobalsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/tools/llvm-reduce/deltas/ReduceInstructions.cpp new file mode 100644 index 00000000000..b3497ad2dc0 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceInstructions.cpp @@ -0,0 +1,65 @@ +//===- ReduceArguments.cpp - Specialized Delta Pass -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "ReduceInstructions.h" + +using namespace llvm; + +/// Removes out-of-chunk arguments from functions, and modifies their calls +/// accordingly. It also removes allocations of out-of-chunk arguments. 
+static void extractInstrFromModule(std::vector ChunksToKeep, + Module *Program) { + int I = 0, InstCount = 0; + std::set InstToKeep; + + for (auto &F : *Program) + for (auto &BB : F) + for (auto &Inst : BB) + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(++InstCount)) + InstToKeep.insert(&Inst); + if (ChunksToKeep[I].end == InstCount) + ++I; + } + + std::vector InstToDelete; + for (auto &F : *Program) + for (auto &BB : F) + for (auto &Inst : BB) + if (!InstToKeep.count(&Inst)) { + Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); + InstToDelete.push_back(&Inst); + } + + for (auto &I : InstToDelete) + I->eraseFromParent(); +} + +/// Counts the amount of basic blocks and prints their name & respective index +static unsigned countInstructions(Module *Program) { + // TODO: Silence index with --quiet flag + outs() << "----------------------------\n"; + int InstCount = 0; + for (auto &F : *Program) + for (auto &BB : F) + InstCount += BB.getInstList().size(); + outs() << "Number of instructions: " << InstCount << "\n"; + + return InstCount; +} + +void llvm::reduceInstructionsDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Insructions...\n"; + unsigned InstCount = countInstructions(Test.getProgram()); + runDeltaPass(Test, InstCount, extractInstrFromModule); +} diff --git a/tools/llvm-reduce/deltas/ReduceInstructions.h b/tools/llvm-reduce/deltas/ReduceInstructions.h new file mode 100644 index 00000000000..a9266acd051 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceInstructions.h @@ -0,0 +1,20 @@ +//===- ReduceArguments.h - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting Arguments from defined functions. +// +//===----------------------------------------------------------------------===// + +#include "Delta.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" + +namespace llvm { +void reduceInstructionsDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.cpp b/tools/llvm-reduce/deltas/ReduceMetadata.cpp new file mode 100644 index 00000000000..4ea223546ef --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceMetadata.cpp @@ -0,0 +1,138 @@ +//===- ReduceMetadata.cpp - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements two functions used by the Generic Delta Debugging +// Algorithm, which are used to reduce Metadata nodes. 
+// +//===----------------------------------------------------------------------===// + +#include "ReduceMetadata.h" +#include "Delta.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +using namespace llvm; + +/// Adds all Unnamed Metadata Nodes that are inside desired Chunks to set +template +static void getChunkMetadataNodes(T &MDUser, int &I, + const std::vector &ChunksToKeep, + std::set &SeenNodes, + std::set &NodesToKeep) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (auto &MD : MDs) { + SeenNodes.insert(MD.second); + if (I < (int)ChunksToKeep.size()) { + if (ChunksToKeep[I].contains(SeenNodes.size())) + NodesToKeep.insert(MD.second); + if (ChunksToKeep[I].end == (int)SeenNodes.size()) + ++I; + } + } +} + +/// Erases out-of-chunk unnamed metadata nodes from its user +template +static void eraseMetadataIfOutsideChunk(T &MDUser, + const std::set &NodesToKeep) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (int I = 0, E = MDs.size(); I != E; ++I) + if (!NodesToKeep.count(MDs[I].second)) + MDUser.setMetadata(I, NULL); +} + +/// Removes all the Named and Unnamed Metadata Nodes, as well as any debug +/// functions that aren't inside the desired Chunks. 
+static void extractMetadataFromModule(const std::vector &ChunksToKeep, + Module *Program) { + std::set SeenNodes; + std::set NodesToKeep; + int I = 0; + + // Add chunk MDNodes used by GVs, Functions, and Instructions to set + for (auto &GV : Program->globals()) + getChunkMetadataNodes(GV, I, ChunksToKeep, SeenNodes, NodesToKeep); + + for (auto &F : *Program) { + getChunkMetadataNodes(F, I, ChunksToKeep, SeenNodes, NodesToKeep); + for (auto &BB : F) + for (auto &Inst : BB) + getChunkMetadataNodes(Inst, I, ChunksToKeep, SeenNodes, NodesToKeep); + } + + // Once more, go over metadata nodes, but deleting the ones outside chunks + for (auto &GV : Program->globals()) + eraseMetadataIfOutsideChunk(GV, NodesToKeep); + + for (auto &F : *Program) { + eraseMetadataIfOutsideChunk(F, NodesToKeep); + for (auto &BB : F) + for (auto &Inst : BB) + eraseMetadataIfOutsideChunk(Inst, NodesToKeep); + } + + + // Get out-of-chunk Named metadata nodes + unsigned MetadataCount = SeenNodes.size(); + std::vector NamedNodesToDelete; + for (auto &MD : Program->named_metadata()) { + if (I < (int)ChunksToKeep.size()) { + if (!ChunksToKeep[I].contains(++MetadataCount)) + NamedNodesToDelete.push_back(&MD); + if (ChunksToKeep[I].end == (int)SeenNodes.size()) + ++I; + } else + NamedNodesToDelete.push_back(&MD); + } + + for (auto *NN : NamedNodesToDelete) { + for (int I = 0, E = NN->getNumOperands(); I != E; ++I) + NN->setOperand(I, NULL); + NN->eraseFromParent(); + } +} + +// Gets unnamed metadata nodes used by a given instruction/GV/function and adds +// them to the set of seen nodes +template +static void addMetadataToSet(T &MDUser, std::set &UnnamedNodes) { + SmallVector, 4> MDs; + MDUser.getAllMetadata(MDs); + for (auto &MD : MDs) + UnnamedNodes.insert(MD.second); +} + +/// Returns the amount of Named and Unnamed Metadata Nodes +static int countMetadataTargets(Module *Program) { + std::set UnnamedNodes; + int NamedMetadataNodes = Program->named_metadata_size(); + + // Get metadata nodes used by 
globals + for (auto &GV : Program->globals()) + addMetadataToSet(GV, UnnamedNodes); + + // Do the same for nodes used by functions & instructions + for (auto &F : *Program) { + addMetadataToSet(F, UnnamedNodes); + for (auto &BB : F) + for (auto &I : BB) + addMetadataToSet(I, UnnamedNodes); + } + + return UnnamedNodes.size() + NamedMetadataNodes; +} + +void llvm::reduceMetadataDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Metadata...\n"; + int MDCount = countMetadataTargets(Test.getProgram()); + runDeltaPass(Test, MDCount, extractMetadataFromModule); + outs() << "----------------------------\n"; +} diff --git a/tools/llvm-reduce/deltas/ReduceMetadata.h b/tools/llvm-reduce/deltas/ReduceMetadata.h new file mode 100644 index 00000000000..275b44c2aa7 --- /dev/null +++ b/tools/llvm-reduce/deltas/ReduceMetadata.h @@ -0,0 +1,18 @@ +//===- ReduceMetadata.h - Specialized Delta Pass ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements two functions used by the Generic Delta Debugging +// Algorithm, which are used to reduce Metadata nodes. +// +//===----------------------------------------------------------------------===// + +#include "TestRunner.h" + +namespace llvm { +void reduceMetadataDeltaPass(TestRunner &Test); +} // namespace llvm diff --git a/tools/llvm-reduce/llvm-reduce.cpp b/tools/llvm-reduce/llvm-reduce.cpp new file mode 100644 index 00000000000..83dcf980a78 --- /dev/null +++ b/tools/llvm-reduce/llvm-reduce.cpp @@ -0,0 +1,114 @@ +//===- llvm-reduce.cpp - The LLVM Delta Reduction utility -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This program tries to reduce an IR test case for a given interesting-ness +// test. It runs multiple delta debugging passes in order to minimize the input +// file. It's worth noting that this is a part of the bugpoint redesign +// proposal, and thus a *temporary* tool that will eventually be integrated +// into the bugpoint tool itself. +// +//===----------------------------------------------------------------------===// + +#include "DeltaManager.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +static cl::opt Help("h", cl::desc("Alias for -help"), cl::Hidden); +static cl::opt Version("v", cl::desc("Alias for -version"), cl::Hidden); + +static cl::opt InputFilename(cl::Positional, cl::Required, + cl::desc("")); + +static cl::opt + TestFilename("test", cl::Required, + cl::desc("Name of the interesting-ness test to be run")); + +static cl::list + TestArguments("test-arg", cl::ZeroOrMore, + cl::desc("Arguments passed onto the interesting-ness test")); + +static cl::opt + OutputFilename("output", + cl::desc("Specify the output file. 
default: reduced.ll")); +static cl::alias OutputFileAlias("o", cl::desc("Alias for -output"), + cl::aliasopt(OutputFilename)); + +static cl::opt + ReplaceInput("in-place", + cl::desc("WARNING: This option will replace your input file" + "with the reduced version!")); + +// Parses IR into a Module and verifies it +static std::unique_ptr parseInputFile(StringRef Filename, + LLVMContext &Ctxt) { + SMDiagnostic Err; + std::unique_ptr Result = parseIRFile(Filename, Err, Ctxt); + if (!Result) { + Err.print("llvm-reduce", errs()); + return Result; + } + + if (verifyModule(*Result, &errs())) { + errs() << "Error: " << Filename << " - input module is broken!\n"; + return std::unique_ptr(); + } + + return Result; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + cl::ParseCommandLineOptions(argc, argv, "LLVM automatic testcase reducer.\n"); + + LLVMContext Context; + std::unique_ptr OriginalProgram = + parseInputFile(InputFilename, Context); + + // Initialize test environment + TestRunner Tester(TestFilename, TestArguments); + Tester.setProgram(std::move(OriginalProgram)); + + // Try to reduce code + runDeltaPasses(Tester); + + if (!Tester.getProgram()) { + errs() << "\nCouldnt reduce input :/\n"; + } else { + // Print reduced file to STDOUT + if (OutputFilename == "-") + Tester.getProgram()->print(outs(), nullptr); + else { + if (ReplaceInput) // In-place + OutputFilename = InputFilename.c_str(); + else if (OutputFilename.empty()) + OutputFilename = "reduced.ll"; + + std::error_code EC; + raw_fd_ostream Out(OutputFilename, EC); + if (EC) { + errs() << "Error opening output file: " << EC.message() << "!\n"; + exit(1); + } + Tester.getProgram()->print(Out, /*AnnotationWriter=*/nullptr); + errs() << "\nDone reducing! 
Reduced testcase: " << OutputFilename << "\n"; + } + } + + return 0; +} diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp index a7cc1deb8cf..3a36e770948 100644 --- a/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -27,12 +27,13 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include @@ -138,8 +139,21 @@ PrintAllocationRequests("print-alloc-requests", "manager by RuntimeDyld"), cl::Hidden); +static cl::opt ShowTimes("show-times", + cl::desc("Show times for llvm-rtdyld phases"), + cl::init(false)); + ExitOnError ExitOnErr; +struct RTDyldTimers { + TimerGroup RTDyldTG{"llvm-rtdyld timers", "timers for llvm-rtdyld phases"}; + Timer LoadObjectsTimer{"load", "time to load/add object files", RTDyldTG}; + Timer LinkTimer{"link", "time to link object files", RTDyldTG}; + Timer RunTimer{"run", "time to execute jitlink'd code", RTDyldTG}; +}; + +std::unique_ptr Timers; + /* *** */ using SectionIDMap = StringMap; @@ -441,8 +455,6 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) { continue; } object::section_iterator Sec = *SecOrErr; - StringRef SecName; - Sec->getName(SecName); Address.SectionIndex = Sec->getIndex(); uint64_t SectionLoadAddress = LoadedObjInfo->getSectionLoadAddress(*Sec); @@ -491,35 +503,41 @@ static int executeInput() { // If we don't have any input files, read from stdin. if (!InputFileList.size()) InputFileList.push_back("-"); - for (auto &File : InputFileList) { - // Load the input memory buffer. 
- ErrorOr> InputBuffer = - MemoryBuffer::getFileOrSTDIN(File); - if (std::error_code EC = InputBuffer.getError()) - ErrorAndExit("unable to read input: '" + EC.message() + "'"); - Expected> MaybeObj( - ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef())); + { + TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr); + for (auto &File : InputFileList) { + // Load the input memory buffer. + ErrorOr> InputBuffer = + MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = InputBuffer.getError()) + ErrorAndExit("unable to read input: '" + EC.message() + "'"); + Expected> MaybeObj( + ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef())); - if (!MaybeObj) { - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(MaybeObj.takeError(), OS); - OS.flush(); - ErrorAndExit("unable to create object file: '" + Buf + "'"); - } + if (!MaybeObj) { + std::string Buf; + raw_string_ostream OS(Buf); + logAllUnhandledErrors(MaybeObj.takeError(), OS); + OS.flush(); + ErrorAndExit("unable to create object file: '" + Buf + "'"); + } - ObjectFile &Obj = **MaybeObj; + ObjectFile &Obj = **MaybeObj; - // Load the object file - Dyld.loadObject(Obj); - if (Dyld.hasError()) { - ErrorAndExit(Dyld.getErrorString()); + // Load the object file + Dyld.loadObject(Obj); + if (Dyld.hasError()) { + ErrorAndExit(Dyld.getErrorString()); + } } } - // Resove all the relocations we can. - // FIXME: Error out if there are unresolved relocations. - Dyld.resolveRelocations(); + { + TimeRegion TR(Timers ? &Timers->LinkTimer : nullptr); + // Resove all the relocations we can. + // FIXME: Error out if there are unresolved relocations. + Dyld.resolveRelocations(); + } // Get the address of the entry point (_main by default). 
void *MainAddress = Dyld.getSymbolLocalAddress(EntryPoint); @@ -551,7 +569,13 @@ static int executeInput() { for (auto &Arg : InputArgv) Argv.push_back(Arg.data()); Argv.push_back(nullptr); - return Main(Argv.size() - 1, Argv.data()); + int Result = 0; + { + TimeRegion TR(Timers ? &Timers->RunTimer : nullptr); + Result = Main(Argv.size() - 1, Argv.data()); + } + + return Result; } static int checkAllExpressions(RuntimeDyldChecker &Checker) { @@ -891,7 +915,7 @@ static int linkAndVerify() { ObjectFile &Obj = **MaybeObj; if (!Checker) - Checker = llvm::make_unique( + Checker = std::make_unique( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetStubInfo, Obj.isLittleEndian() ? support::little : support::big, Disassembler.get(), InstPrinter.get(), dbgs()); @@ -937,16 +961,28 @@ int main(int argc, char **argv) { ExitOnErr.setBanner(std::string(argv[0]) + ": "); + Timers = ShowTimes ? std::make_unique() : nullptr; + + int Result; switch (Action) { case AC_Execute: - return executeInput(); + Result = executeInput(); + break; case AC_PrintDebugLineInfo: - return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */ true); + Result = + printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ true); + break; case AC_PrintLineInfo: - return printLineInfoForInput(/* LoadObjects */ true,/* UseDebugObj */false); + Result = + printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ false); + break; case AC_PrintObjectLineInfo: - return printLineInfoForInput(/* LoadObjects */false,/* UseDebugObj */false); + Result = + printLineInfoForInput(/* LoadObjects */ false, /* UseDebugObj */ false); + break; case AC_Verify: - return linkAndVerify(); + Result = linkAndVerify(); + break; } + return Result; } diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp index a455bf13fe7..5f36a785332 100644 --- a/tools/llvm-stress/llvm-stress.cpp +++ b/tools/llvm-stress/llvm-stress.cpp @@ -735,7 +735,7 @@ int main(int argc, char **argv) { 
cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n"); llvm_shutdown_obj Y; - auto M = llvm::make_unique("/tmp/autogen.bc", Context); + auto M = std::make_unique("/tmp/autogen.bc", Context); Function *F = GenEmptyFunction(M.get()); // Pick an initial seed value @@ -752,7 +752,7 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; return 1; diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index ea94cf9b69a..54ce87d4797 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -55,6 +55,10 @@ static cl::opt cl::desc("Interpret addresses as relative addresses"), cl::ReallyHidden); +static cl::opt ClUntagAddresses( + "untag-addresses", cl::init(true), + cl::desc("Remove memory tags from addresses before symbolization")); + static cl::opt ClPrintInlining("inlining", cl::init(true), cl::desc("Print all inlined frames for a given address")); @@ -274,6 +278,7 @@ int main(int argc, char **argv) { ClDemangle.setInitialValue(false); ClPrintFunctions.setInitialValue(FunctionNameKind::None); ClPrintInlining.setInitialValue(false); + ClUntagAddresses.setInitialValue(false); ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU); } @@ -290,6 +295,7 @@ int main(int argc, char **argv) { Opts.UseSymbolTable = ClUseSymbolTable; Opts.Demangle = ClDemangle; Opts.RelativeAddresses = ClUseRelativeAddress; + Opts.UntagAddresses = ClUntagAddresses; Opts.DefaultArch = ClDefaultArch; Opts.FallbackDebugPath = ClFallbackDebugPath; Opts.DWPName = ClDwpName; diff --git a/tools/llvm-xray/func-id-helper.cpp b/tools/llvm-xray/func-id-helper.cpp index dc821a420c6..afc912a6398 100644 --- a/tools/llvm-xray/func-id-helper.cpp +++ b/tools/llvm-xray/func-id-helper.cpp @@ -36,7 
+36,7 @@ std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const { ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection; if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress)) { auto &DI = *ResOrErr; - if (DI.FunctionName == "") + if (DI.FunctionName == DILineInfo::BadString) F << "@(" << std::hex << It->second << ")"; else F << DI.FunctionName; diff --git a/tools/llvm-xray/xray-account.cpp b/tools/llvm-xray/xray-account.cpp index 2b49a311d7e..e37cd212377 100644 --- a/tools/llvm-xray/xray-account.cpp +++ b/tools/llvm-xray/xray-account.cpp @@ -421,7 +421,7 @@ static CommandRegistration Unused(&Account, []() -> Error { } std::error_code EC; - raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + AccountOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp index dfc757e0f27..7258245b95c 100644 --- a/tools/llvm-xray/xray-converter.cpp +++ b/tools/llvm-xray/xray-converter.cpp @@ -387,8 +387,8 @@ static CommandRegistration Unused(&Convert, []() -> Error { std::error_code EC; raw_fd_ostream OS(ConvertOutput, EC, ConvertOutputFormat == ConvertFormats::BINARY - ? sys::fs::OpenFlags::F_None - : sys::fs::OpenFlags::F_Text); + ? 
sys::fs::OpenFlags::OF_None + : sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-extract.cpp b/tools/llvm-xray/xray-extract.cpp index 7c7d26b5a38..7800b88d9ee 100644 --- a/tools/llvm-xray/xray-extract.cpp +++ b/tools/llvm-xray/xray-extract.cpp @@ -80,7 +80,7 @@ static CommandRegistration Unused(&Extract, []() -> Error { InstrumentationMapOrError.takeError()); std::error_code EC; - raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-fdr-dump.cpp b/tools/llvm-xray/xray-fdr-dump.cpp index 81a93cac57c..295f7a78765 100644 --- a/tools/llvm-xray/xray-fdr-dump.cpp +++ b/tools/llvm-xray/xray-fdr-dump.cpp @@ -51,7 +51,7 @@ static CommandRegistration Unused(&Dump, []() -> Error { sys::fs::closeFile(*FDOrErr); DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8); - uint32_t OffsetPtr = 0; + uint64_t OffsetPtr = 0; auto FileHeaderOrError = readBinaryFormatHeader(DE, OffsetPtr); if (!FileHeaderOrError) diff --git a/tools/llvm-xray/xray-graph-diff.cpp b/tools/llvm-xray/xray-graph-diff.cpp index a514be97f40..116aa6869ec 100644 --- a/tools/llvm-xray/xray-graph-diff.cpp +++ b/tools/llvm-xray/xray-graph-diff.cpp @@ -470,7 +470,7 @@ static CommandRegistration Unused(&GraphDiff, []() -> Error { auto &GDR = *GDROrErr; std::error_code EC; - raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + GraphDiffOutput + "' for writing.", EC); diff --git a/tools/llvm-xray/xray-graph.cpp b/tools/llvm-xray/xray-graph.cpp index c09357fcb50..0be511219c1 100644 --- a/tools/llvm-xray/xray-graph.cpp +++ 
b/tools/llvm-xray/xray-graph.cpp @@ -506,7 +506,7 @@ static CommandRegistration Unused(&GraphC, []() -> Error { auto &GR = *GROrError; std::error_code EC; - raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); + raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::OF_Text); if (EC) return make_error( Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index ccf8b073b82..15495a511d0 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -523,7 +523,6 @@ int main(int argc, char **argv) { initializeDwarfEHPreparePass(Registry); initializeSafeStackLegacyPassPass(Registry); initializeSjLjEHPreparePass(Registry); - initializeStackProtectorPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeGlobalMergePass(Registry); initializeIndirectBrExpandPassPass(Registry); @@ -612,7 +611,9 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out.reset(new ToolOutputFile(OutputFilename, EC, sys::fs::F_None)); + sys::fs::OpenFlags Flags = OutputAssembly ? 
sys::fs::OF_Text + : sys::fs::OF_None; + Out.reset(new ToolOutputFile(OutputFilename, EC, Flags)); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -620,7 +621,7 @@ int main(int argc, char **argv) { if (!ThinLinkBitcodeFile.empty()) { ThinLinkOut.reset( - new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::F_None)); + new ToolOutputFile(ThinLinkBitcodeFile, EC, sys::fs::OF_None)); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -720,8 +721,8 @@ int main(int argc, char **argv) { OutputFilename = "-"; std::error_code EC; - Out = llvm::make_unique(OutputFilename, EC, - sys::fs::F_None); + Out = std::make_unique(OutputFilename, EC, + sys::fs::OF_None); if (EC) { errs() << EC.message() << '\n'; return 1; @@ -867,7 +868,7 @@ int main(int argc, char **argv) { assert(Out); OS = &Out->os(); if (RunTwice) { - BOS = make_unique(Buffer); + BOS = std::make_unique(Buffer); OS = BOS.get(); } if (OutputAssembly) { diff --git a/tools/vfabi-demangle-fuzzer/CMakeLists.txt b/tools/vfabi-demangle-fuzzer/CMakeLists.txt new file mode 100644 index 00000000000..908364690f5 --- /dev/null +++ b/tools/vfabi-demangle-fuzzer/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Analysis + Support +) +add_llvm_fuzzer(vfabi-demangler-fuzzer + vfabi-demangler-fuzzer.cpp +) diff --git a/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp new file mode 100644 index 00000000000..13657effbbe --- /dev/null +++ b/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp @@ -0,0 +1,26 @@ +//===-- vfabi-demangler-fuzzer.cpp - Fuzzer VFABI using lib/Fuzzer ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Build tool to fuzz the demangler for the vector function ABI names. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + const StringRef MangledName((const char *)Data, Size); + const auto Info = VFABI::tryDemangleForVFABI(MangledName); + + // Do not optimize away the return value. Inspired by + // https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307-L345 + asm volatile("" : : "r,m"(Info) : "memory"); + + return 0; +} diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 146d10835b8..1d39b300091 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -1111,6 +1111,7 @@ static std::string getEnumNameForToken(StringRef Str) { case '<': Res += "_LT_"; break; case '>': Res += "_GT_"; break; case '-': Res += "_MINUS_"; break; + case '#': Res += "_HASH_"; break; default: if ((*it >= 'A' && *it <= 'Z') || (*it >= 'a' && *it <= 'z') || @@ -1439,7 +1440,7 @@ void AsmMatcherInfo::buildOperandMatchInfo() { /// Map containing a mask with all operands indices that can be found for /// that class inside a instruction. 
- typedef std::map> OpClassMaskTy; + typedef std::map>> OpClassMaskTy; OpClassMaskTy OpClassMask; for (const auto &MI : Matchables) { @@ -1515,7 +1516,7 @@ void AsmMatcherInfo::buildInfo() { if (!V.empty() && V != Variant.Name) continue; - auto II = llvm::make_unique(*CGI); + auto II = std::make_unique(*CGI); II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); @@ -1532,7 +1533,7 @@ void AsmMatcherInfo::buildInfo() { std::vector AllInstAliases = Records.getAllDerivedDefinitions("InstAlias"); for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) { - auto Alias = llvm::make_unique(AllInstAliases[i], + auto Alias = std::make_unique(AllInstAliases[i], Target); // If the tblgen -match-prefix option is specified (for tblgen hackers), @@ -1546,7 +1547,7 @@ void AsmMatcherInfo::buildInfo() { if (!V.empty() && V != Variant.Name) continue; - auto II = llvm::make_unique(std::move(Alias)); + auto II = std::make_unique(std::move(Alias)); II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst); @@ -1615,7 +1616,7 @@ void AsmMatcherInfo::buildInfo() { II->TheDef->getValueAsString("TwoOperandAliasConstraint"); if (Constraint != "") { // Start by making a copy of the original matchable. - auto AliasII = llvm::make_unique(*II); + auto AliasII = std::make_unique(*II); // Adjust it to be a two-operand alias. 
AliasII->formTwoOperandAlias(Constraint); @@ -2381,7 +2382,7 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target, OS << " NumMatchClassKinds\n"; OS << "};\n\n"; - OS << "}\n\n"; + OS << "} // end anonymous namespace\n\n"; } /// emitMatchClassDiagStrings - Emit a function to get the diagnostic text to be @@ -2866,7 +2867,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, OS << " }\n"; OS << " };\n"; - OS << "} // end anonymous namespace.\n\n"; + OS << "} // end anonymous namespace\n\n"; OS << "static const OperandMatchEntry OperandMatchTable[" << Info.OperandMatchInfo.size() << "] = {\n"; @@ -3366,7 +3367,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; } OS << "};\n\n" - << "const static FeatureBitset FeatureBitsets[] {\n" + << "static constexpr FeatureBitset FeatureBitsets[] = {\n" << " {}, // AMFBS_None\n"; for (const auto &FeatureBitset : FeatureBitsets) { if (FeatureBitset.empty()) @@ -3422,7 +3423,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " }\n"; OS << " };\n"; - OS << "} // end anonymous namespace.\n\n"; + OS << "} // end anonymous namespace\n\n"; unsigned VariantCount = Target.getAsmParserVariantCount(); for (unsigned VC = 0; VC != VariantCount; ++VC) { diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 05d81f13350..b5c7f35be0e 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -784,8 +784,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { continue; // Aliases with priority 0 are never emitted. 
const DagInit *DI = R->getValueAsDag("ResultInst"); - const DefInit *Op = cast(DI->getOperator()); - AliasMap[getQualifiedName(Op->getDef())].insert( + AliasMap[getQualifiedName(DI->getOperatorAsDef(R->getLoc()))].insert( std::make_pair(CodeGenInstAlias(R, Target), Priority)); } diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index de5044e24d4..9eabb44d900 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -264,6 +264,10 @@ void CallingConvEmitter::EmitAction(Record *Action, Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; O << IndentStr << "LocInfo = CCValAssign::BCvt;\n"; + } else if (Action->isSubClassOf("CCTruncToType")) { + Record *DestTy = Action->getValueAsDef("DestTy"); + O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; + O << IndentStr << "LocInfo = CCValAssign::Trunc;\n"; } else if (Action->isSubClassOf("CCPassIndirect")) { Record *DestTy = Action->getValueAsDef("DestTy"); O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n"; diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp index da65763905a..42f69cb253d 100644 --- a/utils/TableGen/CodeEmitterGen.cpp +++ b/utils/TableGen/CodeEmitterGen.cpp @@ -16,6 +16,7 @@ #include "CodeGenTarget.h" #include "SubtargetFeatureInfo.h" #include "Types.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Casting.h" @@ -45,12 +46,19 @@ public: private: int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); std::string getInstructionCase(Record *R, CodeGenTarget &Target); + std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef, + CodeGenTarget &Target); void AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, unsigned &NumberedOp, std::set &NamedOpIndices, 
std::string &Case, CodeGenTarget &Target); + void emitInstructionBaseValues( + raw_ostream &o, ArrayRef NumberedInstructions, + CodeGenTarget &Target, int HwMode = -1); + unsigned BitWidth; + bool UseAPInt; }; // If the VarBitInit at position 'bit' matches the specified variable then @@ -126,7 +134,10 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, std::pair SO = CGI.Operands.getSubOperandNumber(OpIdx); std::string &EncoderMethodName = CGI.Operands[SO.first].EncoderMethodName; - + + if (UseAPInt) + Case += " op.clearAllBits();\n"; + // If the source operand has a custom encoder, use it. This will // get the encoding for all of the suboperands. if (!EncoderMethodName.empty()) { @@ -134,18 +145,54 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, // sub-operands, if there are more than one, so only // query the encoder once per source operand. if (SO.second == 0) { - Case += " // op: " + VarName + "\n" + - " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); - Case += ", Fixups, STI"; - Case += ");\n"; + Case += " // op: " + VarName + "\n"; + if (UseAPInt) { + Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx); + Case += ", op"; + } else { + Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx); + } + Case += ", Fixups, STI);\n"; } } else { - Case += " // op: " + VarName + "\n" + - " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; - Case += ", Fixups, STI"; + Case += " // op: " + VarName + "\n"; + if (UseAPInt) { + Case += " getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; + Case += ", op, Fixups, STI"; + } else { + Case += " op = getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")"; + Case += ", Fixups, STI"; + } Case += ");\n"; } - + + // Precalculate the number of lits this variable contributes to in the + // operand. 
If there is a single lit (consecutive range of bits) we can use a + // destructive sequence on APInt that reduces memory allocations. + int numOperandLits = 0; + for (int tmpBit = bit; tmpBit >= 0;) { + int varBit = getVariableBit(VarName, BI, tmpBit); + + // If this bit isn't from a variable, skip it. + if (varBit == -1) { + --tmpBit; + continue; + } + + // Figure out the consecutive range of bits covered by this operand, in + // order to generate better encoding code. + int beginVarBit = varBit; + int N = 1; + for (--tmpBit; tmpBit >= 0;) { + varBit = getVariableBit(VarName, BI, tmpBit); + if (varBit == -1 || varBit != (beginVarBit - N)) + break; + ++N; + --tmpBit; + } + ++numOperandLits; + } + for (; bit >= 0; ) { int varBit = getVariableBit(VarName, BI, bit); @@ -166,20 +213,52 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, ++N; --bit; } - - uint64_t opMask = ~(uint64_t)0 >> (64-N); - int opShift = beginVarBit - N + 1; - opMask <<= opShift; - opShift = beginInstBit - beginVarBit; - - if (opShift > 0) { - Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " + - itostr(opShift) + ";\n"; - } else if (opShift < 0) { - Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " + - itostr(-opShift) + ";\n"; + + std::string maskStr; + int opShift; + + unsigned loBit = beginVarBit - N + 1; + unsigned hiBit = loBit + N; + unsigned loInstBit = beginInstBit - N + 1; + if (UseAPInt) { + std::string extractStr; + if (N >= 64) { + extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + + itostr(loBit) + ")"; + Case += " Value.insertBits(" + extractStr + ", " + + itostr(loInstBit) + ");\n"; + } else { + extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + + ", " + itostr(loBit) + ")"; + Case += " Value.insertBits(" + extractStr + ", " + + itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; + } } else { - Case += " Value |= op & UINT64_C(" + utostr(opMask) + ");\n"; + uint64_t opMask = ~(uint64_t)0 
>> (64 - N); + opShift = beginVarBit - N + 1; + opMask <<= opShift; + maskStr = "UINT64_C(" + utostr(opMask) + ")"; + opShift = beginInstBit - beginVarBit; + + if (numOperandLits == 1) { + Case += " op &= " + maskStr + ";\n"; + if (opShift > 0) { + Case += " op <<= " + itostr(opShift) + ";\n"; + } else if (opShift < 0) { + Case += " op >>= " + itostr(-opShift) + ";\n"; + } + Case += " Value |= op;\n"; + } else { + if (opShift > 0) { + Case += " Value |= (op & " + maskStr + ") << " + + itostr(opShift) + ";\n"; + } else if (opShift < 0) { + Case += " Value |= (op & " + maskStr + ") >> " + + itostr(-opShift) + ";\n"; + } else { + Case += " Value |= (op & " + maskStr + ");\n"; + } + } } } } @@ -187,7 +266,29 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName, std::string CodeEmitterGen::getInstructionCase(Record *R, CodeGenTarget &Target) { std::string Case; - BitsInit *BI = R->getValueAsBitsInit("Inst"); + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + Case += " switch (HwMode) {\n"; + Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n"; + for (auto &KV : EBM.Map) { + Case += " case " + itostr(KV.first) + ": {\n"; + Case += getInstructionCaseForEncoding(R, KV.second, Target); + Case += " break;\n"; + Case += " }\n"; + } + Case += " }\n"; + return Case; + } + } + return getInstructionCaseForEncoding(R, R, Target); +} + +std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef, + CodeGenTarget &Target) { + std::string Case; + BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); unsigned NumberedOp = 0; std::set NamedOpIndices; @@ -207,7 +308,7 @@ std::string CodeEmitterGen::getInstructionCase(Record *R, // Loop over all of the fields in the instruction, determining which are the // operands to the instruction. 
- for (const RecordVal &RV : R->getValues()) { + for (const RecordVal &RV : EncodingDef->getValues()) { // Ignore fixed fields in the record, we're looking for values like: // bits<5> RST = { ?, ?, ?, ?, ? }; if (RV.getPrefix() || RV.getValue()->isComplete()) @@ -237,6 +338,54 @@ getNameForFeatureBitset(const std::vector &FeatureBitset) { return Name; } +static void emitInstBits(raw_ostream &OS, const APInt &Bits) { + for (unsigned I = 0; I < Bits.getNumWords(); ++I) + OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I]) + << ")"; +} + +void CodeEmitterGen::emitInstructionBaseValues( + raw_ostream &o, ArrayRef NumberedInstructions, + CodeGenTarget &Target, int HwMode) { + const CodeGenHwModes &HWM = Target.getHwModes(); + if (HwMode == -1) + o << " static const uint64_t InstBits[] = {\n"; + else + o << " static const uint64_t InstBits_" << HWM.getMode(HwMode).Name + << "[] = {\n"; + + for (const CodeGenInstruction *CGI : NumberedInstructions) { + Record *R = CGI->TheDef; + + if (R->getValueAsString("Namespace") == "TargetOpcode" || + R->getValueAsBit("isPseudo")) { + o << " "; emitInstBits(o, APInt(BitWidth, 0)); o << ",\n"; + continue; + } + + Record *EncodingDef = R; + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + if (EBM.hasMode(HwMode)) + EncodingDef = EBM.get(HwMode); + } + } + BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst"); + + // Start by filling in fixed values. 
+ APInt Value(BitWidth, 0); + for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { + if (BitInit *B = dyn_cast(BI->getBit(e - i - 1))) + Value |= APInt(BitWidth, (uint64_t)B->getValue()) << (e - i - 1); + } + o << " "; + emitInstBits(o, Value); + o << "," << '\t' << "// " << R->getName() << "\n"; + } + o << " UINT64_C(0)\n };\n"; +} + void CodeEmitterGen::run(raw_ostream &o) { CodeGenTarget Target(Records); std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); @@ -247,34 +396,66 @@ void CodeEmitterGen::run(raw_ostream &o) { ArrayRef NumberedInstructions = Target.getInstructionsByEnumValue(); - // Emit function declaration - o << "uint64_t " << Target.getName(); - o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" - << " SmallVectorImpl &Fixups,\n" - << " const MCSubtargetInfo &STI) const {\n"; - - // Emit instruction base values - o << " static const uint64_t InstBits[] = {\n"; + const CodeGenHwModes &HWM = Target.getHwModes(); + // The set of HwModes used by instruction encodings. + std::set HwModes; + BitWidth = 0; for (const CodeGenInstruction *CGI : NumberedInstructions) { Record *R = CGI->TheDef; - if (R->getValueAsString("Namespace") == "TargetOpcode" || - R->getValueAsBit("isPseudo")) { - o << " UINT64_C(0),\n"; + R->getValueAsBit("isPseudo")) continue; - } + if (const RecordVal *RV = R->getValue("EncodingInfos")) { + if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) { + BitsInit *BI = KV.second->getValueAsBitsInit("Inst"); + BitWidth = std::max(BitWidth, BI->getNumBits()); + HwModes.insert(KV.first); + } + continue; + } + } BitsInit *BI = R->getValueAsBitsInit("Inst"); - - // Start by filling in fixed values. 
- uint64_t Value = 0; - for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { - if (BitInit *B = dyn_cast(BI->getBit(e-i-1))) - Value |= (uint64_t)B->getValue() << (e-i-1); - } - o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n"; + BitWidth = std::max(BitWidth, BI->getNumBits()); + } + UseAPInt = BitWidth > 64; + + // Emit function declaration + if (UseAPInt) { + o << "void " << Target.getName() + << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " APInt &Inst,\n" + << " APInt &Scratch,\n" + << " const MCSubtargetInfo &STI) const {\n"; + } else { + o << "uint64_t " << Target.getName(); + o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + << " SmallVectorImpl &Fixups,\n" + << " const MCSubtargetInfo &STI) const {\n"; + } + + // Emit instruction base values + if (HwModes.empty()) { + emitInstructionBaseValues(o, NumberedInstructions, Target, -1); + } else { + for (unsigned HwMode : HwModes) + emitInstructionBaseValues(o, NumberedInstructions, Target, (int)HwMode); + } + + if (!HwModes.empty()) { + o << " const uint64_t *InstBits;\n"; + o << " unsigned HwMode = STI.getHwMode();\n"; + o << " switch (HwMode) {\n"; + o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n"; + for (unsigned I : HwModes) { + o << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name + << "; break;\n"; + } + o << " };\n"; } - o << " UINT64_C(0)\n };\n"; // Map to accumulate all the cases. 
std::map> CaseMap; @@ -294,11 +475,26 @@ void CodeEmitterGen::run(raw_ostream &o) { } // Emit initial function code - o << " const unsigned opcode = MI.getOpcode();\n" - << " uint64_t Value = InstBits[opcode];\n" - << " uint64_t op = 0;\n" - << " (void)op; // suppress warning\n" - << " switch (opcode) {\n"; + if (UseAPInt) { + int NumWords = APInt::getNumWords(BitWidth); + int NumBytes = (BitWidth + 7) / 8; + o << " const unsigned opcode = MI.getOpcode();\n" + << " if (Inst.getBitWidth() != " << BitWidth << ")\n" + << " Inst = Inst.zext(" << BitWidth << ");\n" + << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" + << " Scratch = Scratch.zext(" << BitWidth << ");\n" + << " LoadIntFromMemory(Inst, (uint8_t*)&InstBits[opcode * " << NumWords + << "], " << NumBytes << ");\n" + << " APInt &Value = Inst;\n" + << " APInt &op = Scratch;\n" + << " switch (opcode) {\n"; + } else { + o << " const unsigned opcode = MI.getOpcode();\n" + << " uint64_t Value = InstBits[opcode];\n" + << " uint64_t op = 0;\n" + << " (void)op; // suppress warning\n" + << " switch (opcode) {\n"; + } // Emit each case statement std::map>::iterator IE, EE; @@ -322,9 +518,12 @@ void CodeEmitterGen::run(raw_ostream &o) { << " raw_string_ostream Msg(msg);\n" << " Msg << \"Not supported instr: \" << MI;\n" << " report_fatal_error(Msg.str());\n" - << " }\n" - << " return Value;\n" - << "}\n\n"; + << " }\n"; + if (UseAPInt) + o << " Inst = Value;\n"; + else + o << " return Value;\n"; + o << "}\n\n"; const auto &All = SubtargetFeatureInfo::getAll(Records); std::map SubtargetFeatures; @@ -385,8 +584,8 @@ void CodeEmitterGen::run(raw_ostream &o) { o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n"; } o << "};\n\n" - << "const static FeatureBitset FeatureBitsets[] {\n" - << " {}, // CEFBS_None\n"; + << "static constexpr FeatureBitset FeatureBitsets[] = {\n" + << " {}, // CEFBS_None\n"; for (const auto &FeatureBitset : FeatureBitsets) { if (FeatureBitset.empty()) continue; diff --git 
a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index c8f710d66a0..46f986ca017 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -769,7 +769,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, for (MVT T : MVT::integer_valuetypes()) if (Legal.count(T)) Out.insert(T); - for (MVT T : MVT::integer_vector_valuetypes()) + for (MVT T : MVT::integer_fixedlen_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::integer_scalable_vector_valuetypes()) if (Legal.count(T)) Out.insert(T); return; @@ -777,7 +780,10 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, for (MVT T : MVT::fp_valuetypes()) if (Legal.count(T)) Out.insert(T); - for (MVT T : MVT::fp_vector_valuetypes()) + for (MVT T : MVT::fp_fixedlen_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::fp_scalable_vector_valuetypes()) if (Legal.count(T)) Out.insert(T); return; @@ -883,7 +889,8 @@ std::string TreePredicateFn::getPredCode() const { if (isLoad()) { if (!isUnindexed() && !isNonExtLoad() && !isAnyExtLoad() && !isSignExtLoad() && !isZeroExtLoad() && getMemoryVT() == nullptr && - getScalarMemoryVT() == nullptr) + getScalarMemoryVT() == nullptr && getAddressSpaces() == nullptr && + getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsLoad cannot be used by itself"); } else { @@ -903,7 +910,8 @@ std::string TreePredicateFn::getPredCode() const { if (isStore()) { if (!isUnindexed() && !isTruncStore() && !isNonTruncStore() && - getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr) + getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr && + getAddressSpaces() == nullptr && getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsStore cannot be used by itself"); } else { @@ -917,6 +925,7 @@ std::string TreePredicateFn::getPredCode() const { if (isAtomic()) { if (getMemoryVT() 
== nullptr && !isAtomicOrderingMonotonic() && + getAddressSpaces() == nullptr && !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && !isAtomicOrderingAcquireRelease() && !isAtomicOrderingSequentiallyConsistent() && @@ -977,6 +986,13 @@ std::string TreePredicateFn::getPredCode() const { Code += ")\nreturn false;\n"; } + int64_t MinAlign = getMinAlignment(); + if (MinAlign > 0) { + Code += "if (cast(N)->getAlignment() < "; + Code += utostr(MinAlign); + Code += ")\nreturn false;\n"; + } + Record *MemoryVT = getMemoryVT(); if (MemoryVT) @@ -1177,6 +1193,13 @@ ListInit *TreePredicateFn::getAddressSpaces() const { return R->getValueAsListInit("AddressSpaces"); } +int64_t TreePredicateFn::getMinAlignment() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("MinAlignment")) + return 0; + return R->getValueAsInt("MinAlignment"); +} + Record *TreePredicateFn::getScalarMemoryVT() const { Record *R = getOrigPatFragRecord()->getRecord(); if (R->isValueUnset("ScalarMemoryVT")) @@ -1373,9 +1396,11 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const { /// std::string PatternToMatch::getPredicateCheck() const { SmallVector PredList; - for (const Predicate &P : Predicates) - PredList.push_back(&P); - llvm::sort(PredList, deref()); + for (const Predicate &P : Predicates) { + if (!P.getCondString().empty()) + PredList.push_back(&P); + } + llvm::sort(PredList, deref>()); std::string Check; for (unsigned i = 0, e = PredList.size(); i != e; ++i) { @@ -2772,6 +2797,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, if (Operator->isSubClassOf("SDNode") && Operator->getName() != "imm" && + Operator->getName() != "timm" && Operator->getName() != "fpimm" && Operator->getName() != "tglobaltlsaddr" && Operator->getName() != "tconstpool" && @@ -3083,7 +3109,7 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) { ListInit *LI = Frag->getValueAsListInit("Fragments"); TreePattern *P = - (PatternFragments[Frag] = 
llvm::make_unique( + (PatternFragments[Frag] = std::make_unique( Frag, LI, !Frag->isSubClassOf("OutPatFrag"), *this)).get(); diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 2b49a64c3f1..80fc932a7a5 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -594,6 +594,7 @@ public: Record *getScalarMemoryVT() const; ListInit *getAddressSpaces() const; + int64_t getMinAlignment() const; // If true, indicates that GlobalISel-based C++ code was supplied. bool hasGISelPredicateCode() const; @@ -1075,8 +1076,11 @@ public: std::string C = IsHwMode ? std::string("MF->getSubtarget().checkFeatures(\"" + Features + "\")") : std::string(Def->getValueAsString("CondString")); + if (C.empty()) + return ""; return IfCond ? C : "!("+C+')'; } + bool operator==(const Predicate &P) const { return IfCond == P.IfCond && IsHwMode == P.IsHwMode && Def == P.Def; } diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 2463824469a..fde946d0658 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -363,6 +363,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) Namespace = R->getValueAsString("Namespace"); AsmString = R->getValueAsString("AsmString"); + isPreISelOpcode = R->getValueAsBit("isPreISelOpcode"); isReturn = R->getValueAsBit("isReturn"); isEHScopeReturn = R->getValueAsBit("isEHScopeReturn"); isBranch = R->getValueAsBit("isBranch"); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index bb5b1369649..2cb28425df7 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -231,6 +231,7 @@ template class ArrayRef; std::vector ImplicitDefs, ImplicitUses; // Various boolean values we track for the instruction. 
+ bool isPreISelOpcode : 1; bool isReturn : 1; bool isEHScopeReturn : 1; bool isBranch : 1; diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h index 7b74bb07d6e..83e780671b4 100644 --- a/utils/TableGen/CodeGenIntrinsics.h +++ b/utils/TableGen/CodeGenIntrinsics.h @@ -141,6 +141,7 @@ struct CodeGenIntrinsic { enum ArgAttribute { NoCapture, + NoAlias, Returned, ReadOnly, WriteOnly, @@ -154,6 +155,13 @@ struct CodeGenIntrinsic { return Properties & (1 << Prop); } + /// Returns true if the parameter at \p ParamIdx is a pointer type. Returns + /// false if the parameter is not a pointer, or \p ParamIdx is greater than + /// the size of \p IS.ParamVTs. + /// + /// Note that this requires that \p IS.ParamVTs is available. + bool isParamAPointer(unsigned ParamIdx) const; + CodeGenIntrinsic(Record *R); }; diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp index b1774b01ba8..793bb61481e 100644 --- a/utils/TableGen/CodeGenMapTable.cpp +++ b/utils/TableGen/CodeGenMapTable.cpp @@ -132,7 +132,7 @@ public: MapRec->getName() + "' has empty " + "`ValueCols' field!"); for (Init *I : ColValList->getValues()) { - ListInit *ColI = dyn_cast(I); + auto *ColI = cast(I); // Make sure that all the sub-lists in 'ValueCols' have same number of // elements as the fields in 'ColFields'. @@ -168,7 +168,7 @@ public: return ValueCols; } }; -} // End anonymous namespace. +} // end anonymous namespace //===----------------------------------------------------------------------===// @@ -226,7 +226,7 @@ public: void emitMapFuncBody(raw_ostream &OS, unsigned TableSize); }; -} // End anonymous namespace. 
+} // end anonymous namespace //===----------------------------------------------------------------------===// @@ -521,7 +521,7 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { unsigned ListSize = List->size(); for (unsigned j = 0; j < ListSize; j++) { - ListInit *ListJ = dyn_cast(List->getElement(j)); + auto *ListJ = cast(List->getElement(j)); if (ListJ->size() != ColFields->size()) PrintFatalError("Record `" + CurMap->getName() + "', field " @@ -604,8 +604,8 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { // Emit map tables and the functions to query them. IMap.emitTablesWithFunc(OS); } - OS << "} // End " << NameSpace << " namespace\n"; - OS << "} // End llvm namespace\n"; + OS << "} // end namespace " << NameSpace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRMAP_INFO\n\n"; } diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index f87c6d6c945..6153c759b12 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -639,7 +639,8 @@ struct TupleExpander : SetTheory::Expander { // Precompute some types. Record *RegisterCl = Def->getRecords().getClass("Register"); RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl); - StringInit *BlankName = StringInit::get(""); + std::vector RegNames = + Def->getValueAsListOfStrings("RegAsmNames"); // Zip them up. for (unsigned n = 0; n != Length; ++n) { @@ -656,11 +657,20 @@ struct TupleExpander : SetTheory::Expander { unsigned(Reg->getValueAsInt("CostPerUse"))); } + StringInit *AsmName = StringInit::get(""); + if (!RegNames.empty()) { + if (RegNames.size() <= n) + PrintFatalError(Def->getLoc(), + "Register tuple definition missing name for '" + + Name + "'."); + AsmName = StringInit::get(RegNames[n]); + } + // Create a new Record representing the synthesized register. This record // is only for consumption by CodeGenRegister, it is not added to the // RecordKeeper. 
SynthDefs.emplace_back( - llvm::make_unique(Name, Def->getLoc(), Def->getRecords())); + std::make_unique(Name, Def->getLoc(), Def->getRecords())); Record *NewReg = SynthDefs.back().get(); Elts.insert(NewReg); @@ -683,9 +693,8 @@ struct TupleExpander : SetTheory::Expander { if (Field == "SubRegs") RV.setValue(ListInit::get(Tuple, RegisterRecTy)); - // Provide a blank AsmName. MC hacks are required anyway. if (Field == "AsmName") - RV.setValue(BlankName); + RV.setValue(AsmName); // CostPerUse is aggregated from all Tuple members. if (Field == "CostPerUse") @@ -725,8 +734,8 @@ struct TupleExpander : SetTheory::Expander { //===----------------------------------------------------------------------===// static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) { - llvm::sort(M, deref()); - M.erase(std::unique(M.begin(), M.end(), deref()), M.end()); + llvm::sort(M, deref>()); + M.erase(std::unique(M.begin(), M.end(), deref>()), M.end()); } CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) @@ -851,7 +860,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { return std::binary_search(Members.begin(), Members.end(), Reg, - deref()); + deref>()); } namespace llvm { @@ -887,7 +896,7 @@ static bool testSubClass(const CodeGenRegisterClass *A, return A->RSI.isSubClassOf(B->RSI) && std::includes(A->getMembers().begin(), A->getMembers().end(), B->getMembers().begin(), B->getMembers().end(), - deref()); + deref>()); } /// Sorting predicate for register classes. This provides a topological @@ -1089,7 +1098,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); Sets.addExpander("RegisterTuples", - llvm::make_unique(SynthDefs)); + std::make_unique(SynthDefs)); // Read in the user-defined (named) sub-register indices. 
// More indices will be synthesized later. @@ -2131,9 +2140,10 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { const CodeGenRegister::Vec &Memb1 = RC1->getMembers(); const CodeGenRegister::Vec &Memb2 = RC2->getMembers(); CodeGenRegister::Vec Intersection; - std::set_intersection( - Memb1.begin(), Memb1.end(), Memb2.begin(), Memb2.end(), - std::inserter(Intersection, Intersection.begin()), deref()); + std::set_intersection(Memb1.begin(), Memb1.end(), Memb2.begin(), + Memb2.end(), + std::inserter(Intersection, Intersection.begin()), + deref>()); // Skip disjoint class pairs. if (Intersection.empty()) @@ -2158,7 +2168,8 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) { // Map SubRegIndex to set of registers in RC supporting that SubRegIndex. typedef std::map> SubReg2SetMap; + deref>> + SubReg2SetMap; // Compute the set of registers supporting each SubRegIndex. SubReg2SetMap SRSets; @@ -2357,6 +2368,21 @@ CodeGenRegBank::getRegClassForRegister(Record *R) { return FoundRC; } +const CodeGenRegisterClass * +CodeGenRegBank::getMinimalPhysRegClass(Record *RegRecord, + ValueTypeByHwMode *VT) { + const CodeGenRegister *Reg = getReg(RegRecord); + const CodeGenRegisterClass *BestRC = nullptr; + for (const auto &RC : getRegClasses()) { + if ((!VT || RC.hasType(*VT)) && + RC.contains(Reg) && (!BestRC || BestRC->hasSubClass(&RC))) + BestRC = &RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef Regs) { SetVector Set; diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index f04a90f8fde..6d933baec2a 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -93,7 +93,8 @@ namespace llvm { // Map of composite subreg indices. 
typedef std::map> CompMap; + deref>> + CompMap; // Returns the subreg index that results from composing this with Idx. // Returns NULL if this and Idx don't compose. @@ -137,15 +138,14 @@ namespace llvm { /// list of subregisters they are composed of (if any). Do this recursively. void computeConcatTransitiveClosure(); + bool operator<(const CodeGenSubRegIndex &RHS) const { + return this->EnumValue < RHS.EnumValue; + } + private: CompMap Composed; }; - inline bool operator<(const CodeGenSubRegIndex &A, - const CodeGenSubRegIndex &B) { - return A.EnumValue < B.EnumValue; - } - /// CodeGenRegister - Represents a register definition. struct CodeGenRegister { Record *TheDef; @@ -156,7 +156,8 @@ namespace llvm { bool Artificial; // Map SubRegIndex -> Register. - typedef std::map> + typedef std::map>> SubRegMap; CodeGenRegister(Record *R, unsigned Enum); @@ -347,6 +348,10 @@ namespace llvm { ArrayRef getValueTypes() const { return VTs; } unsigned getNumValueTypes() const { return VTs.size(); } + bool hasType(const ValueTypeByHwMode &VT) const { + return std::find(VTs.begin(), VTs.end(), VT) != VTs.end(); + } + const ValueTypeByHwMode &getValueTypeNum(unsigned VTNum) const { if (VTNum < VTs.size()) return VTs[VTNum]; @@ -708,6 +713,13 @@ namespace llvm { /// return the superclass. Otherwise return null. const CodeGenRegisterClass* getRegClassForRegister(Record *R); + // Analog of TargetRegisterInfo::getMinimalPhysRegClass. Unlike + // getRegClassForRegister, this tries to find the smallest class containing + // the physical register. If \p VT is specified, it will only find classes + // with a matching type + const CodeGenRegisterClass * + getMinimalPhysRegClass(Record *RegRecord, ValueTypeByHwMode *VT = nullptr); + // Get the sum of unit weights. 
unsigned getRegUnitSetWeight(const std::vector &Units) const { unsigned Weight = 0; diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index fd007044a16..f12d7d484a8 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -172,8 +172,8 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, // Allow Set evaluation to recognize the dags used in InstRW records: // (instrs Op1, Op1...) - Sets.addOperator("instrs", llvm::make_unique()); - Sets.addOperator("instregex", llvm::make_unique(Target)); + Sets.addOperator("instrs", std::make_unique()); + Sets.addOperator("instregex", std::make_unique(Target)); // Instantiate a CodeGenProcModel for each SchedMachineModel with the values // that are explicitly referenced in tablegen records. Resources associated @@ -1083,9 +1083,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { if (RWD->getValueAsDef("SchedModel") == RWModelDef && RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) { for (Record *Inst : InstDefs) { - PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " + - Inst->getName() + " also matches " + - RWD->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError + (InstRWDef->getLoc(), + "Overlapping InstRW definition for \"" + + Inst->getName() + + "\" also matches previous \"" + + RWD->getValue("Instrs")->getValue()->getAsString() + + "\"."); } } } @@ -1115,9 +1119,13 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) { if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) { for (Record *InstDef : InstDefs) { - PrintFatalError(OldRWDef->getLoc(), "Overlapping InstRW def " + - InstDef->getName() + " also matches " + - OldRWDef->getValue("Instrs")->getValue()->getAsString()); + PrintFatalError + (InstRWDef->getLoc(), + "Overlapping InstRW definition for \"" + + InstDef->getName() + + "\" also matches previous \"" + + 
OldRWDef->getValue("Instrs")->getValue()->getAsString() + + "\"."); } } assert(OldRWDef != InstRWDef && diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index b65e1b6af79..fa8b842c97f 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -98,6 +98,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v256i8: return "MVT::v256i8"; case MVT::v1i16: return "MVT::v1i16"; case MVT::v2i16: return "MVT::v2i16"; + case MVT::v3i16: return "MVT::v3i16"; case MVT::v4i16: return "MVT::v4i16"; case MVT::v8i16: return "MVT::v8i16"; case MVT::v16i16: return "MVT::v16i16"; @@ -126,8 +127,11 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v32i64: return "MVT::v32i64"; case MVT::v1i128: return "MVT::v1i128"; case MVT::v2f16: return "MVT::v2f16"; + case MVT::v3f16: return "MVT::v3f16"; case MVT::v4f16: return "MVT::v4f16"; case MVT::v8f16: return "MVT::v8f16"; + case MVT::v16f16: return "MVT::v16f16"; + case MVT::v32f16: return "MVT::v32f16"; case MVT::v1f32: return "MVT::v1f32"; case MVT::v2f32: return "MVT::v2f32"; case MVT::v3f32: return "MVT::v3f32"; @@ -289,10 +293,57 @@ Record *CodeGenTarget::getAsmWriter() const { CodeGenRegBank &CodeGenTarget::getRegBank() const { if (!RegBank) - RegBank = llvm::make_unique(Records, getHwModes()); + RegBank = std::make_unique(Records, getHwModes()); return *RegBank; } +Optional +CodeGenTarget::getSuperRegForSubReg(const ValueTypeByHwMode &ValueTy, + CodeGenRegBank &RegBank, + const CodeGenSubRegIndex *SubIdx) const { + std::vector Candidates; + auto &RegClasses = RegBank.getRegClasses(); + + // Try to find a register class which supports ValueTy, and also contains + // SubIdx. + for (CodeGenRegisterClass &RC : RegClasses) { + // Is there a subclass of this class which contains this subregister index? + CodeGenRegisterClass *SubClassWithSubReg = RC.getSubClassWithSubReg(SubIdx); + if (!SubClassWithSubReg) + continue; + + // We have a class. 
Check if it supports this value type. + if (llvm::none_of(SubClassWithSubReg->VTs, + [&ValueTy](const ValueTypeByHwMode &ClassVT) { + return ClassVT == ValueTy; + })) + continue; + + // We have a register class which supports both the value type and + // subregister index. Remember it. + Candidates.push_back(SubClassWithSubReg); + } + + // If we didn't find anything, we're done. + if (Candidates.empty()) + return None; + + // Find and return the largest of our candidate classes. + llvm::stable_sort(Candidates, [&](const CodeGenRegisterClass *A, + const CodeGenRegisterClass *B) { + if (A->getMembers().size() > B->getMembers().size()) + return true; + + if (A->getMembers().size() < B->getMembers().size()) + return false; + + // Order by name as a tie-breaker. + return StringRef(A->getName()) < B->getName(); + }); + + return Candidates[0]; +} + void CodeGenTarget::ReadRegAltNameIndices() const { RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex"); llvm::sort(RegAltNameIndices, LessRecord()); @@ -339,7 +390,7 @@ void CodeGenTarget::ReadLegalValueTypes() const { CodeGenSchedModels &CodeGenTarget::getSchedModels() const { if (!SchedModels) - SchedModels = llvm::make_unique(Records, *this); + SchedModels = std::make_unique(Records, *this); return *SchedModels; } @@ -352,7 +403,7 @@ void CodeGenTarget::ReadInstructions() const { // Parse the instructions defined in the .td file. 
for (unsigned i = 0, e = Insts.size(); i != e; ++i) - Instructions[Insts[i]] = llvm::make_unique(Insts[i]); + Instructions[Insts[i]] = std::make_unique(Insts[i]); } static const CodeGenInstruction * @@ -427,7 +478,8 @@ void CodeGenTarget::reverseBitsForLittleEndianEncoding() { if (!isLittleEndianEncoding()) return; - std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); + std::vector Insts = + Records.getAllDerivedDefinitions("InstructionEncoding"); for (Record *R : Insts) { if (R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) @@ -733,6 +785,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { else if (Property->isSubClassOf("NoCapture")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture)); + } else if (Property->isSubClassOf("NoAlias")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, NoAlias)); } else if (Property->isSubClassOf("Returned")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned)); @@ -758,3 +813,10 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { // Sort the argument attributes for later benefit. llvm::sort(ArgumentAttributes); } + +bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const { + if (ParamIdx >= IS.ParamVTs.size()) + return false; + MVT ParamType = MVT(IS.ParamVTs[ParamIdx]); + return ParamType == MVT::iPTR || ParamType == MVT::iPTRAny; +} diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 1ab2de269c7..d52ffac4ce6 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -103,6 +103,12 @@ public: /// getRegBank - Return the register bank description. CodeGenRegBank &getRegBank() const; + /// Return the largest register class on \p RegBank which supports \p Ty and + /// covers \p SubIdx if it exists. 
+ Optional + getSuperRegForSubReg(const ValueTypeByHwMode &Ty, CodeGenRegBank &RegBank, + const CodeGenSubRegIndex *SubIdx) const; + /// getRegisterByName - If there is a register with the specific AsmName, /// return it. const CodeGenRegister *getRegisterByName(StringRef Name) const; diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp index fb0c6faa529..d8e78ce55c7 100644 --- a/utils/TableGen/DAGISelEmitter.cpp +++ b/utils/TableGen/DAGISelEmitter.cpp @@ -173,7 +173,7 @@ void DAGISelEmitter::run(raw_ostream &OS) { } std::unique_ptr TheMatcher = - llvm::make_unique(PatternMatchers); + std::make_unique(PatternMatchers); OptimizeMatcher(TheMatcher, CGP); //Matcher->dump(); diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h index 0a782e84a37..223513fc8d3 100644 --- a/utils/TableGen/DAGISelMatcher.h +++ b/utils/TableGen/DAGISelMatcher.h @@ -932,13 +932,15 @@ private: /// class EmitCopyToRegMatcher : public Matcher { unsigned SrcSlot; // Value to copy into the physreg. 
- Record *DestPhysReg; + const CodeGenRegister *DestPhysReg; + public: - EmitCopyToRegMatcher(unsigned srcSlot, Record *destPhysReg) + EmitCopyToRegMatcher(unsigned srcSlot, + const CodeGenRegister *destPhysReg) : Matcher(EmitCopyToReg), SrcSlot(srcSlot), DestPhysReg(destPhysReg) {} unsigned getSrcSlot() const { return SrcSlot; } - Record *getDestPhysReg() const { return DestPhysReg; } + const CodeGenRegister *getDestPhysReg() const { return DestPhysReg; } static bool classof(const Matcher *N) { return N->getKind() == EmitCopyToReg; diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp index cecbc6cccdf..e9f1fb93d51 100644 --- a/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -670,12 +670,22 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << '\n'; return 2+MN->getNumNodes(); } - case Matcher::EmitCopyToReg: - OS << "OPC_EmitCopyToReg, " - << cast(N)->getSrcSlot() << ", " - << getQualifiedName(cast(N)->getDestPhysReg()) - << ",\n"; - return 3; + case Matcher::EmitCopyToReg: { + const auto *C2RMatcher = cast(N); + int Bytes = 3; + const CodeGenRegister *Reg = C2RMatcher->getDestPhysReg(); + if (Reg->EnumValue > 255) { + assert(isUInt<16>(Reg->EnumValue) && "not handled"); + OS << "OPC_EmitCopyToReg2, " << C2RMatcher->getSrcSlot() << ", " + << "TARGET_VAL(" << getQualifiedName(Reg->TheDef) << "),\n"; + ++Bytes; + } else { + OS << "OPC_EmitCopyToReg, " << C2RMatcher->getSrcSlot() << ", " + << getQualifiedName(Reg->TheDef) << ",\n"; + } + + return Bytes; + } case Matcher::EmitNodeXForm: { const EmitNodeXFormMatcher *XF = cast(N); OS << "OPC_EmitNodeXForm, " << getNodeXFormID(XF->getNodeXForm()) << ", " diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 8f54beeba65..49c09c7d195 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -141,7 +141,7 @@ namespace { SmallVectorImpl 
&ResultOps); }; -} // end anon namespace. +} // end anonymous namespace MatcherGen::MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp) @@ -867,9 +867,13 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, if (isRoot && !PhysRegInputs.empty()) { // Emit all of the CopyToReg nodes for the input physical registers. These // occur in patterns like (mul:i8 AL:i8, GR8:i8:$src). - for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) + for (unsigned i = 0, e = PhysRegInputs.size(); i != e; ++i) { + const CodeGenRegister *Reg = + CGP.getTargetInfo().getRegBank().getReg(PhysRegInputs[i].first); AddMatcher(new EmitCopyToRegMatcher(PhysRegInputs[i].second, - PhysRegInputs[i].first)); + Reg)); + } + // Even if the node has no other glue inputs, the resultant node must be // glued to the CopyFromReg nodes we just generated. TreeHasInGlue = true; diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp index 7d51b076937..6746fdd676a 100644 --- a/utils/TableGen/DAGISelMatcherOpt.cpp +++ b/utils/TableGen/DAGISelMatcherOpt.cpp @@ -409,13 +409,14 @@ static void FactorNodes(std::unique_ptr &InputMatcherPtr) { DenseMap TypeEntry; SmallVector, 8> Cases; for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) { - CheckTypeMatcher *CTM = - cast_or_null(FindNodeWithKind(NewOptionsToMatch[i], - Matcher::CheckType)); + Matcher* M = FindNodeWithKind(NewOptionsToMatch[i], Matcher::CheckType); + assert(M && isa(M) && "Unknown Matcher type"); + + auto *CTM = cast(M); Matcher *MatcherWithoutCTM = NewOptionsToMatch[i]->unlinkNode(CTM); MVT::SimpleValueType CTMTy = CTM->getType(); delete CTM; - + unsigned &Entry = TypeEntry[CTMTy]; if (Entry != 0) { // If we have unfactored duplicate types, then we should factor them. 
diff --git a/utils/TableGen/DFAEmitter.cpp b/utils/TableGen/DFAEmitter.cpp new file mode 100644 index 00000000000..dd3db7c150b --- /dev/null +++ b/utils/TableGen/DFAEmitter.cpp @@ -0,0 +1,394 @@ +//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class can produce a generic deterministic finite state automaton (DFA), +// given a set of possible states and transitions. +// +// The input transitions can be nondeterministic - this class will produce the +// deterministic equivalent state machine. +// +// The generated code can run the DFA and produce an accepted / not accepted +// state and also produce, given a sequence of transitions that results in an +// accepted state, the sequence of intermediate states. This is useful if the +// initial automaton was nondeterministic - it allows mapping back from the DFA +// to the NFA. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "dfa-emitter" + +#include "DFAEmitter.h" +#include "CodeGenTarget.h" +#include "SequenceToOffsetTable.h" +#include "TableGenBackends.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DfaEmitter implementation. This is independent of the GenAutomaton backend. 
+//===----------------------------------------------------------------------===// + +void DfaEmitter::addTransition(state_type From, state_type To, action_type A) { + Actions.insert(A); + NfaStates.insert(From); + NfaStates.insert(To); + NfaTransitions[{From, A}].push_back(To); + ++NumNfaTransitions; +} + +void DfaEmitter::visitDfaState(DfaState DS) { + // For every possible action... + auto FromId = DfaStates.idFor(DS); + for (action_type A : Actions) { + DfaState NewStates; + DfaTransitionInfo TI; + // For every represented state, word pair in the original NFA... + for (state_type &FromState : DS) { + // If this action is possible from this state add the transitioned-to + // states to NewStates. + auto I = NfaTransitions.find({FromState, A}); + if (I == NfaTransitions.end()) + continue; + for (state_type &ToState : I->second) { + NewStates.push_back(ToState); + TI.emplace_back(FromState, ToState); + } + } + if (NewStates.empty()) + continue; + // Sort and unique. + sort(NewStates); + NewStates.erase(std::unique(NewStates.begin(), NewStates.end()), + NewStates.end()); + sort(TI); + TI.erase(std::unique(TI.begin(), TI.end()), TI.end()); + unsigned ToId = DfaStates.insert(NewStates); + DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI)); + } +} + +void DfaEmitter::constructDfa() { + DfaState Initial(1, /*NFA initial state=*/0); + DfaStates.insert(Initial); + + // Note that UniqueVector starts indices at 1, not zero. 
+ unsigned DfaStateId = 1; + while (DfaStateId <= DfaStates.size()) + visitDfaState(DfaStates[DfaStateId++]); +} + +void DfaEmitter::emit(StringRef Name, raw_ostream &OS) { + constructDfa(); + + OS << "// Input NFA has " << NfaStates.size() << " states with " + << NumNfaTransitions << " transitions.\n"; + OS << "// Generated DFA has " << DfaStates.size() << " states with " + << DfaTransitions.size() << " transitions.\n\n"; + + // Implementation note: We don't bake a simple std::pair<> here as it requires + // significantly more effort to parse. A simple test with a large array of + // struct-pairs (N=100000) took clang-10 6s to parse. The same array of + // std::pair took 242s. Instead we allow the user to + // define the pair type. + // + // FIXME: It may make sense to emit these as ULEB sequences instead of + // pairs of uint64_t. + OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n"; + OS << "// transition implies a set of NFA transitions. These are referred\n"; + OS << "// to by index in " << Name << "Transitions[].\n"; + + SequenceToOffsetTable Table; + std::map EmittedIndices; + for (auto &T : DfaTransitions) + Table.add(T.second.second); + Table.layout(); + OS << "std::array " << Name + << "TransitionInfo = {{\n"; + Table.emit( + OS, + [](raw_ostream &OS, std::pair P) { + OS << "{" << P.first << ", " << P.second << "}"; + }, + "{0ULL, 0ULL}"); + + OS << "}};\n\n"; + + OS << "// A transition in the generated " << Name << " DFA.\n"; + OS << "struct " << Name << "Transition {\n"; + OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n"; + OS << " "; + printActionType(OS); + OS << " Action; // The input symbol that causes this transition.\n"; + OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n"; + OS << " unsigned InfoIdx; // Start index into " << Name + << "TransitionInfo.\n"; + OS << "};\n\n"; + + OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n"; + OS << "// The initial state is 
1, not zero.\n"; + OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> " + << Name << "Transitions = {{\n"; + for (auto &KV : DfaTransitions) { + dfa_state_type From = KV.first.first; + dfa_state_type To = KV.second.first; + action_type A = KV.first.second; + unsigned InfoIdx = Table.get(KV.second.second); + OS << " {" << From << ", "; + printActionValue(A, OS); + OS << ", " << To << ", " << InfoIdx << "},\n"; + } + OS << "\n}};\n\n"; +} + +void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; } + +void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; } + +//===----------------------------------------------------------------------===// +// AutomatonEmitter implementation +//===----------------------------------------------------------------------===// + +namespace { +// FIXME: This entire discriminated union could be removed with c++17: +// using Action = std::variant; +struct Action { + Record *R = nullptr; + unsigned I = 0; + std::string S = nullptr; + + Action() = default; + Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {} + + void print(raw_ostream &OS) const { + if (R) + OS << R->getName(); + else if (!S.empty()) + OS << '"' << S << '"'; + else + OS << I; + } + bool operator<(const Action &Other) const { + return std::make_tuple(R, I, S) < + std::make_tuple(Other.R, Other.I, Other.S); + } +}; + +using ActionTuple = std::vector; +class Automaton; + +class Transition { + uint64_t NewState; + // The tuple of actions that causes this transition. + ActionTuple Actions; + // The types of the actions; this is the same across all transitions. 
+ SmallVector Types; + +public: + Transition(Record *R, Automaton *Parent); + const ActionTuple &getActions() { return Actions; } + SmallVector getTypes() { return Types; } + + bool canTransitionFrom(uint64_t State); + uint64_t transitionFrom(uint64_t State); +}; + +class Automaton { + RecordKeeper &Records; + Record *R; + std::vector Transitions; + /// All possible action tuples, uniqued. + UniqueVector Actions; + /// The fields within each Transition object to find the action symbols. + std::vector ActionSymbolFields; + +public: + Automaton(RecordKeeper &Records, Record *R); + void emit(raw_ostream &OS); + + ArrayRef getActionSymbolFields() { return ActionSymbolFields; } + /// If the type of action A has been overridden (there exists a field + /// "TypeOf_A") return that, otherwise return the empty string. + StringRef getActionSymbolType(StringRef A); +}; + +class AutomatonEmitter { + RecordKeeper &Records; + +public: + AutomatonEmitter(RecordKeeper &R) : Records(R) {} + void run(raw_ostream &OS); +}; + +/// A DfaEmitter implementation that can print our variant action type. 
+class CustomDfaEmitter : public DfaEmitter { + const UniqueVector &Actions; + std::string TypeName; + +public: + CustomDfaEmitter(const UniqueVector &Actions, StringRef TypeName) + : Actions(Actions), TypeName(TypeName) {} + + void printActionType(raw_ostream &OS) override; + void printActionValue(action_type A, raw_ostream &OS) override; +}; +} // namespace + +void AutomatonEmitter::run(raw_ostream &OS) { + for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) { + Automaton A(Records, R); + OS << "#ifdef GET_" << R->getName() << "_DECL\n"; + A.emit(OS); + OS << "#endif // GET_" << R->getName() << "_DECL\n"; + } +} + +Automaton::Automaton(RecordKeeper &Records, Record *R) + : Records(Records), R(R) { + LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n"); + ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields"); +} + +void Automaton::emit(raw_ostream &OS) { + StringRef TransitionClass = R->getValueAsString("TransitionClass"); + for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) { + assert(T->isSubClassOf("Transition")); + Transitions.emplace_back(T, this); + Actions.insert(Transitions.back().getActions()); + } + + LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size() + << "\n"); + LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size() + << " potential transitions.\n"); + + StringRef Name = R->getName(); + + CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action"); + // Starting from the initial state, build up a list of possible states and + // transitions. 
+ std::deque Worklist(1, 0); + std::set SeenStates; + unsigned NumTransitions = 0; + SeenStates.insert(Worklist.front()); + while (!Worklist.empty()) { + uint64_t State = Worklist.front(); + Worklist.pop_front(); + for (Transition &T : Transitions) { + if (!T.canTransitionFrom(State)) + continue; + uint64_t NewState = T.transitionFrom(State); + if (SeenStates.emplace(NewState).second) + Worklist.emplace_back(NewState); + ++NumTransitions; + Emitter.addTransition(State, NewState, Actions.idFor(T.getActions())); + } + } + LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size() + << " states with " << NumTransitions << " transitions.\n"); + + const auto &ActionTypes = Transitions.back().getTypes(); + OS << "// The type of an action in the " << Name << " automaton.\n"; + if (ActionTypes.size() == 1) { + OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n"; + } else { + OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ") + << ">;\n"; + } + OS << "\n"; + + Emitter.emit(Name, OS); +} + +StringRef Automaton::getActionSymbolType(StringRef A) { + Twine Ty = "TypeOf_" + A; + if (!R->getValue(Ty.str())) + return ""; + return R->getValueAsString(Ty.str()); +} + +Transition::Transition(Record *R, Automaton *Parent) { + BitsInit *NewStateInit = R->getValueAsBitsInit("NewState"); + NewState = 0; + assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 && + "State cannot be represented in 64 bits!"); + for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) { + if (auto *Bit = dyn_cast(NewStateInit->getBit(I))) { + if (Bit->getValue()) + NewState |= 1ULL << I; + } + } + + for (StringRef A : Parent->getActionSymbolFields()) { + RecordVal *SymbolV = R->getValue(A); + if (auto *Ty = dyn_cast(SymbolV->getType())) { + Actions.emplace_back(R->getValueAsDef(A), 0, ""); + Types.emplace_back(Ty->getAsString()); + } else if (isa(SymbolV->getType())) { + Actions.emplace_back(nullptr, R->getValueAsInt(A), ""); + Types.emplace_back("unsigned"); + 
} else if (isa(SymbolV->getType()) || + isa(SymbolV->getType())) { + Actions.emplace_back(nullptr, 0, R->getValueAsString(A)); + Types.emplace_back("std::string"); + } else { + report_fatal_error("Unhandled symbol type!"); + } + + StringRef TypeOverride = Parent->getActionSymbolType(A); + if (!TypeOverride.empty()) + Types.back() = TypeOverride; + } +} + +bool Transition::canTransitionFrom(uint64_t State) { + if ((State & NewState) == 0) + // The bits we want to set are not set; + return true; + return false; +} + +uint64_t Transition::transitionFrom(uint64_t State) { + return State | NewState; +} + +void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } + +void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { + const ActionTuple &AT = Actions[A]; + if (AT.size() > 1) + OS << "std::make_tuple("; + bool First = true; + for (const auto &SingleAction : AT) { + if (!First) + OS << ", "; + First = false; + SingleAction.print(OS); + } + if (AT.size() > 1) + OS << ")"; +} + +namespace llvm { + +void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) { + AutomatonEmitter(RK).run(OS); +} + +} // namespace llvm diff --git a/utils/TableGen/DFAEmitter.h b/utils/TableGen/DFAEmitter.h new file mode 100644 index 00000000000..76de8f72cd8 --- /dev/null +++ b/utils/TableGen/DFAEmitter.h @@ -0,0 +1,107 @@ +//===--------------------- DfaEmitter.h -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines a generic automaton builder. This takes a set of transitions and +// states that represent a nondeterministic finite state automaton (NFA) and +// emits a determinized DFA in a form that include/llvm/Support/Automaton.h can +// drive. 
+// +// See file llvm/TableGen/Automaton.td for the TableGen API definition. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H +#define LLVM_UTILS_TABLEGEN_DFAEMITTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include +#include + +namespace llvm { + +class raw_ostream; +/// Construct a deterministic finite state automaton from possible +/// nondeterministic state and transition data. +/// +/// The state type is a 64-bit unsigned integer. The generated automaton is +/// invariant to the sparsity of the state representation - its size is only +/// a function of the cardinality of the set of states. +/// +/// The inputs to this emitter are considered to define a nondeterministic +/// finite state automaton (NFA). This is then converted to a DFA during +/// emission. The emitted tables can be used to by +/// include/llvm/Support/Automaton.h. +class DfaEmitter { +public: + // The type of an NFA state. The initial state is always zero. + using state_type = uint64_t; + // The type of an action. + using action_type = uint64_t; + + DfaEmitter() = default; + virtual ~DfaEmitter() = default; + + void addTransition(state_type From, state_type To, action_type A); + void emit(StringRef Name, raw_ostream &OS); + +protected: + /// Emit the C++ type of an action to OS. + virtual void printActionType(raw_ostream &OS); + /// Emit the C++ value of an action A to OS. + virtual void printActionValue(action_type A, raw_ostream &OS); + +private: + /// The state type of deterministic states. These are only used internally to + /// this class. This is an ID into the DfaStates UniqueVector. + using dfa_state_type = unsigned; + + /// The actual representation of a DFA state, which is a union of one or more + /// NFA states. 
+ using DfaState = SmallVector; + + /// A DFA transition consists of a set of NFA states transitioning to a + /// new set of NFA states. The DfaTransitionInfo tracks, for every + /// transitioned-from NFA state, a set of valid transitioned-to states. + /// + /// Emission of this transition relation allows algorithmic determination of + /// the possible candidate NFA paths taken under a given input sequence to + /// reach a given DFA state. + using DfaTransitionInfo = SmallVector, 4>; + + /// The set of all possible actions. + std::set Actions; + + /// The set of nondeterministic transitions. A state-action pair can + /// transition to multiple target states. + std::map, std::vector> + NfaTransitions; + std::set NfaStates; + unsigned NumNfaTransitions = 0; + + /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, + /// which is dfa_state_type. Note that because UniqueVector reserves state + /// zero, the initial DFA state is always 1. + UniqueVector DfaStates; + /// The set of deterministic transitions. A state-action pair has only a + /// single target state. + std::map, + std::pair> + DfaTransitions; + + /// Visit all NFA states and construct the DFA. + void constructDfa(); + /// Visit a single DFA state and construct all possible transitions to new DFA + /// states. 
+ void visitDfaState(DfaState DS); +}; + +} // namespace llvm + +#endif diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp index dabcc8f8ed5..ccb4ef1b967 100644 --- a/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/utils/TableGen/DFAPacketizerEmitter.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "dfa-emitter" #include "CodeGenTarget.h" +#include "DFAEmitter.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -29,6 +30,7 @@ #include #include #include +#include #include using namespace llvm; @@ -154,121 +156,13 @@ public: int &maxStages, raw_ostream &OS); + // Emit code for a subset of itineraries. + void emitForItineraries(raw_ostream &OS, + std::vector &ProcItinList, + std::string DFAName); + void run(raw_ostream &OS); }; - -// -// State represents the usage of machine resources if the packet contains -// a set of instruction classes. -// -// Specifically, currentState is a set of bit-masks. -// The nth bit in a bit-mask indicates whether the nth resource is being used -// by this state. The set of bit-masks in a state represent the different -// possible outcomes of transitioning to this state. -// For example: consider a two resource architecture: resource L and resource M -// with three instruction classes: L, M, and L_or_M. -// From the initial state (currentState = 0x00), if we add instruction class -// L_or_M we will transition to a state with currentState = [0x01, 0x10]. This -// represents the possible resource states that can result from adding a L_or_M -// instruction -// -// Another way of thinking about this transition is we are mapping a NDFA with -// two states [0x01] and [0x10] into a DFA with a single state [0x01, 0x10]. -// -// A State instance also contains a collection of transitions from that state: -// a map from inputs to new states. 
-// -class State { - public: - static int currentStateNum; - // stateNum is the only member used for equality/ordering, all other members - // can be mutated even in const State objects. - const int stateNum; - mutable bool isInitial; - mutable std::set stateInfo; - typedef std::map, const State *> TransitionMap; - mutable TransitionMap Transitions; - - State(); - - bool operator<(const State &s) const { - return stateNum < s.stateNum; - } - - // - // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass - // may be a valid transition from this state i.e., can an instruction of type - // InsnClass be added to the packet represented by this state. - // - // Note that for multiple stages, this quick check does not take into account - // any possible resource competition between the stages themselves. That is - // enforced in AddInsnClassStages which checks the cross product of all - // stages for resource availability (which is a more involved check). - // - bool canMaybeAddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap) const; - - // - // AddInsnClass - Return all combinations of resource reservation - // which are possible from this state (PossibleStates). - // - // PossibleStates is the set of valid resource states that ensue from valid - // transitions. - // - void AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const; - - // - // AddInsnClassStages - Return all combinations of resource reservation - // resulting from the cross product of all stages for this InsnClass - // which are possible from this state (PossibleStates). 
- // - void AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const; - - // - // addTransition - Add a transition from this state given the input InsnClass - // - void addTransition(std::vector InsnClass, const State *To) const; - - // - // hasTransition - Returns true if there is a transition from this state - // given the input InsnClass - // - bool hasTransition(std::vector InsnClass) const; -}; - -// -// class DFA: deterministic finite automaton for processor resource tracking. -// -class DFA { -public: - DFA() = default; - - // Set of states. Need to keep this sorted to emit the transition table. - typedef std::set StateSet; - StateSet states; - - State *currentState = nullptr; - - // - // Modify the DFA. - // - const State &newState(); - - // - // writeTable: Print out a table representing the DFA. - // - void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName, - int numInsnClasses = 0, - int maxResources = 0, int numCombos = 0, int maxStages = 0); -}; - } // end anonymous namespace #ifndef NDEBUG @@ -288,22 +182,6 @@ void dbgsInsnClass(const std::vector &InsnClass) { LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")"); } -// -// dbgsStateInfo - When debugging, print the set of state info. -// -void dbgsStateInfo(const std::set &stateInfo) { - LLVM_DEBUG(dbgs() << "StateInfo: "); - unsigned i = 0; - for (std::set::iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI, ++i) { - unsigned thisState = *SI; - if (i > 0) { - LLVM_DEBUG(dbgs() << ", "); - } - LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState)); - } -} - // // dbgsIndent - When debugging, indent by the specified amount. 
// @@ -314,335 +192,9 @@ void dbgsIndent(unsigned indent) { } #endif // NDEBUG -// -// Constructors and destructors for State and DFA -// -State::State() : - stateNum(currentStateNum++), isInitial(false) {} - -// -// addTransition - Add a transition from this state given the input InsnClass -// -void State::addTransition(std::vector InsnClass, const State *To) - const { - assert(!Transitions.count(InsnClass) && - "Cannot have multiple transitions for the same input"); - Transitions[InsnClass] = To; -} - -// -// hasTransition - Returns true if there is a transition from this state -// given the input InsnClass -// -bool State::hasTransition(std::vector InsnClass) const { - return Transitions.count(InsnClass) > 0; -} - -// -// AddInsnClass - Return all combinations of resource reservation -// which are possible from this state (PossibleStates). -// -// PossibleStates is the set of valid resource states that ensue from valid -// transitions. -// -void State::AddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - std::set &PossibleStates) const { - // - // Iterate over all resource states in currentState. 
- // - unsigned numstages = InsnClass.size(); - assert((numstages > 0) && "InsnClass has no stages"); - - for (std::set::iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI) { - unsigned thisState = *SI; - - DenseSet VisitedResourceStates; - - LLVM_DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState) - << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - numstages - 1, numstages, - thisState, thisState, - VisitedResourceStates, PossibleStates); - } -} - -void State::AddInsnClassStages(std::vector &InsnClass, - std::map &ComboBitToBitsMap, - unsigned chkstage, unsigned numstages, - unsigned prevState, unsigned origState, - DenseSet &VisitedResourceStates, - std::set &PossibleStates) const { - assert((chkstage < numstages) && "AddInsnClassStages: stage out of range"); - unsigned thisStage = InsnClass[chkstage]; - - LLVM_DEBUG({ - dbgsIndent((1 + numstages - chkstage) << 1); - dbgs() << "AddInsnClassStages " << chkstage << " (0x" - << Twine::utohexstr(thisStage) << ") from "; - dbgsInsnClass(InsnClass); - dbgs() << "\n"; - }); - - // - // Iterate over all possible resources used in thisStage. - // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}. - // - for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) { - unsigned resourceMask = (0x1 << j); - if (resourceMask & thisStage) { - unsigned combo = ComboBitToBitsMap[resourceMask]; - if (combo && ((~prevState & combo) != combo)) { - LLVM_DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState) - << " - combo op 0x" << Twine::utohexstr(resourceMask) - << " (0x" << Twine::utohexstr(combo) - << ") cannot be scheduled\n"); - continue; - } - // - // For each possible resource used in thisStage, generate the - // resource state if that resource was used. 
- // - unsigned ResultingResourceState = prevState | resourceMask | combo; - LLVM_DEBUG({ - dbgsIndent((2 + numstages - chkstage) << 1); - dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x" - << Twine::utohexstr(resourceMask); - if (combo) - dbgs() << " | 0x" << Twine::utohexstr(combo); - dbgs() << " = 0x" << Twine::utohexstr(ResultingResourceState) << " "; - }); - - // - // If this is the final stage for this class - // - if (chkstage == 0) { - // - // Check if the resulting resource state can be accommodated in this - // packet. - // We compute resource OR prevState (originally started as origState). - // If the result of the OR is different than origState, it implies - // that there is at least one resource that can be used to schedule - // thisStage in the current packet. - // Insert ResultingResourceState into PossibleStates only if we haven't - // processed ResultingResourceState before. - // - if (ResultingResourceState != prevState) { - if (VisitedResourceStates.count(ResultingResourceState) == 0) { - VisitedResourceStates.insert(ResultingResourceState); - PossibleStates.insert(ResultingResourceState); - LLVM_DEBUG(dbgs() - << "\tResultingResourceState: 0x" - << Twine::utohexstr(ResultingResourceState) << "\n"); - } else { - LLVM_DEBUG(dbgs() << "\tSkipped Add - state already seen\n"); - } - } else { - LLVM_DEBUG(dbgs() - << "\tSkipped Add - no final resources available\n"); - } - } else { - // - // If the current resource can be accommodated, check the next - // stage in InsnClass for available resources. 
- // - if (ResultingResourceState != prevState) { - LLVM_DEBUG(dbgs() << "\n"); - AddInsnClassStages(InsnClass, ComboBitToBitsMap, - chkstage - 1, numstages, - ResultingResourceState, origState, - VisitedResourceStates, PossibleStates); - } else { - LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n"); - } - } - } - } -} - -// -// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass -// may be a valid transition from this state i.e., can an instruction of type -// InsnClass be added to the packet represented by this state. -// -// Note that this routine is performing conservative checks that can be -// quickly executed acting as a filter before calling AddInsnClassStages. -// Any cases allowed through here will be caught later in AddInsnClassStages -// which performs the more expensive exact check. -// -bool State::canMaybeAddInsnClass(std::vector &InsnClass, - std::map &ComboBitToBitsMap) const { - for (std::set::const_iterator SI = stateInfo.begin(); - SI != stateInfo.end(); ++SI) { - // Check to see if all required resources are available. - bool available = true; - - // Inspect each stage independently. - // note: This is a conservative check as we aren't checking for - // possible resource competition between the stages themselves - // The full cross product is examined later in AddInsnClass. - for (unsigned i = 0; i < InsnClass.size(); ++i) { - unsigned resources = *SI; - if ((~resources & InsnClass[i]) == 0) { - available = false; - break; - } - // Make sure _all_ resources for a combo function are available. - // note: This is a quick conservative check as it won't catch an - // unscheduleable combo if this stage is an OR expression - // containing a combo. - // These cases are caught later in AddInsnClass. 
- unsigned combo = ComboBitToBitsMap[InsnClass[i]]; - if (combo && ((~resources & combo) != combo)) { - LLVM_DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" - << Twine::utohexstr(resources) << " - combo op 0x" - << Twine::utohexstr(InsnClass[i]) << " (0x" - << Twine::utohexstr(combo) - << ") cannot be scheduled\n"); - available = false; - break; - } - } - - if (available) { - return true; - } - } - return false; -} - -const State &DFA::newState() { - auto IterPair = states.insert(State()); - assert(IterPair.second && "State already exists"); - return *IterPair.first; -} - -int State::currentStateNum = 0; - DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): TargetName(CodeGenTarget(R).getName()), Records(R) {} -// -// writeTableAndAPI - Print out a table representing the DFA and the -// associated API to create a DFA packetizer. -// -// Format: -// DFAStateInputTable[][2] = pairs of for all valid -// transitions. -// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable for -// the ith state. 
-// -// -void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, - int numInsnClasses, - int maxResources, int numCombos, int maxStages) { - unsigned numStates = states.size(); - - LLVM_DEBUG(dbgs() << "-------------------------------------------------------" - "----------------------\n"); - LLVM_DEBUG(dbgs() << "writeTableAndAPI\n"); - LLVM_DEBUG(dbgs() << "Total states: " << numStates << "\n"); - - OS << "namespace llvm {\n"; - - OS << "\n// Input format:\n"; - OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS - << "\t// maximum AND'ed resource terms\n"; - OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES - << "\t// maximum resource bits in one term\n"; - - OS << "\n// " << TargetName << "DFAStateInputTable[][2] = " - << "pairs of for all valid\n"; - OS << "// transitions.\n"; - OS << "// " << numStates << "\tstates\n"; - OS << "// " << numInsnClasses << "\tinstruction classes\n"; - OS << "// " << maxResources << "\tresources max\n"; - OS << "// " << numCombos << "\tcombo resources\n"; - OS << "// " << maxStages << "\tstages max\n"; - OS << "const " << DFA_TBLTYPE << " " - << TargetName << "DFAStateInputTable[][2] = {\n"; - - // This table provides a map to the beginning of the transitions for State s - // in DFAStateInputTable. - std::vector StateEntry(numStates+1); - static const std::string SentinelEntry = "{-1, -1}"; - - // Tracks the total valid transitions encountered so far. It is used - // to construct the StateEntry table. 
- int ValidTransitions = 0; - DFA::StateSet::iterator SI = states.begin(); - for (unsigned i = 0; i < numStates; ++i, ++SI) { - assert ((SI->stateNum == (int) i) && "Mismatch in state numbers"); - StateEntry[i] = ValidTransitions; - for (State::TransitionMap::iterator - II = SI->Transitions.begin(), IE = SI->Transitions.end(); - II != IE; ++II) { - OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", " - << II->second->stateNum << "},\t"; - } - ValidTransitions += SI->Transitions.size(); - - // If there are no valid transitions from this stage, we need a sentinel - // transition. - if (ValidTransitions == StateEntry[i]) { - OS << SentinelEntry << ",\t"; - ++ValidTransitions; - } - - OS << " // state " << i << ": " << StateEntry[i]; - if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition. - OS << "-" << (ValidTransitions-1); - } - OS << "\n"; - } - - // Print out a sentinel entry at the end of the StateInputTable. This is - // needed to iterate over StateInputTable in DFAPacketizer::ReadTable() - OS << SentinelEntry << "\t"; - OS << " // state " << numStates << ": " << ValidTransitions; - OS << "\n"; - - OS << "};\n\n"; - OS << "// " << TargetName << "DFAStateEntryTable[i] = " - << "Index of the first entry in DFAStateInputTable for\n"; - OS << "// " - << "the ith state.\n"; - OS << "// " << numStates << " states\n"; - OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n"; - - // Multiply i by 2 since each entry in DFAStateInputTable is a set of - // two numbers. 
- unsigned lastState = 0; - for (unsigned i = 0; i < numStates; ++i) { - if (i && ((i % 10) == 0)) { - lastState = i-1; - OS << " // states " << (i-10) << ":" << lastState << "\n"; - } - OS << StateEntry[i] << ", "; - } - - // Print out the index to the sentinel entry in StateInputTable - OS << ValidTransitions << ", "; - OS << " // states " << (lastState+1) << ":" << numStates << "\n"; - - OS << "};\n"; - OS << "} // namespace\n"; - - // - // Emit DFA Packetizer tables if the target is a VLIW machine. - // - std::string SubTargetClassName = TargetName + "GenSubtargetInfo"; - OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; - OS << "namespace llvm {\n"; - OS << "DFAPacketizer *" << SubTargetClassName << "::" - << "createDFAPacketizer(const InstrItineraryData *IID) const {\n" - << " return new DFAPacketizer(IID, " << TargetName - << "DFAStateInputTable, " << TargetName << "DFAStateEntryTable);\n}\n\n"; - OS << "} // End llvm namespace \n"; -} - // // collectAllFuncUnits - Construct a map of function unit names to bits. // @@ -837,10 +389,32 @@ int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName, // Run the worklist algorithm to generate the DFA. // void DFAPacketizerEmitter::run(raw_ostream &OS) { + OS << "\n" + << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; + OS << "namespace llvm {\n"; + + OS << "\n// Input format:\n"; + OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS + << "\t// maximum AND'ed resource terms\n"; + OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES + << "\t// maximum resource bits in one term\n"; + // Collect processor iteraries. 
std::vector ProcItinList = Records.getAllDerivedDefinitions("ProcessorItineraries"); + std::unordered_map> ItinsByNamespace; + for (Record *R : ProcItinList) + ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R); + + for (auto &KV : ItinsByNamespace) + emitForItineraries(OS, KV.second, KV.first); + OS << "} // end namespace llvm\n"; +} + +void DFAPacketizerEmitter::emitForItineraries( + raw_ostream &OS, std::vector &ProcItinList, + std::string DFAName) { // // Collect the Functional units. // @@ -855,8 +429,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { std::map ComboBitToBitsMap; std::vector ComboFuncList = Records.getAllDerivedDefinitions("ComboFuncUnits"); - int numCombos = collectAllComboFuncs(ComboFuncList, - FUNameToBitsMap, ComboBitToBitsMap, OS); + collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS); // // Collect the itineraries. @@ -887,103 +460,89 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { FUNameToBitsMap, ItinDataList, maxStages, OS); } - // - // Run a worklist algorithm to generate the DFA. - // - DFA D; - const State *Initial = &D.newState(); - Initial->isInitial = true; - Initial->stateInfo.insert(0x0); - SmallVector WorkList; - std::map, const State*> Visited; + // The type of a state in the nondeterministic automaton we're defining. + using NfaStateTy = unsigned; - WorkList.push_back(Initial); + // Given a resource state, return all resource states by applying + // InsnClass. + auto applyInsnClass = [&](ArrayRef InsnClass, + NfaStateTy State) -> std::deque { + std::deque V(1, State); + // Apply every stage in the class individually. + for (unsigned Stage : InsnClass) { + // Apply this stage to every existing member of V in turn. + size_t Sz = V.size(); + for (unsigned I = 0; I < Sz; ++I) { + unsigned S = V.front(); + V.pop_front(); - // - // Worklist algorithm to create a DFA for processor resource tracking. - // C = {set of InsnClasses} - // Begin with initial node in worklist. 
Initial node does not have - // any consumed resources, - // ResourceState = 0x0 - // Visited = {} - // While worklist != empty - // S = first element of worklist - // For every instruction class C - // if we can accommodate C in S: - // S' = state with resource states = {S Union C} - // Add a new transition: S x C -> S' - // If S' is not in Visited: - // Add S' to worklist - // Add S' to Visited - // - while (!WorkList.empty()) { - const State *current = WorkList.pop_back_val(); - LLVM_DEBUG({ - dbgs() << "---------------------\n"; - dbgs() << "Processing state: " << current->stateNum << " - "; - dbgsStateInfo(current->stateInfo); - dbgs() << "\n"; - }); + // For this stage, state combination, try all possible resources. + for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) { + unsigned ResourceMask = 1U << J; + if ((ResourceMask & Stage) == 0) + // This resource isn't required by this stage. + continue; + unsigned Combo = ComboBitToBitsMap[ResourceMask]; + if (Combo && ((~S & Combo) != Combo)) + // This combo units bits are not available. + continue; + unsigned ResultingResourceState = S | ResourceMask | Combo; + if (ResultingResourceState == S) + continue; + V.push_back(ResultingResourceState); + } + } + } + return V; + }; + + // Given a resource state, return a quick (conservative) guess as to whether + // InsnClass can be applied. This is a filter for the more heavyweight + // applyInsnClass. 
+ auto canApplyInsnClass = [](ArrayRef InsnClass, + NfaStateTy State) -> bool { + for (unsigned Resources : InsnClass) { + if ((State | Resources) == State) + return false; + } + return true; + }; + + DfaEmitter Emitter; + std::deque Worklist(1, 0); + std::set SeenStates; + SeenStates.insert(Worklist.front()); + while (!Worklist.empty()) { + NfaStateTy State = Worklist.front(); + Worklist.pop_front(); for (unsigned i = 0; i < allInsnClasses.size(); i++) { - std::vector InsnClass = allInsnClasses[i]; - LLVM_DEBUG({ - dbgs() << i << " "; - dbgsInsnClass(InsnClass); - dbgs() << "\n"; - }); - - std::set NewStateResources; - // - // If we haven't already created a transition for this input - // and the state can accommodate this InsnClass, create a transition. - // - if (!current->hasTransition(InsnClass) && - current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) { - const State *NewState = nullptr; - current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources); - if (NewStateResources.empty()) { - LLVM_DEBUG(dbgs() << " Skipped - no new states generated\n"); - continue; - } - - LLVM_DEBUG({ - dbgs() << "\t"; - dbgsStateInfo(NewStateResources); - dbgs() << "\n"; - }); - - // - // If we have seen this state before, then do not create a new state. 
- // - auto VI = Visited.find(NewStateResources); - if (VI != Visited.end()) { - NewState = VI->second; - LLVM_DEBUG({ - dbgs() << "\tFound existing state: " << NewState->stateNum - << " - "; - dbgsStateInfo(NewState->stateInfo); - dbgs() << "\n"; - }); - } else { - NewState = &D.newState(); - NewState->stateInfo = NewStateResources; - Visited[NewStateResources] = NewState; - WorkList.push_back(NewState); - LLVM_DEBUG({ - dbgs() << "\tAccepted new state: " << NewState->stateNum << " - "; - dbgsStateInfo(NewState->stateInfo); - dbgs() << "\n"; - }); - } - - current->addTransition(InsnClass, NewState); + const std::vector &InsnClass = allInsnClasses[i]; + if (!canApplyInsnClass(InsnClass, State)) + continue; + for (unsigned NewState : applyInsnClass(InsnClass, State)) { + if (SeenStates.emplace(NewState).second) + Worklist.emplace_back(NewState); + Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass)); } } } - // Print out the table. - D.writeTableAndAPI(OS, TargetName, - numInsnClasses, maxResources, numCombos, maxStages); + OS << "} // end namespace llvm\n\n"; + OS << "namespace {\n"; + std::string TargetAndDFAName = TargetName + DFAName; + Emitter.emit(TargetAndDFAName, OS); + OS << "} // end anonymous namespace\n\n"; + + std::string SubTargetClassName = TargetName + "GenSubtargetInfo"; + OS << "namespace llvm {\n"; + OS << "DFAPacketizer *" << SubTargetClassName << "::" + << "create" << DFAName + << "DFAPacketizer(const InstrItineraryData *IID) const {\n" + << " static Automaton A(ArrayRef<" << TargetAndDFAName + << "Transition>(" << TargetAndDFAName << "Transitions), " + << TargetAndDFAName << "TransitionInfo);\n" + << " return new DFAPacketizer(IID, A);\n" + << "\n}\n\n"; } namespace llvm { diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index 9e75c7fba77..0002b0e14db 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -153,4 +153,4 @@ void 
EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { "MCDisassembler::Success", "MCDisassembler::Fail", ""); } -} // End llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index f5e975d2e5a..ac69b431607 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -13,6 +13,7 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" +#include "InfoByHwMode.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/CachedHashString.h" @@ -64,9 +65,10 @@ struct OperandInfo { std::vector Fields; std::string Decoder; bool HasCompleteDecoder; + uint64_t InitValue; OperandInfo(std::string D, bool HCD) - : Decoder(std::move(D)), HasCompleteDecoder(HCD) {} + : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} void addField(unsigned Base, unsigned Width, unsigned Offset) { Fields.push_back(EncodingField(Base, Width, Offset)); @@ -96,9 +98,11 @@ struct DecoderTableInfo { struct EncodingAndInst { const Record *EncodingDef; const CodeGenInstruction *Inst; + StringRef HwModeName; - EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst) - : EncodingDef(EncodingDef), Inst(Inst) {} + EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, + StringRef HwModeName = "") + : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} }; struct EncodingIDAndOpcode { @@ -599,7 +603,7 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. 
FilterChooserMap.insert( - std::make_pair(-1U, llvm::make_unique( + std::make_pair(-1U, std::make_unique( Owner->AllInstructions, VariableInstructions, Owner->Operands, BitValueArray, *Owner))); } @@ -625,7 +629,7 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // category of instructions. FilterChooserMap.insert(std::make_pair( - Inst.first, llvm::make_unique( + Inst.first, std::make_unique( Owner->AllInstructions, Inst.second, Owner->Operands, BitValueArray, *Owner))); } @@ -1103,12 +1107,15 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, bool &OpHasCompleteDecoder) const { const std::string &Decoder = OpInfo.Decoder; - if (OpInfo.numFields() != 1) - o.indent(Indentation) << "tmp = 0;\n"; + if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) { + o.indent(Indentation) << "tmp = 0x"; + o.write_hex(OpInfo.InitValue); + o << ";\n"; + } for (const EncodingField &EF : OpInfo) { o.indent(Indentation) << "tmp "; - if (OpInfo.numFields() != 1) o << '|'; + if (OpInfo.numFields() != 1 || OpInfo.InitValue != 0) o << '|'; o << "= fieldFromInstruction" << "(insn, " << EF.Base << ", " << EF.Width << ')'; if (OpInfo.numFields() != 1 || EF.Offset != 0) @@ -2026,6 +2033,16 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, HasCompleteDecoderBit->getValue() : true; OperandInfo OpInfo(Decoder, HasCompleteDecoder); + + // Some bits of the operand may be required to be 1 depending on the + // instruction's encoding. Collect those bits. 
+ if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) + if (const BitsInit *OpBits = dyn_cast(EncodedValue->getValue())) + for (unsigned I = 0; I < OpBits->getNumBits(); ++I) + if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) + if (OpBit->getValue()) + OpInfo.InitValue |= 1ULL << I; + unsigned Base = ~0U; unsigned Width = 0; unsigned Offset = 0; @@ -2368,12 +2385,50 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { Target.reverseBitsForLittleEndianEncoding(); // Parameterize the decoders based on namespace and instruction width. + std::set HwModeNames; const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); NumberedEncodings.reserve(NumberedInstructions.size()); DenseMap IndexOfInstruction; + // First, collect all HwModes referenced by the target. for (const auto &NumberedInstruction : NumberedInstructions) { IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); - NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction); + + if (const RecordVal *RV = + NumberedInstruction->TheDef->getValue("EncodingInfos")) { + if (auto *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) + HwModeNames.insert(HWM.getMode(KV.first).Name); + } + } + } + + // If HwModeNames is empty, add the empty string so we always have one HwMode. 
+ if (HwModeNames.empty()) + HwModeNames.insert(""); + + for (const auto &NumberedInstruction : NumberedInstructions) { + IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); + + if (const RecordVal *RV = + NumberedInstruction->TheDef->getValue("EncodingInfos")) { + if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { + const CodeGenHwModes &HWM = Target.getHwModes(); + EncodingInfoByHwMode EBM(DI->getDef(), HWM); + for (auto &KV : EBM.Map) { + NumberedEncodings.emplace_back(KV.second, NumberedInstruction, + HWM.getMode(KV.first).Name); + HwModeNames.insert(HWM.getMode(KV.first).Name); + } + continue; + } + } + // This instruction is encoded the same on all HwModes. Emit it for all + // HwModes. + for (StringRef HwModeName : HwModeNames) + NumberedEncodings.emplace_back(NumberedInstruction->TheDef, + NumberedInstruction, HwModeName); } for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) NumberedEncodings.emplace_back( @@ -2401,13 +2456,19 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { NumInstructions++; NumEncodings++; - StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace"); + if (!Size) + continue; - if (Size) { - if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) { - OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second); - } else - NumEncodingsOmitted++; + if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) { + std::string DecoderNamespace = + EncodingDef->getValueAsString("DecoderNamespace"); + if (!NumberedEncodings[i].HwModeName.empty()) + DecoderNamespace += + std::string("_") + NumberedEncodings[i].HwModeName.str(); + OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back( + i, IndexOfInstruction.find(Def)->second); + } else { + NumEncodingsOmitted++; } } @@ -2451,7 +2512,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { // Emit the main entry point for the decoder, 
decodeInstruction(). emitDecodeInstruction(OS); - OS << "\n} // End llvm namespace\n"; + OS << "\n} // end namespace llvm\n"; } namespace llvm { diff --git a/utils/TableGen/GICombinerEmitter.cpp b/utils/TableGen/GICombinerEmitter.cpp new file mode 100644 index 00000000000..5dc4d6b0774 --- /dev/null +++ b/utils/TableGen/GICombinerEmitter.cpp @@ -0,0 +1,452 @@ +//===- GlobalCombinerEmitter.cpp - Generate a combiner --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Generate a combiner implementation for GlobalISel from a declarative +/// syntax +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Timer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/StringMatcher.h" +#include "llvm/TableGen/TableGenBackend.h" +#include "CodeGenTarget.h" +#include "GlobalISel/CodeExpander.h" +#include "GlobalISel/CodeExpansions.h" + +using namespace llvm; + +#define DEBUG_TYPE "gicombiner-emitter" + +// FIXME: Use ALWAYS_ENABLED_STATISTIC once it's available. 
+unsigned NumPatternTotal = 0; +STATISTIC(NumPatternTotalStatistic, "Total number of patterns"); + +cl::OptionCategory + GICombinerEmitterCat("Options for -gen-global-isel-combiner"); +static cl::list + SelectedCombiners("combiners", cl::desc("Emit the specified combiners"), + cl::cat(GICombinerEmitterCat), cl::CommaSeparated); +static cl::opt ShowExpansions( + "gicombiner-show-expansions", + cl::desc("Use C++ comments to indicate occurence of code expansion"), + cl::cat(GICombinerEmitterCat)); + +namespace { +typedef uint64_t RuleID; + +class RootInfo { + StringRef PatternSymbol; + +public: + RootInfo(StringRef PatternSymbol) : PatternSymbol(PatternSymbol) {} + + StringRef getPatternSymbol() const { return PatternSymbol; } +}; + +class CombineRule { +protected: + /// A unique ID for this rule + /// ID's are used for debugging and run-time disabling of rules among other + /// things. + RuleID ID; + + /// The record defining this rule. + const Record &TheDef; + + /// The roots of a match. These are the leaves of the DAG that are closest to + /// the end of the function. I.e. the nodes that are encountered without + /// following any edges of the DAG described by the pattern as we work our way + /// from the bottom of the function to the top. + std::vector Roots; + + /// A block of arbitrary C++ to finish testing the match. 
+ /// FIXME: This is a temporary measure until we have actual pattern matching + const CodeInit *MatchingFixupCode = nullptr; +public: + CombineRule(const CodeGenTarget &Target, RuleID ID, const Record &R) + : ID(ID), TheDef(R) {} + bool parseDefs(); + bool parseMatcher(const CodeGenTarget &Target); + + RuleID getID() const { return ID; } + StringRef getName() const { return TheDef.getName(); } + const Record &getDef() const { return TheDef; } + const CodeInit *getMatchingFixupCode() const { return MatchingFixupCode; } + size_t getNumRoots() const { return Roots.size(); } + + using const_root_iterator = std::vector::const_iterator; + const_root_iterator roots_begin() const { return Roots.begin(); } + const_root_iterator roots_end() const { return Roots.end(); } + iterator_range roots() const { + return llvm::make_range(Roots.begin(), Roots.end()); + } +}; + +/// A convenience function to check that an Init refers to a specific def. This +/// is primarily useful for testing for defs and similar in DagInit's since +/// DagInit's support any type inside them. +static bool isSpecificDef(const Init &N, StringRef Def) { + if (const DefInit *OpI = dyn_cast(&N)) + if (OpI->getDef()->getName() == Def) + return true; + return false; +} + +/// A convenience function to check that an Init refers to a def that is a +/// subclass of the given class and coerce it to a def if it is. This is +/// primarily useful for testing for subclasses of GIMatchKind and similar in +/// DagInit's since DagInit's support any type inside them. 
+static Record *getDefOfSubClass(const Init &N, StringRef Cls) { + if (const DefInit *OpI = dyn_cast(&N)) + if (OpI->getDef()->isSubClassOf(Cls)) + return OpI->getDef(); + return nullptr; +} + +bool CombineRule::parseDefs() { + NamedRegionTimer T("parseDefs", "Time spent parsing the defs", "Rule Parsing", + "Time spent on rule parsing", TimeRegions); + DagInit *Defs = TheDef.getValueAsDag("Defs"); + + if (Defs->getOperatorAsDef(TheDef.getLoc())->getName() != "defs") { + PrintError(TheDef.getLoc(), "Expected defs operator"); + return false; + } + + for (unsigned I = 0, E = Defs->getNumArgs(); I < E; ++I) { + // Roots should be collected into Roots + if (isSpecificDef(*Defs->getArg(I), "root")) { + Roots.emplace_back(Defs->getArgNameStr(I)); + continue; + } + + // Otherwise emit an appropriate error message. + if (getDefOfSubClass(*Defs->getArg(I), "GIDefKind")) + PrintError(TheDef.getLoc(), + "This GIDefKind not implemented in tablegen"); + else if (getDefOfSubClass(*Defs->getArg(I), "GIDefKindWithArgs")) + PrintError(TheDef.getLoc(), + "This GIDefKindWithArgs not implemented in tablegen"); + else + PrintError(TheDef.getLoc(), + "Expected a subclass of GIDefKind or a sub-dag whose " + "operator is of type GIDefKindWithArgs"); + return false; + } + + if (Roots.empty()) { + PrintError(TheDef.getLoc(), "Combine rules must have at least one root"); + return false; + } + return true; +} + +bool CombineRule::parseMatcher(const CodeGenTarget &Target) { + NamedRegionTimer T("parseMatcher", "Time spent parsing the matcher", + "Rule Parsing", "Time spent on rule parsing", TimeRegions); + DagInit *Matchers = TheDef.getValueAsDag("Match"); + + if (Matchers->getOperatorAsDef(TheDef.getLoc())->getName() != "match") { + PrintError(TheDef.getLoc(), "Expected match operator"); + return false; + } + + if (Matchers->getNumArgs() == 0) { + PrintError(TheDef.getLoc(), "Matcher is empty"); + return false; + } + + // The match section consists of a list of matchers and predicates. 
Parse each + // one and add the equivalent GIMatchDag nodes, predicates, and edges. + for (unsigned I = 0; I < Matchers->getNumArgs(); ++I) { + + // Parse arbitrary C++ code we have in lieu of supporting MIR matching + if (const CodeInit *CodeI = dyn_cast(Matchers->getArg(I))) { + assert(!MatchingFixupCode && + "Only one block of arbitrary code is currently permitted"); + MatchingFixupCode = CodeI; + continue; + } + + PrintError(TheDef.getLoc(), + "Expected a subclass of GIMatchKind or a sub-dag whose " + "operator is either of a GIMatchKindWithArgs or Instruction"); + PrintNote("Pattern was `" + Matchers->getArg(I)->getAsString() + "'"); + return false; + } + return true; +} + +class GICombinerEmitter { + StringRef Name; + const CodeGenTarget &Target; + Record *Combiner; + std::vector> Rules; + std::unique_ptr makeCombineRule(const Record &R); + + void gatherRules(std::vector> &ActiveRules, + const std::vector &&RulesAndGroups); + +public: + explicit GICombinerEmitter(RecordKeeper &RK, const CodeGenTarget &Target, + StringRef Name, Record *Combiner); + ~GICombinerEmitter() {} + + StringRef getClassName() const { + return Combiner->getValueAsString("Classname"); + } + void run(raw_ostream &OS); + + /// Emit the name matcher (guarded by #ifndef NDEBUG) used to disable rules in + /// response to the generated cl::opt. 
+ void emitNameMatcher(raw_ostream &OS) const; + void generateCodeForRule(raw_ostream &OS, const CombineRule *Rule, + StringRef Indent) const; +}; + +GICombinerEmitter::GICombinerEmitter(RecordKeeper &RK, + const CodeGenTarget &Target, + StringRef Name, Record *Combiner) + : Name(Name), Target(Target), Combiner(Combiner) {} + +void GICombinerEmitter::emitNameMatcher(raw_ostream &OS) const { + std::vector> Cases; + Cases.reserve(Rules.size()); + + for (const CombineRule &EnumeratedRule : make_pointee_range(Rules)) { + std::string Code; + raw_string_ostream SS(Code); + SS << "return " << EnumeratedRule.getID() << ";\n"; + Cases.push_back(std::make_pair(EnumeratedRule.getName(), SS.str())); + } + + OS << "static Optional getRuleIdxForIdentifier(StringRef " + "RuleIdentifier) {\n" + << " uint64_t I;\n" + << " // getAtInteger(...) returns false on success\n" + << " bool Parsed = !RuleIdentifier.getAsInteger(0, I);\n" + << " if (Parsed)\n" + << " return I;\n\n" + << "#ifndef NDEBUG\n"; + StringMatcher Matcher("RuleIdentifier", Cases, OS); + Matcher.Emit(); + OS << "#endif // ifndef NDEBUG\n\n" + << " return None;\n" + << "}\n"; +} + +std::unique_ptr +GICombinerEmitter::makeCombineRule(const Record &TheDef) { + std::unique_ptr Rule = + std::make_unique(Target, NumPatternTotal, TheDef); + + if (!Rule->parseDefs()) + return nullptr; + if (!Rule->parseMatcher(Target)) + return nullptr; + // For now, don't support multi-root rules. We'll come back to this later + // once we have the algorithm changes to support it. + if (Rule->getNumRoots() > 1) { + PrintError(TheDef.getLoc(), "Multi-root matches are not supported (yet)"); + return nullptr; + } + return Rule; +} + +/// Recurse into GICombineGroup's and flatten the ruleset into a simple list. 
+void GICombinerEmitter::gatherRules( + std::vector> &ActiveRules, + const std::vector &&RulesAndGroups) { + for (Record *R : RulesAndGroups) { + if (R->isValueUnset("Rules")) { + std::unique_ptr Rule = makeCombineRule(*R); + if (Rule == nullptr) { + PrintError(R->getLoc(), "Failed to parse rule"); + continue; + } + ActiveRules.emplace_back(std::move(Rule)); + ++NumPatternTotal; + } else + gatherRules(ActiveRules, R->getValueAsListOfDefs("Rules")); + } +} + +void GICombinerEmitter::generateCodeForRule(raw_ostream &OS, + const CombineRule *Rule, + StringRef Indent) const { + { + const Record &RuleDef = Rule->getDef(); + + OS << Indent << "// Rule: " << RuleDef.getName() << "\n" + << Indent << "if (!isRuleDisabled(" << Rule->getID() << ")) {\n"; + + CodeExpansions Expansions; + for (const RootInfo &Root : Rule->roots()) { + Expansions.declare(Root.getPatternSymbol(), "MI"); + } + DagInit *Applyer = RuleDef.getValueAsDag("Apply"); + if (Applyer->getOperatorAsDef(RuleDef.getLoc())->getName() != + "apply") { + PrintError(RuleDef.getLoc(), "Expected apply operator"); + return; + } + + OS << Indent << " if (1\n"; + + if (Rule->getMatchingFixupCode() && + !Rule->getMatchingFixupCode()->getValue().empty()) { + // FIXME: Single-use lambda's like this are a serious compile-time + // performance and memory issue. It's convenient for this early stage to + // defer some work to successive patches but we need to eliminate this + // before the ruleset grows to small-moderate size. Last time, it became + // a big problem for low-mem systems around the 500 rule mark but by the + // time we grow that large we should have merged the ISel match table + // mechanism with the Combiner. 
+ OS << Indent << " && [&]() {\n" + << Indent << " " + << CodeExpander(Rule->getMatchingFixupCode()->getValue(), Expansions, + Rule->getMatchingFixupCode()->getLoc(), ShowExpansions) + << "\n" + << Indent << " return true;\n" + << Indent << " }()"; + } + OS << ") {\n" << Indent << " "; + + if (const CodeInit *Code = dyn_cast(Applyer->getArg(0))) { + OS << CodeExpander(Code->getAsUnquotedString(), Expansions, + Code->getLoc(), ShowExpansions) + << "\n" + << Indent << " return true;\n" + << Indent << " }\n"; + } else { + PrintError(RuleDef.getLoc(), "Expected apply code block"); + return; + } + + OS << Indent << "}\n"; + } +} + +void GICombinerEmitter::run(raw_ostream &OS) { + gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules")); + NamedRegionTimer T("Emit", "Time spent emitting the combiner", + "Code Generation", "Time spent generating code", + TimeRegions); + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n" + << "#include \"llvm/ADT/SparseBitVector.h\"\n" + << "namespace llvm {\n" + << "extern cl::OptionCategory GICombinerOptionCategory;\n" + << "} // end namespace llvm\n" + << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_DEPS\n\n"; + + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n" + << "class " << getClassName() << " {\n" + << " SparseBitVector<> DisabledRules;\n" + << "\n" + << "public:\n" + << " bool parseCommandLineOption();\n" + << " bool isRuleDisabled(unsigned ID) const;\n" + << " bool setRuleDisabled(StringRef RuleIdentifier);\n" + << "\n" + << " bool tryCombineAll(\n" + << " GISelChangeObserver &Observer,\n" + << " MachineInstr &MI,\n" + << " MachineIRBuilder &B) const;\n" + << "};\n\n"; + + emitNameMatcher(OS); + + OS << "bool " << getClassName() + << "::setRuleDisabled(StringRef RuleIdentifier) {\n" + << " std::pair RangePair = " + "RuleIdentifier.split('-');\n" + << " if (!RangePair.second.empty()) {\n" + << " const auto First = getRuleIdxForIdentifier(RangePair.first);\n" + << " const auto Last = 
getRuleIdxForIdentifier(RangePair.second);\n" + << " if (!First.hasValue() || !Last.hasValue())\n" + << " return false;\n" + << " if (First >= Last)\n" + << " report_fatal_error(\"Beginning of range should be before end of " + "range\");\n" + << " for (auto I = First.getValue(); I < Last.getValue(); ++I)\n" + << " DisabledRules.set(I);\n" + << " return true;\n" + << " } else {\n" + << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n" + << " if (!I.hasValue())\n" + << " return false;\n" + << " DisabledRules.set(I.getValue());\n" + << " return true;\n" + << " }\n" + << " return false;\n" + << "}\n"; + + OS << "bool " << getClassName() + << "::isRuleDisabled(unsigned RuleID) const {\n" + << " return DisabledRules.test(RuleID);\n" + << "}\n"; + OS << "#endif // ifdef " << Name.upper() << "_GENCOMBINERHELPER_H\n\n"; + + OS << "#ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n" + << "\n" + << "cl::list " << Name << "Option(\n" + << " \"" << Name.lower() << "-disable-rule\",\n" + << " cl::desc(\"Disable one or more combiner rules temporarily in " + << "the " << Name << " pass\"),\n" + << " cl::CommaSeparated,\n" + << " cl::Hidden,\n" + << " cl::cat(GICombinerOptionCategory));\n" + << "\n" + << "bool " << getClassName() << "::parseCommandLineOption() {\n" + << " for (const auto &Identifier : " << Name << "Option)\n" + << " if (!setRuleDisabled(Identifier))\n" + << " return false;\n" + << " return true;\n" + << "}\n\n"; + + OS << "bool " << getClassName() << "::tryCombineAll(\n" + << " GISelChangeObserver &Observer,\n" + << " MachineInstr &MI,\n" + << " MachineIRBuilder &B) const {\n" + << " CombinerHelper Helper(Observer, B);\n" + << " MachineBasicBlock *MBB = MI.getParent();\n" + << " MachineFunction *MF = MBB->getParent();\n" + << " MachineRegisterInfo &MRI = MF->getRegInfo();\n" + << " (void)MBB; (void)MF; (void)MRI;\n\n"; + + for (const auto &Rule : Rules) + generateCodeForRule(OS, Rule.get(), " "); + OS << "\n return false;\n" + << "}\n" + << "#endif 
// ifdef " << Name.upper() << "_GENCOMBINERHELPER_CPP\n"; +} + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// + +namespace llvm { +void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) { + CodeGenTarget Target(RK); + emitSourceFileHeader("Global Combiner", OS); + + if (SelectedCombiners.empty()) + PrintFatalError("No combiners selected with -combiners"); + for (const auto &Combiner : SelectedCombiners) { + Record *CombinerDef = RK.getDef(Combiner); + if (!CombinerDef) + PrintFatalError("Could not find " + Combiner); + GICombinerEmitter(RK, Target, Combiner, CombinerDef).run(OS); + } + NumPatternTotalStatistic = NumPatternTotal; +} + +} // namespace llvm diff --git a/utils/TableGen/GlobalISel/CMakeLists.txt b/utils/TableGen/GlobalISel/CMakeLists.txt new file mode 100644 index 00000000000..2f74d1087bc --- /dev/null +++ b/utils/TableGen/GlobalISel/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +llvm_add_library(LLVMTableGenGlobalISel STATIC DISABLE_LLVM_LINK_LLVM_DYLIB + CodeExpander.cpp + ) diff --git a/utils/TableGen/GlobalISel/CodeExpander.cpp b/utils/TableGen/GlobalISel/CodeExpander.cpp new file mode 100644 index 00000000000..d59a9b8e3b6 --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpander.cpp @@ -0,0 +1,93 @@ +//===- CodeExpander.cpp - Expand variables in a string --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Expand the variables in a string. 
+// +//===----------------------------------------------------------------------===// + +#include "CodeExpander.h" +#include "CodeExpansions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" + +using namespace llvm; + +void CodeExpander::emit(raw_ostream &OS) const { + StringRef Current = Code; + + while (!Current.empty()) { + size_t Pos = Current.find_first_of("$\n\\"); + if (Pos == StringRef::npos) { + OS << Current; + Current = ""; + continue; + } + + OS << Current.substr(0, Pos); + Current = Current.substr(Pos); + + if (Current.startswith("\n")) { + OS << "\n" << Indent; + Current = Current.drop_front(1); + continue; + } + + if (Current.startswith("\\$") || Current.startswith("\\\\")) { + OS << Current[1]; + Current = Current.drop_front(2); + continue; + } + + if (Current.startswith("\\")) { + Current = Current.drop_front(1); + continue; + } + + if (Current.startswith("${")) { + StringRef StartVar = Current; + Current = Current.drop_front(2); + StringRef Var; + std::tie(Var, Current) = Current.split("}"); + + // Warn if we split because no terminator was found. + StringRef EndVar = StartVar.drop_front(2 /* ${ */ + Var.size()); + if (EndVar.empty()) { + size_t LocOffset = StartVar.data() - Code.data(); + PrintWarning( + Loc.size() > 0 && Loc[0].isValid() + ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Unterminated expansion"); + } + + auto ValueI = Expansions.find(Var); + if (ValueI == Expansions.end()) { + size_t LocOffset = StartVar.data() - Code.data(); + PrintError(Loc.size() > 0 && Loc[0].isValid() + ? 
SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Attempting to expand an undeclared variable " + Var); + } + if (ShowExpansions) + OS << "/*$" << Var << "{*/"; + OS << Expansions.lookup(Var); + if (ShowExpansions) + OS << "/*}*/"; + continue; + } + + size_t LocOffset = Current.data() - Code.data(); + PrintWarning(Loc.size() > 0 && Loc[0].isValid() + ? SMLoc::getFromPointer(Loc[0].getPointer() + LocOffset) + : SMLoc(), + "Assuming missing escape character"); + OS << "$"; + Current = Current.drop_front(1); + } +} diff --git a/utils/TableGen/GlobalISel/CodeExpander.h b/utils/TableGen/GlobalISel/CodeExpander.h new file mode 100644 index 00000000000..bd6946de592 --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpander.h @@ -0,0 +1,55 @@ +//===- CodeExpander.h - Expand variables in a string ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Expand the variables in a string. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H +#define LLVM_UTILS_TABLEGEN_CODEEXPANDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +class CodeExpansions; +class raw_ostream; + +/// Emit the given code with all '${foo}' placeholders expanded to their +/// replacements. +/// +/// It's an error to use an undefined expansion and expansion-like output that +/// needs to be emitted verbatim can be escaped as '\${foo}' +/// +/// The emitted code can be given a custom indent to enable both indentation by +/// an arbitrary amount of whitespace and emission of the code as a comment. 
+class CodeExpander { + StringRef Code; + const CodeExpansions &Expansions; + const ArrayRef &Loc; + bool ShowExpansions; + StringRef Indent; + +public: + CodeExpander(StringRef Code, const CodeExpansions &Expansions, + const ArrayRef &Loc, bool ShowExpansions, + StringRef Indent = " ") + : Code(Code), Expansions(Expansions), Loc(Loc), + ShowExpansions(ShowExpansions), Indent(Indent) {} + + void emit(raw_ostream &OS) const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const CodeExpander &Expander) { + Expander.emit(OS); + return OS; +} +} // end namespace llvm + +#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANDER_H diff --git a/utils/TableGen/GlobalISel/CodeExpansions.h b/utils/TableGen/GlobalISel/CodeExpansions.h new file mode 100644 index 00000000000..bb890ec8f57 --- /dev/null +++ b/utils/TableGen/GlobalISel/CodeExpansions.h @@ -0,0 +1,43 @@ +//===- CodeExpansions.h - Record expansions for CodeExpander --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Record the expansions to use in a CodeExpander. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" + +#ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H +#define LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H +namespace llvm { +class CodeExpansions { +public: + using const_iterator = StringMap::const_iterator; + +protected: + StringMap Expansions; + +public: + void declare(StringRef Name, StringRef Expansion) { + bool Inserted = Expansions.try_emplace(Name, Expansion).second; + assert(Inserted && "Declared variable twice"); + (void)Inserted; + } + + std::string lookup(StringRef Variable) const { + return Expansions.lookup(Variable); + } + + const_iterator begin() const { return Expansions.begin(); } + const_iterator end() const { return Expansions.end(); } + const_iterator find(StringRef Variable) const { + return Expansions.find(Variable); + } +}; +} // end namespace llvm +#endif // ifndef LLVM_UTILS_TABLEGEN_CODEEXPANSIONS_H diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp index f1c02134198..d8d4c9f4f55 100644 --- a/utils/TableGen/GlobalISelEmitter.cpp +++ b/utils/TableGen/GlobalISelEmitter.cpp @@ -249,6 +249,10 @@ static std::string explainPredicates(const TreePatternNode *N) { OS << ']'; } + int64_t MinAlign = P.getMinAlignment(); + if (MinAlign > 0) + Explanation += " MinAlign=" + utostr(MinAlign); + if (P.isAtomicOrderingMonotonic()) Explanation += " monotonic"; if (P.isAtomicOrderingAcquire()) @@ -329,6 +333,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { const ListInit *AddrSpaces = Predicate.getAddressSpaces(); if (AddrSpaces && !AddrSpaces->empty()) continue; + + if (Predicate.getMinAlignment() > 0) + continue; } if (Predicate.isAtomic() && Predicate.getMemoryVT()) @@ -822,6 +829,10 @@ protected: /// the renderers. StringMap DefinedOperands; + /// A map of anonymous physical register operands defined by the matchers that + /// may be referenced by the renderers. 
+ DenseMap PhysRegOperands; + /// ID for the next instruction variable defined with implicitlyDefineInsnVar() unsigned NextInsnVarID; @@ -904,6 +915,8 @@ public: void defineOperand(StringRef SymbolicName, OperandMatcher &OM); + void definePhysRegOperand(Record *Reg, OperandMatcher &OM); + Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern, unsigned RendererID, unsigned SubOperandID) { if (ComplexSubOperands.count(SymbolicName)) @@ -927,6 +940,7 @@ public: InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const; const OperandMatcher &getOperandMatcher(StringRef Name) const; + const OperandMatcher &getPhysRegOperandMatcher(Record *) const; void optimize() override; void emit(MatchTable &Table) override; @@ -1048,14 +1062,17 @@ public: IPM_Opcode, IPM_NumOperands, IPM_ImmPredicate, + IPM_Imm, IPM_AtomicOrderingMMO, IPM_MemoryLLTSize, IPM_MemoryVsLLTSize, IPM_MemoryAddressSpace, + IPM_MemoryAlignment, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, OPM_IntrinsicID, + OPM_CmpPredicate, OPM_Instruction, OPM_Int, OPM_LiteralInt, @@ -1324,6 +1341,23 @@ public: } }; +class ImmOperandMatcher : public OperandPredicateMatcher { +public: + ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_Imm; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is a G_CONSTANT with a particular /// int. 
class ConstantIntOperandMatcher : public OperandPredicateMatcher { @@ -1381,6 +1415,36 @@ public: } }; +/// Generates code to check that an operand is a CmpInst predicate +class CmpPredicateOperandMatcher : public OperandPredicateMatcher { +protected: + std::string PredName; + +public: + CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx, + std::string P) + : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx), PredName(P) {} + + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + PredName == cast(&B)->PredName; + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == OPM_CmpPredicate; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckCmpPredicate") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("Predicate") + << MatchTable::NamedValue("CmpInst", PredName) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that an operand is an intrinsic ID. class IntrinsicIDOperandMatcher : public OperandPredicateMatcher { protected: @@ -1442,7 +1506,7 @@ public: Optional addPredicate(Args &&... 
args) { if (isSameAsAnotherOperand()) return None; - Predicates.emplace_back(llvm::make_unique( + Predicates.emplace_back(std::make_unique( getInsnVarID(), getOpIdx(), std::forward(args)...)); return static_cast(Predicates.back().get()); } @@ -1849,6 +1913,40 @@ public: } }; +class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher { +protected: + unsigned MMOIdx; + int MinAlign; + +public: + MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, + int MinAlign) + : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID), + MMOIdx(MMOIdx), MinAlign(MinAlign) { + assert(MinAlign > 0); + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_MemoryAlignment; + } + + bool isIdentical(const PredicateMatcher &B) const override { + if (!InstructionPredicateMatcher::isIdentical(B)) + return false; + auto *Other = cast(&B); + return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckMemoryAlignment") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx) + << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that the size of an MMO is less-than, equal-to, or /// greater than a given LLT. class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher { @@ -1945,6 +2043,11 @@ protected: std::string SymbolicName; unsigned InsnVarID; + /// PhysRegInputs - This list has an entry for each explicitly specified + /// physreg input to the pattern. The first elt is the Register node, the + /// second is the recorded slot number the input pattern match saved it in. 
+ SmallVector, 2> PhysRegInputs; + public: InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName) : Rule(Rule), SymbolicName(SymbolicName) { @@ -1957,7 +2060,7 @@ public: template Optional addPredicate(Args &&... args) { Predicates.emplace_back( - llvm::make_unique(getInsnVarID(), std::forward(args)...)); + std::make_unique(getInsnVarID(), std::forward(args)...)); return static_cast(Predicates.back().get()); } @@ -1986,6 +2089,20 @@ public: llvm_unreachable("Failed to lookup operand"); } + OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx, + unsigned TempOpIdx) { + assert(SymbolicName.empty()); + OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx); + Operands.emplace_back(OM); + Rule.definePhysRegOperand(Reg, *OM); + PhysRegInputs.emplace_back(Reg, OpIdx); + return *OM; + } + + ArrayRef> getPhysRegInputs() const { + return PhysRegInputs; + } + StringRef getSymbolicName() const { return SymbolicName; } unsigned getNumOperands() const { return Operands.size(); } OperandVec::iterator operands_begin() { return Operands.begin(); } @@ -2193,9 +2310,11 @@ public: OR_Copy, OR_CopyOrAddZeroReg, OR_CopySubReg, + OR_CopyPhysReg, OR_CopyConstantAsImm, OR_CopyFConstantAsFPImm, OR_Imm, + OR_SubRegIndex, OR_Register, OR_TempRegister, OR_ComplexPattern, @@ -2247,6 +2366,38 @@ public: } }; +/// A CopyRenderer emits code to copy a virtual register to a specific physical +/// register. 
+class CopyPhysRegRenderer : public OperandRenderer { +protected: + unsigned NewInsnID; + Record *PhysReg; + +public: + CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg) + : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID), + PhysReg(Reg) { + assert(PhysReg); + } + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_CopyPhysReg; + } + + Record *getPhysReg() const { return PhysReg; } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const OperandMatcher &Operand = Rule.getPhysRegOperandMatcher(PhysReg); + unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); + Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID") + << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx") + << MatchTable::IntValue(Operand.getOpIdx()) + << MatchTable::Comment(PhysReg->getName()) + << MatchTable::LineBreak; + } +}; + /// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an /// existing instruction to the one being built. If the operand turns out to be /// a 'G_CONSTANT 0' then it replaces the operand with a zero register. @@ -2393,11 +2544,13 @@ class AddRegisterRenderer : public OperandRenderer { protected: unsigned InsnID; const Record *RegisterDef; + bool IsDef; public: - AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef) - : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef) { - } + AddRegisterRenderer(unsigned InsnID, const Record *RegisterDef, + bool IsDef = false) + : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef), + IsDef(IsDef) {} static bool classof(const OperandRenderer *R) { return R->getKind() == OR_Register; @@ -2411,7 +2564,16 @@ public: ? 
RegisterDef->getValueAsString("Namespace") : ""), RegisterDef->getName()) - << MatchTable::LineBreak; + << MatchTable::Comment("AddRegisterRegFlags"); + + // TODO: This is encoded as a 64-bit element, but only 16 or 32-bits are + // really needed for a physical register reference. We can pack the + // register and flags in a single field. + if (IsDef) + Table << MatchTable::NamedValue("RegState::Define"); + else + Table << MatchTable::IntValue(0); + Table << MatchTable::LineBreak; } }; @@ -2467,6 +2629,28 @@ public: } }; +/// Adds an enum value for a subreg index to the instruction being built. +class SubRegIndexRenderer : public OperandRenderer { +protected: + unsigned InsnID; + const CodeGenSubRegIndex *SubRegIdx; + +public: + SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI) + : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_SubRegIndex; + } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID") + << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex") + << MatchTable::IntValue(SubRegIdx->EnumValue) + << MatchTable::LineBreak; + } +}; + /// Adds operands by calling a renderer function supplied by the ComplexPattern /// matcher function. class RenderComplexPatternOperand : public OperandRenderer { @@ -2620,7 +2804,7 @@ public: template Kind &addRenderer(Args&&... 
args) { OperandRenderers.emplace_back( - llvm::make_unique(InsnID, std::forward(args)...)); + std::make_unique(InsnID, std::forward(args)...)); return *static_cast(OperandRenderers.back().get()); } @@ -2747,7 +2931,9 @@ private: public: MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) - : Ty(Ty), TempRegID(TempRegID) {} + : Ty(Ty), TempRegID(TempRegID) { + KnownTypes.insert(Ty); + } void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { Table << MatchTable::Opcode("GIR_MakeTempReg") @@ -2781,7 +2967,7 @@ const std::vector &RuleMatcher::getRequiredFeatures() const { // iterator. template Kind &RuleMatcher::addAction(Args &&... args) { - Actions.emplace_back(llvm::make_unique(std::forward(args)...)); + Actions.emplace_back(std::make_unique(std::forward(args)...)); return *static_cast(Actions.back().get()); } @@ -2796,7 +2982,7 @@ template action_iterator RuleMatcher::insertAction(action_iterator InsertPt, Args &&... args) { return Actions.emplace(InsertPt, - llvm::make_unique(std::forward(args)...)); + std::make_unique(std::forward(args)...)); } unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) { @@ -2823,6 +3009,13 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) { OM.addPredicate(OM.getSymbolicName()); } +void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) { + if (PhysRegOperands.find(Reg) == PhysRegOperands.end()) { + PhysRegOperands[Reg] = &OM; + return; + } +} + InstructionMatcher & RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const { for (const auto &I : InsnVariableIDs) @@ -2832,6 +3025,18 @@ RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const { ("Failed to lookup instruction " + SymbolicName).str().c_str()); } +const OperandMatcher & +RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const { + const auto &I = PhysRegOperands.find(Reg); + + if (I == PhysRegOperands.end()) { + PrintFatalError(SrcLoc, "Register " + 
Reg->getName() + + " was not declared in matcher"); + } + + return *I->second; +} + const OperandMatcher & RuleMatcher::getOperandMatcher(StringRef Name) const { const auto &I = DefinedOperands.find(Name); @@ -3079,9 +3284,9 @@ private: bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx); - Expected - createAndImportInstructionRenderer(RuleMatcher &M, - const TreePatternNode *Dst); + Expected createAndImportInstructionRenderer( + RuleMatcher &M, InstructionMatcher &InsnMatcher, + const TreePatternNode *Src, const TreePatternNode *Dst); Expected createAndImportSubInstructionRenderer( action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst, unsigned TempReg); @@ -3089,6 +3294,7 @@ private: createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst); void importExplicitDefRenderers(BuildMIAction &DstMIBuilder); + Expected importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder, @@ -3122,6 +3328,32 @@ private: MatchTable buildMatchTable(MutableArrayRef Rules, bool Optimize, bool WithCoverage); + /// Infer a CodeGenRegisterClass for the type of \p SuperRegNode. The returned + /// CodeGenRegisterClass will support the CodeGenRegisterClass of + /// \p SubRegNode, and the subregister index defined by \p SubRegIdxNode. + /// If no register class is found, return None. + Optional + inferSuperRegisterClassForNode(const TypeSetByHwMode &Ty, + TreePatternNode *SuperRegNode, + TreePatternNode *SubRegIdxNode); + Optional + inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode); + + /// Infer a CodeGenRegisterClass which supports \p Ty and \p SubRegIdxNode. + /// Return None if no such class exists. + Optional + inferSuperRegisterClass(const TypeSetByHwMode &Ty, + TreePatternNode *SubRegIdxNode); + + /// Return the CodeGenRegisterClass associated with \p Leaf if it has one. 
+ Optional + getRegClassFromLeaf(TreePatternNode *Leaf); + + /// Return a CodeGenRegisterClass for \p N if one can be found. Return None + /// otherwise. + Optional + inferRegClassFromPattern(TreePatternNode *N); + public: /// Takes a sequence of \p Rules and group them based on the predicates /// they share. \p MatcherStorage is used as a memory container @@ -3190,6 +3422,13 @@ Record *GlobalISelEmitter::findNodeEquiv(Record *N) const { const CodeGenInstruction * GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const { + if (N->getNumChildren() >= 1) { + // setcc operation maps to two different G_* instructions based on the type. + if (!Equiv.isValueUnset("IfFloatingPoint") && + MVT(N->getChild(0)->getSimpleType(0)).isFloatingPoint()) + return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint")); + } + for (const TreePredicateCall &Call : N->getPredicateCalls()) { const TreePredicateFn &Predicate = Call.Fn; if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() && @@ -3199,6 +3438,7 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const { Predicate.isZeroExtLoad()) return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend")); } + return &Target.getInstruction(Equiv.getValueAsDef("I")); } @@ -3212,7 +3452,7 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M, ArrayRef Predicates) { for (const Predicate &P : Predicates) { - if (!P.Def) + if (!P.Def || P.getCondString().empty()) continue; declareSubtargetFeature(P.Def); M.addRequiredFeature(P.Def); @@ -3287,6 +3527,10 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( 0, ParsedAddrSpaces); } } + + int64_t MinAlign = Predicate.getMinAlignment(); + if (MinAlign > 0) + InsnMatcher.addPredicate(0, MinAlign); } // G_LOAD is used for both non-extending and any-extending loads. 
@@ -3301,11 +3545,19 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( continue; } - if (Predicate.isStore() && Predicate.isTruncStore()) { - // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size. - InsnMatcher.addPredicate( - 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0); - continue; + if (Predicate.isStore()) { + if (Predicate.isTruncStore()) { + // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size. + InsnMatcher.addPredicate( + 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0); + continue; + } + if (Predicate.isNonTruncStore()) { + // We need to check the sizes match here otherwise we could incorrectly + // match truncating stores with non-truncating ones. + InsnMatcher.addPredicate( + 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0); + } } // No check required. We already did it by swapping the opcode. @@ -3405,6 +3657,10 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( } if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic")) InsnMatcher.addPredicate("NotAtomic"); + else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) { + InsnMatcher.addPredicate( + "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger); + } if (Src->isLeaf()) { Init *SrcInit = Src->getLeafValue(); @@ -3427,8 +3683,43 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( return InsnMatcher; } + // Special case because the operand order is changed from setcc. The + // predicate operand needs to be swapped from the last operand to the first + // source. + + unsigned NumChildren = Src->getNumChildren(); + bool IsFCmp = SrcGIOrNull->TheDef->getName() == "G_FCMP"; + + if (IsFCmp || SrcGIOrNull->TheDef->getName() == "G_ICMP") { + TreePatternNode *SrcChild = Src->getChild(NumChildren - 1); + if (SrcChild->isLeaf()) { + DefInit *DI = dyn_cast(SrcChild->getLeafValue()); + Record *CCDef = DI ? 
DI->getDef() : nullptr; + if (!CCDef || !CCDef->isSubClassOf("CondCode")) + return failedImport("Unable to handle CondCode"); + + OperandMatcher &OM = + InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx); + StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate") : + CCDef->getValueAsString("ICmpPredicate"); + + if (!PredType.empty()) { + OM.addPredicate(PredType); + // Process the other 2 operands normally. + --NumChildren; + } + } + } + // Match the used operands (i.e. the children of the operator). - for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { + bool IsIntrinsic = + SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || + SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS"; + const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP); + if (IsIntrinsic && !II) + return failedImport("Expected IntInit containing intrinsic ID)"); + + for (unsigned i = 0; i != NumChildren; ++i) { TreePatternNode *SrcChild = Src->getChild(i); // SelectionDAG allows pointers to be represented with iN since it doesn't @@ -3436,19 +3727,21 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( // Coerce integers to pointers to address space 0 if the context indicates a pointer. bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i); - // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately - // following the defs is an intrinsic ID. - if ((SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || - SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS") && - i == 0) { - if (const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP)) { + if (IsIntrinsic) { + // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately + // following the defs is an intrinsic ID. 
+ if (i == 0) { OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx); OM.addPredicate(II); continue; } - return failedImport("Expected IntInit containing instrinsic ID)"); + // We have to check intrinsics for llvm_anyptr_ty parameters. + // + // Note that we have to look at the i-1th parameter, because we don't + // have the intrinsic ID in the intrinsic's parameter list. + OperandIsAPointer |= II->isParamAPointer(i - 1); } if (auto Error = @@ -3473,14 +3766,37 @@ Error GlobalISelEmitter::importComplexPatternOperandMatcher( return Error::success(); } +// Get the name to use for a pattern operand. For an anonymous physical register +// input, this should use the register name. +static StringRef getSrcChildName(const TreePatternNode *SrcChild, + Record *&PhysReg) { + StringRef SrcChildName = SrcChild->getName(); + if (SrcChildName.empty() && SrcChild->isLeaf()) { + if (auto *ChildDefInit = dyn_cast(SrcChild->getLeafValue())) { + auto *ChildRec = ChildDefInit->getDef(); + if (ChildRec->isSubClassOf("Register")) { + SrcChildName = ChildRec->getName(); + PhysReg = ChildRec; + } + } + } + + return SrcChildName; +} + Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher, const TreePatternNode *SrcChild, bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx) { - OperandMatcher &OM = - InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx); + + Record *PhysReg = nullptr; + StringRef SrcChildName = getSrcChildName(SrcChild, PhysReg); + + OperandMatcher &OM = PhysReg ? 
+ InsnMatcher.addPhysRegInput(PhysReg, OpIdx, TempOpIdx) : + InsnMatcher.addOperand(OpIdx, SrcChildName, TempOpIdx); if (OM.isSameAsAnotherOperand()) return Error::success(); @@ -3496,6 +3812,10 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, OM.addPredicate(); return Error::success(); } + if (SrcChild->getOperator()->getName() == "timm") { + OM.addPredicate(); + return Error::success(); + } } } @@ -3569,6 +3889,20 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, return Error::success(); } + if (ChildRec->isSubClassOf("Register")) { + // This should just be emitted as a copy to the specific register. + ValueTypeByHwMode VT = ChildTypes.front().getValueTypeByHwMode(); + const CodeGenRegisterClass *RC + = CGRegs.getMinimalPhysRegClass(ChildRec, &VT); + if (!RC) { + return failedImport( + "Could not determine physical register class of pattern source"); + } + + OM.addPredicate(*RC); + return Error::success(); + } + // Check for ValueType. if (ChildRec->isSubClassOf("ValueType")) { // We already added a type check as standard practice so this doesn't need @@ -3631,7 +3965,10 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( // rendered as operands. // FIXME: The target should be able to choose sign-extended when appropriate // (e.g. on Mips). 
- if (DstChild->getOperator()->getName() == "imm") { + if (DstChild->getOperator()->getName() == "timm") { + DstMIBuilder.addRenderer(DstChild->getName()); + return InsertPt; + } else if (DstChild->getOperator()->getName() == "imm") { DstMIBuilder.addRenderer(DstChild->getName()); return InsertPt; } else if (DstChild->getOperator()->getName() == "fpimm") { @@ -3708,6 +4045,12 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( return InsertPt; } + if (ChildRec->isSubClassOf("SubRegIndex")) { + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(ChildRec); + DstMIBuilder.addRenderer(SubIdx->EnumValue); + return InsertPt; + } + if (ChildRec->isSubClassOf("ComplexPattern")) { const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec); if (ComplexPattern == ComplexPatternEquivs.end()) @@ -3729,7 +4072,8 @@ Expected GlobalISelEmitter::importExplicitUseRenderer( } Expected GlobalISelEmitter::createAndImportInstructionRenderer( - RuleMatcher &M, const TreePatternNode *Dst) { + RuleMatcher &M, InstructionMatcher &InsnMatcher, const TreePatternNode *Src, + const TreePatternNode *Dst) { auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst); if (auto Error = InsertPtOrError.takeError()) return std::move(Error); @@ -3737,6 +4081,17 @@ Expected GlobalISelEmitter::createAndImportInstructionRenderer( action_iterator InsertPt = InsertPtOrError.get(); BuildMIAction &DstMIBuilder = *static_cast(InsertPt->get()); + for (auto PhysInput : InsnMatcher.getPhysRegInputs()) { + InsertPt = M.insertAction( + InsertPt, M.allocateOutputInsnID(), + &Target.getInstruction(RK.getDef("COPY"))); + BuildMIAction &CopyToPhysRegMIBuilder = + *static_cast(InsertPt->get()); + CopyToPhysRegMIBuilder.addRenderer(PhysInput.first, + true); + CopyToPhysRegMIBuilder.addRenderer(PhysInput.first); + } + importExplicitDefRenderers(DstMIBuilder); if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst) @@ -3768,6 +4123,78 @@ 
GlobalISelEmitter::createAndImportSubInstructionRenderer( if (auto Error = InsertPtOrError.takeError()) return std::move(Error); + // We need to make sure that when we import an INSERT_SUBREG as a + // subinstruction that it ends up being constrained to the correct super + // register and subregister classes. + auto OpName = Target.getInstruction(Dst->getOperator()).TheDef->getName(); + if (OpName == "INSERT_SUBREG") { + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from INSERT_SUBREG operand #1"); + Optional SuperClass = + inferSuperRegisterClassForNode(Dst->getExtType(0), Dst->getChild(0), + Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for INSERT_SUBREG operand #0"); + // The destination and the super register source of an INSERT_SUBREG must + // be the same register class. + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 1, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass); + return InsertPtOrError.get(); + } + + if (OpName == "EXTRACT_SUBREG") { + // EXTRACT_SUBREG selects into a subregister COPY but unlike most + // instructions, the result register class is controlled by the + // subregisters of the operand. As a result, we must constrain the result + // class rather than check that it's already the right one. 
+ auto SuperClass = inferRegClassFromPattern(Dst->getChild(0)); + if (!SuperClass) + return failedImport( + "Cannot infer register class from EXTRACT_SUBREG operand #0"); + + auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1)); + if (!SubIdx) + return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); + + const auto &SrcRCDstRCPair = + (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx); + assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, *SrcRCDstRCPair->second); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 1, *SrcRCDstRCPair->first); + + // We're done with this pattern! It's eligible for GISel emission; return + // it. + return InsertPtOrError.get(); + } + + // Similar to INSERT_SUBREG, we also have to handle SUBREG_TO_REG as a + // subinstruction. + if (OpName == "SUBREG_TO_REG") { + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from SUBREG_TO_REG child #1"); + auto SuperClass = inferSuperRegisterClass(Dst->getExtType(0), + Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass); + M.insertAction( + InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass); + return InsertPtOrError.get(); + } + M.insertAction(InsertPt, DstMIBuilder.getInsnID()); return InsertPtOrError.get(); @@ -3786,12 +4213,9 @@ Expected GlobalISelEmitter::createInstructionRenderer( // COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy. 
- if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") + StringRef Name = DstI->TheDef->getName(); + if (Name == "COPY_TO_REGCLASS" || Name == "EXTRACT_SUBREG") DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") - DstI = &Target.getInstruction(RK.getDef("COPY")); - else if (DstI->TheDef->getName() == "REG_SEQUENCE") - return failedImport("Unable to emit REG_SEQUENCE"); return M.insertAction(InsertPt, M.allocateOutputInsnID(), DstI); @@ -3812,8 +4236,11 @@ Expected GlobalISelEmitter::importExplicitUseRenderers( const CodeGenInstruction *DstI = DstMIBuilder.getCGI(); CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator()); + StringRef Name = OrigDstI->TheDef->getName(); + unsigned ExpectedDstINumUses = Dst->getNumChildren(); + // EXTRACT_SUBREG needs to use a subregister COPY. - if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") { + if (Name == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); @@ -3843,10 +4270,41 @@ Expected GlobalISelEmitter::importExplicitUseRenderers( return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); } + if (Name == "REG_SEQUENCE") { + if (!Dst->getChild(0)->isLeaf()) + return failedImport("REG_SEQUENCE child #0 is not a leaf"); + + Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!RCDef) + return failedImport("REG_SEQUENCE child #0 could not " + "be coerced to a register class"); + + if ((ExpectedDstINumUses - 1) % 2 != 0) + return failedImport("Malformed REG_SEQUENCE"); + + for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) { + TreePatternNode *ValChild = Dst->getChild(I); + TreePatternNode *SubRegChild = Dst->getChild(I + 1); + + if (DefInit *SubRegInit = + dyn_cast(SubRegChild->getLeafValue())) { + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + auto InsertPtOrError = + importExplicitUseRenderer(InsertPt, M, DstMIBuilder, 
ValChild); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + InsertPt = InsertPtOrError.get(); + DstMIBuilder.addRenderer(SubIdx); + } + } + + return InsertPt; + } + // Render the explicit uses. unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs; - unsigned ExpectedDstINumUses = Dst->getNumChildren(); - if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") { + if (Name == "COPY_TO_REGCLASS") { DstINumUses--; // Ignore the class constraint. ExpectedDstINumUses--; } @@ -3945,6 +4403,126 @@ Error GlobalISelEmitter::importImplicitDefRenderers( return Error::success(); } +Optional +GlobalISelEmitter::getRegClassFromLeaf(TreePatternNode *Leaf) { + assert(Leaf && "Expected node?"); + assert(Leaf->isLeaf() && "Expected leaf?"); + Record *RCRec = getInitValueAsRegClass(Leaf->getLeafValue()); + if (!RCRec) + return None; + CodeGenRegisterClass *RC = CGRegs.getRegClass(RCRec); + if (!RC) + return None; + return RC; +} + +Optional +GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) { + if (!N) + return None; + + if (N->isLeaf()) + return getRegClassFromLeaf(N); + + // We don't have a leaf node, so we have to try and infer something. Check + // that we have an instruction that we can infer something from. + + // Only handle things that produce a single type. + if (N->getNumTypes() != 1) + return None; + Record *OpRec = N->getOperator(); + + // We only want instructions. + if (!OpRec->isSubClassOf("Instruction")) + return None; + + // Don't want to try and infer things when there could potentially be more + // than one candidate register class. + auto &Inst = Target.getInstruction(OpRec); + if (Inst.Operands.NumDefs > 1) + return None; + + // Handle any special-case instructions which we can safely infer register + // classes from. 
+ StringRef InstName = Inst.TheDef->getName(); + bool IsRegSequence = InstName == "REG_SEQUENCE"; + if (IsRegSequence || InstName == "COPY_TO_REGCLASS") { + // If we have a COPY_TO_REGCLASS, then we need to handle it specially. It + // has the desired register class as the first child. + TreePatternNode *RCChild = N->getChild(IsRegSequence ? 0 : 1); + if (!RCChild->isLeaf()) + return None; + return getRegClassFromLeaf(RCChild); + } + + // Handle destination record types that we can safely infer a register class + // from. + const auto &DstIOperand = Inst.Operands[0]; + Record *DstIOpRec = DstIOperand.Rec; + if (DstIOpRec->isSubClassOf("RegisterOperand")) { + DstIOpRec = DstIOpRec->getValueAsDef("RegClass"); + const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec); + return &RC; + } + + if (DstIOpRec->isSubClassOf("RegisterClass")) { + const CodeGenRegisterClass &RC = Target.getRegisterClass(DstIOpRec); + return &RC; + } + + return None; +} + +Optional +GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty, + TreePatternNode *SubRegIdxNode) { + assert(SubRegIdxNode && "Expected subregister index node!"); + // We need a ValueTypeByHwMode for getSuperRegForSubReg. + if (!Ty.isValueTypeByHwMode(false)) + return None; + if (!SubRegIdxNode->isLeaf()) + return None; + DefInit *SubRegInit = dyn_cast(SubRegIdxNode->getLeafValue()); + if (!SubRegInit) + return None; + CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + + // Use the information we found above to find a minimal register class which + // supports the subregister and type we want. 
+ auto RC = + Target.getSuperRegForSubReg(Ty.getValueTypeByHwMode(), CGRegs, SubIdx); + if (!RC) + return None; + return *RC; +} + +Optional +GlobalISelEmitter::inferSuperRegisterClassForNode( + const TypeSetByHwMode &Ty, TreePatternNode *SuperRegNode, + TreePatternNode *SubRegIdxNode) { + assert(SuperRegNode && "Expected super register node!"); + // Check if we already have a defined register class for the super register + // node. If we do, then we should preserve that rather than inferring anything + // from the subregister index node. We can assume that whoever wrote the + // pattern in the first place made sure that the super register and + // subregister are compatible. + if (Optional SuperRegisterClass = + inferRegClassFromPattern(SuperRegNode)) + return *SuperRegisterClass; + return inferSuperRegisterClass(Ty, SubRegIdxNode); +} + +Optional +GlobalISelEmitter::inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode) { + if (!SubRegIdxNode->isLeaf()) + return None; + + DefInit *SubRegInit = dyn_cast(SubRegIdxNode->getLeafValue()); + if (!SubRegInit) + return None; + return CGRegs.getSubRegIdx(SubRegInit->getDef()); +} + Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // Keep track of the matchers and actions to emit. 
int Score = P.getPatternComplexity(CGP); @@ -4035,6 +4613,8 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return failedImport("Pattern operator isn't an instruction"); auto &DstI = Target.getInstruction(DstOp); + StringRef DstIName = DstI.TheDef->getName(); + if (DstI.Operands.NumDefs != Src->getExtTypes().size()) return failedImport("Src pattern results and dst MI defs are different (" + to_string(Src->getExtTypes().size()) + " def(s) vs " + @@ -4048,13 +4628,17 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { const auto &DstIOperand = DstI.Operands[OpIdx]; Record *DstIOpRec = DstIOperand.Rec; - if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") { + if (DstIName == "COPY_TO_REGCLASS") { DstIOpRec = getInitValueAsRegClass(Dst->getChild(1)->getLeafValue()); if (DstIOpRec == nullptr) return failedImport( "COPY_TO_REGCLASS operand #1 isn't a register class"); - } else if (DstI.TheDef->getName() == "EXTRACT_SUBREG") { + } else if (DstIName == "REG_SEQUENCE") { + DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (DstIOpRec == nullptr) + return failedImport("REG_SEQUENCE operand #0 isn't a register class"); + } else if (DstIName == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG operand #0 isn't a leaf"); @@ -4063,8 +4647,33 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { DstIOpRec = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); if (DstIOpRec == nullptr) + return failedImport("EXTRACT_SUBREG operand #0 isn't a register class"); + } else if (DstIName == "INSERT_SUBREG") { + auto MaybeSuperClass = inferSuperRegisterClassForNode( + VTy, Dst->getChild(0), Dst->getChild(2)); + if (!MaybeSuperClass) return failedImport( - "EXTRACT_SUBREG operand #0 isn't a register class"); + "Cannot infer register class for INSERT_SUBREG operand #0"); + // Move to the next pattern here, because the register class we found + // doesn't necessarily 
have a record associated with it. So, we can't + // set DstIOpRec using this. + OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); + OM.setSymbolicName(DstIOperand.Name); + M.defineOperand(OM.getSymbolicName(), OM); + OM.addPredicate(**MaybeSuperClass); + ++OpIdx; + continue; + } else if (DstIName == "SUBREG_TO_REG") { + auto MaybeRegClass = inferSuperRegisterClass(VTy, Dst->getChild(2)); + if (!MaybeRegClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); + OM.setSymbolicName(DstIOperand.Name); + M.defineOperand(OM.getSymbolicName(), OM); + OM.addPredicate(**MaybeRegClass); + ++OpIdx; + continue; } else if (DstIOpRec->isSubClassOf("RegisterOperand")) DstIOpRec = DstIOpRec->getValueAsDef("RegClass"); else if (!DstIOpRec->isSubClassOf("RegisterClass")) @@ -4079,7 +4688,8 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { ++OpIdx; } - auto DstMIBuilderOrError = createAndImportInstructionRenderer(M, Dst); + auto DstMIBuilderOrError = + createAndImportInstructionRenderer(M, InsnMatcher, Src, Dst); if (auto Error = DstMIBuilderOrError.takeError()) return std::move(Error); BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get(); @@ -4093,7 +4703,7 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // Constrain the registers to classes. This is normally derived from the // emitted instruction but a few instructions require special handling. - if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") { + if (DstIName == "COPY_TO_REGCLASS") { // COPY_TO_REGCLASS does not provide operand constraints itself but the // result is constrained to the class given by the second child. 
Record *DstIOpRec = @@ -4111,28 +4721,16 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return std::move(M); } - if (DstI.TheDef->getName() == "EXTRACT_SUBREG") { - // EXTRACT_SUBREG selects into a subregister COPY but unlike most - // instructions, the result register class is controlled by the - // subregisters of the operand. As a result, we must constrain the result - // class rather than check that it's already the right one. - if (!Dst->getChild(0)->isLeaf()) - return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); + if (DstIName == "EXTRACT_SUBREG") { + auto SuperClass = inferRegClassFromPattern(Dst->getChild(0)); + if (!SuperClass) + return failedImport( + "Cannot infer register class from EXTRACT_SUBREG operand #0"); - DefInit *SubRegInit = dyn_cast(Dst->getChild(1)->getLeafValue()); - if (!SubRegInit) + auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1)); + if (!SubIdx) return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); - // Constrain the result to the same register bank as the operand. - Record *DstIOpRec = - getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); - - if (DstIOpRec == nullptr) - return failedImport("EXTRACT_SUBREG operand #1 isn't a register class"); - - CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); - CodeGenRegisterClass *SrcRC = CGRegs.getRegClass(DstIOpRec); - // It would be nice to leave this constraint implicit but we're required // to pick a register class so constrain the result to a register class // that can hold the correct MVT. 
@@ -4143,7 +4741,7 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { "Expected Src of EXTRACT_SUBREG to have one result type"); const auto &SrcRCDstRCPair = - SrcRC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx); + (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx); assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass"); M.addAction(0, 0, *SrcRCDstRCPair->second); M.addAction(0, 1, *SrcRCDstRCPair->first); @@ -4154,6 +4752,51 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return std::move(M); } + if (DstIName == "INSERT_SUBREG") { + assert(Src->getExtTypes().size() == 1 && + "Expected Src of INSERT_SUBREG to have one result type"); + // We need to constrain the destination, a super regsister source, and a + // subregister source. + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from INSERT_SUBREG operand #1"); + auto SuperClass = inferSuperRegisterClassForNode( + Src->getExtType(0), Dst->getChild(0), Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for INSERT_SUBREG operand #0"); + M.addAction(0, 0, **SuperClass); + M.addAction(0, 1, **SuperClass); + M.addAction(0, 2, **SubClass); + ++NumPatternImported; + return std::move(M); + } + + if (DstIName == "SUBREG_TO_REG") { + // We need to constrain the destination and subregister source. + assert(Src->getExtTypes().size() == 1 && + "Expected Src of SUBREG_TO_REG to have one result type"); + + // Attempt to infer the subregister source from the first child. If it has + // an explicitly given register class, we'll use that. Otherwise, we will + // fail. + auto SubClass = inferRegClassFromPattern(Dst->getChild(1)); + if (!SubClass) + return failedImport( + "Cannot infer register class from SUBREG_TO_REG child #1"); + // We don't have a child to look at that might have a super register node. 
+ auto SuperClass = + inferSuperRegisterClass(Src->getExtType(0), Dst->getChild(2)); + if (!SuperClass) + return failedImport( + "Cannot infer register class for SUBREG_TO_REG operand #0"); + M.addAction(0, 0, **SuperClass); + M.addAction(0, 2, **SubClass); + ++NumPatternImported; + return std::move(M); + } + M.addAction(0); // We're done with this pattern! It's eligible for GISel emission; return it. @@ -4235,7 +4878,7 @@ std::vector GlobalISelEmitter::optimizeRules( std::vector> &MatcherStorage) { std::vector OptRules; - std::unique_ptr CurrentGroup = make_unique(); + std::unique_ptr CurrentGroup = std::make_unique(); assert(CurrentGroup->empty() && "Newly created group isn't empty!"); unsigned NumGroups = 0; @@ -4256,7 +4899,7 @@ std::vector GlobalISelEmitter::optimizeRules( MatcherStorage.emplace_back(std::move(CurrentGroup)); ++NumGroups; } - CurrentGroup = make_unique(); + CurrentGroup = std::make_unique(); }; for (Matcher *Rule : Rules) { // Greedily add as many matchers as possible to the current group: diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp index d9662889a5d..7cd1b0f0813 100644 --- a/utils/TableGen/InfoByHwMode.cpp +++ b/utils/TableGen/InfoByHwMode.cpp @@ -192,6 +192,17 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const { OS << '}'; } +EncodingInfoByHwMode::EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH) { + const HwModeSelect &MS = CGH.getHwModeSelect(R); + for (const HwModeSelect::PairType &P : MS.Items) { + assert(P.second && P.second->isSubClassOf("InstructionEncoding") && + "Encoding must subclass InstructionEncoding"); + auto I = Map.insert({P.first, P.second}); + assert(I.second && "Duplicate entry?"); + (void)I; + } +} + namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) { T.writeToStream(OS); diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h index 9e5cc3d5f2a..d92e5901a7f 100644 --- a/utils/TableGen/InfoByHwMode.h +++ 
b/utils/TableGen/InfoByHwMode.h @@ -184,6 +184,11 @@ raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T); raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T); raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T); +struct EncodingInfoByHwMode : public InfoByHwMode { + EncodingInfoByHwMode(Record *R, const CodeGenHwModes &CGH); + EncodingInfoByHwMode() = default; +}; + } // namespace llvm #endif // LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp index 91c457ba08f..45fa936b957 100644 --- a/utils/TableGen/InstrDocsEmitter.cpp +++ b/utils/TableGen/InstrDocsEmitter.cpp @@ -231,4 +231,4 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { } } -} // end llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 2d367f538b7..300ba36a700 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -332,6 +332,10 @@ void InstrInfoEmitter::emitOperandTypeMappings( StringRef Namespace = Target.getInstNamespace(); std::vector Operands = Records.getAllDerivedDefinitions("Operand"); + std::vector RegisterOperands = + Records.getAllDerivedDefinitions("RegisterOperand"); + std::vector RegisterClasses = + Records.getAllDerivedDefinitions("RegisterClass"); OS << "#ifdef GET_INSTRINFO_OPERAND_TYPES_ENUM\n"; OS << "#undef GET_INSTRINFO_OPERAND_TYPES_ENUM\n"; @@ -341,10 +345,13 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "enum OperandType {\n"; unsigned EnumVal = 0; - for (const Record *Op : Operands) { - if (!Op->isAnonymous()) - OS << " " << Op->getName() << " = " << EnumVal << ",\n"; - ++EnumVal; + for (const std::vector *RecordsToAdd : + {&Operands, &RegisterOperands, &RegisterClasses}) { + for (const Record *Op : *RecordsToAdd) { + if (!Op->isAnonymous()) + OS << " " << Op->getName() << " = " << EnumVal << ",\n"; + ++EnumVal; + } } OS << " 
OPERAND_TYPE_LIST_END" << "\n};\n"; @@ -358,7 +365,8 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "namespace llvm {\n"; OS << "namespace " << Namespace << " {\n"; OS << "LLVM_READONLY\n"; - OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + OS << "static int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + // TODO: Factor out instructions with same operands to compress the tables. if (!NumberedInstructions.empty()) { std::vector OperandOffsets; std::vector OperandRecords; @@ -399,7 +407,10 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "/**/\n "; } Record *OpR = OperandRecords[I]; - if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous()) + if ((OpR->isSubClassOf("Operand") || + OpR->isSubClassOf("RegisterOperand") || + OpR->isSubClassOf("RegisterClass")) && + !OpR->isAnonymous()) OS << "OpTypes::" << OpR->getName(); else OS << -1; @@ -414,7 +425,7 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "}\n"; OS << "} // end namespace " << Namespace << "\n"; OS << "} // end namespace llvm\n"; - OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n"; + OS << "#endif // GET_INSTRINFO_OPERAND_TYPE\n\n"; } void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, @@ -436,8 +447,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, << "(const MCInst &MI);\n"; } - OS << "\n} // end " << TargetName << "_MC namespace\n"; - OS << "} // end llvm namespace\n\n"; + OS << "\n} // end namespace " << TargetName << "_MC\n"; + OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_INSTRINFO_MC_HELPER_DECLS\n\n"; @@ -459,8 +470,8 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "\n}\n\n"; } - OS << "} // end " << TargetName << "_MC namespace\n"; - OS << "} // end llvm namespace\n\n"; + OS << "} // end namespace " << TargetName << "_MC\n"; + OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_GENISTRINFO_MC_HELPERS\n"; } @@ -576,7 +587,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << 
TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, " << NumberedInstructions.size() << ");\n}\n\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_MC_DESC\n\n"; @@ -592,7 +603,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << " ~" << ClassName << "() override = default;\n"; - OS << "\n};\n} // end llvm namespace\n"; + OS << "\n};\n} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_HEADER\n\n"; @@ -620,7 +631,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { << " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, " << NumberedInstructions.size() << ");\n}\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_CTOR_DTOR\n\n"; @@ -651,6 +662,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, CodeGenTarget &Target = CDP.getTargetInfo(); // Emit all of the target independent flags... 
+ if (Inst.isPreISelOpcode) OS << "|(1ULL<TheDef->getName() << "\t= " << Num++ << ",\n"; OS << " INSTRUCTION_LIST_END = " << Num << "\n"; OS << " };\n\n"; - OS << "} // end " << Namespace << " namespace\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_ENUM\n\n"; OS << "#ifdef GET_INSTRINFO_SCHED_ENUM\n"; @@ -780,9 +792,9 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << " " << Class.Name << "\t= " << Num++ << ",\n"; OS << " SCHED_LIST_END = " << Num << "\n"; OS << " };\n"; - OS << "} // end Sched namespace\n"; - OS << "} // end " << Namespace << " namespace\n"; - OS << "} // end llvm namespace\n"; + OS << "} // end namespace Sched\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n"; } @@ -794,4 +806,4 @@ void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { EmitMapTable(RK, OS); } -} // end llvm namespace +} // end namespace llvm diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 979af98f676..e01f91c2045 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -220,7 +220,11 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43, + IIT_SUBDIVIDE2_ARG = 44, + IIT_SUBDIVIDE4_ARG = 45, + IIT_VEC_OF_BITCASTS_TO_INT = 46 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -292,6 +296,12 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, Sig.push_back(IIT_PTR_TO_ELT); else if (R->isSubClassOf("LLVMVectorElementType")) Sig.push_back(IIT_VEC_ELEMENT); + else if (R->isSubClassOf("LLVMSubdivide2VectorType")) + Sig.push_back(IIT_SUBDIVIDE2_ARG); + else if (R->isSubClassOf("LLVMSubdivide4VectorType")) + Sig.push_back(IIT_SUBDIVIDE4_ARG); + else if 
(R->isSubClassOf("LLVMVectorOfBitcastsToInt")) + Sig.push_back(IIT_VEC_OF_BITCASTS_TO_INT); else Sig.push_back(IIT_ARG); return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/); @@ -339,6 +349,8 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, if (MVT(VT).isVector()) { MVT VVT = VT; + if (VVT.isScalableVector()) + Sig.push_back(IIT_SCALABLE_VEC); switch (VVT.getVectorNumElements()) { default: PrintFatalError("unhandled vector type width in intrinsic!"); case 1: Sig.push_back(IIT_V1); break; @@ -647,6 +659,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << "Attribute::NoCapture"; addComma = true; break; + case CodeGenIntrinsic::NoAlias: + if (addComma) + OS << ","; + OS << "Attribute::NoAlias"; + addComma = true; + break; case CodeGenIntrinsic::Returned: if (addComma) OS << ","; diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp index e62f528ebc2..2f1d3898f18 100644 --- a/utils/TableGen/RISCVCompressInstEmitter.cpp +++ b/utils/TableGen/RISCVCompressInstEmitter.cpp @@ -411,12 +411,8 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) { assert(SourceDag && "Missing 'Input' in compress pattern!"); LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n"); - DefInit *OpDef = dyn_cast(SourceDag->getOperator()); - if (!OpDef) - PrintFatalError(Rec->getLoc(), - Rec->getName() + " has unexpected operator type!"); // Checking we are transforming from compressed to uncompressed instructions. 
- Record *Operator = OpDef->getDef(); + Record *Operator = SourceDag->getOperatorAsDef(Rec->getLoc()); if (!Operator->isSubClassOf("RVInst")) PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() + "' is not a 32 bit wide instruction!"); @@ -428,12 +424,7 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) { assert(DestDag && "Missing 'Output' in compress pattern!"); LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n"); - DefInit *DestOpDef = dyn_cast(DestDag->getOperator()); - if (!DestOpDef) - PrintFatalError(Rec->getLoc(), - Rec->getName() + " has unexpected operator type!"); - - Record *DestOperator = DestOpDef->getDef(); + Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc()); if (!DestOperator->isSubClassOf("RVInst16")) PrintFatalError(Rec->getLoc(), "Output instruction '" + DestOperator->getName() + diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index 1b619072c81..513cd14e0fa 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -888,7 +888,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // Keep track of sub-register names as well. These are not differentially // encoded. typedef SmallVector SubRegIdxVec; - SequenceToOffsetTable> SubRegIdxSeqs; + SequenceToOffsetTable>> SubRegIdxSeqs; SmallVector SubRegIdxLists(Regs.size()); SequenceToOffsetTable RegStrings; @@ -1315,7 +1315,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, // Compress the sub-reg index lists. 
typedef std::vector IdxList; SmallVector SuperRegIdxLists(RegisterClasses.size()); - SequenceToOffsetTable> SuperRegIdxSeqs; + SequenceToOffsetTable>> SuperRegIdxSeqs; BitVector MaskBV(RegisterClasses.size()); for (const auto &RC : RegisterClasses) { diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp index 954b63e7253..f08f8aa0195 100644 --- a/utils/TableGen/SearchableTableEmitter.cpp +++ b/utils/TableGen/SearchableTableEmitter.cpp @@ -134,7 +134,7 @@ private: CodeGenIntrinsic &getIntrinsic(Init *I) { std::unique_ptr &Intr = Intrinsics[I]; if (!Intr) - Intr = make_unique(cast(I)->getDef()); + Intr = std::make_unique(cast(I)->getDef()); return *Intr; } @@ -496,7 +496,7 @@ void SearchableTableEmitter::emitGenericTable(const GenericTable &Table, emitIfdef((Twine("GET_") + Table.PreprocessorGuard + "_IMPL").str(), OS); // The primary data table contains all the fields defined for this map. - OS << "const " << Table.CppTypeName << " " << Table.Name << "[] = {\n"; + OS << "constexpr " << Table.CppTypeName << " " << Table.Name << "[] = {\n"; for (unsigned i = 0; i < Table.Entries.size(); ++i) { Record *Entry = Table.Entries[i]; OS << " { "; @@ -541,7 +541,7 @@ std::unique_ptr SearchableTableEmitter::parseSearchIndex(GenericTable &Table, StringRef Name, const std::vector &Key, bool EarlyOut) { - auto Index = llvm::make_unique(); + auto Index = std::make_unique(); Index->Name = Name; Index->EarlyOut = EarlyOut; @@ -577,7 +577,7 @@ void SearchableTableEmitter::collectEnumEntries( if (!ValueField.empty()) Value = getInt(EntryRec, ValueField); - Enum.Entries.push_back(llvm::make_unique(Name, Value)); + Enum.Entries.push_back(std::make_unique(Name, Value)); Enum.EntryMap.insert(std::make_pair(EntryRec, Enum.Entries.back().get())); } @@ -647,7 +647,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { if (!EnumRec->isValueUnset("ValueField")) ValueField = EnumRec->getValueAsString("ValueField"); - auto Enum = 
llvm::make_unique(); + auto Enum = std::make_unique(); Enum->Name = EnumRec->getName(); Enum->PreprocessorGuard = EnumRec->getName(); @@ -664,7 +664,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { } for (auto TableRec : Records.getAllDerivedDefinitions("GenericTable")) { - auto Table = llvm::make_unique(); + auto Table = std::make_unique(); Table->Name = TableRec->getName(); Table->PreprocessorGuard = TableRec->getName(); Table->CppTypeName = TableRec->getValueAsString("CppTypeName"); @@ -733,7 +733,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { if (!Class->isValueUnset("EnumValueField")) ValueField = Class->getValueAsString("EnumValueField"); - auto Enum = llvm::make_unique(); + auto Enum = std::make_unique(); Enum->Name = (Twine(Class->getName()) + "Values").str(); Enum->PreprocessorGuard = Class->getName().upper(); Enum->Class = Class; @@ -743,7 +743,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { Enums.emplace_back(std::move(Enum)); } - auto Table = llvm::make_unique(); + auto Table = std::make_unique(); Table->Name = (Twine(Class->getName()) + "sList").str(); Table->PreprocessorGuard = Class->getName().upper(); Table->CppTypeName = Class->getName(); diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 9ce2b3b275c..9b094adb7d5 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -1057,6 +1057,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, LLVM_DEBUG(dbgs() << ProcModel.ModelName << " does not have resources for class " << SC.Name << '\n'); + SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps; } } // Sum resources across all operand writes. 
@@ -1728,7 +1729,7 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " const MCInst *MI, unsigned CPUID) {\n"; emitSchedModelHelpersImpl(OS, /* OnlyExpandMCPredicates */ true); OS << "}\n"; - OS << "} // end of namespace " << Target << "_MC\n\n"; + OS << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << Target << "GenMCSubtargetInfo : public MCSubtargetInfo {\n"; @@ -1746,7 +1747,10 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " return " << Target << "_MC" << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); \n"; OS << " }\n"; + if (TGT.getHwModes().getNumModeIds() > 1) + OS << " unsigned getHwMode() const override;\n"; OS << "};\n"; + EmitHwModeCheck(Target + "GenMCSubtargetInfo", OS); } void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { @@ -1858,7 +1862,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "namespace " << Target << "_MC {\n" << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass," << " const MCInst *MI, unsigned CPUID);\n" - << "}\n\n"; + << "} // end namespace " << Target << "_MC\n\n"; OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n" << " explicit " << ClassName << "(const Triple &TT, StringRef CPU, " << "StringRef FS);\n" diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp index edf0b4a01c6..5430f73d5e0 100644 --- a/utils/TableGen/SubtargetFeatureInfo.cpp +++ b/utils/TableGen/SubtargetFeatureInfo.cpp @@ -38,6 +38,10 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) { if (Pred->getName().empty()) PrintFatalError(Pred->getLoc(), "Predicate has no name!"); + // Ignore always true predicates. 
+ if (Pred->getValueAsString("CondString").empty()) + continue; + SubtargetFeatures.emplace_back( Pred, SubtargetFeatureInfo(Pred, SubtargetFeatures.size())); } @@ -95,9 +99,11 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures( OS << " PredicateBitset Features;\n"; for (const auto &SF : SubtargetFeatures) { const SubtargetFeatureInfo &SFI = SF.second; + StringRef CondStr = SFI.TheDef->getValueAsString("CondString"); + assert(!CondStr.empty() && "true predicate should have been filtered"); - OS << " if (" << SFI.TheDef->getValueAsString("CondString") << ")\n"; - OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + OS << " if (" << CondStr << ")\n"; + OS << " Features.set(" << SFI.getEnumBitName() << ");\n"; } OS << " return Features;\n"; OS << "}\n\n"; @@ -142,7 +148,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( } while (true); OS << ")\n"; - OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + OS << " Features.set(" << SFI.getEnumBitName() << ");\n"; } OS << " return Features;\n"; OS << "}\n\n"; diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index c485ed2feb7..f730d91160a 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -49,10 +49,12 @@ enum ActionType { GenAttributes, GenSearchableTables, GenGlobalISel, + GenGICombiner, GenX86EVEX2VEXTables, GenX86FoldTables, GenRegisterBank, GenExegesis, + GenAutomata, }; namespace llvm { @@ -62,75 +64,75 @@ bool TimeRegions = false; } // end namespace llvm namespace { - cl::opt - Action(cl::desc("Action to perform:"), - cl::values(clEnumValN(PrintRecords, "print-records", - "Print all records to stdout (default)"), - clEnumValN(DumpJSON, "dump-json", - "Dump all records as machine-readable JSON"), - clEnumValN(GenEmitter, "gen-emitter", - "Generate machine code emitter"), - clEnumValN(GenRegisterInfo, "gen-register-info", - "Generate registers and register classes info"), - clEnumValN(GenInstrInfo, "gen-instr-info", - "Generate 
instruction descriptions"), - clEnumValN(GenInstrDocs, "gen-instr-docs", - "Generate instruction documentation"), - clEnumValN(GenCallingConv, "gen-callingconv", - "Generate calling convention descriptions"), - clEnumValN(GenAsmWriter, "gen-asm-writer", - "Generate assembly writer"), - clEnumValN(GenDisassembler, "gen-disassembler", - "Generate disassembler"), - clEnumValN(GenPseudoLowering, "gen-pseudo-lowering", - "Generate pseudo instruction lowering"), - clEnumValN(GenCompressInst, "gen-compress-inst-emitter", - "Generate RISCV compressed instructions."), - clEnumValN(GenAsmMatcher, "gen-asm-matcher", - "Generate assembly instruction matcher"), - clEnumValN(GenDAGISel, "gen-dag-isel", - "Generate a DAG instruction selector"), - clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer", - "Generate DFA Packetizer for VLIW targets"), - clEnumValN(GenFastISel, "gen-fast-isel", - "Generate a \"fast\" instruction selector"), - clEnumValN(GenSubtarget, "gen-subtarget", - "Generate subtarget enumerations"), - clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", - "Generate intrinsic enums"), - clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl", - "Generate intrinsic information"), - clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums", - "Generate target intrinsic enums"), - clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl", - "Generate target intrinsic information"), - clEnumValN(PrintEnums, "print-enums", - "Print enum values for a class"), - clEnumValN(PrintSets, "print-sets", - "Print expanded sets for testing DAG exprs"), - clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", - "Generate option definitions"), - clEnumValN(GenCTags, "gen-ctags", - "Generate ctags-compatible index"), - clEnumValN(GenAttributes, "gen-attrs", - "Generate attributes"), - clEnumValN(GenSearchableTables, "gen-searchable-tables", - "Generate generic binary-searchable table"), - clEnumValN(GenGlobalISel, "gen-global-isel", - "Generate GlobalISel selector"), - 
clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", - "Generate X86 EVEX to VEX compress tables"), - clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", - "Generate X86 fold tables"), - clEnumValN(GenRegisterBank, "gen-register-bank", - "Generate registers bank descriptions"), - clEnumValN(GenExegesis, "gen-exegesis", - "Generate llvm-exegesis tables"))); +cl::opt Action( + cl::desc("Action to perform:"), + cl::values( + clEnumValN(PrintRecords, "print-records", + "Print all records to stdout (default)"), + clEnumValN(DumpJSON, "dump-json", + "Dump all records as machine-readable JSON"), + clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), + clEnumValN(GenRegisterInfo, "gen-register-info", + "Generate registers and register classes info"), + clEnumValN(GenInstrInfo, "gen-instr-info", + "Generate instruction descriptions"), + clEnumValN(GenInstrDocs, "gen-instr-docs", + "Generate instruction documentation"), + clEnumValN(GenCallingConv, "gen-callingconv", + "Generate calling convention descriptions"), + clEnumValN(GenAsmWriter, "gen-asm-writer", "Generate assembly writer"), + clEnumValN(GenDisassembler, "gen-disassembler", + "Generate disassembler"), + clEnumValN(GenPseudoLowering, "gen-pseudo-lowering", + "Generate pseudo instruction lowering"), + clEnumValN(GenCompressInst, "gen-compress-inst-emitter", + "Generate RISCV compressed instructions."), + clEnumValN(GenAsmMatcher, "gen-asm-matcher", + "Generate assembly instruction matcher"), + clEnumValN(GenDAGISel, "gen-dag-isel", + "Generate a DAG instruction selector"), + clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer", + "Generate DFA Packetizer for VLIW targets"), + clEnumValN(GenFastISel, "gen-fast-isel", + "Generate a \"fast\" instruction selector"), + clEnumValN(GenSubtarget, "gen-subtarget", + "Generate subtarget enumerations"), + clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", + "Generate intrinsic enums"), + clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl", + "Generate 
intrinsic information"), + clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums", + "Generate target intrinsic enums"), + clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl", + "Generate target intrinsic information"), + clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"), + clEnumValN(PrintSets, "print-sets", + "Print expanded sets for testing DAG exprs"), + clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", + "Generate option definitions"), + clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"), + clEnumValN(GenAttributes, "gen-attrs", "Generate attributes"), + clEnumValN(GenSearchableTables, "gen-searchable-tables", + "Generate generic binary-searchable table"), + clEnumValN(GenGlobalISel, "gen-global-isel", + "Generate GlobalISel selector"), + clEnumValN(GenGICombiner, "gen-global-isel-combiner", + "Generate GlobalISel combiner"), + clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", + "Generate X86 EVEX to VEX compress tables"), + clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", + "Generate X86 fold tables"), + clEnumValN(GenRegisterBank, "gen-register-bank", + "Generate registers bank descriptions"), + clEnumValN(GenExegesis, "gen-exegesis", + "Generate llvm-exegesis tables"), + clEnumValN(GenAutomata, "gen-automata", + "Generate generic automata"))); - cl::OptionCategory PrintEnumsCat("Options for -print-enums"); - cl::opt - Class("class", cl::desc("Print Enum list for this class"), - cl::value_desc("class name"), cl::cat(PrintEnumsCat)); +cl::OptionCategory PrintEnumsCat("Options for -print-enums"); +cl::opt Class("class", cl::desc("Print Enum list for this class"), + cl::value_desc("class name"), + cl::cat(PrintEnumsCat)); cl::opt TimeRegionsOpt("time-regions", @@ -235,6 +237,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenGlobalISel: EmitGlobalISel(Records, OS); break; + case GenGICombiner: + EmitGICombiner(Records, OS); + break; case GenRegisterBank: 
EmitRegisterBank(Records, OS); break; @@ -247,6 +252,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenExegesis: EmitExegesis(Records, OS); break; + case GenAutomata: + EmitAutomata(Records, OS); + break; } return false; @@ -263,11 +271,16 @@ int main(int argc, char **argv) { return TableGenMain(argv[0], &LLVMTableGenMain); } -#ifdef __has_feature -#if __has_feature(address_sanitizer) +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \ + __has_feature(leak_sanitizer) + #include // Disable LeakSanitizer for this binary as it has too many leaks that are not // very interesting to fix. See compiler-rt/include/sanitizer/lsan_interface.h . LLVM_ATTRIBUTE_USED int __lsan_is_turned_off() { return 1; } -#endif // __has_feature(address_sanitizer) -#endif // defined(__has_feature) + +#endif diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 135ec65c0f9..8c067dd51b3 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -85,10 +85,12 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS); void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); +void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS); void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); +void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp index 365cba5a60c..54aa5a8164f 100644 --- a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp +++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -167,7 
+167,7 @@ void emitWebAssemblyDisassemblerTables( OS << " },\n"; } OS << " { 0, nullptr }\n};\n\n"; - OS << "} // End llvm namespace\n"; + OS << "} // end namespace llvm\n"; } } // namespace llvm diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 8036aecc4f4..14bce4c2944 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -651,7 +651,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) { DisassemblerTables::DisassemblerTables() { for (unsigned i = 0; i < array_lengthof(Tables); i++) - Tables[i] = llvm::make_unique(); + Tables[i] = std::make_unique(); HasConflicts = false; } diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp index 3df14f40e4a..6dc7e31e0da 100644 --- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -98,6 +98,7 @@ public: bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0"); if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" || + RecV->getValueAsBit("isCodeGenOnly") != RecE->getValueAsBit("isCodeGenOnly") || // VEX/EVEX fields RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") || RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") || diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index ab8a8855c47..33dc6f3f9e2 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -749,7 +749,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::RawFrmImm8: case X86Local::RawFrmImm16: case X86Local::AddCCFrm: - filter = llvm::make_unique(); + filter = std::make_unique(); break; case X86Local::MRMDestReg: case X86Local::MRMSrcReg: @@ -758,7 +758,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcRegCC: case X86Local::MRMXrCC: 
case X86Local::MRMXr: - filter = llvm::make_unique(true); + filter = std::make_unique(true); break; case X86Local::MRMDestMem: case X86Local::MRMSrcMem: @@ -767,22 +767,22 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::MRMSrcMemCC: case X86Local::MRMXmCC: case X86Local::MRMXm: - filter = llvm::make_unique(false); + filter = std::make_unique(false); break; case X86Local::MRM0r: case X86Local::MRM1r: case X86Local::MRM2r: case X86Local::MRM3r: case X86Local::MRM4r: case X86Local::MRM5r: case X86Local::MRM6r: case X86Local::MRM7r: - filter = llvm::make_unique(true, Form - X86Local::MRM0r); + filter = std::make_unique(true, Form - X86Local::MRM0r); break; case X86Local::MRM0m: case X86Local::MRM1m: case X86Local::MRM2m: case X86Local::MRM3m: case X86Local::MRM4m: case X86Local::MRM5m: case X86Local::MRM6m: case X86Local::MRM7m: - filter = llvm::make_unique(false, Form - X86Local::MRM0m); + filter = std::make_unique(false, Form - X86Local::MRM0m); break; X86_INSTR_MRM_MAPPING - filter = llvm::make_unique(0xC0 + Form - X86Local::MRM_C0); + filter = std::make_unique(0xC0 + Form - X86Local::MRM_C0); break; } // switch (Form) @@ -854,6 +854,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("GR64", TYPE_R64) TYPE("i8mem", TYPE_M) TYPE("i8imm", TYPE_IMM) + TYPE("u4imm", TYPE_UIMM8) TYPE("u8imm", TYPE_UIMM8) TYPE("i16u8imm", TYPE_UIMM8) TYPE("i32u8imm", TYPE_UIMM8) @@ -973,6 +974,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s, ENCODING("i64i32imm", ENCODING_ID) ENCODING("i64i8imm", ENCODING_IB) ENCODING("i8imm", ENCODING_IB) + ENCODING("u4imm", ENCODING_IB) ENCODING("u8imm", ENCODING_IB) ENCODING("i16u8imm", ENCODING_IB) ENCODING("i32u8imm", ENCODING_IB) diff --git a/utils/add_argument_names.py b/utils/add_argument_names.py new file mode 100755 index 00000000000..38dde259979 --- /dev/null +++ b/utils/add_argument_names.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import re, sys 
+ +def fix_string(s): + TYPE = re.compile('\s*(i[0-9]+|float|double|x86_fp80|fp128|ppc_fp128|\[\[.*?\]\]|\[2 x \[\[[A-Z_0-9]+\]\]\]|<.*?>|{.*?}|\[[0-9]+ x .*?\]|%["a-z:A-Z0-9._]+({{.*?}})?|%{{.*?}}|{{.*?}}|\[\[.*?\]\])(\s*(\*|addrspace\(.*?\)|dereferenceable\(.*?\)|byval\(.*?\)|sret|zeroext|inreg|returned|signext|nocapture|align \d+|swiftself|swifterror|readonly|noalias|inalloca|nocapture))*\s*') + + counter = 0 + if 'i32{{.*}}' in s: + counter = 1 + + at_pos = s.find('@') + if at_pos == -1: + at_pos = 0 + + annoying_pos = s.find('{{[^(]+}}') + if annoying_pos != -1: + at_pos = annoying_pos + 9 + + paren_pos = s.find('(', at_pos) + if paren_pos == -1: + return s + + res = s[:paren_pos+1] + s = s[paren_pos+1:] + + m = TYPE.match(s) + while m: + res += m.group() + s = s[m.end():] + if s.startswith(',') or s.startswith(')'): + res += f' %{counter}' + counter += 1 + + next_arg = s.find(',') + if next_arg == -1: + break + + res += s[:next_arg+1] + s = s[next_arg+1:] + m = TYPE.match(s) + + return res+s + +def process_file(contents): + PREFIX = re.compile(r'check-prefix(es)?(=|\s+)([a-zA-Z0-9,]+)') + check_prefixes = ['CHECK'] + result = '' + for line in contents.split('\n'): + if 'FileCheck' in line: + m = PREFIX.search(line) + if m: + check_prefixes.extend(m.group(3).split(',')) + + found_check = False + for prefix in check_prefixes: + if prefix in line: + found_check = True + break + + if not found_check or 'define' not in line: + result += line + '\n' + continue + + # We have a check for a function definition. Number the args. 
+ line = fix_string(line) + result += line + '\n' + return result + +def main(): + print(f'Processing {sys.argv[1]}') + f = open(sys.argv[1]) + content = f.read() + f.close() + + content = process_file(content) + + f = open(sys.argv[1], 'w') + f.write(content) + f.close() + +if __name__ == '__main__': + main() diff --git a/utils/llvm-locstats/CMakeLists.txt b/utils/llvm-locstats/CMakeLists.txt new file mode 100644 index 00000000000..a919023e141 --- /dev/null +++ b/utils/llvm-locstats/CMakeLists.txt @@ -0,0 +1,12 @@ +if (LLVM_BUILD_UTILS AND LLVM_BUILD_TOOLS) + add_custom_command( + OUTPUT ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py + COMMAND ${CMAKE_COMMAND} -E copy ${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + COMMENT "Copying llvm-locstats into ${LLVM_TOOLS_BINARY_DIR}" + ) + add_custom_target(llvm-locstats ALL + DEPENDS ${LLVM_TOOLS_BINARY_DIR}/llvm-locstats + ) + set_target_properties(llvm-locstats PROPERTIES FOLDER "Tools") +endif() diff --git a/utils/llvm-locstats/llvm-locstats.py b/utils/llvm-locstats/llvm-locstats.py new file mode 100755 index 00000000000..4df525ed1a9 --- /dev/null +++ b/utils/llvm-locstats/llvm-locstats.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# This is a tool that works like debug location coverage calculator. +# It parses the llvm-dwarfdump --statistics output by reporting it +# in a more human readable way. 
+# + +from __future__ import print_function +import argparse +import os +import sys +from json import loads +from math import ceil +from subprocess import Popen, PIPE + +def coverage_buckets(): + yield '0%' + yield '1-9%' + for start in range(10, 91, 10): + yield '{0}-{1}%'.format(start, start + 9) + yield '100%' + +def locstats_output( + variables_total, + variables_total_locstats, + variables_with_loc, + scope_bytes_covered, + scope_bytes_from_first_def, + variables_coverage_map + ): + + pc_ranges_covered = int(ceil(scope_bytes_covered * 100.0) + / scope_bytes_from_first_def) + variables_coverage_per_map = {} + for cov_bucket in coverage_buckets(): + variables_coverage_per_map[cov_bucket] = \ + int(ceil(variables_coverage_map[cov_bucket] * 100.0) \ + / variables_total_locstats) + + print (' =================================================') + print (' Debug Location Statistics ') + print (' =================================================') + print (' cov% samples percentage(~) ') + print (' -------------------------------------------------') + for cov_bucket in coverage_buckets(): + print (' {0:6} {1:8d} {2:3d}%'. \ + format(cov_bucket, variables_coverage_map[cov_bucket], \ + variables_coverage_per_map[cov_bucket])) + print (' =================================================') + print (' -the number of debug variables processed: ' \ + + str(variables_total_locstats)) + print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%') + + # Only if we are processing all the variables output the total + # availability. 
+ if variables_total and variables_with_loc: + total_availability = int(ceil(variables_with_loc * 100.0) \ + / variables_total) + print (' -------------------------------------------------') + print (' -total availability: ' + str(total_availability) + '%') + print (' =================================================') + +def parse_program_args(parser): + parser.add_argument('-only-variables', action='store_true', + default=False, + help='calculate the location statistics only for ' + 'local variables' + ) + parser.add_argument('-only-formal-parameters', action='store_true', + default=False, + help='calculate the location statistics only for ' + 'formal parameters' + ) + parser.add_argument('-ignore-debug-entry-values', action='store_true', + default=False, + help='ignore the location statistics on locations with ' + 'entry values' + ) + parser.add_argument('file_name', type=str, help='file to process') + return parser.parse_args() + + +def Main(): + parser = argparse.ArgumentParser() + results = parse_program_args(parser) + + if len(sys.argv) < 2: + print ('error: Too few arguments.') + parser.print_help() + sys.exit(1) + + if results.only_variables and results.only_formal_parameters: + print ('error: Please use just one only* option.') + parser.print_help() + sys.exit(1) + + # These will be different due to different options enabled. + variables_total = None + variables_total_locstats = None + variables_with_loc = None + variables_scope_bytes_covered = None + variables_scope_bytes_from_first_def = None + variables_scope_bytes_entry_values = None + variables_coverage_map = {} + binary = results.file_name + + # Get the directory of the LLVM tools. + llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \ + "llvm-dwarfdump") + # The statistics llvm-dwarfdump option. 
+ llvm_dwarfdump_stats_opt = "--statistics" + + subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \ + stdin=PIPE, stdout=PIPE, stderr=PIPE, \ + universal_newlines = True) + cmd_stdout, cmd_stderr = subproc.communicate() + + # Get the JSON and parse it. + json_parsed = None + + try: + json_parsed = loads(cmd_stdout) + except: + print ('error: No valid llvm-dwarfdump statistics found.') + sys.exit(1) + + if results.only_variables: + # Read the JSON only for local variables. + variables_total_locstats = \ + json_parsed['total vars procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['vars scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['vars scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "vars with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['vars entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = \ + "vars (excluding the debug entry values) " \ + "with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + elif results.only_formal_parameters: + # Read the JSON only for formal parameters. 
+ variables_total_locstats = \ + json_parsed['total params procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['formal params scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['formal params scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "params with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['formal params entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = \ + "params (excluding the debug entry values) " \ + "with {} of its scope covered".format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + # Read the JSON for both local variables and formal parameters. + variables_total = \ + json_parsed['source variables'] + variables_with_loc = json_parsed['variables with location'] + variables_total_locstats = \ + json_parsed['total variables procesed by location statistics'] + variables_scope_bytes_covered = \ + json_parsed['scope bytes covered'] + variables_scope_bytes_from_first_def = \ + json_parsed['scope bytes total'] + if not results.ignore_debug_entry_values: + for cov_bucket in coverage_buckets(): + cov_category = "variables with {} of its scope covered". \ + format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + else: + variables_scope_bytes_entry_values = \ + json_parsed['entry value scope bytes covered'] + variables_scope_bytes_covered = variables_scope_bytes_covered \ + - variables_scope_bytes_entry_values + for cov_bucket in coverage_buckets(): + cov_category = "variables (excluding the debug entry values) " \ + "with {} of its scope covered". 
format(cov_bucket) + variables_coverage_map[cov_bucket] = json_parsed[cov_category] + + # Pretty print collected info. + locstats_output( + variables_total, + variables_total_locstats, + variables_with_loc, + variables_scope_bytes_covered, + variables_scope_bytes_from_first_def, + variables_coverage_map + ) + +if __name__ == '__main__': + Main() + sys.exit(0) From 519fc96c475680de2cc49e7811dbbfadb912cbcc Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 23 Oct 2019 17:52:09 +0000 Subject: [PATCH 02/10] Vendor import of stripped clang trunk r375505, the last commit before the upstream Subversion repository was made read-only, and the LLVM project migrated to GitHub: https://llvm.org/svn/llvm-project/cfe/trunk@375505 --- include/clang-c/FatalErrorHandler.h | 33 + include/clang-c/Index.h | 22 +- include/clang/AST/APValue.h | 46 +- include/clang/AST/ASTContext.h | 138 +- include/clang/AST/ASTFwd.h | 2 +- include/clang/AST/ASTImporter.h | 43 +- include/clang/AST/ASTImporterSharedState.h | 2 +- include/clang/AST/ASTNodeTraverser.h | 3 + include/clang/AST/ASTStructuralEquivalence.h | 19 +- include/clang/AST/ASTTypeTraits.h | 4 +- include/clang/AST/Attr.h | 147 +- .../clang/AST/CXXRecordDeclDefinitionBits.def | 236 ++ include/clang/AST/CharUnits.h | 5 + include/clang/AST/CommentCommands.td | 1 + include/clang/AST/CommentLexer.h | 3 +- include/clang/AST/Decl.h | 52 +- include/clang/AST/DeclBase.h | 2 +- include/clang/AST/DeclCXX.h | 343 +-- include/clang/AST/DeclTemplate.h | 178 +- include/clang/AST/Expr.h | 16 +- include/clang/AST/ExprCXX.h | 235 +- include/clang/AST/ExternalASTMerger.h | 38 +- include/clang/AST/FormatString.h | 22 +- include/clang/AST/GlobalDecl.h | 1 + include/clang/AST/JSONNodeDumper.h | 2 + include/clang/AST/Mangle.h | 18 +- include/clang/AST/NSAPI.h | 3 - include/clang/AST/OpenMPClause.h | 157 +- include/clang/AST/OperationKinds.def | 5 +- include/clang/AST/OptionalDiagnostic.h | 78 + include/clang/AST/RawCommentList.h | 23 +- 
include/clang/AST/RecursiveASTVisitor.h | 100 +- include/clang/AST/Stmt.h | 10 + include/clang/AST/StmtOpenMP.h | 302 +- include/clang/AST/TextNodeDumper.h | 2 - include/clang/AST/Type.h | 105 +- include/clang/AST/TypeLoc.h | 2 +- include/clang/AST/TypeLocNodes.def | 2 +- include/clang/AST/TypeNodes.def | 135 - include/clang/AST/TypeVisitor.h | 4 +- include/clang/ASTMatchers/ASTMatchers.h | 186 +- .../clang/ASTMatchers/ASTMatchersInternal.h | 25 +- include/clang/Analysis/AnalysisDeclContext.h | 51 +- include/clang/Analysis/CFG.h | 195 +- include/clang/Analysis/CallGraph.h | 1 + .../BugReporter => Analysis}/PathDiagnostic.h | 145 +- include/clang/Basic/AArch64SVEACLETypes.def | 70 + include/clang/Basic/Attr.td | 182 +- include/clang/Basic/AttrDocs.td | 210 +- include/clang/Basic/AttributeCommonInfo.h | 190 ++ include/clang/Basic/Builtins.def | 27 +- include/clang/Basic/BuiltinsAArch64.def | 56 +- include/clang/Basic/BuiltinsAMDGPU.def | 9 + include/clang/Basic/BuiltinsARM.def | 48 +- include/clang/Basic/BuiltinsBPF.def | 24 + include/clang/Basic/BuiltinsPPC.def | 14 +- include/clang/Basic/BuiltinsWebAssembly.def | 33 + include/clang/Basic/BuiltinsX86.def | 4 +- include/clang/Basic/BuiltinsX86_64.def | 4 +- include/clang/Basic/CodeGenOptions.def | 14 +- include/clang/Basic/CodeGenOptions.h | 6 + include/clang/Basic/Diagnostic.h | 17 +- include/clang/Basic/DiagnosticASTKinds.td | 139 +- include/clang/Basic/DiagnosticCommentKinds.td | 6 + include/clang/Basic/DiagnosticCommonKinds.td | 26 +- include/clang/Basic/DiagnosticDriverKinds.td | 9 +- .../clang/Basic/DiagnosticFrontendKinds.td | 13 +- include/clang/Basic/DiagnosticGroups.td | 50 +- include/clang/Basic/DiagnosticOptions.def | 1 + include/clang/Basic/DiagnosticParseKinds.td | 55 +- include/clang/Basic/DiagnosticSemaKinds.td | 388 ++- .../Basic/DiagnosticSerializationKinds.td | 15 +- include/clang/Basic/Features.def | 4 +- include/clang/Basic/FileManager.h | 204 +- include/clang/Basic/IdentifierTable.h | 6 + 
include/clang/Basic/LangOptions.def | 11 +- include/clang/Basic/LangOptions.h | 16 + .../clang/{Frontend => Basic}/LangStandard.h | 69 +- .../{Frontend => Basic}/LangStandards.def | 5 +- include/clang/Basic/Linkage.h | 6 + include/clang/Basic/OpenCLOptions.h | 6 +- include/clang/Basic/OpenMPKinds.def | 103 + include/clang/Basic/OpenMPKinds.h | 13 +- include/clang/Basic/OperatorKinds.h | 19 + include/clang/Basic/SourceManager.h | 83 +- include/clang/Basic/Specifiers.h | 3 +- include/clang/Basic/Stack.h | 29 + include/clang/Basic/StmtNodes.td | 7 + include/clang/Basic/SyncScope.h | 2 +- include/clang/Basic/TargetBuiltins.h | 10 + include/clang/Basic/TargetInfo.h | 23 +- include/clang/Basic/TokenKinds.def | 72 +- include/clang/Basic/TokenKinds.h | 11 +- include/clang/Basic/TypeNodes.td | 106 + include/clang/Basic/X86Target.def | 5 + include/clang/Basic/arm_neon.td | 8 +- include/clang/CodeGen/CGFunctionInfo.h | 10 +- include/clang/CrossTU/CrossTranslationUnit.h | 141 +- .../clang/DirectoryWatcher/DirectoryWatcher.h | 10 +- include/clang/Driver/Action.h | 26 +- include/clang/Driver/CC1Options.td | 16 +- include/clang/Driver/CLCompatOptions.td | 14 +- include/clang/Driver/Driver.h | 14 +- include/clang/Driver/Options.h | 2 +- include/clang/Driver/Options.td | 136 +- include/clang/Driver/Phases.h | 3 +- include/clang/Driver/SanitizerArgs.h | 7 +- include/clang/Driver/ToolChain.h | 9 + include/clang/Driver/Types.def | 123 +- include/clang/Driver/Types.h | 7 +- include/clang/Format/Format.h | 240 +- include/clang/Frontend/ASTUnit.h | 5 +- include/clang/Frontend/CompilerInstance.h | 24 +- include/clang/Frontend/CompilerInvocation.h | 14 +- include/clang/Frontend/FrontendActions.h | 9 +- include/clang/Frontend/FrontendOptions.h | 51 +- include/clang/Frontend/Utils.h | 13 +- include/clang/Index/CodegenNameGenerator.h | 52 - include/clang/Index/IndexDataConsumer.h | 16 +- include/clang/Index/IndexingAction.h | 37 +- include/clang/Index/IndexingOptions.h | 42 + 
.../Lex/DependencyDirectivesSourceMinimizer.h | 22 + include/clang/Lex/DirectoryLookup.h | 68 +- include/clang/Lex/HeaderMap.h | 5 +- include/clang/Lex/HeaderSearch.h | 14 +- include/clang/Lex/HeaderSearchOptions.h | 27 +- include/clang/Lex/Lexer.h | 15 + include/clang/Lex/MacroArgs.h | 10 - include/clang/Lex/PPCallbacks.h | 12 +- include/clang/Lex/Preprocessor.h | 55 +- ...rExcludedConditionalDirectiveSkipMapping.h | 31 + include/clang/Lex/PreprocessorOptions.h | 16 + include/clang/Parse/Parser.h | 50 +- include/clang/Rewrite/Core/Rewriter.h | 11 + include/clang/Sema/Overload.h | 102 +- include/clang/Sema/ParsedAttr.h | 212 +- include/clang/Sema/ScopeInfo.h | 10 +- include/clang/Sema/Sema.h | 577 +++- include/clang/Sema/SemaInternal.h | 2 +- include/clang/Sema/TypoCorrection.h | 8 +- include/clang/Serialization/ASTBitCodes.h | 17 +- include/clang/Serialization/ASTReader.h | 14 +- .../clang/StaticAnalyzer/Checkers/Checkers.td | 45 +- .../StaticAnalyzer/Core/AnalyzerOptions.def | 32 +- .../StaticAnalyzer/Core/AnalyzerOptions.h | 68 +- .../Core/BugReporter/BugReporter.h | 656 +++-- .../Core/BugReporter/BugReporterVisitors.h | 217 +- .../StaticAnalyzer/Core/BugReporter/BugType.h | 40 +- .../Core/BugReporter/CommonBugCategories.h | 1 + include/clang/StaticAnalyzer/Core/Checker.h | 4 +- .../StaticAnalyzer/Core/CheckerManager.h | 24 +- .../Core/PathDiagnosticConsumers.h | 12 +- .../Core/PathSensitive/AnalysisManager.h | 9 +- .../Core/PathSensitive/CallEvent.h | 27 +- .../Core/PathSensitive/CheckerContext.h | 17 +- .../Core/PathSensitive/DynamicCastInfo.h | 55 + .../Core/PathSensitive/DynamicType.h | 73 + .../Core/PathSensitive/DynamicTypeInfo.h | 48 +- .../Core/PathSensitive/DynamicTypeMap.h | 63 - .../Core/PathSensitive/ExplodedGraph.h | 54 +- .../Core/PathSensitive/ExprEngine.h | 10 +- .../Core/PathSensitive/MemRegion.h | 6 + .../Core/PathSensitive/ProgramState.h | 4 + include/clang/Tooling/ASTDiff/ASTDiff.h | 2 +- include/clang/Tooling/AllTUsExecution.h | 3 +- 
include/clang/Tooling/ArgumentsAdjusters.h | 4 + .../DependencyScanningFilesystem.h | 188 ++ .../DependencyScanningService.h | 65 + .../DependencyScanningTool.h | 48 + .../DependencyScanningWorker.h | 42 +- include/clang/Tooling/Execution.h | 7 - .../clang/Tooling/Inclusions/HeaderIncludes.h | 1 + .../clang/Tooling/Inclusions/IncludeStyle.h | 2 + .../Refactoring/Extract/SourceExtraction.h | 6 +- .../Refactoring/RecursiveSymbolVisitor.h | 15 +- .../RefactoringActionRulesInternal.h | 10 +- include/clang/Tooling/StandaloneExecution.h | 2 - include/clang/Tooling/Syntax/Tokens.h | 10 + include/clang/Tooling/Tooling.h | 30 +- .../clang/Tooling/Transformer/MatchConsumer.h | 62 + .../RangeSelector.h | 30 +- .../RewriteRule.h} | 109 +- .../{Refactoring => Transformer}/SourceCode.h | 19 +- .../Tooling/Transformer/SourceCodeBuilders.h | 86 + .../{Refactoring => Transformer}/Stencil.h | 130 +- .../clang/Tooling/Transformer/Transformer.h | 52 + include/clang/module.modulemap | 6 +- lib/ARCMigrate/ARCMT.cpp | 6 +- lib/ARCMigrate/FileRemapper.cpp | 24 +- lib/ARCMigrate/ObjCMT.cpp | 15 +- lib/ARCMigrate/PlistReporter.cpp | 2 +- lib/AST/APValue.cpp | 38 +- lib/AST/ASTContext.cpp | 757 ++--- lib/AST/ASTDiagnostic.cpp | 2 +- lib/AST/ASTImporter.cpp | 542 ++-- lib/AST/ASTStructuralEquivalence.cpp | 90 +- lib/AST/ASTTypeTraits.cpp | 16 +- lib/AST/CXXInheritance.cpp | 2 +- lib/AST/Comment.cpp | 15 + lib/AST/CommentLexer.cpp | 7 +- lib/AST/CommentParser.cpp | 6 + lib/AST/CommentSema.cpp | 8 + lib/AST/Decl.cpp | 75 +- lib/AST/DeclBase.cpp | 15 +- lib/AST/DeclCXX.cpp | 84 +- lib/AST/DeclPrinter.cpp | 15 +- lib/AST/DeclTemplate.cpp | 69 +- lib/AST/Expr.cpp | 140 +- lib/AST/ExprCXX.cpp | 176 +- lib/AST/ExprClassification.cpp | 5 + lib/AST/ExprConstant.cpp | 2490 ++++++++++++----- lib/AST/ExternalASTMerger.cpp | 124 +- lib/AST/FormatString.cpp | 4 + lib/AST/FormatStringParsing.h | 13 + lib/AST/InheritViz.cpp | 4 +- lib/AST/Interp/Block.cpp | 87 + lib/AST/Interp/Block.h | 140 + 
lib/AST/Interp/Boolean.h | 148 + lib/AST/Interp/ByteCodeEmitter.cpp | 175 ++ lib/AST/Interp/ByteCodeEmitter.h | 112 + lib/AST/Interp/ByteCodeExprGen.cpp | 580 ++++ lib/AST/Interp/ByteCodeExprGen.h | 331 +++ lib/AST/Interp/ByteCodeGenError.cpp | 14 + lib/AST/Interp/ByteCodeGenError.h | 46 + lib/AST/Interp/ByteCodeStmtGen.cpp | 265 ++ lib/AST/Interp/ByteCodeStmtGen.h | 89 + lib/AST/Interp/Context.cpp | 148 + lib/AST/Interp/Context.h | 100 + lib/AST/Interp/Descriptor.cpp | 292 ++ lib/AST/Interp/Descriptor.h | 220 ++ lib/AST/Interp/Disasm.cpp | 69 + lib/AST/Interp/EvalEmitter.cpp | 253 ++ lib/AST/Interp/EvalEmitter.h | 129 + lib/AST/Interp/Frame.cpp | 14 + lib/AST/Interp/Frame.h | 45 + lib/AST/Interp/Function.cpp | 48 + lib/AST/Interp/Function.h | 163 ++ lib/AST/Interp/Integral.h | 269 ++ lib/AST/Interp/Interp.cpp | 417 +++ lib/AST/Interp/Interp.h | 960 +++++++ lib/AST/Interp/InterpFrame.cpp | 193 ++ lib/AST/Interp/InterpFrame.h | 153 + lib/AST/Interp/InterpStack.cpp | 78 + lib/AST/Interp/InterpStack.h | 113 + lib/AST/Interp/InterpState.cpp | 74 + lib/AST/Interp/InterpState.h | 112 + lib/AST/Interp/Opcode.h | 30 + lib/AST/Interp/Opcodes.td | 422 +++ lib/AST/Interp/Pointer.cpp | 193 ++ lib/AST/Interp/Pointer.h | 353 +++ lib/AST/Interp/PrimType.cpp | 23 + lib/AST/Interp/PrimType.h | 115 + lib/AST/Interp/Program.cpp | 364 +++ lib/AST/Interp/Program.h | 220 ++ lib/AST/Interp/Record.cpp | 46 + lib/AST/Interp/Record.h | 121 + lib/AST/Interp/Source.cpp | 39 + lib/AST/Interp/Source.h | 118 + lib/AST/Interp/State.cpp | 158 ++ lib/AST/Interp/State.h | 133 + lib/AST/ItaniumCXXABI.cpp | 74 +- lib/AST/ItaniumMangle.cpp | 154 +- lib/AST/JSONNodeDumper.cpp | 49 +- lib/AST/Mangle.cpp | 16 +- lib/AST/MicrosoftCXXABI.cpp | 4 +- lib/AST/MicrosoftMangle.cpp | 32 +- lib/AST/NSAPI.cpp | 14 +- lib/AST/OpenMPClause.cpp | 71 +- lib/AST/PrintfFormatString.cpp | 20 + lib/AST/RawCommentList.cpp | 64 +- lib/AST/Stmt.cpp | 11 + lib/AST/StmtOpenMP.cpp | 246 ++ lib/AST/StmtPrinter.cpp | 46 +- 
lib/AST/StmtProfile.cpp | 37 + lib/AST/TemplateBase.cpp | 2 +- lib/AST/TextNodeDumper.cpp | 17 +- lib/AST/Type.cpp | 113 +- lib/AST/TypeLoc.cpp | 3 + lib/AST/TypePrinter.cpp | 11 +- lib/AST/VTTBuilder.cpp | 12 +- lib/AST/VTableBuilder.cpp | 12 +- lib/ASTMatchers/ASTMatchFinder.cpp | 73 +- lib/ASTMatchers/Dynamic/Marshallers.h | 14 +- lib/ASTMatchers/Dynamic/Registry.cpp | 3 +- lib/Analysis/AnalysisDeclContext.cpp | 23 +- lib/Analysis/BodyFarm.cpp | 4 +- lib/Analysis/CFG.cpp | 432 ++- lib/Analysis/CallGraph.cpp | 42 +- lib/Analysis/CloneDetection.cpp | 3 +- lib/Analysis/CocoaConventions.cpp | 4 +- lib/Analysis/Consumed.cpp | 12 +- .../Core => Analysis}/PathDiagnostic.cpp | 289 +- lib/Analysis/ProgramPoint.cpp | 6 +- lib/Analysis/ReachableCode.cpp | 2 +- lib/Analysis/RetainSummaryManager.cpp | 2 +- lib/Analysis/ThreadSafety.cpp | 30 +- .../SampleAnalyzer/MainCallChecker.cpp | 4 +- lib/Basic/Attributes.cpp | 75 +- lib/Basic/FileManager.cpp | 259 +- lib/Basic/IdentifierTable.cpp | 15 + lib/{Frontend => Basic}/LangStandards.cpp | 25 +- lib/Basic/Module.cpp | 4 +- lib/Basic/OpenMPKinds.cpp | 85 +- lib/Basic/SourceManager.cpp | 205 +- lib/Basic/Stack.cpp | 75 + lib/Basic/TargetInfo.cpp | 6 + lib/Basic/Targets.cpp | 23 +- lib/Basic/Targets/AArch64.cpp | 32 +- lib/Basic/Targets/AArch64.h | 1 + lib/Basic/Targets/AMDGPU.cpp | 8 +- lib/Basic/Targets/ARM.cpp | 113 +- lib/Basic/Targets/BPF.cpp | 12 + lib/Basic/Targets/BPF.h | 4 +- lib/Basic/Targets/OSTargets.h | 15 +- lib/Basic/Targets/PPC.cpp | 10 +- lib/Basic/Targets/PPC.h | 1 + lib/Basic/Targets/RISCV.cpp | 63 +- lib/Basic/Targets/RISCV.h | 20 +- lib/Basic/Targets/SPIR.h | 2 +- lib/Basic/Targets/Sparc.h | 1 + lib/Basic/Targets/SystemZ.cpp | 2 +- lib/Basic/Targets/X86.cpp | 20 +- lib/Basic/Targets/X86.h | 39 +- lib/Basic/TokenKinds.cpp | 20 + lib/CodeGen/BackendUtil.cpp | 85 +- lib/CodeGen/CGAtomic.cpp | 9 +- lib/CodeGen/CGBlocks.cpp | 5 +- lib/CodeGen/CGBuiltin.cpp | 518 ++-- lib/CodeGen/CGCUDANV.cpp | 23 +- 
lib/CodeGen/CGCXX.cpp | 6 +- lib/CodeGen/CGCXXABI.cpp | 4 +- lib/CodeGen/CGCXXABI.h | 2 +- lib/CodeGen/CGCall.cpp | 84 +- lib/CodeGen/CGClass.cpp | 51 +- lib/CodeGen/CGCleanup.cpp | 11 +- lib/CodeGen/CGDebugInfo.cpp | 95 +- lib/CodeGen/CGDecl.cpp | 39 +- lib/CodeGen/CGDeclCXX.cpp | 15 +- lib/CodeGen/CGException.cpp | 8 +- lib/CodeGen/CGExpr.cpp | 88 +- lib/CodeGen/CGExprAgg.cpp | 26 +- lib/CodeGen/CGExprCXX.cpp | 45 +- lib/CodeGen/CGExprComplex.cpp | 4 + lib/CodeGen/CGExprConstant.cpp | 10 +- lib/CodeGen/CGExprScalar.cpp | 221 +- lib/CodeGen/CGLoopInfo.cpp | 60 +- lib/CodeGen/CGLoopInfo.h | 12 +- lib/CodeGen/CGNonTrivialStruct.cpp | 2 +- lib/CodeGen/CGObjC.cpp | 4 +- lib/CodeGen/CGObjCGNU.cpp | 13 +- lib/CodeGen/CGObjCMac.cpp | 47 +- lib/CodeGen/CGOpenMPRuntime.cpp | 1214 +++++--- lib/CodeGen/CGOpenMPRuntime.h | 106 +- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 128 +- lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 12 + lib/CodeGen/CGStmt.cpp | 72 +- lib/CodeGen/CGStmtOpenMP.cpp | 160 +- lib/CodeGen/CGVTables.cpp | 92 +- lib/CodeGen/CodeGenAction.cpp | 41 +- lib/CodeGen/CodeGenFunction.cpp | 56 +- lib/CodeGen/CodeGenFunction.h | 15 +- lib/CodeGen/CodeGenModule.cpp | 233 +- lib/CodeGen/CodeGenModule.h | 15 +- lib/CodeGen/CodeGenPGO.cpp | 2 +- lib/CodeGen/CodeGenPGO.h | 4 +- lib/CodeGen/CodeGenTypes.cpp | 25 +- lib/CodeGen/ConstantInitBuilder.cpp | 2 +- lib/CodeGen/CoverageMappingGen.cpp | 27 +- lib/CodeGen/CoverageMappingGen.h | 8 +- lib/CodeGen/EHScopeStack.h | 4 +- lib/CodeGen/ItaniumCXXABI.cpp | 248 +- lib/CodeGen/MicrosoftCXXABI.cpp | 17 +- lib/CodeGen/ModuleBuilder.cpp | 15 +- .../ObjectFilePCHContainerOperations.cpp | 12 +- lib/CodeGen/TargetInfo.cpp | 362 ++- lib/CrossTU/CrossTranslationUnit.cpp | 263 +- .../DirectoryWatcher-not-implemented.cpp | 6 +- .../linux/DirectoryWatcher-linux.cpp | 33 +- .../mac/DirectoryWatcher-mac.cpp | 34 +- .../windows/DirectoryWatcher-windows.cpp | 50 + lib/Driver/Action.cpp | 14 + lib/Driver/Compilation.cpp | 2 +- lib/Driver/Driver.cpp | 558 
++-- lib/Driver/DriverOptions.cpp | 17 +- lib/Driver/Phases.cpp | 1 + lib/Driver/SanitizerArgs.cpp | 40 +- lib/Driver/ToolChain.cpp | 32 + lib/Driver/ToolChains/AMDGPU.cpp | 2 +- lib/Driver/ToolChains/AVR.cpp | 2 +- lib/Driver/ToolChains/Ananas.cpp | 4 +- lib/Driver/ToolChains/Arch/AArch64.cpp | 1 + lib/Driver/ToolChains/Arch/ARM.cpp | 50 +- lib/Driver/ToolChains/Arch/Mips.cpp | 25 +- lib/Driver/ToolChains/Arch/Mips.h | 3 +- lib/Driver/ToolChains/Arch/PPC.cpp | 4 +- lib/Driver/ToolChains/Arch/RISCV.cpp | 315 ++- lib/Driver/ToolChains/Arch/RISCV.h | 3 +- lib/Driver/ToolChains/Arch/X86.cpp | 1 + lib/Driver/ToolChains/BareMetal.cpp | 2 +- lib/Driver/ToolChains/Clang.cpp | 616 ++-- lib/Driver/ToolChains/Clang.h | 18 + lib/Driver/ToolChains/CloudABI.cpp | 2 +- lib/Driver/ToolChains/CommonArgs.cpp | 356 +-- lib/Driver/ToolChains/CommonArgs.h | 16 +- lib/Driver/ToolChains/CrossWindows.cpp | 4 +- lib/Driver/ToolChains/Cuda.cpp | 12 +- lib/Driver/ToolChains/Darwin.cpp | 41 +- lib/Driver/ToolChains/DragonFly.cpp | 4 +- lib/Driver/ToolChains/FreeBSD.cpp | 24 +- lib/Driver/ToolChains/FreeBSD.h | 2 +- lib/Driver/ToolChains/Fuchsia.cpp | 4 +- lib/Driver/ToolChains/Fuchsia.h | 4 +- lib/Driver/ToolChains/Gnu.cpp | 23 +- lib/Driver/ToolChains/HIP.cpp | 64 +- lib/Driver/ToolChains/HIP.h | 3 +- lib/Driver/ToolChains/Hexagon.cpp | 4 +- lib/Driver/ToolChains/InterfaceStubs.cpp | 37 + lib/Driver/ToolChains/InterfaceStubs.h | 36 + lib/Driver/ToolChains/Linux.cpp | 15 +- lib/Driver/ToolChains/MSP430.cpp | 2 +- lib/Driver/ToolChains/MSVC.cpp | 17 +- lib/Driver/ToolChains/MSVC.h | 14 +- lib/Driver/ToolChains/MinGW.cpp | 4 +- lib/Driver/ToolChains/MinGW.h | 3 +- lib/Driver/ToolChains/Minix.cpp | 4 +- lib/Driver/ToolChains/Myriad.cpp | 6 +- lib/Driver/ToolChains/NaCl.cpp | 2 +- lib/Driver/ToolChains/NetBSD.cpp | 25 +- lib/Driver/ToolChains/OpenBSD.cpp | 4 +- lib/Driver/ToolChains/PPCLinux.cpp | 5 +- lib/Driver/ToolChains/PS4CPU.cpp | 6 +- lib/Driver/ToolChains/RISCVToolchain.cpp | 9 +- 
lib/Driver/ToolChains/RISCVToolchain.h | 1 + lib/Driver/ToolChains/Solaris.cpp | 30 +- lib/Driver/ToolChains/WebAssembly.cpp | 31 +- lib/Driver/ToolChains/XCore.cpp | 4 +- lib/Driver/Types.cpp | 135 +- lib/Driver/XRayArgs.cpp | 2 +- lib/Format/BreakableToken.cpp | 11 +- lib/Format/ContinuationIndenter.cpp | 23 +- lib/Format/Encoding.h | 3 +- lib/Format/Format.cpp | 246 +- lib/Format/FormatToken.h | 13 + lib/Format/FormatTokenLexer.cpp | 20 +- lib/Format/FormatTokenLexer.h | 1 + lib/Format/NamespaceEndCommentsFixer.cpp | 16 +- lib/Format/TokenAnnotator.cpp | 132 +- lib/Format/TokenAnnotator.h | 3 +- lib/Format/UnwrappedLineFormatter.cpp | 63 +- lib/Format/UnwrappedLineParser.cpp | 41 +- lib/Format/UnwrappedLineParser.h | 2 +- lib/Format/WhitespaceManager.cpp | 43 +- lib/Frontend/ASTConsumers.cpp | 8 +- lib/Frontend/ASTUnit.cpp | 74 +- lib/Frontend/ChainedIncludesSource.cpp | 2 +- lib/Frontend/CompilerInstance.cpp | 117 +- lib/Frontend/CompilerInvocation.cpp | 376 ++- .../CreateInvocationFromCommandLine.cpp | 16 +- lib/Frontend/DependencyFile.cpp | 25 +- lib/Frontend/DependencyGraph.cpp | 4 +- lib/Frontend/FrontendAction.cpp | 29 +- lib/Frontend/FrontendActions.cpp | 61 +- lib/Frontend/FrontendOptions.cpp | 37 +- lib/Frontend/HeaderIncludeGen.cpp | 5 +- lib/Frontend/InitHeaderSearch.cpp | 19 +- lib/Frontend/InitPreprocessor.cpp | 81 +- .../InterfaceStubFunctionsConsumer.cpp | 106 +- lib/Frontend/ModuleDependencyCollector.cpp | 8 +- lib/Frontend/MultiplexConsumer.cpp | 4 +- lib/Frontend/PrecompiledPreamble.cpp | 24 +- lib/Frontend/PrintPreprocessedOutput.cpp | 2 +- lib/Frontend/Rewrite/FixItRewriter.cpp | 2 +- lib/Frontend/Rewrite/FrontendActions.cpp | 15 +- lib/Frontend/Rewrite/HTMLPrint.cpp | 2 +- lib/Frontend/Rewrite/InclusionRewriter.cpp | 180 +- lib/Frontend/Rewrite/RewriteModernObjC.cpp | 45 +- lib/Frontend/Rewrite/RewriteObjC.cpp | 23 +- lib/Frontend/SerializedDiagnosticPrinter.cpp | 8 +- lib/Frontend/TextDiagnostic.cpp | 11 +- 
lib/Frontend/VerifyDiagnosticConsumer.cpp | 11 +- .../ExecuteCompilerInvocation.cpp | 99 +- lib/Headers/__clang_cuda_intrinsics.h | 10 +- lib/Headers/altivec.h | 85 +- lib/Headers/arm_acle.h | 24 +- lib/Headers/avx512fintrin.h | 27 +- lib/Headers/bmiintrin.h | 175 +- lib/Headers/cpuid.h | 4 +- lib/Headers/emmintrin.h | 6 +- lib/Headers/ia32intrin.h | 68 + lib/Headers/immintrin.h | 3 +- lib/Headers/opencl-c-base.h | 19 +- lib/Headers/opencl-c.h | 212 +- lib/Headers/ppc_wrappers/emmintrin.h | 6 + lib/Headers/ppc_wrappers/mm_malloc.h | 6 + lib/Headers/ppc_wrappers/mmintrin.h | 7 + lib/Headers/ppc_wrappers/pmmintrin.h | 150 + lib/Headers/ppc_wrappers/smmintrin.h | 85 + lib/Headers/ppc_wrappers/tmmintrin.h | 495 ++++ lib/Headers/ppc_wrappers/xmmintrin.h | 6 + lib/Index/CodegenNameGenerator.cpp | 36 - lib/Index/IndexSymbol.cpp | 2 +- lib/Index/IndexingAction.cpp | 190 +- lib/Index/USRGeneration.cpp | 3 + .../DependencyDirectivesSourceMinimizer.cpp | 250 +- lib/Lex/HeaderMap.cpp | 8 +- lib/Lex/HeaderSearch.cpp | 348 ++- lib/Lex/Lexer.cpp | 9 + lib/Lex/MacroArgs.cpp | 20 - lib/Lex/ModuleMap.cpp | 73 +- lib/Lex/PPDirectives.cpp | 391 +-- lib/Lex/PPLexerChange.cpp | 25 +- lib/Lex/PPMacroExpansion.cpp | 52 +- lib/Lex/Pragma.cpp | 85 +- lib/Lex/Preprocessor.cpp | 13 +- lib/Lex/TokenLexer.cpp | 16 +- lib/Lex/UnicodeCharSets.h | 2 +- lib/Parse/ParseCXXInlineMethods.cpp | 2 +- lib/Parse/ParseDecl.cpp | 130 +- lib/Parse/ParseDeclCXX.cpp | 16 +- lib/Parse/ParseExpr.cpp | 39 +- lib/Parse/ParseExprCXX.cpp | 77 +- lib/Parse/ParseInit.cpp | 27 + lib/Parse/ParseObjc.cpp | 4 - lib/Parse/ParseOpenMP.cpp | 478 +++- lib/Parse/ParsePragma.cpp | 136 +- lib/Parse/ParseStmt.cpp | 26 +- lib/Parse/ParseTemplate.cpp | 8 +- lib/Parse/ParseTentative.cpp | 7 +- lib/Parse/Parser.cpp | 47 +- lib/Rewrite/Rewriter.cpp | 11 + lib/Sema/AnalysisBasedWarnings.cpp | 82 +- lib/Sema/DeclSpec.cpp | 15 +- lib/Sema/OpenCLBuiltins.td | 740 +++-- lib/Sema/ParsedAttr.cpp | 65 - lib/Sema/Sema.cpp | 91 +- 
lib/Sema/SemaAccess.cpp | 6 +- lib/Sema/SemaAttr.cpp | 135 +- lib/Sema/SemaCUDA.cpp | 95 +- lib/Sema/SemaCXXScopeSpec.cpp | 2 +- lib/Sema/SemaCast.cpp | 17 +- lib/Sema/SemaChecking.cpp | 612 +++- lib/Sema/SemaCodeComplete.cpp | 3 + lib/Sema/SemaConcept.cpp | 125 + lib/Sema/SemaCoroutine.cpp | 2 +- lib/Sema/SemaDecl.cpp | 920 ++++-- lib/Sema/SemaDeclAttr.cpp | 1101 +++----- lib/Sema/SemaDeclCXX.cpp | 577 ++-- lib/Sema/SemaDeclObjC.cpp | 14 +- lib/Sema/SemaExceptionSpec.cpp | 23 +- lib/Sema/SemaExpr.cpp | 993 +++++-- lib/Sema/SemaExprCXX.cpp | 295 +- lib/Sema/SemaExprMember.cpp | 20 +- lib/Sema/SemaExprObjC.cpp | 4 +- lib/Sema/SemaInit.cpp | 1202 +++++--- lib/Sema/SemaLambda.cpp | 127 +- lib/Sema/SemaLookup.cpp | 889 +++--- lib/Sema/SemaModule.cpp | 2 + lib/Sema/SemaObjCProperty.cpp | 14 +- lib/Sema/SemaOpenMP.cpp | 1878 +++++++++++-- lib/Sema/SemaOverload.cpp | 653 +++-- lib/Sema/SemaStmt.cpp | 147 +- lib/Sema/SemaStmtAsm.cpp | 32 +- lib/Sema/SemaStmtAttr.cpp | 31 +- lib/Sema/SemaTemplate.cpp | 274 +- lib/Sema/SemaTemplateDeduction.cpp | 47 +- lib/Sema/SemaTemplateInstantiate.cpp | 69 +- lib/Sema/SemaTemplateInstantiateDecl.cpp | 213 +- lib/Sema/SemaTemplateVariadic.cpp | 76 +- lib/Sema/SemaType.cpp | 214 +- lib/Sema/TreeTransform.h | 220 +- lib/Sema/TypeLocBuilder.cpp | 2 +- lib/Sema/TypeLocBuilder.h | 12 +- lib/Serialization/ASTCommon.cpp | 5 + lib/Serialization/ASTReader.cpp | 223 +- lib/Serialization/ASTReaderDecl.cpp | 171 +- lib/Serialization/ASTReaderStmt.cpp | 86 + lib/Serialization/ASTWriter.cpp | 174 +- lib/Serialization/ASTWriterDecl.cpp | 15 +- lib/Serialization/ASTWriterStmt.cpp | 50 + lib/Serialization/GlobalModuleIndex.cpp | 49 +- lib/Serialization/ModuleManager.cpp | 35 +- lib/Serialization/PCHContainerOperations.cpp | 6 +- .../Checkers/ArrayBoundChecker.cpp | 3 +- .../Checkers/ArrayBoundCheckerV2.cpp | 4 +- .../Checkers/BasicObjCFoundationChecks.cpp | 11 +- .../BlockInCriticalSectionChecker.cpp | 5 +- .../Checkers/BoolAssignmentChecker.cpp | 4 +- 
.../Checkers/CStringChecker.cpp | 72 +- .../Checkers/CStringSyntaxChecker.cpp | 27 +- .../Checkers/CallAndMessageChecker.cpp | 23 +- .../Checkers/CastSizeChecker.cpp | 3 +- .../Checkers/CastValueChecker.cpp | 461 ++- .../Checkers/CheckObjCDealloc.cpp | 15 +- .../Checkers/CheckObjCInstMethSignature.cpp | 2 +- .../Checkers/CheckSecuritySyntaxOnly.cpp | 43 +- lib/StaticAnalyzer/Checkers/ChrootChecker.cpp | 2 +- lib/StaticAnalyzer/Checkers/CloneChecker.cpp | 6 +- .../Checkers/ConversionChecker.cpp | 2 +- .../Checkers/DeadStoresChecker.cpp | 104 +- lib/StaticAnalyzer/Checkers/DebugCheckers.cpp | 3 +- .../DeleteWithNonVirtualDtorChecker.cpp | 21 +- .../Checkers/DereferenceChecker.cpp | 6 +- .../Checkers/DivZeroChecker.cpp | 4 +- .../Checkers/DynamicTypeChecker.cpp | 25 +- .../Checkers/DynamicTypePropagation.cpp | 54 +- .../Checkers/EnumCastOutOfRangeChecker.cpp | 18 +- .../Checkers/ExprInspectionChecker.cpp | 4 +- .../Checkers/FixedAddressChecker.cpp | 3 +- .../Checkers/GenericTaintChecker.cpp | 270 +- .../Checkers/InnerPointerChecker.cpp | 19 +- .../Checkers/IteratorChecker.cpp | 51 +- .../Checkers/IvarInvalidationChecker.cpp | 14 +- .../Checkers/LocalizationChecker.cpp | 42 +- lib/StaticAnalyzer/Checkers/MIGChecker.cpp | 5 +- .../Checkers/MPI-Checker/MPIBugReporter.cpp | 24 +- .../Checkers/MPI-Checker/MPIBugReporter.h | 6 +- .../Checkers/MacOSKeychainAPIChecker.cpp | 43 +- .../Checkers/MacOSXAPIChecker.cpp | 3 +- lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 1303 +++++---- .../Checkers/MallocSizeofChecker.cpp | 2 +- .../Checkers/MmapWriteExecChecker.cpp | 2 +- lib/StaticAnalyzer/Checkers/MoveChecker.cpp | 23 +- .../Checkers/NSAutoreleasePoolChecker.cpp | 8 +- .../Checkers/NSErrorChecker.cpp | 3 +- .../Checkers/NonNullParamChecker.cpp | 31 +- .../Checkers/NullabilityChecker.cpp | 36 +- .../Checkers/ObjCAtSyncChecker.cpp | 8 +- .../Checkers/ObjCContainersChecker.cpp | 7 +- .../Checkers/ObjCMissingSuperCallChecker.cpp | 2 +- .../Checkers/ObjCSelfInitChecker.cpp | 2 +- 
.../Checkers/ObjCSuperDeallocChecker.cpp | 20 +- .../Checkers/ObjCUnusedIVarsChecker.cpp | 6 +- .../Checkers/PaddingChecker.cpp | 5 +- .../Checkers/PointerArithChecker.cpp | 18 +- .../Checkers/PointerSubChecker.cpp | 3 +- .../Checkers/PthreadLockChecker.cpp | 14 +- .../RetainCountChecker/RetainCountChecker.cpp | 10 +- .../RetainCountChecker/RetainCountChecker.h | 2 +- .../RetainCountDiagnostics.cpp | 61 +- .../RetainCountDiagnostics.h | 18 +- .../Checkers/ReturnPointerRangeChecker.cpp | 3 +- .../Checkers/ReturnUndefChecker.cpp | 3 +- .../Checkers/SimpleStreamChecker.cpp | 9 +- .../Checkers/StackAddrEscapeChecker.cpp | 22 +- lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 8 +- lib/StaticAnalyzer/Checkers/Taint.cpp | 8 +- lib/StaticAnalyzer/Checkers/Taint.h | 6 +- .../Checkers/TaintTesterChecker.cpp | 2 +- .../Checkers/TestAfterDivZeroChecker.cpp | 16 +- .../Checkers/UndefBranchChecker.cpp | 3 +- .../Checkers/UndefCapturedBlockVarChecker.cpp | 7 +- .../Checkers/UndefResultChecker.cpp | 2 +- .../UndefinedArraySubscriptChecker.cpp | 2 +- .../Checkers/UndefinedAssignmentChecker.cpp | 4 +- .../UninitializedObjectChecker.cpp | 6 +- .../UninitializedPointee.cpp | 11 +- .../Checkers/UnixAPIChecker.cpp | 8 +- .../Checkers/UnreachableCodeChecker.cpp | 2 +- .../Checkers/VLASizeChecker.cpp | 4 +- lib/StaticAnalyzer/Checkers/ValistChecker.cpp | 44 +- lib/StaticAnalyzer/Checkers/VforkChecker.cpp | 2 +- .../Checkers/VirtualCallChecker.cpp | 205 +- lib/StaticAnalyzer/Checkers/Yaml.h | 59 + lib/StaticAnalyzer/Core/AnalysisManager.cpp | 4 +- lib/StaticAnalyzer/Core/AnalyzerOptions.cpp | 19 - lib/StaticAnalyzer/Core/BugReporter.cpp | 1850 ++++++------ .../Core/BugReporterVisitors.cpp | 666 +++-- lib/StaticAnalyzer/Core/CallEvent.cpp | 24 +- lib/StaticAnalyzer/Core/Checker.cpp | 8 +- lib/StaticAnalyzer/Core/CheckerHelpers.cpp | 2 +- lib/StaticAnalyzer/Core/CheckerManager.cpp | 2 +- .../Core/CommonBugCategories.cpp | 1 + lib/StaticAnalyzer/Core/DynamicType.cpp | 229 ++ 
lib/StaticAnalyzer/Core/DynamicTypeMap.cpp | 97 - lib/StaticAnalyzer/Core/Environment.cpp | 1 + lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 120 +- lib/StaticAnalyzer/Core/ExprEngine.cpp | 91 +- lib/StaticAnalyzer/Core/ExprEngineC.cpp | 3 +- lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 36 +- .../Core/ExprEngineCallAndReturn.cpp | 5 +- lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 50 +- lib/StaticAnalyzer/Core/LoopUnrolling.cpp | 4 +- lib/StaticAnalyzer/Core/MemRegion.cpp | 5 +- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 155 +- lib/StaticAnalyzer/Core/ProgramState.cpp | 2 +- .../Core/RangeConstraintManager.cpp | 2 +- lib/StaticAnalyzer/Core/RegionStore.cpp | 89 +- .../Core/SMTConstraintManager.cpp | 2 +- lib/StaticAnalyzer/Core/SarifDiagnostics.cpp | 123 +- lib/StaticAnalyzer/Core/Store.cpp | 2 +- lib/StaticAnalyzer/Core/WorkList.cpp | 12 +- .../Frontend/AnalysisConsumer.cpp | 104 +- .../Frontend/CheckerRegistration.cpp | 23 +- .../Frontend/CheckerRegistry.cpp | 24 +- .../Frontend/FrontendActions.cpp | 2 +- lib/StaticAnalyzer/Frontend/ModelInjector.cpp | 3 +- lib/Tooling/ASTDiff/ASTDiff.cpp | 16 +- lib/Tooling/AllTUsExecution.cpp | 5 +- lib/Tooling/ArgumentsAdjusters.cpp | 16 + lib/Tooling/CommonOptionsParser.cpp | 2 +- lib/Tooling/CompilationDatabase.cpp | 4 +- lib/Tooling/Core/Replacement.cpp | 11 +- .../DependencyScanningFilesystem.cpp | 234 ++ .../DependencyScanningService.cpp | 19 + .../DependencyScanningTool.cpp | 71 + .../DependencyScanningWorker.cpp | 138 +- .../GuessTargetAndModeCompilationDatabase.cpp | 2 +- lib/Tooling/Inclusions/HeaderIncludes.cpp | 14 + lib/Tooling/Inclusions/IncludeStyle.cpp | 1 + .../InterpolatingCompilationDatabase.cpp | 33 +- lib/Tooling/Refactoring.cpp | 5 +- .../Refactoring/ASTSelectionRequirements.cpp | 2 +- lib/Tooling/Refactoring/Extract/Extract.cpp | 2 +- .../Refactoring/Extract/SourceExtraction.cpp | 8 +- .../Refactoring/RefactoringActions.cpp | 4 +- .../Refactoring/Rename/RenamingAction.cpp | 4 +- 
.../Refactoring/Rename/SymbolOccurrences.cpp | 2 +- .../Refactoring/Rename/USRFindingAction.cpp | 6 +- lib/Tooling/Refactoring/SourceCode.cpp | 31 - lib/Tooling/Refactoring/Stencil.cpp | 175 -- lib/Tooling/Refactoring/Transformer.cpp | 263 -- lib/Tooling/RefactoringCallbacks.cpp | 2 +- lib/Tooling/StandaloneExecution.cpp | 2 +- lib/Tooling/Syntax/BuildTree.cpp | 12 +- lib/Tooling/Syntax/Tokens.cpp | 17 +- lib/Tooling/Tooling.cpp | 84 +- lib/Tooling/Transformer/CMakeLists.txt | 18 + .../RangeSelector.cpp | 56 +- lib/Tooling/Transformer/RewriteRule.cpp | 178 ++ lib/Tooling/Transformer/SourceCode.cpp | 65 + .../Transformer/SourceCodeBuilders.cpp | 160 ++ lib/Tooling/Transformer/Stencil.cpp | 318 +++ lib/Tooling/Transformer/Transformer.cpp | 72 + tools/clang-format/ClangFormat.cpp | 284 +- tools/clang-offload-wrapper/CMakeLists.txt | 23 + .../ClangOffloadWrapper.cpp | 371 +++ tools/driver/cc1_main.cpp | 42 +- tools/driver/cc1as_main.cpp | 38 +- tools/driver/driver.cpp | 8 +- utils/TableGen/ClangASTNodesEmitter.cpp | 10 +- utils/TableGen/ClangAttrEmitter.cpp | 264 +- .../ClangCommentCommandInfoEmitter.cpp | 8 +- ...mentHTMLNamedCharacterReferenceEmitter.cpp | 9 +- utils/TableGen/ClangDataCollectorsEmitter.cpp | 5 +- utils/TableGen/ClangDiagnosticsEmitter.cpp | 18 +- utils/TableGen/ClangOpcodesEmitter.cpp | 357 +++ utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | 544 +++- utils/TableGen/ClangOptionDocEmitter.cpp | 9 +- utils/TableGen/ClangSACheckersEmitter.cpp | 5 +- utils/TableGen/ClangTypeNodesEmitter.cpp | 220 ++ utils/TableGen/NeonEmitter.cpp | 60 +- utils/TableGen/TableGen.cpp | 12 + utils/TableGen/TableGenBackends.h | 2 + 753 files changed, 46610 insertions(+), 17372 deletions(-) create mode 100644 include/clang-c/FatalErrorHandler.h create mode 100644 include/clang/AST/CXXRecordDeclDefinitionBits.def create mode 100644 include/clang/AST/OptionalDiagnostic.h delete mode 100644 include/clang/AST/TypeNodes.def rename include/clang/{StaticAnalyzer/Core/BugReporter => 
Analysis}/PathDiagnostic.h (88%) create mode 100644 include/clang/Basic/AArch64SVEACLETypes.def create mode 100644 include/clang/Basic/AttributeCommonInfo.h create mode 100644 include/clang/Basic/BuiltinsBPF.def rename include/clang/{Frontend => Basic}/LangStandard.h (60%) rename include/clang/{Frontend => Basic}/LangStandards.def (98%) create mode 100644 include/clang/Basic/TypeNodes.td delete mode 100644 include/clang/Index/CodegenNameGenerator.h create mode 100644 include/clang/Index/IndexingOptions.h create mode 100644 include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h create mode 100644 include/clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h create mode 100644 include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h delete mode 100644 include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h create mode 100644 include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h create mode 100644 include/clang/Tooling/DependencyScanning/DependencyScanningService.h create mode 100644 include/clang/Tooling/DependencyScanning/DependencyScanningTool.h rename {lib => include/clang}/Tooling/Refactoring/Extract/SourceExtraction.h (88%) mode change 100644 => 100755 create mode 100644 include/clang/Tooling/Transformer/MatchConsumer.h rename include/clang/Tooling/{Refactoring => Transformer}/RangeSelector.h (80%) rename include/clang/Tooling/{Refactoring/Transformer.h => Transformer/RewriteRule.h} (82%) rename include/clang/Tooling/{Refactoring => Transformer}/SourceCode.h (78%) create mode 100644 include/clang/Tooling/Transformer/SourceCodeBuilders.h rename include/clang/Tooling/{Refactoring => Transformer}/Stencil.h (58%) create mode 100644 include/clang/Tooling/Transformer/Transformer.h create mode 100644 lib/AST/Interp/Block.cpp create mode 100644 lib/AST/Interp/Block.h create mode 100644 lib/AST/Interp/Boolean.h create mode 100644 lib/AST/Interp/ByteCodeEmitter.cpp create mode 100644 lib/AST/Interp/ByteCodeEmitter.h 
create mode 100644 lib/AST/Interp/ByteCodeExprGen.cpp create mode 100644 lib/AST/Interp/ByteCodeExprGen.h create mode 100644 lib/AST/Interp/ByteCodeGenError.cpp create mode 100644 lib/AST/Interp/ByteCodeGenError.h create mode 100644 lib/AST/Interp/ByteCodeStmtGen.cpp create mode 100644 lib/AST/Interp/ByteCodeStmtGen.h create mode 100644 lib/AST/Interp/Context.cpp create mode 100644 lib/AST/Interp/Context.h create mode 100644 lib/AST/Interp/Descriptor.cpp create mode 100644 lib/AST/Interp/Descriptor.h create mode 100644 lib/AST/Interp/Disasm.cpp create mode 100644 lib/AST/Interp/EvalEmitter.cpp create mode 100644 lib/AST/Interp/EvalEmitter.h create mode 100644 lib/AST/Interp/Frame.cpp create mode 100644 lib/AST/Interp/Frame.h create mode 100644 lib/AST/Interp/Function.cpp create mode 100644 lib/AST/Interp/Function.h create mode 100644 lib/AST/Interp/Integral.h create mode 100644 lib/AST/Interp/Interp.cpp create mode 100644 lib/AST/Interp/Interp.h create mode 100644 lib/AST/Interp/InterpFrame.cpp create mode 100644 lib/AST/Interp/InterpFrame.h create mode 100644 lib/AST/Interp/InterpStack.cpp create mode 100644 lib/AST/Interp/InterpStack.h create mode 100644 lib/AST/Interp/InterpState.cpp create mode 100644 lib/AST/Interp/InterpState.h create mode 100644 lib/AST/Interp/Opcode.h create mode 100644 lib/AST/Interp/Opcodes.td create mode 100644 lib/AST/Interp/Pointer.cpp create mode 100644 lib/AST/Interp/Pointer.h create mode 100644 lib/AST/Interp/PrimType.cpp create mode 100644 lib/AST/Interp/PrimType.h create mode 100644 lib/AST/Interp/Program.cpp create mode 100644 lib/AST/Interp/Program.h create mode 100644 lib/AST/Interp/Record.cpp create mode 100644 lib/AST/Interp/Record.h create mode 100644 lib/AST/Interp/Source.cpp create mode 100644 lib/AST/Interp/Source.h create mode 100644 lib/AST/Interp/State.cpp create mode 100644 lib/AST/Interp/State.h rename lib/{StaticAnalyzer/Core => Analysis}/PathDiagnostic.cpp (81%) rename lib/{Frontend => Basic}/LangStandards.cpp 
(60%) create mode 100644 lib/Basic/Stack.cpp create mode 100644 lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp create mode 100644 lib/Driver/ToolChains/InterfaceStubs.cpp create mode 100644 lib/Driver/ToolChains/InterfaceStubs.h create mode 100644 lib/Headers/ppc_wrappers/pmmintrin.h create mode 100644 lib/Headers/ppc_wrappers/smmintrin.h create mode 100644 lib/Headers/ppc_wrappers/tmmintrin.h delete mode 100644 lib/Index/CodegenNameGenerator.cpp create mode 100644 lib/Sema/SemaConcept.cpp create mode 100755 lib/StaticAnalyzer/Checkers/Yaml.h create mode 100644 lib/StaticAnalyzer/Core/DynamicType.cpp delete mode 100644 lib/StaticAnalyzer/Core/DynamicTypeMap.cpp create mode 100644 lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp create mode 100644 lib/Tooling/DependencyScanning/DependencyScanningService.cpp create mode 100644 lib/Tooling/DependencyScanning/DependencyScanningTool.cpp delete mode 100644 lib/Tooling/Refactoring/SourceCode.cpp delete mode 100644 lib/Tooling/Refactoring/Stencil.cpp delete mode 100644 lib/Tooling/Refactoring/Transformer.cpp create mode 100644 lib/Tooling/Transformer/CMakeLists.txt rename lib/Tooling/{Refactoring => Transformer}/RangeSelector.cpp (85%) create mode 100644 lib/Tooling/Transformer/RewriteRule.cpp create mode 100644 lib/Tooling/Transformer/SourceCode.cpp create mode 100644 lib/Tooling/Transformer/SourceCodeBuilders.cpp create mode 100644 lib/Tooling/Transformer/Stencil.cpp create mode 100644 lib/Tooling/Transformer/Transformer.cpp create mode 100644 tools/clang-offload-wrapper/CMakeLists.txt create mode 100644 tools/clang-offload-wrapper/ClangOffloadWrapper.cpp create mode 100644 utils/TableGen/ClangOpcodesEmitter.cpp create mode 100644 utils/TableGen/ClangTypeNodesEmitter.cpp diff --git a/include/clang-c/FatalErrorHandler.h b/include/clang-c/FatalErrorHandler.h new file mode 100644 index 00000000000..ce8ff2cae73 --- /dev/null +++ b/include/clang-c/FatalErrorHandler.h @@ -0,0 +1,33 @@ +/*===-- 
clang-c/FatalErrorHandler.h - Fatal Error Handling --------*- C -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_CLANG_C_FATAL_ERROR_HANDLER_H +#define LLVM_CLANG_C_FATAL_ERROR_HANDLER_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Installs error handler that prints error message to stderr and calls abort(). + * Replaces currently installed error handler (if any). + */ +void clang_install_aborting_llvm_fatal_error_handler(void); + +/** + * Removes currently installed error handler (if any). + * If no error handler is installed, the default strategy is to print error + * message to stderr and call exit(1). + */ +void clang_uninstall_llvm_fatal_error_handler(void); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h index 74badac740b..22689350543 100644 --- a/include/clang-c/Index.h +++ b/include/clang-c/Index.h @@ -1356,7 +1356,12 @@ enum CXTranslationUnit_Flags { * the case where these warnings are not of interest, as for an IDE for * example, which typically shows only the diagnostics in the main file. */ - CXTranslationUnit_IgnoreNonErrorsFromIncludedFiles = 0x4000 + CXTranslationUnit_IgnoreNonErrorsFromIncludedFiles = 0x4000, + + /** + * Tells the preprocessor not to skip excluded conditional blocks. + */ + CXTranslationUnit_RetainExcludedConditionalBlocks = 0x8000 }; /** @@ -2550,7 +2555,20 @@ enum CXCursorKind { */ CXCursor_BuiltinBitCastExpr = 280, - CXCursor_LastStmt = CXCursor_BuiltinBitCastExpr, + /** OpenMP master taskloop directive. + */ + CXCursor_OMPMasterTaskLoopDirective = 281, + + /** OpenMP parallel master taskloop directive.
+ */ + CXCursor_OMPParallelMasterTaskLoopDirective = 282, + + /** OpenMP master taskloop simd directive. + */ + CXCursor_OMPMasterTaskLoopSimdDirective = 283, + + + CXCursor_LastStmt = CXCursor_OMPMasterTaskLoopSimdDirective, /** * Cursor that represents the translation unit itself. diff --git a/include/clang/AST/APValue.h b/include/clang/AST/APValue.h index 6943479831e..63359294ef6 100644 --- a/include/clang/AST/APValue.h +++ b/include/clang/AST/APValue.h @@ -53,6 +53,34 @@ public: void print(llvm::raw_ostream &Out, const PrintingPolicy &Policy) const; }; + +/// Symbolic representation of a dynamic allocation. +class DynamicAllocLValue { + unsigned Index; + +public: + DynamicAllocLValue() : Index(0) {} + explicit DynamicAllocLValue(unsigned Index) : Index(Index + 1) {} + unsigned getIndex() { return Index - 1; } + + explicit operator bool() const { return Index != 0; } + + void *getOpaqueValue() { + return reinterpret_cast(static_cast(Index) + << NumLowBitsAvailable); + } + static DynamicAllocLValue getFromOpaqueValue(void *Value) { + DynamicAllocLValue V; + V.Index = reinterpret_cast(Value) >> NumLowBitsAvailable; + return V; + } + + static unsigned getMaxIndex() { + return (std::numeric_limits::max() >> NumLowBitsAvailable) - 1; + } + + static constexpr int NumLowBitsAvailable = 3; +}; } namespace llvm { @@ -67,6 +95,17 @@ template<> struct PointerLikeTypeTraits { // to include Type.h. 
static constexpr int NumLowBitsAvailable = 3; }; + +template<> struct PointerLikeTypeTraits { + static void *getAsVoidPointer(clang::DynamicAllocLValue V) { + return V.getOpaqueValue(); + } + static clang::DynamicAllocLValue getFromVoidPointer(void *P) { + return clang::DynamicAllocLValue::getFromOpaqueValue(P); + } + static constexpr int NumLowBitsAvailable = + clang::DynamicAllocLValue::NumLowBitsAvailable; +}; } namespace clang { @@ -97,13 +136,15 @@ public: }; class LValueBase { - typedef llvm::PointerUnion + typedef llvm::PointerUnion PtrTy; public: LValueBase() : Local{} {} LValueBase(const ValueDecl *P, unsigned I = 0, unsigned V = 0); LValueBase(const Expr *P, unsigned I = 0, unsigned V = 0); + static LValueBase getDynamicAlloc(DynamicAllocLValue LV, QualType Type); static LValueBase getTypeInfo(TypeInfoLValue LV, QualType TypeInfo); template @@ -124,6 +165,7 @@ public: unsigned getCallIndex() const; unsigned getVersion() const; QualType getTypeInfoType() const; + QualType getDynamicAllocType() const; friend bool operator==(const LValueBase &LHS, const LValueBase &RHS); friend bool operator!=(const LValueBase &LHS, const LValueBase &RHS) { @@ -140,6 +182,8 @@ public: LocalState Local; /// The type std::type_info, if this is a TypeInfoLValue. void *TypeInfoType; + /// The QualType, if this is a DynamicAllocLValue. 
+ void *DynamicAllocType; }; }; diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h index 1d1aaf4fb11..5e2f4031d96 100644 --- a/include/clang/AST/ASTContext.h +++ b/include/clang/AST/ASTContext.h @@ -139,6 +139,12 @@ class FullComment; } // namespace comments +namespace interp { + +class Context; + +} // namespace interp + struct TypeInfo { uint64_t Width = 0; unsigned Align = 0; @@ -179,7 +185,8 @@ private: mutable llvm::FoldingSet LValueReferenceTypes; mutable llvm::FoldingSet RValueReferenceTypes; mutable llvm::FoldingSet MemberPointerTypes; - mutable llvm::FoldingSet ConstantArrayTypes; + mutable llvm::ContextualFoldingSet + ConstantArrayTypes; mutable llvm::FoldingSet IncompleteArrayTypes; mutable std::vector VariableArrayTypes; mutable llvm::FoldingSet DependentSizedArrayTypes; @@ -507,6 +514,8 @@ private: /// need to be consistently numbered for the mangler). llvm::DenseMap> MangleNumberingContexts; + llvm::DenseMap> + ExtraMangleNumberingContexts; /// Side-table of mangling numbers for declarations which rarely /// need them (like static local vars). @@ -564,6 +573,7 @@ private: const TargetInfo *Target = nullptr; const TargetInfo *AuxTarget = nullptr; clang::PrintingPolicy PrintingPolicy; + std::unique_ptr InterpContext; public: IdentifierTable &Idents; @@ -573,6 +583,9 @@ public: IntrusiveRefCntPtr ExternalSource; ASTMutationListener *Listener = nullptr; + /// Returns the clang bytecode interpreter context. + interp::Context &getInterpContext(); + /// Container for either a single DynTypedNode or for an ArrayRef to /// DynTypedNode. For use with ParentMap. class DynTypedNodeList { @@ -729,71 +742,49 @@ public: /// True if comments are already loaded from ExternalASTSource. mutable bool CommentsLoaded = false; - class RawCommentAndCacheFlags { - public: - enum Kind { - /// We searched for a comment attached to the particular declaration, but - /// didn't find any. - /// - /// getRaw() == 0. 
- NoCommentInDecl = 0, - - /// We have found a comment attached to this particular declaration. - /// - /// getRaw() != 0. - FromDecl, - - /// This declaration does not have an attached comment, and we have - /// searched the redeclaration chain. - /// - /// If getRaw() == 0, the whole redeclaration chain does not have any - /// comments. - /// - /// If getRaw() != 0, it is a comment propagated from other - /// redeclaration. - FromRedecl - }; - - Kind getKind() const LLVM_READONLY { - return Data.getInt(); - } - - void setKind(Kind K) { - Data.setInt(K); - } - - const RawComment *getRaw() const LLVM_READONLY { - return Data.getPointer(); - } - - void setRaw(const RawComment *RC) { - Data.setPointer(RC); - } - - const Decl *getOriginalDecl() const LLVM_READONLY { - return OriginalDecl; - } - - void setOriginalDecl(const Decl *Orig) { - OriginalDecl = Orig; - } - - private: - llvm::PointerIntPair Data; - const Decl *OriginalDecl; - }; - - /// Mapping from declarations to comments attached to any - /// redeclaration. + /// Mapping from declaration to directly attached comment. /// /// Raw comments are owned by Comments list. This mapping is populated /// lazily. - mutable llvm::DenseMap RedeclComments; + mutable llvm::DenseMap DeclRawComments; + + /// Mapping from canonical declaration to the first redeclaration in chain + /// that has a comment attached. + /// + /// Raw comments are owned by Comments list. This mapping is populated + /// lazily. + mutable llvm::DenseMap RedeclChainComments; + + /// Keeps track of redeclaration chains that don't have any comment attached. + /// Mapping from canonical declaration to redeclaration chain that has no + /// comments attached to any redeclaration. Specifically it's mapping to + /// the last redeclaration we've checked. + /// + /// Shall not contain declarations that have comments attached to any + /// redeclaration in their chain. 
+ mutable llvm::DenseMap CommentlessRedeclChains; /// Mapping from declarations to parsed comments attached to any /// redeclaration. mutable llvm::DenseMap ParsedComments; + /// Attaches \p Comment to \p OriginalD and to its redeclaration chain + /// and removes the redeclaration chain from the set of commentless chains. + /// + /// Don't do anything if a comment has already been attached to \p OriginalD + /// or its redeclaration chain. + void cacheRawCommentForDecl(const Decl &OriginalD, + const RawComment &Comment) const; + + /// \returns searches \p CommentsInFile for doc comment for \p D. + /// + /// \p RepresentativeLocForDecl is used as a location for searching doc + /// comments. \p CommentsInFile is a mapping offset -> comment of files in the + /// same file where \p RepresentativeLocForDecl is. + RawComment *getRawCommentForDeclNoCacheImpl( + const Decl *D, const SourceLocation RepresentativeLocForDecl, + const std::map &CommentsInFile) const; + /// Return the documentation comment attached to a given declaration, /// without looking into cache. RawComment *getRawCommentForDeclNoCache(const Decl *D) const; @@ -818,6 +809,16 @@ public: getRawCommentForAnyRedecl(const Decl *D, const Decl **OriginalDecl = nullptr) const; + /// Searches existing comments for doc comments that should be attached to \p + /// Decls. If any doc comment is found, it is parsed. + /// + /// Requirement: All \p Decls are in the same file. + /// + /// If the last comment in the file is already attached we assume + /// there are not comments left to be attached to \p Decls. + void attachCommentsToJustParsedDecls(ArrayRef Decls, + const Preprocessor *PP); + /// Return parsed documentation comment attached to a given declaration. /// Returns nullptr if no comment is attached. 
/// @@ -1054,6 +1055,9 @@ public: #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ CanQualType Id##Ty; #include "clang/Basic/OpenCLExtensionTypes.def" +#define SVE_TYPE(Name, Id, SingletonId) \ + CanQualType SingletonId; +#include "clang/Basic/AArch64SVEACLETypes.def" // Types for deductions in C++0x [stmt.ranged]'s desugaring. Built on demand. mutable QualType AutoDeductTy; // Deduction against 'auto'. @@ -1329,6 +1333,7 @@ public: /// Return the unique reference to the type for a constant array of /// the specified element type. QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, + const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const; @@ -1498,8 +1503,7 @@ public: bool isKindOf) const; QualType getObjCTypeParamType(const ObjCTypeParamDecl *Decl, - ArrayRef protocols, - QualType Canonical = QualType()) const; + ArrayRef protocols) const; bool ObjCObjectAdoptsQTypeProtocols(QualType QT, ObjCInterfaceDecl *Decl); @@ -2054,6 +2058,11 @@ public: /// types. bool areCompatibleVectorTypes(QualType FirstVec, QualType SecondVec); + /// Return true if the type has been explicitly qualified with ObjC ownership. + /// A type may be implicitly qualified with ownership under ObjC ARC, and in + /// some cases the compiler treats these differently. + bool hasDirectOwnershipQualifier(QualType Ty) const; + /// Return true if this is an \c NSObject object with its \c NSObject /// attribute set. 
static bool isObjCNSObjectType(QualType Ty) { @@ -2577,10 +2586,12 @@ public: return T == getObjCSelType(); } - bool ObjCQualifiedIdTypesAreCompatible(QualType LHS, QualType RHS, + bool ObjCQualifiedIdTypesAreCompatible(const ObjCObjectPointerType *LHS, + const ObjCObjectPointerType *RHS, bool ForCompare); - bool ObjCQualifiedClassTypesAreCompatible(QualType LHS, QualType RHS); + bool ObjCQualifiedClassTypesAreCompatible(const ObjCObjectPointerType *LHS, + const ObjCObjectPointerType *RHS); // Check the safety of assignment from LHS to RHS bool canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT, @@ -2802,6 +2813,9 @@ public: /// Retrieve the context for computing mangling numbers in the given /// DeclContext. MangleNumberingContext &getManglingNumberContext(const DeclContext *DC); + enum NeedExtraManglingDecl_t { NeedExtraManglingDecl }; + MangleNumberingContext &getManglingNumberContext(NeedExtraManglingDecl_t, + const Decl *D); std::unique_ptr createMangleNumberingContext() const; diff --git a/include/clang/AST/ASTFwd.h b/include/clang/AST/ASTFwd.h index 93919bbdd52..25c32148544 100644 --- a/include/clang/AST/ASTFwd.h +++ b/include/clang/AST/ASTFwd.h @@ -24,7 +24,7 @@ class Stmt; #include "clang/AST/StmtNodes.inc" class Type; #define TYPE(DERIVED, BASE) class DERIVED##Type; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" class CXXCtorInitializer; } // end namespace clang diff --git a/include/clang/AST/ASTImporter.h b/include/clang/AST/ASTImporter.h index 4a55c120a45..c82dcab35db 100644 --- a/include/clang/AST/ASTImporter.h +++ b/include/clang/AST/ASTImporter.h @@ -87,6 +87,10 @@ class TypeSourceInfo; using NonEquivalentDeclSet = llvm::DenseSet>; using ImportedCXXBaseSpecifierMap = llvm::DenseMap; + using FileIDImportHandlerType = + std::function; + + enum class ODRHandlingType { Conservative, Liberal }; // An ImportPath is the list of the AST nodes which we visit during an // Import call. 
@@ -210,6 +214,8 @@ class TypeSourceInfo; }; private: + FileIDImportHandlerType FileIDImportHandler; + std::shared_ptr SharedState = nullptr; /// The path which we go through during the import of a given AST node. @@ -232,6 +238,8 @@ class TypeSourceInfo; /// Whether to perform a minimal import. bool Minimal; + ODRHandlingType ODRHandling; + /// Whether the last diagnostic came from the "from" context. bool LastDiagFromFrom = false; @@ -310,10 +318,20 @@ class TypeSourceInfo; virtual ~ASTImporter(); + /// Set a callback function for FileID import handling. + /// The function is invoked when a FileID is imported from the From context. + /// The imported FileID in the To context and the original FileID in the + /// From context is passed to it. + void setFileIDImportHandler(FileIDImportHandlerType H) { + FileIDImportHandler = H; + } + /// Whether the importer will perform a minimal import, creating /// to-be-completed forward declarations when possible. bool isMinimalImport() const { return Minimal; } + void setODRHandling(ODRHandlingType T) { ODRHandling = T; } + /// \brief Import the given object, returns the result. /// /// \param To Import the object into this variable. @@ -366,6 +384,20 @@ class TypeSourceInfo; /// imported. If it does not exist nullptr is returned. TranslationUnitDecl *GetFromTU(Decl *ToD); + /// Return the declaration in the "from" context from which the declaration + /// in the "to" context was imported. If it was not imported or of the wrong + /// type a null value is returned. + template + llvm::Optional getImportedFromDecl(const DeclT *ToD) const { + auto FromI = ImportedFromDecls.find(ToD); + if (FromI == ImportedFromDecls.end()) + return {}; + auto *FromD = dyn_cast(FromI->second); + if (!FromD) + return {}; + return FromD; + } + /// Import the given declaration context from the "from" /// AST context into the "to" AST context. 
/// @@ -491,12 +523,11 @@ class TypeSourceInfo; /// /// \param NumDecls the number of conflicting declarations in \p Decls. /// - /// \returns the name that the newly-imported declaration should have. - virtual DeclarationName HandleNameConflict(DeclarationName Name, - DeclContext *DC, - unsigned IDNS, - NamedDecl **Decls, - unsigned NumDecls); + /// \returns the name that the newly-imported declaration should have. Or + /// an error if we can't handle the name conflict. + virtual Expected + HandleNameConflict(DeclarationName Name, DeclContext *DC, unsigned IDNS, + NamedDecl **Decls, unsigned NumDecls); /// Retrieve the context that AST nodes are being imported into. ASTContext &getToContext() const { return ToContext; } diff --git a/include/clang/AST/ASTImporterSharedState.h b/include/clang/AST/ASTImporterSharedState.h index 3635a62deef..829eb1c611c 100644 --- a/include/clang/AST/ASTImporterSharedState.h +++ b/include/clang/AST/ASTImporterSharedState.h @@ -47,7 +47,7 @@ public: ASTImporterSharedState() = default; ASTImporterSharedState(TranslationUnitDecl &ToTU) { - LookupTable = llvm::make_unique(ToTU); + LookupTable = std::make_unique(ToTU); } ASTImporterLookupTable *getLookupTable() { return LookupTable.get(); } diff --git a/include/clang/AST/ASTNodeTraverser.h b/include/clang/AST/ASTNodeTraverser.h index e43eacef86c..0bb2aad553f 100644 --- a/include/clang/AST/ASTNodeTraverser.h +++ b/include/clang/AST/ASTNodeTraverser.h @@ -237,6 +237,9 @@ public: for (const auto &TP : *TPL) Visit(TP); + + if (const Expr *RC = TPL->getRequiresClause()) + Visit(RC); } void diff --git a/include/clang/AST/ASTStructuralEquivalence.h b/include/clang/AST/ASTStructuralEquivalence.h index 70e0daa08a9..36a42070fd2 100644 --- a/include/clang/AST/ASTStructuralEquivalence.h +++ b/include/clang/AST/ASTStructuralEquivalence.h @@ -18,7 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" -#include +#include #include namespace clang { @@ -42,14 
+42,13 @@ struct StructuralEquivalenceContext { /// AST contexts for which we are checking structural equivalence. ASTContext &FromCtx, &ToCtx; - /// The set of "tentative" equivalences between two canonical - /// declarations, mapping from a declaration in the first context to the - /// declaration in the second context that we believe to be equivalent. - llvm::DenseMap TentativeEquivalences; + // Queue of from-to Decl pairs that are to be checked to determine the final + // result of equivalence of a starting Decl pair. + std::queue> DeclsToCheck; - /// Queue of declarations in the first context whose equivalence - /// with a declaration in the second context still needs to be verified. - std::deque DeclsToCheck; + // Set of from-to Decl pairs that are already visited during the check + // (are in or were once in \c DeclsToCheck) of a starting Decl pair. + llvm::DenseSet> VisitedDecls; /// Declaration (from, to) pairs that are known not to be equivalent /// (which we have already complained about). @@ -88,14 +87,14 @@ struct StructuralEquivalenceContext { /// Implementation functions (all static functions in /// ASTStructuralEquivalence.cpp) must never call this function because that /// will wreak havoc the internal state (\c DeclsToCheck and - /// \c TentativeEquivalences members) and can cause faulty equivalent results. + /// \c VisitedDecls members) and can cause faulty equivalent results. bool IsEquivalent(Decl *D1, Decl *D2); /// Determine whether the two types are structurally equivalent. /// Implementation functions (all static functions in /// ASTStructuralEquivalence.cpp) must never call this function because that /// will wreak havoc the internal state (\c DeclsToCheck and - /// \c TentativeEquivalences members) and can cause faulty equivalent results. + /// \c VisitedDecls members) and can cause faulty equivalent results. 
bool IsEquivalent(QualType T1, QualType T2); /// Find the index of the given anonymous struct/union within its diff --git a/include/clang/AST/ASTTypeTraits.h b/include/clang/AST/ASTTypeTraits.h index a29a04e5d24..dd4ead2f0c2 100644 --- a/include/clang/AST/ASTTypeTraits.h +++ b/include/clang/AST/ASTTypeTraits.h @@ -148,7 +148,7 @@ private: #include "clang/AST/StmtNodes.inc" NKI_Type, #define TYPE(DERIVED, BASE) NKI_##DERIVED##Type, -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" NKI_OMPClause, #define OPENMP_CLAUSE(TextualSpelling, Class) NKI_##Class, #include "clang/Basic/OpenMPKinds.def" @@ -205,7 +205,7 @@ KIND_TO_KIND_ID(OMPClause) #define STMT(DERIVED, BASE) KIND_TO_KIND_ID(DERIVED) #include "clang/AST/StmtNodes.inc" #define TYPE(DERIVED, BASE) KIND_TO_KIND_ID(DERIVED##Type) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" #define OPENMP_CLAUSE(TextualSpelling, Class) KIND_TO_KIND_ID(Class) #include "clang/Basic/OpenMPKinds.def" #undef KIND_TO_KIND_ID diff --git a/include/clang/AST/Attr.h b/include/clang/AST/Attr.h index 1fbed7ceebf..d315dde6ed4 100644 --- a/include/clang/AST/Attr.h +++ b/include/clang/AST/Attr.h @@ -19,6 +19,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/Type.h" #include "clang/Basic/AttrKinds.h" +#include "clang/Basic/AttributeCommonInfo.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/Sanitizers.h" @@ -32,6 +33,7 @@ namespace clang { class ASTContext; + class AttributeCommonInfo; class IdentifierInfo; class ObjCInterfaceDecl; class Expr; @@ -40,84 +42,79 @@ namespace clang { class TypeSourceInfo; /// Attr - This represents one attribute. -class Attr { -private: - SourceRange Range; - unsigned AttrKind : 16; + class Attr : public AttributeCommonInfo { + private: + unsigned AttrKind : 16; -protected: - /// An index into the spelling list of an - /// attribute defined in Attr.td file. 
- unsigned SpellingListIndex : 4; - unsigned Inherited : 1; - unsigned IsPackExpansion : 1; - unsigned Implicit : 1; - // FIXME: These are properties of the attribute kind, not state for this - // instance of the attribute. - unsigned IsLateParsed : 1; - unsigned InheritEvenIfAlreadyPresent : 1; + protected: + /// An index into the spelling list of an + /// attribute defined in Attr.td file. + unsigned Inherited : 1; + unsigned IsPackExpansion : 1; + unsigned Implicit : 1; + // FIXME: These are properties of the attribute kind, not state for this + // instance of the attribute. + unsigned IsLateParsed : 1; + unsigned InheritEvenIfAlreadyPresent : 1; - void *operator new(size_t bytes) noexcept { - llvm_unreachable("Attrs cannot be allocated with regular 'new'."); - } - void operator delete(void *data) noexcept { - llvm_unreachable("Attrs cannot be released with regular 'delete'."); - } + void *operator new(size_t bytes) noexcept { + llvm_unreachable("Attrs cannot be allocated with regular 'new'."); + } + void operator delete(void *data) noexcept { + llvm_unreachable("Attrs cannot be released with regular 'delete'."); + } -public: - // Forward so that the regular new and delete do not hide global ones. - void *operator new(size_t Bytes, ASTContext &C, - size_t Alignment = 8) noexcept { - return ::operator new(Bytes, C, Alignment); - } - void operator delete(void *Ptr, ASTContext &C, size_t Alignment) noexcept { - return ::operator delete(Ptr, C, Alignment); - } + public: + // Forward so that the regular new and delete do not hide global ones. 
+ void *operator new(size_t Bytes, ASTContext &C, + size_t Alignment = 8) noexcept { + return ::operator new(Bytes, C, Alignment); + } + void operator delete(void *Ptr, ASTContext &C, size_t Alignment) noexcept { + return ::operator delete(Ptr, C, Alignment); + } -protected: - Attr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex, - bool IsLateParsed) - : Range(R), AttrKind(AK), SpellingListIndex(SpellingListIndex), - Inherited(false), IsPackExpansion(false), Implicit(false), - IsLateParsed(IsLateParsed), InheritEvenIfAlreadyPresent(false) {} + protected: + Attr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed) + : AttributeCommonInfo(CommonInfo), AttrKind(AK), Inherited(false), + IsPackExpansion(false), Implicit(false), IsLateParsed(IsLateParsed), + InheritEvenIfAlreadyPresent(false) {} -public: + public: + attr::Kind getKind() const { return static_cast(AttrKind); } - attr::Kind getKind() const { - return static_cast(AttrKind); - } + unsigned getSpellingListIndex() const { + return getAttributeSpellingListIndex(); + } + const char *getSpelling() const; - unsigned getSpellingListIndex() const { return SpellingListIndex; } - const char *getSpelling() const; + SourceLocation getLocation() const { return getRange().getBegin(); } - SourceLocation getLocation() const { return Range.getBegin(); } - SourceRange getRange() const { return Range; } - void setRange(SourceRange R) { Range = R; } + bool isInherited() const { return Inherited; } - bool isInherited() const { return Inherited; } + /// Returns true if the attribute has been implicitly created instead + /// of explicitly written by the user. + bool isImplicit() const { return Implicit; } + void setImplicit(bool I) { Implicit = I; } - /// Returns true if the attribute has been implicitly created instead - /// of explicitly written by the user. 
- bool isImplicit() const { return Implicit; } - void setImplicit(bool I) { Implicit = I; } + void setPackExpansion(bool PE) { IsPackExpansion = PE; } + bool isPackExpansion() const { return IsPackExpansion; } - void setPackExpansion(bool PE) { IsPackExpansion = PE; } - bool isPackExpansion() const { return IsPackExpansion; } + // Clone this attribute. + Attr *clone(ASTContext &C) const; - // Clone this attribute. - Attr *clone(ASTContext &C) const; + bool isLateParsed() const { return IsLateParsed; } - bool isLateParsed() const { return IsLateParsed; } - - // Pretty print this attribute. - void printPretty(raw_ostream &OS, const PrintingPolicy &Policy) const; -}; + // Pretty print this attribute. + void printPretty(raw_ostream &OS, const PrintingPolicy &Policy) const; + }; class TypeAttr : public Attr { protected: - TypeAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex, - bool IsLateParsed) - : Attr(AK, R, SpellingListIndex, IsLateParsed) {} + TypeAttr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed) + : Attr(Context, CommonInfo, AK, IsLateParsed) {} public: static bool classof(const Attr *A) { @@ -128,9 +125,9 @@ public: class StmtAttr : public Attr { protected: - StmtAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex, - bool IsLateParsed) - : Attr(AK, R, SpellingListIndex, IsLateParsed) {} + StmtAttr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed) + : Attr(Context, CommonInfo, AK, IsLateParsed) {} public: static bool classof(const Attr *A) { @@ -141,9 +138,10 @@ public: class InheritableAttr : public Attr { protected: - InheritableAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex, - bool IsLateParsed, bool InheritEvenIfAlreadyPresent) - : Attr(AK, R, SpellingListIndex, IsLateParsed) { + InheritableAttr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed, + bool InheritEvenIfAlreadyPresent) + 
: Attr(Context, CommonInfo, AK, IsLateParsed) { this->InheritEvenIfAlreadyPresent = InheritEvenIfAlreadyPresent; } @@ -165,9 +163,10 @@ public: class InheritableParamAttr : public InheritableAttr { protected: - InheritableParamAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex, + InheritableParamAttr(ASTContext &Context, + const AttributeCommonInfo &CommonInfo, attr::Kind AK, bool IsLateParsed, bool InheritEvenIfAlreadyPresent) - : InheritableAttr(AK, R, SpellingListIndex, IsLateParsed, + : InheritableAttr(Context, CommonInfo, AK, IsLateParsed, InheritEvenIfAlreadyPresent) {} public: @@ -182,11 +181,11 @@ public: /// for the parameter. class ParameterABIAttr : public InheritableParamAttr { protected: - ParameterABIAttr(attr::Kind AK, SourceRange R, - unsigned SpellingListIndex, bool IsLateParsed, + ParameterABIAttr(ASTContext &Context, const AttributeCommonInfo &CommonInfo, + attr::Kind AK, bool IsLateParsed, bool InheritEvenIfAlreadyPresent) - : InheritableParamAttr(AK, R, SpellingListIndex, IsLateParsed, - InheritEvenIfAlreadyPresent) {} + : InheritableParamAttr(Context, CommonInfo, AK, IsLateParsed, + InheritEvenIfAlreadyPresent) {} public: ParameterABI getABI() const { diff --git a/include/clang/AST/CXXRecordDeclDefinitionBits.def b/include/clang/AST/CXXRecordDeclDefinitionBits.def new file mode 100644 index 00000000000..bd4d8247aec --- /dev/null +++ b/include/clang/AST/CXXRecordDeclDefinitionBits.def @@ -0,0 +1,236 @@ +//===-- CXXRecordDeclDefinitionBits.def - Class definition bits -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file enumerates the various bitfields that we want to store on C++ class +// definitions. 
+// +//===----------------------------------------------------------------------===// +// +/// @file CXXRecordDeclDefinitionBits.def +/// +/// In this file, each of the bitfields representing data about a C++ class +/// results in an expansion of the FIELD macro, which should be defined before +/// including this file. +/// +/// The macro have three operands: +/// +/// Name: The name of the field, as a member of CXXRecordDecl::DefinitionData. +/// +/// BitWidth: The width of the field in bits. +/// +/// MergePolicy: How to behave when the value of the field is different in +/// multiple translation units, one of: +/// NO_MERGE: It is an ODR violation if the fields do not match. +/// MERGE_OR: Merge the fields by ORing them together. + +#ifndef FIELD +#error define FIELD before including this file +#endif + +/// True if this class has any user-declared constructors. +FIELD(UserDeclaredConstructor, 1, NO_MERGE) + +/// The user-declared special members which this class has. +FIELD(UserDeclaredSpecialMembers, 6, NO_MERGE) + +/// True when this class is an aggregate. +FIELD(Aggregate, 1, NO_MERGE) + +/// True when this class is a POD-type. +FIELD(PlainOldData, 1, NO_MERGE) + +/// True when this class is empty for traits purposes, that is: +/// * has no data members other than 0-width bit-fields and empty fields +/// marked [[no_unique_address]] +/// * has no virtual function/base, and +/// * doesn't inherit from a non-empty class. +/// Doesn't take union-ness into account. +FIELD(Empty, 1, NO_MERGE) + +/// True when this class is polymorphic, i.e., has at +/// least one virtual member or derives from a polymorphic class. +FIELD(Polymorphic, 1, NO_MERGE) + +/// True when this class is abstract, i.e., has at least +/// one pure virtual function, (that can come from a base class). +FIELD(Abstract, 1, NO_MERGE) + +/// True when this class is standard-layout, per the applicable +/// language rules (including DRs). 
+FIELD(IsStandardLayout, 1, NO_MERGE) + +/// True when this class was standard-layout under the C++11 +/// definition. +/// +/// C++11 [class]p7. A standard-layout class is a class that: +/// * has no non-static data members of type non-standard-layout class (or +/// array of such types) or reference, +/// * has no virtual functions (10.3) and no virtual base classes (10.1), +/// * has the same access control (Clause 11) for all non-static data +/// members +/// * has no non-standard-layout base classes, +/// * either has no non-static data members in the most derived class and at +/// most one base class with non-static data members, or has no base +/// classes with non-static data members, and +/// * has no base classes of the same type as the first non-static data +/// member. +FIELD(IsCXX11StandardLayout, 1, NO_MERGE) + +/// True when any base class has any declared non-static data +/// members or bit-fields. +/// This is a helper bit of state used to implement IsStandardLayout more +/// efficiently. +FIELD(HasBasesWithFields, 1, NO_MERGE) + +/// True when any base class has any declared non-static data +/// members. +/// This is a helper bit of state used to implement IsCXX11StandardLayout +/// more efficiently. +FIELD(HasBasesWithNonStaticDataMembers, 1, NO_MERGE) + +/// True when there are private non-static data members. +FIELD(HasPrivateFields, 1, NO_MERGE) + +/// True when there are protected non-static data members. +FIELD(HasProtectedFields, 1, NO_MERGE) + +/// True when there are private non-static data members. +FIELD(HasPublicFields, 1, NO_MERGE) + +/// True if this class (or any subobject) has mutable fields. +FIELD(HasMutableFields, 1, NO_MERGE) + +/// True if this class (or any nested anonymous struct or union) +/// has variant members. +FIELD(HasVariantMembers, 1, NO_MERGE) + +/// True if there no non-field members declared by the user. 
+FIELD(HasOnlyCMembers, 1, NO_MERGE) + +/// True if any field has an in-class initializer, including those +/// within anonymous unions or structs. +FIELD(HasInClassInitializer, 1, NO_MERGE) + +/// True if any field is of reference type, and does not have an +/// in-class initializer. +/// +/// In this case, value-initialization of this class is illegal in C++98 +/// even if the class has a trivial default constructor. +FIELD(HasUninitializedReferenceMember, 1, NO_MERGE) + +/// True if any non-mutable field whose type doesn't have a user- +/// provided default ctor also doesn't have an in-class initializer. +FIELD(HasUninitializedFields, 1, NO_MERGE) + +/// True if there are any member using-declarations that inherit +/// constructors from a base class. +FIELD(HasInheritedConstructor, 1, NO_MERGE) + +/// True if there are any member using-declarations named +/// 'operator='. +FIELD(HasInheritedAssignment, 1, NO_MERGE) + +/// These flags are \c true if a defaulted corresponding special +/// member can't be fully analyzed without performing overload resolution. +/// @{ +FIELD(NeedOverloadResolutionForCopyConstructor, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForMoveConstructor, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForMoveAssignment, 1, NO_MERGE) +FIELD(NeedOverloadResolutionForDestructor, 1, NO_MERGE) +/// @} + +/// These flags are \c true if an implicit defaulted corresponding +/// special member would be defined as deleted. +/// @{ +FIELD(DefaultedCopyConstructorIsDeleted, 1, NO_MERGE) +FIELD(DefaultedMoveConstructorIsDeleted, 1, NO_MERGE) +FIELD(DefaultedMoveAssignmentIsDeleted, 1, NO_MERGE) +FIELD(DefaultedDestructorIsDeleted, 1, NO_MERGE) +/// @} + +/// The trivial special members which this class has, per +/// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25, +/// C++11 [class.dtor]p5, or would have if the member were not suppressed. 
+/// +/// This excludes any user-declared but not user-provided special members +/// which have been declared but not yet defined. +FIELD(HasTrivialSpecialMembers, 6, MERGE_OR) + +/// These bits keep track of the triviality of special functions for the +/// purpose of calls. Only the bits corresponding to SMF_CopyConstructor, +/// SMF_MoveConstructor, and SMF_Destructor are meaningful here. +FIELD(HasTrivialSpecialMembersForCall, 6, MERGE_OR) + +/// The declared special members of this class which are known to be +/// non-trivial. +/// +/// This excludes any user-declared but not user-provided special members +/// which have been declared but not yet defined, and any implicit special +/// members which have not yet been declared. +FIELD(DeclaredNonTrivialSpecialMembers, 6, MERGE_OR) + +/// These bits keep track of the declared special members that are +/// non-trivial for the purpose of calls. +/// Only the bits corresponding to SMF_CopyConstructor, +/// SMF_MoveConstructor, and SMF_Destructor are meaningful here. +FIELD(DeclaredNonTrivialSpecialMembersForCall, 6, MERGE_OR) + +/// True when this class has a destructor with no semantic effect. +FIELD(HasIrrelevantDestructor, 1, NO_MERGE) + +/// True when this class has at least one user-declared constexpr +/// constructor which is neither the copy nor move constructor. +FIELD(HasConstexprNonCopyMoveConstructor, 1, MERGE_OR) + +/// True if this class has a (possibly implicit) defaulted default +/// constructor. +FIELD(HasDefaultedDefaultConstructor, 1, MERGE_OR) + +/// True if a defaulted default constructor for this class would +/// be constexpr. +FIELD(DefaultedDefaultConstructorIsConstexpr, 1, NO_MERGE) + +/// True if this class has a constexpr default constructor. +/// +/// This is true for either a user-declared constexpr default constructor +/// or an implicitly declared constexpr default constructor. 
+FIELD(HasConstexprDefaultConstructor, 1, MERGE_OR) + +/// True if a defaulted destructor for this class would be constexpr. +FIELD(DefaultedDestructorIsConstexpr, 1, NO_MERGE) + +/// True when this class contains at least one non-static data +/// member or base class of non-literal or volatile type. +FIELD(HasNonLiteralTypeFieldsOrBases, 1, NO_MERGE) + +/// Whether we have a C++11 user-provided default constructor (not +/// explicitly deleted or defaulted). +FIELD(UserProvidedDefaultConstructor, 1, NO_MERGE) + +/// The special members which have been declared for this class, +/// either by the user or implicitly. +FIELD(DeclaredSpecialMembers, 6, MERGE_OR) + +/// Whether an implicit copy constructor could have a const-qualified +/// parameter, for initializing virtual bases and for other subobjects. +FIELD(ImplicitCopyConstructorCanHaveConstParamForVBase, 1, NO_MERGE) +FIELD(ImplicitCopyConstructorCanHaveConstParamForNonVBase, 1, NO_MERGE) + +/// Whether an implicit copy assignment operator would have a +/// const-qualified parameter. +FIELD(ImplicitCopyAssignmentHasConstParam, 1, NO_MERGE) + +/// Whether any declared copy constructor has a const-qualified +/// parameter. +FIELD(HasDeclaredCopyConstructorWithConstParam, 1, MERGE_OR) + +/// Whether any declared copy assignment operator has either a +/// const-qualified reference parameter or a non-reference parameter. +FIELD(HasDeclaredCopyAssignmentWithConstParam, 1, MERGE_OR) + +#undef FIELD diff --git a/include/clang/AST/CharUnits.h b/include/clang/AST/CharUnits.h index 37f489c7708..f14d3abf71e 100644 --- a/include/clang/AST/CharUnits.h +++ b/include/clang/AST/CharUnits.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_AST_CHARUNITS_H #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MathExtras.h" @@ -177,6 +178,10 @@ namespace clang { /// getQuantity - Get the raw integer representation of this quantity. 
QuantityType getQuantity() const { return Quantity; } + /// getAsAlign - Returns Quantity as a valid llvm::Align, + /// Beware llvm::Align assumes power of two 8-bit bytes. + llvm::Align getAsAlign() const { return llvm::Align(Quantity); } + /// alignTo - Returns the next integer (mod 2**64) that is /// greater than or equal to this quantity and is a multiple of \p Align. /// Align must be non-zero. diff --git a/include/clang/AST/CommentCommands.td b/include/clang/AST/CommentCommands.td index 958ee032e71..3b0d1603d40 100644 --- a/include/clang/AST/CommentCommands.td +++ b/include/clang/AST/CommentCommands.td @@ -139,6 +139,7 @@ def Post : BlockCommand<"post">; def Pre : BlockCommand<"pre">; def Remark : BlockCommand<"remark">; def Remarks : BlockCommand<"remarks">; +def Retval : BlockCommand<"retval">; def Sa : BlockCommand<"sa">; def See : BlockCommand<"see">; def Since : BlockCommand<"since">; diff --git a/include/clang/AST/CommentLexer.h b/include/clang/AST/CommentLexer.h index 9ddbb7d31d9..138fdaca0ff 100644 --- a/include/clang/AST/CommentLexer.h +++ b/include/clang/AST/CommentLexer.h @@ -352,8 +352,7 @@ public: void lex(Token &T); - StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr, - bool *Invalid = nullptr) const; + StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const; }; } // end namespace comments diff --git a/include/clang/AST/Decl.h b/include/clang/AST/Decl.h index 02742801f37..ce674e09c44 100644 --- a/include/clang/AST/Decl.h +++ b/include/clang/AST/Decl.h @@ -310,6 +310,14 @@ public: void printQualifiedName(raw_ostream &OS) const; void printQualifiedName(raw_ostream &OS, const PrintingPolicy &Policy) const; + /// Print only the nested name specifier part of a fully-qualified name, + /// including the '::' at the end. E.g. + /// when `printQualifiedName(D)` prints "A::B::i", + /// this function prints "A::B::". 
+ void printNestedNameSpecifier(raw_ostream &OS) const; + void printNestedNameSpecifier(raw_ostream &OS, + const PrintingPolicy &Policy) const; + // FIXME: Remove string version. std::string getQualifiedNameAsString() const; @@ -800,12 +808,19 @@ struct EvaluatedStmt { /// valid if CheckedICE is true. bool IsICE : 1; + /// Whether this variable is known to have constant destruction. That is, + /// whether running the destructor on the initial value is a side-effect + /// (and doesn't inspect any state that might have changed during program + /// execution). This is currently only computed if the destructor is + /// non-trivial. + bool HasConstantDestruction : 1; + Stmt *Value; APValue Evaluated; - EvaluatedStmt() : WasEvaluated(false), IsEvaluating(false), CheckedICE(false), - CheckingICE(false), IsICE(false) {} - + EvaluatedStmt() + : WasEvaluated(false), IsEvaluating(false), CheckedICE(false), + CheckingICE(false), IsICE(false), HasConstantDestruction(false) {} }; /// Represents a variable declaration or definition. @@ -1226,6 +1241,14 @@ public: void setInit(Expr *I); + /// Get the initializing declaration of this variable, if any. This is + /// usually the definition, except that for a static data member it can be + /// the in-class declaration. + VarDecl *getInitializingDeclaration(); + const VarDecl *getInitializingDeclaration() const { + return const_cast(this)->getInitializingDeclaration(); + } + /// Determine whether this variable's value might be usable in a /// constant expression, according to the relevant language standard. /// This only checks properties of the declaration, and does not check @@ -1251,6 +1274,14 @@ public: /// to untyped APValue if the value could not be evaluated. APValue *getEvaluatedValue() const; + /// Evaluate the destruction of this variable to determine if it constitutes + /// constant destruction. + /// + /// \pre isInitICE() + /// \return \c true if this variable has constant destruction, \c false if + /// not. 
+ bool evaluateDestruction(SmallVectorImpl &Notes) const; + /// Determines whether it is already known whether the /// initializer is an integral constant expression or not. bool isInitKnownICE() const; @@ -1489,9 +1520,14 @@ public: // has no definition within this source file. bool isKnownToBeDefined() const; - /// Do we need to emit an exit-time destructor for this variable? + /// Is destruction of this variable entirely suppressed? If so, the variable + /// need not have a usable destructor at all. bool isNoDestroy(const ASTContext &) const; + /// Do we need to emit an exit-time destructor for this variable, and if so, + /// what kind? + QualType::DestructionKind needsDestruction(const ASTContext &Ctx) const; + // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K >= firstVar && K <= lastVar; } @@ -4078,13 +4114,9 @@ public: void setCaptures(ASTContext &Context, ArrayRef Captures, bool CapturesCXXThis); - unsigned getBlockManglingNumber() const { - return ManglingNumber; - } + unsigned getBlockManglingNumber() const { return ManglingNumber; } - Decl *getBlockManglingContextDecl() const { - return ManglingContextDecl; - } + Decl *getBlockManglingContextDecl() const { return ManglingContextDecl; } void setBlockMangling(unsigned Number, Decl *Ctx) { ManglingNumber = Number; diff --git a/include/clang/AST/DeclBase.h b/include/clang/AST/DeclBase.h index d64d0cb425d..01c2f180977 100644 --- a/include/clang/AST/DeclBase.h +++ b/include/clang/AST/DeclBase.h @@ -959,7 +959,7 @@ public: /// as this declaration, or NULL if there is no previous declaration. Decl *getPreviousDecl() { return getPreviousDeclImpl(); } - /// Retrieve the most recent declaration that declares the same entity + /// Retrieve the previous declaration that declares the same entity /// as this declaration, or NULL if there is no previous declaration. 
const Decl *getPreviousDecl() const { return const_cast(this)->getPreviousDeclImpl(); diff --git a/include/clang/AST/DeclCXX.h b/include/clang/AST/DeclCXX.h index 7add83f8962..66212f72b78 100644 --- a/include/clang/AST/DeclCXX.h +++ b/include/clang/AST/DeclCXX.h @@ -42,6 +42,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" @@ -73,52 +74,6 @@ class TemplateDecl; class TemplateParameterList; class UsingDecl; -/// Represents any kind of function declaration, whether it is a -/// concrete function or a function template. -class AnyFunctionDecl { - NamedDecl *Function; - - AnyFunctionDecl(NamedDecl *ND) : Function(ND) {} - -public: - AnyFunctionDecl(FunctionDecl *FD) : Function(FD) {} - AnyFunctionDecl(FunctionTemplateDecl *FTD); - - /// Implicily converts any function or function template into a - /// named declaration. - operator NamedDecl *() const { return Function; } - - /// Retrieve the underlying function or function template. - NamedDecl *get() const { return Function; } - - static AnyFunctionDecl getFromNamedDecl(NamedDecl *ND) { - return AnyFunctionDecl(ND); - } -}; - -} // namespace clang - -namespace llvm { - - // Provide PointerLikeTypeTraits for non-cvr pointers. - template<> - struct PointerLikeTypeTraits< ::clang::AnyFunctionDecl> { - static void *getAsVoidPointer(::clang::AnyFunctionDecl F) { - return F.get(); - } - - static ::clang::AnyFunctionDecl getFromVoidPointer(void *P) { - return ::clang::AnyFunctionDecl::getFromNamedDecl( - static_cast< ::clang::NamedDecl*>(P)); - } - - enum { NumLowBitsAvailable = 2 }; - }; - -} // namespace llvm - -namespace clang { - /// Represents an access specifier followed by colon ':'. 
/// /// An objects of this class represents sugar for the syntactic occurrence @@ -322,207 +277,9 @@ class CXXRecordDecl : public RecordDecl { }; struct DefinitionData { - /// True if this class has any user-declared constructors. - unsigned UserDeclaredConstructor : 1; - - /// The user-declared special members which this class has. - unsigned UserDeclaredSpecialMembers : 6; - - /// True when this class is an aggregate. - unsigned Aggregate : 1; - - /// True when this class is a POD-type. - unsigned PlainOldData : 1; - - /// True when this class is empty for traits purposes, that is: - /// * has no data members other than 0-width bit-fields and empty fields - /// marked [[no_unique_address]] - /// * has no virtual function/base, and - /// * doesn't inherit from a non-empty class. - /// Doesn't take union-ness into account. - unsigned Empty : 1; - - /// True when this class is polymorphic, i.e., has at - /// least one virtual member or derives from a polymorphic class. - unsigned Polymorphic : 1; - - /// True when this class is abstract, i.e., has at least - /// one pure virtual function, (that can come from a base class). - unsigned Abstract : 1; - - /// True when this class is standard-layout, per the applicable - /// language rules (including DRs). - unsigned IsStandardLayout : 1; - - /// True when this class was standard-layout under the C++11 - /// definition. - /// - /// C++11 [class]p7. 
A standard-layout class is a class that: - /// * has no non-static data members of type non-standard-layout class (or - /// array of such types) or reference, - /// * has no virtual functions (10.3) and no virtual base classes (10.1), - /// * has the same access control (Clause 11) for all non-static data - /// members - /// * has no non-standard-layout base classes, - /// * either has no non-static data members in the most derived class and at - /// most one base class with non-static data members, or has no base - /// classes with non-static data members, and - /// * has no base classes of the same type as the first non-static data - /// member. - unsigned IsCXX11StandardLayout : 1; - - /// True when any base class has any declared non-static data - /// members or bit-fields. - /// This is a helper bit of state used to implement IsStandardLayout more - /// efficiently. - unsigned HasBasesWithFields : 1; - - /// True when any base class has any declared non-static data - /// members. - /// This is a helper bit of state used to implement IsCXX11StandardLayout - /// more efficiently. - unsigned HasBasesWithNonStaticDataMembers : 1; - - /// True when there are private non-static data members. - unsigned HasPrivateFields : 1; - - /// True when there are protected non-static data members. - unsigned HasProtectedFields : 1; - - /// True when there are private non-static data members. - unsigned HasPublicFields : 1; - - /// True if this class (or any subobject) has mutable fields. - unsigned HasMutableFields : 1; - - /// True if this class (or any nested anonymous struct or union) - /// has variant members. - unsigned HasVariantMembers : 1; - - /// True if there no non-field members declared by the user. - unsigned HasOnlyCMembers : 1; - - /// True if any field has an in-class initializer, including those - /// within anonymous unions or structs. 
- unsigned HasInClassInitializer : 1; - - /// True if any field is of reference type, and does not have an - /// in-class initializer. - /// - /// In this case, value-initialization of this class is illegal in C++98 - /// even if the class has a trivial default constructor. - unsigned HasUninitializedReferenceMember : 1; - - /// True if any non-mutable field whose type doesn't have a user- - /// provided default ctor also doesn't have an in-class initializer. - unsigned HasUninitializedFields : 1; - - /// True if there are any member using-declarations that inherit - /// constructors from a base class. - unsigned HasInheritedConstructor : 1; - - /// True if there are any member using-declarations named - /// 'operator='. - unsigned HasInheritedAssignment : 1; - - /// These flags are \c true if a defaulted corresponding special - /// member can't be fully analyzed without performing overload resolution. - /// @{ - unsigned NeedOverloadResolutionForCopyConstructor : 1; - unsigned NeedOverloadResolutionForMoveConstructor : 1; - unsigned NeedOverloadResolutionForMoveAssignment : 1; - unsigned NeedOverloadResolutionForDestructor : 1; - /// @} - - /// These flags are \c true if an implicit defaulted corresponding - /// special member would be defined as deleted. - /// @{ - unsigned DefaultedCopyConstructorIsDeleted : 1; - unsigned DefaultedMoveConstructorIsDeleted : 1; - unsigned DefaultedMoveAssignmentIsDeleted : 1; - unsigned DefaultedDestructorIsDeleted : 1; - /// @} - - /// The trivial special members which this class has, per - /// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25, - /// C++11 [class.dtor]p5, or would have if the member were not suppressed. - /// - /// This excludes any user-declared but not user-provided special members - /// which have been declared but not yet defined. - unsigned HasTrivialSpecialMembers : 6; - - /// These bits keep track of the triviality of special functions for the - /// purpose of calls. 
Only the bits corresponding to SMF_CopyConstructor, - /// SMF_MoveConstructor, and SMF_Destructor are meaningful here. - unsigned HasTrivialSpecialMembersForCall : 6; - - /// The declared special members of this class which are known to be - /// non-trivial. - /// - /// This excludes any user-declared but not user-provided special members - /// which have been declared but not yet defined, and any implicit special - /// members which have not yet been declared. - unsigned DeclaredNonTrivialSpecialMembers : 6; - - /// These bits keep track of the declared special members that are - /// non-trivial for the purpose of calls. - /// Only the bits corresponding to SMF_CopyConstructor, - /// SMF_MoveConstructor, and SMF_Destructor are meaningful here. - unsigned DeclaredNonTrivialSpecialMembersForCall : 6; - - /// True when this class has a destructor with no semantic effect. - unsigned HasIrrelevantDestructor : 1; - - /// True when this class has at least one user-declared constexpr - /// constructor which is neither the copy nor move constructor. - unsigned HasConstexprNonCopyMoveConstructor : 1; - - /// True if this class has a (possibly implicit) defaulted default - /// constructor. - unsigned HasDefaultedDefaultConstructor : 1; - - /// True if a defaulted default constructor for this class would - /// be constexpr. - unsigned DefaultedDefaultConstructorIsConstexpr : 1; - - /// True if this class has a constexpr default constructor. - /// - /// This is true for either a user-declared constexpr default constructor - /// or an implicitly declared constexpr default constructor. - unsigned HasConstexprDefaultConstructor : 1; - - /// True when this class contains at least one non-static data - /// member or base class of non-literal or volatile type. - unsigned HasNonLiteralTypeFieldsOrBases : 1; - - /// True when visible conversion functions are already computed - /// and are available. 
- unsigned ComputedVisibleConversions : 1; - - /// Whether we have a C++11 user-provided default constructor (not - /// explicitly deleted or defaulted). - unsigned UserProvidedDefaultConstructor : 1; - - /// The special members which have been declared for this class, - /// either by the user or implicitly. - unsigned DeclaredSpecialMembers : 6; - - /// Whether an implicit copy constructor could have a const-qualified - /// parameter, for initializing virtual bases and for other subobjects. - unsigned ImplicitCopyConstructorCanHaveConstParamForVBase : 1; - unsigned ImplicitCopyConstructorCanHaveConstParamForNonVBase : 1; - - /// Whether an implicit copy assignment operator would have a - /// const-qualified parameter. - unsigned ImplicitCopyAssignmentHasConstParam : 1; - - /// Whether any declared copy constructor has a const-qualified - /// parameter. - unsigned HasDeclaredCopyConstructorWithConstParam : 1; - - /// Whether any declared copy assignment operator has either a - /// const-qualified reference parameter or a non-reference parameter. - unsigned HasDeclaredCopyAssignmentWithConstParam : 1; + #define FIELD(Name, Width, Merge) \ + unsigned Name : Width; + #include "CXXRecordDeclDefinitionBits.def" /// Whether this class describes a C++ lambda. unsigned IsLambda : 1; @@ -530,6 +287,10 @@ class CXXRecordDecl : public RecordDecl { /// Whether we are currently parsing base specifiers. unsigned IsParsingBaseSpecifiers : 1; + /// True when visible conversion functions are already computed + /// and are available. + unsigned ComputedVisibleConversions : 1; + unsigned HasODRHash : 1; /// A hash of parts of the class to help in ODR checking. @@ -628,9 +389,12 @@ class CXXRecordDecl : public RecordDecl { /// The number of explicit captures in this lambda. unsigned NumExplicitCaptures : 13; + /// Has known `internal` linkage. + unsigned HasKnownInternalLinkage : 1; + /// The number used to indicate this lambda expression for name /// mangling in the Itanium C++ ABI. 
- unsigned ManglingNumber = 0; + unsigned ManglingNumber : 31; /// The declaration that provides context for this lambda, if the /// actual DeclContext does not suffice. This is used for lambdas that @@ -645,12 +409,12 @@ class CXXRecordDecl : public RecordDecl { /// The type of the call method. TypeSourceInfo *MethodTyInfo; - LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, - bool Dependent, bool IsGeneric, - LambdaCaptureDefault CaptureDefault) - : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), - CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0), - MethodTyInfo(Info) { + LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, bool Dependent, + bool IsGeneric, LambdaCaptureDefault CaptureDefault) + : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric), + CaptureDefault(CaptureDefault), NumCaptures(0), + NumExplicitCaptures(0), HasKnownInternalLinkage(0), ManglingNumber(0), + MethodTyInfo(Info) { IsLambda = true; // C++1z [expr.prim.lambda]p4: @@ -1214,6 +978,10 @@ public: /// if this is a closure type. CXXMethodDecl *getLambdaCallOperator() const; + /// Retrieve the dependent lambda call operator of the closure type + /// if this is a templated closure type. + FunctionTemplateDecl *getDependentLambdaCallOperator() const; + /// Retrieve the lambda static invoker, the address of which /// is returned by the conversion operator, and the body of which /// is forwarded to the lambda call operator. @@ -1398,7 +1166,8 @@ public: /// would be constexpr. bool defaultedDefaultConstructorIsConstexpr() const { return data().DefaultedDefaultConstructorIsConstexpr && - (!isUnion() || hasInClassInitializer() || !hasVariantMembers()); + (!isUnion() || hasInClassInitializer() || !hasVariantMembers() || + getASTContext().getLangOpts().CPlusPlus2a); } /// Determine whether this class has a constexpr default constructor. 
@@ -1486,6 +1255,16 @@ public: !(data().HasTrivialSpecialMembers & SMF_MoveAssignment)); } + /// Determine whether a defaulted default constructor for this class + /// would be constexpr. + bool defaultedDestructorIsConstexpr() const { + return data().DefaultedDestructorIsConstexpr && + getASTContext().getLangOpts().CPlusPlus2a; + } + + /// Determine whether this class has a constexpr destructor. + bool hasConstexprDestructor() const; + /// Determine whether this class has a trivial destructor /// (C++ [class.dtor]p3) bool hasTrivialDestructor() const { @@ -1577,8 +1356,10 @@ public: /// /// Only in C++17 and beyond, are lambdas literal types. bool isLiteral() const { - return hasTrivialDestructor() && - (!isLambda() || getASTContext().getLangOpts().CPlusPlus17) && + ASTContext &Ctx = getASTContext(); + return (Ctx.getLangOpts().CPlusPlus2a ? hasConstexprDestructor() + : hasTrivialDestructor()) && + (!isLambda() || Ctx.getLangOpts().CPlusPlus17) && !hasNonLiteralTypeFieldsOrBases() && (isAggregate() || isLambda() || hasConstexprNonCopyMoveConstructor() || @@ -1927,6 +1708,13 @@ public: return getLambdaData().ManglingNumber; } + /// The lambda is known to has internal linkage no matter whether it has name + /// mangling number. + bool hasKnownLambdaInternalLinkage() const { + assert(isLambda() && "Not a lambda closure type!"); + return getLambdaData().HasKnownInternalLinkage; + } + /// Retrieve the declaration that provides additional context for a /// lambda, when the normal declaration context is not specific enough. /// @@ -1940,9 +1728,12 @@ public: /// Set the mangling number and context declaration for a lambda /// class. 
- void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl) { + void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl, + bool HasKnownInternalLinkage = false) { + assert(isLambda() && "Not a lambda closure type!"); getLambdaData().ManglingNumber = ManglingNumber; getLambdaData().ContextDecl = ContextDecl; + getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage; } /// Returns the inheritance model used for this record. @@ -2265,7 +2056,7 @@ public: const CXXRecordDecl *Decl); Qualifiers getMethodQualifiers() const { - return getType()->getAs()->getMethodQuals(); + return getType()->castAs()->getMethodQuals(); } /// Retrieve the ref-qualifier associated with this method. @@ -2280,7 +2071,7 @@ public: /// }; /// @endcode RefQualifierKind getRefQualifier() const { - return getType()->getAs()->getRefQualifier(); + return getType()->castAs()->getRefQualifier(); } bool hasInlineBody() const; @@ -2600,9 +2391,9 @@ class CXXConstructorDecl final ExplicitSpecifier getExplicitSpecifierInternal() const { if (CXXConstructorDeclBits.HasTrailingExplicitSpecifier) - return *getCanonicalDecl()->getTrailingObjects(); + return *getTrailingObjects(); return ExplicitSpecifier( - nullptr, getCanonicalDecl()->CXXConstructorDeclBits.IsSimpleExplicit + nullptr, CXXConstructorDeclBits.IsSimpleExplicit ? ExplicitSpecKind::ResolvedTrue : ExplicitSpecKind::ResolvedFalse); } @@ -2643,10 +2434,10 @@ public: InheritedConstructor Inherited = InheritedConstructor()); ExplicitSpecifier getExplicitSpecifier() { - return getExplicitSpecifierInternal(); + return getCanonicalDecl()->getExplicitSpecifierInternal(); } const ExplicitSpecifier getExplicitSpecifier() const { - return getExplicitSpecifierInternal(); + return getCanonicalDecl()->getExplicitSpecifierInternal(); } /// Return true if the declartion is already resolved to be explicit. 
@@ -2847,9 +2638,9 @@ class CXXDestructorDecl : public CXXMethodDecl { CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, bool isInline, - bool isImplicitlyDeclared) + bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind) : CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo, - SC_None, isInline, CSK_unspecified, SourceLocation()) { + SC_None, isInline, ConstexprKind, SourceLocation()) { setImplicit(isImplicitlyDeclared); } @@ -2859,9 +2650,9 @@ public: static CXXDestructorDecl *Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, const DeclarationNameInfo &NameInfo, - QualType T, TypeSourceInfo* TInfo, - bool isInline, - bool isImplicitlyDeclared); + QualType T, TypeSourceInfo *TInfo, + bool isInline, bool isImplicitlyDeclared, + ConstexprSpecKind ConstexprKind); static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID); void setOperatorDelete(FunctionDecl *OD, Expr *ThisArg); @@ -2934,7 +2725,7 @@ public: /// Returns the type that this conversion function is converting to. QualType getConversionType() const { - return getType()->getAs()->getReturnType(); + return getType()->castAs()->getReturnType(); } /// Determine whether this conversion function is a conversion from @@ -2971,8 +2762,10 @@ public: /// ensure a stable ABI for this, we choose the DW_LANG_ encodings /// from the dwarf standard. enum LanguageIDs { - lang_c = /* DW_LANG_C */ 0x0002, - lang_cxx = /* DW_LANG_C_plus_plus */ 0x0004 + lang_c = llvm::dwarf::DW_LANG_C, + lang_cxx = llvm::dwarf::DW_LANG_C_plus_plus, + lang_cxx_11 = llvm::dwarf::DW_LANG_C_plus_plus_11, + lang_cxx_14 = llvm::dwarf::DW_LANG_C_plus_plus_14 }; private: @@ -3469,12 +3262,6 @@ public: return IsVirtual; } - /// Get the constructor or constructor template in the derived class - /// correspnding to this using shadow declaration, if it has been implicitly - /// declared already. 
- CXXConstructorDecl *getConstructor() const; - void setConstructor(NamedDecl *Ctor); - static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == ConstructorUsingShadow; } }; diff --git a/include/clang/AST/DeclTemplate.h b/include/clang/AST/DeclTemplate.h index 235b31c1c31..ec14adc7de9 100644 --- a/include/clang/AST/DeclTemplate.h +++ b/include/clang/AST/DeclTemplate.h @@ -168,6 +168,16 @@ public: return HasRequiresClause ? *getTrailingObjects() : nullptr; } + /// \brief All associated constraints derived from this template parameter + /// list, including the requires clause and any constraints derived from + /// constrained-parameters. + /// + /// The constraints in the resulting list are to be treated as if in a + /// conjunction ("and"). + void getAssociatedConstraints(llvm::SmallVectorImpl &AC) const; + + bool hasAssociatedConstraints() const; + SourceLocation getTemplateLoc() const { return TemplateLoc; } SourceLocation getLAngleLoc() const { return LAngleLoc; } SourceLocation getRAngleLoc() const { return RAngleLoc; } @@ -369,33 +379,7 @@ public: // Kinds of Templates //===----------------------------------------------------------------------===// -/// Stores the template parameter list and associated constraints for -/// \c TemplateDecl objects that track associated constraints. 
-class ConstrainedTemplateDeclInfo { - friend TemplateDecl; - -public: - ConstrainedTemplateDeclInfo() = default; - - TemplateParameterList *getTemplateParameters() const { - return TemplateParams; - } - - Expr *getAssociatedConstraints() const { return AssociatedConstraints; } - -protected: - void setTemplateParameters(TemplateParameterList *TParams) { - TemplateParams = TParams; - } - - void setAssociatedConstraints(Expr *AC) { AssociatedConstraints = AC; } - - TemplateParameterList *TemplateParams = nullptr; - Expr *AssociatedConstraints = nullptr; -}; - - -/// The base class of all kinds of template declarations (e.g., +/// \brief The base class of all kinds of template declarations (e.g., /// class, function, etc.). /// /// The TemplateDecl class stores the list of template parameters and a @@ -404,54 +388,32 @@ class TemplateDecl : public NamedDecl { void anchor() override; protected: + // Construct a template decl with name, parameters, and templated element. + TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName Name, + TemplateParameterList *Params, NamedDecl *Decl); + // Construct a template decl with the given name and parameters. // Used when there is no templated element (e.g., for tt-params). - TemplateDecl(ConstrainedTemplateDeclInfo *CTDI, Kind DK, DeclContext *DC, - SourceLocation L, DeclarationName Name, - TemplateParameterList *Params) - : NamedDecl(DK, DC, L, Name), TemplatedDecl(nullptr), - TemplateParams(CTDI) { - this->setTemplateParameters(Params); - } - TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName Name, TemplateParameterList *Params) - : TemplateDecl(nullptr, DK, DC, L, Name, Params) {} - - // Construct a template decl with name, parameters, and templated element. 
- TemplateDecl(ConstrainedTemplateDeclInfo *CTDI, Kind DK, DeclContext *DC, - SourceLocation L, DeclarationName Name, - TemplateParameterList *Params, NamedDecl *Decl) - : NamedDecl(DK, DC, L, Name), TemplatedDecl(Decl), - TemplateParams(CTDI) { - this->setTemplateParameters(Params); - } - - TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName Name, - TemplateParameterList *Params, NamedDecl *Decl) - : TemplateDecl(nullptr, DK, DC, L, Name, Params, Decl) {} + : TemplateDecl(DK, DC, L, Name, Params, nullptr) {} public: + friend class ASTDeclReader; + friend class ASTDeclWriter; + /// Get the list of template parameters TemplateParameterList *getTemplateParameters() const { - const auto *const CTDI = - TemplateParams.dyn_cast(); - return CTDI ? CTDI->getTemplateParameters() - : TemplateParams.get(); + return TemplateParams; } - /// Get the constraint-expression from the associated requires-clause (if any) - const Expr *getRequiresClause() const { - const TemplateParameterList *const TP = getTemplateParameters(); - return TP ? TP->getRequiresClause() : nullptr; - } + /// \brief Get the total constraint-expression associated with this template, + /// including constraint-expressions derived from the requires-clause, + /// trailing requires-clause (for functions and methods) and constrained + /// template parameters. + void getAssociatedConstraints(llvm::SmallVectorImpl &AC) const; - Expr *getAssociatedConstraints() const { - const auto *const C = cast(getCanonicalDecl()); - const auto *const CTDI = - C->TemplateParams.dyn_cast(); - return CTDI ? CTDI->getAssociatedConstraints() : nullptr; - } + bool hasAssociatedConstraints() const; /// Get the underlying, templated declaration. 
NamedDecl *getTemplatedDecl() const { return TemplatedDecl; } @@ -470,29 +432,10 @@ public: protected: NamedDecl *TemplatedDecl; - - /// The template parameter list and optional requires-clause - /// associated with this declaration; alternatively, a - /// \c ConstrainedTemplateDeclInfo if the associated constraints of the - /// template are being tracked by this particular declaration. - llvm::PointerUnion - TemplateParams; + TemplateParameterList *TemplateParams; void setTemplateParameters(TemplateParameterList *TParams) { - if (auto *const CTDI = - TemplateParams.dyn_cast()) { - CTDI->setTemplateParameters(TParams); - } else { - TemplateParams = TParams; - } - } - - void setAssociatedConstraints(Expr *AC) { - assert(isCanonicalDecl() && - "Attaching associated constraints to non-canonical Decl"); - TemplateParams.get() - ->setAssociatedConstraints(AC); + TemplateParams = TParams; } public: @@ -889,17 +832,10 @@ protected: virtual CommonBase *newCommon(ASTContext &C) const = 0; // Construct a template decl with name, parameters, and templated element. - RedeclarableTemplateDecl(ConstrainedTemplateDeclInfo *CTDI, Kind DK, - ASTContext &C, DeclContext *DC, SourceLocation L, - DeclarationName Name, TemplateParameterList *Params, - NamedDecl *Decl) - : TemplateDecl(CTDI, DK, DC, L, Name, Params, Decl), redeclarable_base(C) - {} - RedeclarableTemplateDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName Name, TemplateParameterList *Params, NamedDecl *Decl) - : RedeclarableTemplateDecl(nullptr, DK, C, DC, L, Name, Params, Decl) {} + : TemplateDecl(DK, DC, L, Name, Params, Decl), redeclarable_base(C) {} public: friend class ASTDeclReader; @@ -2026,6 +1962,20 @@ public: return TemplateParams; } + /// \brief All associated constraints of this partial specialization, + /// including the requires clause and any constraints derived from + /// constrained-parameters. 
+ /// + /// The constraints in the resulting list are to be treated as if in a + /// conjunction ("and"). + void getAssociatedConstraints(llvm::SmallVectorImpl &AC) const { + TemplateParams->getAssociatedConstraints(AC); + } + + bool hasAssociatedConstraints() const { + return TemplateParams->hasAssociatedConstraints(); + } + /// Get the template arguments as written. const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { return ArgsAsWritten; @@ -2145,16 +2095,10 @@ protected: llvm::FoldingSetVector & getPartialSpecializations(); - ClassTemplateDecl(ConstrainedTemplateDeclInfo *CTDI, ASTContext &C, - DeclContext *DC, SourceLocation L, DeclarationName Name, - TemplateParameterList *Params, NamedDecl *Decl) - : RedeclarableTemplateDecl(CTDI, ClassTemplate, C, DC, L, Name, Params, - Decl) {} - ClassTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName Name, TemplateParameterList *Params, NamedDecl *Decl) - : ClassTemplateDecl(nullptr, C, DC, L, Name, Params, Decl) {} + : RedeclarableTemplateDecl(ClassTemplate, C, DC, L, Name, Params, Decl) {} CommonBase *newCommon(ASTContext &C) const override; @@ -2180,14 +2124,12 @@ public: return getTemplatedDecl()->isThisDeclarationADefinition(); } - // FIXME: remove default argument for AssociatedConstraints - /// Create a class template node. + /// \brief Create a class template node. static ClassTemplateDecl *Create(ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName Name, TemplateParameterList *Params, - NamedDecl *Decl, - Expr *AssociatedConstraints = nullptr); + NamedDecl *Decl); /// Create an empty class template node. 
static ClassTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID); @@ -2527,10 +2469,6 @@ public: } }; -/// Implementation of inline functions that require the template declarations -inline AnyFunctionDecl::AnyFunctionDecl(FunctionTemplateDecl *FTD) - : Function(FTD) {} - /// Represents a variable template specialization, which refers to /// a variable template with a given set of template arguments. /// @@ -2866,7 +2804,21 @@ public: return ArgsAsWritten; } - /// Retrieve the member variable template partial specialization from + /// \brief All associated constraints of this partial specialization, + /// including the requires clause and any constraints derived from + /// constrained-parameters. + /// + /// The constraints in the resulting list are to be treated as if in a + /// conjunction ("and"). + void getAssociatedConstraints(llvm::SmallVectorImpl &AC) const { + TemplateParams->getAssociatedConstraints(AC); + } + + bool hasAssociatedConstraints() const { + return TemplateParams->hasAssociatedConstraints(); + } + + /// \brief Retrieve the member variable template partial specialization from /// which this particular variable template partial specialization was /// instantiated. 
/// @@ -3095,11 +3047,9 @@ class ConceptDecl : public TemplateDecl, public Mergeable { protected: Expr *ConstraintExpr; - ConceptDecl(DeclContext *DC, - SourceLocation L, DeclarationName Name, - TemplateParameterList *Params, - Expr *ConstraintExpr) - : TemplateDecl(nullptr, Concept, DC, L, Name, Params), + ConceptDecl(DeclContext *DC, SourceLocation L, DeclarationName Name, + TemplateParameterList *Params, Expr *ConstraintExpr) + : TemplateDecl(Concept, DC, L, Name, Params), ConstraintExpr(ConstraintExpr) {}; public: static ConceptDecl *Create(ASTContext &C, DeclContext *DC, diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index d44a815c869..ffa7d4db96a 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -906,6 +906,11 @@ public: return skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); } + /// Checks that the two Expr's will refer to the same value as a comparison + /// operand. The caller must ensure that the values referenced by the Expr's + /// are not modified between E1 and E2 or the result my be invalid. + static bool isSameComparisonOperand(const Expr* E1, const Expr* E2); + static bool classof(const Stmt *T) { return T->getStmtClass() >= firstExprConstant && T->getStmtClass() <= lastExprConstant; @@ -2619,9 +2624,8 @@ public: /// + sizeof(Stmt *) bytes of storage, aligned to alignof(CallExpr): /// /// \code{.cpp} - /// llvm::AlignedCharArray Buffer; - /// CallExpr *TheCall = CallExpr::CreateTemporary(Buffer.buffer, etc); + /// alignas(CallExpr) char Buffer[sizeof(CallExpr) + sizeof(Stmt *)]; + /// CallExpr *TheCall = CallExpr::CreateTemporary(Buffer, etc); /// \endcode static CallExpr *CreateTemporary(void *Mem, Expr *Fn, QualType Ty, ExprValueKind VK, SourceLocation RParenLoc, @@ -4496,6 +4500,8 @@ public: // Explicit InitListExpr's originate from source code (and have valid source // locations). Implicit InitListExpr's are created by the semantic analyzer. 
+ // FIXME: This is wrong; InitListExprs created by semantic analysis have + // valid source locations too! bool isExplicit() const { return LBraceLoc.isValid() && RBraceLoc.isValid(); } @@ -4830,6 +4836,10 @@ public: SourceLocation getEqualOrColonLoc() const { return EqualOrColonLoc; } void setEqualOrColonLoc(SourceLocation L) { EqualOrColonLoc = L; } + /// Whether this designated initializer should result in direct-initialization + /// of the designated subobject (eg, '{.foo{1, 2, 3}}'). + bool isDirectInit() const { return EqualOrColonLoc.isInvalid(); } + /// Determines whether this designated initializer used the /// deprecated GNU syntax for designated initializers. bool usesGNUSyntax() const { return GNUSyntax; } diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h index 28ed6cdfde1..2152e108c7c 100644 --- a/include/clang/AST/ExprCXX.h +++ b/include/clang/AST/ExprCXX.h @@ -17,6 +17,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/NestedNameSpecifier.h" @@ -185,15 +186,20 @@ public: static CXXMemberCallExpr *CreateEmpty(const ASTContext &Ctx, unsigned NumArgs, EmptyShell Empty); - /// Retrieves the implicit object argument for the member call. + /// Retrieve the implicit object argument for the member call. /// /// For example, in "x.f(5)", this returns the sub-expression "x". Expr *getImplicitObjectArgument() const; - /// Retrieves the declaration of the called method. + /// Retrieve the type of the object argument. + /// + /// Note that this always returns a non-pointer type. + QualType getObjectType() const; + + /// Retrieve the declaration of the called method. CXXMethodDecl *getMethodDecl() const; - /// Retrieves the CXXRecordDecl for the underlying type of + /// Retrieve the CXXRecordDecl for the underlying type of /// the implicit object argument. 
/// /// Note that this is may not be the same declaration as that of the class @@ -248,6 +254,96 @@ public: } }; +/// A rewritten comparison expression that was originally written using +/// operator syntax. +/// +/// In C++20, the following rewrites are performed: +/// - a == b -> b == a +/// - a != b -> !(a == b) +/// - a != b -> !(b == a) +/// - For \c \@ in \c <, \c <=, \c >, \c >=, \c <=>: +/// - a @ b -> (a <=> b) @ 0 +/// - a @ b -> 0 @ (b <=> a) +/// +/// This expression provides access to both the original syntax and the +/// rewritten expression. +/// +/// Note that the rewritten calls to \c ==, \c <=>, and \c \@ are typically +/// \c CXXOperatorCallExprs, but could theoretically be \c BinaryOperators. +class CXXRewrittenBinaryOperator : public Expr { + friend class ASTStmtReader; + + /// The rewritten semantic form. + Stmt *SemanticForm; + +public: + CXXRewrittenBinaryOperator(Expr *SemanticForm, bool IsReversed) + : Expr(CXXRewrittenBinaryOperatorClass, SemanticForm->getType(), + SemanticForm->getValueKind(), SemanticForm->getObjectKind(), + SemanticForm->isTypeDependent(), SemanticForm->isValueDependent(), + SemanticForm->isInstantiationDependent(), + SemanticForm->containsUnexpandedParameterPack()), + SemanticForm(SemanticForm) { + CXXRewrittenBinaryOperatorBits.IsReversed = IsReversed; + } + CXXRewrittenBinaryOperator(EmptyShell Empty) + : Expr(CXXRewrittenBinaryOperatorClass, Empty), SemanticForm() {} + + /// Get an equivalent semantic form for this expression. + Expr *getSemanticForm() { return cast(SemanticForm); } + const Expr *getSemanticForm() const { return cast(SemanticForm); } + + struct DecomposedForm { + /// The original opcode, prior to rewriting. + BinaryOperatorKind Opcode; + /// The original left-hand side. + const Expr *LHS; + /// The original right-hand side. + const Expr *RHS; + /// The inner \c == or \c <=> operator expression. + const Expr *InnerBinOp; + }; + + /// Decompose this operator into its syntactic form. 
+ DecomposedForm getDecomposedForm() const LLVM_READONLY; + + /// Determine whether this expression was rewritten in reverse form. + bool isReversed() const { return CXXRewrittenBinaryOperatorBits.IsReversed; } + + BinaryOperatorKind getOperator() const { return getDecomposedForm().Opcode; } + const Expr *getLHS() const { return getDecomposedForm().LHS; } + const Expr *getRHS() const { return getDecomposedForm().RHS; } + + SourceLocation getOperatorLoc() const LLVM_READONLY { + return getDecomposedForm().InnerBinOp->getExprLoc(); + } + SourceLocation getExprLoc() const LLVM_READONLY { return getOperatorLoc(); } + + /// Compute the begin and end locations from the decomposed form. + /// The locations of the semantic form are not reliable if this is + /// a reversed expression. + //@{ + SourceLocation getBeginLoc() const LLVM_READONLY { + return getDecomposedForm().LHS->getBeginLoc(); + } + SourceLocation getEndLoc() const LLVM_READONLY { + return getDecomposedForm().RHS->getEndLoc(); + } + SourceRange getSourceRange() const LLVM_READONLY { + DecomposedForm DF = getDecomposedForm(); + return SourceRange(DF.LHS->getBeginLoc(), DF.RHS->getEndLoc()); + } + //@} + + child_range children() { + return child_range(&SemanticForm, &SemanticForm + 1); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CXXRewrittenBinaryOperatorClass; + } +}; + /// Abstract class common to all of the C++ "named"/"keyword" casts. /// /// This abstract class is inherited by all of the classes @@ -1902,6 +1998,10 @@ public: /// lambda expression. CXXMethodDecl *getCallOperator() const; + /// Retrieve the function template call operator associated with this + /// lambda expression. + FunctionTemplateDecl *getDependentCallOperator() const; + /// If this is a generic lambda expression, retrieve the template /// parameter list associated with it, or else return null. 
TemplateParameterList *getTemplateParameterList() const; @@ -2091,8 +2191,7 @@ public: bool IsParenTypeId); QualType getAllocatedType() const { - assert(getType()->isPointerType()); - return getType()->getAs()->getPointeeType(); + return getType()->castAs()->getPointeeType(); } TypeSourceInfo *getAllocatedTypeSourceInfo() const { @@ -2270,8 +2369,8 @@ public: CXXDeleteExpr(QualType Ty, bool GlobalDelete, bool ArrayForm, bool ArrayFormAsWritten, bool UsualArrayDeleteWantsSize, FunctionDecl *OperatorDelete, Expr *Arg, SourceLocation Loc) - : Expr(CXXDeleteExprClass, Ty, VK_RValue, OK_Ordinary, false, false, - Arg->isInstantiationDependent(), + : Expr(CXXDeleteExprClass, Ty, VK_RValue, OK_Ordinary, false, + Arg->isValueDependent(), Arg->isInstantiationDependent(), Arg->containsUnexpandedParameterPack()), OperatorDelete(OperatorDelete), Argument(Arg) { CXXDeleteExprBits.GlobalDelete = GlobalDelete; @@ -4335,9 +4434,6 @@ private: }; llvm::PointerUnion State; - void initializeExtraState(const ValueDecl *ExtendedBy, - unsigned ManglingNumber); - public: MaterializeTemporaryExpr(QualType T, Expr *Temporary, bool BoundToLvalueReference) @@ -4745,6 +4841,125 @@ public: } }; +/// \brief Represents the specialization of a concept - evaluates to a prvalue +/// of type bool. +/// +/// According to C++2a [expr.prim.id]p3 an id-expression that denotes the +/// specialization of a concept results in a prvalue of type bool. +class ConceptSpecializationExpr final : public Expr, + private llvm::TrailingObjects { + friend class ASTStmtReader; + friend TrailingObjects; + + // \brief The optional nested name specifier used when naming the concept. + NestedNameSpecifierLoc NestedNameSpec; + + /// \brief The location of the template keyword, if specified when naming the + /// concept. + SourceLocation TemplateKWLoc; + + /// \brief The location of the concept name in the expression. 
+ SourceLocation ConceptNameLoc; + + /// \brief The declaration found by name lookup when the expression was + /// created. + /// Can differ from NamedConcept when, for example, the concept was found + /// through a UsingShadowDecl. + NamedDecl *FoundDecl; + + /// \brief The concept named, and whether or not the concept with the given + /// arguments was satisfied when the expression was created. + /// If any of the template arguments are dependent (this expr would then be + /// isValueDependent()), this bit is to be ignored. + llvm::PointerIntPair NamedConcept; + + /// \brief The template argument list source info used to specialize the + /// concept. + const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr; + + /// \brief The number of template arguments in the tail-allocated list of + /// converted template arguments. + unsigned NumTemplateArgs; + + ConceptSpecializationExpr(ASTContext &C, NestedNameSpecifierLoc NNS, + SourceLocation TemplateKWLoc, + SourceLocation ConceptNameLoc, NamedDecl *FoundDecl, + ConceptDecl *NamedConcept, + const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef ConvertedArgs, + Optional IsSatisfied); + + ConceptSpecializationExpr(EmptyShell Empty, unsigned NumTemplateArgs); + +public: + + static ConceptSpecializationExpr * + Create(ASTContext &C, NestedNameSpecifierLoc NNS, + SourceLocation TemplateKWLoc, SourceLocation ConceptNameLoc, + NamedDecl *FoundDecl, ConceptDecl *NamedConcept, + const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef ConvertedArgs, Optional IsSatisfied); + + static ConceptSpecializationExpr * + Create(ASTContext &C, EmptyShell Empty, unsigned NumTemplateArgs); + + const NestedNameSpecifierLoc &getNestedNameSpecifierLoc() const { + return NestedNameSpec; + } + + NamedDecl *getFoundDecl() const { + return FoundDecl; + } + + ConceptDecl *getNamedConcept() const { + return NamedConcept.getPointer(); + } + + ArrayRef getTemplateArguments() const { + return ArrayRef(getTrailingObjects(), + 
NumTemplateArgs); + } + + const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const { + return ArgsAsWritten; + } + + /// \brief Set new template arguments for this concept specialization. + void setTemplateArguments(const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef Converted); + + /// \brief Whether or not the concept with the given arguments was satisfied + /// when the expression was created. This method assumes that the expression + /// is not dependent! + bool isSatisfied() const { + assert(!isValueDependent() + && "isSatisfied called on a dependent ConceptSpecializationExpr"); + return NamedConcept.getInt(); + } + + SourceLocation getConceptNameLoc() const { return ConceptNameLoc; } + + SourceLocation getTemplateKWLoc() const { return TemplateKWLoc; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == ConceptSpecializationExprClass; + } + + SourceLocation getBeginLoc() const LLVM_READONLY { return ConceptNameLoc; } + SourceLocation getEndLoc() const LLVM_READONLY { + return ArgsAsWritten->RAngleLoc; + } + + // Iterators + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } +}; + } // namespace clang #endif // LLVM_CLANG_AST_EXPRCXX_H diff --git a/include/clang/AST/ExternalASTMerger.h b/include/clang/AST/ExternalASTMerger.h index d89189da04f..0230495a5ef 100644 --- a/include/clang/AST/ExternalASTMerger.h +++ b/include/clang/AST/ExternalASTMerger.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_AST_EXTERNALASTMERGER_H #include "clang/AST/ASTImporter.h" +#include "clang/AST/ASTImporterSharedState.h" #include "clang/AST/ExternalASTSource.h" #include "llvm/Support/raw_ostream.h" @@ -22,7 +23,7 @@ namespace clang { /// ExternalASTSource implementation that merges information from several /// ASTContexts. 
/// -/// ExtermalASTMerger maintains a vector of ASTImporters that it uses to import +/// ExternalASTMerger maintains a vector of ASTImporters that it uses to import /// (potentially incomplete) Decls and DeclContexts from the source ASTContexts /// in response to ExternalASTSource API calls. /// @@ -36,7 +37,7 @@ namespace clang { /// lookup. In this case, Origins contains an entry overriding lookup and /// specifying the correct pair of DeclContext/ASTContext. /// -/// - The DeclContext of origin was determined by another ExterenalASTMerger. +/// - The DeclContext of origin was determined by another ExternalASTMerger. /// (This is possible when the source ASTContext for one of the Importers has /// its own ExternalASTMerger). The origin must be properly forwarded in this /// case. @@ -79,20 +80,47 @@ public: /// import SourceLocations properly. Additionally, when import occurs for /// a DeclContext whose origin has been overridden, then this /// ExternalASTMerger must be able to determine that. - struct ImporterSource { + class ImporterSource { ASTContext &AST; FileManager &FM; const OriginMap &OM; + /// True iff the source only exists temporarily, i.e., it will be removed from + /// the ExternalASTMerger during the lifetime of the ExternalASTMerger. + bool Temporary; + /// If the ASTContext of this source has an ExternalASTMerger that imports + /// into this source, then this will point to that other ExternalASTMerger.
+ ExternalASTMerger *Merger; + + public: + ImporterSource(ASTContext &AST, FileManager &FM, const OriginMap &OM, + bool Temporary = false, ExternalASTMerger *Merger = nullptr) + : AST(AST), FM(FM), OM(OM), Temporary(Temporary), Merger(Merger) {} + ASTContext &getASTContext() const { return AST; } + FileManager &getFileManager() const { return FM; } + const OriginMap &getOriginMap() const { return OM; } + bool isTemporary() const { return Temporary; } + ExternalASTMerger *getMerger() const { return Merger; } }; private: - /// The target for this ExtenralASTMerger. + /// The target for this ExternalASTMerger. ImporterTarget Target; + /// ExternalASTMerger has multiple ASTImporters that import into the same + /// TU. This is the shared state for all ASTImporters of this + /// ExternalASTMerger. + /// See also the CrossTranslationUnitContext that has a similar setup. + std::shared_ptr SharedState; public: ExternalASTMerger(const ImporterTarget &Target, llvm::ArrayRef Sources); + /// Asks all connected ASTImporters if any of them imported the given + /// declaration. If any ASTImporter did import the given declaration, + /// then this function returns the declaration that D was imported from. + /// Returns nullptr if no ASTImporter did import D. + Decl *FindOriginalDecl(Decl *D); + /// Add a set of ASTContexts as possible origins. /// /// Usually the set will be initialized in the constructor, but long-lived @@ -145,7 +173,7 @@ public: /// OriginContext. bool HasImporterForOrigin(ASTContext &OriginContext); - /// Returns a reference to the ASTRImporter from Importers whose origin + /// Returns a reference to the ASTImporter from Importers whose origin /// is OriginContext. This allows manual import of ASTs while preserving the /// OriginMap correctly.
ASTImporter &ImporterForOrigin(ASTContext &OriginContext); diff --git a/include/clang/AST/FormatString.h b/include/clang/AST/FormatString.h index 643fb822f7f..8c944451f79 100644 --- a/include/clang/AST/FormatString.h +++ b/include/clang/AST/FormatString.h @@ -251,7 +251,21 @@ public: enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, AnyCharTy, CStrTy, WCStrTy, WIntTy }; - enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic }; + /// How well a given conversion specifier matches its argument. + enum MatchKind { + /// The conversion specifier and the argument types are incompatible. For + /// instance, "%d" and float. + NoMatch = 0, + /// The conversion specifier and the argument type are compatible. For + /// instance, "%d" and _Bool. + Match = 1, + /// The conversion specifier and the argument type are disallowed by the C + /// standard, but are in practice harmless. For instance, "%p" and int*. + NoMatchPedantic, + /// The conversion specifier and the argument type are compatible, but still + /// seem likely to be an error. For instance, "%hd" and _Bool. + NoMatchTypeConfusion, + }; private: const Kind K; @@ -748,6 +762,12 @@ bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target); +/// Return true if the given string has at least one formatting specifier.
+bool parseFormatStringHasFormattingSpecifiers(const char *Begin, + const char *End, + const LangOptions &LO, + const TargetInfo &Target); + } // end analyze_format_string namespace } // end clang namespace #endif diff --git a/include/clang/AST/GlobalDecl.h b/include/clang/AST/GlobalDecl.h index 86fd0f6aa90..145e961a23a 100644 --- a/include/clang/AST/GlobalDecl.h +++ b/include/clang/AST/GlobalDecl.h @@ -59,6 +59,7 @@ public: GlobalDecl(const CapturedDecl *D) { Init(D); } GlobalDecl(const ObjCMethodDecl *D) { Init(D); } GlobalDecl(const OMPDeclareReductionDecl *D) { Init(D); } + GlobalDecl(const OMPDeclareMapperDecl *D) { Init(D); } GlobalDecl(const CXXConstructorDecl *D, CXXCtorType Type) : Value(D, Type) {} GlobalDecl(const CXXDestructorDecl *D, CXXDtorType Type) : Value(D, Type) {} GlobalDecl(const VarDecl *D, DynamicInitKind StubKind) diff --git a/include/clang/AST/JSONNodeDumper.h b/include/clang/AST/JSONNodeDumper.h index 238e43aad78..5f34440b8b5 100644 --- a/include/clang/AST/JSONNodeDumper.h +++ b/include/clang/AST/JSONNodeDumper.h @@ -141,6 +141,8 @@ class JSONNodeDumper JOS.attribute(Key, Value); } + void writeIncludeStack(PresumedLoc Loc, bool JustFirst = false); + // Writes the attributes of a SourceLocation object without. 
void writeBareSourceLocation(SourceLocation Loc, bool IsSpelling); diff --git a/include/clang/AST/Mangle.h b/include/clang/AST/Mangle.h index b1fbe936136..5db5c5b977d 100644 --- a/include/clang/AST/Mangle.h +++ b/include/clang/AST/Mangle.h @@ -56,7 +56,7 @@ private: llvm::DenseMap GlobalBlockIds; llvm::DenseMap LocalBlockIds; - llvm::DenseMap AnonStructIds; + llvm::DenseMap AnonStructIds; public: ManglerKind getKind() const { return Kind; } @@ -82,9 +82,9 @@ public: return Result.first->second; } - uint64_t getAnonymousStructId(const TagDecl *TD) { - std::pair::iterator, bool> - Result = AnonStructIds.insert(std::make_pair(TD, AnonStructIds.size())); + uint64_t getAnonymousStructId(const NamedDecl *D) { + std::pair::iterator, bool> + Result = AnonStructIds.insert(std::make_pair(D, AnonStructIds.size())); return Result.first->second; } @@ -170,6 +170,8 @@ public: virtual void mangleCXXDtorComdat(const CXXDestructorDecl *D, raw_ostream &) = 0; + virtual void mangleLambdaSig(const CXXRecordDecl *Lambda, raw_ostream &) = 0; + static bool classof(const MangleContext *C) { return C->getKind() == MK_Itanium; } @@ -248,8 +250,16 @@ class ASTNameGenerator { public: explicit ASTNameGenerator(ASTContext &Ctx); ~ASTNameGenerator(); + + /// Writes name for \p D to \p OS. + /// \returns true on failure, false on success. bool writeName(const Decl *D, raw_ostream &OS); + + /// \returns name for \p D std::string getName(const Decl *D); + + /// \returns all applicable mangled names. + /// For example C++ constructors/destructors can have multiple. std::vector getAllManglings(const Decl *D); private: diff --git a/include/clang/AST/NSAPI.h b/include/clang/AST/NSAPI.h index 21f0c5458d8..a8bd2d0f17e 100644 --- a/include/clang/AST/NSAPI.h +++ b/include/clang/AST/NSAPI.h @@ -55,9 +55,6 @@ public: /// The Objective-C NSString selectors. Selector getNSStringSelector(NSStringMethodKind MK) const; - /// Return NSStringMethodKind if \param Sel is such a selector. 
- Optional getNSStringMethodKind(Selector Sel) const; - /// Returns true if the expression \param E is a reference of /// "NSUTF8StringEncoding" enum constant. bool isNSUTF8StringEncodingConstant(const Expr *E) const { diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h index eadcc62a345..b2a2035dcb3 100644 --- a/include/clang/AST/OpenMPClause.h +++ b/include/clang/AST/OpenMPClause.h @@ -519,7 +519,7 @@ public: /// \endcode /// In this example directive '#pragma omp task' has simple 'final' /// clause with condition 'a > 5'. -class OMPFinalClause : public OMPClause { +class OMPFinalClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// Location of '('. @@ -534,18 +534,25 @@ class OMPFinalClause : public OMPClause { public: /// Build 'final' clause with condition \a Cond. /// + /// \param Cond Condition of the clause. + /// \param HelperCond Helper condition for the construct. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. - /// \param Cond Condition of the clause. /// \param EndLoc Ending location of the clause. - OMPFinalClause(Expr *Cond, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) - : OMPClause(OMPC_final, StartLoc, EndLoc), LParenLoc(LParenLoc), - Condition(Cond) {} + OMPFinalClause(Expr *Cond, Stmt *HelperCond, + OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OMPClause(OMPC_final, StartLoc, EndLoc), OMPClauseWithPreInit(this), + LParenLoc(LParenLoc), Condition(Cond) { + setPreInitStmt(HelperCond, CaptureRegion); + } /// Build an empty clause. OMPFinalClause() - : OMPClause(OMPC_final, SourceLocation(), SourceLocation()) {} + : OMPClause(OMPC_final, SourceLocation(), SourceLocation()), + OMPClauseWithPreInit(this) {} /// Sets the location of '('. 
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } @@ -562,11 +569,10 @@ public: return const_child_range(&Condition, &Condition + 1); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -2099,10 +2105,12 @@ public: } child_range used_children() { - return child_range(child_iterator(), child_iterator()); + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); } const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -2616,10 +2624,12 @@ public: } child_range used_children() { - return child_range(child_iterator(), child_iterator()); + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); } const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -3212,6 +3222,14 @@ class OMPLinearClause final return llvm::makeArrayRef(getUpdates().end(), varlist_size()); } + /// Gets the list of used expressions for linear variables. + MutableArrayRef getUsedExprs() { + return MutableArrayRef(getFinals().end() + 2, varlist_size() + 1); + } + ArrayRef getUsedExprs() const { + return llvm::makeArrayRef(getFinals().end() + 2, varlist_size() + 1); + } + /// Sets the list of the copies of original linear variables. 
/// \param PL List of expressions. void setPrivates(ArrayRef PL); @@ -3291,6 +3309,9 @@ public: /// \param FL List of expressions. void setFinals(ArrayRef FL); + /// Sets the list of used expressions for the linear clause. + void setUsedExprs(ArrayRef UE); + using privates_iterator = MutableArrayRef::iterator; using privates_const_iterator = ArrayRef::iterator; using privates_range = llvm::iterator_range; @@ -3343,6 +3364,21 @@ public: return finals_const_range(getFinals().begin(), getFinals().end()); } + using used_expressions_iterator = MutableArrayRef::iterator; + using used_expressions_const_iterator = ArrayRef::iterator; + using used_expressions_range = + llvm::iterator_range; + using used_expressions_const_range = + llvm::iterator_range; + + used_expressions_range used_expressions() { + return finals_range(getUsedExprs().begin(), getUsedExprs().end()); + } + + used_expressions_const_range used_expressions() const { + return finals_const_range(getUsedExprs().begin(), getUsedExprs().end()); + } + child_range children() { return child_range(reinterpret_cast(varlist_begin()), reinterpret_cast(varlist_end())); @@ -3353,11 +3389,11 @@ public: return const_child_range(Children.begin(), Children.end()); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); + const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -4995,12 +5031,17 @@ public: } child_range used_children() { + if (MapType == OMPC_MAP_to || MapType == OMPC_MAP_tofrom) + return child_range(reinterpret_cast(varlist_begin()), + reinterpret_cast(varlist_end())); return child_range(child_iterator(), child_iterator()); } const_child_range used_children() const { - return const_child_range(const_child_iterator(), 
const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } + static bool classof(const OMPClause *T) { return T->getClauseKind() == OMPC_map; } @@ -5165,7 +5206,7 @@ public: /// \endcode /// In this example directive '#pragma omp teams' has clause 'priority' with /// single expression 'n'. -class OMPPriorityClause : public OMPClause { +class OMPPriorityClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// Location of '('. @@ -5182,18 +5223,25 @@ class OMPPriorityClause : public OMPClause { public: /// Build 'priority' clause. /// - /// \param E Expression associated with this clause. + /// \param Priority Expression associated with this clause. + /// \param HelperPriority Helper priority for the construct. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. - OMPPriorityClause(Expr *E, SourceLocation StartLoc, SourceLocation LParenLoc, - SourceLocation EndLoc) - : OMPClause(OMPC_priority, StartLoc, EndLoc), LParenLoc(LParenLoc), - Priority(E) {} + OMPPriorityClause(Expr *Priority, Stmt *HelperPriority, + OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OMPClause(OMPC_priority, StartLoc, EndLoc), OMPClauseWithPreInit(this), + LParenLoc(LParenLoc), Priority(Priority) { + setPreInitStmt(HelperPriority, CaptureRegion); + } /// Build an empty clause. OMPPriorityClause() - : OMPClause(OMPC_priority, SourceLocation(), SourceLocation()) {} + : OMPClause(OMPC_priority, SourceLocation(), SourceLocation()), + OMPClauseWithPreInit(this) {} /// Sets the location of '('. 
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } @@ -5213,11 +5261,10 @@ public: return const_child_range(&Priority, &Priority + 1); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -5233,7 +5280,7 @@ public: /// \endcode /// In this example directive '#pragma omp taskloop' has clause 'grainsize' /// with single expression '4'. -class OMPGrainsizeClause : public OMPClause { +class OMPGrainsizeClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// Location of '('. @@ -5249,16 +5296,23 @@ public: /// Build 'grainsize' clause. /// /// \param Size Expression associated with this clause. + /// \param HelperSize Helper grainsize for the construct. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param EndLoc Ending location of the clause. - OMPGrainsizeClause(Expr *Size, SourceLocation StartLoc, + OMPGrainsizeClause(Expr *Size, Stmt *HelperSize, + OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) - : OMPClause(OMPC_grainsize, StartLoc, EndLoc), LParenLoc(LParenLoc), - Grainsize(Size) {} + : OMPClause(OMPC_grainsize, StartLoc, EndLoc), OMPClauseWithPreInit(this), + LParenLoc(LParenLoc), Grainsize(Size) { + setPreInitStmt(HelperSize, CaptureRegion); + } /// Build an empty clause. explicit OMPGrainsizeClause() - : OMPClause(OMPC_grainsize, SourceLocation(), SourceLocation()) {} + : OMPClause(OMPC_grainsize, SourceLocation(), SourceLocation()), + OMPClauseWithPreInit(this) {} /// Sets the location of '('. 
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } @@ -5275,11 +5329,10 @@ public: return const_child_range(&Grainsize, &Grainsize + 1); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { @@ -5334,7 +5387,7 @@ public: /// \endcode /// In this example directive '#pragma omp taskloop' has clause 'num_tasks' /// with single expression '4'. -class OMPNumTasksClause : public OMPClause { +class OMPNumTasksClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// Location of '('. @@ -5350,16 +5403,23 @@ public: /// Build 'num_tasks' clause. /// /// \param Size Expression associated with this clause. + /// \param HelperSize Helper number of tasks for the construct. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param EndLoc Ending location of the clause. - OMPNumTasksClause(Expr *Size, SourceLocation StartLoc, + OMPNumTasksClause(Expr *Size, Stmt *HelperSize, + OpenMPDirectiveKind CaptureRegion, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) - : OMPClause(OMPC_num_tasks, StartLoc, EndLoc), LParenLoc(LParenLoc), - NumTasks(Size) {} + : OMPClause(OMPC_num_tasks, StartLoc, EndLoc), OMPClauseWithPreInit(this), + LParenLoc(LParenLoc), NumTasks(Size) { + setPreInitStmt(HelperSize, CaptureRegion); + } /// Build an empty clause. explicit OMPNumTasksClause() - : OMPClause(OMPC_num_tasks, SourceLocation(), SourceLocation()) {} + : OMPClause(OMPC_num_tasks, SourceLocation(), SourceLocation()), + OMPClauseWithPreInit(this) {} /// Sets the location of '('.
void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } @@ -5376,11 +5436,10 @@ public: return const_child_range(&NumTasks, &NumTasks + 1); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { diff --git a/include/clang/AST/OperationKinds.def b/include/clang/AST/OperationKinds.def index 9af92c1ae7f..f29664e8eb3 100644 --- a/include/clang/AST/OperationKinds.def +++ b/include/clang/AST/OperationKinds.def @@ -66,8 +66,9 @@ CAST_OPERATION(BitCast) /// bool b; reinterpret_cast(b) = 'a'; CAST_OPERATION(LValueBitCast) -/// CK_LValueToRValueBitCast - A conversion that causes us to reinterpret an -/// lvalue as an rvalue of a different type. Created by __builtin_bit_cast. +/// CK_LValueToRValueBitCast - A conversion that causes us to reinterpret the +/// object representation of an lvalue as an rvalue. Created by +/// __builtin_bit_cast. CAST_OPERATION(LValueToRValueBitCast) /// CK_LValueToRValue - A conversion which causes the extraction of diff --git a/include/clang/AST/OptionalDiagnostic.h b/include/clang/AST/OptionalDiagnostic.h new file mode 100644 index 00000000000..c57199f0fdf --- /dev/null +++ b/include/clang/AST/OptionalDiagnostic.h @@ -0,0 +1,78 @@ +//===- OptionalDiagnostic.h - An optional diagnostic ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Implements a partial diagnostic which may not be emitted. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_OPTIONALDIAGNOSTIC_H +#define LLVM_CLANG_AST_OPTIONALDIAGNOSTIC_H + +#include "clang/AST/APValue.h" +#include "clang/Basic/PartialDiagnostic.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { + +/// A partial diagnostic which we might know in advance that we are not going +/// to emit. +class OptionalDiagnostic { + PartialDiagnostic *Diag; + +public: + explicit OptionalDiagnostic(PartialDiagnostic *Diag = nullptr) : Diag(Diag) {} + + template OptionalDiagnostic &operator<<(const T &v) { + if (Diag) + *Diag << v; + return *this; + } + + OptionalDiagnostic &operator<<(const llvm::APSInt &I) { + if (Diag) { + SmallVector Buffer; + I.toString(Buffer); + *Diag << StringRef(Buffer.data(), Buffer.size()); + } + return *this; + } + + OptionalDiagnostic &operator<<(const llvm::APFloat &F) { + if (Diag) { + // FIXME: Force the precision of the source value down so we don't + // print digits which are usually useless (we don't really care here if + // we truncate a digit by accident in edge cases). Ideally, + // APFloat::toString would automatically print the shortest + // representation which rounds to the correct value, but it's a bit + // tricky to implement. Could use std::to_chars. 
+ unsigned precision = llvm::APFloat::semanticsPrecision(F.getSemantics()); + precision = (precision * 59 + 195) / 196; + SmallVector Buffer; + F.toString(Buffer, precision); + *Diag << StringRef(Buffer.data(), Buffer.size()); + } + return *this; + } + + OptionalDiagnostic &operator<<(const APFixedPoint &FX) { + if (Diag) { + SmallVector Buffer; + FX.toString(Buffer); + *Diag << StringRef(Buffer.data(), Buffer.size()); + } + return *this; + } +}; + +} // namespace clang + +#endif diff --git a/include/clang/AST/RawCommentList.h b/include/clang/AST/RawCommentList.h index 5dc8694e77e..1eea56dee62 100644 --- a/include/clang/AST/RawCommentList.h +++ b/include/clang/AST/RawCommentList.h @@ -10,8 +10,11 @@ #define LLVM_CLANG_AST_RAWCOMMENTLIST_H #include "clang/Basic/CommentOptions.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include namespace clang { @@ -196,17 +199,25 @@ public: void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator); - ArrayRef getComments() const { - return Comments; - } + /// \returns A mapping from an offset of the start of the comment to the + /// comment itself, or nullptr in case there are no comments in \p File. 
+ const std::map *getCommentsInFile(FileID File) const; + + bool empty() const; + + unsigned getCommentBeginLine(RawComment *C, FileID File, + unsigned Offset) const; + unsigned getCommentEndOffset(RawComment *C) const; private: SourceManager &SourceMgr; - std::vector Comments; - - void addDeserializedComments(ArrayRef DeserializedComments); + // mapping: FileId -> comment begin offset -> comment + llvm::DenseMap> OrderedComments; + mutable llvm::DenseMap CommentBeginLine; + mutable llvm::DenseMap CommentEndOffset; friend class ASTReader; + friend class ASTWriter; }; } // end namespace clang diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h index 698fba2f4ed..5b58eab95d6 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -431,7 +431,7 @@ public: // Declare Traverse*() for all concrete Type classes. #define ABSTRACT_TYPE(CLASS, BASE) #define TYPE(CLASS, BASE) bool Traverse##CLASS##Type(CLASS##Type *T); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // The above header #undefs ABSTRACT_TYPE and TYPE upon exit. // Define WalkUpFrom*() and empty Visit*() for all Type classes. @@ -444,7 +444,7 @@ public: return true; \ } \ bool Visit##CLASS##Type(CLASS##Type *T) { return true; } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // ---- Methods on TypeLocs ---- // FIXME: this currently just calls the matching Type methods @@ -460,7 +460,7 @@ public: bool VisitTypeLoc(TypeLoc TL) { return true; } // QualifiedTypeLoc and UnqualTypeLoc are not declared in - // TypeNodes.def and thus need to be handled specially. + // TypeNodes.inc and thus need to be handled specially. 
bool WalkUpFromQualifiedTypeLoc(QualifiedTypeLoc TL) { return getDerived().VisitUnqualTypeLoc(TL.getUnqualifiedLoc()); } @@ -478,7 +478,7 @@ public: return true; \ } \ bool Visit##CLASS##TypeLoc(CLASS##TypeLoc TL) { return true; } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // ---- Methods on Decls ---- @@ -676,7 +676,7 @@ bool RecursiveASTVisitor::TraverseType(QualType T) { #define TYPE(CLASS, BASE) \ case Type::CLASS: \ DISPATCH(CLASS##Type, CLASS##Type, const_cast(T.getTypePtr())); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } return true; @@ -722,12 +722,6 @@ bool RecursiveASTVisitor::TraverseDecl(Decl *D) { break; #include "clang/AST/DeclNodes.inc" } - - // Visit any attributes attached to this declaration. - for (auto *I : D->attrs()) { - if (!getDerived().TraverseAttr(I)) - return false; - } return true; } @@ -965,8 +959,11 @@ DEF_TRAVERSE_TYPE(AdjustedType, { TRY_TO(TraverseType(T->getOriginalType())); }) DEF_TRAVERSE_TYPE(DecayedType, { TRY_TO(TraverseType(T->getOriginalType())); }) -DEF_TRAVERSE_TYPE(ConstantArrayType, - { TRY_TO(TraverseType(T->getElementType())); }) +DEF_TRAVERSE_TYPE(ConstantArrayType, { + TRY_TO(TraverseType(T->getElementType())); + if (T->getSizeExpr()) + TRY_TO(TraverseStmt(const_cast(T->getSizeExpr()))); +}) DEF_TRAVERSE_TYPE(IncompleteArrayType, { TRY_TO(TraverseType(T->getElementType())); }) @@ -1407,6 +1404,11 @@ bool RecursiveASTVisitor::TraverseDeclContextHelper(DeclContext *DC) { { CODE; } \ if (ReturnValue && ShouldVisitChildren) \ TRY_TO(TraverseDeclContextHelper(dyn_cast(D))); \ + if (ReturnValue) { \ + /* Visit any attributes attached to this declaration. 
*/ \ + for (auto *I : D->attrs()) \ + TRY_TO(getDerived().TraverseAttr(I)); \ + } \ if (ReturnValue && getDerived().shouldTraversePostOrder()) \ TRY_TO(WalkUpFrom##DECL(D)); \ return ReturnValue; \ @@ -1631,9 +1633,11 @@ template bool RecursiveASTVisitor::TraverseTemplateParameterListHelper( TemplateParameterList *TPL) { if (TPL) { - for (TemplateParameterList::iterator I = TPL->begin(), E = TPL->end(); - I != E; ++I) { - TRY_TO(TraverseDecl(*I)); + for (NamedDecl *D : *TPL) { + TRY_TO(TraverseDecl(D)); + } + if (Expr *RequiresClause = TPL->getRequiresClause()) { + TRY_TO(TraverseStmt(RequiresClause)); } } return true; @@ -2023,11 +2027,18 @@ bool RecursiveASTVisitor::TraverseFunctionHelper(FunctionDecl *D) { if (CXXConstructorDecl *Ctor = dyn_cast(D)) { // Constructor initializers. for (auto *I : Ctor->inits()) { - TRY_TO(TraverseConstructorInitializer(I)); + if (I->isWritten() || getDerived().shouldVisitImplicitCode()) + TRY_TO(TraverseConstructorInitializer(I)); } } - if (D->isThisDeclarationADefinition()) { + bool VisitBody = D->isThisDeclarationADefinition(); + // If a method is set to default outside the class definition the compiler + // generates the method body and adds it to the AST. + if (const auto *MD = dyn_cast(D)) + VisitBody &= !MD->isDefaulted() || getDerived().shouldVisitImplicitCode(); + + if (VisitBody) { TRY_TO(TraverseStmt(D->getBody())); // Function body. } return true; @@ -2308,19 +2319,30 @@ bool RecursiveASTVisitor::TraverseSynOrSemInitListExpr( return true; } -// This method is called once for each pair of syntactic and semantic -// InitListExpr, and it traverses the subtrees defined by the two forms. This -// may cause some of the children to be visited twice, if they appear both in -// the syntactic and the semantic form. +// If shouldVisitImplicitCode() returns false, this method traverses only the +// syntactic form of InitListExpr. 
+// If shouldVisitImplicitCode() returns true, this method is called once for +// each pair of syntactic and semantic InitListExpr, and it traverses the +// subtrees defined by the two forms. This may cause some of the children to be +// visited twice, if they appear both in the syntactic and the semantic form. // // There is no guarantee about which form \p S takes when this method is called. template bool RecursiveASTVisitor::TraverseInitListExpr( InitListExpr *S, DataRecursionQueue *Queue) { + if (S->isSemanticForm() && S->isSyntacticForm()) { + // `S` does not have alternative forms, traverse only once. + TRY_TO(TraverseSynOrSemInitListExpr(S, Queue)); + return true; + } TRY_TO(TraverseSynOrSemInitListExpr( S->isSemanticForm() ? S->getSyntacticForm() : S, Queue)); - TRY_TO(TraverseSynOrSemInitListExpr( - S->isSemanticForm() ? S : S->getSemanticForm(), Queue)); + if (getDerived().shouldVisitImplicitCode()) { + // Only visit the semantic form if the clients are interested in implicit, + // compiler-generated code. + TRY_TO(TraverseSynOrSemInitListExpr( + S->isSemanticForm() ? 
S : S->getSemanticForm(), Queue)); + } return true; } @@ -2584,6 +2606,15 @@ DEF_TRAVERSE_STMT(SEHLeaveStmt, {}) DEF_TRAVERSE_STMT(CapturedStmt, { TRY_TO(TraverseDecl(S->getCapturedDecl())); }) DEF_TRAVERSE_STMT(CXXOperatorCallExpr, {}) +DEF_TRAVERSE_STMT(CXXRewrittenBinaryOperator, { + if (!getDerived().shouldVisitImplicitCode()) { + CXXRewrittenBinaryOperator::DecomposedForm Decomposed = + S->getDecomposedForm(); + TRY_TO(TraverseStmt(const_cast(Decomposed.LHS))); + TRY_TO(TraverseStmt(const_cast(Decomposed.RHS))); + ShouldVisitChildren = false; + } +}) DEF_TRAVERSE_STMT(OpaqueValueExpr, {}) DEF_TRAVERSE_STMT(TypoExpr, {}) DEF_TRAVERSE_STMT(CUDAKernelCallExpr, {}) @@ -2639,6 +2670,12 @@ DEF_TRAVERSE_STMT(CoyieldExpr, { } }) +DEF_TRAVERSE_STMT(ConceptSpecializationExpr, { + TRY_TO(TraverseTemplateArgumentLocsHelper( + S->getTemplateArgsAsWritten()->getTemplateArgs(), + S->getTemplateArgsAsWritten()->NumTemplateArgs)); +}) + // These literals (all of them) do not need any action. DEF_TRAVERSE_STMT(IntegerLiteral, {}) DEF_TRAVERSE_STMT(FixedPointLiteral, {}) @@ -2768,6 +2805,15 @@ DEF_TRAVERSE_STMT(OMPTaskLoopDirective, DEF_TRAVERSE_STMT(OMPTaskLoopSimdDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPMasterTaskLoopDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + +DEF_TRAVERSE_STMT(OMPMasterTaskLoopSimdDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + +DEF_TRAVERSE_STMT(OMPParallelMasterTaskLoopDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPDistributeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -2826,6 +2872,8 @@ bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { #include "clang/Basic/OpenMPKinds.def" case OMPC_threadprivate: case OMPC_uniform: + case OMPC_device_type: + case OMPC_match: case OMPC_unknown: break; } @@ -2870,6 +2918,7 @@ bool RecursiveASTVisitor::VisitOMPIfClause(OMPIfClause *C) { template bool 
RecursiveASTVisitor::VisitOMPFinalClause(OMPFinalClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getCondition())); return true; } @@ -3240,6 +3289,7 @@ bool RecursiveASTVisitor::VisitOMPThreadLimitClause( template bool RecursiveASTVisitor::VisitOMPPriorityClause( OMPPriorityClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getPriority())); return true; } @@ -3247,6 +3297,7 @@ bool RecursiveASTVisitor::VisitOMPPriorityClause( template bool RecursiveASTVisitor::VisitOMPGrainsizeClause( OMPGrainsizeClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getGrainsize())); return true; } @@ -3254,6 +3305,7 @@ bool RecursiveASTVisitor::VisitOMPGrainsizeClause( template bool RecursiveASTVisitor::VisitOMPNumTasksClause( OMPNumTasksClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getNumTasks())); return true; } diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h index 403b88ac3a3..7aebbf2cb6a 100644 --- a/include/clang/AST/Stmt.h +++ b/include/clang/AST/Stmt.h @@ -604,6 +604,15 @@ protected: unsigned FPFeatures : 3; }; + class CXXRewrittenBinaryOperatorBitfields { + friend class ASTStmtReader; + friend class CXXRewrittenBinaryOperator; + + unsigned : NumCallExprBits; + + unsigned IsReversed : 1; + }; + class CXXBoolLiteralExprBitfields { friend class CXXBoolLiteralExpr; @@ -978,6 +987,7 @@ protected: // C++ Expressions CXXOperatorCallExprBitfields CXXOperatorCallExprBits; + CXXRewrittenBinaryOperatorBitfields CXXRewrittenBinaryOperatorBits; CXXBoolLiteralExprBitfields CXXBoolLiteralExprBits; CXXNullPtrLiteralExprBitfields CXXNullPtrLiteralExprBits; CXXThisExprBitfields CXXThisExprBits; diff --git a/include/clang/AST/StmtOpenMP.h b/include/clang/AST/StmtOpenMP.h index e37f5b1e000..ddfb3060b15 100644 --- a/include/clang/AST/StmtOpenMP.h +++ b/include/clang/AST/StmtOpenMP.h @@ -17,6 +17,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include 
"clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" @@ -448,7 +449,8 @@ class OMPLoopDirective : public OMPExecutableDirective { PreInitsOffset = 8, // The '...End' enumerators do not correspond to child expressions - they // specify the offset to the end (and start of the following counters/ - // updates/finals arrays). + // updates/finals/dependent_counters/dependent_inits/finals_conditions + // arrays). DefaultEnd = 9, // The following 8 exprs are used by worksharing and distribute loops only. IsLastIterVariableOffset = 9, @@ -474,7 +476,8 @@ class OMPLoopDirective : public OMPExecutableDirective { CombinedNextUpperBoundOffset = 27, CombinedDistConditionOffset = 28, CombinedParForInDistConditionOffset = 29, - // Offset to the end (and start of the following counters/updates/finals + // Offset to the end (and start of the following + // counters/updates/finals/dependent_counters/dependent_inits/finals_conditions // arrays) for combined distribute loop directives. CombinedDistributeEnd = 30, }; @@ -517,6 +520,30 @@ class OMPLoopDirective : public OMPExecutableDirective { return MutableArrayRef(Storage, CollapsedNum); } + /// Get the dependent counters storage. + MutableArrayRef getDependentCounters() { + Expr **Storage = reinterpret_cast( + &*std::next(child_begin(), + getArraysOffset(getDirectiveKind()) + 5 * CollapsedNum)); + return MutableArrayRef(Storage, CollapsedNum); + } + + /// Get the dependent inits storage. + MutableArrayRef getDependentInits() { + Expr **Storage = reinterpret_cast( + &*std::next(child_begin(), + getArraysOffset(getDirectiveKind()) + 6 * CollapsedNum)); + return MutableArrayRef(Storage, CollapsedNum); + } + + /// Get the finals conditions storage. 
+ MutableArrayRef getFinalsConditions() { + Expr **Storage = reinterpret_cast( + &*std::next(child_begin(), + getArraysOffset(getDirectiveKind()) + 7 * CollapsedNum)); + return MutableArrayRef(Storage, CollapsedNum); + } + protected: /// Build instance of loop directive of class \a Kind. /// @@ -551,9 +578,10 @@ protected: /// Children number. static unsigned numLoopChildren(unsigned CollapsedNum, OpenMPDirectiveKind Kind) { - return getArraysOffset(Kind) + 5 * CollapsedNum; // Counters, - // PrivateCounters, Inits, - // Updates and Finals + return getArraysOffset(Kind) + + 8 * CollapsedNum; // Counters, PrivateCounters, Inits, + // Updates, Finals, DependentCounters, + // DependentInits, FinalsConditions. } void setIterationVariable(Expr *IV) { @@ -703,6 +731,9 @@ protected: void setInits(ArrayRef A); void setUpdates(ArrayRef A); void setFinals(ArrayRef A); + void setDependentCounters(ArrayRef A); + void setDependentInits(ArrayRef A); + void setFinalsConditions(ArrayRef A); public: /// The expressions built to support OpenMP loops in combined/composite @@ -798,6 +829,15 @@ public: SmallVector Updates; /// Final loop counter values for GodeGen. SmallVector Finals; + /// List of counters required for the generation of the non-rectangular + /// loops. + SmallVector DependentCounters; + /// List of initializers required for the generation of the non-rectangular + /// loops. + SmallVector DependentInits; + /// List of final conditions required for the generation of the + /// non-rectangular loops. + SmallVector FinalsConditions; /// Init statement for all captured expressions. Stmt *PreInits; @@ -813,7 +853,9 @@ public: } /// Initialize all the fields to null. - /// \param Size Number of elements in the counters/finals/updates arrays. + /// \param Size Number of elements in the + /// counters/finals/updates/dependent_counters/dependent_inits/finals_conditions + /// arrays. 
void clear(unsigned Size) { IterationVarRef = nullptr; LastIteration = nullptr; @@ -839,12 +881,18 @@ public: Inits.resize(Size); Updates.resize(Size); Finals.resize(Size); + DependentCounters.resize(Size); + DependentInits.resize(Size); + FinalsConditions.resize(Size); for (unsigned i = 0; i < Size; ++i) { Counters[i] = nullptr; PrivateCounters[i] = nullptr; Inits[i] = nullptr; Updates[i] = nullptr; Finals[i] = nullptr; + DependentCounters[i] = nullptr; + DependentInits[i] = nullptr; + FinalsConditions[i] = nullptr; } PreInits = nullptr; DistCombinedFields.LB = nullptr; @@ -1040,10 +1088,22 @@ public: // This relies on the loop form is already checked by Sema. const Stmt *Body = getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); - Body = cast(Body)->getBody(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected canonical for loop or range-based for loop."); + Body = cast(Body)->getBody(); + } for (unsigned Cnt = 1; Cnt < CollapsedNum; ++Cnt) { Body = Body->IgnoreContainers(); - Body = cast(Body)->getBody(); + if (auto *For = dyn_cast(Body)) { + Body = For->getBody(); + } else { + assert(isa(Body) && + "Expected canonical for loop or range-based for loop."); + Body = cast(Body)->getBody(); + } } return Body; } @@ -1078,6 +1138,24 @@ public: return const_cast(this)->getFinals(); } + ArrayRef dependent_counters() { return getDependentCounters(); } + + ArrayRef dependent_counters() const { + return const_cast(this)->getDependentCounters(); + } + + ArrayRef dependent_inits() { return getDependentInits(); } + + ArrayRef dependent_inits() const { + return const_cast(this)->getDependentInits(); + } + + ArrayRef finals_conditions() { return getFinalsConditions(); } + + ArrayRef finals_conditions() const { + return const_cast(this)->getFinalsConditions(); + } + static bool classof(const Stmt *T) { return T->getStmtClass() == OMPSimdDirectiveClass || T->getStmtClass() == OMPForDirectiveClass || @@ -1086,6 
+1164,9 @@ public: T->getStmtClass() == OMPParallelForSimdDirectiveClass || T->getStmtClass() == OMPTaskLoopDirectiveClass || T->getStmtClass() == OMPTaskLoopSimdDirectiveClass || + T->getStmtClass() == OMPMasterTaskLoopDirectiveClass || + T->getStmtClass() == OMPMasterTaskLoopSimdDirectiveClass || + T->getStmtClass() == OMPParallelMasterTaskLoopDirectiveClass || T->getStmtClass() == OMPDistributeDirectiveClass || T->getStmtClass() == OMPTargetParallelForDirectiveClass || T->getStmtClass() == OMPDistributeParallelForDirectiveClass || @@ -3041,6 +3122,211 @@ public: } }; +/// This represents '#pragma omp master taskloop' directive. +/// +/// \code +/// #pragma omp master taskloop private(a,b) grainsize(val) num_tasks(num) +/// \endcode +/// In this example directive '#pragma omp master taskloop' has clauses +/// 'private' with the variables 'a' and 'b', 'grainsize' with expression 'val' +/// and 'num_tasks' with expression 'num'. +/// +class OMPMasterTaskLoopDirective : public OMPLoopDirective { + friend class ASTStmtReader; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPMasterTaskLoopDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopDirectiveClass, + OMPD_master_taskloop, StartLoc, EndLoc, CollapsedNum, + NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. 
+ /// + explicit OMPMasterTaskLoopDirective(unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopDirectiveClass, + OMPD_master_taskloop, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPMasterTaskLoopDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place + /// for \a NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + static OMPMasterTaskLoopDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPMasterTaskLoopDirectiveClass; + } +}; + +/// This represents '#pragma omp master taskloop simd' directive. +/// +/// \code +/// #pragma omp master taskloop simd private(a,b) grainsize(val) num_tasks(num) +/// \endcode +/// In this example directive '#pragma omp master taskloop simd' has clauses +/// 'private' with the variables 'a' and 'b', 'grainsize' with expression 'val' +/// and 'num_tasks' with expression 'num'. +/// +class OMPMasterTaskLoopSimdDirective : public OMPLoopDirective { + friend class ASTStmtReader; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. 
+ /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPMasterTaskLoopSimdDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopSimdDirectiveClass, + OMPD_master_taskloop_simd, StartLoc, EndLoc, + CollapsedNum, NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPMasterTaskLoopSimdDirective(unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPMasterTaskLoopSimdDirectiveClass, + OMPD_master_taskloop_simd, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \p Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPMasterTaskLoopSimdDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place for \p NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. 
+ /// + static OMPMasterTaskLoopSimdDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPMasterTaskLoopSimdDirectiveClass; + } +}; + +/// This represents '#pragma omp parallel master taskloop' directive. +/// +/// \code +/// #pragma omp parallel master taskloop private(a,b) grainsize(val) +/// num_tasks(num) +/// \endcode +/// In this example directive '#pragma omp parallel master taskloop' has clauses +/// 'private' with the variables 'a' and 'b', 'grainsize' with expression 'val' +/// and 'num_tasks' with expression 'num'. +/// +class OMPParallelMasterTaskLoopDirective : public OMPLoopDirective { + friend class ASTStmtReader; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPParallelMasterTaskLoopDirective(SourceLocation StartLoc, + SourceLocation EndLoc, + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective(this, OMPParallelMasterTaskLoopDirectiveClass, + OMPD_parallel_master_taskloop, StartLoc, EndLoc, + CollapsedNum, NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPParallelMasterTaskLoopDirective(unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPParallelMasterTaskLoopDirectiveClass, + OMPD_parallel_master_taskloop, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. 
+ /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPParallelMasterTaskLoopDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place + /// for \a NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + static OMPParallelMasterTaskLoopDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPParallelMasterTaskLoopDirectiveClass; + } +}; + /// This represents '#pragma omp distribute' directive. /// /// \code diff --git a/include/clang/AST/TextNodeDumper.h b/include/clang/AST/TextNodeDumper.h index 4c2d0710963..0ff5a614a86 100644 --- a/include/clang/AST/TextNodeDumper.h +++ b/include/clang/AST/TextNodeDumper.h @@ -146,8 +146,6 @@ class TextNodeDumper const comments::CommandTraits *Traits; - const ASTContext *Context; - const char *getCommandName(unsigned CommandID); public: diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h index 584655fe789..c9238e95210 100644 --- a/include/clang/AST/Type.h +++ b/include/clang/AST/Type.h @@ -126,7 +126,7 @@ using CanQualType = CanQual; // Provide forward declarations for all of the *Type classes. #define TYPE(Class, Base) class Class##Type; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" /// The collection of all-type qualifiers we support. 
/// Clang supports five independent qualifiers: @@ -972,6 +972,9 @@ public: friend bool operator!=(const QualType &LHS, const QualType &RHS) { return LHS.Value != RHS.Value; } + friend bool operator<(const QualType &LHS, const QualType &RHS) { + return LHS.Value < RHS.Value; + } static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy) { @@ -1434,10 +1437,9 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { public: enum TypeClass { #define TYPE(Class, Base) Class, -#define LAST_TYPE(Class) TypeLast = Class, +#define LAST_TYPE(Class) TypeLast = Class #define ABSTRACT_TYPE(Class, Base) -#include "clang/AST/TypeNodes.def" - TagFirst = Record, TagLast = Enum +#include "clang/AST/TypeNodes.inc" }; private: @@ -1511,6 +1513,15 @@ protected: unsigned SizeModifier : 3; }; + class ConstantArrayTypeBitfields { + friend class ConstantArrayType; + + unsigned : NumTypeBits + 3 + 3; + + /// Whether we have a stored size expression. + unsigned HasStoredSizeExpr : 1; + }; + class BuiltinTypeBitfields { friend class BuiltinType; @@ -1732,6 +1743,7 @@ protected: union { TypeBitfields TypeBits; ArrayTypeBitfields ArrayTypeBits; + ConstantArrayTypeBitfields ConstantArrayTypeBits; AttributedTypeBitfields AttributedTypeBits; AutoTypeBitfields AutoTypeBits; BuiltinTypeBitfields BuiltinTypeBits; @@ -2053,6 +2065,7 @@ public: bool isCARCBridgableType() const; bool isTemplateTypeParmType() const; // C++ template type parameter bool isNullPtrType() const; // C++11 std::nullptr_t + bool isNothrowT() const; // C++ std::nothrow_t bool isAlignValT() const; // C++17 std::align_val_t bool isStdByteType() const; // C++17 std::byte bool isAtomicType() const; // C11 _Atomic() @@ -2416,7 +2429,7 @@ template <> inline const Class##Type *Type::getAs() const { \ template <> inline const Class##Type *Type::castAs() const { \ return cast(CanonicalType); \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" /// This class is used for builtin types like 
'int'. Builtin /// types are always canonical and have a literal name field. @@ -2429,6 +2442,9 @@ public: // OpenCL extension types #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) Id, #include "clang/Basic/OpenCLExtensionTypes.def" +// SVE Types +#define SVE_TYPE(Name, Id, SingletonId) Id, +#include "clang/Basic/AArch64SVEACLETypes.def" // All other builtin types #define BUILTIN_TYPE(Id, SingletonId) Id, #define LAST_BUILTIN_TYPE(Id) LastKind = Id @@ -2858,22 +2874,8 @@ private: protected: friend class ASTContext; // ASTContext creates these. - // C++ [temp.dep.type]p1: - // A type is dependent if it is... - // - an array type constructed from any dependent type or whose - // size is specified by a constant expression that is - // value-dependent, - ArrayType(TypeClass tc, QualType et, QualType can, - ArraySizeModifier sm, unsigned tq, - bool ContainsUnexpandedParameterPack) - : Type(tc, can, et->isDependentType() || tc == DependentSizedArray, - et->isInstantiationDependentType() || tc == DependentSizedArray, - (tc == VariableArray || et->isVariablyModifiedType()), - ContainsUnexpandedParameterPack), - ElementType(et) { - ArrayTypeBits.IndexTypeQuals = tq; - ArrayTypeBits.SizeModifier = sm; - } + ArrayType(TypeClass tc, QualType et, QualType can, ArraySizeModifier sm, + unsigned tq, const Expr *sz = nullptr); public: QualType getElementType() const { return ElementType; } @@ -2901,25 +2903,35 @@ public: /// Represents the canonical version of C arrays with a specified constant size. /// For example, the canonical type for 'int A[4 + 4*100]' is a /// ConstantArrayType where the element type is 'int' and the size is 404. -class ConstantArrayType : public ArrayType { +class ConstantArrayType final + : public ArrayType, + private llvm::TrailingObjects { + friend class ASTContext; // ASTContext creates these. + friend TrailingObjects; + llvm::APInt Size; // Allows us to unique the type. 
ConstantArrayType(QualType et, QualType can, const llvm::APInt &size, - ArraySizeModifier sm, unsigned tq) - : ArrayType(ConstantArray, et, can, sm, tq, - et->containsUnexpandedParameterPack()), - Size(size) {} + const Expr *sz, ArraySizeModifier sm, unsigned tq) + : ArrayType(ConstantArray, et, can, sm, tq, sz), Size(size) { + ConstantArrayTypeBits.HasStoredSizeExpr = sz != nullptr; + if (ConstantArrayTypeBits.HasStoredSizeExpr) { + assert(!can.isNull() && "canonical constant array should not have size"); + *getTrailingObjects() = sz; + } + } -protected: - friend class ASTContext; // ASTContext creates these. - - ConstantArrayType(TypeClass tc, QualType et, QualType can, - const llvm::APInt &size, ArraySizeModifier sm, unsigned tq) - : ArrayType(tc, et, can, sm, tq, et->containsUnexpandedParameterPack()), - Size(size) {} + unsigned numTrailingObjects(OverloadToken) const { + return ConstantArrayTypeBits.HasStoredSizeExpr; + } public: const llvm::APInt &getSize() const { return Size; } + const Expr *getSizeExpr() const { + return ConstantArrayTypeBits.HasStoredSizeExpr + ? *getTrailingObjects() + : nullptr; + } bool isSugared() const { return false; } QualType desugar() const { return QualType(this, 0); } @@ -2933,19 +2945,15 @@ public: /// can require, which limits the maximum size of the array. 
static unsigned getMaxSizeBits(const ASTContext &Context); - void Profile(llvm::FoldingSetNodeID &ID) { - Profile(ID, getElementType(), getSize(), + void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx) { + Profile(ID, Ctx, getElementType(), getSize(), getSizeExpr(), getSizeModifier(), getIndexTypeCVRQualifiers()); } - static void Profile(llvm::FoldingSetNodeID &ID, QualType ET, - const llvm::APInt &ArraySize, ArraySizeModifier SizeMod, - unsigned TypeQuals) { - ID.AddPointer(ET.getAsOpaquePtr()); - ID.AddInteger(ArraySize.getZExtValue()); - ID.AddInteger(SizeMod); - ID.AddInteger(TypeQuals); - } + static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Ctx, + QualType ET, const llvm::APInt &ArraySize, + const Expr *SizeExpr, ArraySizeModifier SizeMod, + unsigned TypeQuals); static bool classof(const Type *T) { return T->getTypeClass() == ConstantArray; @@ -2960,8 +2968,7 @@ class IncompleteArrayType : public ArrayType { IncompleteArrayType(QualType et, QualType can, ArraySizeModifier sm, unsigned tq) - : ArrayType(IncompleteArray, et, can, sm, tq, - et->containsUnexpandedParameterPack()) {} + : ArrayType(IncompleteArray, et, can, sm, tq) {} public: friend class StmtIteratorBase; @@ -3013,8 +3020,7 @@ class VariableArrayType : public ArrayType { VariableArrayType(QualType et, QualType can, Expr *e, ArraySizeModifier sm, unsigned tq, SourceRange brackets) - : ArrayType(VariableArray, et, can, sm, tq, - et->containsUnexpandedParameterPack()), + : ArrayType(VariableArray, et, can, sm, tq, e), SizeExpr((Stmt*) e), Brackets(brackets) {} public: @@ -4429,7 +4435,7 @@ public: bool isBeingDefined() const; static bool classof(const Type *T) { - return T->getTypeClass() >= TagFirst && T->getTypeClass() <= TagLast; + return T->getTypeClass() == Enum || T->getTypeClass() == Record; } }; @@ -5563,7 +5569,7 @@ class ObjCTypeParamType : public Type, public: bool isSugared() const { return true; } - QualType desugar() const { return getCanonicalTypeInternal(); 
} + QualType desugar() const; static bool classof(const Type *T) { return T->getTypeClass() == ObjCTypeParam; @@ -6347,6 +6353,7 @@ inline bool QualType::isCForbiddenLValueType() const { /// \returns True for types specified in C++0x [basic.fundamental]. inline bool Type::isFundamentalType() const { return isVoidType() || + isNullPtrType() || // FIXME: It's really annoying that we don't have an // 'isArithmeticType()' which agrees with the standard definition. (isArithmeticType() && !isEnumeralType()); diff --git a/include/clang/AST/TypeLoc.h b/include/clang/AST/TypeLoc.h index 40d17f991f1..f305680d775 100644 --- a/include/clang/AST/TypeLoc.h +++ b/include/clang/AST/TypeLoc.h @@ -106,7 +106,7 @@ public: #define ABSTRACT_TYPE(Class, Base) #define TYPE(Class, Base) \ Class = Type::Class, -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" Qualified }; diff --git a/include/clang/AST/TypeLocNodes.def b/include/clang/AST/TypeLocNodes.def index c0dfe150d6c..81448c7e7ce 100644 --- a/include/clang/AST/TypeLocNodes.def +++ b/include/clang/AST/TypeLocNodes.def @@ -31,7 +31,7 @@ TYPELOC(Qualified, TypeLoc) #define TYPE(Class, Base) UNQUAL_TYPELOC(Class, Base##Loc) #define ABSTRACT_TYPE(Class, Base) ABSTRACT_TYPELOC(Class, Base##Loc) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" #undef DECLARATOR_TYPELOC #undef TYPESPEC_TYPELOC diff --git a/include/clang/AST/TypeNodes.def b/include/clang/AST/TypeNodes.def deleted file mode 100644 index 58a5f880cbe..00000000000 --- a/include/clang/AST/TypeNodes.def +++ /dev/null @@ -1,135 +0,0 @@ -//===-- TypeNodes.def - Metadata about Type AST nodes -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the AST type info database. Each type node is -// enumerated by providing its name (e.g., "Builtin" or "Enum") and -// base class (e.g., "Type" or "TagType"). Depending on where in the -// abstract syntax tree the type will show up, the enumeration uses -// one of five different macros: -// -// TYPE(Class, Base) - A type that can show up anywhere in the AST, -// and might be dependent, canonical, or non-canonical. All clients -// will need to understand these types. -// -// ABSTRACT_TYPE(Class, Base) - An abstract class that shows up in -// the type hierarchy but has no concrete instances. -// -// NON_CANONICAL_TYPE(Class, Base) - A type that can show up -// anywhere in the AST but will never be a part of a canonical -// type. Clients that only need to deal with canonical types -// (ignoring, e.g., typedefs and other type aliases used for -// pretty-printing) can ignore these types. -// -// DEPENDENT_TYPE(Class, Base) - A type that will only show up -// within a C++ template that has not been instantiated, e.g., a -// type that is always dependent. Clients that do not need to deal -// with uninstantiated C++ templates can ignore these types. -// -// NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) - A type that -// is non-canonical unless it is dependent. Defaults to TYPE because -// it is neither reliably dependent nor reliably non-canonical. -// -// There is a sixth macro, independent of the others. Most clients -// will not need to use it. -// -// LEAF_TYPE(Class) - A type that never has inner types. Clients -// which can operate on such types more efficiently may wish to do so. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ABSTRACT_TYPE -# define ABSTRACT_TYPE(Class, Base) TYPE(Class, Base) -#endif - -#ifndef NON_CANONICAL_TYPE -# define NON_CANONICAL_TYPE(Class, Base) TYPE(Class, Base) -#endif - -#ifndef DEPENDENT_TYPE -# define DEPENDENT_TYPE(Class, Base) TYPE(Class, Base) -#endif - -#ifndef NON_CANONICAL_UNLESS_DEPENDENT_TYPE -# define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) TYPE(Class, Base) -#endif - -TYPE(Builtin, Type) -TYPE(Complex, Type) -TYPE(Pointer, Type) -TYPE(BlockPointer, Type) -ABSTRACT_TYPE(Reference, Type) -TYPE(LValueReference, ReferenceType) -TYPE(RValueReference, ReferenceType) -TYPE(MemberPointer, Type) -ABSTRACT_TYPE(Array, Type) -TYPE(ConstantArray, ArrayType) -TYPE(IncompleteArray, ArrayType) -TYPE(VariableArray, ArrayType) -DEPENDENT_TYPE(DependentSizedArray, ArrayType) -DEPENDENT_TYPE(DependentSizedExtVector, Type) -DEPENDENT_TYPE(DependentAddressSpace, Type) -TYPE(Vector, Type) -DEPENDENT_TYPE(DependentVector, Type) -TYPE(ExtVector, VectorType) -ABSTRACT_TYPE(Function, Type) -TYPE(FunctionProto, FunctionType) -TYPE(FunctionNoProto, FunctionType) -DEPENDENT_TYPE(UnresolvedUsing, Type) -NON_CANONICAL_TYPE(Paren, Type) -NON_CANONICAL_TYPE(Typedef, Type) -NON_CANONICAL_TYPE(MacroQualified, Type) -NON_CANONICAL_TYPE(Adjusted, Type) -NON_CANONICAL_TYPE(Decayed, AdjustedType) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(TypeOfExpr, Type) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(TypeOf, Type) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Decltype, Type) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(UnaryTransform, Type) -ABSTRACT_TYPE(Tag, Type) -TYPE(Record, TagType) -TYPE(Enum, TagType) -NON_CANONICAL_TYPE(Elaborated, Type) -NON_CANONICAL_TYPE(Attributed, Type) -DEPENDENT_TYPE(TemplateTypeParm, Type) -NON_CANONICAL_TYPE(SubstTemplateTypeParm, Type) -DEPENDENT_TYPE(SubstTemplateTypeParmPack, Type) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(TemplateSpecialization, Type) -ABSTRACT_TYPE(Deduced, 
Type) -TYPE(Auto, DeducedType) -TYPE(DeducedTemplateSpecialization, DeducedType) -DEPENDENT_TYPE(InjectedClassName, Type) -DEPENDENT_TYPE(DependentName, Type) -DEPENDENT_TYPE(DependentTemplateSpecialization, Type) -NON_CANONICAL_UNLESS_DEPENDENT_TYPE(PackExpansion, Type) -NON_CANONICAL_TYPE(ObjCTypeParam, Type) -TYPE(ObjCObject, Type) -TYPE(ObjCInterface, ObjCObjectType) -TYPE(ObjCObjectPointer, Type) -TYPE(Pipe, Type) -TYPE(Atomic, Type) - -#ifdef LAST_TYPE -LAST_TYPE(Atomic) -#undef LAST_TYPE -#endif - -// These types are always leaves in the type hierarchy. -#ifdef LEAF_TYPE -LEAF_TYPE(Enum) -LEAF_TYPE(Builtin) -LEAF_TYPE(Record) -LEAF_TYPE(InjectedClassName) -LEAF_TYPE(ObjCInterface) -LEAF_TYPE(TemplateTypeParm) -#undef LEAF_TYPE -#endif - -#undef NON_CANONICAL_UNLESS_DEPENDENT_TYPE -#undef DEPENDENT_TYPE -#undef NON_CANONICAL_TYPE -#undef ABSTRACT_TYPE -#undef TYPE diff --git a/include/clang/AST/TypeVisitor.h b/include/clang/AST/TypeVisitor.h index 8930ec85394..17301835fb1 100644 --- a/include/clang/AST/TypeVisitor.h +++ b/include/clang/AST/TypeVisitor.h @@ -70,7 +70,7 @@ public: switch (T->getTypeClass()) { #define ABSTRACT_TYPE(CLASS, PARENT) #define TYPE(CLASS, PARENT) case Type::CLASS: DISPATCH(CLASS##Type); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } llvm_unreachable("Unknown type class!"); } @@ -80,7 +80,7 @@ public: #define TYPE(CLASS, PARENT) RetTy Visit##CLASS##Type(const CLASS##Type *T) { \ DISPATCH(PARENT); \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" /// Method called if \c ImpClass doesn't provide specific handler /// for some type class. 
diff --git a/include/clang/ASTMatchers/ASTMatchers.h b/include/clang/ASTMatchers/ASTMatchers.h index 063d8217d9a..e34b31cbda8 100644 --- a/include/clang/ASTMatchers/ASTMatchers.h +++ b/include/clang/ASTMatchers/ASTMatchers.h @@ -19,15 +19,15 @@ // // For more complicated match expressions we're often interested in accessing // multiple parts of the matched AST nodes once a match is found. In that case, -// use the id(...) matcher around the match expressions that match the nodes -// you want to access. +// call `.bind("name")` on match expressions that match the nodes you want to +// access. // // For example, when we're interested in child classes of a certain class, we // would write: -// cxxRecordDecl(hasName("MyClass"), has(id("child", recordDecl()))) +// cxxRecordDecl(hasName("MyClass"), has(recordDecl().bind("child"))) // When the match is found via the MatchFinder, a user provided callback will // be called with a BoundNodes instance that contains a mapping from the -// strings that we provided for the id(...) calls to the nodes that were +// strings that we provided for the `.bind()` calls to the nodes that were // matched. // In the given example, each time our matcher finds a match we get a callback // where "child" is bound to the RecordDecl node of the matching child @@ -131,15 +131,6 @@ private: internal::BoundNodesMap MyBoundNodes; }; -/// If the provided matcher matches a node, binds the node to \c ID. -/// -/// FIXME: Do we want to support this now that we have bind()? -template -internal::Matcher id(StringRef ID, - const internal::BindableMatcher &InnerMatcher) { - return InnerMatcher.bind(ID); -} - /// Types of matchers for the top-level classes in the AST class /// hierarchy. /// @{ @@ -2611,8 +2602,9 @@ hasOverloadedOperatorName(StringRef Name) { AST_POLYMORPHIC_SUPPORTED_TYPES(CXXOperatorCallExpr, FunctionDecl)>(Name); } -/// Matches C++ classes that are directly or indirectly derived from -/// a class matching \c Base. 
+/// Matches C++ classes that are directly or indirectly derived from a class +/// matching \c Base, or Objective-C classes that directly or indirectly +/// subclass a class matching \c Base. /// /// Note that a class is not considered to be derived from itself. /// @@ -2632,33 +2624,128 @@ hasOverloadedOperatorName(StringRef Name) { /// typedef Foo X; /// class Bar : public Foo {}; // derived from a type that X is a typedef of /// \endcode -AST_MATCHER_P(CXXRecordDecl, isDerivedFrom, - internal::Matcher, Base) { - return Finder->classIsDerivedFrom(&Node, Base, Builder); +/// +/// In the following example, Bar matches isDerivedFrom(hasName("NSObject")) +/// \code +/// @interface NSObject @end +/// @interface Bar : NSObject @end +/// \endcode +/// +/// Usable as: Matcher, Matcher +AST_POLYMORPHIC_MATCHER_P( + isDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + internal::Matcher, Base) { + // Check if the node is a C++ struct/union/class. + if (const auto *RD = dyn_cast(&Node)) + return Finder->classIsDerivedFrom(RD, Base, Builder, /*Directly=*/false); + + // The node must be an Objective-C class. + const auto *InterfaceDecl = cast(&Node); + return Finder->objcClassIsDerivedFrom(InterfaceDecl, Base, Builder, + /*Directly=*/false); } /// Overloaded method as shortcut for \c isDerivedFrom(hasName(...)). 
-AST_MATCHER_P_OVERLOAD(CXXRecordDecl, isDerivedFrom, std::string, BaseName, 1) { - assert(!BaseName.empty()); - return isDerivedFrom(hasName(BaseName)).matches(Node, Finder, Builder); +AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + isDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + std::string, BaseName, 1) { + if (BaseName.empty()) + return false; + + const auto M = isDerivedFrom(hasName(BaseName)); + + if (const auto *RD = dyn_cast(&Node)) + return Matcher(M).matches(*RD, Finder, Builder); + + const auto *InterfaceDecl = cast(&Node); + return Matcher(M).matches(*InterfaceDecl, Finder, Builder); } /// Similar to \c isDerivedFrom(), but also matches classes that directly /// match \c Base. -AST_MATCHER_P_OVERLOAD(CXXRecordDecl, isSameOrDerivedFrom, - internal::Matcher, Base, 0) { - return Matcher(anyOf(Base, isDerivedFrom(Base))) - .matches(Node, Finder, Builder); +AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + isSameOrDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + internal::Matcher, Base, 0) { + const auto M = anyOf(Base, isDerivedFrom(Base)); + + if (const auto *RD = dyn_cast(&Node)) + return Matcher(M).matches(*RD, Finder, Builder); + + const auto *InterfaceDecl = cast(&Node); + return Matcher(M).matches(*InterfaceDecl, Finder, Builder); } /// Overloaded method as shortcut for /// \c isSameOrDerivedFrom(hasName(...)). 
-AST_MATCHER_P_OVERLOAD(CXXRecordDecl, isSameOrDerivedFrom, std::string, - BaseName, 1) { - assert(!BaseName.empty()); - return isSameOrDerivedFrom(hasName(BaseName)).matches(Node, Finder, Builder); +AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + isSameOrDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + std::string, BaseName, 1) { + if (BaseName.empty()) + return false; + + const auto M = isSameOrDerivedFrom(hasName(BaseName)); + + if (const auto *RD = dyn_cast(&Node)) + return Matcher(M).matches(*RD, Finder, Builder); + + const auto *InterfaceDecl = cast(&Node); + return Matcher(M).matches(*InterfaceDecl, Finder, Builder); } +/// Matches C++ or Objective-C classes that are directly derived from a class +/// matching \c Base. +/// +/// Note that a class is not considered to be derived from itself. +/// +/// Example matches Y, C (Base == hasName("X")) +/// \code +/// class X; +/// class Y : public X {}; // directly derived +/// class Z : public Y {}; // indirectly derived +/// typedef X A; +/// typedef A B; +/// class C : public B {}; // derived from a typedef of X +/// \endcode +/// +/// In the following example, Bar matches isDerivedFrom(hasName("X")): +/// \code +/// class Foo; +/// typedef Foo X; +/// class Bar : public Foo {}; // derived from a type that X is a typedef of +/// \endcode +AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + isDirectlyDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + internal::Matcher, Base, 0) { + // Check if the node is a C++ struct/union/class. + if (const auto *RD = dyn_cast(&Node)) + return Finder->classIsDerivedFrom(RD, Base, Builder, /*Directly=*/true); + + // The node must be an Objective-C class. + const auto *InterfaceDecl = cast(&Node); + return Finder->objcClassIsDerivedFrom(InterfaceDecl, Base, Builder, + /*Directly=*/true); +} + +/// Overloaded method as shortcut for \c isDirectlyDerivedFrom(hasName(...)). 
+AST_POLYMORPHIC_MATCHER_P_OVERLOAD( + isDirectlyDerivedFrom, + AST_POLYMORPHIC_SUPPORTED_TYPES(CXXRecordDecl, ObjCInterfaceDecl), + std::string, BaseName, 1) { + if (BaseName.empty()) + return false; + const auto M = isDirectlyDerivedFrom(hasName(BaseName)); + + if (const auto *RD = dyn_cast(&Node)) + return Matcher(M).matches(*RD, Finder, Builder); + + const auto *InterfaceDecl = cast(&Node); + return Matcher(M).matches(*InterfaceDecl, Finder, Builder); +} /// Matches the first method of a class or struct that satisfies \c /// InnerMatcher. /// @@ -6358,10 +6445,9 @@ extern const internal::VariadicDynCastAllOfMatcher /// expr(nullPointerConstant()) /// matches the initializer for v1, v2, v3, cp, and ip. Does not match the /// initializer for i. -AST_MATCHER_FUNCTION(internal::Matcher, nullPointerConstant) { - return anyOf( - gnuNullExpr(), cxxNullPtrLiteralExpr(), - integerLiteral(equals(0), hasParent(expr(hasType(pointerType()))))); +AST_MATCHER(Expr, nullPointerConstant) { + return Node.isNullPointerConstant(Finder->getASTContext(), + Expr::NPC_ValueDependentIsNull); } /// Matches declaration of the function the statement belongs to @@ -6375,7 +6461,7 @@ AST_MATCHER_FUNCTION(internal::Matcher, nullPointerConstant) { /// \endcode /// returnStmt(forFunction(hasName("operator="))) /// matches 'return *this' -/// but does match 'return > 0' +/// but does not match 'return v > 0' AST_MATCHER_P(Stmt, forFunction, internal::Matcher, InnerMatcher) { const auto &Parents = Finder->getASTContext().getParents(Node); @@ -6498,14 +6584,15 @@ AST_MATCHER(FunctionDecl, hasTrailingReturn) { } /// Matches expressions that match InnerMatcher that are possibly wrapped in an -/// elidable constructor. +/// elidable constructor and other corresponding bookkeeping nodes. /// -/// In C++17 copy elidable constructors are no longer being -/// generated in the AST as it is not permitted by the standard. They are -/// however part of the AST in C++14 and earlier. 
Therefore, to write a matcher -/// that works in all language modes, the matcher has to skip elidable -/// constructor AST nodes if they appear in the AST. This matcher can be used to -/// skip those elidable constructors. +/// In C++17, elidable copy constructors are no longer being generated in the +/// AST as it is not permitted by the standard. They are, however, part of the +/// AST in C++14 and earlier. So, a matcher must abstract over these differences +/// to work in all language modes. This matcher skips elidable constructor-call +/// AST nodes, `ExprWithCleanups` nodes wrapping elidable constructor-calls and +/// various implicit nodes inside the constructor calls, all of which will not +/// appear in the C++17 AST. /// /// Given /// @@ -6517,13 +6604,20 @@ AST_MATCHER(FunctionDecl, hasTrailingReturn) { /// } /// \endcode /// -/// ``varDecl(hasInitializer(any( -/// ignoringElidableConstructorCall(callExpr()), -/// exprWithCleanups(ignoringElidableConstructorCall(callExpr()))))`` -/// matches ``H D = G()`` +/// ``varDecl(hasInitializer(ignoringElidableConstructorCall(callExpr())))`` +/// matches ``H D = G()`` in C++11 through C++17 (and beyond). AST_MATCHER_P(Expr, ignoringElidableConstructorCall, ast_matchers::internal::Matcher, InnerMatcher) { - if (const auto *CtorExpr = dyn_cast(&Node)) { + // E tracks the node that we are examining. + const Expr *E = &Node; + // If present, remove an outer `ExprWithCleanups` corresponding to the + // underlying `CXXConstructExpr`. This check won't cover all cases of added + // `ExprWithCleanups` corresponding to `CXXConstructExpr` nodes (because the + // EWC is placed on the outermost node of the expression, which this may not + // be), but, it still improves the coverage of this matcher. 
+ if (const auto *CleanupsExpr = dyn_cast(&Node)) + E = CleanupsExpr->getSubExpr(); + if (const auto *CtorExpr = dyn_cast(E)) { if (CtorExpr->isElidable()) { if (const auto *MaterializeTemp = dyn_cast(CtorExpr->getArg(0))) { diff --git a/include/clang/ASTMatchers/ASTMatchersInternal.h b/include/clang/ASTMatchers/ASTMatchersInternal.h index b1bb0bfa321..e9fa920b6bc 100644 --- a/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -183,7 +183,8 @@ public: /// Note that we're using std::map here, as for memoization: /// - we need a comparison operator /// - we need an assignment operator - using IDToNodeMap = std::map; + using IDToNodeMap = + std::map>; const IDToNodeMap &getMap() const { return NodeMap; @@ -971,13 +972,23 @@ public: virtual ~ASTMatchFinder() = default; - /// Returns true if the given class is directly or indirectly derived + /// Returns true if the given C++ class is directly or indirectly derived /// from a base type matching \c base. /// - /// A class is considered to be also derived from itself. + /// A class is not considered to be derived from itself. virtual bool classIsDerivedFrom(const CXXRecordDecl *Declaration, const Matcher &Base, - BoundNodesTreeBuilder *Builder) = 0; + BoundNodesTreeBuilder *Builder, + bool Directly) = 0; + + /// Returns true if the given Objective-C class is directly or indirectly + /// derived from a base class matching \c base. + /// + /// A class is not considered to be derived from itself. 
+ virtual bool objcClassIsDerivedFrom(const ObjCInterfaceDecl *Declaration, + const Matcher &Base, + BoundNodesTreeBuilder *Builder, + bool Directly) = 0; template bool matchesChildOf(const T &Node, const DynTypedMatcher &Matcher, @@ -1315,7 +1326,7 @@ class ForEachMatcher : public WrapperMatcherInterface { /// /// Input matchers can have any type (including other polymorphic matcher /// types), and the actual Matcher is generated on demand with an implicit -/// coversion operator. +/// conversion operator. template class VariadicOperatorMatcher { public: VariadicOperatorMatcher(DynTypedMatcher::VariadicOperator Op, Ps &&... Params) @@ -1324,14 +1335,14 @@ public: template operator Matcher() const { return DynTypedMatcher::constructVariadic( Op, ast_type_traits::ASTNodeKind::getFromNodeKind(), - getMatchers(llvm::index_sequence_for())) + getMatchers(std::index_sequence_for())) .template unconditionalConvertTo(); } private: // Helper method to unpack the tuple into a vector. template - std::vector getMatchers(llvm::index_sequence) const { + std::vector getMatchers(std::index_sequence) const { return {Matcher(std::get(Params))...}; } diff --git a/include/clang/Analysis/AnalysisDeclContext.h b/include/clang/Analysis/AnalysisDeclContext.h index 1961d571e9e..9faa78cde89 100644 --- a/include/clang/Analysis/AnalysisDeclContext.h +++ b/include/clang/Analysis/AnalysisDeclContext.h @@ -183,9 +183,8 @@ public: const ImplicitParamDecl *getSelfDecl() const; const StackFrameContext *getStackFrame(LocationContext const *Parent, - const Stmt *S, - const CFGBlock *Blk, - unsigned Idx); + const Stmt *S, const CFGBlock *Blk, + unsigned BlockCount, unsigned Idx); const BlockInvocationContext * getBlockInvocationContext(const LocationContext *parent, @@ -258,7 +257,7 @@ public: return getAnalysisDeclContext()->getAnalysis(); } - ParentMap &getParentMap() const { + const ParentMap &getParentMap() const { return getAnalysisDeclContext()->getParentMap(); } @@ -303,15 +302,19 @@ class 
StackFrameContext : public LocationContext { // The parent block of the callsite. const CFGBlock *Block; + // The number of times the 'Block' has been visited. + // It allows discriminating between stack frames of the same call that is + // called multiple times in a loop. + const unsigned BlockCount; + // The index of the callsite in the CFGBlock. - unsigned Index; + const unsigned Index; StackFrameContext(AnalysisDeclContext *ctx, const LocationContext *parent, - const Stmt *s, const CFGBlock *blk, - unsigned idx, - int64_t ID) - : LocationContext(StackFrame, ctx, parent, ID), CallSite(s), - Block(blk), Index(idx) {} + const Stmt *s, const CFGBlock *blk, unsigned blockCount, + unsigned idx, int64_t ID) + : LocationContext(StackFrame, ctx, parent, ID), CallSite(s), Block(blk), + BlockCount(blockCount), Index(idx) {} public: ~StackFrameContext() override = default; @@ -329,9 +332,10 @@ public: static void Profile(llvm::FoldingSetNodeID &ID, AnalysisDeclContext *ctx, const LocationContext *parent, const Stmt *s, - const CFGBlock *blk, unsigned idx) { + const CFGBlock *blk, unsigned blockCount, unsigned idx) { ProfileCommon(ID, StackFrame, ctx, parent, s); ID.AddPointer(blk); + ID.AddInteger(blockCount); ID.AddInteger(idx); } @@ -410,8 +414,8 @@ public: const StackFrameContext *getStackFrame(AnalysisDeclContext *ctx, const LocationContext *parent, - const Stmt *s, - const CFGBlock *blk, unsigned idx); + const Stmt *s, const CFGBlock *blk, + unsigned blockCount, unsigned idx); const ScopeContext *getScope(AnalysisDeclContext *ctx, const LocationContext *parent, @@ -483,26 +487,25 @@ public: bool synthesizeBodies() const { return SynthesizeBodies; } const StackFrameContext *getStackFrame(AnalysisDeclContext *Ctx, - LocationContext const *Parent, - const Stmt *S, - const CFGBlock *Blk, - unsigned Idx) { - return LocContexts.getStackFrame(Ctx, Parent, S, Blk, Idx); + const LocationContext *Parent, + const Stmt *S, const CFGBlock *Blk, + unsigned BlockCount, unsigned Idx) 
{ + return LocContexts.getStackFrame(Ctx, Parent, S, Blk, BlockCount, Idx); } // Get the top level stack frame. const StackFrameContext *getStackFrame(const Decl *D) { return LocContexts.getStackFrame(getContext(D), nullptr, nullptr, nullptr, - 0); + 0, 0); } // Get a stack frame with parent. StackFrameContext const *getStackFrame(const Decl *D, - LocationContext const *Parent, - const Stmt *S, - const CFGBlock *Blk, - unsigned Idx) { - return LocContexts.getStackFrame(getContext(D), Parent, S, Blk, Idx); + const LocationContext *Parent, + const Stmt *S, const CFGBlock *Blk, + unsigned BlockCount, unsigned Idx) { + return LocContexts.getStackFrame(getContext(D), Parent, S, Blk, BlockCount, + Idx); } /// Get a reference to {@code BodyFarm} instance. diff --git a/include/clang/Analysis/CFG.h b/include/clang/Analysis/CFG.h index 277b2292e5e..a8301a0e006 100644 --- a/include/clang/Analysis/CFG.h +++ b/include/clang/Analysis/CFG.h @@ -121,6 +121,12 @@ public: x |= Data1.getInt(); return (Kind) x; } + + void dumpToStream(llvm::raw_ostream &OS) const; + + void dump() const { + dumpToStream(llvm::errs()); + } }; class CFGStmt : public CFGElement { @@ -610,6 +616,153 @@ class CFGBlock { bool empty() const { return Impl.empty(); } }; + /// A convenience class for comparing CFGElements, since methods of CFGBlock + /// like operator[] return CFGElements by value. This is practically a wrapper + /// around a (CFGBlock, Index) pair. 
+ template class ElementRefImpl { + + template friend class ElementRefImpl; + + using CFGBlockPtr = + typename std::conditional::type; + + using CFGElementPtr = typename std::conditional::type; + + protected: + CFGBlockPtr Parent; + size_t Index; + + public: + ElementRefImpl(CFGBlockPtr Parent, size_t Index) + : Parent(Parent), Index(Index) {} + + template + ElementRefImpl(ElementRefImpl Other) + : ElementRefImpl(Other.Parent, Other.Index) {} + + size_t getIndexInBlock() const { return Index; } + + CFGBlockPtr getParent() { return Parent; } + CFGBlockPtr getParent() const { return Parent; } + + bool operator<(ElementRefImpl Other) const { + return std::make_pair(Parent, Index) < + std::make_pair(Other.Parent, Other.Index); + } + + bool operator==(ElementRefImpl Other) const { + return Parent == Other.Parent && Index == Other.Index; + } + + bool operator!=(ElementRefImpl Other) const { return !(*this == Other); } + CFGElement operator*() const { return (*Parent)[Index]; } + CFGElementPtr operator->() const { return &*(Parent->begin() + Index); } + + void dumpToStream(llvm::raw_ostream &OS) const { + OS << getIndexInBlock() + 1 << ": "; + (*this)->dumpToStream(OS); + } + + void dump() const { + dumpToStream(llvm::errs()); + } + }; + + template class ElementRefIterator { + + template + friend class ElementRefIterator; + + using CFGBlockRef = + typename std::conditional::type; + + using UnderlayingIteratorTy = typename std::conditional< + IsConst, + typename std::conditional::type, + typename std::conditional::type>::type; + + using IteratorTraits = typename std::iterator_traits; + using ElementRef = typename CFGBlock::ElementRefImpl; + + public: + using difference_type = typename IteratorTraits::difference_type; + using value_type = ElementRef; + using pointer = ElementRef *; + using iterator_category = typename IteratorTraits::iterator_category; + + private: + CFGBlockRef Parent; + UnderlayingIteratorTy Pos; + + public: + ElementRefIterator(CFGBlockRef Parent, 
UnderlayingIteratorTy Pos) + : Parent(Parent), Pos(Pos) {} + + template + ElementRefIterator(ElementRefIterator E) + : ElementRefIterator(E.Parent, E.Pos.base()) {} + + template + ElementRefIterator(ElementRefIterator E) + : ElementRefIterator(E.Parent, llvm::make_reverse_iterator(E.Pos)) {} + + bool operator<(ElementRefIterator Other) const { + assert(Parent == Other.Parent); + return Pos < Other.Pos; + } + + bool operator==(ElementRefIterator Other) const { + return Parent == Other.Parent && Pos == Other.Pos; + } + + bool operator!=(ElementRefIterator Other) const { + return !(*this == Other); + } + + private: + template + static size_t + getIndexInBlock(CFGBlock::ElementRefIterator E) { + return E.Parent->size() - (E.Pos - E.Parent->rbegin()) - 1; + } + + template + static size_t + getIndexInBlock(CFGBlock::ElementRefIterator E) { + return E.Pos - E.Parent->begin(); + } + + public: + value_type operator*() { return {Parent, getIndexInBlock(*this)}; } + + difference_type operator-(ElementRefIterator Other) const { + return Pos - Other.Pos; + } + + ElementRefIterator operator++() { + ++this->Pos; + return *this; + } + ElementRefIterator operator++(int) { + ElementRefIterator Ret = *this; + ++*this; + return Ret; + } + ElementRefIterator operator+(size_t count) { + this->Pos += count; + return *this; + } + ElementRefIterator operator-(size_t count) { + this->Pos -= count; + return *this; + } + }; + +public: /// The set of statements in the basic block. 
ElementList Elements; @@ -715,6 +868,8 @@ public: using reverse_iterator = ElementList::reverse_iterator; using const_reverse_iterator = ElementList::const_reverse_iterator; + size_t getIndexInCFG() const; + CFGElement front() const { return Elements.front(); } CFGElement back() const { return Elements.back(); } @@ -728,6 +883,38 @@ public: const_reverse_iterator rbegin() const { return Elements.rbegin(); } const_reverse_iterator rend() const { return Elements.rend(); } + using CFGElementRef = ElementRefImpl; + using ConstCFGElementRef = ElementRefImpl; + + using ref_iterator = ElementRefIterator; + using ref_iterator_range = llvm::iterator_range; + using const_ref_iterator = ElementRefIterator; + using const_ref_iterator_range = llvm::iterator_range; + + using reverse_ref_iterator = ElementRefIterator; + using reverse_ref_iterator_range = llvm::iterator_range; + + using const_reverse_ref_iterator = ElementRefIterator; + using const_reverse_ref_iterator_range = + llvm::iterator_range; + + ref_iterator ref_begin() { return {this, begin()}; } + ref_iterator ref_end() { return {this, end()}; } + const_ref_iterator ref_begin() const { return {this, begin()}; } + const_ref_iterator ref_end() const { return {this, end()}; } + + reverse_ref_iterator rref_begin() { return {this, rbegin()}; } + reverse_ref_iterator rref_end() { return {this, rend()}; } + const_reverse_ref_iterator rref_begin() const { return {this, rbegin()}; } + const_reverse_ref_iterator rref_end() const { return {this, rend()}; } + + ref_iterator_range refs() { return {ref_begin(), ref_end()}; } + const_ref_iterator_range refs() const { return {ref_begin(), ref_end()}; } + reverse_ref_iterator_range rrefs() { return {rref_begin(), rref_end()}; } + const_reverse_ref_iterator_range rrefs() const { + return {rref_begin(), rref_end()}; + } + unsigned size() const { return Elements.size(); } bool empty() const { return Elements.empty(); } @@ -855,6 +1042,10 @@ public: void setLoopTarget(const Stmt 
*loopTarget) { LoopTarget = loopTarget; } void setHasNoReturnElement() { HasNoReturnElement = true; } + /// Returns true if the block would eventually end with a sink (a noreturn + /// node). + bool isInevitablySinking() const; + CFGTerminator getTerminator() const { return Terminator; } Stmt *getTerminatorStmt() { return Terminator.getStmt(); } @@ -894,7 +1085,7 @@ public: void printTerminator(raw_ostream &OS, const LangOptions &LO) const; void printTerminatorJson(raw_ostream &Out, const LangOptions &LO, bool AddQuotes) const; - + void printAsOperand(raw_ostream &OS, bool /*PrintType*/) { OS << "BB#" << getBlockID(); } @@ -1010,7 +1201,6 @@ public: *I = CFGScopeEnd(VD, S); return ++I; } - }; /// CFGCallback defines methods that should be called when a logical @@ -1023,6 +1213,7 @@ public: virtual void compareAlwaysTrue(const BinaryOperator *B, bool isAlwaysTrue) {} virtual void compareBitwiseEquality(const BinaryOperator *B, bool isAlwaysTrue) {} + virtual void compareBitwiseOr(const BinaryOperator *B) {} }; /// Represents a source-level, intra-procedural CFG that represents the diff --git a/include/clang/Analysis/CallGraph.h b/include/clang/Analysis/CallGraph.h index 49c04490fed..dae2b58ffc1 100644 --- a/include/clang/Analysis/CallGraph.h +++ b/include/clang/Analysis/CallGraph.h @@ -131,6 +131,7 @@ public: bool shouldWalkTypesOfTypeLocs() const { return false; } bool shouldVisitTemplateInstantiations() const { return true; } + bool shouldVisitImplicitCode() const { return true; } private: /// Add the given declaration to the call graph. 
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h b/include/clang/Analysis/PathDiagnostic.h similarity index 88% rename from include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h rename to include/clang/Analysis/PathDiagnostic.h index 5230742a4aa..6730057cf0a 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h +++ b/include/clang/Analysis/PathDiagnostic.h @@ -52,11 +52,6 @@ class SourceManager; namespace ento { -class ExplodedNode; -class SymExpr; - -using SymbolRef = const SymExpr *; - //===----------------------------------------------------------------------===// // High-level interface for handlers of path-sensitive diagnostics. //===----------------------------------------------------------------------===// @@ -125,6 +120,13 @@ public: }; virtual PathGenerationScheme getGenerationScheme() const { return Minimal; } + + bool shouldGenerateDiagnostics() const { + return getGenerationScheme() != None; + } + + bool shouldAddPathEdges() const { return getGenerationScheme() == Extensive; } + virtual bool supportsLogicalOpControlFlow() const { return false; } /// Return true if the PathDiagnosticConsumer supports individual @@ -269,19 +271,21 @@ public: static PathDiagnosticLocation createDeclEnd(const LocationContext *LC, const SourceManager &SM); - /// Create a location corresponding to the given valid ExplodedNode. + /// Create a location corresponding to the given valid ProgramPoint. static PathDiagnosticLocation create(const ProgramPoint &P, const SourceManager &SMng); - /// Create a location corresponding to the next valid ExplodedNode as end - /// of path location. - static PathDiagnosticLocation createEndOfPath(const ExplodedNode* N, - const SourceManager &SM); - /// Convert the given location into a single kind location. 
static PathDiagnosticLocation createSingleLocation( const PathDiagnosticLocation &PDL); + /// Construct a source location that corresponds to either the beginning + /// or the end of the given statement, or a nearby valid source location + /// if the statement does not have a valid source location of its own. + static SourceLocation + getValidSourceLocation(const Stmt *S, LocationOrAnalysisDeclContext LAC, + bool UseEndOfStatement = false); + bool operator==(const PathDiagnosticLocation &X) const { return K == X.K && Loc == X.Loc && Range == X.Range; } @@ -326,13 +330,6 @@ public: void Profile(llvm::FoldingSetNodeID &ID) const; void dump() const; - - /// Given an exploded node, retrieve the statement that should be used - /// for the diagnostic location. - static const Stmt *getStmt(const ExplodedNode *N); - - /// Retrieve the statement corresponding to the successor node. - static const Stmt *getNextStmt(const ExplodedNode *N); }; class PathDiagnosticLocationPair { @@ -386,6 +383,7 @@ private: StringRef Tag; std::vector ranges; + std::vector fixits; protected: PathDiagnosticPiece(StringRef s, Kind k, DisplayHint hint = Below); @@ -430,9 +428,16 @@ public: ranges.push_back(SourceRange(B,E)); } + void addFixit(FixItHint F) { + fixits.push_back(F); + } + /// Return the SourceRanges associated with this PathDiagnosticPiece. ArrayRef getRanges() const { return ranges; } + /// Return the fix-it hints associated with this PathDiagnosticPiece. + ArrayRef getFixits() const { return fixits; } + virtual void Profile(llvm::FoldingSetNodeID &ID) const; void setAsLastInMainSourceFile() { @@ -446,7 +451,9 @@ public: virtual void dump() const = 0; }; -class PathPieces : public std::list> { +using PathDiagnosticPieceRef = std::shared_ptr; + +class PathPieces : public std::list { void flattenTo(PathPieces &Primary, PathPieces &Current, bool ShouldFlattenMacros) const; @@ -486,65 +493,13 @@ public: } }; -/// Interface for classes constructing Stack hints. 
-/// -/// If a PathDiagnosticEvent occurs in a different frame than the final -/// diagnostic the hints can be used to summarize the effect of the call. -class StackHintGenerator { -public: - virtual ~StackHintGenerator() = 0; - - /// Construct the Diagnostic message for the given ExplodedNode. - virtual std::string getMessage(const ExplodedNode *N) = 0; -}; - -/// Constructs a Stack hint for the given symbol. -/// -/// The class knows how to construct the stack hint message based on -/// traversing the CallExpr associated with the call and checking if the given -/// symbol is returned or is one of the arguments. -/// The hint can be customized by redefining 'getMessageForX()' methods. -class StackHintGeneratorForSymbol : public StackHintGenerator { -private: - SymbolRef Sym; - std::string Msg; - -public: - StackHintGeneratorForSymbol(SymbolRef S, StringRef M) : Sym(S), Msg(M) {} - ~StackHintGeneratorForSymbol() override = default; - - /// Search the call expression for the symbol Sym and dispatch the - /// 'getMessageForX()' methods to construct a specific message. - std::string getMessage(const ExplodedNode *N) override; - - /// Produces the message of the following form: - /// 'Msg via Nth parameter' - virtual std::string getMessageForArg(const Expr *ArgE, unsigned ArgIndex); - - virtual std::string getMessageForReturn(const CallExpr *CallExpr) { - return Msg; - } - - virtual std::string getMessageForSymbolNotFound() { - return Msg; - } -}; - class PathDiagnosticEventPiece : public PathDiagnosticSpotPiece { Optional IsPrunable; - /// If the event occurs in a different frame than the final diagnostic, - /// supply a message that will be used to construct an extra hint on the - /// returns from all the calls on the stack from this event to the final - /// diagnostic. 
- std::unique_ptr CallStackHint; - public: PathDiagnosticEventPiece(const PathDiagnosticLocation &pos, - StringRef s, bool addPosRange = true, - StackHintGenerator *stackHint = nullptr) - : PathDiagnosticSpotPiece(pos, s, Event, addPosRange), - CallStackHint(stackHint) {} + StringRef s, bool addPosRange = true) + : PathDiagnosticSpotPiece(pos, s, Event, addPosRange) {} ~PathDiagnosticEventPiece() override; /// Mark the diagnostic piece as being potentially prunable. This @@ -561,16 +516,6 @@ public: return IsPrunable.hasValue() ? IsPrunable.getValue() : false; } - bool hasCallStackHint() { return (bool)CallStackHint; } - - /// Produce the hint for the given node. The node contains - /// information about the call for which the diagnostic can be generated. - std::string getCallStackMessage(const ExplodedNode *N) { - if (CallStackHint) - return CallStackHint->getMessage(N); - return {}; - } - void dump() const override; static bool classof(const PathDiagnosticPiece *P) { @@ -726,8 +671,6 @@ public: PathPieces subPieces; - bool containsEvent() const; - void flattenLocations() override { PathDiagnosticSpotPiece::flattenLocations(); for (const auto &I : subPieces) @@ -782,7 +725,7 @@ using FilesToLineNumsMap = std::map>; /// diagnostic. It represents an ordered-collection of PathDiagnosticPieces, /// each which represent the pieces of the path. 
class PathDiagnostic : public llvm::FoldingSetNode { - std::string CheckName; + std::string CheckerName; const Decl *DeclWithIssue; std::string BugType; std::string VerboseDesc; @@ -806,7 +749,7 @@ class PathDiagnostic : public llvm::FoldingSetNode { public: PathDiagnostic() = delete; - PathDiagnostic(StringRef CheckName, const Decl *DeclWithIssue, + PathDiagnostic(StringRef CheckerName, const Decl *DeclWithIssue, StringRef bugtype, StringRef verboseDesc, StringRef shortDesc, StringRef category, PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique, @@ -836,7 +779,7 @@ public: bool isWithinCall() const { return !pathStack.empty(); } - void setEndOfPath(std::shared_ptr EndPiece) { + void setEndOfPath(PathDiagnosticPieceRef EndPiece) { assert(!Loc.isValid() && "End location already set!"); Loc = EndPiece->getLocation(); assert(Loc.isValid() && "Invalid location for end-of-path piece"); @@ -849,26 +792,16 @@ public: VerboseDesc += S; } - /// If the last piece of the report point to the header file, resets - /// the location of the report to be the last location in the main source - /// file. - void resetDiagnosticLocationToMainFile(); - StringRef getVerboseDescription() const { return VerboseDesc; } StringRef getShortDescription() const { return ShortDesc.empty() ? VerboseDesc : ShortDesc; } - StringRef getCheckName() const { return CheckName; } + StringRef getCheckerName() const { return CheckerName; } StringRef getBugType() const { return BugType; } StringRef getCategory() const { return Category; } - /// Return the semantic context where an issue occurred. If the - /// issue occurs along a path, this represents the "central" area - /// where the bug manifests. - const Decl *getDeclWithIssue() const { return DeclWithIssue; } - using meta_iterator = std::deque::const_iterator; meta_iterator meta_begin() const { return OtherDesc.begin(); } @@ -883,10 +816,23 @@ public: return *ExecutedLines; } + /// Return the semantic context where an issue occurred. 
If the + /// issue occurs along a path, this represents the "central" area + /// where the bug manifests. + const Decl *getDeclWithIssue() const { return DeclWithIssue; } + + void setDeclWithIssue(const Decl *D) { + DeclWithIssue = D; + } + PathDiagnosticLocation getLocation() const { return Loc; } + void setLocation(PathDiagnosticLocation NewLoc) { + Loc = NewLoc; + } + /// Get the location on which the report should be uniqued. PathDiagnosticLocation getUniqueingLoc() const { return UniqueingLoc; @@ -917,7 +863,6 @@ public: }; } // namespace ento - } // namespace clang #endif // LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_PATHDIAGNOSTIC_H diff --git a/include/clang/Basic/AArch64SVEACLETypes.def b/include/clang/Basic/AArch64SVEACLETypes.def new file mode 100644 index 00000000000..7d387587dc2 --- /dev/null +++ b/include/clang/Basic/AArch64SVEACLETypes.def @@ -0,0 +1,70 @@ +//===-- AArch64SVEACLETypes.def - Metadata about SVE types ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines various SVE builtin types. The macros are: +// +// SVE_TYPE(Name, Id, SingletonId) - A builtin type that has not been +// covered by any other #define. Defining this macro covers all +// the builtins. +// +// SVE_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, IsSigned, IsFP) - +// An SVE scalable vector. +// +// SVE_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) - An SVE scalable +// predicate. +// +// where: +// +// - Name is the name of the builtin type. +// +// - BuiltinType::Id is the enumerator defining the type. +// +// - Context.SingletonId is the global singleton of this type. +// +// - ElKind enumerates the type of the elements. +// +// - ElBits is the size of one element in bits. 
+// +// - IsSigned is true for vectors of signed integer elements and +// for vectors of floating-point elements. +// +// - IsFP is true for vectors of floating-point elements. +// +//===----------------------------------------------------------------------===// + +#ifndef SVE_VECTOR_TYPE +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, IsSigned, IsFP)\ + SVE_TYPE(Name, Id, SingletonId) +#endif + +#ifndef SVE_PREDICATE_TYPE +#define SVE_PREDICATE_TYPE(Name, Id, SingletonId, ElKind)\ + SVE_TYPE(Name, Id, SingletonId) +#endif + +//===- Vector point types -----------------------------------------------===// + +SVE_VECTOR_TYPE("__SVInt8_t", SveInt8, SveInt8Ty, SveElSInt8, 8, true, false) +SVE_VECTOR_TYPE("__SVInt16_t", SveInt16, SveInt16Ty, SveElSInt16, 16, true, false) +SVE_VECTOR_TYPE("__SVInt32_t", SveInt32, SveInt32Ty, SveElSInt32, 32, true, false) +SVE_VECTOR_TYPE("__SVInt64_t", SveInt64, SveInt64Ty, SveElSInt64, 64, true, false) + +SVE_VECTOR_TYPE("__SVUint8_t", SveUint8, SveUint8Ty, SveElUInt8, 8, false, false) +SVE_VECTOR_TYPE("__SVUint16_t", SveUint16, SveUint16Ty, SveElUInt16, 16, false, false) +SVE_VECTOR_TYPE("__SVUint32_t", SveUint32, SveUint32Ty, SveElUInt32, 32, false, false) +SVE_VECTOR_TYPE("__SVUint64_t", SveUint64, SveUint64Ty, SveElUInt64, 64, false, false) + +SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, SveElHalf, 16, true, true) +SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, SveElFloat, 32, true, true) +SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, SveElDouble, 64, true, true) + +SVE_PREDICATE_TYPE("__SVBool_t", SveBool, SveBoolTy, SveElBool) + +#undef SVE_VECTOR_TYPE +#undef SVE_PREDICATE_TYPE +#undef SVE_TYPE diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td index d39b16e62b7..c3a2ee325df 100644 --- a/include/clang/Basic/Attr.td +++ b/include/clang/Basic/Attr.td @@ -722,9 +722,25 @@ def AVRSignal : InheritableAttr, TargetSpecificAttr { def AsmLabel : InheritableAttr { 
let Spellings = [Keyword<"asm">, Keyword<"__asm__">]; - let Args = [StringArgument<"Label">]; + let Args = [ + // Label specifies the mangled name for the decl. + StringArgument<"Label">, + + // IsLiteralLabel specifies whether the label is literal (i.e. suppresses + // the global C symbol prefix) or not. If not, the mangle-suppression prefix + // ('\01') is omitted from the decl name at the LLVM IR level. + // + // Non-literal labels are used by some external AST sources like LLDB. + BoolArgument<"IsLiteralLabel", /*optional=*/0, /*fake=*/1> + ]; let SemaHandler = 0; - let Documentation = [Undocumented]; + let Documentation = [AsmLabelDocs]; + let AdditionalMembers = +[{ +bool isEquivalent(AsmLabelAttr *Other) const { + return getLabel() == Other->getLabel() && getIsLiteralLabel() == Other->getIsLiteralLabel(); +} +}]; } def Availability : InheritableAttr { @@ -911,6 +927,17 @@ def Const : InheritableAttr { let Documentation = [Undocumented]; } +def ConstInit : InheritableAttr { + // This attribute does not have a C [[]] spelling because it requires the + // CPlusPlus language option. 
+ let Spellings = [Keyword<"constinit">, + Clang<"require_constant_initialization", 0>]; + let Subjects = SubjectList<[GlobalVar], ErrorDiag>; + let Accessors = [Accessor<"isConstinit", [Keyword<"constinit">]>]; + let Documentation = [ConstInitDocs]; + let LangOpts = [CPlusPlus]; +} + def Constructor : InheritableAttr { let Spellings = [GCC<"constructor">]; let Args = [DefaultIntArgument<"Priority", 65535>]; @@ -1170,7 +1197,7 @@ def ExtVectorType : Attr { def FallThrough : StmtAttr { let Spellings = [CXX11<"", "fallthrough", 201603>, C2x<"", "fallthrough">, - CXX11<"clang", "fallthrough">]; + CXX11<"clang", "fallthrough">, GCC<"fallthrough">]; // let Subjects = [NullStmt]; let Documentation = [FallthroughDocs]; } @@ -1935,15 +1962,6 @@ def ReqdWorkGroupSize : InheritableAttr { let Documentation = [Undocumented]; } -def RequireConstantInit : InheritableAttr { - // This attribute does not have a C [[]] spelling because it requires the - // CPlusPlus language option. - let Spellings = [Clang<"require_constant_initialization", 0>]; - let Subjects = SubjectList<[GlobalVar], ErrorDiag>; - let Documentation = [RequireConstantInitDocs]; - let LangOpts = [CPlusPlus]; -} - def WorkGroupSizeHint : InheritableAttr { // Does not have a [[]] spelling because it is an OpenCL-related attribute. let Spellings = [GNU<"work_group_size_hint">]; @@ -2002,6 +2020,14 @@ def PragmaClangRodataSection : InheritableAttr { let Documentation = [Undocumented]; } +def PragmaClangRelroSection : InheritableAttr { + // This attribute has no spellings as it is only ever created implicitly. + let Spellings = []; + let Args = [StringArgument<"Name">]; + let Subjects = SubjectList<[GlobalVar], ErrorDiag>; + let Documentation = [Undocumented]; +} + def PragmaClangTextSection : InheritableAttr { // This attribute has no spellings as it is only ever created implicitly. 
let Spellings = []; @@ -2335,11 +2361,19 @@ def WarnUnused : InheritableAttr { } def WarnUnusedResult : InheritableAttr { - let Spellings = [CXX11<"", "nodiscard", 201603>, C2x<"", "nodiscard">, + let Spellings = [CXX11<"", "nodiscard", 201907>, C2x<"", "nodiscard">, CXX11<"clang", "warn_unused_result">, GCC<"warn_unused_result">]; let Subjects = SubjectList<[ObjCMethod, Enum, Record, FunctionLike]>; + let Args = [StringArgument<"Message", 1>]; let Documentation = [WarnUnusedResultsDocs]; + let AdditionalMembers = [{ + // Check whether this the C++11 nodiscard version, even in non C++11 + // spellings. + bool IsCXX11NoDiscard() const { + return this->getSemanticSpelling() == CXX11_nodiscard; + } + }]; } def Weak : InheritableAttr { @@ -2428,6 +2462,12 @@ def NoSanitizeSpecific : InheritableAttr { let ASTNode = 0; } +def CFICanonicalJumpTable : InheritableAttr { + let Spellings = [Clang<"cfi_canonical_jump_table">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [CFICanonicalJumpTableDocs]; +} + // C/C++ Thread safety attributes (e.g. for deadlock, data race checking) // Not all of these attributes will be given a [[]] spelling. 
The attributes // which require access to function parameter names cannot use the [[]] spelling @@ -2788,6 +2828,20 @@ def TypeTagForDatatype : InheritableAttr { let Documentation = [TypeTagForDatatypeDocs]; } +def Owner : InheritableAttr { + let Spellings = [CXX11<"gsl", "Owner">]; + let Subjects = SubjectList<[Struct]>; + let Args = [TypeArgument<"DerefType", /*opt=*/1>]; + let Documentation = [LifetimeOwnerDocs]; +} + +def Pointer : InheritableAttr { + let Spellings = [CXX11<"gsl", "Pointer">]; + let Subjects = SubjectList<[Struct]>; + let Args = [TypeArgument<"DerefType", /*opt=*/1>]; + let Documentation = [LifetimePointerDocs]; +} + // Microsoft-related attributes def MSNoVTable : InheritableAttr, TargetSpecificAttr { @@ -2981,10 +3035,12 @@ def LoopHint : Attr { let Args = [EnumArgument<"Option", "OptionType", ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", - "pipeline", "pipeline_initiation_interval", "distribute"], + "pipeline", "pipeline_initiation_interval", "distribute", + "vectorize_predicate"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", - "PipelineDisabled", "PipelineInitiationInterval", "Distribute"]>, + "PipelineDisabled", "PipelineInitiationInterval", "Distribute", + "VectorizePredicate"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "assume_safety", "full"], ["Enable", "Disable", "Numeric", "AssumeSafety", "Full"]>, @@ -3004,12 +3060,13 @@ def LoopHint : Attr { case PipelineDisabled: return "pipeline"; case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; + case VectorizePredicate: return "vectorize_predicate"; } llvm_unreachable("Unhandled LoopHint option."); } void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const { - unsigned SpellingIndex = getSpellingListIndex(); + 
unsigned SpellingIndex = getAttributeSpellingListIndex(); // For "#pragma unroll" and "#pragma nounroll" the string "unroll" or // "nounroll" is already emitted as the pragma name. if (SpellingIndex == Pragma_nounroll || SpellingIndex == Pragma_nounroll_and_jam) @@ -3045,7 +3102,7 @@ def LoopHint : Attr { // Return a string suitable for identifying this attribute in diagnostics. std::string getDiagnosticName(const PrintingPolicy &Policy) const { - unsigned SpellingIndex = getSpellingListIndex(); + unsigned SpellingIndex = getAttributeSpellingListIndex(); if (SpellingIndex == Pragma_nounroll) return "#pragma nounroll"; else if (SpellingIndex == Pragma_unroll) @@ -3176,11 +3233,16 @@ def OMPDeclareTargetDecl : InheritableAttr { let Args = [ EnumArgument<"MapType", "MapTypeTy", [ "to", "link" ], - [ "MT_To", "MT_Link" ]> + [ "MT_To", "MT_Link" ]>, + EnumArgument<"DevType", "DevTypeTy", + [ "host", "nohost", "any" ], + [ "DT_Host", "DT_NoHost", "DT_Any" ]> ]; let AdditionalMembers = [{ void printPrettyPragma(raw_ostream &OS, const PrintingPolicy &Policy) const { // Use fake syntax because it is for testing and debugging purpose only. 
+ if (getDevType() != DT_Any) + OS << " device_type(" << ConvertDevTypeTyToStr(getDevType()) << ")"; if (getMapType() != MT_To) OS << ' ' << ConvertMapTypeTyToStr(getMapType()); } @@ -3193,6 +3255,14 @@ def OMPDeclareTargetDecl : InheritableAttr { return llvm::None; } + static llvm::Optional getDeviceType(const ValueDecl *VD) { + if (!VD->hasAttrs()) + return llvm::None; + if (const auto *Attr = VD->getAttr()) + return Attr->getDevType(); + + return llvm::None; + } }]; } @@ -3219,6 +3289,82 @@ def OMPAllocateDecl : InheritableAttr { let Documentation = [Undocumented]; } +def OMPDeclareVariant : InheritableAttr { + let Spellings = [Pragma<"omp", "declare variant">]; + let Subjects = SubjectList<[Function]>; + let SemaHandler = 0; + let HasCustomParsing = 1; + let InheritEvenIfAlreadyPresent = 1; + let Documentation = [OMPDeclareVariantDocs]; + let Args = [ + ExprArgument<"VariantFuncRef">, + ExprArgument<"Score">, + EnumArgument<"CtxSelectorSet", "CtxSelectorSetType", + [ "", "implementation" + ], + [ + "CtxSetUnknown", "CtxSetImplementation" + ]>, + EnumArgument<"CtxScore", "ScoreType", + [ "", "score" + ], + [ + "ScoreUnknown", "ScoreSpecified" + ]>, + EnumArgument<"CtxSelector", "CtxSelectorType", + [ "", "vendor" + ], + [ + "CtxUnknown", "CtxVendor" + ]>, + VariadicStringArgument<"ImplVendors"> + ]; + let AdditionalMembers = [{ + void printScore(raw_ostream & OS, const PrintingPolicy &Policy) const { + if (const Expr *E = getScore()) { + OS << "score("; + E->printPretty(OS, nullptr, Policy); + OS << "):"; + } + } + void printPrettyPragma(raw_ostream & OS, const PrintingPolicy &Policy) + const { + assert(getCtxSelectorSet() != CtxSetUnknown && + getCtxSelector() != CtxUnknown && "Unknown context selector."); + if (const Expr *E = getVariantFuncRef()) { + OS << "("; + E->printPretty(OS, nullptr, Policy); + OS << ")"; + } + // TODO: add printing of real context selectors. 
+ OS << " match("; + switch (getCtxSelectorSet()) { + case CtxSetImplementation: + OS << "implementation={"; + switch (getCtxSelector()) { + case CtxVendor: + OS << "vendor("; + printScore(OS, Policy); + if (implVendors_size() > 0) { + OS << *implVendors(). begin(); + for (StringRef VendorName : llvm::drop_begin(implVendors(), 1)) + OS << ", " << VendorName; + } + OS << ")"; + break; + case CtxUnknown: + llvm_unreachable("Unknown context selector."); + } + OS << "}"; + break; + case CtxSetUnknown: + llvm_unreachable("Unknown context selector set."); + } + OS << ")"; + } + }]; +} + def InternalLinkage : InheritableAttr { let Spellings = [Clang<"internal_linkage">]; let Subjects = SubjectList<[Var, Function, CXXRecord]>; diff --git a/include/clang/Basic/AttrDocs.td b/include/clang/Basic/AttrDocs.td index fac6116057d..114a9856c5f 100644 --- a/include/clang/Basic/AttrDocs.td +++ b/include/clang/Basic/AttrDocs.td @@ -1408,15 +1408,17 @@ all optional, but the attribute has to have at least one clause. }]; } -def RequireConstantInitDocs : Documentation { +def ConstInitDocs : Documentation { let Category = DocCatVariable; + let Heading = "require_constant_initialization, constinit (C++20)"; let Content = [{ This attribute specifies that the variable to which it is attached is intended to have a `constant initializer `_ according to the rules of [basic.start.static]. The variable is required to have static or thread storage duration. If the initialization of the variable is not a constant initializer an error will be produced. This attribute may -only be used in C++. +only be used in C++; the ``constinit`` spelling is only accepted in C++20 +onwards. Note that in C++03 strict constant expression checking is not done. Instead the attribute reports if Clang can emit the variable as a constant, even if it's @@ -1431,6 +1433,12 @@ for constant initialization have been met. 
Since these requirements change between dialects and have subtle pitfalls it's important to fail fast instead of silently falling back on dynamic initialization. +The first use of the attribute on a variable must be part of, or precede, the +initializing declaration of the variable. C++20 requires the ``constinit`` +spelling of the attribute to be present on the initializing declaration if it +is used anywhere. The other spellings can be specified on a forward declaration +and omitted on a later initializing declaration. + .. code-block:: c++ // -std=c++14 @@ -1482,6 +1490,13 @@ generated when a function or its return type is marked with ``[[nodiscard]]`` potentially-evaluated discarded-value expression that is not explicitly cast to `void`. +A string literal may optionally be provided to the attribute, which will be +reproduced in any resulting diagnostics. Redeclarations using different forms +of the attribute (with or without the string literal or with different string +literal contents) are allowed. If there are redeclarations of the entity with +differing string literals, it is unspecified which one will be used by Clang +in any resulting diagnostics. + .. code-block: c++ struct [[nodiscard]] error_info { /*...*/ }; error_info enable_missile_safety_mode(); @@ -1493,6 +1508,33 @@ potentially-evaluated discarded-value expression that is not explicitly cast to } error_info &foo(); void f() { foo(); } // Does not diagnose, error_info is a reference. + +Additionally, discarded temporaries resulting from a call to a constructor +marked with ``[[nodiscard]]`` or a constructor of a type marked +``[[nodiscard]]`` will also diagnose. This also applies to type conversions that +use the annotated ``[[nodiscard]]`` constructor or result in an annotated type. + +.. 
code-block:: c++ + struct [[nodiscard]] marked_type {/*..*/ }; + struct marked_ctor { + [[nodiscard]] marked_ctor(); + marked_ctor(int); + }; + + struct S { + operator marked_type() const; + [[nodiscard]] operator int() const; + }; + + void usages() { + marked_type(); // diagnoses. + marked_ctor(); // diagnoses. + marked_ctor(3); // Does not diagnose, int constructor isn't marked nodiscard. + + S s; + static_cast<marked_type>(s); // diagnoses + (int)s; // diagnoses + } }]; } @@ -2187,6 +2229,18 @@ to avoid false positives in other places. }]; } +def CFICanonicalJumpTableDocs : Documentation { + let Category = DocCatFunction; + let Heading = "cfi_canonical_jump_table"; + let Content = [{ +.. _langext-cfi_canonical_jump_table: + +Use ``__attribute__((cfi_canonical_jump_table))`` on a function declaration to +make the function's CFI jump table canonical. See :ref:`the CFI documentation +<cfi-canonical-jump-tables>` for more details. + }]; +} + def DocCatTypeSafety : DocumentationCategory<"Type Safety Checking"> { let Content = [{ Clang supports additional attributes to enable checking type safety properties @@ -2504,6 +2558,30 @@ manipulating bits of the enumerator when issuing warnings. }]; } +def AsmLabelDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +This attribute can be used on a function or variable to specify its symbol name. + +On some targets, all C symbols are prefixed by default with a single character, typically ``_``. This was done historically to distinguish them from symbols used by other languages. (This prefix is also added to the standard Itanium C++ ABI prefix on "mangled" symbol names, so that e.g. on such targets the true symbol name for a C++ variable declared as ``int cppvar;`` would be ``__Z6cppvar``; note the two underscores.) This prefix is *not* added to the symbol names specified by the ``asm`` attribute; programmers wishing to match a C symbol name must compensate for this. + +For example, consider the following C code: + +.. 
code-block:: c + + int var1 asm("altvar") = 1; // "altvar" in symbol table. + int var2 = 1; // "_var2" in symbol table. + + void func1(void) asm("altfunc"); + void func1(void) {} // "altfunc" in symbol table. + void func2(void) {} // "_func2" in symbol table. + +Clang's implementation of this attribute is compatible with GCC's, `documented here <https://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html>`_. + +While it is possible to use this attribute to name a special symbol used internally by the compiler, such as an LLVM intrinsic, this is neither recommended nor supported and may cause the compiler to crash or miscompile. Users who wish to gain access to intrinsic behavior are strongly encouraged to request new builtin functions. + }]; +} + def EnumExtensibilityDocs : Documentation { let Category = DocCatDecl; let Content = [{ @@ -2583,7 +2661,7 @@ is retained by the return value of the annotated function It is only supported in C++. This attribute provides an experimental implementation of the facility -described in the C++ committee paper [http://wg21.link/p0936r0](P0936R0), +described in the C++ committee paper `P0936R0 <http://wg21.link/p0936r0>`_, and is subject to change as the design of the corresponding functionality changes. }]; @@ -2709,9 +2787,10 @@ def LoopHintDocs : Documentation { let Content = [{ The ``#pragma clang loop`` directive allows loop optimization hints to be specified for the subsequent loop. The directive allows pipelining to be -disabled, or vectorization, interleaving, and unrolling to be enabled or disabled. -Vector width, interleave count, unrolling count, and the initiation interval -for pipelining can be explicitly specified. See `language extensions +disabled, or vectorization, vector predication, interleaving, and unrolling to +be enabled or disabled. Vector width, vector predication, interleave count, +unrolling count, and the initiation interval for pipelining can be explicitly +specified. See `language extensions <http://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations>`_ for details. 
}]; @@ -3129,6 +3208,55 @@ The syntax of the declare target directive is as follows: #pragma omp declare target new-line declarations-definition-seq #pragma omp end declare target new-line + +or + + .. code-block:: c + + #pragma omp declare target (extended-list) new-line + +or + + .. code-block:: c + + #pragma omp declare target clause[ [,] clause ... ] new-line + +where clause is one of the following: + + + .. code-block:: c + + to(extended-list) + link(list) + device_type(host | nohost | any) + }]; +} + +def OMPDeclareVariantDocs : Documentation { + let Category = DocCatFunction; + let Heading = "#pragma omp declare variant"; + let Content = [{ +The `declare variant` directive declares a specialized variant of a base + function and specifies the context in which that specialized variant is used. + The declare variant directive is a declarative directive. +The syntax of the `declare variant` construct is as follows: + + .. code-block:: none + + #pragma omp declare variant(variant-func-id) clause new-line + [#pragma omp declare variant(variant-func-id) clause new-line] + [...] + function definition or declaration + +where clause is one of the following: + + .. code-block:: none + + match(context-selector-specification) + +and where `variant-func-id` is the name of a function variant that is either a + base language identifier or, for C++, a template-id. + }]; } @@ -4194,4 +4322,72 @@ be accessed on both device side and host side. It has external linkage and is not initialized on device side. It has internal linkage and is initialized by the initializer on host side. }]; -} \ No newline at end of file +} + +def LifetimeOwnerDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +.. Note:: This attribute is experimental and its effect on analysis is subject to change in + a future version of clang. + +The attribute ``[[gsl::Owner(T)]]`` applies to structs and classes that own an +object of type ``T``: + +.. 
code-block:: c++ + + class [[gsl::Owner(int)]] IntOwner { + private: + int value; + public: + int *getInt() { return &value; } + }; + +The argument ``T`` is optional and is ignored. +This attribute may be used by analysis tools and has no effect on code +generation. + +See Pointer_ for an example. +}]; +} + +def LifetimePointerDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +.. Note:: This attribute is experimental and its effect on analysis is subject to change in + a future version of clang. + +The attribute ``[[gsl::Pointer(T)]]`` applies to structs and classes that behave +like pointers to an object of type ``T``: + +.. code-block:: c++ + + class [[gsl::Pointer(int)]] IntPointer { + private: + int *valuePointer; + public: + int *getInt() { return valuePointer; } + }; + +The argument ``T`` is optional and is ignored. +This attribute may be used by analysis tools and has no effect on code +generation. + +Example: +When constructing an instance of a class annotated like this (a Pointer) from +an instance of a class annotated with ``[[gsl::Owner]]`` (an Owner), +then the analysis will consider the Pointer to point inside the Owner. +When the Owner's lifetime ends, it will consider the Pointer to be dangling. + +.. code-block:: c++ + + int f() { + IntPointer P; + if (true) { + IntOwner O(7); + P = IntPointer(O); // P "points into" O + } // P is dangling + return P.get(); // error: Using a dangling Pointer. + } + +}]; +} diff --git a/include/clang/Basic/AttributeCommonInfo.h b/include/clang/Basic/AttributeCommonInfo.h new file mode 100644 index 00000000000..545e7e9a2b4 --- /dev/null +++ b/include/clang/Basic/AttributeCommonInfo.h @@ -0,0 +1,190 @@ +//======- AttributeCommonInfo.h - Base info about Attributes-----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the AttributeCommonInfo type, which is the base for a +// ParsedAttr and is used by Attr as a way to share info between the two. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_ATTRIBUTECOMMONINFO_H +#define LLVM_CLANG_BASIC_ATTRIBUTECOMMONINFO_H +#include "clang/Basic/SourceLocation.h" + +namespace clang { +class IdentifierInfo; +class ASTRecordWriter; + +class AttributeCommonInfo { +public: + /// The style used to specify an attribute. + enum Syntax { + /// __attribute__((...)) + AS_GNU, + + /// [[...]] + AS_CXX11, + + /// [[...]] + AS_C2x, + + /// __declspec(...) + AS_Declspec, + + /// [uuid("...")] class Foo + AS_Microsoft, + + /// __ptr16, alignas(...), etc. + AS_Keyword, + + /// #pragma ... + AS_Pragma, + + // Note TableGen depends on the order above. Do not add or change the order + // without adding related code to TableGen/ClangAttrEmitter.cpp. + /// Context-sensitive version of a keyword attribute. + AS_ContextSensitiveKeyword, + }; + enum Kind { +#define PARSED_ATTR(NAME) AT_##NAME, +#include "clang/Sema/AttrParsedAttrList.inc" +#undef PARSED_ATTR + NoSemaHandlerAttribute, + IgnoredAttribute, + UnknownAttribute, + }; + +private: + const IdentifierInfo *AttrName = nullptr; + const IdentifierInfo *ScopeName = nullptr; + SourceRange AttrRange; + const SourceLocation ScopeLoc; + // Corresponds to the Kind enum. + unsigned AttrKind : 16; + /// Corresponds to the Syntax enum. 
+ unsigned SyntaxUsed : 3; + unsigned SpellingIndex : 4; + +protected: + static constexpr unsigned SpellingNotCalculated = 0xf; + +public: + AttributeCommonInfo(SourceRange AttrRange) + : AttrRange(AttrRange), ScopeLoc(), AttrKind(0), SyntaxUsed(0), + SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(SourceLocation AttrLoc) + : AttrRange(AttrLoc), ScopeLoc(), AttrKind(0), SyntaxUsed(0), + SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(const IdentifierInfo *AttrName, + const IdentifierInfo *ScopeName, SourceRange AttrRange, + SourceLocation ScopeLoc, Syntax SyntaxUsed) + : AttrName(AttrName), ScopeName(ScopeName), AttrRange(AttrRange), + ScopeLoc(ScopeLoc), + AttrKind(getParsedKind(AttrName, ScopeName, SyntaxUsed)), + SyntaxUsed(SyntaxUsed), SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(const IdentifierInfo *AttrName, + const IdentifierInfo *ScopeName, SourceRange AttrRange, + SourceLocation ScopeLoc, Kind AttrKind, Syntax SyntaxUsed) + : AttrName(AttrName), ScopeName(ScopeName), AttrRange(AttrRange), + ScopeLoc(ScopeLoc), AttrKind(AttrKind), SyntaxUsed(SyntaxUsed), + SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(const IdentifierInfo *AttrName, + const IdentifierInfo *ScopeName, SourceRange AttrRange, + SourceLocation ScopeLoc, Kind AttrKind, Syntax SyntaxUsed, + unsigned Spelling) + : AttrName(AttrName), ScopeName(ScopeName), AttrRange(AttrRange), + ScopeLoc(ScopeLoc), AttrKind(AttrKind), SyntaxUsed(SyntaxUsed), + SpellingIndex(Spelling) {} + + AttributeCommonInfo(const IdentifierInfo *AttrName, SourceRange AttrRange, + Syntax SyntaxUsed) + : AttrName(AttrName), ScopeName(nullptr), AttrRange(AttrRange), + ScopeLoc(), AttrKind(getParsedKind(AttrName, ScopeName, SyntaxUsed)), + SyntaxUsed(SyntaxUsed), SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(SourceRange AttrRange, Kind K, Syntax SyntaxUsed) + : AttrName(nullptr), ScopeName(nullptr), AttrRange(AttrRange), ScopeLoc(), + 
AttrKind(K), SyntaxUsed(SyntaxUsed), + SpellingIndex(SpellingNotCalculated) {} + + AttributeCommonInfo(SourceRange AttrRange, Kind K, Syntax SyntaxUsed, + unsigned Spelling) + : AttrName(nullptr), ScopeName(nullptr), AttrRange(AttrRange), ScopeLoc(), + AttrKind(K), SyntaxUsed(SyntaxUsed), SpellingIndex(Spelling) {} + + AttributeCommonInfo(AttributeCommonInfo &&) = default; + AttributeCommonInfo(const AttributeCommonInfo &) = default; + + Kind getParsedKind() const { return Kind(AttrKind); } + Syntax getSyntax() const { return Syntax(SyntaxUsed); } + const IdentifierInfo *getAttrName() const { return AttrName; } + SourceLocation getLoc() const { return AttrRange.getBegin(); } + SourceRange getRange() const { return AttrRange; } + void setRange(SourceRange R) { AttrRange = R; } + + bool hasScope() const { return ScopeName; } + const IdentifierInfo *getScopeName() const { return ScopeName; } + SourceLocation getScopeLoc() const { return ScopeLoc; } + + bool isDeclspecAttribute() const { return SyntaxUsed == AS_Declspec; } + bool isMicrosoftAttribute() const { return SyntaxUsed == AS_Microsoft; } + + bool isGNUScope() const; + + bool isAlignasAttribute() const { + // FIXME: Use a better mechanism to determine this. + return getParsedKind() == AT_Aligned && isKeywordAttribute(); + } + + bool isCXX11Attribute() const { + return SyntaxUsed == AS_CXX11 || isAlignasAttribute(); + } + + bool isC2xAttribute() const { return SyntaxUsed == AS_C2x; } + + bool isKeywordAttribute() const { + return SyntaxUsed == AS_Keyword || SyntaxUsed == AS_ContextSensitiveKeyword; + } + + bool isContextSensitiveKeywordAttribute() const { + return SyntaxUsed == AS_ContextSensitiveKeyword; + } + + unsigned getAttributeSpellingListIndex() const { + assert((isAttributeSpellingListCalculated() || AttrName) && + "Spelling cannot be found"); + return isAttributeSpellingListCalculated() + ? 
SpellingIndex + : calculateAttributeSpellingListIndex(); + } + void setAttributeSpellingListIndex(unsigned V) { SpellingIndex = V; } + + static Kind getParsedKind(const IdentifierInfo *Name, + const IdentifierInfo *Scope, Syntax SyntaxUsed); + +private: + /// Get an index into the attribute spelling list + /// defined in Attr.td. This index is used by an attribute + /// to pretty print itself. + unsigned calculateAttributeSpellingListIndex() const; + + friend class clang::ASTRecordWriter; + // Used exclusively by ASTRecordWriter to get the raw spelling list state. + unsigned getAttributeSpellingListIndexRaw() const { return SpellingIndex; } + +protected: + bool isAttributeSpellingListCalculated() const { + return SpellingIndex != SpellingNotCalculated; + } +}; +} // namespace clang + +#endif // LLVM_CLANG_BASIC_ATTRIBUTECOMMONINFO_H diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def index 984e607a2fc..76e3b03de83 100644 --- a/include/clang/Basic/Builtins.def +++ b/include/clang/Basic/Builtins.def @@ -113,14 +113,17 @@ BUILTIN(__builtin_atan2l, "LdLdLd", "Fne") BUILTIN(__builtin_abs , "ii" , "ncF") BUILTIN(__builtin_copysign, "ddd", "ncF") BUILTIN(__builtin_copysignf, "fff", "ncF") +BUILTIN(__builtin_copysignf16, "hhh", "ncF") BUILTIN(__builtin_copysignl, "LdLdLd", "ncF") BUILTIN(__builtin_copysignf128, "LLdLLdLLd", "ncF") BUILTIN(__builtin_fabs , "dd" , "ncF") BUILTIN(__builtin_fabsf, "ff" , "ncF") BUILTIN(__builtin_fabsl, "LdLd", "ncF") +BUILTIN(__builtin_fabsf16, "hh" , "ncF") BUILTIN(__builtin_fabsf128, "LLdLLd", "ncF") BUILTIN(__builtin_fmod , "ddd" , "Fne") BUILTIN(__builtin_fmodf, "fff" , "Fne") +BUILTIN(__builtin_fmodf16, "hhh" , "Fne") BUILTIN(__builtin_fmodl, "LdLdLd", "Fne") BUILTIN(__builtin_frexp , "ddi*" , "Fn") BUILTIN(__builtin_frexpf, "ffi*" , "Fn") @@ -154,6 +157,7 @@ BUILTIN(__builtin_powif, "ffi" , "Fnc") BUILTIN(__builtin_powil, "LdLdi", "Fnc") BUILTIN(__builtin_pow , "ddd" , "Fne") BUILTIN(__builtin_powf, "fff" , 
"Fne") +BUILTIN(__builtin_powf16, "hhh" , "Fne") BUILTIN(__builtin_powl, "LdLdLd", "Fne") // Standard unary libc/libm functions with double/float/long double variants: @@ -180,9 +184,11 @@ BUILTIN(__builtin_cbrtf, "ff", "Fnc") BUILTIN(__builtin_cbrtl, "LdLd", "Fnc") BUILTIN(__builtin_ceil , "dd" , "Fnc") BUILTIN(__builtin_ceilf, "ff" , "Fnc") +BUILTIN(__builtin_ceilf16, "hh" , "Fnc") BUILTIN(__builtin_ceill, "LdLd", "Fnc") BUILTIN(__builtin_cos , "dd" , "Fne") BUILTIN(__builtin_cosf, "ff" , "Fne") +BUILTIN(__builtin_cosf16, "hh" , "Fne") BUILTIN(__builtin_cosh , "dd" , "Fne") BUILTIN(__builtin_coshf, "ff" , "Fne") BUILTIN(__builtin_coshl, "LdLd", "Fne") @@ -195,9 +201,11 @@ BUILTIN(__builtin_erfcf, "ff", "Fne") BUILTIN(__builtin_erfcl, "LdLd", "Fne") BUILTIN(__builtin_exp , "dd" , "Fne") BUILTIN(__builtin_expf, "ff" , "Fne") +BUILTIN(__builtin_expf16, "hh" , "Fne") BUILTIN(__builtin_expl, "LdLd", "Fne") BUILTIN(__builtin_exp2 , "dd" , "Fne") BUILTIN(__builtin_exp2f, "ff" , "Fne") +BUILTIN(__builtin_exp2f16, "hh" , "Fne") BUILTIN(__builtin_exp2l, "LdLd", "Fne") BUILTIN(__builtin_expm1 , "dd", "Fne") BUILTIN(__builtin_expm1f, "ff", "Fne") @@ -207,15 +215,19 @@ BUILTIN(__builtin_fdimf, "fff", "Fne") BUILTIN(__builtin_fdiml, "LdLdLd", "Fne") BUILTIN(__builtin_floor , "dd" , "Fnc") BUILTIN(__builtin_floorf, "ff" , "Fnc") +BUILTIN(__builtin_floorf16, "hh" , "Fnc") BUILTIN(__builtin_floorl, "LdLd", "Fnc") BUILTIN(__builtin_fma, "dddd", "Fne") BUILTIN(__builtin_fmaf, "ffff", "Fne") +BUILTIN(__builtin_fmaf16, "hhhh", "Fne") BUILTIN(__builtin_fmal, "LdLdLdLd", "Fne") BUILTIN(__builtin_fmax, "ddd", "Fnc") BUILTIN(__builtin_fmaxf, "fff", "Fnc") +BUILTIN(__builtin_fmaxf16, "hhh", "Fnc") BUILTIN(__builtin_fmaxl, "LdLdLd", "Fnc") BUILTIN(__builtin_fmin, "ddd", "Fnc") BUILTIN(__builtin_fminf, "fff", "Fnc") +BUILTIN(__builtin_fminf16, "hhh", "Fnc") BUILTIN(__builtin_fminl, "LdLdLd", "Fnc") BUILTIN(__builtin_hypot , "ddd" , "Fne") BUILTIN(__builtin_hypotf, "fff" , "Fne") @@ -235,17 
+247,20 @@ BUILTIN(__builtin_llroundl, "LLiLd", "Fne") BUILTIN(__builtin_log , "dd" , "Fne") BUILTIN(__builtin_log10 , "dd" , "Fne") BUILTIN(__builtin_log10f, "ff" , "Fne") +BUILTIN(__builtin_log10f16, "hh" , "Fne") BUILTIN(__builtin_log10l, "LdLd", "Fne") BUILTIN(__builtin_log1p , "dd" , "Fne") BUILTIN(__builtin_log1pf, "ff" , "Fne") BUILTIN(__builtin_log1pl, "LdLd", "Fne") BUILTIN(__builtin_log2, "dd" , "Fne") BUILTIN(__builtin_log2f, "ff" , "Fne") +BUILTIN(__builtin_log2f16, "hh" , "Fne") BUILTIN(__builtin_log2l, "LdLd" , "Fne") BUILTIN(__builtin_logb , "dd", "Fne") BUILTIN(__builtin_logbf, "ff", "Fne") BUILTIN(__builtin_logbl, "LdLd", "Fne") BUILTIN(__builtin_logf, "ff" , "Fne") +BUILTIN(__builtin_logf16, "hh" , "Fne") BUILTIN(__builtin_logl, "LdLd", "Fne") BUILTIN(__builtin_lrint , "Lid", "Fne") BUILTIN(__builtin_lrintf, "Lif", "Fne") @@ -270,9 +285,11 @@ BUILTIN(__builtin_remquof, "fffi*", "Fn") BUILTIN(__builtin_remquol, "LdLdLdi*", "Fn") BUILTIN(__builtin_rint , "dd", "Fnc") BUILTIN(__builtin_rintf, "ff", "Fnc") +BUILTIN(__builtin_rintf16, "hh", "Fnc") BUILTIN(__builtin_rintl, "LdLd", "Fnc") BUILTIN(__builtin_round, "dd" , "Fnc") BUILTIN(__builtin_roundf, "ff" , "Fnc") +BUILTIN(__builtin_roundf16, "hh" , "Fnc") BUILTIN(__builtin_roundl, "LdLd" , "Fnc") BUILTIN(__builtin_scalbln , "ddLi", "Fne") BUILTIN(__builtin_scalblnf, "ffLi", "Fne") @@ -282,12 +299,14 @@ BUILTIN(__builtin_scalbnf, "ffi", "Fne") BUILTIN(__builtin_scalbnl, "LdLdi", "Fne") BUILTIN(__builtin_sin , "dd" , "Fne") BUILTIN(__builtin_sinf, "ff" , "Fne") +BUILTIN(__builtin_sinf16, "hh" , "Fne") BUILTIN(__builtin_sinh , "dd" , "Fne") BUILTIN(__builtin_sinhf, "ff" , "Fne") BUILTIN(__builtin_sinhl, "LdLd", "Fne") BUILTIN(__builtin_sinl, "LdLd", "Fne") BUILTIN(__builtin_sqrt , "dd" , "Fne") BUILTIN(__builtin_sqrtf, "ff" , "Fne") +BUILTIN(__builtin_sqrtf16, "hh" , "Fne") BUILTIN(__builtin_sqrtl, "LdLd", "Fne") BUILTIN(__builtin_tan , "dd" , "Fne") BUILTIN(__builtin_tanf, "ff" , "Fne") @@ -301,6 +320,7 
@@ BUILTIN(__builtin_tgammal, "LdLd", "Fne") BUILTIN(__builtin_trunc , "dd", "Fnc") BUILTIN(__builtin_truncf, "ff", "Fnc") BUILTIN(__builtin_truncl, "LdLd", "Fnc") +BUILTIN(__builtin_truncf16, "hh", "Fnc") // C99 complex builtins BUILTIN(__builtin_cabs, "dXd", "Fne") @@ -394,6 +414,7 @@ BUILTIN(__builtin_signbitl, "iLd", "Fnc") // Special FP builtins. BUILTIN(__builtin_canonicalize, "dd", "nc") BUILTIN(__builtin_canonicalizef, "ff", "nc") +BUILTIN(__builtin_canonicalizef16, "hh", "nc") BUILTIN(__builtin_canonicalizel, "LdLd", "nc") // Builtins for arithmetic. @@ -440,7 +461,7 @@ BUILTIN(__builtin_rotateleft64, "UWiUWiUWi", "nc") BUILTIN(__builtin_rotateright8, "UcUcUc", "nc") BUILTIN(__builtin_rotateright16, "UsUsUs", "nc") BUILTIN(__builtin_rotateright32, "UZiUZiUZi", "nc") -BUILTIN(__builtin_rotateright64, "UWiUWiWi", "nc") +BUILTIN(__builtin_rotateright64, "UWiUWiUWi", "nc") // Random GCC builtins BUILTIN(__builtin_constant_p, "i.", "nctu") @@ -984,9 +1005,7 @@ LIBBUILTIN(pthread_create, "", "fC<2,3>", "pthread.h", ALL_GNU_LANGUAGES) LIBBUILTIN(_setjmp, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(__sigsetjmp, "iSJi", "fj", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(sigsetjmp, "iSJi", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(setjmp_syscall, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(savectx, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) -LIBBUILTIN(qsetjmp, "iJ", "fj", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(getcontext, "iK*", "fj", "setjmp.h", ALL_LANGUAGES) LIBBUILTIN(_longjmp, "vJi", "fr", "setjmp.h", ALL_GNU_LANGUAGES) @@ -1449,7 +1468,7 @@ BUILTIN(__builtin_operator_new, "v*z", "tc") BUILTIN(__builtin_operator_delete, "vv*", "tn") BUILTIN(__builtin_char_memchr, "c*cC*iz", "n") BUILTIN(__builtin_dump_struct, "ivC*v*", "tn") -BUILTIN(__builtin_preserve_access_index, "vC*vC*", "nU") +BUILTIN(__builtin_preserve_access_index, "v.", "t") // Safestack builtins BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn") diff --git 
a/include/clang/Basic/BuiltinsAArch64.def b/include/clang/Basic/BuiltinsAArch64.def index 7701ad98f48..4df8d5a1676 100644 --- a/include/clang/Basic/BuiltinsAArch64.def +++ b/include/clang/Basic/BuiltinsAArch64.def @@ -91,12 +91,18 @@ LANGBUILTIN(__sevl, "v", "", ALL_MS_LANGUAGES) // Misc BUILTIN(__builtin_sponentry, "v*", "c") +// Transactional Memory Extension +BUILTIN(__builtin_arm_tstart, "WUi", "nj") +BUILTIN(__builtin_arm_tcommit, "v", "n") +BUILTIN(__builtin_arm_tcancel, "vWUIi", "n") +BUILTIN(__builtin_arm_ttest, "WUi", "nc") + TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAdd, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAdd, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -106,9 +112,9 @@ TARGET_HEADER_BUILTIN(_InterlockedIncrement64, "LLiLLiD*", "nh", "intrin.h" TARGET_HEADER_BUILTIN(_InterlockedOr64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") 
+TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_acq, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_rel, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_nf, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -125,9 +131,9 @@ TARGET_HEADER_BUILTIN(_InterlockedExchange8_rel, "ccD*c", "nh", "intrin.h TARGET_HEADER_BUILTIN(_InterlockedExchange16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -138,9 +144,9 @@ TARGET_HEADER_BUILTIN(_InterlockedCompareExchange8_rel, 
"ccD*cc", "nh", TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_acq, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_nf, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_rel, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_acq, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_nf, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_rel, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_acq, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_nf, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_rel, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_acq, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_nf, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_rel, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -151,9 +157,9 @@ TARGET_HEADER_BUILTIN(_InterlockedOr8_rel, "ccD*c", "nh", "intrin.h", ALL TARGET_HEADER_BUILTIN(_InterlockedOr16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") 
+TARGET_HEADER_BUILTIN(_InterlockedOr_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedOr_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedOr_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -164,9 +170,9 @@ TARGET_HEADER_BUILTIN(_InterlockedXor8_rel, "ccD*c", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedXor16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedXor_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedXor_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedXor_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -177,9 +183,9 @@ TARGET_HEADER_BUILTIN(_InterlockedAnd8_rel, "ccD*c", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedAnd16_acq, "ssD*s", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -187,9 +193,9 @@ TARGET_HEADER_BUILTIN(_InterlockedAnd64_rel, "LLiLLiD*LLi", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedIncrement16_acq, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement16_nf, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement16_rel, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_acq, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_nf, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_rel, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedIncrement_acq, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedIncrement_nf, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") 
+TARGET_HEADER_BUILTIN(_InterlockedIncrement_rel, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_acq, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_nf, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_rel, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -197,9 +203,9 @@ TARGET_HEADER_BUILTIN(_InterlockedIncrement64_rel, "LLiLLiD*", "nh", "intrin.h", TARGET_HEADER_BUILTIN(_InterlockedDecrement16_acq, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement16_nf, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement16_rel, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_acq, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_nf, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_rel, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_acq, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_nf, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_rel, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_acq, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_nf, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_rel, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") diff --git a/include/clang/Basic/BuiltinsAMDGPU.def b/include/clang/Basic/BuiltinsAMDGPU.def index 2f8fb9000a7..9b3a0f96798 100644 --- a/include/clang/Basic/BuiltinsAMDGPU.def +++ b/include/clang/Basic/BuiltinsAMDGPU.def @@ -118,6 +118,13 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, 
"E2Usff", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LUiLUiUiLUi", "nc") +BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc") +BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc") //===----------------------------------------------------------------------===// // CI+ only builtins. @@ -125,6 +132,8 @@ BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts") TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts") TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", "ci-insts") +TARGET_BUILTIN(__builtin_amdgcn_is_shared, "bvC*0", "nc", "flat-address-space") +TARGET_BUILTIN(__builtin_amdgcn_is_private, "bvC*0", "nc", "flat-address-space") //===----------------------------------------------------------------------===// // Interpolation builtins. 
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def index 3f0765115b1..19b51694aa2 100644 --- a/include/clang/Basic/BuiltinsARM.def +++ b/include/clang/Basic/BuiltinsARM.def @@ -221,9 +221,9 @@ TARGET_HEADER_BUILTIN(_InterlockedIncrement64, "LLiLLiD*", "nh", "intrin.h" TARGET_HEADER_BUILTIN(_InterlockedOr64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_acq, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_rel, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchangeAdd8_nf, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -240,9 +240,9 @@ TARGET_HEADER_BUILTIN(_InterlockedExchange8_rel, "ccD*c", "nh", "intrin.h TARGET_HEADER_BUILTIN(_InterlockedExchange16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_nf, "LiLiD*Li", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedExchange_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedExchange_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -253,9 +253,9 @@ TARGET_HEADER_BUILTIN(_InterlockedCompareExchange8_rel, "ccD*cc", "nh", TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_acq, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_nf, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange16_rel, "ssD*ss", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_acq, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_nf, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_rel, "LiLiD*LiLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_acq, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_nf, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedCompareExchange_rel, "NiNiD*NiNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_acq, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_nf, "LLiLLiD*LLiLLi", 
"nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_rel, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -266,9 +266,9 @@ TARGET_HEADER_BUILTIN(_InterlockedOr8_rel, "ccD*c", "nh", "intrin.h", ALL TARGET_HEADER_BUILTIN(_InterlockedOr16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedOr_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedOr_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedOr_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedOr_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedOr64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -279,9 +279,9 @@ TARGET_HEADER_BUILTIN(_InterlockedXor8_rel, "ccD*c", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedXor16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedXor_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedXor_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") 
-TARGET_HEADER_BUILTIN(_InterlockedXor_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedXor_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedXor64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -292,9 +292,9 @@ TARGET_HEADER_BUILTIN(_InterlockedAnd8_rel, "ccD*c", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedAnd16_acq, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd16_nf, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd16_rel, "ssD*s", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_acq, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_nf, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedAnd_rel, "LiLiD*Li", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_acq, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_nf, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedAnd_rel, "NiNiD*Ni", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_acq, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_nf, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedAnd64_rel, "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -302,9 +302,9 @@ TARGET_HEADER_BUILTIN(_InterlockedAnd64_rel, 
"LLiLLiD*LLi", "nh", "intrin.h", AL TARGET_HEADER_BUILTIN(_InterlockedIncrement16_acq, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement16_nf, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement16_rel, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_acq, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_nf, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedIncrement_rel, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedIncrement_acq, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedIncrement_nf, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedIncrement_rel, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_acq, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_nf, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedIncrement64_rel, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") @@ -312,9 +312,9 @@ TARGET_HEADER_BUILTIN(_InterlockedIncrement64_rel, "LLiLLiD*", "nh", "intrin.h", TARGET_HEADER_BUILTIN(_InterlockedDecrement16_acq, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement16_nf, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement16_rel, "ssD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_acq, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_nf, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_InterlockedDecrement_rel, "LiLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_acq, "NiNiD*", "nh", 
"intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_nf, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_InterlockedDecrement_rel, "NiNiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_acq, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_nf, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64_rel, "LLiLLiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") diff --git a/include/clang/Basic/BuiltinsBPF.def b/include/clang/Basic/BuiltinsBPF.def new file mode 100644 index 00000000000..bd96b9ef531 --- /dev/null +++ b/include/clang/Basic/BuiltinsBPF.def @@ -0,0 +1,24 @@ +//===--- BuiltinsBPF.def - BPF Builtin function database --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the BPF-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// The format of this database matches clang/Basic/Builtins.def. + +#if defined(BUILTIN) && !defined(TARGET_BUILTIN) +# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) +#endif + +// Get record field information. 
+TARGET_BUILTIN(__builtin_preserve_field_info, "Ui.", "t", "") + +#undef BUILTIN +#undef TARGET_BUILTIN diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def index 3b6348ad7d7..314e1cc0590 100644 --- a/include/clang/Basic/BuiltinsPPC.def +++ b/include/clang/Basic/BuiltinsPPC.def @@ -50,17 +50,17 @@ BUILTIN(__builtin_altivec_vavguw, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vrfip, "V4fV4f", "") -BUILTIN(__builtin_altivec_vcfsx, "V4fV4iIi", "") -BUILTIN(__builtin_altivec_vcfux, "V4fV4iIi", "") +BUILTIN(__builtin_altivec_vcfsx, "V4fV4SiIi", "") +BUILTIN(__builtin_altivec_vcfux, "V4fV4UiIi", "") BUILTIN(__builtin_altivec_vctsxs, "V4SiV4fIi", "") BUILTIN(__builtin_altivec_vctuxs, "V4UiV4fIi", "") -BUILTIN(__builtin_altivec_dss, "vUi", "") +BUILTIN(__builtin_altivec_dss, "vUIi", "") BUILTIN(__builtin_altivec_dssall, "v", "") -BUILTIN(__builtin_altivec_dst, "vvC*iUi", "") -BUILTIN(__builtin_altivec_dstt, "vvC*iUi", "") -BUILTIN(__builtin_altivec_dstst, "vvC*iUi", "") -BUILTIN(__builtin_altivec_dststt, "vvC*iUi", "") +BUILTIN(__builtin_altivec_dst, "vvC*iUIi", "") +BUILTIN(__builtin_altivec_dstt, "vvC*iUIi", "") +BUILTIN(__builtin_altivec_dstst, "vvC*iUIi", "") +BUILTIN(__builtin_altivec_dststt, "vvC*iUIi", "") BUILTIN(__builtin_altivec_vexptefp, "V4fV4f", "") diff --git a/include/clang/Basic/BuiltinsWebAssembly.def b/include/clang/Basic/BuiltinsWebAssembly.def index 63177f016ac..7fed9d2e43e 100644 --- a/include/clang/Basic/BuiltinsWebAssembly.def +++ b/include/clang/Basic/BuiltinsWebAssembly.def @@ -31,6 +31,8 @@ TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory") // Thread-local storage TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory") +TARGET_BUILTIN(__builtin_wasm_tls_align, "z", "nc", "bulk-memory") +TARGET_BUILTIN(__builtin_wasm_tls_base, "v*", "nU", "bulk-memory") // Floating point min/max BUILTIN(__builtin_wasm_min_f32, "fff", "nc") @@ -47,6 +49,16 @@ BUILTIN(__builtin_wasm_atomic_wait_i32, 
"ii*iLLi", "n") BUILTIN(__builtin_wasm_atomic_wait_i64, "iLLi*LLiLLi", "n") BUILTIN(__builtin_wasm_atomic_notify, "Uii*Ui", "n") +// Trapping fp-to-int conversions +BUILTIN(__builtin_wasm_trunc_s_i32_f32, "if", "nc") +BUILTIN(__builtin_wasm_trunc_u_i32_f32, "if", "nc") +BUILTIN(__builtin_wasm_trunc_s_i32_f64, "id", "nc") +BUILTIN(__builtin_wasm_trunc_u_i32_f64, "id", "nc") +BUILTIN(__builtin_wasm_trunc_s_i64_f32, "LLif", "nc") +BUILTIN(__builtin_wasm_trunc_u_i64_f32, "LLif", "nc") +BUILTIN(__builtin_wasm_trunc_s_i64_f64, "LLid", "nc") +BUILTIN(__builtin_wasm_trunc_u_i64_f64, "LLid", "nc") + // Saturating fp-to-int conversions TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32_f32, "if", "nc", "nontrapping-fptoint") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32_f32, "if", "nc", "nontrapping-fptoint") @@ -58,6 +70,8 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64_f64, "LLid", "nc", "nontrappi TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64_f64, "LLid", "nc", "nontrapping-fptoint") // SIMD builtins +TARGET_BUILTIN(__builtin_wasm_swizzle_v8x16, "V16cV16cV16c", "nc", "unimplemented-simd128") + TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i8x16, "iV16cIi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16cIi", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i16x8, "iV8sIi", "nc", "simd128") @@ -106,10 +120,29 @@ TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "unimplemented-simd1 TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_qfma_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128") + 
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") + +TARGET_BUILTIN(__builtin_wasm_widen_low_s_i16x8_i8x16, "V8sV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_high_s_i16x8_i8x16, "V8sV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_low_u_i16x8_i8x16, "V8sV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_high_u_i16x8_i8x16, "V8sV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_low_s_i32x4_i16x8, "V4iV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_high_s_i32x4_i16x8, "V4iV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_low_u_i32x4_i16x8, "V4iV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_widen_high_u_i32x4_i16x8, "V4iV8s", "nc", "simd128") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index a0ba0ecf36b..5ab9dc1c3ac 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -751,8 +751,8 @@ TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "nc", "tbm") // LWP TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "n", "lwp") TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "n", "lwp") -TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiUi", "n", "lwp") -TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "n", "lwp") 
+TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiIUi", "n", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiIUi", "n", "lwp") // SHA TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "ncV:128:", "sha") diff --git a/include/clang/Basic/BuiltinsX86_64.def b/include/clang/Basic/BuiltinsX86_64.def index 56051af55e7..c535f43203e 100644 --- a/include/clang/Basic/BuiltinsX86_64.def +++ b/include/clang/Basic/BuiltinsX86_64.def @@ -86,8 +86,8 @@ TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2") TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2") TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2") TARGET_BUILTIN(__builtin_ia32_bextri_u64, "UOiUOiIUOi", "nc", "tbm") -TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiUi", "n", "lwp") -TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiUi", "n", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiIUi", "n", "lwp") +TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiIUi", "n", "lwp") TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "OiV2dIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "UOiV2dIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "OiV4fIi", "ncV:128:", "avx512f") diff --git a/include/clang/Basic/CodeGenOptions.def b/include/clang/Basic/CodeGenOptions.def index cd7a8454876..d2266cc2d61 100644 --- a/include/clang/Basic/CodeGenOptions.def +++ b/include/clang/Basic/CodeGenOptions.def @@ -47,7 +47,8 @@ CODEGENOPT(CXXCtorDtorAliases, 1, 0) ///< Emit complete ctors/dtors as linker ///< aliases to base ctors when possible. CODEGENOPT(DataSections , 1, 0) ///< Set when -fdata-sections is enabled. CODEGENOPT(UniqueSectionNames, 1, 1) ///< Set for -funique-section-names. -CODEGENOPT(DisableFPElim , 1, 0) ///< Set when -fomit-frame-pointer is enabled. +ENUM_CODEGENOPT(FramePointer, FramePointerKind, 2, FramePointerKind::None) /// frame-pointer: all,non-leaf,none + CODEGENOPT(DisableFree , 1, 0) ///< Don't free memory. 
CODEGENOPT(DiscardValueNames , 1, 0) ///< Discard Value Names from the IR (LLVMContext flag) CODEGENOPT(DisableGCov , 1, 0) ///< Don't run the GCov pass, for testing. @@ -132,6 +133,7 @@ CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is CODEGENOPT(NoExecStack , 1, 0) ///< Set when -Wa,--noexecstack is enabled. CODEGENOPT(FatalWarnings , 1, 0) ///< Set when -Wa,--fatal-warnings is ///< enabled. +CODEGENOPT(NoWarn , 1, 0) ///< Set when -Wa,--no-warn is enabled. CODEGENOPT(EnableSegmentedStacks , 1, 0) ///< Set when -fsplit-stack is enabled. CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enabled. CODEGENOPT(NoInfsFPMath , 1, 0) ///< Assume FP arguments, results not +-Inf. @@ -155,8 +157,6 @@ CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy) /// Replace certain message sends with calls to ObjC runtime entrypoints CODEGENOPT(ObjCConvertMessagesToRuntimeCalls , 1, 1) -CODEGENOPT(OmitLeafFramePointer , 1, 0) ///< Set when -momit-leaf-frame-pointer is - ///< enabled. VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified. VALUE_CODEGENOPT(OptimizeSize, 2, 0) ///< If -Os (==1) or -Oz (==2) is specified. @@ -195,6 +195,8 @@ CODEGENOPT(SanitizeMinimalRuntime, 1, 0) ///< Use "_minimal" sanitizer runtime f ///< diagnostics. CODEGENOPT(SanitizeCfiICallGeneralizePointers, 1, 0) ///< Generalize pointer types in ///< CFI icall function signatures +CODEGENOPT(SanitizeCfiCanonicalJumpTables, 1, 0) ///< Make jump table symbols canonical + ///< instead of creating a local jump table. CODEGENOPT(SanitizeCoverageType, 2, 0) ///< Type of sanitizer coverage ///< instrumentation. 
CODEGENOPT(SanitizeCoverageIndirectCalls, 1, 0) ///< Enable sanitizer coverage @@ -226,6 +228,8 @@ CODEGENOPT(StrictEnums , 1, 0) ///< Optimize based on strict enum definiti CODEGENOPT(StrictVTablePointers, 1, 0) ///< Optimize based on the strict vtable pointers CODEGENOPT(TimePasses , 1, 0) ///< Set when -ftime-report is enabled. CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled. +VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds), + ///< traced by time profiler CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. @@ -274,6 +278,10 @@ CODEGENOPT(EmitLLVMUseLists, 1, 0) ///< Control whether to serialize use-lists. CODEGENOPT(WholeProgramVTables, 1, 0) ///< Whether to apply whole-program /// vtable optimization. +CODEGENOPT(VirtualFunctionElimination, 1, 0) ///< Whether to apply the dead + /// virtual function elimination + /// optimization. + /// Whether to use public LTO visibility for entities in std and stdext /// namespaces. This is enabled by clang-cl's /MT and /MTd flags. CODEGENOPT(LTOVisibilityPublicStd, 1, 0) diff --git a/include/clang/Basic/CodeGenOptions.h b/include/clang/Basic/CodeGenOptions.h index 4e9025d2fea..8881a316d1f 100644 --- a/include/clang/Basic/CodeGenOptions.h +++ b/include/clang/Basic/CodeGenOptions.h @@ -117,6 +117,12 @@ public: enum SignReturnAddressKeyValue { AKey, BKey }; + enum class FramePointerKind { + None, // Omit all frame pointers. + NonLeaf, // Keep non-leaf frame pointers. + All, // Keep all frame pointers. + }; + /// The code model to use (-mcmodel). 
std::string CodeModel; diff --git a/include/clang/Basic/Diagnostic.h b/include/clang/Basic/Diagnostic.h index 5a707007e46..9e494aa371c 100644 --- a/include/clang/Basic/Diagnostic.h +++ b/include/clang/Basic/Diagnostic.h @@ -632,24 +632,22 @@ public: /// Suppress all diagnostics, to silence the front end when we /// know that we don't want any more diagnostics to be passed along to the /// client - void setSuppressAllDiagnostics(bool Val = true) { - SuppressAllDiagnostics = Val; - } + void setSuppressAllDiagnostics(bool Val) { SuppressAllDiagnostics = Val; } bool getSuppressAllDiagnostics() const { return SuppressAllDiagnostics; } /// Set type eliding, to skip outputting same types occurring in /// template types. - void setElideType(bool Val = true) { ElideType = Val; } + void setElideType(bool Val) { ElideType = Val; } bool getElideType() { return ElideType; } /// Set tree printing, to outputting the template difference in a /// tree format. - void setPrintTemplateTree(bool Val = false) { PrintTemplateTree = Val; } + void setPrintTemplateTree(bool Val) { PrintTemplateTree = Val; } bool getPrintTemplateTree() { return PrintTemplateTree; } /// Set color printing, so the type diffing will inject color markers /// into the output. - void setShowColors(bool Val = false) { ShowColors = Val; } + void setShowColors(bool Val) { ShowColors = Val; } bool getShowColors() { return ShowColors; } /// Specify which overload candidates to show when overload resolution @@ -667,7 +665,7 @@ public: /// the middle of another diagnostic. /// /// This can be used by clients who suppress diagnostics themselves. - void setLastDiagnosticIgnored(bool Ignored = true) { + void setLastDiagnosticIgnored(bool Ignored) { if (LastDiagLevel == DiagnosticIDs::Fatal) FatalErrorOccurred = true; LastDiagLevel = Ignored ? DiagnosticIDs::Ignored : DiagnosticIDs::Warning; @@ -1127,11 +1125,6 @@ public: Emit(); } - /// Retrieve an empty diagnostic builder. 
- static DiagnosticBuilder getEmpty() { - return {}; - } - /// Forces the diagnostic to be emitted. const DiagnosticBuilder &setForceEmit() const { IsForceEmit = true; diff --git a/include/clang/Basic/DiagnosticASTKinds.td b/include/clang/Basic/DiagnosticASTKinds.td index 23502152b4a..04d767445a8 100644 --- a/include/clang/Basic/DiagnosticASTKinds.td +++ b/include/clang/Basic/DiagnosticASTKinds.td @@ -37,8 +37,9 @@ def note_constexpr_virtual_call : Note< def note_constexpr_pure_virtual_call : Note< "pure virtual function %q0 called">; def note_constexpr_polymorphic_unknown_dynamic_type : Note< - "%select{||||virtual function called on|dynamic_cast applied to|" - "typeid applied to}0 object '%1' whose dynamic type is not constant">; + "%select{|||||virtual function called on|dynamic_cast applied to|" + "typeid applied to|construction of|destruction of}0 object '%1' " + "whose dynamic type is not constant">; def note_constexpr_dynamic_cast_to_reference_failed : Note< "reference dynamic_cast failed: %select{" "static type %1 of operand is a non-public base class of dynamic type %2|" @@ -53,6 +54,9 @@ def note_constexpr_nonliteral : Note< def note_constexpr_non_global : Note< "%select{pointer|reference}0 to %select{|subobject of }1" "%select{temporary|%3}2 is not a constant expression">; +def note_constexpr_dynamic_alloc : Note< + "%select{pointer|reference}0 to %select{|subobject of }1" + "heap-allocated object is not a constant expression">; def note_constexpr_uninitialized : Note< "%select{|sub}0object of type %1 is not initialized">; def note_constexpr_subobject_declared_here : Note< @@ -100,6 +104,7 @@ def note_constexpr_typeid_polymorphic : Note< def note_constexpr_void_comparison : Note< "comparison between unequal pointers to void has unspecified result">; def note_constexpr_temporary_here : Note<"temporary created here">; +def note_constexpr_dynamic_alloc_here : Note<"heap allocation performed here">; def note_constexpr_conditional_never_const : Note< "both 
arms of conditional operator are unable to produce a " "constant expression">; @@ -109,16 +114,20 @@ def note_constexpr_call_limit_exceeded : Note< "constexpr evaluation hit maximum call limit">; def note_constexpr_step_limit_exceeded : Note< "constexpr evaluation hit maximum step limit; possible infinite loop?">; +def note_constexpr_heap_alloc_limit_exceeded : Note< + "constexpr evaluation hit maximum heap allocation limit">; def note_constexpr_this : Note< "%select{|implicit }0use of 'this' pointer is only allowed within the " "evaluation of a call to a 'constexpr' member function">; def note_constexpr_lifetime_ended : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " - "%select{temporary|variable}1 whose lifetime has ended">; + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 %select{temporary|variable}1 whose " + "%plural{8:storage duration|:lifetime}0 has ended">; def note_constexpr_access_uninit : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|" + "construction of subobject of|destruction of}0 " "%select{object outside its lifetime|uninitialized object}1 " "is not allowed in a constant expression">; def note_constexpr_use_uninit_reference : Note< @@ -128,16 +137,21 @@ def note_constexpr_modify_const_type : Note< "modification of object of const-qualified type %0 is not allowed " "in a constant expression">; def note_constexpr_access_volatile_type : Note< - "%select{read of|assignment to|increment of|decrement of||}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "|||}0 " "volatile-qualified type %1 is not allowed in a constant expression">; def 
note_constexpr_access_volatile_obj : Note< - "%select{read of|assignment to|increment of|decrement of||}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "|||}0 " "volatile %select{temporary|object %2|member %2}1 is not allowed in " "a constant expression">; def note_constexpr_volatile_here : Note< "volatile %select{temporary created|object declared|member declared}0 here">; -def note_constexpr_ltor_mutable : Note< - "read of mutable member %0 is not allowed in a constant expression">; +def note_constexpr_access_mutable : Note< + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " + "mutable member %1 is not allowed in a constant expression">; def note_constexpr_ltor_non_const_int : Note< "read of non-const variable %0 is not allowed in a constant expression">; def note_constexpr_ltor_non_constexpr : Note< @@ -145,31 +159,44 @@ def note_constexpr_ltor_non_constexpr : Note< def note_constexpr_ltor_incomplete_type : Note< "read of incomplete type %0 is not allowed in a constant expression">; def note_constexpr_access_null : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " "dereferenced null pointer is not allowed in a constant expression">; def note_constexpr_access_past_end : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " - "dereferenced one-past-the-end pointer is not allowed in a constant expression">; + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " + "dereferenced one-past-the-end pointer is not allowed " + "in a 
constant expression">; def note_constexpr_access_unsized_array : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " "element of array without known bound " "is not allowed in a constant expression">; def note_constexpr_access_inactive_union_member : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 " + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|" + "construction of subobject of|destruction of}0 " "member %1 of union with %select{active member %3|no active member}2 " "is not allowed in a constant expression">; def note_constexpr_access_static_temporary : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 temporary " + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|reconstruction of|" + "destruction of}0 temporary " "is not allowed in a constant expression outside the expression that " "created the temporary">; def note_constexpr_access_unreadable_object : Note< - "%select{read of|assignment to|increment of|decrement of|member call on|" - "dynamic_cast of|typeid applied to}0 object '%1' whose value is not known">; + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " + "object '%1' whose value is not known">; +def note_constexpr_access_deleted_object : Note< + "%select{read of|read of|assignment to|increment of|decrement of|" + "member call on|dynamic_cast of|typeid applied to|construction of|" + "destruction of}0 " + "heap allocated object that has 
been deleted">; def note_constexpr_modify_global : Note< "a constant expression cannot modify an object that is visible outside " "that expression">; @@ -189,6 +216,13 @@ def note_constexpr_baa_insufficient_alignment : Note< def note_constexpr_baa_value_insufficient_alignment : Note< "value of the aligned pointer (%0) is not a multiple of the asserted %1 " "%plural{1:byte|:bytes}1">; +def note_constexpr_destroy_out_of_lifetime : Note< + "destroying object '%0' whose lifetime has already ended">; +def note_constexpr_unsupported_destruction : Note< + "non-trivial destruction of type %0 in a constant expression is not supported">; +def note_constexpr_unsupported_tempoarary_nontrivial_dtor : Note< + "non-trivial destruction of lifetime-extended temporary with type %0 " + "used in the result of a constant expression is not yet supported">; def note_constexpr_unsupported_unsized_array : Note< "array-to-pointer decay of array member without known bound is not supported">; def note_constexpr_unsized_array_indexed : Note< @@ -228,6 +262,62 @@ def note_constexpr_bit_cast_invalid_subtype : Note< def note_constexpr_bit_cast_indet_dest : Note< "indeterminate value can only initialize an object of type 'unsigned char'" "%select{, 'char',|}1 or 'std::byte'; %0 is invalid">; +def note_constexpr_pseudo_destructor : Note< + "pseudo-destructor call is not permitted in constant expressions " + "until C++20">; +def note_constexpr_construct_complex_elem : Note< + "construction of individual component of complex number is not yet supported " + "in constant expressions">; +def note_constexpr_destroy_complex_elem : Note< + "destruction of individual component of complex number is not yet supported " + "in constant expressions">; +def note_constexpr_new : Note< + "dynamic memory allocation is not permitted in constant expressions " + "until C++20">; +def note_constexpr_new_non_replaceable : Note< + "call to %select{placement|class-specific}0 %1">; +def note_constexpr_new_placement : Note< + 
"this placement new expression is not yet supported in constant expressions">; +def note_constexpr_placement_new_wrong_type : Note< + "placement new would change type of storage from %0 to %1">; +def note_constexpr_new_negative : Note< + "cannot allocate array; evaluated array bound %0 is negative">; +def note_constexpr_new_too_large : Note< + "cannot allocate array; evaluated array bound %0 is too large">; +def note_constexpr_new_too_small : Note< + "cannot allocate array; evaluated array bound %0 is too small to hold " + "%1 explicitly initialized elements">; +def note_constexpr_new_untyped : Note< + "cannot allocate untyped memory in a constant expression; " + "use 'std::allocator::allocate' to allocate memory of type 'T'">; +def note_constexpr_new_not_complete_object_type : Note< + "cannot allocate memory of %select{incomplete|function}0 type %1">; +def note_constexpr_operator_new_bad_size : Note< + "allocated size %0 is not a multiple of size %1 of element type %2">; +def note_constexpr_delete_not_heap_alloc : Note< + "delete of pointer '%0' that does not point to a heap-allocated object">; +def note_constexpr_double_delete : Note< + "delete of pointer that has already been deleted">; +def note_constexpr_double_destroy : Note< + "destruction of object that is already being destroyed">; +def note_constexpr_new_delete_mismatch : Note< + "%plural{2:'delete' used to delete pointer to object " + "allocated with 'std::allocator<...>::allocate'|" + ":%select{non-array delete|array delete|'std::allocator<...>::deallocate'}0 " + "used to delete pointer to " + "%select{array object of type %2|non-array object of type %2|" + "object allocated with 'new'}0}1">; +def note_constexpr_delete_subobject : Note< + "delete of pointer%select{ to subobject|}1 '%0' " + "%select{|that does not point to complete object}1">; +def note_constexpr_delete_base_nonvirt_dtor : Note< + "delete of object with dynamic type %1 through pointer to " + "base class type %0 with non-virtual 
destructor">; +def note_constexpr_memory_leak : Note< + "allocation performed here was not deallocated" + "%plural{0:|: (along with %0 other memory leak%s0)}0">; +def err_experimental_clang_interp_failed : Error< + "the experimental clang interpreter failed to evaluate an expression">; def warn_integer_constant_overflow : Warning< "overflow in expression; result is %0 with type %1">, @@ -277,7 +367,6 @@ def warn_odr_variable_multiple_def : Warning< "external variable %0 defined in multiple translation units">, InGroup; def note_odr_value_here : Note<"declared here with type %0">; -def note_odr_defined_here : Note<"also defined here">; def err_odr_function_type_inconsistent : Error< "external function %0 declared with incompatible types in different " "translation units (%1 vs. %2)">; diff --git a/include/clang/Basic/DiagnosticCommentKinds.td b/include/clang/Basic/DiagnosticCommentKinds.td index fcda3f3a211..c577ac40853 100644 --- a/include/clang/Basic/DiagnosticCommentKinds.td +++ b/include/clang/Basic/DiagnosticCommentKinds.td @@ -153,6 +153,12 @@ def warn_doc_deprecated_not_sync : Warning< def note_add_deprecation_attr : Note< "add a deprecation attribute to the declaration to silence this warning">; +// inline contents commands + +def warn_doc_inline_contents_no_argument : Warning< + "'%select{\\|@}0%1' command does not have a valid word argument">, + InGroup, DefaultIgnore; + // verbatim block commands def warn_verbatim_block_end_without_start : Warning< diff --git a/include/clang/Basic/DiagnosticCommonKinds.td b/include/clang/Basic/DiagnosticCommonKinds.td index ca2faf59d70..484cc317f96 100644 --- a/include/clang/Basic/DiagnosticCommonKinds.td +++ b/include/clang/Basic/DiagnosticCommonKinds.td @@ -12,11 +12,21 @@ let Component = "Common" in { +// Substitutions. + +def select_constexpr_spec_kind : TextSubstitution< + "%select{|constexpr|consteval|constinit}0">; + // Basic. 
def fatal_too_many_errors : Error<"too many errors emitted, stopping now">, DefaultFatal; +def warn_stack_exhausted : Warning< + "stack nearly exhausted; compilation time may suffer, and " + "crashes due to stack overflow are likely">, + InGroup>, NoSFINAE; + def note_declared_at : Note<"declared here">; def note_previous_definition : Note<"previous definition is here">; def note_previous_declaration : Note<"previous declaration is here">; @@ -107,6 +117,10 @@ def err_attribute_not_type_attr : Error< "%0 attribute cannot be applied to types">; def err_enum_template : Error<"enumeration cannot be a template">; +def warn_cxx20_compat_consteval : Warning< + "'consteval' specifier is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; + } let CategoryName = "Nullability Issue" in { @@ -172,9 +186,6 @@ def ext_cxx11_longlong : Extension< def warn_cxx98_compat_longlong : Warning< "'long long' is incompatible with C++98">, InGroup, DefaultIgnore; -def warn_cxx20_compat_consteval : Warning< - "consteval is incompatible with C++ standards before C++20">, - InGroup, DefaultIgnore; def err_integer_literal_too_large : Error< "integer literal is too large to be represented in any %select{signed |}0" "integer type">; @@ -259,8 +270,6 @@ def err_target_unsupported_mcmse : Error< "-mcmse is not supported for %0">; def err_opt_not_valid_with_opt : Error< "option '%0' cannot be specified with '%1'">; -def err_opt_not_valid_without_opt : Error< - "option '%0' cannot be specified without '%1'">; def err_opt_not_valid_on_target : Error< "option '%0' cannot be specified on this target">; @@ -300,8 +309,13 @@ def err_omp_more_one_clause : Error< "directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">; // Static Analyzer Core -def err_unknown_analyzer_checker : Error< +def err_unknown_analyzer_checker_or_package : Error< "no analyzer checkers or packages are associated with '%0'">; def 
note_suggest_disabling_all_checkers : Note< "use -analyzer-disable-all-checks to disable all static analyzer checkers">; + +// Poison system directories. +def warn_poison_system_directories : Warning < + "include location '%0' is unsafe for cross-compilation">, + InGroup>, DefaultIgnore; } diff --git a/include/clang/Basic/DiagnosticDriverKinds.td b/include/clang/Basic/DiagnosticDriverKinds.td index eab453ee20e..5ff03e13356 100644 --- a/include/clang/Basic/DiagnosticDriverKinds.td +++ b/include/clang/Basic/DiagnosticDriverKinds.td @@ -184,9 +184,6 @@ def warn_drv_unknown_argument_clang_cl_with_suggestion : Warning< def warn_drv_ycyu_different_arg_clang_cl : Warning< "support for '/Yc' and '/Yu' with different filenames not implemented yet; flags ignored">, InGroup; -def warn_drv_ycyu_no_fi_arg_clang_cl : Warning< - "support for '%0' without a corresponding /FI flag not implemented yet; flag ignored">, - InGroup; def warn_drv_yc_multiple_inputs_clang_cl : Warning< "support for '/Yc' with more than one source file not implemented yet; flag ignored">, InGroup; @@ -368,6 +365,9 @@ def err_drv_ropi_rwpi_incompatible_with_pic : Error< def err_drv_ropi_incompatible_with_cxx : Error< "ROPI is not compatible with c++">; +def err_stack_tagging_requires_hardware_feature : Error< + "'-fsanitize=memtag' requires hardware support (+memtag)">; + def warn_target_unsupported_nan2008 : Warning< "ignoring '-mnan=2008' option because the '%0' architecture does not support it">, InGroup; @@ -383,6 +383,9 @@ def warn_target_unsupported_abs2008 : Warning< def warn_target_unsupported_compact_branches : Warning< "ignoring '-mcompact-branches=' option because the '%0' architecture does not" " support it">, InGroup; +def warn_target_unsupported_extension : Warning< + "ignoring extension '%0' because the '%1' architecture does not support it">, + InGroup; def warn_drv_unsupported_gpopt : Warning< "ignoring '-mgpopt' option as it cannot be used with %select{|the implicit" " usage of 
}0-mabicalls">, diff --git a/include/clang/Basic/DiagnosticFrontendKinds.td b/include/clang/Basic/DiagnosticFrontendKinds.td index 7a990164b0d..a798b498d4e 100644 --- a/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/include/clang/Basic/DiagnosticFrontendKinds.td @@ -64,8 +64,6 @@ def err_fe_backend_unsupported : Error<"%0">, BackendInfo; def err_fe_invalid_code_complete_file : Error< "cannot locate code-completion file %0">, DefaultFatal; -def err_fe_stdout_binary : Error<"unable to change standard output to binary">, - DefaultFatal; def err_fe_dependency_file_requires_MT : Error< "-dependency-file requires at least one -MT or -MQ option">; def err_fe_invalid_plugin_name : Error< @@ -217,10 +215,6 @@ def err_modules_embed_file_not_found : DefaultFatal; def err_module_header_file_not_found : Error<"module header file '%0' not found">, DefaultFatal; -def err_module_header_file_invalid : - Error<"unexpected module header file input '%0'">, DefaultFatal; - -def err_interface_stubs : Error<"clang-ifs (-emit-iterface-stubs): %0">; def err_test_module_file_extension_version : Error< "test module file extension '%0' has different version (%1.%2) than expected " @@ -275,7 +269,12 @@ def warn_profile_data_missing : Warning< def warn_profile_data_unprofiled : Warning< "no profile data available for file \"%0\"">, InGroup; - +def warn_profile_data_misexpect : Warning< + "Potential performance regression from use of __builtin_expect(): " + "Annotation was correct on %0 of profiled executions.">, + BackendInfo, + InGroup, + DefaultIgnore; } // end of instrumentation issue category } diff --git a/include/clang/Basic/DiagnosticGroups.td b/include/clang/Basic/DiagnosticGroups.td index 56f2ecfe8e4..92805953955 100644 --- a/include/clang/Basic/DiagnosticGroups.td +++ b/include/clang/Basic/DiagnosticGroups.td @@ -61,8 +61,16 @@ def BoolConversion : DiagGroup<"bool-conversion", [PointerBoolConversion, UndefinedBoolConversion]>; def IntConversion : DiagGroup<"int-conversion">; def 
EnumConversion : DiagGroup<"enum-conversion">; -def ImplicitIntConversion : DiagGroup<"implicit-int-conversion">; -def ImplicitFloatConversion : DiagGroup<"implicit-float-conversion">; +def ObjCSignedCharBoolImplicitIntConversion : + DiagGroup<"objc-signed-char-bool-implicit-int-conversion">; +def ImplicitIntConversion : DiagGroup<"implicit-int-conversion", + [ObjCSignedCharBoolImplicitIntConversion]>; +def ImplicitIntFloatConversion : DiagGroup<"implicit-int-float-conversion">; +def ObjCSignedCharBoolImplicitFloatConversion : + DiagGroup<"objc-signed-char-bool-implicit-float-conversion">; +def ImplicitFloatConversion : DiagGroup<"implicit-float-conversion", + [ImplicitIntFloatConversion, + ObjCSignedCharBoolImplicitFloatConversion]>; def ImplicitFixedPointConversion : DiagGroup<"implicit-fixed-point-conversion">; def FloatOverflowConversion : DiagGroup<"float-overflow-conversion">; @@ -99,7 +107,6 @@ def GNUComplexInteger : DiagGroup<"gnu-complex-integer">; def GNUConditionalOmittedOperand : DiagGroup<"gnu-conditional-omitted-operand">; def ConfigMacros : DiagGroup<"config-macros">; def : DiagGroup<"ctor-dtor-privacy">; -def GNUDesignator : DiagGroup<"gnu-designator">; def GNUStringLiteralOperatorTemplate : DiagGroup<"gnu-string-literal-operator-template">; def UndefinedVarTemplate : DiagGroup<"undefined-var-template">; @@ -113,11 +120,13 @@ def DeleteNonVirtualDtor : DiagGroup<"delete-non-virtual-dtor", [DeleteNonAbstractNonVirtualDtor, DeleteAbstractNonVirtualDtor]>; def AbstractFinalClass : DiagGroup<"abstract-final-class">; +def FinalDtorNonFinalClass : DiagGroup<"final-dtor-non-final-class">; def CXX11CompatDeprecatedWritableStr : DiagGroup<"c++11-compat-deprecated-writable-strings">; def DeprecatedAttributes : DiagGroup<"deprecated-attributes">; +def DeprecatedCommaSubscript : DiagGroup<"deprecated-comma-subscript">; def DeprecatedDeclarations : DiagGroup<"deprecated-declarations">; def UnavailableDeclarations : DiagGroup<"unavailable-declarations">; def 
UnguardedAvailabilityNew : DiagGroup<"unguarded-availability-new">; @@ -131,18 +140,27 @@ def DeprecatedImplementations :DiagGroup<"deprecated-implementations">; def DeprecatedIncrementBool : DiagGroup<"deprecated-increment-bool">; def DeprecatedRegister : DiagGroup<"deprecated-register">; def DeprecatedThisCapture : DiagGroup<"deprecated-this-capture">; +def DeprecatedVolatile : DiagGroup<"deprecated-volatile">; def DeprecatedWritableStr : DiagGroup<"deprecated-writable-strings", [CXX11CompatDeprecatedWritableStr]>; // FIXME: Why is DeprecatedImplementations not in this group? def Deprecated : DiagGroup<"deprecated", [DeprecatedAttributes, + DeprecatedCommaSubscript, DeprecatedDeclarations, DeprecatedDynamicExceptionSpec, DeprecatedIncrementBool, DeprecatedRegister, DeprecatedThisCapture, + DeprecatedVolatile, DeprecatedWritableStr]>, DiagCategory<"Deprecations">; +def CXX2aDesignator : DiagGroup<"c++2a-designator">; +// Allow -Wno-c99-designator to be used to turn off all warnings on valid C99 +// designators (including the warning controlled by -Wc++2a-designator). 
+def C99Designator : DiagGroup<"c99-designator", [CXX2aDesignator]>; +def GNUDesignator : DiagGroup<"gnu-designator">; + def DynamicExceptionSpec : DiagGroup<"dynamic-exception-spec", [DeprecatedDynamicExceptionSpec]>; @@ -278,6 +296,7 @@ def ExitTimeDestructors : DiagGroup<"exit-time-destructors">; def FlexibleArrayExtensions : DiagGroup<"flexible-array-extensions">; def FourByteMultiChar : DiagGroup<"four-char-constants">; def GlobalConstructors : DiagGroup<"global-constructors">; +def BitwiseConditionalParentheses: DiagGroup<"bitwise-conditional-parentheses">; def BitwiseOpParentheses: DiagGroup<"bitwise-op-parentheses">; def LogicalOpParentheses: DiagGroup<"logical-op-parentheses">; def LogicalNotParentheses: DiagGroup<"logical-not-parentheses">; @@ -286,9 +305,11 @@ def OverloadedShiftOpParentheses: DiagGroup<"overloaded-shift-op-parentheses">; def DanglingElse: DiagGroup<"dangling-else">; def DanglingField : DiagGroup<"dangling-field">; def DanglingInitializerList : DiagGroup<"dangling-initializer-list">; +def DanglingGsl : DiagGroup<"dangling-gsl">; def ReturnStackAddress : DiagGroup<"return-stack-address">; def Dangling : DiagGroup<"dangling", [DanglingField, DanglingInitializerList, + DanglingGsl, ReturnStackAddress]>; def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def ExpansionToDefined : DiagGroup<"expansion-to-defined">; @@ -481,6 +502,7 @@ def StringCompare : DiagGroup<"string-compare">; def StringPlusInt : DiagGroup<"string-plus-int">; def StringPlusChar : DiagGroup<"string-plus-char">; def StrncatSize : DiagGroup<"strncat-size">; +def IntInBoolContext : DiagGroup<"int-in-bool-context">; def TautologicalTypeLimitCompare : DiagGroup<"tautological-type-limit-compare">; def TautologicalUnsignedZeroCompare : DiagGroup<"tautological-unsigned-zero-compare">; def TautologicalUnsignedEnumZeroCompare : DiagGroup<"tautological-unsigned-enum-zero-compare">; @@ -495,12 +517,14 @@ def TautologicalConstantCompare : 
DiagGroup<"tautological-constant-compare", [TautologicalOutOfRangeCompare]>; def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">; def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">; +def TautologicalBitwiseCompare : DiagGroup<"tautological-bitwise-compare">; def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">; def TautologicalObjCBoolCompare : DiagGroup<"tautological-objc-bool-compare">; def TautologicalCompare : DiagGroup<"tautological-compare", [TautologicalConstantCompare, TautologicalPointerCompare, TautologicalOverlapCompare, + TautologicalBitwiseCompare, TautologicalUndefinedCompare, TautologicalObjCBoolCompare]>; def HeaderHygiene : DiagGroup<"header-hygiene">; @@ -509,6 +533,7 @@ def CompareDistinctPointerType : DiagGroup<"compare-distinct-pointer-types">; def GNUUnionCast : DiagGroup<"gnu-union-cast">; def GNUVariableSizedTypeNotAtEnd : DiagGroup<"gnu-variable-sized-type-not-at-end">; def Varargs : DiagGroup<"varargs">; +def XorUsedAsPow : DiagGroup<"xor-used-as-pow">; def Unsequenced : DiagGroup<"unsequenced">; // GCC name for -Wunsequenced @@ -543,6 +568,7 @@ def CoveredSwitchDefault : DiagGroup<"covered-switch-default">; def SwitchBool : DiagGroup<"switch-bool">; def SwitchEnum : DiagGroup<"switch-enum">; def Switch : DiagGroup<"switch">; +def EnumCompareConditional : DiagGroup<"enum-compare-conditional">; def EnumCompareSwitch : DiagGroup<"enum-compare-switch">; def EnumCompare : DiagGroup<"enum-compare", [EnumCompareSwitch]>; def ImplicitFallthroughPerFunction : @@ -620,7 +646,9 @@ def UnusedGetterReturnValue : DiagGroup<"unused-getter-return-value">; def UsedButMarkedUnused : DiagGroup<"used-but-marked-unused">; def UserDefinedLiterals : DiagGroup<"user-defined-literals">; def UserDefinedWarnings : DiagGroup<"user-defined-warnings">; -def Reorder : DiagGroup<"reorder">; +def ReorderCtor : DiagGroup<"reorder-ctor">; +def ReorderInitList : DiagGroup<"reorder-init-list">; +def 
Reorder : DiagGroup<"reorder", [ReorderCtor, ReorderInitList]>; def UndeclaredSelector : DiagGroup<"undeclared-selector">; def ImplicitAtomic : DiagGroup<"implicit-atomic-properties">; def CustomAtomic : DiagGroup<"custom-atomic-properties">; @@ -710,6 +738,7 @@ def ParenthesesOnEquality : DiagGroup<"parentheses-equality">; def Parentheses : DiagGroup<"parentheses", [LogicalOpParentheses, LogicalNotParentheses, + BitwiseConditionalParentheses, BitwiseOpParentheses, ShiftOpParentheses, OverloadedShiftOpParentheses, @@ -757,6 +786,7 @@ def FormatSecurity : DiagGroup<"format-security">; def FormatNonStandard : DiagGroup<"format-non-iso">; def FormatY2K : DiagGroup<"format-y2k">; def FormatPedantic : DiagGroup<"format-pedantic">; +def FormatTypeConfusion : DiagGroup<"format-type-confusion">; def Format : DiagGroup<"format", [FormatExtraArgs, FormatZeroLength, NonNull, FormatSecurity, FormatY2K, FormatInvalidSpecifier]>, @@ -800,6 +830,7 @@ def Most : DiagGroup<"most", [ Format, Implicit, InfiniteRecursion, + IntInBoolContext, MismatchedTags, MissingBraces, Move, @@ -890,7 +921,7 @@ def CXX17 : DiagGroup<"c++17-extensions">; // A warning group for warnings about using C++2a features as extensions in // earlier C++ versions. -def CXX2a : DiagGroup<"c++2a-extensions">; +def CXX2a : DiagGroup<"c++2a-extensions", [CXX2aDesignator]>; def : DiagGroup<"c++0x-extensions", [CXX11]>; def : DiagGroup<"c++1y-extensions", [CXX14]>; @@ -903,7 +934,7 @@ def DelegatingCtorCycles : def C11 : DiagGroup<"c11-extensions">; // A warning group for warnings about using C99 features as extensions. -def C99 : DiagGroup<"c99-extensions">; +def C99 : DiagGroup<"c99-extensions", [C99Designator]>; // A warning group for warnings about GCC extensions. 
def GNU : DiagGroup<"gnu", [GNUAlignofExpression, GNUAnonymousStruct, @@ -1001,6 +1032,12 @@ def ObjCLiteralComparison : DiagGroup<"objc-literal-compare", [ ObjCStringComparison ]>; +def ObjCSignedCharBool : DiagGroup<"objc-signed-char-bool", + [ObjCSignedCharBoolImplicitIntConversion, + ObjCSignedCharBoolImplicitFloatConversion, + ObjCBoolConstantConversion, + TautologicalObjCBoolCompare]>; + // Inline ASM warnings. def ASMOperandWidths : DiagGroup<"asm-operand-widths">; def ASMIgnoredQualifier : DiagGroup<"asm-ignored-qualifier">; @@ -1028,6 +1065,7 @@ def BackendOptimizationFailure : DiagGroup<"pass-failed">; def ProfileInstrMissing : DiagGroup<"profile-instr-missing">; def ProfileInstrOutOfDate : DiagGroup<"profile-instr-out-of-date">; def ProfileInstrUnprofiled : DiagGroup<"profile-instr-unprofiled">; +def MisExpect : DiagGroup<"misexpect">; // AddressSanitizer frontend instrumentation remarks. def SanitizeAddressRemarks : DiagGroup<"sanitize-address">; diff --git a/include/clang/Basic/DiagnosticOptions.def b/include/clang/Basic/DiagnosticOptions.def index baafd7ac723..6d1a1af9282 100644 --- a/include/clang/Basic/DiagnosticOptions.def +++ b/include/clang/Basic/DiagnosticOptions.def @@ -49,6 +49,7 @@ DIAGOPT(Pedantic, 1, 0) /// -pedantic DIAGOPT(PedanticErrors, 1, 0) /// -pedantic-errors DIAGOPT(ShowColumn, 1, 1) /// Show column number on diagnostics. DIAGOPT(ShowLocation, 1, 1) /// Show source location information. +DIAGOPT(ShowLevel, 1, 1) /// Show diagnostic level. DIAGOPT(AbsolutePath, 1, 0) /// Use absolute paths. DIAGOPT(ShowCarets, 1, 1) /// Show carets in diagnostics. DIAGOPT(ShowFixits, 1, 1) /// Show fixit information. 
diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td index 8e6ced0dea5..7c9f4da778a 100644 --- a/include/clang/Basic/DiagnosticParseKinds.td +++ b/include/clang/Basic/DiagnosticParseKinds.td @@ -119,18 +119,16 @@ def warn_microsoft_qualifiers_ignored : Warning< "qualifiers after comma in declarator list are ignored">, InGroup; -def ext_c11_generic_selection : Extension< - "generic selections are a C11-specific feature">, InGroup; def err_duplicate_default_assoc : Error< "duplicate default generic association">; def note_previous_default_assoc : Note< "previous default generic association is here">; -def ext_c11_alignment : Extension< - "%0 is a C11-specific feature">, InGroup; +def ext_c99_feature : Extension< + "'%0' is a C99 extension">, InGroup; +def ext_c11_feature : Extension< + "'%0' is a C11 extension">, InGroup; -def ext_c11_noreturn : Extension< - "_Noreturn functions are a C11-specific feature">, InGroup; def err_c11_noreturn_misplaced : Error< "'_Noreturn' keyword must precede function declarator">; @@ -203,6 +201,7 @@ def err_invalid_token_after_declarator_suggest_equal : Error< "invalid %0 at end of declaration; did you mean '='?">; def err_expected_statement : Error<"expected statement">; def err_expected_lparen_after : Error<"expected '(' after '%0'">; +def err_expected_lbrace_after : Error<"expected '{' after '%0'">; def err_expected_rparen_after : Error<"expected ')' after '%0'">; def err_expected_punc : Error<"expected ')' or ',' after '%0'">; def err_expected_less_after : Error<"expected '<' after '%0'">; @@ -360,7 +359,8 @@ def err_typename_invalid_storageclass : Error< def err_typename_invalid_functionspec : Error< "type name does not allow function specifier to be specified">; def err_typename_invalid_constexpr : Error< - "type name does not allow %select{constexpr|consteval}0 specifier to be specified">; + "type name does not allow %sub{select_constexpr_spec_kind}0 specifier " + "to be 
specified">; def err_typename_identifiers_only : Error< "typename is allowed for identifiers only">; @@ -374,8 +374,6 @@ def err_unexpected_token_in_nested_name_spec : Error< "'%0' cannot be a part of nested name specifier; did you mean ':'?">; def err_bool_redeclaration : Error< "redeclaration of C++ built-in type 'bool'">; -def ext_c11_static_assert : Extension< - "_Static_assert is a C11-specific feature">, InGroup; def warn_cxx98_compat_static_assert : Warning< "static_assert declarations are incompatible with C++98">, InGroup, DefaultIgnore; @@ -436,8 +434,6 @@ def err_objc_property_requires_field_name : Error< "property requires fields to be named">; def err_objc_property_bitfield : Error<"property name cannot be a bit-field">; def err_objc_expected_property_attr : Error<"unknown property attribute %0">; -def err_objc_properties_require_objc2 : Error< - "properties are an Objective-C 2 feature">; def err_objc_unexpected_attr : Error< "prefix attribute must be followed by an interface, protocol, or implementation">; def err_objc_postfix_attribute : Error < @@ -976,11 +972,13 @@ def warn_pragma_missing_argument : Warning< def warn_pragma_invalid_argument : Warning< "unexpected argument '%0' to '#pragma %1'%select{|; expected %3}2">, InGroup; +def err_pragma_misplaced_in_decl : Error<"this pragma cannot appear in %0 declaration">; + // '#pragma clang section' related errors def err_pragma_expected_clang_section_name : Error< - "expected one of [bss|data|rodata|text] section kind in '#pragma %0'">; + "expected one of [bss|data|rodata|text|relro] section kind in '#pragma %0'">; def err_pragma_clang_section_expected_equal : Error< - "expected '=' following '#pragma clang section %select{invalid|bss|data|rodata|text}0'">; + "expected '=' following '#pragma clang section %select{invalid|bss|data|rodata|text|relro}0'">; def warn_pragma_expected_section_name : Warning< "expected a string literal for the section name in '#pragma %0' - ignored">, InGroup; @@ -1180,8 
+1178,8 @@ def err_omp_expected_identifier_for_critical : Error< "expected identifier specifying the name of the 'omp critical' directive">; def err_omp_expected_reduction_identifier : Error< "expected identifier or one of the following operators: '+', '-', '*', '&', '|', '^', '&&', or '||'">; -def err_omp_decl_in_declare_simd : Error< - "function declaration is expected after 'declare simd' directive">; +def err_omp_decl_in_declare_simd_variant : Error< + "function declaration is expected after 'declare %select{simd|variant}0' directive">; def err_omp_unknown_map_type : Error< "incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'">; def err_omp_unknown_map_type_modifier : Error< @@ -1195,13 +1193,36 @@ def err_omp_declare_simd_inbranch_notinbranch : Error< def err_expected_end_declare_target : Error< "expected '#pragma omp end declare target'">; def err_omp_declare_target_unexpected_clause: Error< - "unexpected '%0' clause, only 'to' or 'link' clauses expected">; + "unexpected '%0' clause, only %select{'to' or 'link'|'to', 'link' or 'device_type'}1 clauses expected">; def err_omp_expected_clause: Error< "expected at least one clause on '#pragma omp %0' directive">; def err_omp_mapper_illegal_identifier : Error< "illegal OpenMP user-defined mapper identifier">; def err_omp_mapper_expected_declarator : Error< "expected declarator on 'omp declare mapper' directive">; +def err_omp_declare_variant_wrong_clause : Error< + "expected '%0' clause on 'omp declare variant' directive">; +def err_omp_declare_variant_no_ctx_selector : Error< + "expected context selector in '%0' clause on 'omp declare variant' directive">; +def err_omp_declare_variant_equal_expected : Error< + "expected '=' after '%0' context selector set name on 'omp declare variant' directive">; +def warn_omp_declare_variant_cs_name_expected : Warning< + "unknown context selector in '%0' context selector set of 'omp declare variant' directive, ignored">, + InGroup; +def 
err_omp_declare_variant_item_expected : Error< + "expected %0 in '%1' context selector of '%2' selector set of 'omp declare variant' directive">; +def err_omp_declare_variant_ctx_set_mutiple_use : Error< + "context selector set '%0' is used already in the same 'omp declare variant' directive">; +def note_omp_declare_variant_ctx_set_used_here : Note< + "previously context selector set '%0' used here">; +def err_omp_expected_comma_brace : Error<"expected '}' or ',' after '%0'">; +def err_omp_declare_variant_ctx_mutiple_use : Error< + "context trait selector '%0' is used already in the same '%1' context selector set of 'omp declare variant' directive">; +def note_omp_declare_variant_ctx_used_here : Note< + "previously context trait selector '%0' used here">; +def warn_omp_more_one_device_type_clause : Warning< + "more than one 'device_type' clause is specified">, + InGroup; // Pragma loop support. def err_pragma_loop_missing_argument : Error< @@ -1210,7 +1231,7 @@ def err_pragma_loop_missing_argument : Error< def err_pragma_loop_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, " "vectorize_width, interleave, interleave_count, unroll, unroll_count, " - "pipeline, pipeline_initiation_interval, or distribute">; + "pipeline, pipeline_initiation_interval, vectorize_predicate, or distribute">; def err_pragma_fp_invalid_option : Error< "%select{invalid|missing}0 option%select{ %1|}0; expected contract">; diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index effcbad78b2..d802a92c42c 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -12,7 +12,6 @@ let Component = "Sema" in { let CategoryName = "Semantic Issue" in { - def note_previous_decl : Note<"%0 declared here">; def note_entity_declared_at : Note<"%0 declared here">; def note_callee_decl : Note<"%0 declared here">; @@ -155,7 +154,7 @@ def err_variably_modified_new_type 
: Error< // C99 Designated Initializers def ext_designated_init : Extension< - "designated initializers are a C99 feature">, InGroup; + "designated initializers are a C99 feature">, InGroup; def err_array_designator_negative : Error< "array designator value '%0' is negative">; def err_array_designator_empty_range : Error< @@ -174,15 +173,17 @@ def err_field_designator_nonfield : Error< def note_field_designator_found : Note<"field designator refers here">; def err_designator_for_scalar_init : Error< "designator in initializer for scalar type %0">; -def warn_subobject_initializer_overrides : Warning< - "subobject initialization overrides initialization of other fields " - "within its enclosing subobject">, InGroup; def warn_initializer_overrides : Warning< - "initializer overrides prior initialization of this subobject">, - InGroup; + "initializer %select{partially |}0overrides prior initialization of " + "this subobject">, InGroup; +def ext_initializer_overrides : ExtWarn, + InGroup, SFINAEFailure; +def err_initializer_overrides_destructed : Error< + "initializer would partially override prior initialization of object of " + "type %1 with non-trivial destruction">; def note_previous_initializer : Note< "previous initialization %select{|with side effects }0is here" - "%select{| (side effects may not occur at run time)}0">; + "%select{| (side effects will not occur at run time)}0">; def err_designator_into_flexible_array_member : Error< "designator into flexible array member subobject">; def note_flexible_array_member : Note< @@ -190,6 +191,28 @@ def note_flexible_array_member : Note< def ext_flexible_array_init : Extension< "flexible array initialization is a GNU extension">, InGroup; +// C++20 designated initializers +def ext_cxx_designated_init : Extension< + "designated initializers are a C++20 extension">, InGroup; +def warn_cxx17_compat_designated_init : Warning< + "designated initializers are incompatible with C++ standards before C++20">, + InGroup, 
DefaultIgnore; +def ext_designated_init_mixed : ExtWarn< + "mixture of designated and non-designated initializers in the same " + "initializer list is a C99 extension">, InGroup; +def note_designated_init_mixed : Note< + "first non-designated initializer is here">; +def ext_designated_init_array : ExtWarn< + "array designators are a C99 extension">, InGroup; +def ext_designated_init_nested : ExtWarn< + "nested designators are a C99 extension">, InGroup; +def ext_designated_init_reordered : ExtWarn< + "ISO C++ requires field designators to be specified in declaration order; " + "field %1 will be initialized after field %0">, InGroup, + SFINAEFailure; +def note_previous_field_init : Note< + "previous initialization for field %0 is here">; + // Declarations. def ext_plain_complex : ExtWarn< "plain '_Complex' requires a type specifier; assuming '_Complex double'">; @@ -598,6 +621,10 @@ def ext_implicit_lib_function_decl : ExtWarn< def note_include_header_or_declare : Note< "include the header <%0> or explicitly provide a declaration for '%1'">; def note_previous_builtin_declaration : Note<"%0 is a builtin with type %1">; +def warn_implicit_decl_no_jmp_buf + : Warning<"declaration of built-in function '%0' requires the declaration" + " of the 'jmp_buf' type, commonly provided in the header .">, + InGroup>; def warn_implicit_decl_requires_sysheader : Warning< "declaration of built-in function '%1' requires inclusion of the header <%0>">, InGroup; @@ -2212,6 +2239,11 @@ def err_class_marked_final_used_as_base : Error< "base %0 is marked '%select{final|sealed}1'">; def warn_abstract_final_class : Warning< "abstract class is marked '%select{final|sealed}0'">, InGroup; +def warn_final_dtor_non_final_class : Warning< + "class with destructor marked '%select{final|sealed}0' cannot be inherited from">, + InGroup; +def note_final_dtor_non_final_class_silence : Note< + "mark %0 as '%select{final|sealed}1' to silence this warning">; // C++11 attributes def err_repeat_attribute : 
Error<"%0 attribute cannot be repeated">; @@ -2326,18 +2358,24 @@ def warn_cxx14_compat_constexpr_not_const : Warning< "in C++14; add 'const' to avoid a change in behavior">, InGroup>; def err_invalid_constexpr : Error< - "%select{function parameter|typedef|non-static data member}0 " - "cannot be %select{constexpr|consteval}1">; + "%select{function parameter|typedef}0 " + "cannot be %sub{select_constexpr_spec_kind}1">; def err_invalid_constexpr_member : Error<"non-static data member cannot be " "constexpr%select{; did you intend to make it %select{const|static}0?|}1">; def err_constexpr_tag : Error< "%select{class|struct|interface|union|enum}0 " - "cannot be marked %select{constexpr|consteval}1">; + "cannot be marked %sub{select_constexpr_spec_kind}1">; def err_constexpr_dtor : Error< - "destructor cannot be marked %select{constexpr|consteval}0">; + "destructor cannot be declared %sub{select_constexpr_spec_kind}0">; +def err_constexpr_dtor_subobject : Error< + "destructor cannot be declared %sub{select_constexpr_spec_kind}0 because " + "%select{data member %2|base class %3}1 does not have a " + "constexpr destructor">; +def note_constexpr_dtor_subobject : Note< + "%select{data member %1|base class %2}0 declared here">; def err_constexpr_wrong_decl_kind : Error< - "%select{constexpr|consteval}0 can only be used " - "in %select{variable and |}0function declarations">; + "%sub{select_constexpr_spec_kind}0 can only be used " + "in %select{|variable and function|function|variable}0 declarations">; def err_invalid_constexpr_var_decl : Error< "constexpr variable declaration must be a definition">; def err_constexpr_static_mem_var_requires_init : Error< @@ -2346,6 +2384,8 @@ def err_constexpr_var_non_literal : Error< "constexpr variable cannot have non-literal type %0">; def err_constexpr_var_requires_const_init : Error< "constexpr variable %0 must be initialized by a constant expression">; +def err_constexpr_var_requires_const_destruction : Error< + "constexpr variable %0 
must have constant destruction">; def err_constexpr_redecl_mismatch : Error< "%select{non-constexpr|constexpr|consteval}1 declaration of %0" " follows %select{non-constexpr|constexpr|consteval}2 declaration">; @@ -2406,9 +2446,13 @@ def err_constexpr_local_var_static : Error< def err_constexpr_local_var_non_literal_type : Error< "variable of non-literal type %1 cannot be defined in a constexpr " "%select{function|constructor}0">; -def err_constexpr_local_var_no_init : Error< - "variables defined in a constexpr %select{function|constructor}0 must be " - "initialized">; +def ext_constexpr_local_var_no_init : ExtWarn< + "uninitialized variable in a constexpr %select{function|constructor}0 " + "is a C++20 extension">, InGroup; +def warn_cxx17_compat_constexpr_local_var_no_init : Warning< + "uninitialized variable in a constexpr %select{function|constructor}0 " + "is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; def ext_constexpr_function_never_constant_expr : ExtWarn< "constexpr %select{function|constructor}0 never produces a " "constant expression">, InGroup>, DefaultError; @@ -2433,10 +2477,8 @@ def warn_cxx11_compat_constexpr_body_multiple_return : Warning< InGroup, DefaultIgnore; def note_constexpr_body_previous_return : Note< "previous return statement is here">; -def err_constexpr_function_try_block : Error< - "function try block not allowed in constexpr %select{function|constructor}0">; -// c++2a function try blocks in constexpr +// C++2a function try blocks in constexpr def ext_constexpr_function_try_block_cxx2a : ExtWarn< "function try block in constexpr %select{function|constructor}0 is " "a C++2a extension">, InGroup; @@ -2445,10 +2487,20 @@ def warn_cxx17_compat_constexpr_function_try_block : Warning< "incompatible with C++ standards before C++2a">, InGroup, DefaultIgnore; -def err_constexpr_union_ctor_no_init : Error< - "constexpr union constructor does not initialize any member">; -def err_constexpr_ctor_missing_init : Error< - 
"constexpr constructor must initialize all members">; +def ext_constexpr_union_ctor_no_init : ExtWarn< + "constexpr union constructor that does not initialize any member " + "is a C++20 extension">, InGroup; +def warn_cxx17_compat_constexpr_union_ctor_no_init : Warning< + "constexpr union constructor that does not initialize any member " + "is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; +def ext_constexpr_ctor_missing_init : ExtWarn< + "constexpr constructor that does not initialize all members " + "is a C++20 extension">, InGroup; +def warn_cxx17_compat_constexpr_ctor_missing_init : Warning< + "constexpr constructor that does not initialize all members " + "is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; def note_constexpr_ctor_missing_init : Note< "member not initialized by constructor">; def note_non_literal_no_constexpr_ctors : Note< @@ -2463,6 +2515,8 @@ def note_non_literal_user_provided_dtor : Note< "%0 is not literal because it has a user-provided destructor">; def note_non_literal_nontrivial_dtor : Note< "%0 is not literal because it has a non-trivial destructor">; +def note_non_literal_non_constexpr_dtor : Note< + "%0 is not literal because its destructor is not constexpr">; def note_non_literal_lambda : Note< "lambda closure types are non-literal types before C++17">; def warn_private_extern : Warning< @@ -2472,8 +2526,6 @@ def note_private_extern : Note< "use __attribute__((visibility(\"hidden\"))) attribute instead">; // C++ Concepts -def err_concept_initialized_with_non_bool_type : Error< - "constraint expression must be of type 'bool' but is of type %0">; def err_concept_decls_may_only_appear_in_global_namespace_scope : Error< "concept declarations may only appear in global or namespace scope">; def err_concept_no_parameters : Error< @@ -2485,9 +2537,14 @@ def err_concept_no_associated_constraints : Error< "concept cannot have associated constraints">; def err_concept_not_implemented : Error< 
"sorry, unimplemented concepts feature %0 used">; +def err_non_constant_constraint_expression : Error< + "substitution into constraint expression resulted in a non-constant " + "expression">; +def err_non_bool_atomic_constraint : Error< + "atomic constraint must be of type 'bool' (found %0)">; -def err_template_different_associated_constraints : Error< - "associated constraints differ in template redeclaration">; +def err_template_different_requires_clause : Error< + "requires clause differs in template redeclaration">; // C++11 char16_t/char32_t def warn_cxx98_compat_unicode_type : Warning< @@ -2519,6 +2576,9 @@ def err_nsobject_attribute : Error< "'NSObject' attribute is for pointer types only">; def err_attributes_are_not_compatible : Error< "%0 and %1 attributes are not compatible">; +def err_attribute_invalid_argument : Error< + "%select{'void'|a reference type|an array type|a non-vector or " + "non-vectorizable scalar type}0 is an invalid argument to attribute %1">; def err_attribute_wrong_number_arguments : Error< "%0 attribute %plural{0:takes no arguments|1:takes one argument|" ":requires exactly %1 arguments}1">; @@ -2567,8 +2627,6 @@ def err_attribute_argument_out_of_range : Error< def err_init_priority_object_attr : Error< "can only use 'init_priority' attribute on file-scope definitions " "of objects of class type">; -def err_attribute_argument_vec_type_hint : Error< - "invalid attribute argument %0 - expecting a vector or vectorizable scalar type">; def err_attribute_argument_out_of_bounds : Error< "%0 attribute parameter %1 is out of bounds">; def err_attribute_only_once_per_parameter : Error< @@ -2778,6 +2836,11 @@ def err_no_accessor_for_property : Error< def err_cannot_find_suitable_accessor : Error< "cannot find suitable %select{getter|setter}0 for property %1">; +def warn_alloca : Warning< + "use of function %0 is discouraged; there is no way to check for failure but " + "failure may still occur, resulting in a possibly exploitable security 
vulnerability">, + InGroup>, DefaultIgnore; + def warn_alloca_align_alignof : Warning< "second argument to __builtin_alloca_with_align is supposed to be in bits">, InGroup>; @@ -2793,6 +2856,10 @@ def err_alignment_dependent_typedef_name : Error< def err_attribute_aligned_too_great : Error< "requested alignment must be %0 bytes or smaller">; +def warn_assume_aligned_too_great + : Warning<"requested alignment must be %0 bytes or smaller; maximum " + "alignment assumed">, + InGroup>; def warn_redeclaration_without_attribute_prev_attribute_ignored : Warning< "%q0 redeclared without %1 attribute: previous %1 ignored">, InGroup; @@ -2950,6 +3017,10 @@ def warn_gnu_inline_attribute_requires_inline : Warning< "'gnu_inline' attribute requires function to be marked 'inline'," " attribute ignored">, InGroup; +def warn_gnu_inline_cplusplus_without_extern : Warning< + "'gnu_inline' attribute without 'extern' in C++ treated as externally" + " available, this changed in Clang 10">, + InGroup>; def err_attribute_vecreturn_only_vector_member : Error< "the vecreturn attribute can only be used on a class or structure with one member, which must be a vector">; def err_attribute_vecreturn_only_pod_record : Error< @@ -2966,6 +3037,7 @@ def warn_cconv_unsupported : Warning< "|on builtin function" "}1">, InGroup; +def error_cconv_unsupported : Error; def err_cconv_knr : Error< "function with no prototype cannot use the %0 calling convention">; def warn_cconv_knr : Warning< @@ -3251,9 +3323,9 @@ def warn_impcast_integer_precision_constant : Warning< def warn_impcast_bitfield_precision_constant : Warning< "implicit truncation from %2 to bit-field changes value from %0 to %1">, InGroup; -def warn_impcast_constant_int_to_objc_bool : Warning< - "implicit conversion from constant value %0 to BOOL; " - "the only well defined values for BOOL are YES and NO">, +def warn_impcast_constant_value_to_objc_bool : Warning< + "implicit conversion from constant value %0 to 'BOOL'; " + "the only well 
defined values for 'BOOL' are YES and NO">, InGroup; def warn_impcast_fixed_point_range : Warning< @@ -3269,6 +3341,20 @@ def warn_impcast_literal_float_to_integer_out_of_range : Warning< def warn_impcast_float_integer : Warning< "implicit conversion turns floating-point number into integer: %0 to %1">, InGroup, DefaultIgnore; +def warn_impcast_float_to_objc_signed_char_bool : Warning< + "implicit conversion from floating-point type %0 to 'BOOL'">, + InGroup; +def warn_impcast_int_to_objc_signed_char_bool : Warning< + "implicit conversion from integral type %0 to 'BOOL'">, + InGroup, DefaultIgnore; + +// Implicit int -> float conversion precision loss warnings. +def warn_impcast_integer_float_precision : Warning< + "implicit conversion from %0 to %1 may lose precision">, + InGroup, DefaultIgnore; +def warn_impcast_integer_float_precision_constant : Warning< + "implicit conversion from %2 to %3 changes value from %0 to %1">, + InGroup; def warn_impcast_float_to_integer : Warning< "implicit conversion from %0 to %1 changes value from %2 to %3">, @@ -3292,6 +3378,10 @@ def warn_impcast_bool_to_null_pointer : Warning< def warn_non_literal_null_pointer : Warning< "expression which evaluates to zero treated as a null pointer constant of " "type %0">, InGroup; +def warn_pointer_compare : Warning< + "comparing a pointer to a null character constant; did you mean " + "to compare to %select{NULL|(void *)0}0?">, + InGroup>; def warn_impcast_null_pointer_to_integer : Warning< "implicit conversion of %select{NULL|nullptr}0 constant to %1">, InGroup; @@ -3318,6 +3408,18 @@ def warn_address_of_reference_bool_conversion : Warning< "code; pointer may be assumed to always convert to true">, InGroup; +def warn_xor_used_as_pow : Warning< + "result of '%0' is %1; did you mean exponentiation?">, + InGroup; +def warn_xor_used_as_pow_base_extra : Warning< + "result of '%0' is %1; did you mean '%2' (%3)?">, + InGroup; +def warn_xor_used_as_pow_base : Warning< + "result of '%0' is %1; did 
you mean '%2'?">, + InGroup; +def note_xor_used_as_pow_silence : Note< + "replace expression with '%0' %select{|or use 'xor' instead of '^' }1to silence this warning">; + def warn_null_pointer_compare : Warning< "comparison of %select{address of|function|array}0 '%1' %select{not |}2" "equal to a null pointer is always %select{true|false}2">, @@ -3338,9 +3440,15 @@ def warn_address_of_reference_null_compare : Warning< InGroup; def note_reference_is_return_value : Note<"%0 returns a reference">; +def note_pointer_declared_here : Note< + "pointer %0 declared here">; def warn_division_sizeof_ptr : Warning< "'%0' will return the size of the pointer, not the array itself">, InGroup>; +def warn_division_sizeof_array : Warning< + "expression does not compute the number of elements in this array; element " + "type is %0, not %1">, + InGroup>; def note_function_warning_silence : Note< "prefix with the address-of operator to silence this warning">; @@ -3661,7 +3769,8 @@ def note_ovl_too_many_candidates : Note< "pass -fshow-overloads=all to show them">; def select_ovl_candidate_kind : TextSubstitution< - "%select{function|function|constructor|" + "%select{function|function|function (with reversed parameter order)|" + "constructor|" "constructor (the implicit default constructor)|" "constructor (the implicit copy constructor)|" "constructor (the implicit move constructor)|" @@ -3855,10 +3964,7 @@ def note_implicit_member_target_infer_collision : Note< def note_ambiguous_type_conversion: Note< "because of ambiguity in conversion %diff{of $ to $|between types}0,1">; -def note_ovl_builtin_binary_candidate : Note< - "built-in candidate %0">; -def note_ovl_builtin_unary_candidate : Note< - "built-in candidate %0">; +def note_ovl_builtin_candidate : Note<"built-in candidate %0">; def err_ovl_no_viable_function_in_init : Error< "no matching constructor for initialization of %0">; def err_ovl_no_conversion_in_cast : Error< @@ -3885,6 +3991,13 @@ def err_ovl_ambiguous_oper_unary : 
Error< "use of overloaded operator '%0' is ambiguous (operand type %1)">; def err_ovl_ambiguous_oper_binary : Error< "use of overloaded operator '%0' is ambiguous (with operand types %1 and %2)">; +def ext_ovl_ambiguous_oper_binary_reversed : ExtWarn< + "ISO C++20 considers use of overloaded operator '%0' (with operand types %1 " + "and %2) to be ambiguous despite there being a unique best viable function">, + InGroup>, SFINAEFailure; +def note_ovl_ambiguous_oper_binary_reversed_candidate : Note< + "ambiguity is between a regular call to this operator and a call with the " + "argument order reversed">; def err_ovl_no_viable_oper : Error<"no viable overloaded '%0'">; def note_assign_lhs_incomplete : Note<"type %0 is incomplete">; def err_ovl_deleted_oper : Error< @@ -3892,6 +4005,9 @@ def err_ovl_deleted_oper : Error< def err_ovl_deleted_special_oper : Error< "object of type %0 cannot be %select{constructed|copied|moved|assigned|" "assigned|destroyed}1 because its %sub{select_special_member_kind}1 is implicitly deleted">; +def err_ovl_rewrite_equalequal_not_bool : Error< + "return type %0 of selected 'operator==' function for rewritten " + "'%1' comparison is not 'bool'">; def err_ovl_no_viable_subscript : Error<"no viable overloaded operator[] for type %0">; def err_ovl_no_oper : @@ -3934,6 +4050,8 @@ def err_ovl_no_viable_literal_operator : Error< // C++ Template Declarations def err_template_param_shadow : Error< "declaration of %0 shadows template parameter">; +def ext_template_param_shadow : ExtWarn< + err_template_param_shadow.Text>, InGroup; def note_template_param_here : Note<"template parameter is declared here">; def warn_template_export_unsupported : Warning< "exported templates are unsupported">; @@ -4392,6 +4510,10 @@ def note_prior_template_arg_substitution : Note< " template parameter%1 %2">; def note_template_default_arg_checking : Note< "while checking a default template argument used here">; +def note_concept_specialization_here : Note< + "while 
checking the satisfaction of concept '%0' requested here">; +def note_constraint_substitution_here : Note< + "while substituting template arguments into constraint expression here">; def note_instantiation_contexts_suppressed : Note< "(skipping %0 context%s0 in backtrace; use -ftemplate-backtrace-limit=0 to " "see all)">; @@ -5334,9 +5456,6 @@ def err_arc_mismatched_cast : Error< " to %3 is disallowed with ARC">; def err_arc_nolifetime_behavior : Error< "explicit ownership qualifier on cast result has no effect">; -def err_arc_objc_object_in_tag : Error< - "ARC forbids %select{Objective-C objects|blocks}0 in " - "%select{struct|interface|union|<>|enum}1">; def err_arc_objc_property_default_assign_on_object : Error< "ARC forbids synthesizing a property of an Objective-C object " "with unspecified ownership or storage attribute">; @@ -5626,9 +5745,18 @@ def note_precedence_silence : Note< def warn_precedence_conditional : Warning< "operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">, InGroup; +def warn_precedence_bitwise_conditional : Warning< + "operator '?:' has lower precedence than '%0'; '%0' will be evaluated first">, + InGroup; def note_precedence_conditional_first : Note< "place parentheses around the '?:' expression to evaluate it first">; +def warn_enum_constant_in_bool_context : Warning< + "converting the enum constant to a boolean">, + InGroup, DefaultIgnore; +def warn_left_shift_in_bool_context : Warning< + "converting the result of '<<' to a boolean; did you mean '(%0) != 0'?">, + InGroup, DefaultIgnore; def warn_logical_instead_of_bitwise : Warning< "use of logical '%0' with constant operand">, InGroup>; @@ -5638,10 +5766,10 @@ def note_logical_instead_of_bitwise_remove_constant : Note< "remove constant to silence this warning">; def warn_bitwise_op_in_bitwise_op : Warning< - "'%0' within '%1'">, InGroup; + "'%0' within '%1'">, InGroup, DefaultIgnore; def warn_logical_and_in_logical_or : Warning< - "'&&' within '||'">, InGroup; 
+ "'&&' within '||'">, InGroup, DefaultIgnore; def warn_overloaded_shift_in_comparison :Warning< "overloaded operator %select{>>|<<}0 has higher precedence than " @@ -5718,6 +5846,9 @@ def err_arithmetic_nonfragile_interface : Error< "arithmetic on pointer to interface %0, which is not a constant size for " "this architecture and platform">; +def warn_deprecated_comma_subscript : Warning< + "top-level comma expression in array subscript is deprecated">, + InGroup; def ext_subscript_non_lvalue : Extension< "ISO C90 does not allow subscripting non-lvalue array">; @@ -6038,8 +6169,8 @@ def warn_tautological_constant_compare : Warning< "%select{%1|%3}0 is always %4">, InGroup, DefaultIgnore; def warn_tautological_compare_objc_bool : Warning< - "result of comparison of constant %0 with expression of type BOOL" - " is always %1, as the only well defined values for BOOL are YES and NO">, + "result of comparison of constant %0 with expression of type 'BOOL'" + " is always %1, as the only well defined values for 'BOOL' are YES and NO">, InGroup; def warn_mixed_sign_comparison : Warning< @@ -6051,10 +6182,22 @@ def warn_out_of_range_compare : Warning< InGroup; def warn_tautological_bool_compare : Warning, InGroup; +def warn_integer_constants_in_conditional_always_true : Warning< + "converting the result of '?:' with integer constants to a boolean always " + "evaluates to 'true'">, + InGroup; +def warn_left_shift_always : Warning< + "converting the result of '<<' to a boolean always evaluates " + "to %select{false|true}0">, + InGroup; def warn_comparison_of_mixed_enum_types : Warning< "comparison of two values with different enumeration types" "%diff{ ($ and $)|}0,1">, InGroup; +def warn_conditional_mixed_enum_types : Warning< + "enumeration type mismatch in conditional expression" + "%diff{ ($ and $)|}0,1">, + InGroup, DefaultIgnore; def warn_comparison_of_mixed_enum_types_switch : Warning< "comparison of two values with different enumeration types in switch statement" 
"%diff{ ($ and $)|}0,1">, @@ -6093,6 +6236,8 @@ def err_invalid_qualified_function_type : Error< def err_compound_qualified_function_type : Error< "%select{block pointer|pointer|reference}0 to function type %select{%2 |}1" "cannot have '%3' qualifier">; +def err_qualified_function_typeid : Error< + "type operand %0 of 'typeid' cannot have '%1' qualifier">; def err_ref_qualifier_overload : Error< "cannot overload a member function %select{without a ref-qualifier|with " @@ -6517,6 +6662,10 @@ def note_member_declared_here : Note< "member %0 declared here">; def note_member_first_declared_here : Note< "member %0 first declared here">; +def warn_bitwise_negation_bool : Warning< + "bitwise negation of a boolean expression%select{;| always evaluates to 'true';}0 " + "did you mean logical negation?">, + InGroup>; def err_decrement_bool : Error<"cannot decrement expression of type bool">; def warn_increment_bool : Warning< "incrementing expression of type bool is deprecated and " @@ -6526,6 +6675,26 @@ def ext_increment_bool : ExtWarn< DefaultError, InGroup; def err_increment_decrement_enum : Error< "cannot %select{decrement|increment}0 expression of enum type %1">; + +def warn_deprecated_increment_decrement_volatile : Warning< + "%select{decrement|increment}0 of object of volatile-qualified type %1 " + "is deprecated">, InGroup; +def warn_deprecated_simple_assign_volatile : Warning< + "use of result of assignment to object of volatile-qualified type %0 " + "is deprecated">, InGroup; +def warn_deprecated_compound_assign_volatile : Warning< + "compound assignment to object of volatile-qualified type %0 is deprecated">, + InGroup; +def warn_deprecated_volatile_return : Warning< + "volatile-qualified return type %0 is deprecated">, + InGroup; +def warn_deprecated_volatile_param : Warning< + "volatile-qualified parameter type %0 is deprecated">, + InGroup; +def warn_deprecated_volatile_structured_binding : Warning< + "volatile qualifier in structured binding declaration is 
deprecated">, + InGroup; + def err_catch_incomplete_ptr : Error< "cannot catch pointer to incomplete type %0">; def err_catch_incomplete_ref : Error< @@ -7426,6 +7595,12 @@ def warn_unused_container_subscript_expr : Warning< def warn_unused_call : Warning< "ignoring return value of function declared with %0 attribute">, InGroup; +def warn_unused_constructor : Warning< + "ignoring temporary created by a constructor declared with %0 attribute">, + InGroup; +def warn_unused_constructor_msg : Warning< + "ignoring temporary created by a constructor declared with %0 attribute: %1">, + InGroup; def warn_side_effects_unevaluated_context : Warning< "expression with side effects has no effect in an unevaluated context">, InGroup; @@ -7435,6 +7610,9 @@ def warn_side_effects_typeid : Warning< def warn_unused_result : Warning< "ignoring return value of function declared with %0 attribute">, InGroup; +def warn_unused_result_msg : Warning< + "ignoring return value of function declared with %0 attribute: %1">, + InGroup; def warn_unused_volatile : Warning< "expression result unused; assign into a variable to force a volatile load">, InGroup>; @@ -7443,6 +7621,8 @@ def ext_cxx14_attr : Extension< "use of the %0 attribute is a C++14 extension">, InGroup; def ext_cxx17_attr : Extension< "use of the %0 attribute is a C++17 extension">, InGroup; +def ext_cxx2a_attr : Extension< + "use of the %0 attribute is a C++2a extension">, InGroup; def warn_unused_comparison : Warning< "%select{equality|inequality|relational|three-way}0 comparison result unused">, @@ -7453,10 +7633,30 @@ def note_inequality_comparison_to_or_assign : Note< def err_incomplete_type_used_in_type_trait_expr : Error< "incomplete type %0 used in type trait expression">; +// C++20 constinit and require_constant_initialization attribute +def warn_cxx20_compat_constinit : Warning< + "'constinit' specifier is incompatible with C++ standards before C++20">, + InGroup, DefaultIgnore; +def err_constinit_local_variable : Error< 
+ "local variable cannot be declared 'constinit'">; def err_require_constant_init_failed : Error< "variable does not have a constant initializer">; def note_declared_required_constant_init_here : Note< - "required by 'require_constant_initialization' attribute here">; + "required by %select{'require_constant_initialization' attribute|" + "'constinit' specifier}0 here">; +def ext_constinit_missing : ExtWarn< + "'constinit' specifier missing on initializing declaration of %0">, + InGroup>; +def note_constinit_specified_here : Note<"variable declared constinit here">; +def err_constinit_added_too_late : Error< + "'constinit' specifier added after initialization of variable">; +def warn_require_const_init_added_too_late : Warning< + "'require_constant_initialization' attribute added after initialization " + "of variable">, InGroup; +def note_constinit_missing_here : Note< + "add the " + "%select{'require_constant_initialization' attribute|'constinit' specifier}0 " + "to the initializing declaration here">; def err_dimension_expr_not_constant_integer : Error< "dimension expression does not evaluate to a constant unsigned int">; @@ -7493,8 +7693,6 @@ let CategoryName = "Inline Assembly Issue" in { "invalid lvalue in asm input for constraint '%0'">; def err_asm_invalid_input_constraint : Error< "invalid input constraint '%0' in asm">; - def err_asm_immediate_expected : Error<"constraint '%0' expects " - "an integer constant expression">; def err_asm_tying_incompatible_types : Error< "unsupported inline asm: input with type " "%diff{$ matching output with type $|}0,1">; @@ -7583,7 +7781,7 @@ def err_mem_init_not_member_or_class : Error< def warn_initializer_out_of_order : Warning< "%select{field|base class}0 %1 will be initialized after " "%select{field|base}2 %3">, - InGroup, DefaultIgnore; + InGroup, DefaultIgnore; def warn_abstract_vbase_init_ignored : Warning< "initializer for virtual base class %0 of abstract class %1 " "will never be used">, @@ -7919,7 +8117,7 @@ def 
warn_array_index_precedes_bounds : Warning< def warn_array_index_exceeds_bounds : Warning< "array index %0 is past the end of the array (which contains %1 " "element%s2)">, InGroup; -def note_array_index_out_of_bounds : Note< +def note_array_declared_here : Note< "array %0 declared here">; def warn_printf_insufficient_data_args : Warning< @@ -7940,16 +8138,17 @@ def warn_format_conversion_argument_type_mismatch : Warning< "%select{type|underlying type}2 %1">, InGroup; def warn_format_conversion_argument_type_mismatch_pedantic : Extension< - "format specifies type %0 but the argument has " - "%select{type|underlying type}2 %1">, + warn_format_conversion_argument_type_mismatch.Text>, InGroup; +def warn_format_conversion_argument_type_mismatch_confusion : Warning< + warn_format_conversion_argument_type_mismatch.Text>, + InGroup, DefaultIgnore; def warn_format_argument_needs_cast : Warning< "%select{values of type|enum values with underlying type}2 '%0' should not " "be used as format arguments; add an explicit cast to %1 instead">, InGroup; def warn_format_argument_needs_cast_pedantic : Warning< - "%select{values of type|enum values with underlying type}2 '%0' should not " - "be used as format arguments; add an explicit cast to %1 instead">, + warn_format_argument_needs_cast.Text>, InGroup, DefaultIgnore; def warn_printf_positional_arg_exceeds_data_args : Warning < "data argument position '%0' exceeds the number of data arguments (%1)">, @@ -8023,6 +8222,9 @@ def warn_printf_invalid_objc_flag: Warning< def warn_scanf_scanlist_incomplete : Warning< "no closing ']' for '%%[' in scanf format string">, InGroup; +def warn_format_bool_as_character : Warning< + "using '%0' format specifier, but argument has boolean value">, + InGroup; def note_format_string_defined : Note<"format string is defined here">; def note_format_fix_specifier : Note<"did you mean to use '%0'?">; def note_printf_c_str: Note<"did you mean to call the %0 method?">; @@ -8085,6 +8287,10 @@ def 
warn_dangling_member : Warning< "%select{binds to|is}2 a temporary object " "whose lifetime is shorter than the lifetime of the constructed object">, InGroup; +def warn_dangling_lifetime_pointer_member : Warning< + "initializing pointer member %0 to point to a temporary object " + "whose lifetime is shorter than the lifetime of the constructed object">, + InGroup; def note_lifetime_extending_member_declared_here : Note< "%select{%select{reference|'std::initializer_list'}0 member|" "member with %select{reference|'std::initializer_list'}0 subobject}1 " @@ -8103,6 +8309,10 @@ def warn_new_dangling_reference : Warning< "temporary bound to reference member of allocated object " "will be destroyed at the end of the full-expression">, InGroup; +def warn_dangling_lifetime_pointer : Warning< + "object backing the pointer " + "will be destroyed at the end of the full-expression">, + InGroup; def warn_new_dangling_initializer_list : Warning< "array backing " "%select{initializer list subobject of the allocated object|" @@ -8120,11 +8330,15 @@ def warn_unsupported_lifetime_extension : Warning< // should result in a warning, since these always evaluate to a constant. 
// Array comparisons have similar warnings def warn_comparison_always : Warning< - "%select{self-|array }0comparison always evaluates to %select{a constant|%2}1">, + "%select{self-|array }0comparison always evaluates to " + "%select{a constant|true|false|'std::strong_ordering::equal'}1">, InGroup; def warn_comparison_bitwise_always : Warning< "bitwise comparison always evaluates to %select{false|true}0">, - InGroup; + InGroup, DefaultIgnore; +def warn_comparison_bitwise_or : Warning< + "bitwise or with non-zero value always evaluates to true">, + InGroup, DefaultIgnore; def warn_tautological_overlap_comparison : Warning< "overlapping comparisons always evaluate to %select{false|true}0">, InGroup, DefaultIgnore; @@ -8503,10 +8717,11 @@ def warn_sync_fetch_and_nand_semantics_change : Warning< InGroup>; // Type -def ext_invalid_sign_spec : Extension<"'%0' cannot be signed or unsigned">; +def ext_wchar_t_sign_spec : ExtWarn<"'%0' cannot be signed or unsigned">, + InGroup>, DefaultError; def warn_receiver_forward_class : Warning< - "receiver %0 is a forward class and corresponding @interface may not exist">, - InGroup; + "receiver %0 is a forward class and corresponding @interface may not exist">, + InGroup; def note_method_sent_forward_class : Note<"method %0 is used for the forward class">; def ext_missing_declspec : ExtWarn< "declaration specifier missing, defaulting to 'int'">; @@ -8984,7 +9199,7 @@ def ext_omp_loop_not_canonical_init : ExtWarn< "('var = init' or 'T var = init')">, InGroup; def err_omp_loop_not_canonical_cond : Error< "condition of OpenMP for loop must be a relational comparison " - "('<', '<=', '>', or '>=') of loop variable %0">; + "('<', '<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">; def err_omp_loop_not_canonical_incr : Error< "increment clause of OpenMP for loop must perform simple addition " "or subtraction on loop variable %0">; @@ -9101,10 +9316,10 @@ def err_omp_single_copyprivate_with_nowait : Error< "the 'copyprivate' 
clause must not be used with the 'nowait' clause">; def note_omp_nowait_clause_here : Note< "'nowait' clause is here">; -def err_omp_single_decl_in_declare_simd : Error< - "single declaration is expected after 'declare simd' directive">; +def err_omp_single_decl_in_declare_simd_variant : Error< + "single declaration is expected after 'declare %select{simd|variant}0' directive">; def err_omp_function_expected : Error< - "'#pragma omp declare simd' can only be applied to functions">; + "'#pragma omp declare %select{simd|variant}0' can only be applied to functions">; def err_omp_wrong_cancel_region : Error< "one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">; def err_omp_parent_cancel_region_nowait : Error< @@ -9286,6 +9501,44 @@ def err_omp_wrong_dependency_iterator_type : Error< "expected an integer or a pointer type of the outer loop counter '%0' for non-rectangular nests">; def err_omp_unsupported_type : Error < "host requires %0 bit size %1 type support, but device '%2' does not support it">; +def err_omp_lambda_capture_in_declare_target_not_to : Error< + "variable captured in declare target region must appear in a to clause">; +def err_omp_device_type_mismatch : Error< + "'device_type(%0)' does not match previously specified 'device_type(%1)' for the same declaration">; +def err_omp_wrong_device_function_call : Error< + "function with 'device_type(%0)' is not available on %select{device|host}1">; +def note_omp_marked_device_type_here : Note<"marked as 'device_type(%0)' here">; +def warn_omp_declare_target_after_first_use : Warning< + "declaration marked as declare target after first use, it may lead to incorrect results">, + InGroup; +def err_omp_declare_variant_incompat_attributes : Error< + "'#pragma omp declare variant' is not compatible with any target-specific attributes">; +def warn_omp_declare_variant_after_used : Warning< + "'#pragma omp declare variant' cannot be applied for function after first " + "usage; the original function might be 
used">, InGroup; +def warn_omp_declare_variant_after_emitted : Warning< + "'#pragma omp declare variant' cannot be applied to the function that was defined already;" + " the original function might be used">, InGroup; +def err_omp_declare_variant_noproto : Error< + "function with '#pragma omp declare variant' must have a prototype">; +def note_omp_declare_variant_specified_here : Note< + "'#pragma omp declare variant' for function specified here">; +def err_omp_declare_variant_doesnt_support : Error< + "'#pragma omp declare variant' does not " + "support %select{function templates|virtual functions|" + "deduced return types|constructors|destructors|deleted functions|" + "defaulted functions|constexpr functions|consteval function}0">; +def err_omp_declare_variant_diff : Error< + "function with '#pragma omp declare variant' has a different %select{calling convention" + "|return type|constexpr specification|inline specification|storage class|" + "linkage}0">; +def err_omp_declare_variant_incompat_types : Error< + "variant in '#pragma omp declare variant' with type %0 is incompatible with type %1" + >; +def warn_omp_declare_variant_marked_as_declare_variant : Warning< + "variant function in '#pragma omp declare variant' is itself marked as '#pragma omp declare variant'" + >, InGroup; +def note_omp_marked_declare_variant_here : Note<"marked as 'declare variant' here">; } // end of OpenMP category let CategoryName = "Related Result Type Issue" in { @@ -9725,6 +9978,8 @@ def err_std_compare_type_not_supported : Error< "member '%2' is missing|" "the type is not trivially copyable|" "the type does not have the expected form}1">; +def note_rewriting_operator_as_spaceship : Note< + "while rewriting comparison as call to 'operator<=>' declared here">; // Memory Tagging Extensions (MTE) diagnostics def err_memtag_arg_null_or_pointer : Error< @@ -9735,8 +9990,6 @@ def err_memtag_arg_must_be_pointer : Error< "%0 argument of MTE builtin function must be a pointer (%1 invalid)">; 
def err_memtag_arg_must_be_integer : Error< "%0 argument of MTE builtin function must be an integer type (%1 invalid)">; -def err_memtag_arg_must_be_unsigned : Error< - "%0 argument of MTE builtin function must be an unsigned integer type (%1 invalid)">; def warn_dereference_of_noderef_type : Warning< "dereferencing %0; was declared with a 'noderef' type">, InGroup; @@ -9751,6 +10004,11 @@ def err_builtin_launder_invalid_arg : Error< "%select{non-pointer|function pointer|void pointer}0 argument to " "'__builtin_launder' is not allowed">; +def err_preserve_field_info_not_field : Error< + "__builtin_preserve_field_info argument %0 not a field access">; +def err_preserve_field_info_not_const: Error< + "__builtin_preserve_field_info argument %0 not a constant">; + def err_bit_cast_non_trivially_copyable : Error< "__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">; def err_bit_cast_type_size_mismatch : Error< diff --git a/include/clang/Basic/DiagnosticSerializationKinds.td b/include/clang/Basic/DiagnosticSerializationKinds.td index 43ba19b5853..757dbbeee3c 100644 --- a/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/include/clang/Basic/DiagnosticSerializationKinds.td @@ -18,13 +18,16 @@ def err_fe_pch_malformed : Error< def err_fe_pch_malformed_block : Error< "malformed block record in PCH file: '%0'">, DefaultFatal; def err_fe_pch_file_modified : Error< - "file '%0' has been modified since the precompiled header '%1' was built">, + "file '%0' has been modified since the precompiled header '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_module_file_modified : Error< - "file '%0' has been modified since the module file '%1' was built">, + "file '%0' has been modified since the module file '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_ast_file_modified : Error< - "file '%0' has been modified since the AST file '%1' was built">, + "file '%0' has been 
modified since the AST file '%1' was built" + ": %select{size|mtime|content}2 changed">, DefaultFatal; def err_fe_pch_file_overridden : Error< "file '%0' from the precompiled header has been overridden">; @@ -77,13 +80,13 @@ def remark_module_import : Remark< InGroup; def err_imported_module_not_found : Error< - "module '%0' in AST file '%1' (imported by AST file '%2') " + "module '%0' in AST file '%1' %select{(imported by AST file '%2') |}4" "is not defined in any loaded module map file; " "maybe you need to load '%3'?">, DefaultFatal; def note_imported_by_pch_module_not_found : Note< "consider adding '%0' to the header search path">; def err_imported_module_modmap_changed : Error< - "module '%0' imported by AST file '%1' found in a different module map file" + "module '%0' %select{in|imported by}4 AST file '%1' found in a different module map file" " (%2) than when the importing AST file was built (%3)">, DefaultFatal; def err_imported_module_relocated : Error< "module '%0' was built in directory '%1' but now resides in " @@ -399,6 +402,8 @@ def warn_module_uses_date_time : Warning< def err_module_no_size_mtime_for_header : Error< "cannot emit module %0: %select{size|mtime}1 must be explicitly specified " "for missing header file \"%2\"">; +def err_module_unable_to_hash_content : Error< + "failed to hash content for '%0' because memory buffer cannot be retrieved">; } // let CategoryName } // let Component diff --git a/include/clang/Basic/Features.def b/include/clang/Basic/Features.def index 7081c02e83e..28eb694ba9a 100644 --- a/include/clang/Basic/Features.def +++ b/include/clang/Basic/Features.def @@ -39,6 +39,8 @@ FEATURE(address_sanitizer, LangOpts.Sanitize.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress)) +FEATURE(leak_sanitizer, + LangOpts.Sanitize.has(SanitizerKind::Leak)) FEATURE(hwaddress_sanitizer, LangOpts.Sanitize.hasOneOf(SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress)) @@ -187,7 +189,7 @@ FEATURE(cxx_variable_templates, 
LangOpts.CPlusPlus14) // FEATURE(raw_invocation_type, LangOpts.CPlusPlus) // Type traits // N.B. Additional type traits should not be added to the following list. -// Instead, they should be detected by has_extension. +// Instead, they should be detected by has_builtin. FEATURE(has_nothrow_assign, LangOpts.CPlusPlus) FEATURE(has_nothrow_copy, LangOpts.CPlusPlus) FEATURE(has_nothrow_constructor, LangOpts.CPlusPlus) diff --git a/include/clang/Basic/FileManager.h b/include/clang/Basic/FileManager.h index 96983475f45..28cd0581808 100644 --- a/include/clang/Basic/FileManager.h +++ b/include/clang/Basic/FileManager.h @@ -45,12 +45,31 @@ class FileSystemStatCache; class DirectoryEntry { friend class FileManager; + // FIXME: We should not be storing a directory entry name here. StringRef Name; // Name of the directory. public: StringRef getName() const { return Name; } }; +/// A reference to a \c DirectoryEntry that includes the name of the directory +/// as it was accessed by the FileManager's client. +class DirectoryEntryRef { +public: + const DirectoryEntry &getDirEntry() const { return *Entry->getValue(); } + + StringRef getName() const { return Entry->getKey(); } + +private: + friend class FileManager; + + DirectoryEntryRef( + llvm::StringMapEntry> *Entry) + : Entry(Entry) {} + + const llvm::StringMapEntry> *Entry; +}; + /// Cached information about one file (either on disk /// or in the virtual file system). /// @@ -64,8 +83,8 @@ class FileEntry { off_t Size; // File size in bytes. time_t ModTime; // Modification time of file. const DirectoryEntry *Dir; // Directory file lives in. - unsigned UID; // A unique (small) ID for the file. llvm::sys::fs::UniqueID UniqueID; + unsigned UID; // A unique (small) ID for the file. bool IsNamedPipe; bool IsValid; // Is this \c FileEntry initialized and valid? 
@@ -106,6 +125,42 @@ public: bool isOpenForTests() const { return File != nullptr; } }; +/// A reference to a \c FileEntry that includes the name of the file as it was +/// accessed by the FileManager's client. +class FileEntryRef { +public: + FileEntryRef() = delete; + FileEntryRef(StringRef Name, const FileEntry &Entry) + : Name(Name), Entry(&Entry) {} + + const StringRef getName() const { return Name; } + + bool isValid() const { return Entry->isValid(); } + + const FileEntry &getFileEntry() const { return *Entry; } + + off_t getSize() const { return Entry->getSize(); } + + unsigned getUID() const { return Entry->getUID(); } + + const llvm::sys::fs::UniqueID &getUniqueID() const { + return Entry->getUniqueID(); + } + + time_t getModificationTime() const { return Entry->getModificationTime(); } + + friend bool operator==(const FileEntryRef &LHS, const FileEntryRef &RHS) { + return LHS.Entry == RHS.Entry && LHS.Name == RHS.Name; + } + friend bool operator!=(const FileEntryRef &LHS, const FileEntryRef &RHS) { + return !(LHS == RHS); + } + +private: + StringRef Name; + const FileEntry *Entry; +}; + /// Implements support for file system lookup, file system caching, /// and directory search management. /// @@ -131,21 +186,41 @@ class FileManager : public RefCountedBase { /// The virtual files that we have allocated. SmallVector, 4> VirtualFileEntries; + /// A set of files that bypass the maps and uniquing. They can have + /// conflicting filenames. + SmallVector, 0> BypassFileEntries; + /// A cache that maps paths to directory entries (either real or - /// virtual) we have looked up + /// virtual) we have looked up, or an error that occurred when we looked up + /// the directory. /// /// The actual Entries for real directories/files are /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries /// for virtual directories/files are owned by /// VirtualDirectoryEntries/VirtualFileEntries above. 
/// - llvm::StringMap SeenDirEntries; + llvm::StringMap, llvm::BumpPtrAllocator> + SeenDirEntries; + + /// A reference to the file entry that is associated with a particular + /// filename, or a reference to another filename that should be looked up + /// instead of the accessed filename. + /// + /// The reference to another filename is specifically useful for Redirecting + /// VFSs that use external names. In that case, the \c FileEntryRef returned + /// by the \c FileManager will have the external name, and not the name that + /// was used to lookup the file. + using SeenFileEntryOrRedirect = + llvm::PointerUnion; /// A cache that maps paths to file entries (either real or - /// virtual) we have looked up. + /// virtual) we have looked up, or an error that occurred when we looked up + /// the file. /// /// \see SeenDirEntries - llvm::StringMap SeenFileEntries; + llvm::StringMap, + llvm::BumpPtrAllocator> + SeenFileEntries; /// The canonical names of directories. llvm::DenseMap CanonicalDirNames; @@ -157,15 +232,12 @@ class FileManager : public RefCountedBase { /// unsigned NextFileUID; - // Statistics. - unsigned NumDirLookups, NumFileLookups; - unsigned NumDirCacheMisses, NumFileCacheMisses; - // Caching. std::unique_ptr StatCache; - bool getStatValue(StringRef Path, llvm::vfs::Status &Status, bool isFile, - std::unique_ptr *F); + std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status, + bool isFile, + std::unique_ptr *F); /// Add all ancestors of the given path (pointing to either a file /// or a directory) as virtual directories. @@ -195,27 +267,86 @@ public: /// Removes the FileSystemStatCache object from the manager. void clearStatCache(); + /// Returns the number of unique real file entries cached by the file manager. + size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); } + /// Lookup, cache, and verify the specified directory (real or /// virtual). /// - /// This returns NULL if the directory doesn't exist. 
+ /// This returns a \c std::error_code if there was an error reading the + /// directory. On success, returns the reference to the directory entry + /// together with the exact path that was used to access a file by a + /// particular call to getDirectoryRef. /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. - const DirectoryEntry *getDirectory(StringRef DirName, - bool CacheFailure = true); + llvm::Expected getDirectoryRef(StringRef DirName, + bool CacheFailure = true); + + /// Get a \c DirectoryEntryRef if it exists, without doing anything on error. + llvm::Optional + getOptionalDirectoryRef(StringRef DirName, bool CacheFailure = true) { + return llvm::expectedToOptional(getDirectoryRef(DirName, CacheFailure)); + } + + /// Lookup, cache, and verify the specified directory (real or + /// virtual). + /// + /// This function is deprecated and will be removed at some point in the + /// future, new clients should use + /// \c getDirectoryRef. + /// + /// This returns a \c std::error_code if there was an error reading the + /// directory. If there is no error, the DirectoryEntry is guaranteed to be + /// non-NULL. + /// + /// \param CacheFailure If true and the file does not exist, we'll cache + /// the failure to find this file. + llvm::ErrorOr + getDirectory(StringRef DirName, bool CacheFailure = true); /// Lookup, cache, and verify the specified file (real or /// virtual). /// - /// This returns NULL if the file doesn't exist. + /// This function is deprecated and will be removed at some point in the + /// future, new clients should use + /// \c getFileRef. + /// + /// This returns a \c std::error_code if there was an error loading the file. + /// If there is no error, the FileEntry is guaranteed to be non-NULL. /// /// \param OpenFile if true and the file exists, it will be opened. /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. 
- const FileEntry *getFile(StringRef Filename, bool OpenFile = false, - bool CacheFailure = true); + llvm::ErrorOr + getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true); + + /// Lookup, cache, and verify the specified file (real or virtual). Return the + /// reference to the file entry together with the exact path that was used to + /// access a file by a particular call to getFileRef. If the underlying VFS is + /// a redirecting VFS that uses external file names, the returned FileEntryRef + /// will use the external name instead of the filename that was passed to this + /// method. + /// + /// This returns a \c std::error_code if there was an error loading the file, + /// or a \c FileEntryRef otherwise. + /// + /// \param OpenFile if true and the file exists, it will be opened. + /// + /// \param CacheFailure If true and the file does not exist, we'll cache + /// the failure to find this file. + llvm::Expected getFileRef(StringRef Filename, + bool OpenFile = false, + bool CacheFailure = true); + + /// Get a FileEntryRef if it exists, without doing anything on error. + llvm::Optional getOptionalFileRef(StringRef Filename, + bool OpenFile = false, + bool CacheFailure = true) { + return llvm::expectedToOptional( + getFileRef(Filename, OpenFile, CacheFailure)); + } /// Returns the current file system options FileSystemOptions &getFileSystemOpts() { return FileSystemOpts; } @@ -223,6 +354,10 @@ public: llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; } + void setVirtualFileSystem(IntrusiveRefCntPtr FS) { + this->FS = std::move(FS); + } + /// Retrieve a file entry for a "virtual" file that acts as /// if there were a file with the given name on disk. /// @@ -230,24 +365,38 @@ public: const FileEntry *getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime); + /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual + /// file entry, to access the real file. 
The returned FileEntry will have + /// the same filename as FE but a different identity and its own stat. + /// + /// This should be used only for rare error recovery paths because it + /// bypasses all mapping and uniquing, blindly creating a new FileEntry. + /// There is no attempt to deduplicate these; if you bypass the same file + /// twice, you get two new file entries. + llvm::Optional getBypassFile(FileEntryRef VFE); + /// Open the specified file as a MemoryBuffer, returning a new /// MemoryBuffer if successful, otherwise returning null. llvm::ErrorOr> - getBufferForFile(const FileEntry *Entry, bool isVolatile = false, - bool ShouldCloseOpenFile = true); + getBufferForFile(const FileEntry *Entry, bool isVolatile = false); llvm::ErrorOr> - getBufferForFile(StringRef Filename, bool isVolatile = false); + getBufferForFile(StringRef Filename, bool isVolatile = false) { + return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile); + } +private: + llvm::ErrorOr> + getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile); + +public: /// Get the 'stat' information for the given \p Path. /// /// If the path is relative, it will be resolved against the WorkingDir of the /// FileManager's FileSystemOptions. /// - /// \returns false on success, true on error. - bool getNoncachedStatValue(StringRef Path, llvm::vfs::Status &Result); - - /// Remove the real file \p Entry from the cache. - void invalidateCache(const FileEntry *Entry); + /// \returns a \c std::error_code describing an error, if there was one + std::error_code getNoncachedStatValue(StringRef Path, + llvm::vfs::Status &Result); /// If path is not absolute and FileSystemOptions set the working /// directory, the path is modified to be relative to the given @@ -265,11 +414,6 @@ public: void GetUniqueIDMapping( SmallVectorImpl &UIDToFiles) const; - /// Modifies the size and modification time of a previously created - /// FileEntry. Use with caution. 
- static void modifyFileEntry(FileEntry *File, off_t Size, - time_t ModificationTime); - /// Retrieve the canonical name for a given directory. /// /// This is a very expensive operation, despite its results being cached, diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index 465486ede71..37d7198c640 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -750,6 +750,12 @@ public: return getIdentifierInfoFlag() == ZeroArg; } + /// If this selector is the specific keyword selector described by Names. + bool isKeywordSelector(ArrayRef Names) const; + + /// If this selector is the specific unary selector described by Name. + bool isUnarySelector(StringRef Name) const; + unsigned getNumArgs() const; /// Retrieve the identifier at a given position in the selector. diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def index 31aca2b0d69..a423654d5e0 100644 --- a/include/clang/Basic/LangOptions.def +++ b/include/clang/Basic/LangOptions.def @@ -111,6 +111,7 @@ BENIGN_LANGOPT(DollarIdents , 1, 1, "'$' in identifiers") BENIGN_LANGOPT(AsmPreprocessor, 1, 0, "preprocessor in asm mode") LANGOPT(GNUMode , 1, 1, "GNU extensions") LANGOPT(GNUKeywords , 1, 1, "GNU keywords") +VALUE_LANGOPT(GNUCVersion , 32, 0, "GNU C compatibility version") BENIGN_LANGOPT(ImplicitInt, 1, !C99 && !CPlusPlus, "C89 implicit 'int'") LANGOPT(Digraphs , 1, 0, "digraphs") BENIGN_LANGOPT(HexFloats , 1, C99, "C99 hexadecimal float constants") @@ -119,7 +120,8 @@ LANGOPT(AppleKext , 1, 0, "Apple kext support") BENIGN_LANGOPT(PascalStrings, 1, 0, "Pascal string support") LANGOPT(WritableStrings , 1, 0, "writable string support") LANGOPT(ConstStrings , 1, 0, "const-qualified string support") -LANGOPT(LaxVectorConversions , 1, 1, "lax vector conversions") +ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2, + LaxVectorConversionKind::All, "lax vector conversions") LANGOPT(AltiVec , 1, 
0, "AltiVec-style vector initializers") LANGOPT(ZVector , 1, 0, "System z vector extensions") LANGOPT(Exceptions , 1, 0, "exception handling") @@ -128,6 +130,7 @@ LANGOPT(CXXExceptions , 1, 0, "C++ exceptions") LANGOPT(DWARFExceptions , 1, 0, "dwarf exception handling") LANGOPT(SjLjExceptions , 1, 0, "setjmp-longjump exception handling") LANGOPT(SEHExceptions , 1, 0, "SEH .xdata exception handling") +LANGOPT(WasmExceptions , 1, 0, "WebAssembly exception handling") LANGOPT(ExternCNoUnwind , 1, 0, "Assume extern C functions don't unwind") LANGOPT(TraditionalCPP , 1, 0, "traditional CPP emulation") LANGOPT(RTTI , 1, 1, "run-time type information") @@ -224,6 +227,8 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") +LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") + LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable") @@ -288,6 +293,10 @@ BENIGN_LANGOPT(ConstexprCallDepth, 32, 512, "maximum constexpr call depth") BENIGN_LANGOPT(ConstexprStepLimit, 32, 1048576, "maximum constexpr evaluation steps") +BENIGN_LANGOPT(EnableNewConstInterp, 1, 0, + "enable the experimental new constant interpreter") +BENIGN_LANGOPT(ForceNewConstInterp, 1, 0, + "force the use of the experimental new constant interpreter") BENIGN_LANGOPT(BracketDepth, 32, 256, "maximum bracket nesting depth") BENIGN_LANGOPT(NumLargeByValueCopy, 32, 0, diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h index 8099eed28c5..5f808f04e9a 100644 --- a/include/clang/Basic/LangOptions.h +++ b/include/clang/Basic/LangOptions.h @@ -138,6 +138,12 @@ public: /// rather than returning the required alignment. Ver7, + /// Attempt to be ABI-compatible with code generated by Clang 9.0.x + /// (SVN r351319). 
This causes vectors of __int128 to be passed in memory + /// instead of passing in multiple scalar registers on x86_64 on Linux and + /// NetBSD. + Ver9, + /// Conform to the underlying platform's C and C++ ABIs as closely /// as we can. Latest @@ -178,6 +184,16 @@ public: FEA_On }; + enum class LaxVectorConversionKind { + /// Permit no implicit vector bitcasts. + None, + /// Permit vector bitcasts between integer vectors with different numbers + /// of elements but the same total bit-width. + Integer, + /// Permit vector bitcasts between all vectors with the same total + /// bit-width. + All, + }; public: /// Set of enabled sanitizers. diff --git a/include/clang/Frontend/LangStandard.h b/include/clang/Basic/LangStandard.h similarity index 60% rename from include/clang/Frontend/LangStandard.h rename to include/clang/Basic/LangStandard.h index 244f14c793d..e7deb7d6463 100644 --- a/include/clang/Frontend/LangStandard.h +++ b/include/clang/Basic/LangStandard.h @@ -6,16 +6,37 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_FRONTEND_LANGSTANDARD_H -#define LLVM_CLANG_FRONTEND_LANGSTANDARD_H +#ifndef LLVM_CLANG_BASIC_LANGSTANDARD_H +#define LLVM_CLANG_BASIC_LANGSTANDARD_H #include "clang/Basic/LLVM.h" -#include "clang/Frontend/FrontendOptions.h" #include "llvm/ADT/StringRef.h" namespace clang { -namespace frontend { +/// The language for the input, used to select and validate the language +/// standard and possible actions. +enum class Language : uint8_t { + Unknown, + + /// Assembly: we accept this only so that we can preprocess it. + Asm, + + /// LLVM IR: we accept this so that we can run the optimizer on it, + /// and compile it to assembly or object code. + LLVM_IR, + + ///@{ Languages that the frontend can parse and compile. 
+ C, + CXX, + ObjC, + ObjCXX, + OpenCL, + CUDA, + RenderScript, + HIP, + ///@} +}; enum LangFeatures { LineComment = (1 << 0), @@ -35,22 +56,20 @@ enum LangFeatures { OpenCL = (1 << 14) }; -} - /// LangStandard - Information about the properties of a particular language /// standard. struct LangStandard { enum Kind { #define LANGSTANDARD(id, name, lang, desc, features) \ lang_##id, -#include "clang/Frontend/LangStandards.def" +#include "clang/Basic/LangStandards.def" lang_unspecified }; const char *ShortName; const char *Description; unsigned Flags; - InputKind::Language Language; + clang::Language Language; public: /// getName - Get the name of this standard. @@ -60,54 +79,54 @@ public: const char *getDescription() const { return Description; } /// Get the language that this standard describes. - InputKind::Language getLanguage() const { return Language; } + clang::Language getLanguage() const { return Language; } /// Language supports '//' comments. - bool hasLineComments() const { return Flags & frontend::LineComment; } + bool hasLineComments() const { return Flags & LineComment; } /// isC99 - Language is a superset of C99. - bool isC99() const { return Flags & frontend::C99; } + bool isC99() const { return Flags & C99; } /// isC11 - Language is a superset of C11. - bool isC11() const { return Flags & frontend::C11; } + bool isC11() const { return Flags & C11; } /// isC17 - Language is a superset of C17. - bool isC17() const { return Flags & frontend::C17; } + bool isC17() const { return Flags & C17; } /// isC2x - Language is a superset of C2x. - bool isC2x() const { return Flags & frontend::C2x; } + bool isC2x() const { return Flags & C2x; } /// isCPlusPlus - Language is a C++ variant. - bool isCPlusPlus() const { return Flags & frontend::CPlusPlus; } + bool isCPlusPlus() const { return Flags & CPlusPlus; } /// isCPlusPlus11 - Language is a C++11 variant (or later). 
- bool isCPlusPlus11() const { return Flags & frontend::CPlusPlus11; } + bool isCPlusPlus11() const { return Flags & CPlusPlus11; } /// isCPlusPlus14 - Language is a C++14 variant (or later). - bool isCPlusPlus14() const { return Flags & frontend::CPlusPlus14; } + bool isCPlusPlus14() const { return Flags & CPlusPlus14; } /// isCPlusPlus17 - Language is a C++17 variant (or later). - bool isCPlusPlus17() const { return Flags & frontend::CPlusPlus17; } + bool isCPlusPlus17() const { return Flags & CPlusPlus17; } /// isCPlusPlus2a - Language is a post-C++17 variant (or later). - bool isCPlusPlus2a() const { return Flags & frontend::CPlusPlus2a; } - + bool isCPlusPlus2a() const { return Flags & CPlusPlus2a; } /// hasDigraphs - Language supports digraphs. - bool hasDigraphs() const { return Flags & frontend::Digraphs; } + bool hasDigraphs() const { return Flags & Digraphs; } /// isGNUMode - Language includes GNU extensions. - bool isGNUMode() const { return Flags & frontend::GNUMode; } + bool isGNUMode() const { return Flags & GNUMode; } /// hasHexFloats - Language supports hexadecimal float constants. - bool hasHexFloats() const { return Flags & frontend::HexFloat; } + bool hasHexFloats() const { return Flags & HexFloat; } /// hasImplicitInt - Language allows variables to be typed as int implicitly. - bool hasImplicitInt() const { return Flags & frontend::ImplicitInt; } + bool hasImplicitInt() const { return Flags & ImplicitInt; } /// isOpenCL - Language is a OpenCL variant. 
- bool isOpenCL() const { return Flags & frontend::OpenCL; } + bool isOpenCL() const { return Flags & OpenCL; } + static Kind getLangKind(StringRef Name); static const LangStandard &getLangStandardForKind(Kind K); static const LangStandard *getLangStandardForName(StringRef Name); }; diff --git a/include/clang/Frontend/LangStandards.def b/include/clang/Basic/LangStandards.def similarity index 98% rename from include/clang/Frontend/LangStandards.def rename to include/clang/Basic/LangStandards.def index 0964e9b90a0..427691fb71e 100644 --- a/include/clang/Frontend/LangStandards.def +++ b/include/clang/Basic/LangStandards.def @@ -14,7 +14,7 @@ /// /// \param IDENT - The name of the standard as a C++ identifier. /// \param NAME - The name of the standard. -/// \param LANG - The InputKind::Language for which this is a standard. +/// \param LANG - The Language for which this is a standard. /// \param DESC - A short description of the standard. /// \param FEATURES - The standard features as flags, these are enums from the /// clang::frontend namespace, which is assumed to be be available. 
@@ -165,7 +165,7 @@ LANGSTANDARD(opencl12, "cl1.2", LANGSTANDARD(opencl20, "cl2.0", OpenCL, "OpenCL 2.0", LineComment | C99 | Digraphs | HexFloat | OpenCL) -LANGSTANDARD(openclcpp, "c++", +LANGSTANDARD(openclcpp, "clc++", OpenCL, "C++ for OpenCL", LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | Digraphs | HexFloat | OpenCL) @@ -174,6 +174,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") +LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") // CUDA LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)", diff --git a/include/clang/Basic/Linkage.h b/include/clang/Basic/Linkage.h index 696f85b1853..f4d442c084c 100644 --- a/include/clang/Basic/Linkage.h +++ b/include/clang/Basic/Linkage.h @@ -82,6 +82,12 @@ inline bool isDiscardableGVALinkage(GVALinkage L) { return L <= GVA_DiscardableODR; } +/// Do we know that this will be the only definition of this symbol (excluding +/// inlining-only definitions)? +inline bool isUniqueGVALinkage(GVALinkage L) { + return L == GVA_Internal || L == GVA_StrongExternal; +} + inline bool isExternallyVisible(Linkage L) { return L >= VisibleNoLinkage; } diff --git a/include/clang/Basic/OpenCLOptions.h b/include/clang/Basic/OpenCLOptions.h index 47310da1d6d..15661154eab 100644 --- a/include/clang/Basic/OpenCLOptions.h +++ b/include/clang/Basic/OpenCLOptions.h @@ -42,7 +42,7 @@ public: // Is supported as either an extension or an (optional) core feature for // OpenCL version \p CLVer. - bool isSupported(llvm::StringRef Ext, LangOptions LO) const { + bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const { // In C++ mode all extensions should work at least as in v2.0. auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); @@ -51,7 +51,7 @@ public: // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. 
// For supported extension, return false. - bool isSupportedCore(llvm::StringRef Ext, LangOptions LO) const { + bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const { // In C++ mode all extensions should work at least as in v2.0. auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); @@ -60,7 +60,7 @@ public: // Is supported OpenCL extension for OpenCL version \p CLVer. // For supported (optional) core feature, return false. - bool isSupportedExtension(llvm::StringRef Ext, LangOptions LO) const { + bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const { // In C++ mode all extensions should work at least as in v2.0. auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; auto I = OptMap.find(Ext)->getValue(); diff --git a/include/clang/Basic/OpenMPKinds.def b/include/clang/Basic/OpenMPKinds.def index 9685af4cade..ff8f07aa5de 100644 --- a/include/clang/Basic/OpenMPKinds.def +++ b/include/clang/Basic/OpenMPKinds.def @@ -92,6 +92,15 @@ #ifndef OPENMP_TASKLOOP_SIMD_CLAUSE # define OPENMP_TASKLOOP_SIMD_CLAUSE(Name) #endif +#ifndef OPENMP_MASTER_TASKLOOP_CLAUSE +# define OPENMP_MASTER_TASKLOOP_CLAUSE(Name) +#endif +#ifndef OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE +# define OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(Name) +#endif +#ifndef OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE +# define OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(Name) +#endif #ifndef OPENMP_CRITICAL_CLAUSE # define OPENMP_CRITICAL_CLAUSE(Name) #endif @@ -191,6 +200,15 @@ #ifndef OPENMP_ALLOCATE_CLAUSE # define OPENMP_ALLOCATE_CLAUSE(Name) #endif +#ifndef OPENMP_DEVICE_TYPE_KIND +#define OPENMP_DEVICE_TYPE_KIND(Name) +#endif +#ifndef OPENMP_DECLARE_VARIANT_CLAUSE +#define OPENMP_DECLARE_VARIANT_CLAUSE(Name) +#endif +#ifndef OPENMP_MATCH_KIND +#define OPENMP_MATCH_KIND(Name) +#endif // OpenMP directives. 
OPENMP_DIRECTIVE(threadprivate) @@ -248,6 +266,10 @@ OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distrib OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd") OPENMP_DIRECTIVE_EXT(target_teams_distribute_simd, "target teams distribute simd") OPENMP_DIRECTIVE(allocate) +OPENMP_DIRECTIVE_EXT(declare_variant, "declare variant") +OPENMP_DIRECTIVE_EXT(master_taskloop, "master taskloop") +OPENMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop") +OPENMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd") // OpenMP clauses. OPENMP_CLAUSE(allocator, OMPAllocatorClause) @@ -656,6 +678,69 @@ OPENMP_TASKLOOP_SIMD_CLAUSE(reduction) OPENMP_TASKLOOP_SIMD_CLAUSE(in_reduction) OPENMP_TASKLOOP_SIMD_CLAUSE(allocate) +// Clauses allowed for OpenMP directive 'master taskloop'. +OPENMP_MASTER_TASKLOOP_CLAUSE(if) +OPENMP_MASTER_TASKLOOP_CLAUSE(shared) +OPENMP_MASTER_TASKLOOP_CLAUSE(private) +OPENMP_MASTER_TASKLOOP_CLAUSE(firstprivate) +OPENMP_MASTER_TASKLOOP_CLAUSE(lastprivate) +OPENMP_MASTER_TASKLOOP_CLAUSE(default) +OPENMP_MASTER_TASKLOOP_CLAUSE(collapse) +OPENMP_MASTER_TASKLOOP_CLAUSE(final) +OPENMP_MASTER_TASKLOOP_CLAUSE(untied) +OPENMP_MASTER_TASKLOOP_CLAUSE(mergeable) +OPENMP_MASTER_TASKLOOP_CLAUSE(priority) +OPENMP_MASTER_TASKLOOP_CLAUSE(grainsize) +OPENMP_MASTER_TASKLOOP_CLAUSE(nogroup) +OPENMP_MASTER_TASKLOOP_CLAUSE(num_tasks) +OPENMP_MASTER_TASKLOOP_CLAUSE(reduction) +OPENMP_MASTER_TASKLOOP_CLAUSE(in_reduction) +OPENMP_MASTER_TASKLOOP_CLAUSE(allocate) + +// Clauses allowed for OpenMP directive 'master taskloop simd'. 
+OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(if) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(shared) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(private) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(firstprivate) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(lastprivate) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(default) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(collapse) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(final) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(untied) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(mergeable) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(priority) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(linear) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(aligned) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(safelen) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(simdlen) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(grainsize) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(nogroup) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(num_tasks) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(reduction) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(in_reduction) +OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE(allocate) + +// Clauses allowed for OpenMP directive 'parallel master taskloop'. 
+OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(if) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(shared) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(private) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(firstprivate) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(lastprivate) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(default) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(collapse) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(final) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(untied) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(mergeable) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(priority) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(grainsize) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(nogroup) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(num_tasks) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(reduction) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(allocate) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(num_threads) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(proc_bind) +OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(copyin) + // Clauses allowed for OpenMP directive 'critical'. OPENMP_CRITICAL_CLAUSE(hint) @@ -950,9 +1035,27 @@ OPENMP_TASKGROUP_CLAUSE(allocate) // Clauses allowed for OpenMP directive 'declare mapper'. OPENMP_DECLARE_MAPPER_CLAUSE(map) +// Device types for 'device_type' clause. +OPENMP_DEVICE_TYPE_KIND(host) +OPENMP_DEVICE_TYPE_KIND(nohost) +OPENMP_DEVICE_TYPE_KIND(any) + +// Clauses allowed for OpenMP directive 'declare variant'. +OPENMP_DECLARE_VARIANT_CLAUSE(match) + +// Context selectors for 'match' clause. +// TODO: add other context selectors. 
+OPENMP_MATCH_KIND(implementation) + +#undef OPENMP_MATCH_KIND +#undef OPENMP_DECLARE_VARIANT_CLAUSE +#undef OPENMP_DEVICE_TYPE_KIND #undef OPENMP_ALLOCATE_CLAUSE #undef OPENMP_DECLARE_MAPPER_CLAUSE #undef OPENMP_TASKGROUP_CLAUSE +#undef OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE +#undef OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE +#undef OPENMP_MASTER_TASKLOOP_CLAUSE #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND diff --git a/include/clang/Basic/OpenMPKinds.h b/include/clang/Basic/OpenMPKinds.h index d8dee2310ec..4129cca0fe6 100644 --- a/include/clang/Basic/OpenMPKinds.h +++ b/include/clang/Basic/OpenMPKinds.h @@ -35,6 +35,8 @@ enum OpenMPClauseKind { #include "clang/Basic/OpenMPKinds.def" OMPC_threadprivate, OMPC_uniform, + OMPC_device_type, + OMPC_match, OMPC_unknown }; @@ -152,6 +154,14 @@ enum OpenMPAtomicDefaultMemOrderClauseKind { OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown }; +/// OpenMP device type for 'device_type' clause. +enum OpenMPDeviceType { +#define OPENMP_DEVICE_TYPE_KIND(Name) \ + OMPC_DEVICE_TYPE_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_DEVICE_TYPE_unknown +}; + /// Scheduling data for loop-based OpenMP directives. struct OpenMPScheduleTy final { OpenMPScheduleClauseKind Schedule = OMPC_SCHEDULE_unknown; @@ -259,7 +269,8 @@ bool isOpenMPPrivate(OpenMPClauseKind Kind); bool isOpenMPThreadPrivate(OpenMPClauseKind Kind); /// Checks if the specified directive kind is one of tasking directives - task, -/// taskloop or taksloop simd. +/// taskloop, taksloop simd, master taskloop, parallel master taskloop or master +/// taskloop simd. 
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind); /// Checks if the specified directive kind is one of the composite or combined diff --git a/include/clang/Basic/OperatorKinds.h b/include/clang/Basic/OperatorKinds.h index 9757acaa530..d6618914451 100644 --- a/include/clang/Basic/OperatorKinds.h +++ b/include/clang/Basic/OperatorKinds.h @@ -30,6 +30,25 @@ enum OverloadedOperatorKind : int { /// the preceding "operator" keyword. const char *getOperatorSpelling(OverloadedOperatorKind Operator); +/// Get the other overloaded operator that the given operator can be rewritten +/// into, if any such operator exists. +inline OverloadedOperatorKind +getRewrittenOverloadedOperator(OverloadedOperatorKind Kind) { + switch (Kind) { + case OO_Less: + case OO_LessEqual: + case OO_Greater: + case OO_GreaterEqual: + return OO_Spaceship; + + case OO_ExclaimEqual: + return OO_EqualEqual; + + default: + return OO_None; + } +} + } // end namespace clang #endif diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h index e32f749ae6a..3185ca0f4a2 100644 --- a/include/clang/Basic/SourceManager.h +++ b/include/clang/Basic/SourceManager.h @@ -140,9 +140,9 @@ namespace SrcMgr { /// exist. unsigned BufferOverridden : 1; - /// True if this content cache was initially created for a source - /// file considered as a system one. - unsigned IsSystemFile : 1; + /// True if this content cache was initially created for a source file + /// considered to be volatile (likely to change between stat and open). 
+ unsigned IsFileVolatile : 1; /// True if this file may be transient, that is, if it might not /// exist at some later point in time when this content entry is used, @@ -152,15 +152,15 @@ namespace SrcMgr { ContentCache(const FileEntry *Ent = nullptr) : ContentCache(Ent, Ent) {} ContentCache(const FileEntry *Ent, const FileEntry *contentEnt) - : Buffer(nullptr, false), OrigEntry(Ent), ContentsEntry(contentEnt), - BufferOverridden(false), IsSystemFile(false), IsTransient(false) {} + : Buffer(nullptr, false), OrigEntry(Ent), ContentsEntry(contentEnt), + BufferOverridden(false), IsFileVolatile(false), IsTransient(false) {} /// The copy ctor does not allow copies where source object has either /// a non-NULL Buffer or SourceLineCache. Ownership of allocated memory /// is not transferred, so this is a logical error. ContentCache(const ContentCache &RHS) - : Buffer(nullptr, false), BufferOverridden(false), IsSystemFile(false), - IsTransient(false) { + : Buffer(nullptr, false), BufferOverridden(false), + IsFileVolatile(false), IsTransient(false) { OrigEntry = RHS.OrigEntry; ContentsEntry = RHS.ContentsEntry; @@ -185,7 +185,7 @@ namespace SrcMgr { /// /// \param Invalid If non-NULL, will be set \c true if an error occurred. const llvm::MemoryBuffer *getBuffer(DiagnosticsEngine &Diag, - const SourceManager &SM, + FileManager &FM, SourceLocation Loc = SourceLocation(), bool *Invalid = nullptr) const; @@ -265,16 +265,21 @@ namespace SrcMgr { llvm::PointerIntPair ContentAndKind; + /// The filename that is used to access the file entry represented by the + /// content cache. + StringRef Filename; + public: /// Return a FileInfo object. 
static FileInfo get(SourceLocation IL, const ContentCache *Con, - CharacteristicKind FileCharacter) { + CharacteristicKind FileCharacter, StringRef Filename) { FileInfo X; X.IncludeLoc = IL.getRawEncoding(); X.NumCreatedFIDs = 0; X.HasLineDirectives = false; X.ContentAndKind.setPointer(Con); X.ContentAndKind.setInt(FileCharacter); + X.Filename = Filename; return X; } @@ -299,6 +304,10 @@ namespace SrcMgr { void setHasLineDirectives() { HasLineDirectives = true; } + + /// Returns the name of the file that was used when the file was loaded from + /// the underlying file system. + StringRef getName() const { return Filename; } }; /// Each ExpansionInfo encodes the expansion location - where @@ -821,7 +830,18 @@ public: const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile, isSystem(FileCharacter)); assert(IR && "getOrCreateContentCache() cannot return NULL"); - return createFileID(IR, IncludePos, FileCharacter, LoadedID, LoadedOffset); + return createFileID(IR, SourceFile->getName(), IncludePos, FileCharacter, + LoadedID, LoadedOffset); + } + + FileID createFileID(FileEntryRef SourceFile, SourceLocation IncludePos, + SrcMgr::CharacteristicKind FileCharacter, + int LoadedID = 0, unsigned LoadedOffset = 0) { + const SrcMgr::ContentCache *IR = getOrCreateContentCache( + &SourceFile.getFileEntry(), isSystem(FileCharacter)); + assert(IR && "getOrCreateContentCache() cannot return NULL"); + return createFileID(IR, SourceFile.getName(), IncludePos, FileCharacter, + LoadedID, LoadedOffset); } /// Create a new FileID that represents the specified memory buffer. 
@@ -832,9 +852,10 @@ public: SrcMgr::CharacteristicKind FileCharacter = SrcMgr::C_User, int LoadedID = 0, unsigned LoadedOffset = 0, SourceLocation IncludeLoc = SourceLocation()) { + StringRef Name = Buffer->getBufferIdentifier(); return createFileID( createMemBufferContentCache(Buffer.release(), /*DoNotFree*/ false), - IncludeLoc, FileCharacter, LoadedID, LoadedOffset); + Name, IncludeLoc, FileCharacter, LoadedID, LoadedOffset); } enum UnownedTag { Unowned }; @@ -847,8 +868,9 @@ public: SrcMgr::CharacteristicKind FileCharacter = SrcMgr::C_User, int LoadedID = 0, unsigned LoadedOffset = 0, SourceLocation IncludeLoc = SourceLocation()) { - return createFileID(createMemBufferContentCache(Buffer, /*DoNotFree*/true), - IncludeLoc, FileCharacter, LoadedID, LoadedOffset); + return createFileID(createMemBufferContentCache(Buffer, /*DoNotFree*/ true), + Buffer->getBufferIdentifier(), IncludeLoc, + FileCharacter, LoadedID, LoadedOffset); } /// Get the FileID for \p SourceFile if it exists. Otherwise, create a @@ -930,11 +952,12 @@ public: return false; } - /// Disable overridding the contents of a file, previously enabled - /// with #overrideFileContents. + /// Bypass the overridden contents of a file. This creates a new FileEntry + /// and initializes the content cache for it. Returns nullptr if there is no + /// such file in the filesystem. /// /// This should be called before parsing has begun. - void disableFileContentsOverride(const FileEntry *File); + const FileEntry *bypassFileContentsOverride(const FileEntry &File); /// Specify that a file is transient. 
void setFileIsTransient(const FileEntry *SourceFile); @@ -964,8 +987,8 @@ public: return getFakeBufferForRecovery(); } - return Entry.getFile().getContentCache()->getBuffer(Diag, *this, Loc, - Invalid); + return Entry.getFile().getContentCache()->getBuffer(Diag, getFileManager(), + Loc, Invalid); } const llvm::MemoryBuffer *getBuffer(FileID FID, @@ -979,9 +1002,8 @@ public: return getFakeBufferForRecovery(); } - return Entry.getFile().getContentCache()->getBuffer(Diag, *this, - SourceLocation(), - Invalid); + return Entry.getFile().getContentCache()->getBuffer( + Diag, getFileManager(), SourceLocation(), Invalid); } /// Returns the FileEntry record for the provided FileID. @@ -997,6 +1019,19 @@ public: return Content->OrigEntry; } + /// Returns the FileEntryRef for the provided FileID. + Optional getFileEntryRefForID(FileID FID) const { + bool Invalid = false; + const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid); + if (Invalid || !Entry.isFile()) + return None; + + const SrcMgr::ContentCache *Content = Entry.getFile().getContentCache(); + if (!Content || !Content->OrigEntry) + return None; + return FileEntryRef(Entry.getFile().getName(), *Content->OrigEntry); + } + /// Returns the FileEntry record for the provided SLocEntry. const FileEntry *getFileEntryForSLocEntry(const SrcMgr::SLocEntry &sloc) const { @@ -1785,10 +1820,10 @@ private: /// /// This works regardless of whether the ContentCache corresponds to a /// file or some other input source. 
- FileID createFileID(const SrcMgr::ContentCache* File, + FileID createFileID(const SrcMgr::ContentCache *File, StringRef Filename, SourceLocation IncludePos, - SrcMgr::CharacteristicKind DirCharacter, - int LoadedID, unsigned LoadedOffset); + SrcMgr::CharacteristicKind DirCharacter, int LoadedID, + unsigned LoadedOffset); const SrcMgr::ContentCache * getOrCreateContentCache(const FileEntry *SourceFile, diff --git a/include/clang/Basic/Specifiers.h b/include/clang/Basic/Specifiers.h index d1236e798e5..fad97a26d95 100644 --- a/include/clang/Basic/Specifiers.h +++ b/include/clang/Basic/Specifiers.h @@ -32,7 +32,8 @@ namespace clang { enum ConstexprSpecKind { CSK_unspecified, CSK_constexpr, - CSK_consteval + CSK_consteval, + CSK_constinit }; /// Specifies the width of a type, e.g., short, long, or long long. diff --git a/include/clang/Basic/Stack.h b/include/clang/Basic/Stack.h index e0b04099de5..3418c3bad11 100644 --- a/include/clang/Basic/Stack.h +++ b/include/clang/Basic/Stack.h @@ -16,11 +16,40 @@ #include +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compiler.h" + namespace clang { /// The amount of stack space that Clang would like to be provided with. /// If less than this much is available, we may be unable to reach our /// template instantiation depth limit and other similar limits. constexpr size_t DesiredStackSize = 8 << 20; + + /// Call this once on each thread, as soon after starting the thread as + /// feasible, to note the approximate address of the bottom of the stack. + void noteBottomOfStack(); + + /// Determine whether the stack is nearly exhausted. + bool isStackNearlyExhausted(); + + void runWithSufficientStackSpaceSlow(llvm::function_ref Diag, + llvm::function_ref Fn); + + /// Run a given function on a stack with "sufficient" space. If stack space + /// is insufficient, calls Diag to emit a diagnostic before calling Fn. 
+ inline void runWithSufficientStackSpace(llvm::function_ref Diag, + llvm::function_ref Fn) { +#ifdef LLVM_ENABLE_THREADS + if (LLVM_UNLIKELY(isStackNearlyExhausted())) + runWithSufficientStackSpaceSlow(Diag, Fn); + else + Fn(); +#else + if (LLVM_UNLIKELY(isStackNearlyExhausted())) + Diag(); + Fn(); +#endif + } } // end namespace clang #endif // LLVM_CLANG_BASIC_STACK_H diff --git a/include/clang/Basic/StmtNodes.td b/include/clang/Basic/StmtNodes.td index be364de1a76..59444b2919a 100644 --- a/include/clang/Basic/StmtNodes.td +++ b/include/clang/Basic/StmtNodes.td @@ -114,6 +114,7 @@ def GNUNullExpr : DStmt; // C++ Expressions. def CXXOperatorCallExpr : DStmt; def CXXMemberCallExpr : DStmt; +def CXXRewrittenBinaryOperator : DStmt; def CXXNamedCastExpr : DStmt; def CXXStaticCastExpr : DStmt; def CXXDynamicCastExpr : DStmt; @@ -163,6 +164,9 @@ def CoawaitExpr : DStmt; def DependentCoawaitExpr : DStmt; def CoyieldExpr : DStmt; +// C++2a Concepts expressions +def ConceptSpecializationExpr : DStmt; + // Obj-C Expressions. 
def ObjCStringLiteral : DStmt; def ObjCBoxedExpr : DStmt; @@ -242,6 +246,9 @@ def OMPCancellationPointDirective : DStmt; def OMPCancelDirective : DStmt; def OMPTaskLoopDirective : DStmt; def OMPTaskLoopSimdDirective : DStmt; +def OMPMasterTaskLoopDirective : DStmt; +def OMPMasterTaskLoopSimdDirective : DStmt; +def OMPParallelMasterTaskLoopDirective : DStmt; def OMPDistributeDirective : DStmt; def OMPDistributeParallelForDirective : DStmt; def OMPDistributeParallelForSimdDirective : DStmt; diff --git a/include/clang/Basic/SyncScope.h b/include/clang/Basic/SyncScope.h index 15af02d83cd..ce8fb9cbed1 100644 --- a/include/clang/Basic/SyncScope.h +++ b/include/clang/Basic/SyncScope.h @@ -144,7 +144,7 @@ AtomicScopeModel::create(AtomicScopeModelKind K) { case AtomicScopeModelKind::None: return std::unique_ptr{}; case AtomicScopeModelKind::OpenCL: - return llvm::make_unique(); + return std::make_unique(); } llvm_unreachable("Invalid atomic scope model kind"); } diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h index 50262fa310c..0e2f0753b0c 100644 --- a/include/clang/Basic/TargetBuiltins.h +++ b/include/clang/Basic/TargetBuiltins.h @@ -52,6 +52,16 @@ namespace clang { }; } + /// BPF builtins + namespace BPF { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, + #define BUILTIN(ID, TYPE, ATTRS) BI##ID, + #include "clang/Basic/BuiltinsBPF.def" + LastTSBuiltin + }; + } + /// PPC builtins namespace PPC { enum { diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h index 7a8384f5fbc..9a3bb986930 100644 --- a/include/clang/Basic/TargetInfo.h +++ b/include/clang/Basic/TargetInfo.h @@ -19,14 +19,15 @@ #include "clang/Basic/Specifiers.h" #include "clang/Basic/TargetCXXABI.h" #include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" 
#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/VersionTuple.h" #include @@ -35,6 +36,7 @@ namespace llvm { struct fltSemantics; +class DataLayout; } namespace clang { @@ -193,12 +195,12 @@ protected: unsigned IsRenderScriptTarget : 1; + unsigned HasAArch64SVETypes : 1; + // TargetInfo Constructor. Default initializes all fields. TargetInfo(const llvm::Triple &T); - void resetDataLayout(StringRef DL) { - DataLayout.reset(new llvm::DataLayout(DL)); - } + void resetDataLayout(StringRef DL); public: /// Construct a target for the given options. @@ -789,6 +791,10 @@ public: /// Returns true for RenderScript. bool isRenderScriptTarget() const { return IsRenderScriptTarget; } + /// Returns whether or not the AArch64 SVE built-in types are + /// available on this target. + bool hasAArch64SVETypes() const { return HasAArch64SVETypes; } + /// Returns whether the passed in string is a valid clobber in an /// inline asm statement. /// @@ -1249,15 +1255,9 @@ public: bool isBigEndian() const { return BigEndian; } bool isLittleEndian() const { return !BigEndian; } - enum CallingConvMethodType { - CCMT_Unknown, - CCMT_Member, - CCMT_NonMember - }; - /// Gets the default calling convention for the given target and /// declaration context. - virtual CallingConv getDefaultCallingConv(CallingConvMethodType MT) const { + virtual CallingConv getDefaultCallingConv() const { // Not all targets will specify an explicit calling convention that we can // express. This will always do the right thing, even though it's not // an explicit calling convention. 
@@ -1268,6 +1268,7 @@ public: CCCR_OK, CCCR_Warning, CCCR_Ignore, + CCCR_Error, }; /// Determines whether a given calling convention is valid for the diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def index 55e94d387c9..94fe1ba63a9 100644 --- a/include/clang/Basic/TokenKinds.def +++ b/include/clang/Basic/TokenKinds.def @@ -68,6 +68,9 @@ #ifndef ANNOTATION #define ANNOTATION(X) TOK(annot_ ## X) #endif +#ifndef PRAGMA_ANNOTATION +#define PRAGMA_ANNOTATION(X) ANNOTATION(X) +#endif //===----------------------------------------------------------------------===// // Preprocessor keywords. @@ -386,6 +389,7 @@ MODULES_KEYWORD(import) // C++20 keywords. CXX2A_KEYWORD(char8_t , CHAR8SUPPORT) CXX2A_KEYWORD(consteval , 0) +CXX2A_KEYWORD(constinit , 0) // C11 Extension KEYWORD(_Float16 , KEYALL) @@ -446,16 +450,18 @@ TYPE_TRAIT_N(__is_nothrow_constructible, IsNothrowConstructible, KEYCXX) // MSVC14.0 / VS2015 Type Traits TYPE_TRAIT_2(__is_assignable, IsAssignable, KEYCXX) +// MSVC Type Traits of unknown vintage +TYPE_TRAIT_1(__has_nothrow_move_assign, HasNothrowMoveAssign, KEYCXX) +TYPE_TRAIT_1(__has_trivial_move_assign, HasTrivialMoveAssign, KEYCXX) +TYPE_TRAIT_1(__has_trivial_move_constructor, HasTrivialMoveConstructor, KEYCXX) + // GNU and MS Type Traits TYPE_TRAIT_1(__has_nothrow_assign, HasNothrowAssign, KEYCXX) -TYPE_TRAIT_1(__has_nothrow_move_assign, HasNothrowMoveAssign, KEYCXX) TYPE_TRAIT_1(__has_nothrow_copy, HasNothrowCopy, KEYCXX) TYPE_TRAIT_1(__has_nothrow_constructor, HasNothrowConstructor, KEYCXX) TYPE_TRAIT_1(__has_trivial_assign, HasTrivialAssign, KEYCXX) -TYPE_TRAIT_1(__has_trivial_move_assign, HasTrivialMoveAssign, KEYCXX) TYPE_TRAIT_1(__has_trivial_copy, HasTrivialCopy, KEYCXX) TYPE_TRAIT_1(__has_trivial_constructor, HasTrivialDefaultConstructor, KEYCXX) -TYPE_TRAIT_1(__has_trivial_move_constructor, HasTrivialMoveConstructor, KEYCXX) TYPE_TRAIT_1(__has_trivial_destructor, HasTrivialDestructor, KEYCXX) 
TYPE_TRAIT_1(__has_virtual_destructor, HasVirtualDestructor, KEYCXX) TYPE_TRAIT_1(__is_abstract, IsAbstract, KEYCXX) @@ -472,17 +478,18 @@ TYPE_TRAIT_1(__is_literal, IsLiteral, KEYCXX) ALIAS("__is_literal_type", __is_literal, KEYCXX) TYPE_TRAIT_1(__is_pod, IsPOD, KEYCXX) TYPE_TRAIT_1(__is_polymorphic, IsPolymorphic, KEYCXX) +TYPE_TRAIT_1(__is_standard_layout, IsStandardLayout, KEYCXX) TYPE_TRAIT_1(__is_trivial, IsTrivial, KEYCXX) +TYPE_TRAIT_2(__is_trivially_assignable, IsTriviallyAssignable, KEYCXX) +TYPE_TRAIT_N(__is_trivially_constructible, IsTriviallyConstructible, KEYCXX) +TYPE_TRAIT_1(__is_trivially_copyable, IsTriviallyCopyable, KEYCXX) TYPE_TRAIT_1(__is_union, IsUnion, KEYCXX) TYPE_TRAIT_1(__has_unique_object_representations, HasUniqueObjectRepresentations, KEYCXX) +KEYWORD(__underlying_type , KEYCXX) // Clang-only C++ Type Traits -TYPE_TRAIT_N(__is_trivially_constructible, IsTriviallyConstructible, KEYCXX) -TYPE_TRAIT_1(__is_trivially_copyable, IsTriviallyCopyable, KEYCXX) -TYPE_TRAIT_2(__is_trivially_assignable, IsTriviallyAssignable, KEYCXX) TYPE_TRAIT_2(__reference_binds_to_temporary, ReferenceBindsToTemporary, KEYCXX) -KEYWORD(__underlying_type , KEYCXX) // Embarcadero Expression Traits KEYWORD(__is_lvalue_expr , KEYCXX) @@ -509,7 +516,6 @@ TYPE_TRAIT_1(__is_member_function_pointer, IsMemberFunctionPointer, KEYCXX) TYPE_TRAIT_1(__is_member_pointer, IsMemberPointer, KEYCXX) TYPE_TRAIT_1(__is_const, IsConst, KEYCXX) TYPE_TRAIT_1(__is_volatile, IsVolatile, KEYCXX) -TYPE_TRAIT_1(__is_standard_layout, IsStandardLayout, KEYCXX) TYPE_TRAIT_1(__is_signed, IsSigned, KEYCXX) TYPE_TRAIT_1(__is_unsigned, IsUnsigned, KEYCXX) @@ -722,6 +728,11 @@ ANNOTATION(typename) // annotation for a C typedef name, a C++ (possibly ANNOTATION(template_id) // annotation for a C++ template-id that names a // function template specialization (not a type), // e.g., "std::swap" +ANNOTATION(non_type) // annotation for a single non-type declaration +ANNOTATION(non_type_undeclared) // 
annotation for an undeclared identifier that + // was assumed to be an ADL-only function name +ANNOTATION(non_type_dependent) // annotation for an assumed non-type member of + // a dependent base class ANNOTATION(primary_expr) // annotation for a primary expression ANNOTATION(decltype) // annotation for a decltype expression, // e.g., "decltype(foo.bar())" @@ -729,103 +740,103 @@ ANNOTATION(decltype) // annotation for a decltype expression, // Annotation for #pragma unused(...) // For each argument inside the parentheses the pragma handler will produce // one 'pragma_unused' annotation token followed by the argument token. -ANNOTATION(pragma_unused) +PRAGMA_ANNOTATION(pragma_unused) // Annotation for #pragma GCC visibility... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_vis) +PRAGMA_ANNOTATION(pragma_vis) // Annotation for #pragma pack... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_pack) +PRAGMA_ANNOTATION(pragma_pack) // Annotation for #pragma clang __debug parser_crash... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_parser_crash) +PRAGMA_ANNOTATION(pragma_parser_crash) // Annotation for #pragma clang __debug captured... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_captured) +PRAGMA_ANNOTATION(pragma_captured) // Annotation for #pragma clang __debug dump... // The lexer produces these so that the parser and semantic analysis can // look up and dump the operand. -ANNOTATION(pragma_dump) +PRAGMA_ANNOTATION(pragma_dump) // Annotation for #pragma ms_struct... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_msstruct) +PRAGMA_ANNOTATION(pragma_msstruct) // Annotation for #pragma align... 
// The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_align) +PRAGMA_ANNOTATION(pragma_align) // Annotation for #pragma weak id // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_weak) +PRAGMA_ANNOTATION(pragma_weak) // Annotation for #pragma weak id = id // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_weakalias) +PRAGMA_ANNOTATION(pragma_weakalias) // Annotation for #pragma redefine_extname... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_redefine_extname) +PRAGMA_ANNOTATION(pragma_redefine_extname) // Annotation for #pragma STDC FP_CONTRACT... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_fp_contract) +PRAGMA_ANNOTATION(pragma_fp_contract) // Annotation for #pragma STDC FENV_ACCESS // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_fenv_access) +PRAGMA_ANNOTATION(pragma_fenv_access) // Annotation for #pragma pointers_to_members... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_ms_pointers_to_members) +PRAGMA_ANNOTATION(pragma_ms_pointers_to_members) // Annotation for #pragma vtordisp... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_ms_vtordisp) +PRAGMA_ANNOTATION(pragma_ms_vtordisp) // Annotation for all microsoft #pragmas... // The lexer produces these so that they only take effect when the parser // handles them. -ANNOTATION(pragma_ms_pragma) +PRAGMA_ANNOTATION(pragma_ms_pragma) // Annotation for #pragma OPENCL EXTENSION... // The lexer produces these so that they only take effect when the parser // handles them. 
-ANNOTATION(pragma_opencl_extension) +PRAGMA_ANNOTATION(pragma_opencl_extension) // Annotations for OpenMP pragma directives - #pragma omp ... // The lexer produces these so that they only take effect when the parser // handles #pragma omp ... directives. -ANNOTATION(pragma_openmp) -ANNOTATION(pragma_openmp_end) +PRAGMA_ANNOTATION(pragma_openmp) +PRAGMA_ANNOTATION(pragma_openmp_end) // Annotations for loop pragma directives #pragma clang loop ... // The lexer produces these so that they only take effect when the parser // handles #pragma loop ... directives. -ANNOTATION(pragma_loop_hint) +PRAGMA_ANNOTATION(pragma_loop_hint) -ANNOTATION(pragma_fp) +PRAGMA_ANNOTATION(pragma_fp) // Annotation for the attribute pragma directives - #pragma clang attribute ... -ANNOTATION(pragma_attribute) +PRAGMA_ANNOTATION(pragma_attribute) // Annotations for module import translated from #include etc. ANNOTATION(module_include) @@ -836,6 +847,7 @@ ANNOTATION(module_end) // into the name of a header unit. ANNOTATION(header_unit) +#undef PRAGMA_ANNOTATION #undef ANNOTATION #undef TESTING_KEYWORD #undef OBJC_AT_KEYWORD diff --git a/include/clang/Basic/TokenKinds.h b/include/clang/Basic/TokenKinds.h index 1d5be5f9152..c25181e6827 100644 --- a/include/clang/Basic/TokenKinds.h +++ b/include/clang/Basic/TokenKinds.h @@ -90,13 +90,10 @@ inline bool isLiteral(TokenKind K) { } /// Return true if this is any of tok::annot_* kinds. -inline bool isAnnotation(TokenKind K) { -#define ANNOTATION(NAME) \ - if (K == tok::annot_##NAME) \ - return true; -#include "clang/Basic/TokenKinds.def" - return false; -} +bool isAnnotation(TokenKind K); + +/// Return true if this is an annotation token representing a pragma. 
+bool isPragmaAnnotation(TokenKind K); } // end namespace tok } // end namespace clang diff --git a/include/clang/Basic/TypeNodes.td b/include/clang/Basic/TypeNodes.td new file mode 100644 index 00000000000..b2554de24aa --- /dev/null +++ b/include/clang/Basic/TypeNodes.td @@ -0,0 +1,106 @@ +class Type { + bit Abstract = abstract; +} + +class DerivedType : Type { + Type Base = base; +} + +/// A type node that is only used to represent dependent types in C++. For +/// example, DependentTemplateSpecializationType is used to represent types +/// where the base template-id is dependent (such as `T::foo`). Code +/// that only works with non-dependent types can ignore these type nodes. +class AlwaysDependent {} + +/// A type node that is never used to represent a canonical type, which is to +/// say that it always represents some sort of type "sugar" which can +/// (supposedly) be erased without affecting the formal behavior of the +/// language. For example, in standard C/C++, typedefs do not introduce new +/// types and do not affect the semantics of the program. Code that only +/// works with canonical types can ignore these type nodes. +/// +/// Note that this simple story about non-canonical types is not the whole +/// truth. Languages and extensions often have formation rules which differ +/// based on how a type is spelled and which therefore are not consistent +/// with immediately stripping away type sugar. More critically, attributes on +/// typedefs can have semantic impacts in ways that are only reflected in our +/// AST by preserving the typedef sugar; for example, we do not otherwise +/// represent the alignment attribute on typedefs, and so it is necessary to +/// preserve typedef structure into most parts of IR generation. +class NeverCanonical {} + +/// A type node that only represents a canonical type in some dependent cases. 
+/// For example, `std::vector` (a TemplateSpecializationType) is +/// considered to be a non-canonical representation for the RecordType +/// referencing the concrete ClassTemplateSpecializationDecl; but +/// `std::vector` cannot be resolved to a concrete specialization +/// and so remains canonical. Code which only works with non-dependent +/// canonical types can ignore these nodes. +class NeverCanonicalUnlessDependent {} + +/// A type node which never has component type structure. Some code may be +/// able to operate on leaf types faster than they can on non-leaf types. +/// +/// For example, the function type `void (int)` is not a leaf type because it +/// is structurally composed of component types (`void` and `int`). +/// +/// A struct type is a leaf type because its field types are not part of its +/// type-expression. +/// +/// Nodes like `TypedefType` which are syntactically leaves but can desugar +/// to types that may not be leaves should not declare this. +class LeafType {} + +def BuiltinType : Type, LeafType; +def ComplexType : Type; +def PointerType : Type; +def BlockPointerType : Type; +def ReferenceType : Type<1>; +def LValueReferenceType : DerivedType; +def RValueReferenceType : DerivedType; +def MemberPointerType : Type; +def ArrayType : Type<1>; +def ConstantArrayType : DerivedType; +def IncompleteArrayType : DerivedType; +def VariableArrayType : DerivedType; +def DependentSizedArrayType : DerivedType, AlwaysDependent; +def DependentSizedExtVectorType : Type, AlwaysDependent; +def DependentAddressSpaceType : Type, AlwaysDependent; +def VectorType : Type; +def DependentVectorType : Type, AlwaysDependent; +def ExtVectorType : DerivedType; +def FunctionType : Type<1>; +def FunctionProtoType : DerivedType; +def FunctionNoProtoType : DerivedType; +def UnresolvedUsingType : Type, AlwaysDependent; +def ParenType : Type, NeverCanonical; +def TypedefType : Type, NeverCanonical; +def MacroQualifiedType : Type, NeverCanonical; +def AdjustedType : Type, 
NeverCanonical; +def DecayedType : DerivedType, NeverCanonical; +def TypeOfExprType : Type, NeverCanonicalUnlessDependent; +def TypeOfType : Type, NeverCanonicalUnlessDependent; +def DecltypeType : Type, NeverCanonicalUnlessDependent; +def UnaryTransformType : Type, NeverCanonicalUnlessDependent; +def TagType : Type<1>; +def RecordType : DerivedType, LeafType; +def EnumType : DerivedType, LeafType; +def ElaboratedType : Type, NeverCanonical; +def AttributedType : Type, NeverCanonical; +def TemplateTypeParmType : Type, AlwaysDependent, LeafType; +def SubstTemplateTypeParmType : Type, NeverCanonical; +def SubstTemplateTypeParmPackType : Type, AlwaysDependent; +def TemplateSpecializationType : Type, NeverCanonicalUnlessDependent; +def DeducedType : Type<1>; +def AutoType : DerivedType; +def DeducedTemplateSpecializationType : DerivedType; +def InjectedClassNameType : Type, AlwaysDependent, LeafType; +def DependentNameType : Type, AlwaysDependent; +def DependentTemplateSpecializationType : Type, AlwaysDependent; +def PackExpansionType : Type, NeverCanonicalUnlessDependent; +def ObjCTypeParamType : Type, NeverCanonical; +def ObjCObjectType : Type; +def ObjCInterfaceType : DerivedType, LeafType; +def ObjCObjectPointerType : Type; +def PipeType : Type; +def AtomicType : Type; diff --git a/include/clang/Basic/X86Target.def b/include/clang/Basic/X86Target.def index 94ccb9fd8b2..ba4e5981e7d 100644 --- a/include/clang/Basic/X86Target.def +++ b/include/clang/Basic/X86Target.def @@ -173,6 +173,10 @@ PROC(IcelakeClient, "icelake-client", PROC_64_BIT) /// Icelake server microarchitecture based processors. PROC(IcelakeServer, "icelake-server", PROC_64_BIT) +/// \name Tigerlake +/// Tigerlake microarchitecture based processors. +PROC(Tigerlake, "tigerlake", PROC_64_BIT) + /// \name Knights Landing /// Knights Landing processor. 
PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) @@ -297,6 +301,7 @@ FEATURE(FEATURE_VPCLMULQDQ) FEATURE(FEATURE_AVX512VNNI) FEATURE(FEATURE_AVX512BITALG) FEATURE(FEATURE_AVX512BF16) +FEATURE(FEATURE_AVX512VP2INTERSECT) // FIXME: When commented out features are supported in LLVM, enable them here. diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 428c22d1a01..a52ed496580 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -1651,10 +1651,10 @@ let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { // v8.2-A FP16 fused multiply-add long instructions. let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { - def VFMLAL_LOW : SInst<"vfmlal_low", "ffHH", "hQh">; - def VFMLSL_LOW : SInst<"vfmlsl_low", "ffHH", "hQh">; - def VFMLAL_HIGH : SInst<"vfmlal_high", "ffHH", "hQh">; - def VFMLSL_HIGH : SInst<"vfmlsl_high", "ffHH", "hQh">; + def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; + def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; + def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; + def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; diff --git a/include/clang/CodeGen/CGFunctionInfo.h b/include/clang/CodeGen/CGFunctionInfo.h index 1f81072e23d..5069d9af42a 100644 --- a/include/clang/CodeGen/CGFunctionInfo.h +++ b/include/clang/CodeGen/CGFunctionInfo.h @@ -109,14 +109,12 @@ private: UnpaddedCoerceAndExpandType = T; } - ABIArgInfo(Kind K) - : TheKind(K), PaddingInReg(false), InReg(false) { - } - public: - ABIArgInfo() + ABIArgInfo(Kind K = Direct) : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), - TheKind(Direct), PaddingInReg(false), InReg(false) {} + TheKind(K), PaddingInReg(false), InAllocaSRet(false), + IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false), 
+ InReg(false), CanBeFlattened(false), SignExt(false) {} static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0, llvm::Type *Padding = nullptr, diff --git a/include/clang/CrossTU/CrossTranslationUnit.h b/include/clang/CrossTU/CrossTranslationUnit.h index d64329cdff3..4d2b7109c62 100644 --- a/include/clang/CrossTU/CrossTranslationUnit.h +++ b/include/clang/CrossTU/CrossTranslationUnit.h @@ -17,6 +17,7 @@ #include "clang/AST/ASTImporterSharedState.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Error.h" @@ -153,18 +154,34 @@ public: /// was passed to the constructor. /// /// \return Returns the resulting definition or an error. - llvm::Expected importDefinition(const FunctionDecl *FD); - llvm::Expected importDefinition(const VarDecl *VD); + llvm::Expected importDefinition(const FunctionDecl *FD, + ASTUnit *Unit); + llvm::Expected importDefinition(const VarDecl *VD, + ASTUnit *Unit); /// Get a name to identify a named decl. - static std::string getLookupName(const NamedDecl *ND); + static llvm::Optional getLookupName(const NamedDecl *ND); /// Emit diagnostics for the user for potential configuration errors. void emitCrossTUDiagnostics(const IndexError &IE); + /// Determine the original source location in the original TU for an + /// imported source location. + /// \p ToLoc Source location in the imported-to AST. + /// \return Source location in the imported-from AST and the corresponding + /// ASTUnit object (the AST was loaded from a file using an internal ASTUnit + /// object that is returned here). + /// If any error happens (ToLoc is a non-imported source location) empty is + /// returned. 
+ llvm::Optional> + getImportedFromSourceLocation(const clang::SourceLocation &ToLoc) const; + private: + using ImportedFileIDMap = + llvm::DenseMap>; + void lazyInitImporterSharedSt(TranslationUnitDecl *ToTU); - ASTImporter &getOrCreateASTImporter(ASTContext &From); + ASTImporter &getOrCreateASTImporter(ASTUnit *Unit); template llvm::Expected getCrossTUDefinitionImpl(const T *D, StringRef CrossTUDir, @@ -174,20 +191,114 @@ private: const T *findDefInDeclContext(const DeclContext *DC, StringRef LookupName); template - llvm::Expected importDefinitionImpl(const T *D); + llvm::Expected importDefinitionImpl(const T *D, ASTUnit *Unit); + + using ImporterMapTy = + llvm::DenseMap>; + + ImporterMapTy ASTUnitImporterMap; - llvm::StringMap> FileASTUnitMap; - llvm::StringMap NameASTUnitMap; - llvm::StringMap NameFileMap; - llvm::DenseMap> - ASTUnitImporterMap; - CompilerInstance &CI; ASTContext &Context; std::shared_ptr ImporterSharedSt; - /// \p CTULoadTreshold should serve as an upper limit to the number of TUs - /// imported in order to reduce the memory footprint of CTU analysis. - const unsigned CTULoadThreshold; - unsigned NumASTLoaded{0u}; + /// Map of imported FileID's (in "To" context) to FileID in "From" context + /// and the ASTUnit for the From context. + /// This map is used by getImportedFromSourceLocation to lookup a FileID and + /// its Preprocessor when knowing only the FileID in the 'To' context. The + /// FileID could be imported by any of multiple 'From' ASTImporter objects. + /// we do not want to loop over all ASTImporter's to find the one that + /// imported the FileID. + ImportedFileIDMap ImportedFileIDs; + + /// Functor for loading ASTUnits from AST-dump files. + class ASTFileLoader { + public: + ASTFileLoader(const CompilerInstance &CI); + std::unique_ptr operator()(StringRef ASTFilePath); + + private: + const CompilerInstance &CI; + }; + + /// Maintain number of AST loads and check for reaching the load limit. 
+ class ASTLoadGuard { + public: + ASTLoadGuard(unsigned Limit) : Limit(Limit) {} + + /// Indicates, whether a new load operation is permitted, it is within the + /// threshold. + operator bool() const { return Count < Limit; } + + /// Tell that a new AST was loaded successfully. + void indicateLoadSuccess() { ++Count; } + + private: + /// The number of ASTs actually imported. + unsigned Count{0u}; + /// The limit (threshold) value for number of loaded ASTs. + const unsigned Limit; + }; + + /// Storage and load of ASTUnits, cached access, and providing searchability + /// are the concerns of ASTUnitStorage class. + class ASTUnitStorage { + public: + ASTUnitStorage(const CompilerInstance &CI); + /// Loads an ASTUnit for a function. + /// + /// \param FunctionName USR name of the function. + /// \param CrossTUDir Path to the directory used to store CTU related files. + /// \param IndexName Name of the file inside \p CrossTUDir which maps + /// function USR names to file paths. These files contain the corresponding + /// AST-dumps. + /// \param DisplayCTUProgress Display a message about loading new ASTs. + /// + /// \return An Expected instance which contains the ASTUnit pointer or the + /// error occured during the load. + llvm::Expected getASTUnitForFunction(StringRef FunctionName, + StringRef CrossTUDir, + StringRef IndexName, + bool DisplayCTUProgress); + /// Identifies the path of the file which can be used to load the ASTUnit + /// for a given function. + /// + /// \param FunctionName USR name of the function. + /// \param CrossTUDir Path to the directory used to store CTU related files. + /// \param IndexName Name of the file inside \p CrossTUDir which maps + /// function USR names to file paths. These files contain the corresponding + /// AST-dumps. + /// + /// \return An Expected instance containing the filepath. 
+ llvm::Expected getFileForFunction(StringRef FunctionName, + StringRef CrossTUDir, + StringRef IndexName); + + private: + llvm::Error ensureCTUIndexLoaded(StringRef CrossTUDir, StringRef IndexName); + llvm::Expected getASTUnitForFile(StringRef FileName, + bool DisplayCTUProgress); + + template using BaseMapTy = llvm::StringMap; + using OwningMapTy = BaseMapTy>; + using NonOwningMapTy = BaseMapTy; + + OwningMapTy FileASTUnitMap; + NonOwningMapTy NameASTUnitMap; + + using IndexMapTy = BaseMapTy; + IndexMapTy NameFileMap; + + ASTFileLoader FileAccessor; + + /// Limit the number of loaded ASTs. Used to limit the memory usage of the + /// CrossTranslationUnitContext. + /// The ASTUnitStorage has the knowledge about if the AST to load is + /// actually loaded or returned from cache. This information is needed to + /// maintain the counter. + ASTLoadGuard LoadGuard; + }; + + ASTUnitStorage ASTStorage; + }; } // namespace cross_tu diff --git a/include/clang/DirectoryWatcher/DirectoryWatcher.h b/include/clang/DirectoryWatcher/DirectoryWatcher.h index e74443e0bc8..4475807dfce 100644 --- a/include/clang/DirectoryWatcher/DirectoryWatcher.h +++ b/include/clang/DirectoryWatcher/DirectoryWatcher.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" #include #include #include @@ -98,10 +99,11 @@ public: : Kind(Kind), Filename(Filename) {} }; - /// Returns nullptr if \param Path doesn't exist or isn't a directory. - /// Returns nullptr if OS kernel API told us we can't start watching. In such - /// case it's unclear whether just retrying has any chance to succeeed. - static std::unique_ptr + /// llvm fatal_error if \param Path doesn't exist or isn't a directory. + /// Returns llvm::Expected Error if OS kernel API told us we can't start + /// watching. In such case it's unclear whether just retrying has any chance + /// to succeed. 
+ static llvm::Expected> create(llvm::StringRef Path, std::function Events, bool IsInitial)> diff --git a/include/clang/Driver/Action.h b/include/clang/Driver/Action.h index c1ff0b1a602..8ccbb6c2bbf 100644 --- a/include/clang/Driver/Action.h +++ b/include/clang/Driver/Action.h @@ -65,15 +65,17 @@ public: BackendJobClass, AssembleJobClass, LinkJobClass, + IfsMergeJobClass, LipoJobClass, DsymutilJobClass, VerifyDebugInfoJobClass, VerifyPCHJobClass, OffloadBundlingJobClass, OffloadUnbundlingJobClass, + OffloadWrapperJobClass, JobClassFirst = PreprocessJobClass, - JobClassLast = OffloadUnbundlingJobClass + JobClassLast = OffloadWrapperJobClass }; // The offloading kind determines if this action is binded to a particular @@ -485,6 +487,17 @@ public: } }; +class IfsMergeJobAction : public JobAction { + void anchor() override; + +public: + IfsMergeJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == IfsMergeJobClass; + } +}; + class LinkJobAction : public JobAction { void anchor() override; @@ -613,6 +626,17 @@ public: } }; +class OffloadWrapperJobAction : public JobAction { + void anchor() override; + +public: + OffloadWrapperJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == OffloadWrapperJobClass; + } +}; + } // namespace driver } // namespace clang diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td index 1f6c000ecf6..4518aca82ef 100644 --- a/include/clang/Driver/CC1Options.td +++ b/include/clang/Driver/CC1Options.td @@ -140,7 +140,8 @@ def analyzer_checker_help_developer : Flag<["-"], "analyzer-checker-help-develop "and debug checkers">; def analyzer_config_help : Flag<["-"], "analyzer-config-help">, - HelpText<"Display the list of -analyzer-config options">; + HelpText<"Display the list of -analyzer-config options. 
These are meant for " + "development purposes only!">; def analyzer_list_enabled_checkers : Flag<["-"], "analyzer-list-enabled-checkers">, HelpText<"Display the list of enabled analyzer checkers">; @@ -200,6 +201,8 @@ def compress_debug_sections_EQ : Joined<["-", "--"], "compress-debug-sections="> HelpText<"DWARF debug sections compression type">; def mno_exec_stack : Flag<["-"], "mnoexecstack">, HelpText<"Mark the file as not needing an executable stack">; +def massembler_no_warn : Flag<["-"], "massembler-no-warn">, + HelpText<"Make assembler not emit warnings">; def massembler_fatal_warnings : Flag<["-"], "massembler-fatal-warnings">, HelpText<"Make assembler warnings fatal">; def mrelax_relocations : Flag<["--"], "mrelax-relocations">, @@ -285,8 +288,8 @@ def mcode_model : Separate<["-"], "mcode-model">, HelpText<"The code model to use">, Values<"tiny,small,kernel,medium,large">; def mdebug_pass : Separate<["-"], "mdebug-pass">, HelpText<"Enable additional debug output">; -def mdisable_fp_elim : Flag<["-"], "mdisable-fp-elim">, - HelpText<"Disable frame pointer elimination optimization">; +def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">, + HelpText<"Specify which frame pointers to retain (all, non-leaf, none).">, Values<"all,non-leaf,none">; def mdisable_tail_calls : Flag<["-"], "mdisable-tail-calls">, HelpText<"Disable tail call optimization, keeping the call stack accurate">; def menable_no_infinities : Flag<["-"], "menable-no-infs">, @@ -684,7 +687,7 @@ let Flags = [CC1Option, CC1AsOption, NoDriverOption] in { def version : Flag<["-"], "version">, HelpText<"Print the compiler version">; def main_file_name : Separate<["-"], "main-file-name">, - HelpText<"Main file name to use for debug info">; + HelpText<"Main file name to use for debug info and source if missing">; def split_dwarf_output : Separate<["-"], "split-dwarf-output">, HelpText<"File name to use for split dwarf debug info output">; @@ -812,6 +815,9 @@ def fdisable_module_hash : Flag<["-"], 
"fdisable-module-hash">, HelpText<"Disable the module hash">; def fmodules_hash_content : Flag<["-"], "fmodules-hash-content">, HelpText<"Enable hashing the content of a module file">; +def fmodules_strict_context_hash : Flag<["-"], "fmodules-strict-context-hash">, + HelpText<"Enable hashing of all compiler options that could impact the " + "semantics of a module in an implicit build">; def c_isystem : JoinedOrSeparate<["-"], "c-isystem">, MetaVarName<"">, HelpText<"Add directory to the C SYSTEM include search path">; def objc_isystem : JoinedOrSeparate<["-"], "objc-isystem">, @@ -843,6 +849,8 @@ def preamble_bytes_EQ : Joined<["-"], "preamble-bytes=">, "covering the first N bytes of the main file">; def detailed_preprocessing_record : Flag<["-"], "detailed-preprocessing-record">, HelpText<"include a detailed record of preprocessing actions">; +def setup_static_analyzer : Flag<["-"], "setup-static-analyzer">, + HelpText<"Set up preprocessor for static analyzer (done automatically when static analyzer is run).">; //===----------------------------------------------------------------------===// // OpenCL Options diff --git a/include/clang/Driver/CLCompatOptions.td b/include/clang/Driver/CLCompatOptions.td index a0af3035ea4..50d4622009c 100644 --- a/include/clang/Driver/CLCompatOptions.td +++ b/include/clang/Driver/CLCompatOptions.td @@ -254,7 +254,13 @@ def _SLASH_Zp_flag : CLFlag<"Zp">, Alias, AliasArgs<["1"]>; def _SLASH_Zs : CLFlag<"Zs">, HelpText<"Syntax-check only">, Alias; - +def _SLASH_openmp_ : CLFlag<"openmp-">, + HelpText<"Disable OpenMP support">, Alias; +def _SLASH_openmp : CLFlag<"openmp">, HelpText<"Enable OpenMP support">, + Alias; +def _SLASH_openmp_experimental : CLFlag<"openmp:experimental">, + HelpText<"Enable OpenMP support with experimental SIMD support">, + Alias; // Non-aliases: @@ -381,7 +387,6 @@ def _SLASH_FS : CLIgnoredFlag<"FS">; def _SLASH_JMC : CLIgnoredFlag<"JMC">; def _SLASH_kernel_ : CLIgnoredFlag<"kernel-">; def _SLASH_nologo : 
CLIgnoredFlag<"nologo">; -def _SLASH_openmp_ : CLIgnoredFlag<"openmp-">; def _SLASH_permissive_ : CLIgnoredFlag<"permissive-">; def _SLASH_RTC : CLIgnoredJoined<"RTC">; def _SLASH_sdl : CLIgnoredFlag<"sdl">; @@ -396,6 +401,9 @@ def _SLASH_Zc_inline : CLIgnoredFlag<"Zc:inline">; def _SLASH_Zc_rvalueCast : CLIgnoredFlag<"Zc:rvalueCast">; def _SLASH_Zc_ternary : CLIgnoredFlag<"Zc:ternary">; def _SLASH_Zc_wchar_t : CLIgnoredFlag<"Zc:wchar_t">; +def _SLASH_ZH_MD5 : CLIgnoredFlag<"ZH:MD5">; +def _SLASH_ZH_SHA1 : CLIgnoredFlag<"ZH:SHA1">; +def _SLASH_ZH_SHA_256 : CLIgnoredFlag<"ZH:SHA_256">; def _SLASH_Zm : CLIgnoredJoined<"Zm">; def _SLASH_Zo : CLIgnoredFlag<"Zo">; def _SLASH_Zo_ : CLIgnoredFlag<"Zo-">; @@ -436,8 +444,6 @@ def _SLASH_hotpatch : CLFlag<"hotpatch">; def _SLASH_kernel : CLFlag<"kernel">; def _SLASH_LN : CLFlag<"LN">; def _SLASH_MP : CLJoined<"MP">; -def _SLASH_openmp : CLFlag<"openmp">; -def _SLASH_openmp_experimental : CLFlag<"openmp:experimental">; def _SLASH_Qfast_transcendentals : CLFlag<"Qfast_transcendentals">; def _SLASH_QIfist : CLFlag<"QIfist">; def _SLASH_Qimprecise_fwaits : CLFlag<"Qimprecise_fwaits">; diff --git a/include/clang/Driver/Driver.h b/include/clang/Driver/Driver.h index f9528641073..5e7283e31ee 100644 --- a/include/clang/Driver/Driver.h +++ b/include/clang/Driver/Driver.h @@ -12,12 +12,14 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" #include "clang/Driver/Action.h" +#include "clang/Driver/Options.h" #include "clang/Driver/Phases.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/StringSaver.h" @@ -55,8 +57,6 @@ enum LTOKind { /// Driver - Encapsulate logic for constructing compilation processes /// from a set of gcc-driver-like command line arguments. 
class Driver { - std::unique_ptr Opts; - DiagnosticsEngine &Diags; IntrusiveRefCntPtr VFS; @@ -250,9 +250,17 @@ private: // getFinalPhase - Determine which compilation mode we are in and record // which option we used to determine the final phase. + // TODO: Much of what getFinalPhase returns are not actually true compiler + // modes. Fold this functionality into Types::getCompilationPhases and + // handleArguments. phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL, llvm::opt::Arg **FinalPhaseArg = nullptr) const; + // handleArguments - All code related to claiming and printing diagnostics + // related to arguments to the driver are done here. + void handleArguments(Compilation &C, llvm::opt::DerivedArgList &Args, + const InputList &Inputs, ActionList &Actions) const; + // Before executing jobs, sets up response files for commands that need them. void setUpResponseFiles(Compilation &C, Command &Cmd); @@ -292,7 +300,7 @@ public: const std::string &getConfigFile() const { return ConfigFile; } - const llvm::opt::OptTable &getOpts() const { return *Opts; } + const llvm::opt::OptTable &getOpts() const { return getDriverOptTable(); } const DiagnosticsEngine &getDiags() const { return Diags; } diff --git a/include/clang/Driver/Options.h b/include/clang/Driver/Options.h index f8963d48112..7c5cddd9e89 100644 --- a/include/clang/Driver/Options.h +++ b/include/clang/Driver/Options.h @@ -47,7 +47,7 @@ enum ID { }; } -std::unique_ptr createDriverOptTable(); +const llvm::opt::OptTable &getDriverOptTable(); } } diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index dfd27fab796..3ce6fcf29f9 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -211,6 +211,10 @@ def clang_ignored_legacy_options_Group : OptionGroup<"">, def : Flag<["-"], "fslp-vectorize-aggressive">, Group; def : Flag<["-"], "fno-slp-vectorize-aggressive">, Group; +// Retired with clang-10.0. Previously controlled X86 MPX ISA. 
+def mmpx : Flag<["-"], "mmpx">, Group; +def mno_mpx : Flag<["-"], "mno-mpx">, Group; + // Group that ignores all gcc optimizations that won't be implemented def clang_ignored_gcc_optimization_f_Group : OptionGroup< "">, Group, Flags<[Ignored]>; @@ -280,6 +284,8 @@ def arcmt_migrate_emit_arc_errors : Flag<["-"], "arcmt-migrate-emit-errors">, Flags<[CC1Option]>; def gen_reproducer: Flag<["-"], "gen-reproducer">, InternalDebugOpt, HelpText<"Auto-generates preprocessed source files and a reproduction script">; +def gen_cdb_fragment_path: Separate<["-"], "gen-cdb-fragment-path">, InternalDebugOpt, + HelpText<"Emit a compilation database fragment to the specified directory">; def _migrate : Flag<["--"], "migrate">, Flags<[DriverOption]>, HelpText<"Run the migrator">; @@ -518,7 +524,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,c++">; + HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, @@ -593,9 +599,11 @@ def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, HelpText<"HIP device library">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, Group, Flags<[NoArgumentUnused, HelpHidden]>; +def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">, + Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">; +def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; -def dA : Flag<["-"], "dA">, Group; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, HelpText<"Print macro definitions in -E mode in addition to normal output">; def dI : Flag<["-"], "dI">, Group, Flags<[CC1Option]>, @@ -623,9 +631,12 @@ def emit_ast : Flag<["-"], "emit-ast">, HelpText<"Emit Clang AST files for source inputs">; def emit_llvm : Flag<["-"], "emit-llvm">, Flags<[CC1Option]>, Group, HelpText<"Use the LLVM representation for assembler and object files">; -def emit_iterface_stubs : Flag<["-"], "emit-interface-stubs">, Flags<[CC1Option]>, Group, +def emit_interface_stubs : Flag<["-"], "emit-interface-stubs">, Flags<[CC1Option]>, Group, HelpText<"Generate Inteface Stub Files.">; -def iterface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>; +def emit_merged_ifs : Flag<["-"], "emit-merged-ifs">, + Flags<[CC1Option]>, Group, + HelpText<"Generate Interface Stub Files, emit merged text not binary.">; +def interface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>; def exported__symbols__list : Separate<["-"], "exported_symbols_list">; def e : JoinedOrSeparate<["-"], "e">, Group; def fPIC : Flag<["-"], "fPIC">, Group; @@ -833,6 
+844,10 @@ def fconstant_cfstrings : Flag<["-"], "fconstant-cfstrings">, Group; def fconstant_string_class_EQ : Joined<["-"], "fconstant-string-class=">, Group; def fconstexpr_depth_EQ : Joined<["-"], "fconstexpr-depth=">, Group; def fconstexpr_steps_EQ : Joined<["-"], "fconstexpr-steps=">, Group; +def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-constant-interpreter">, Group, + HelpText<"Enable the experimental new constant interpreter">, Flags<[CC1Option]>; +def fforce_experimental_new_constant_interpreter : Flag<["-"], "fforce-experimental-new-constant-interpreter">, Group, + HelpText<"Force the use of the experimental new constant interpreter, failing on missing features">, Flags<[CC1Option]>; def fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group; def fno_crash_diagnostics : Flag<["-"], "fno-crash-diagnostics">, Group, Flags<[NoArgumentUnused, CoreOption]>, @@ -899,6 +914,8 @@ def fsjlj_exceptions : Flag<["-"], "fsjlj-exceptions">, Group, Flags<[CC1Option]>, HelpText<"Use SjLj style exceptions">; def fseh_exceptions : Flag<["-"], "fseh-exceptions">, Group, Flags<[CC1Option]>, HelpText<"Use SEH style exceptions">; +def fwasm_exceptions : Flag<["-"], "fwasm-exceptions">, Group, + Flags<[CC1Option]>, HelpText<"Use WebAssembly style exceptions">; def fexcess_precision_EQ : Joined<["-"], "fexcess-precision=">, Group; def : Flag<["-"], "fexpensive-optimizations">, Group; @@ -1043,8 +1060,14 @@ def fsanitize_minimal_runtime : Flag<["-"], "fsanitize-minimal-runtime">, Group; def fno_sanitize_minimal_runtime : Flag<["-"], "fno-sanitize-minimal-runtime">, Group; +def fsanitize_link_runtime : Flag<["-"], "fsanitize-link-runtime">, + Group; +def fno_sanitize_link_runtime : Flag<["-"], "fno-sanitize-link-runtime">, + Group; def fsanitize_link_cxx_runtime : Flag<["-"], "fsanitize-link-c++-runtime">, Group; +def fno_sanitize_link_cxx_runtime : Flag<["-"], "fno-sanitize-link-c++-runtime">, + Group; def 
fsanitize_cfi_cross_dso : Flag<["-"], "fsanitize-cfi-cross-dso">, Group, HelpText<"Enable control flow integrity (CFI) checks for cross-DSO calls.">; @@ -1055,6 +1078,13 @@ def fno_sanitize_cfi_cross_dso : Flag<["-"], "fno-sanitize-cfi-cross-dso">, def fsanitize_cfi_icall_generalize_pointers : Flag<["-"], "fsanitize-cfi-icall-generalize-pointers">, Group, HelpText<"Generalize pointers in CFI indirect call type signature checks">; +def fsanitize_cfi_canonical_jump_tables : Flag<["-"], "fsanitize-cfi-canonical-jump-tables">, + Group, + HelpText<"Make the jump table addresses canonical in the symbol table">; +def fno_sanitize_cfi_canonical_jump_tables : Flag<["-"], "fno-sanitize-cfi-canonical-jump-tables">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"Do not make the jump table addresses canonical in the symbol table">; def fsanitize_stats : Flag<["-"], "fsanitize-stats">, Group, HelpText<"Enable sanitizer statistics gathering.">; @@ -1117,7 +1147,8 @@ def ftrapping_math : Flag<["-"], "ftrapping-math">, Group, Flags<[CC1Op def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group, Flags<[CC1Option]>; def ffp_contract : Joined<["-"], "ffp-contract=">, Group, Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)" - " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">; + " | on (according to FP_CONTRACT pragma) | off (never fuse). 
Default" + " is 'fast' for CUDA/HIP and 'on' otherwise.">, Values<"fast,on,off">; def fstrict_float_cast_overflow : Flag<["-"], "fstrict-float-cast-overflow">, Group, Flags<[CC1Option]>, @@ -1157,6 +1188,9 @@ def fno_use_line_directives : Flag<["-"], "fno-use-line-directives">, Group, Group, Flags<[CC1Option]>, HelpText<"Assert that the compilation takes place in a freestanding environment">; +def fgnuc_version_EQ : Joined<["-"], "fgnuc-version=">, Group, + HelpText<"Sets various macros to claim compatibility with the given GCC version (default is 4.2.1)">, + Flags<[CC1Option, CoreOption]>; def fgnu_keywords : Flag<["-"], "fgnu-keywords">, Group, Flags<[CC1Option]>, HelpText<"Allow GNU-extension keywords regardless of language standard">; def fgnu89_inline : Flag<["-"], "fgnu89-inline">, Group, Flags<[CC1Option]>, @@ -1254,7 +1288,10 @@ def fno_fine_grained_bitfield_accesses : Flag<["-"], HelpText<"Use large-integer access for consecutive bitfield runs.">; def flat__namespace : Flag<["-"], "flat_namespace">; -def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group; +def flax_vector_conversions_EQ : Joined<["-"], "flax-vector-conversions=">, Group, + HelpText<"Enable implicit vector bit-casts">, Values<"none,integer,all">, Flags<[CC1Option]>; +def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group, + Alias, AliasArgs<["integer"]>; def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group; def fapple_link_rtlib : Flag<["-"], "fapple-link-rtlib">, Group, HelpText<"Force linking the clang builtins runtime library">; @@ -1331,6 +1368,28 @@ def fmodules_validate_system_headers : Flag<["-"], "fmodules-validate-system-hea HelpText<"Validate the system headers that a module depends on when loading the module">; def fno_modules_validate_system_headers : Flag<["-"], "fno-modules-validate-system-headers">, Group, Flags<[DriverOption]>; + +def fvalidate_ast_input_files_content: + Flag <["-"], 
"fvalidate-ast-input-files-content">, + Group, Flags<[CC1Option]>, + HelpText<"Compute and store the hash of input files used to build an AST." + " Files with mismatching mtime's are considered valid" + " if both contents is identical">; +def fmodules_validate_input_files_content: + Flag <["-"], "fmodules-validate-input-files-content">, + Group, Flags<[DriverOption]>, + HelpText<"Validate PCM input files based on content if mtime differs">; +def fno_modules_validate_input_files_content: + Flag <["-"], "fno_modules-validate-input-files-content">, + Group, Flags<[DriverOption]>; +def fpch_validate_input_files_content: + Flag <["-"], "fpch-validate-input-files-content">, + Group, Flags<[DriverOption]>, + HelpText<"Validate PCH input files based on content if mtime differs">; +def fno_pch_validate_input_files_content: + Flag <["-"], "fno_pch-validate-input-files-content">, + Group, Flags<[DriverOption]>; + def fmodules : Flag <["-"], "fmodules">, Group, Flags<[DriverOption, CC1Option]>, HelpText<"Enable the 'modules' language feature">; @@ -1428,7 +1487,7 @@ def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-m def fveclib : Joined<["-"], "fveclib=">, Group, Flags<[CC1Option]>, HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">; def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group, - HelpText<"Disallow implicit conversions between vectors with a different number of elements or different element types">, Flags<[CC1Option]>; + Alias, AliasArgs<["none"]>; def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group, HelpText<"Disallow merging of constants">; def fno_modules : Flag <["-"], "fno-modules">, Group, @@ -1571,8 +1630,6 @@ def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>, HelpText<"Specify 
comma-separated list of triples OpenMP offloading targets to be supported">; -def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, - Group, Flags<[NoArgumentUnused, HelpHidden]>; def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">, @@ -1598,6 +1655,8 @@ def fopenmp_optimistic_collapse : Flag<["-"], "fopenmp-optimistic-collapse">, Gr Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fno_openmp_optimistic_collapse : Flag<["-"], "fno-openmp-optimistic-collapse">, Group, Flags<[NoArgumentUnused, HelpHidden]>; +def static_openmp: Flag<["-"], "static-openmp">, + HelpText<"Use the static host OpenMP runtime while linking.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group, Flags<[CC1Option]>; @@ -1683,10 +1742,10 @@ def fstack_protector : Flag<["-"], "fstack-protector">, Group, "alloca, which are of greater size than ssp-buffer-size (default: 8 bytes). 
" "All variable sized calls to alloca are considered vulnerable">; def ftrivial_auto_var_init : Joined<["-"], "ftrivial-auto-var-init=">, Group, - Flags<[CC1Option]>, HelpText<"Initialize trivial automatic stack variables: uninitialized (default)" + Flags<[CC1Option, CoreOption]>, HelpText<"Initialize trivial automatic stack variables: uninitialized (default)" " | pattern">, Values<"uninitialized,pattern">; -def enable_trivial_var_init_zero : Joined<["-"], "enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">, - Flags<[CC1Option]>, +def enable_trivial_var_init_zero : Flag<["-"], "enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">, + Flags<[CC1Option, CoreOption]>, HelpText<"Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark">; def fstandalone_debug : Flag<["-"], "fstandalone-debug">, Group, Flags<[CoreOption]>, HelpText<"Emit full debug info for all types used by the program">; @@ -1757,7 +1816,16 @@ def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group def : Flag<["-"], "fterminated-vtables">, Alias; def fthreadsafe_statics : Flag<["-"], "fthreadsafe-statics">, Group; def ftime_report : Flag<["-"], "ftime-report">, Group, Flags<[CC1Option]>; -def ftime_trace : Flag<["-"], "ftime-trace">, Group, Flags<[CC1Option, CoreOption]>; +def ftime_trace : Flag<["-"], "ftime-trace">, Group, + HelpText<"Turn on time profiler. Generates JSON file based on output filename.">, + DocBrief<[{ +Turn on time profiler. Generates JSON file based on output filename. 
Results +can be analyzed with chrome://tracing or `Speedscope App +`_ for flamegraph visualization.}]>, + Flags<[CC1Option, CoreOption]>; +def ftime_trace_granularity_EQ : Joined<["-"], "ftime-trace-granularity=">, Group, + HelpText<"Minimum time granularity (in microseconds) traced by time profiler">, + Flags<[CC1Option, CoreOption]>; def ftlsmodel_EQ : Joined<["-"], "ftls-model=">, Group, Flags<[CC1Option]>; def ftrapv : Flag<["-"], "ftrapv">, Group, Flags<[CC1Option]>, HelpText<"Trap on integer overflow">; @@ -1791,6 +1859,7 @@ def fuse_init_array : Flag<["-"], "fuse-init-array">, Group, Flags<[CC1 HelpText<"Use .init_array instead of .ctors">; def fno_var_tracking : Flag<["-"], "fno-var-tracking">, Group; def fverbose_asm : Flag<["-"], "fverbose-asm">, Group; +def dA : Flag<["-"], "dA">, Alias; def fvisibility_EQ : Joined<["-"], "fvisibility=">, Group, HelpText<"Set the default symbol visibility for all global declarations">, Values<"hidden,default">; def fvisibility_inlines_hidden : Flag<["-"], "fvisibility-inlines-hidden">, Group, @@ -1816,6 +1885,13 @@ def fforce_emit_vtables : Flag<["-"], "fforce-emit-vtables">, Group, HelpText<"Emits more virtual tables to improve devirtualization">; def fno_force_emit_vtables : Flag<["-"], "fno-force-emit-vtables">, Group, Flags<[CoreOption]>; + +def fvirtual_function_elimination : Flag<["-"], "fvirtual-function-elimination">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enables dead virtual function elimination optimization. 
Requires -flto=full">; +def fno_virtual_function_elimination : Flag<["-"], "fno-virtual-function_elimination">, Group, + Flags<[CoreOption]>; + def fwrapv : Flag<["-"], "fwrapv">, Group, Flags<[CC1Option]>, HelpText<"Treat signed integer overflow as two's complement">; def fwritable_strings : Flag<["-"], "fwritable-strings">, Group, Flags<[CC1Option]>, @@ -2023,9 +2099,14 @@ def malign_jumps_EQ : Joined<["-"], "malign-jumps=">, Group, Group; def mlong_calls : Flag<["-"], "mlong-calls">, Group, HelpText<"Generate branches with extended addressability, usually via indirect jumps.">; -def mlong_double_64 : Flag<["-"], "mlong-double-64">, Group, Flags<[CC1Option]>, +def LongDouble_Group : OptionGroup<"">, Group, + DocName<"Long double flags">, + DocBrief<[{Selects the long double implementation}]>; +def mlong_double_64 : Flag<["-"], "mlong-double-64">, Group, Flags<[CC1Option]>, HelpText<"Force long double to be 64 bits">; -def mlong_double_128 : Flag<["-"], "mlong-double-128">, Group, Flags<[CC1Option]>, +def mlong_double_80 : Flag<["-"], "mlong-double-80">, Group, Flags<[CC1Option]>, + HelpText<"Force long double to be 80 bits, padded to 128 bits for storage">; +def mlong_double_128 : Flag<["-"], "mlong-double-128">, Group, Flags<[CC1Option]>, HelpText<"Force long double to be 128 bits">; def mno_long_calls : Flag<["-"], "mno-long-calls">, Group, HelpText<"Restore the default behaviour of not generating long calls">; @@ -2138,6 +2219,12 @@ def msave_restore : Flag<["-"], "msave-restore">, Group, HelpText<"Enable using library calls for save and restore">; def mno_save_restore : Flag<["-"], "mno-save-restore">, Group, HelpText<"Disable using library calls for save and restore">; +def mcmodel_EQ_medlow : Flag<["-"], "mcmodel=medlow">, Group, + Flags<[CC1Option]>, Alias, AliasArgs<["small"]>, + HelpText<"Equivalent to -mcmodel=small, compatible with RISC-V gcc.">; +def mcmodel_EQ_medany : Flag<["-"], "mcmodel=medany">, Group, + Flags<[CC1Option]>, Alias, 
AliasArgs<["medium"]>, + HelpText<"Equivalent to -mcmodel=medium, compatible with RISC-V gcc.">; def munaligned_access : Flag<["-"], "munaligned-access">, Group, HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; @@ -2241,6 +2328,8 @@ def faltivec : Flag<["-"], "faltivec">, Group, Flags<[DriverOption]>; def fno_altivec : Flag<["-"], "fno-altivec">, Group, Flags<[DriverOption]>; def maltivec : Flag<["-"], "maltivec">, Group; def mno_altivec : Flag<["-"], "mno-altivec">, Group; +def mspe : Flag<["-"], "mspe">, Group; +def mno_spe : Flag<["-"], "mno-spe">, Group; def mvsx : Flag<["-"], "mvsx">, Group; def mno_vsx : Flag<["-"], "mno-vsx">, Group; def msecure_plt : Flag<["-"], "msecure-plt">, Group; @@ -2309,7 +2398,7 @@ def mno_backchain : Flag<["-"], "mno-backchain">, Group, Flags<[DriverO def mno_warn_nonportable_cfstrings : Flag<["-"], "mno-warn-nonportable-cfstrings">, Group; def mno_omit_leaf_frame_pointer : Flag<["-"], "mno-omit-leaf-frame-pointer">, Group; def momit_leaf_frame_pointer : Flag<["-"], "momit-leaf-frame-pointer">, Group, - HelpText<"Omit frame pointer setup for leaf functions">, Flags<[CC1Option]>; + HelpText<"Omit frame pointer setup for leaf functions">; def moslib_EQ : Joined<["-"], "moslib=">, Group; def mpascal_strings : Flag<["-"], "mpascal-strings">, Alias; def mred_zone : Flag<["-"], "mred-zone">, Group; @@ -2340,7 +2429,7 @@ def mpie_copy_relocations : Flag<["-"], "mpie-copy-relocations">, Group Flags<[CC1Option]>, HelpText<"Use copy relocations support for PIE builds">; def mno_pie_copy_relocations : Flag<["-"], "mno-pie-copy-relocations">, Group; -def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86 only)">, +def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, Flags<[CC1Option]>, Group; def mips16 : Flag<["-"], "mips16">, Group; def mno_mips16 : Flag<["-"], "mno-mips16">, Group; @@ -2506,7 +2595,9 @@ def 
no__dead__strip__inits__and__terms : Flag<["-"], "no_dead_strip_inits_and_te def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option, CoreOption]>, HelpText<"Disable builtin #include directories">; def nocudainc : Flag<["-"], "nocudainc">; -def nocudalib : Flag<["-"], "nocudalib">; +def nogpulib : Flag<["-"], "nogpulib">, + HelpText<"Do not link device library for CUDA/HIP device compilation">; +def : Flag<["-"], "nocudalib">, Alias; def nodefaultlibs : Flag<["-"], "nodefaultlibs">; def nofixprebinding : Flag<["-"], "nofixprebinding">; def nolibc : Flag<["-"], "nolibc">; @@ -2627,11 +2718,15 @@ def std_EQ : Joined<["-", "--"], "std=">, Flags<[CC1Option]>, const char *Values = #define LANGSTANDARD(id, name, lang, desc, features) name "," #define LANGSTANDARD_ALIAS(id, alias) alias "," - #include "clang/Frontend/LangStandards.def" + #include "clang/Basic/LangStandards.def" ; }]>; def stdlib_EQ : Joined<["-", "--"], "stdlib=">, Flags<[CC1Option]>, HelpText<"C++ standard library to use">, Values<"libc++,libstdc++,platform">; +def stdlibxx_isystem : JoinedOrSeparate<["-"], "stdlib++-isystem">, + Group, + HelpText<"Use directory as the C++ standard library include path">, + Flags<[DriverOption]>, MetaVarName<"">; def unwindlib_EQ : Joined<["-", "--"], "unwindlib=">, Flags<[CC1Option]>, HelpText<"Unwind library to use">, Values<"libgcc,unwindlib,platform">; def sub__library : JoinedOrSeparate<["-"], "sub_library">; @@ -2711,7 +2806,6 @@ def _mhwdiv : Separate<["--"], "mhwdiv">, Alias; def _CLASSPATH_EQ : Joined<["--"], "CLASSPATH=">, Alias; def _CLASSPATH : Separate<["--"], "CLASSPATH">, Alias; def _all_warnings : Flag<["--"], "all-warnings">, Alias; -def _analyze_auto : Flag<["--"], "analyze-auto">, Flags<[DriverOption]>; def _analyzer_no_default_checks : Flag<["--"], "analyzer-no-default-checks">, Flags<[DriverOption]>; def _analyzer_output : JoinedOrSeparate<["--"], "analyzer-output">, Flags<[DriverOption]>, HelpText<"Static analyzer report output format 
(html|plist|plist-multi-file|plist-html|text).">; @@ -2972,8 +3066,6 @@ def mmovdiri : Flag<["-"], "mmovdiri">, Group; def mno_movdiri : Flag<["-"], "mno-movdiri">, Group; def mmovdir64b : Flag<["-"], "mmovdir64b">, Group; def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group; -def mmpx : Flag<["-"], "mmpx">, Group; -def mno_mpx : Flag<["-"], "mno-mpx">, Group; def mmwaitx : Flag<["-"], "mmwaitx">, Group; def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group; def mpku : Flag<["-"], "mpku">, Group; diff --git a/include/clang/Driver/Phases.h b/include/clang/Driver/Phases.h index 7199c657848..63931c00c89 100644 --- a/include/clang/Driver/Phases.h +++ b/include/clang/Driver/Phases.h @@ -20,7 +20,8 @@ namespace phases { Compile, Backend, Assemble, - Link + Link, + IfsMerge, }; enum { diff --git a/include/clang/Driver/SanitizerArgs.h b/include/clang/Driver/SanitizerArgs.h index 957e752b687..c37499e0f20 100644 --- a/include/clang/Driver/SanitizerArgs.h +++ b/include/clang/Driver/SanitizerArgs.h @@ -32,6 +32,7 @@ class SanitizerArgs { bool MsanUseAfterDtor = true; bool CfiCrossDso = false; bool CfiICallGeneralizePointers = false; + bool CfiCanonicalJumpTables = false; int AsanFieldPadding = 0; bool SharedRuntime = false; bool AsanUseAfterScope = true; @@ -41,6 +42,7 @@ class SanitizerArgs { bool AsanInvalidPointerCmp = false; bool AsanInvalidPointerSub = false; std::string HwasanAbi; + bool LinkRuntimes = true; bool LinkCXXRuntimes = false; bool NeedPIE = false; bool SafeStackRuntime = false; @@ -59,7 +61,9 @@ class SanitizerArgs { bool needsSharedRt() const { return SharedRuntime; } bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); } - bool needsHwasanRt() const { return Sanitizers.has(SanitizerKind::HWAddress); } + bool needsHwasanRt() const { + return Sanitizers.has(SanitizerKind::HWAddress); + } bool needsTsanRt() const { return Sanitizers.has(SanitizerKind::Thread); } bool needsMsanRt() const { return Sanitizers.has(SanitizerKind::Memory); } bool 
needsFuzzer() const { return Sanitizers.has(SanitizerKind::Fuzzer); } @@ -80,6 +84,7 @@ class SanitizerArgs { bool requiresPIE() const; bool needsUnwindTables() const; bool needsLTO() const; + bool linkRuntimes() const { return LinkRuntimes; } bool linkCXXRuntimes() const { return LinkCXXRuntimes; } bool hasCrossDsoCfi() const { return CfiCrossDso; } bool hasAnySanitizer() const { return !Sanitizers.empty(); } diff --git a/include/clang/Driver/ToolChain.h b/include/clang/Driver/ToolChain.h index 7dd3db376c8..f0676eee2d6 100644 --- a/include/clang/Driver/ToolChain.h +++ b/include/clang/Driver/ToolChain.h @@ -136,13 +136,17 @@ private: mutable std::unique_ptr Clang; mutable std::unique_ptr Assemble; mutable std::unique_ptr Link; + mutable std::unique_ptr IfsMerge; mutable std::unique_ptr OffloadBundler; + mutable std::unique_ptr OffloadWrapper; Tool *getClang() const; Tool *getAssemble() const; Tool *getLink() const; + Tool *getIfsMerge() const; Tool *getClangAs() const; Tool *getOffloadBundler() const; + Tool *getOffloadWrapper() const; mutable std::unique_ptr SanitizerArguments; mutable std::unique_ptr XRayArguments; @@ -542,6 +546,11 @@ public: AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; + /// AddClangCXXStdlibIsystemArgs - Add the clang -cc1 level arguments to set + /// the specified include paths for the C++ standard library. + void AddClangCXXStdlibIsystemArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + /// Returns if the C++ standard library should be linked in. /// Note that e.g. -lm should still be linked even if this returns false. 
bool ShouldLinkCXXStdlib(const llvm::opt::ArgList &Args) const; diff --git a/include/clang/Driver/Types.def b/include/clang/Driver/Types.def index b45789d4b31..79e8d109cd9 100644 --- a/include/clang/Driver/Types.def +++ b/include/clang/Driver/Types.def @@ -29,77 +29,74 @@ // The fourth value is the suffix to use when creating temporary files // of this type, or null if unspecified. -// The fifth value is a string containing option flags. Valid values: -// a - The type should only be assembled. -// p - The type should only be precompiled. -// u - The type can be user specified (with -x). -// m - Precompiling this type produces a module file. -// A - The type's temporary suffix should be appended when generating -// outputs of this type. - +// The final value is a variadic list of phases for each type. Eventually the +// options flag string will be replaced with this variadic list. +// Most of the options in Flags have been removed in favor of subsuming their +// meaning from the phases list. // C family source language (with and without preprocessing). 
-TYPE("cpp-output", PP_C, INVALID, "i", "u") -TYPE("c", C, PP_C, "c", "u") -TYPE("cl", CL, PP_C, "cl", "u") -TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", "u") -TYPE("cuda", CUDA, PP_CUDA, "cu", "u") -TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", "") -TYPE("hip-cpp-output", PP_HIP, INVALID, "cui", "u") -TYPE("hip", HIP, PP_HIP, "cu", "u") -TYPE("hip", HIP_DEVICE, PP_HIP, "cu", "") -TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", "u") -TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", "u") -TYPE("objective-c", ObjC, PP_ObjC, "m", "u") -TYPE("c++-cpp-output", PP_CXX, INVALID, "ii", "u") -TYPE("c++", CXX, PP_CXX, "cpp", "u") -TYPE("objective-c++-cpp-output", PP_ObjCXX, INVALID, "mii", "u") -TYPE("objc++-cpp-output", PP_ObjCXX_Alias, INVALID, "mii", "u") -TYPE("objective-c++", ObjCXX, PP_ObjCXX, "mm", "u") -TYPE("renderscript", RenderScript, PP_C, "rs", "u") +TYPE("cpp-output", PP_C, INVALID, "i", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("c", C, PP_C, "c", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cl", CL, PP_C, "cl", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cuda-cpp-output", PP_CUDA, INVALID, "cui", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cuda", CUDA, PP_CUDA, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cuda", CUDA_DEVICE, PP_CUDA, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip-cpp-output", PP_HIP, INVALID, "cui", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip", HIP, PP_HIP, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip", HIP_DEVICE, PP_HIP, "cu", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objective-c-cpp-output", PP_ObjC, INVALID, "mi", 
phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objc-cpp-output", PP_ObjC_Alias, INVALID, "mi", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objective-c", ObjC, PP_ObjC, "m", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("c++-cpp-output", PP_CXX, INVALID, "ii", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("c++", CXX, PP_CXX, "cpp", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objective-c++-cpp-output", PP_ObjCXX, INVALID, "mii", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objc++-cpp-output", PP_ObjCXX_Alias, INVALID, "mii", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("objective-c++", ObjCXX, PP_ObjCXX, "mm", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("renderscript", RenderScript, PP_C, "rs", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) // C family input files to precompile. 
-TYPE("c-header-cpp-output", PP_CHeader, INVALID, "i", "p") -TYPE("c-header", CHeader, PP_CHeader, "h", "pu") -TYPE("cl-header", CLHeader, PP_CHeader, "h", "pu") -TYPE("objective-c-header-cpp-output", PP_ObjCHeader, INVALID, "mi", "p") -TYPE("objective-c-header", ObjCHeader, PP_ObjCHeader, "h", "pu") -TYPE("c++-header-cpp-output", PP_CXXHeader, INVALID, "ii", "p") -TYPE("c++-header", CXXHeader, PP_CXXHeader, "hh", "pu") -TYPE("objective-c++-header-cpp-output", PP_ObjCXXHeader, INVALID, "mii", "p") -TYPE("objective-c++-header", ObjCXXHeader, PP_ObjCXXHeader, "h", "pu") -TYPE("c++-module", CXXModule, PP_CXXModule, "cppm", "mu") -TYPE("c++-module-cpp-output", PP_CXXModule, INVALID, "iim", "m") +TYPE("c-header-cpp-output", PP_CHeader, INVALID, "i", phases::Precompile) +TYPE("c-header", CHeader, PP_CHeader, "h", phases::Preprocess, phases::Precompile) +TYPE("cl-header", CLHeader, PP_CHeader, "h", phases::Preprocess, phases::Precompile) +TYPE("objective-c-header-cpp-output", PP_ObjCHeader, INVALID, "mi", phases::Precompile) +TYPE("objective-c-header", ObjCHeader, PP_ObjCHeader, "h", phases::Preprocess, phases::Precompile) +TYPE("c++-header-cpp-output", PP_CXXHeader, INVALID, "ii", phases::Precompile) +TYPE("c++-header", CXXHeader, PP_CXXHeader, "hh", phases::Preprocess, phases::Precompile) +TYPE("objective-c++-header-cpp-output", PP_ObjCXXHeader, INVALID, "mii", phases::Precompile) +TYPE("objective-c++-header", ObjCXXHeader, PP_ObjCXXHeader, "h", phases::Preprocess, phases::Precompile) +TYPE("c++-module", CXXModule, PP_CXXModule, "cppm", phases::Preprocess, phases::Precompile, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("c++-module-cpp-output", PP_CXXModule, INVALID, "iim", phases::Precompile, phases::Compile, phases::Backend, phases::Assemble, phases::Link) // Other languages. 
-TYPE("ada", Ada, INVALID, nullptr, "u") -TYPE("assembler", PP_Asm, INVALID, "s", "au") -TYPE("assembler-with-cpp", Asm, PP_Asm, "S", "au") -TYPE("f95", PP_Fortran, INVALID, nullptr, "u") -TYPE("f95-cpp-input", Fortran, PP_Fortran, nullptr, "u") -TYPE("java", Java, INVALID, nullptr, "u") +TYPE("ada", Ada, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("assembler", PP_Asm, INVALID, "s", phases::Assemble, phases::Link) +TYPE("assembler-with-cpp", Asm, PP_Asm, "S", phases::Preprocess, phases::Assemble, phases::Link) +TYPE("f95", PP_Fortran, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("f95-cpp-input", Fortran, PP_Fortran, nullptr, phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("java", Java, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) // LLVM IR/LTO types. We define separate types for IR and LTO because LTO // outputs should use the standard suffixes. -TYPE("ir", LLVM_IR, INVALID, "ll", "u") -TYPE("ir", LLVM_BC, INVALID, "bc", "u") -TYPE("lto-ir", LTO_IR, INVALID, "s", "") -TYPE("lto-bc", LTO_BC, INVALID, "o", "") +TYPE("ir", LLVM_IR, INVALID, "ll", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("ir", LLVM_BC, INVALID, "bc", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("lto-ir", LTO_IR, INVALID, "s", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("lto-bc", LTO_BC, INVALID, "o", phases::Compile, phases::Backend, phases::Assemble, phases::Link) // Misc. 
-TYPE("ast", AST, INVALID, "ast", "u") -TYPE("ifs", IFS, INVALID, "ifs", "u") -TYPE("pcm", ModuleFile, INVALID, "pcm", "u") -TYPE("plist", Plist, INVALID, "plist", "") -TYPE("rewritten-objc", RewrittenObjC,INVALID, "cpp", "") -TYPE("rewritten-legacy-objc", RewrittenLegacyObjC,INVALID, "cpp", "") -TYPE("remap", Remap, INVALID, "remap", "") -TYPE("precompiled-header", PCH, INVALID, "gch", "A") -TYPE("object", Object, INVALID, "o", "") -TYPE("treelang", Treelang, INVALID, nullptr, "u") -TYPE("image", Image, INVALID, "out", "") -TYPE("dSYM", dSYM, INVALID, "dSYM", "A") -TYPE("dependencies", Dependencies, INVALID, "d", "") -TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A") -TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", "A") -TYPE("none", Nothing, INVALID, nullptr, "u") +TYPE("ast", AST, INVALID, "ast", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("ifs", IFS, INVALID, "ifs", phases::IfsMerge) +TYPE("ifs-cpp", IFS_CPP, INVALID, "ifs", phases::Compile, phases::IfsMerge) +TYPE("pcm", ModuleFile, INVALID, "pcm", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("plist", Plist, INVALID, "plist", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("rewritten-objc", RewrittenObjC,INVALID, "cpp", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("rewritten-legacy-objc", RewrittenLegacyObjC,INVALID, "cpp", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("remap", Remap, INVALID, "remap", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("precompiled-header", PCH, INVALID, "gch", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("object", Object, INVALID, "o", phases::Link) +TYPE("treelang", Treelang, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("image", Image, INVALID, "out", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("dSYM", dSYM, INVALID, 
"dSYM", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("dependencies", Dependencies, INVALID, "d", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("none", Nothing, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) diff --git a/include/clang/Driver/Types.h b/include/clang/Driver/Types.h index 53afada7abc..a605450e6e3 100644 --- a/include/clang/Driver/Types.h +++ b/include/clang/Driver/Types.h @@ -11,16 +11,18 @@ #include "clang/Driver/Phases.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Option/ArgList.h" namespace llvm { class StringRef; } namespace clang { namespace driver { +class Driver; namespace types { enum ID { TY_INVALID, -#define TYPE(NAME, ID, PP_TYPE, TEMP_SUFFIX, FLAGS) TY_##ID, +#define TYPE(NAME, ID, PP_TYPE, TEMP_SUFFIX, ...) TY_##ID, #include "clang/Driver/Types.def" #undef TYPE TY_LAST @@ -100,6 +102,9 @@ namespace types { void getCompilationPhases( ID Id, llvm::SmallVectorImpl &Phases); + void getCompilationPhases(const clang::driver::Driver &Driver, + llvm::opt::DerivedArgList &DAL, ID Id, + llvm::SmallVectorImpl &Phases); /// lookupCXXTypeForCType - Lookup CXX input type that corresponds to given /// C type (used for clang++ emulation of g++ behaviour) diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h index 6388e4fc172..7e71b7e8b16 100644 --- a/include/clang/Format/Format.h +++ b/include/clang/Format/Format.h @@ -216,10 +216,37 @@ struct FormatStyle { /// \endcode bool AllowAllParametersOfDeclarationOnNextLine; - /// Allows contracting simple braced statements to a single line. - /// - /// E.g., this allows ``if (a) { return; }`` to be put on a single line. 
- bool AllowShortBlocksOnASingleLine; + /// Different styles for merging short blocks containing at most one + /// statement. + enum ShortBlockStyle { + /// Never merge blocks into a single line. + /// \code + /// while (true) { + /// } + /// while (true) { + /// continue; + /// } + /// \endcode + SBS_Never, + /// Only merge empty blocks. + /// \code + /// while (true) {} + /// while (true) { + /// continue; + /// } + /// \endcode + SBS_Empty, + /// Always merge short blocks into a single line. + /// \code + /// while (true) {} + /// while (true) { continue; } + /// \endcode + SBS_Always, + }; + + /// Dependent on the value, ``while (true) { continue; }`` can be put on a + /// single line. + ShortBlockStyle AllowShortBlocksOnASingleLine; /// If ``true``, short case labels will be contracted to a single line. /// \code @@ -462,38 +489,38 @@ struct FormatStyle { /// Different ways to break after the template declaration. enum BreakTemplateDeclarationsStyle { - /// Do not force break before declaration. - /// ``PenaltyBreakTemplateDeclaration`` is taken into account. - /// \code - /// template T foo() { - /// } - /// template T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_No, - /// Force break after template declaration only when the following - /// declaration spans multiple lines. - /// \code - /// template T foo() { - /// } - /// template - /// T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_MultiLine, - /// Always break after template declaration. - /// \code - /// template - /// T foo() { - /// } - /// template - /// T foo(int aaaaaaaaaaaaaaaaaaaaa, - /// int bbbbbbbbbbbbbbbbbbbbb) { - /// } - /// \endcode - BTDS_Yes + /// Do not force break before declaration. + /// ``PenaltyBreakTemplateDeclaration`` is taken into account. 
+ /// \code + /// template T foo() { + /// } + /// template T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_No, + /// Force break after template declaration only when the following + /// declaration spans multiple lines. + /// \code + /// template T foo() { + /// } + /// template + /// T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_MultiLine, + /// Always break after template declaration. + /// \code + /// template + /// T foo() { + /// } + /// template + /// T foo(int aaaaaaaaaaaaaaaaaaaaa, + /// int bbbbbbbbbbbbbbbbbbbbb) { + /// } + /// \endcode + BTDS_Yes }; /// The template declaration breaking style to use. @@ -706,6 +733,32 @@ struct FormatStyle { /// B /// }; /// \endcode + BS_Whitesmiths, + /// Like ``Allman`` but always indent braces and line up code with braces. + /// \code + /// try + /// { + /// foo(); + /// } + /// catch () + /// { + /// } + /// void foo() { bar(); } + /// class foo + /// { + /// }; + /// if (foo()) + /// { + /// } + /// else + /// { + /// } + /// enum X : int + /// { + /// A, + /// B + /// }; + /// \endcode BS_GNU, /// Like ``Attach``, but break before functions. /// \code @@ -729,6 +782,40 @@ struct FormatStyle { /// The brace breaking style to use. BraceBreakingStyle BreakBeforeBraces; + // Different ways to wrap braces after control statements. + enum BraceWrappingAfterControlStatementStyle { + /// Never wrap braces after a control statement. + /// \code + /// if (foo()) { + /// } else { + /// } + /// for (int i = 0; i < 10; ++i) { + /// } + /// \endcode + BWACS_Never, + /// Only wrap braces after a multi-line control statement. + /// \code + /// if (foo && bar && + /// baz) + /// { + /// quux(); + /// } + /// while (foo || bar) { + /// } + /// \endcode + BWACS_MultiLine, + /// Always wrap braces after a control statement. 
+ /// \code + /// if (foo()) + /// { + /// } else + /// {} + /// for (int i = 0; i < 10; ++i) + /// {} + /// \endcode + BWACS_Always + }; + /// Precise control over the wrapping of braces. /// \code /// # Should be declared this way: @@ -764,23 +851,7 @@ struct FormatStyle { /// \endcode bool AfterClass; /// Wrap control statements (``if``/``for``/``while``/``switch``/..). - /// \code - /// true: - /// if (foo()) - /// { - /// } else - /// {} - /// for (int i = 0; i < 10; ++i) - /// {} - /// - /// false: - /// if (foo()) { - /// } else { - /// } - /// for (int i = 0; i < 10; ++i) { - /// } - /// \endcode - bool AfterControlStatement; + BraceWrappingAfterControlStatementStyle AfterControlStatement; /// Wrap enum definitions. /// \code /// true: @@ -1238,6 +1309,22 @@ struct FormatStyle { /// \endcode bool IndentCaseLabels; + /// Indent goto labels. + /// + /// When ``false``, goto labels are flushed left. + /// \code + /// true: false: + /// int f() { vs. int f() { + /// if (foo()) { if (foo()) { + /// label1: label1: + /// bar(); bar(); + /// } } + /// label2: label2: + /// return 1; return 1; + /// } } + /// \endcode + bool IndentGotoLabels; + /// Options for indenting preprocessor directives. enum PPDirectiveIndentStyle { /// Does not indent any directives. @@ -1711,8 +1798,8 @@ struct FormatStyle { /// If ``false``, spaces will be removed before assignment operators. /// \code /// true: false: - /// int a = 5; vs. int a=5; - /// a += 42 a+=42; + /// int a = 5; vs. int a= 5; + /// a += 42; a+= 42; /// \endcode bool SpaceBeforeAssignmentOperators; @@ -1799,6 +1886,14 @@ struct FormatStyle { /// \endcode bool SpaceBeforeRangeBasedForLoopColon; + /// If ``true``, spaces will be inserted into ``{}``. + /// \code + /// true: false: + /// void f() { } vs. void f() {} + /// while (true) { } while (true) {} + /// \endcode + bool SpaceInEmptyBlock; + /// If ``true``, spaces may be inserted into ``()``. 
/// \code /// true: false: @@ -1868,15 +1963,32 @@ struct FormatStyle { /// \endcode bool SpacesInSquareBrackets; - /// Supported language standards. + /// Supported language standards for parsing and formatting C++ constructs. + /// \code + /// Latest: vector> + /// c++03 vs. vector > + /// \endcode + /// + /// The correct way to spell a specific language version is e.g. ``c++11``. + /// The historical aliases ``Cpp03`` and ``Cpp11`` are deprecated. enum LanguageStandard { - /// Use C++03-compatible syntax. + /// c++03: Parse and format as C++03. LS_Cpp03, - /// Use features of C++11, C++14 and C++1z (e.g. ``A>`` instead of - /// ``A >``). + /// c++11: Parse and format as C++11. LS_Cpp11, - /// Automatic detection based on the input. - LS_Auto + /// c++14: Parse and format as C++14. + LS_Cpp14, + /// c++17: Parse and format as C++17. + LS_Cpp17, + /// c++20: Parse and format as C++20. + LS_Cpp20, + /// Latest: Parse and format using the latest supported language version. + /// 'Cpp11' is an alias for LS_Latest for historical reasons. + LS_Latest, + + /// Auto: Automatic detection based on the input. + /// Parse using the latest language version. Format based on detected input. + LS_Auto, }; /// Format compatible with this standard, e.g. 
use ``A >`` @@ -1955,6 +2067,7 @@ struct FormatStyle { IncludeStyle.IncludeBlocks == R.IncludeStyle.IncludeBlocks && IncludeStyle.IncludeCategories == R.IncludeStyle.IncludeCategories && IndentCaseLabels == R.IndentCaseLabels && + IndentGotoLabels == R.IndentGotoLabels && IndentPPDirectives == R.IndentPPDirectives && IndentWidth == R.IndentWidth && Language == R.Language && IndentWrappedFunctionNames == R.IndentWrappedFunctionNames && @@ -1995,6 +2108,7 @@ struct FormatStyle { SpaceBeforeParens == R.SpaceBeforeParens && SpaceBeforeRangeBasedForLoopColon == R.SpaceBeforeRangeBasedForLoopColon && + SpaceInEmptyBlock == R.SpaceInEmptyBlock && SpaceInEmptyParentheses == R.SpaceInEmptyParentheses && SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments && SpacesInAngles == R.SpacesInAngles && @@ -2072,6 +2186,10 @@ FormatStyle getWebKitStyle(); /// http://www.gnu.org/prep/standards/standards.html FormatStyle getGNUStyle(); +/// Returns a format style complying with Microsoft style guide: +/// https://docs.microsoft.com/en-us/visualstudio/ide/editorconfig-code-style-settings-reference?view=vs-2017 +FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language); + /// Returns style indicating formatting should be not applied at all. FormatStyle getNoStyle(); diff --git a/include/clang/Frontend/ASTUnit.h b/include/clang/Frontend/ASTUnit.h index 7fb1d2d9338..a36655150d4 100644 --- a/include/clang/Frontend/ASTUnit.h +++ b/include/clang/Frontend/ASTUnit.h @@ -315,7 +315,7 @@ public: CodeCompletionTUInfo &getCodeCompletionTUInfo() { if (!CCTUInfo) - CCTUInfo = llvm::make_unique( + CCTUInfo = std::make_unique( std::make_shared()); return *CCTUInfo; } @@ -390,7 +390,7 @@ private: /// just about any usage. /// Becomes a noop in release mode; only useful for debug mode checking. 
class ConcurrencyState { - void *Mutex; // a llvm::sys::MutexImpl in debug; + void *Mutex; // a std::recursive_mutex in debug; public: ConcurrencyState(); @@ -832,6 +832,7 @@ public: SkipFunctionBodiesScope::None, bool SingleFileParse = false, bool UserFilesAreVolatile = false, bool ForSerialization = false, + bool RetainExcludedConditionalBlocks = false, llvm::Optional ModuleFormat = llvm::None, std::unique_ptr *ErrAST = nullptr, IntrusiveRefCntPtr VFS = nullptr); diff --git a/include/clang/Frontend/CompilerInstance.h b/include/clang/Frontend/CompilerInstance.h index eb49c53ff40..d15bdc4665a 100644 --- a/include/clang/Frontend/CompilerInstance.h +++ b/include/clang/Frontend/CompilerInstance.h @@ -155,6 +155,12 @@ class CompilerInstance : public ModuleLoader { /// One or more modules failed to build. bool ModuleBuildFailed = false; + /// The stream for verbose output if owned, otherwise nullptr. + std::unique_ptr OwnedVerboseOutputStream; + + /// The stream for verbose output. + raw_ostream *VerboseOutputStream = &llvm::errs(); + /// Holds information about the output file. /// /// If TempFilename is not empty we must rename it to Filename at the end. @@ -217,9 +223,6 @@ public: /// \param Act - The action to execute. /// \return - True on success. // - // FIXME: This function should take the stream to write any debugging / - // verbose output to as an argument. - // // FIXME: Eliminate the llvm_shutdown requirement, that should either be part // of the context or else not CompilerInstance specific. bool ExecuteAction(FrontendAction &Act); @@ -349,6 +352,21 @@ public: return *Diagnostics->getClient(); } + /// } + /// @name VerboseOutputStream + /// } + + /// Replace the current stream for verbose output. + void setVerboseOutputStream(raw_ostream &Value); + + /// Replace the current stream for verbose output. + void setVerboseOutputStream(std::unique_ptr Value); + + /// Get the current stream for verbose output. 
+ raw_ostream &getVerboseOutputStream() { + return *VerboseOutputStream; + } + /// } /// @name Target Info /// { diff --git a/include/clang/Frontend/CompilerInvocation.h b/include/clang/Frontend/CompilerInvocation.h index 413134be4ce..f3253d5b40e 100644 --- a/include/clang/Frontend/CompilerInvocation.h +++ b/include/clang/Frontend/CompilerInvocation.h @@ -14,13 +14,14 @@ #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/LangStandard.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendOptions.h" -#include "clang/Frontend/LangStandard.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/ArrayRef.h" #include #include @@ -147,13 +148,14 @@ public: /// Create a compiler invocation from a list of input options. /// \returns true on success. /// + /// \returns false if an error was encountered while parsing the arguments + /// and attempts to recover and continue parsing the rest of the arguments. + /// The recovery is best-effort and only guarantees that \p Res will end up in + /// one of the vaild-to-access (albeit arbitrary) states. + /// /// \param [out] Res - The resulting invocation. - /// \param ArgBegin - The first element in the argument vector. - /// \param ArgEnd - The last element in the argument vector. - /// \param Diags - The diagnostic engine to use for errors. 
static bool CreateFromArgs(CompilerInvocation &Res, - const char* const *ArgBegin, - const char* const *ArgEnd, + ArrayRef CommandLineArgs, DiagnosticsEngine &Diags); /// Get the directory where the compiler headers diff --git a/include/clang/Frontend/FrontendActions.h b/include/clang/Frontend/FrontendActions.h index 6c7bc6046f3..89ac20075fa 100644 --- a/include/clang/Frontend/FrontendActions.h +++ b/include/clang/Frontend/FrontendActions.h @@ -126,14 +126,7 @@ protected: bool hasASTFileSupport() const override { return false; } }; -// Support different interface stub formats this way: -class GenerateInterfaceYAMLExpV1Action : public GenerateInterfaceStubAction { -protected: - std::unique_ptr CreateASTConsumer(CompilerInstance &CI, - StringRef InFile) override; -}; - -class GenerateInterfaceTBEExpV1Action : public GenerateInterfaceStubAction { +class GenerateInterfaceIfsExpV1Action : public GenerateInterfaceStubAction { protected: std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override; diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h index a0acb1f066f..09d83adf579 100644 --- a/include/clang/Frontend/FrontendOptions.h +++ b/include/clang/Frontend/FrontendOptions.h @@ -10,15 +10,16 @@ #define LLVM_CLANG_FRONTEND_FRONTENDOPTIONS_H #include "clang/AST/ASTDumperUtils.h" +#include "clang/Basic/LangStandard.h" #include "clang/Frontend/CommandLineSourceLoc.h" -#include "clang/Serialization/ModuleFileExtension.h" #include "clang/Sema/CodeCompleteOptions.h" +#include "clang/Serialization/ModuleFileExtension.h" #include "llvm/ADT/StringRef.h" #include #include #include -#include #include +#include namespace llvm { @@ -89,8 +90,7 @@ enum ActionKind { GeneratePCH, /// Generate Interface Stub Files. - GenerateInterfaceYAMLExpV1, - GenerateInterfaceTBEExpV1, + GenerateInterfaceIfsExpV1, /// Only execute frontend initialization. 
InitOnly, @@ -143,35 +143,11 @@ enum ActionKind { /// The kind of a file that we've been handed as an input. class InputKind { private: - unsigned Lang : 4; + Language Lang; unsigned Fmt : 3; unsigned Preprocessed : 1; public: - /// The language for the input, used to select and validate the language - /// standard and possible actions. - enum Language { - Unknown, - - /// Assembly: we accept this only so that we can preprocess it. - Asm, - - /// LLVM IR: we accept this so that we can run the optimizer on it, - /// and compile it to assembly or object code. - LLVM_IR, - - ///@{ Languages that the frontend can parse and compile. - C, - CXX, - ObjC, - ObjCXX, - OpenCL, - CUDA, - RenderScript, - HIP, - ///@} - }; - /// The input file format. enum Format { Source, @@ -179,7 +155,7 @@ public: Precompiled }; - constexpr InputKind(Language L = Unknown, Format F = Source, + constexpr InputKind(Language L = Language::Unknown, Format F = Source, bool PP = false) : Lang(L), Fmt(F), Preprocessed(PP) {} @@ -188,10 +164,12 @@ public: bool isPreprocessed() const { return Preprocessed; } /// Is the input kind fully-unknown? - bool isUnknown() const { return Lang == Unknown && Fmt == Source; } + bool isUnknown() const { return Lang == Language::Unknown && Fmt == Source; } /// Is the language of the input some dialect of Objective-C? - bool isObjectiveC() const { return Lang == ObjC || Lang == ObjCXX; } + bool isObjectiveC() const { + return Lang == Language::ObjC || Lang == Language::ObjCXX; + } InputKind getPreprocessed() const { return InputKind(getLanguage(), getFormat(), true); @@ -451,6 +429,9 @@ public: /// Filename to write statistics to. std::string StatsFile; + /// Minimum time granularity (in microseconds) traced by time profiler. 
+ unsigned TimeTraceGranularity; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), @@ -461,12 +442,12 @@ public: UseGlobalModuleIndex(true), GenerateGlobalModuleIndex(true), ASTDumpDecls(false), ASTDumpLookups(false), BuildingImplicitModule(false), ModulesEmbedAllFiles(false), - IncludeTimestamps(true) {} + IncludeTimestamps(true), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file - /// extension. For example, "c" would return InputKind::C. + /// extension. For example, "c" would return Language::C. /// - /// \return The input kind for the extension, or InputKind::Unknown if the + /// \return The input kind for the extension, or Language::Unknown if the /// extension is not recognized. static InputKind getInputKindForExtension(StringRef Extension); }; diff --git a/include/clang/Frontend/Utils.h b/include/clang/Frontend/Utils.h index 74e563218c3..0f9b17ee508 100644 --- a/include/clang/Frontend/Utils.h +++ b/include/clang/Frontend/Utils.h @@ -99,11 +99,11 @@ public: /// Return true if system files should be passed to sawDependency(). virtual bool needSystemDependencies() { return false; } - // implementation detail /// Add a dependency \p Filename if it has not been seen before and /// sawDependency() returns true. - void maybeAddDependency(StringRef Filename, bool FromModule, bool IsSystem, - bool IsModuleFile, bool IsMissing); + virtual void maybeAddDependency(StringRef Filename, bool FromModule, + bool IsSystem, bool IsModuleFile, + bool IsMissing); protected: /// Return true if the filename was added to the list of dependencies, false @@ -213,13 +213,18 @@ createChainedIncludesSource(CompilerInstance &CI, /// createInvocationFromCommandLine - Construct a compiler invocation object for /// a command line argument vector. 
/// +/// \param ShouldRecoverOnErrors - whether we should attempt to return a +/// non-null (and possibly incorrect) CompilerInvocation if any errors were +/// encountered. When this flag is false, always return null on errors. +/// /// \return A CompilerInvocation, or 0 if none was built for the given /// argument vector. std::unique_ptr createInvocationFromCommandLine( ArrayRef Args, IntrusiveRefCntPtr Diags = IntrusiveRefCntPtr(), - IntrusiveRefCntPtr VFS = nullptr); + IntrusiveRefCntPtr VFS = nullptr, + bool ShouldRecoverOnErrors = false); /// Return the value of the last argument as an integer, or a default. If Diags /// is non-null, emits an error if the argument is given, but non-integral. diff --git a/include/clang/Index/CodegenNameGenerator.h b/include/clang/Index/CodegenNameGenerator.h deleted file mode 100644 index 98b3a5de817..00000000000 --- a/include/clang/Index/CodegenNameGenerator.h +++ /dev/null @@ -1,52 +0,0 @@ -//===- CodegenNameGenerator.h - Codegen name generation -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Determines the name that the symbol will get for code generation. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_INDEX_CODEGENNAMEGENERATOR_H -#define LLVM_CLANG_INDEX_CODEGENNAMEGENERATOR_H - -#include "clang/AST/Mangle.h" -#include "clang/Basic/LLVM.h" -#include -#include -#include - -namespace clang { - class ASTContext; - class Decl; - -namespace index { - -class CodegenNameGenerator { -public: - explicit CodegenNameGenerator(ASTContext &Ctx); - ~CodegenNameGenerator(); - - /// \returns true on failure to produce a name for the given decl, false on - /// success. 
- bool writeName(const Decl *D, raw_ostream &OS); - - /// Version of \c writeName function that returns a string. - std::string getName(const Decl *D); - - /// This can return multiple mangled names when applicable, e.g. for C++ - /// constructors/destructors. - std::vector getAllManglings(const Decl *D); - -private: - struct Implementation; - std::unique_ptr Impl; -}; - -} // namespace index -} // namespace clang - -#endif // LLVM_CLANG_INDEX_CODEGENNAMEGENERATOR_H diff --git a/include/clang/Index/IndexDataConsumer.h b/include/clang/Index/IndexDataConsumer.h index bc1d86696df..72747821bf5 100644 --- a/include/clang/Index/IndexDataConsumer.h +++ b/include/clang/Index/IndexDataConsumer.h @@ -32,7 +32,7 @@ public: const DeclContext *ContainerDC; }; - virtual ~IndexDataConsumer() {} + virtual ~IndexDataConsumer() = default; virtual void initialize(ASTContext &Ctx) {} @@ -41,12 +41,16 @@ public: /// \returns true to continue indexing, or false to abort. virtual bool handleDeclOccurence(const Decl *D, SymbolRoleSet Roles, ArrayRef Relations, - SourceLocation Loc, ASTNodeInfo ASTNode); + SourceLocation Loc, ASTNodeInfo ASTNode) { + return true; + } /// \returns true to continue indexing, or false to abort. virtual bool handleMacroOccurence(const IdentifierInfo *Name, const MacroInfo *MI, SymbolRoleSet Roles, - SourceLocation Loc); + SourceLocation Loc) { + return true; + } /// \returns true to continue indexing, or false to abort. /// @@ -54,8 +58,10 @@ public: /// For "@import MyMod.SubMod", there will be a call for 'MyMod' with the /// 'reference' role, and a call for 'SubMod' with the 'declaration' role. 
virtual bool handleModuleOccurence(const ImportDecl *ImportD, - const Module *Mod, - SymbolRoleSet Roles, SourceLocation Loc); + const Module *Mod, SymbolRoleSet Roles, + SourceLocation Loc) { + return true; + } virtual void finish() {} }; diff --git a/include/clang/Index/IndexingAction.h b/include/clang/Index/IndexingAction.h index 9756f3c539e..9ed2a018f16 100644 --- a/include/clang/Index/IndexingAction.h +++ b/include/clang/Index/IndexingAction.h @@ -9,7 +9,9 @@ #ifndef LLVM_CLANG_INDEX_INDEXINGACTION_H #define LLVM_CLANG_INDEX_INDEXINGACTION_H +#include "clang/AST/ASTConsumer.h" #include "clang/Basic/LLVM.h" +#include "clang/Index/IndexingOptions.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/ArrayRef.h" @@ -17,6 +19,7 @@ namespace clang { class ASTContext; + class ASTConsumer; class ASTReader; class ASTUnit; class Decl; @@ -29,32 +32,24 @@ namespace serialization { namespace index { class IndexDataConsumer; -struct IndexingOptions { - enum class SystemSymbolFilterKind { - None, - DeclarationsOnly, - All, - }; +/// Creates an ASTConsumer that indexes all symbols (macros and AST decls). +std::unique_ptr createIndexingASTConsumer( + std::shared_ptr DataConsumer, + const IndexingOptions &Opts, std::shared_ptr PP, + std::function ShouldSkipFunctionBody); - SystemSymbolFilterKind SystemSymbolFilter - = SystemSymbolFilterKind::DeclarationsOnly; - bool IndexFunctionLocals = false; - bool IndexImplicitInstantiation = false; - // Whether to index macro definitions in the Preprocesor when preprocessor - // callback is not available (e.g. after parsing has finished). Note that - // macro references are not available in Proprocessor. - bool IndexMacrosInPreprocessor = false; - // Has no effect if IndexFunctionLocals are false. 
- bool IndexParametersInDeclarations = false; - bool IndexTemplateParameters = false; -}; +inline std::unique_ptr createIndexingASTConsumer( + std::shared_ptr DataConsumer, + const IndexingOptions &Opts, std::shared_ptr PP) { + return createIndexingASTConsumer( + std::move(DataConsumer), Opts, std::move(PP), + /*ShouldSkipFunctionBody=*/[](const Decl *) { return false; }); +} /// Creates a frontend action that indexes all symbols (macros and AST decls). -/// \param WrappedAction another frontend action to wrap over or null. std::unique_ptr createIndexingAction(std::shared_ptr DataConsumer, - IndexingOptions Opts, - std::unique_ptr WrappedAction); + const IndexingOptions &Opts); /// Recursively indexes all decls in the AST. void indexASTUnit(ASTUnit &Unit, IndexDataConsumer &DataConsumer, diff --git a/include/clang/Index/IndexingOptions.h b/include/clang/Index/IndexingOptions.h new file mode 100644 index 00000000000..bbfd6e4a72c --- /dev/null +++ b/include/clang/Index/IndexingOptions.h @@ -0,0 +1,42 @@ +//===--- IndexingOptions.h - Options for indexing ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INDEX_INDEXINGOPTIONS_H +#define LLVM_CLANG_INDEX_INDEXINGOPTIONS_H + +#include "clang/Frontend/FrontendOptions.h" +#include +#include + +namespace clang { +namespace index { + +struct IndexingOptions { + enum class SystemSymbolFilterKind { + None, + DeclarationsOnly, + All, + }; + + SystemSymbolFilterKind SystemSymbolFilter = + SystemSymbolFilterKind::DeclarationsOnly; + bool IndexFunctionLocals = false; + bool IndexImplicitInstantiation = false; + // Whether to index macro definitions in the Preprocessor when preprocessor + // callback is not available (e.g.
after parsing has finished). Note that + // macro references are not available in Preprocessor. + bool IndexMacrosInPreprocessor = false; + // Has no effect if IndexFunctionLocals is false. + bool IndexParametersInDeclarations = false; + bool IndexTemplateParameters = false; +}; + +} // namespace index +} // namespace clang + +#endif // LLVM_CLANG_INDEX_INDEXINGOPTIONS_H diff --git a/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/include/clang/Lex/DependencyDirectivesSourceMinimizer.h index 41641078afe..d832df6b614 100644 --- a/include/clang/Lex/DependencyDirectivesSourceMinimizer.h +++ b/include/clang/Lex/DependencyDirectivesSourceMinimizer.h @@ -38,6 +38,7 @@ enum TokenKind { pp_undef, pp_import, pp_pragma_import, + pp_pragma_once, pp_include_next, pp_if, pp_ifdef, @@ -46,6 +47,9 @@ enum TokenKind { pp_else, pp_endif, decl_at_import, + cxx_export_decl, + cxx_module_decl, + cxx_import_decl, pp_eof, }; @@ -62,6 +66,24 @@ struct Token { Token(TokenKind K, int Offset) : K(K), Offset(Offset) {} }; +/// Simplified token range to track the range of a potentially skippable PP +/// directive. +struct SkippedRange { + /// Offset into the output byte stream of where the skipped directive begins. + int Offset; + + /// The number of bytes that can be skipped before the preprocessing must + /// resume. + int Length; +}; + +/// Computes the potential source ranges that can be skipped by the preprocessor +/// when skipping a directive like #if, #ifdef or #elif. +/// +/// \returns false on success, true on error.
+bool computeSkippedRanges(ArrayRef Input, + llvm::SmallVectorImpl &Range); + } // end namespace minimize_source_to_dependency_directives /// Minimize the input down to the preprocessor directives that might have diff --git a/include/clang/Lex/DirectoryLookup.h b/include/clang/Lex/DirectoryLookup.h index 7c556ac3517..d526319a68c 100644 --- a/include/clang/Lex/DirectoryLookup.h +++ b/include/clang/Lex/DirectoryLookup.h @@ -36,14 +36,17 @@ public: LT_HeaderMap }; private: - union { // This union is discriminated by isHeaderMap. + union DLU { // This union is discriminated by isHeaderMap. /// Dir - This is the actual directory that we're referring to for a normal /// directory or a framework. - const DirectoryEntry *Dir; + DirectoryEntryRef Dir; /// Map - This is the HeaderMap if this is a headermap lookup. /// const HeaderMap *Map; + + DLU(DirectoryEntryRef Dir) : Dir(Dir) {} + DLU(const HeaderMap *Map) : Map(Map) {} } u; /// DirCharacteristic - The type of directory this is: this is an instance of @@ -62,24 +65,18 @@ private: unsigned SearchedAllModuleMaps : 1; public: - /// DirectoryLookup ctor - Note that this ctor *does not take ownership* of - /// 'dir'. - DirectoryLookup(const DirectoryEntry *dir, SrcMgr::CharacteristicKind DT, + /// This ctor *does not take ownership* of 'Dir'. + DirectoryLookup(DirectoryEntryRef Dir, SrcMgr::CharacteristicKind DT, bool isFramework) - : DirCharacteristic(DT), - LookupType(isFramework ? LT_Framework : LT_NormalDir), - IsIndexHeaderMap(false), SearchedAllModuleMaps(false) { - u.Dir = dir; - } + : u(Dir), DirCharacteristic(DT), + LookupType(isFramework ? LT_Framework : LT_NormalDir), + IsIndexHeaderMap(false), SearchedAllModuleMaps(false) {} - /// DirectoryLookup ctor - Note that this ctor *does not take ownership* of - /// 'map'. - DirectoryLookup(const HeaderMap *map, SrcMgr::CharacteristicKind DT, + /// This ctor *does not take ownership* of 'Map'. 
+ DirectoryLookup(const HeaderMap *Map, SrcMgr::CharacteristicKind DT, bool isIndexHeaderMap) - : DirCharacteristic(DT), LookupType(LT_HeaderMap), - IsIndexHeaderMap(isIndexHeaderMap), SearchedAllModuleMaps(false) { - u.Map = map; - } + : u(Map), DirCharacteristic(DT), LookupType(LT_HeaderMap), + IsIndexHeaderMap(isIndexHeaderMap), SearchedAllModuleMaps(false) {} /// getLookupType - Return the kind of directory lookup that this is: either a /// normal directory, a framework path, or a HeaderMap. @@ -92,13 +89,17 @@ public: /// getDir - Return the directory that this entry refers to. /// const DirectoryEntry *getDir() const { - return isNormalDir() ? u.Dir : nullptr; + return isNormalDir() ? &u.Dir.getDirEntry() : nullptr; } /// getFrameworkDir - Return the directory that this framework refers to. /// const DirectoryEntry *getFrameworkDir() const { - return isFramework() ? u.Dir : nullptr; + return isFramework() ? &u.Dir.getDirEntry() : nullptr; + } + + Optional getFrameworkDirRef() const { + return isFramework() ? Optional(u.Dir) : None; } /// getHeaderMap - Return the directory that this entry refers to. @@ -176,27 +177,20 @@ public: /// \param [out] MappedName if this is a headermap which maps the filename to /// a framework include ("Foo.h" -> "Foo/Foo.h"), set the new name to this /// vector and point Filename to it. 
- const FileEntry *LookupFile(StringRef &Filename, HeaderSearch &HS, - SourceLocation IncludeLoc, - SmallVectorImpl *SearchPath, - SmallVectorImpl *RelativePath, - Module *RequestingModule, - ModuleMap::KnownHeader *SuggestedModule, - bool &InUserSpecifiedSystemFramework, - bool &IsFrameworkFound, - bool &HasBeenMapped, - SmallVectorImpl &MappedName) const; + Optional + LookupFile(StringRef &Filename, HeaderSearch &HS, SourceLocation IncludeLoc, + SmallVectorImpl *SearchPath, + SmallVectorImpl *RelativePath, Module *RequestingModule, + ModuleMap::KnownHeader *SuggestedModule, + bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound, + bool &IsInHeaderMap, SmallVectorImpl &MappedName) const; private: - const FileEntry *DoFrameworkLookup( - StringRef Filename, HeaderSearch &HS, - SmallVectorImpl *SearchPath, - SmallVectorImpl *RelativePath, - Module *RequestingModule, + Optional DoFrameworkLookup( + StringRef Filename, HeaderSearch &HS, SmallVectorImpl *SearchPath, + SmallVectorImpl *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule, - bool &InUserSpecifiedSystemFramework, - bool &IsFrameworkFound) const; - + bool &InUserSpecifiedSystemFramework, bool &IsFrameworkFound) const; }; } // end namespace clang diff --git a/include/clang/Lex/HeaderMap.h b/include/clang/Lex/HeaderMap.h index eca8755d452..accb061e51b 100644 --- a/include/clang/Lex/HeaderMap.h +++ b/include/clang/Lex/HeaderMap.h @@ -13,6 +13,7 @@ #ifndef LLVM_CLANG_LEX_HEADERMAP_H #define LLVM_CLANG_LEX_HEADERMAP_H +#include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/Compiler.h" @@ -21,8 +22,6 @@ namespace clang { -class FileEntry; -class FileManager; struct HMapBucket; struct HMapHeader; @@ -78,7 +77,7 @@ public: /// NULL and the file is found, RawPath will be set to the raw path at which /// the file was found in the file system. For example, for a search path /// ".." 
and a filename "../file.h" this would be "../../file.h". - const FileEntry *LookupFile(StringRef Filename, FileManager &FM) const; + Optional LookupFile(StringRef Filename, FileManager &FM) const; using HeaderMapImpl::lookupFilename; using HeaderMapImpl::getFileName; diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h index c5e66242444..0d20dafe2cb 100644 --- a/include/clang/Lex/HeaderSearch.h +++ b/include/clang/Lex/HeaderSearch.h @@ -250,12 +250,6 @@ class HeaderSearch { /// Entity used to look up stored header file information. ExternalHeaderFileInfoSource *ExternalSource = nullptr; - // Various statistics we track for performance analysis. - unsigned NumIncluded = 0; - unsigned NumMultiIncludeFileOptzn = 0; - unsigned NumFrameworkLookups = 0; - unsigned NumSubFrameworkLookups = 0; - public: HeaderSearch(std::shared_ptr HSOpts, SourceManager &SourceMgr, DiagnosticsEngine &Diags, @@ -395,7 +389,7 @@ public: /// found in any of searched SearchDirs. Will be set to false if a framework /// is found only through header maps. Doesn't guarantee the requested file is /// found. - const FileEntry *LookupFile( + Optional LookupFile( StringRef Filename, SourceLocation IncludeLoc, bool isAngled, const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir, ArrayRef> Includers, @@ -410,7 +404,7 @@ public: /// within ".../Carbon.framework/Headers/Carbon.h", check to see if /// HIToolbox is a subframework within Carbon.framework. If so, return /// the FileEntry for the designated file, otherwise return null. 
- const FileEntry *LookupSubframeworkHeader( + Optional LookupSubframeworkHeader( StringRef Filename, const FileEntry *ContextFileEnt, SmallVectorImpl *SearchPath, SmallVectorImpl *RelativePath, Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule); @@ -544,8 +538,6 @@ public: const FileEntry *lookupModuleMapFile(const DirectoryEntry *Dir, bool IsFramework); - void IncrementFrameworkLookupCount() { ++NumFrameworkLookups; } - /// Determine whether there is a module map that may map the header /// with the given file name to a (sub)module. /// Always returns false if modules are disabled. @@ -649,7 +641,7 @@ private: /// Look up the file with the specified name and determine its owning /// module. - const FileEntry * + Optional getFileAndSuggestModule(StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir, bool IsSystemHeaderDir, Module *RequestingModule, diff --git a/include/clang/Lex/HeaderSearchOptions.h b/include/clang/Lex/HeaderSearchOptions.h index ed128bce485..5c19a41986b 100644 --- a/include/clang/Lex/HeaderSearchOptions.h +++ b/include/clang/Lex/HeaderSearchOptions.h @@ -11,6 +11,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include @@ -195,6 +196,10 @@ public: /// Whether to validate system input files when a module is loaded. unsigned ModulesValidateSystemHeaders : 1; + // Whether the content of input files should be hashed and used to + // validate consistency. + unsigned ValidateASTInputFilesContent : 1; + /// Whether the module includes debug information (-gmodules). unsigned UseDebugInfo : 1; @@ -202,14 +207,23 @@ public: unsigned ModulesHashContent : 1; + /// Whether we should include all things that could impact the module in the + /// hash. + /// + /// This includes things like the full header search path, and enabled + /// diagnostics. 
+ unsigned ModulesStrictContextHash : 1; + HeaderSearchOptions(StringRef _Sysroot = "/") : Sysroot(_Sysroot), ModuleFormat("raw"), DisableModuleHash(false), ImplicitModuleMaps(false), ModuleMapFileHomeIsCwd(false), UseBuiltinIncludes(true), UseStandardSystemIncludes(true), UseStandardCXXIncludes(true), UseLibcxx(false), Verbose(false), ModulesValidateOncePerBuildSession(false), - ModulesValidateSystemHeaders(false), UseDebugInfo(false), - ModulesValidateDiagnosticOptions(true), ModulesHashContent(false) {} + ModulesValidateSystemHeaders(false), + ValidateASTInputFilesContent(false), UseDebugInfo(false), + ModulesValidateDiagnosticOptions(true), ModulesHashContent(false), + ModulesStrictContextHash(false) {} /// AddPath - Add the \p Path path to the specified \p Group list. void AddPath(StringRef Path, frontend::IncludeDirGroup Group, @@ -233,6 +247,15 @@ public: } }; +inline llvm::hash_code hash_value(const HeaderSearchOptions::Entry &E) { + return llvm::hash_combine(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot); +} + +inline llvm::hash_code +hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) { + return llvm::hash_combine(SHP.Prefix, SHP.IsSystemHeader); +} + } // namespace clang #endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 69cfe62e4bd..97a222f4a70 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -265,6 +265,21 @@ public: /// Return the current location in the buffer. const char *getBufferLocation() const { return BufferPtr; } + /// Returns the current lexing offset. + unsigned getCurrentBufferOffset() { + assert(BufferPtr >= BufferStart && "Invalid buffer state"); + return BufferPtr - BufferStart; + } + + /// Skip over \p NumBytes bytes. + /// + /// If the skip is successful, the next token will be lexed from the new + /// offset. The lexer also assumes that we skipped to the start of the line. 
+ /// + /// \returns true if the skip failed (new offset would have been past the + /// end of the buffer), false otherwise. + bool skipOver(unsigned NumBytes); + /// Stringify - Convert the specified string into a C string by i) escaping /// '\\' and " characters and ii) replacing newline character(s) with "\\n". /// If Charify is true, this escapes the ' character instead of ". diff --git a/include/clang/Lex/MacroArgs.h b/include/clang/Lex/MacroArgs.h index 8806f2d8c65..59676c30e0a 100644 --- a/include/clang/Lex/MacroArgs.h +++ b/include/clang/Lex/MacroArgs.h @@ -48,10 +48,6 @@ class MacroArgs final /// stream. std::vector > PreExpArgTokens; - /// StringifiedArgs - This contains arguments in 'stringified' form. If the - /// stringified form of an argument has not yet been computed, this is empty. - std::vector StringifiedArgs; - /// ArgCache - This is a linked list of MacroArgs objects that the /// Preprocessor owns which we use to avoid thrashing malloc/free. MacroArgs *ArgCache; @@ -94,12 +90,6 @@ public: const std::vector & getPreExpArgument(unsigned Arg, Preprocessor &PP); - /// getStringifiedArgument - Compute, cache, and return the specified argument - /// that has been 'stringified' as required by the # operator. - const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP, - SourceLocation ExpansionLocStart, - SourceLocation ExpansionLocEnd); - /// getNumMacroArguments - Return the number of arguments the invoked macro /// expects. unsigned getNumMacroArguments() const { return NumMacroArgs; } diff --git a/include/clang/Lex/PPCallbacks.h b/include/clang/Lex/PPCallbacks.h index f3f3796b1a3..1edcb567de6 100644 --- a/include/clang/Lex/PPCallbacks.h +++ b/include/clang/Lex/PPCallbacks.h @@ -57,10 +57,9 @@ public: /// \param FilenameTok The file name token in \#include "FileName" directive /// or macro expanded file name token from \#include MACRO(PARAMS) directive. /// Note that FilenameTok contains corresponding quotes/angles symbols. 
- virtual void FileSkipped(const FileEntry &SkippedFile, + virtual void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok, - SrcMgr::CharacteristicKind FileType) { - } + SrcMgr::CharacteristicKind FileType) {} /// Callback invoked whenever an inclusion directive results in a /// file-not-found error. @@ -308,7 +307,7 @@ public: /// Hook called when a '__has_include' or '__has_include_next' directive is /// read. virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, - const FileEntry *File, + Optional File, SrcMgr::CharacteristicKind FileType) {} /// Hook called when a source range is skipped. @@ -390,8 +389,7 @@ public: Second->FileChanged(Loc, Reason, FileType, PrevFID); } - void FileSkipped(const FileEntry &SkippedFile, - const Token &FilenameTok, + void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok, SrcMgr::CharacteristicKind FileType) override { First->FileSkipped(SkippedFile, FilenameTok, FileType); Second->FileSkipped(SkippedFile, FilenameTok, FileType); @@ -491,7 +489,7 @@ public: } void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, - const FileEntry *File, + Optional File, SrcMgr::CharacteristicKind FileType) override { First->HasInclude(Loc, FileName, IsAngled, File, FileType); Second->HasInclude(Loc, FileName, IsAngled, File, FileType); diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index f65b0cda462..1bdd2be04c0 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -28,6 +28,7 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -370,9 +371,9 @@ class Preprocessor { /// it expects a '.' or ';'. 
bool ModuleImportExpectsIdentifier = false; - /// The source location of the currently-active + /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. - SourceLocation PragmaARCCFCodeAuditedLoc; + std::pair PragmaARCCFCodeAuditedInfo; /// The source location of the currently-active /// \#pragma clang assume_nonnull begin. @@ -994,7 +995,7 @@ public: PPCallbacks *getPPCallbacks() const { return Callbacks.get(); } void addPPCallbacks(std::unique_ptr C) { if (Callbacks) - C = llvm::make_unique(std::move(C), + C = std::make_unique(std::move(C), std::move(Callbacks)); Callbacks = std::move(C); } @@ -1471,7 +1472,7 @@ public: if (LexLevel) { // It's not correct in general to enter caching lex mode while in the // middle of a nested lexing action. - auto TokCopy = llvm::make_unique(1); + auto TokCopy = std::make_unique(1); TokCopy[0] = Tok; EnterTokenStream(std::move(TokCopy), 1, true, IsReinject); } else { @@ -1601,14 +1602,16 @@ public: /// arc_cf_code_audited begin. /// /// Returns an invalid location if there is no such pragma active. - SourceLocation getPragmaARCCFCodeAuditedLoc() const { - return PragmaARCCFCodeAuditedLoc; + std::pair + getPragmaARCCFCodeAuditedInfo() const { + return PragmaARCCFCodeAuditedInfo; } /// Set the location of the currently-active \#pragma clang /// arc_cf_code_audited begin. An invalid location ends the pragma. - void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) { - PragmaARCCFCodeAuditedLoc = Loc; + void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident, + SourceLocation Loc) { + PragmaARCCFCodeAuditedInfo = {Ident, Loc}; } /// The location of the currently-active \#pragma clang @@ -1949,17 +1952,15 @@ public: /// Given a "foo" or \ reference, look up the indicated file. /// - /// Returns null on failure. \p isAngled indicates whether the file + /// Returns None on failure. \p isAngled indicates whether the file /// reference is for system \#include's or not (i.e. 
using <> instead of ""). - const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename, - bool isAngled, const DirectoryLookup *FromDir, - const FileEntry *FromFile, - const DirectoryLookup *&CurDir, - SmallVectorImpl *SearchPath, - SmallVectorImpl *RelativePath, - ModuleMap::KnownHeader *SuggestedModule, - bool *IsMapped, bool *IsFrameworkFound, - bool SkipCache = false); + Optional + LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, + const DirectoryLookup *FromDir, const FileEntry *FromFile, + const DirectoryLookup *&CurDir, SmallVectorImpl *SearchPath, + SmallVectorImpl *RelativePath, + ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped, + bool *IsFrameworkFound, bool SkipCache = false); /// Get the DirectoryLookup structure used to find the current /// FileEntry, if CurLexer is non-null and if applicable. @@ -2202,6 +2203,15 @@ private: } }; + Optional LookupHeaderIncludeOrImport( + const DirectoryLookup *&CurDir, StringRef Filename, + SourceLocation FilenameLoc, CharSourceRange FilenameRange, + const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl, + bool &IsMapped, const DirectoryLookup *LookupFrom, + const FileEntry *LookupFromFile, StringRef LookupFilename, + SmallVectorImpl &RelativePath, SmallVectorImpl &SearchPath, + ModuleMap::KnownHeader &SuggestedModule, bool isAngled); + // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, const DirectoryLookup *LookupFrom = nullptr, @@ -2313,6 +2323,15 @@ public: /// A macro is used, update information about macros that need unused /// warnings. void markMacroAsUsed(MacroInfo *MI); + +private: + Optional + getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc); + + /// Contains the currently active skipped range mappings for skipping excluded + /// conditional directives. 
+ ExcludedPreprocessorDirectiveSkipMapping + *ExcludedConditionalDirectiveSkipMappings; }; /// Abstract base class that describes a handler that will receive diff --git a/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h b/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h new file mode 100644 index 00000000000..893b7ba7a9f --- /dev/null +++ b/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h @@ -0,0 +1,31 @@ +//===- PreprocessorExcludedConditionalDirectiveSkipMapping.h - --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H +#define LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace clang { + +/// A mapping from an offset into a buffer to the number of bytes that can be +/// skipped by the preprocessor when skipping over excluded conditional +/// directive ranges. +using PreprocessorSkippedRangeMapping = llvm::DenseMap; + +/// The datastructure that holds the mapping between the active memory buffers +/// and the individual skip mappings. 
+using ExcludedPreprocessorDirectiveSkipMapping = + llvm::DenseMap; + +} // end namespace clang + +#endif // LLVM_CLANG_LEX_PREPROCESSOR_EXCLUDED_COND_DIRECTIVE_SKIP_MAPPING_H diff --git a/include/clang/Lex/PreprocessorOptions.h b/include/clang/Lex/PreprocessorOptions.h index 1480548c7fb..344afa89417 100644 --- a/include/clang/Lex/PreprocessorOptions.h +++ b/include/clang/Lex/PreprocessorOptions.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_ #include "clang/Basic/LLVM.h" +#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include @@ -142,6 +143,9 @@ public: /// compiler invocation and its buffers will be reused. bool RetainRemappedFileBuffers = false; + /// When enabled, excluded conditional blocks retain in the main file. + bool RetainExcludedConditionalBlocks = false; + /// The Objective-C++ ARC standard library that we should support, /// by providing appropriate definitions to retrofit the standard library /// with support for lifetime-qualified pointers. @@ -169,6 +173,17 @@ public: /// build it again. std::shared_ptr FailedModules; + /// Contains the currently active skipped range mappings for skipping excluded + /// conditional directives. + /// + /// The pointer is passed to the Preprocessor when it's constructed. The + /// pointer is unowned, the client is responsible for its lifetime. + ExcludedPreprocessorDirectiveSkipMapping + *ExcludedConditionalDirectiveSkipMappings = nullptr; + + /// Set up preprocessor for RunAnalysis action. 
+ bool SetUpStaticAnalyzer = false; + public: PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {} @@ -201,6 +216,7 @@ public: RetainRemappedFileBuffers = true; PrecompiledPreambleBytes.first = 0; PrecompiledPreambleBytes.second = false; + RetainExcludedConditionalBlocks = false; } }; diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index 7c67c35f615..52d159062cd 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -766,6 +766,22 @@ private: Tok.setAnnotationValue(T.getAsOpaquePtr()); } + static NamedDecl *getNonTypeAnnotation(const Token &Tok) { + return static_cast(Tok.getAnnotationValue()); + } + + static void setNonTypeAnnotation(Token &Tok, NamedDecl *ND) { + Tok.setAnnotationValue(ND); + } + + static IdentifierInfo *getIdentifierAnnotation(const Token &Tok) { + return static_cast(Tok.getAnnotationValue()); + } + + static void setIdentifierAnnotation(Token &Tok, IdentifierInfo *ND) { + Tok.setAnnotationValue(ND); + } + /// Read an already-translated primary expression out of an annotation /// token. static ExprResult getExprAnnotation(const Token &Tok) { @@ -799,8 +815,7 @@ private: /// Annotation was successful. ANK_Success }; - AnnotatedNameKind TryAnnotateName(bool IsAddressOfOperand, - CorrectionCandidateCallback *CCC = nullptr); + AnnotatedNameKind TryAnnotateName(CorrectionCandidateCallback *CCC = nullptr); /// Push a tok::annot_cxxscope token onto the token stream. void AnnotateScopeToken(CXXScopeSpec &SS, bool IsNewAnnotation); @@ -972,7 +987,7 @@ private: }; /// Consume any extra semi-colons until the end of the line. - void ConsumeExtraSemi(ExtraSemiKind Kind, unsigned TST = TST_unspecified); + void ConsumeExtraSemi(ExtraSemiKind Kind, DeclSpec::TST T = TST_unspecified); /// Return false if the next token is an identifier. An 'expected identifier' /// error is emitted otherwise. 
@@ -2107,12 +2122,13 @@ private: DeclGroupPtrTy ParseDeclaration(DeclaratorContext Context, SourceLocation &DeclEnd, - ParsedAttributesWithRange &attrs); - DeclGroupPtrTy ParseSimpleDeclaration(DeclaratorContext Context, - SourceLocation &DeclEnd, - ParsedAttributesWithRange &attrs, - bool RequireSemi, - ForRangeInit *FRI = nullptr); + ParsedAttributesWithRange &attrs, + SourceLocation *DeclSpecStart = nullptr); + DeclGroupPtrTy + ParseSimpleDeclaration(DeclaratorContext Context, SourceLocation &DeclEnd, + ParsedAttributesWithRange &attrs, bool RequireSemi, + ForRangeInit *FRI = nullptr, + SourceLocation *DeclSpecStart = nullptr); bool MightBeDeclarator(DeclaratorContext Context); DeclGroupPtrTy ParseDeclGroup(ParsingDeclSpec &DS, DeclaratorContext Context, SourceLocation *DeclEnd = nullptr, @@ -2160,7 +2176,7 @@ private: const ParsedTemplateInfo &TemplateInfo, AccessSpecifier AS, DeclSpecContext DSC); void ParseEnumBody(SourceLocation StartLoc, Decl *TagDecl); - void ParseStructUnionBody(SourceLocation StartLoc, unsigned TagType, + void ParseStructUnionBody(SourceLocation StartLoc, DeclSpec::TST TagType, Decl *TagDecl); void ParseStructDeclaration( @@ -2833,6 +2849,17 @@ private: DeclGroupPtrTy ParseOMPDeclareSimdClauses(DeclGroupPtrTy Ptr, CachedTokens &Toks, SourceLocation Loc); + /// Parses OpenMP context selectors and calls \p Callback for each + /// successfully parsed context selector. + bool parseOpenMPContextSelectors( + SourceLocation Loc, + llvm::function_ref< + void(SourceRange, const Sema::OpenMPDeclareVariantCtsSelectorData &)> + Callback); + + /// Parse clauses for '#pragma omp declare variant'. + void ParseOMPDeclareVariantClauses(DeclGroupPtrTy Ptr, CachedTokens &Toks, + SourceLocation Loc); /// Parse clauses for '#pragma omp declare target'. DeclGroupPtrTy ParseOMPDeclareTargetClauses(); /// Parse '#pragma omp end declare target'. 
@@ -2926,7 +2953,8 @@ public: /// Parses simple expression in parens for single-expression clauses of OpenMP /// constructs. /// \param RLoc Returned location of right paren. - ExprResult ParseOpenMPParensExpr(StringRef ClauseName, SourceLocation &RLoc); + ExprResult ParseOpenMPParensExpr(StringRef ClauseName, SourceLocation &RLoc, + bool IsAddressOfOperand = false); /// Data used for parsing list of variables in OpenMP clauses. struct OpenMPVarListDataTy { diff --git a/include/clang/Rewrite/Core/Rewriter.h b/include/clang/Rewrite/Core/Rewriter.h index 84c5ac3d72e..c89015e4055 100644 --- a/include/clang/Rewrite/Core/Rewriter.h +++ b/include/clang/Rewrite/Core/Rewriter.h @@ -46,6 +46,17 @@ public: /// If true and removing some text leaves a blank line /// also remove the empty line (false by default). + /// + /// FIXME: This sometimes corrupts the file's rewrite buffer due to + /// incorrect indexing in the implementation (see the FIXME in + /// clang::RewriteBuffer::RemoveText). Moreover, it's inefficient because + /// it must scan the buffer from the beginning to find the start of the + /// line. When feasible, it's better for the caller to check for a blank + /// line and then, if found, expand the removal range to include it. + /// Checking for a blank line is easy if, for example, the caller can + /// guarantee this is the first edit of a line. In that case, it can just + /// scan before and after the removal range until the next newline or + /// begin/end of the input. bool RemoveLineIfEmpty = false; RewriteOptions() {} diff --git a/include/clang/Sema/Overload.h b/include/clang/Sema/Overload.h index 96aadeac2ba..a97a7181f7d 100644 --- a/include/clang/Sema/Overload.h +++ b/include/clang/Sema/Overload.h @@ -71,6 +71,30 @@ class Sema; OCD_ViableCandidates }; + /// The parameter ordering that will be used for the candidate. This is + /// used to represent C++20 binary operator rewrites that reverse the order + /// of the arguments. 
If the parameter ordering is Reversed, the Args list is + /// reversed (but obviously the ParamDecls for the function are not). + /// + /// After forming an OverloadCandidate with reversed parameters, the list + /// of conversions will (as always) be indexed by argument, so will be + /// in reverse parameter order. + enum class OverloadCandidateParamOrder : char { Normal, Reversed }; + + /// The kinds of rewrite we perform on overload candidates. Note that the + /// values here are chosen to serve as both bitflags and as a rank (lower + /// values are preferred by overload resolution). + enum OverloadCandidateRewriteKind : unsigned { + /// Candidate is not a rewritten candidate. + CRK_None = 0x0, + + /// Candidate is a rewritten candidate with a different operator name. + CRK_DifferentOperator = 0x1, + + /// Candidate is a rewritten candidate with a reversed order of parameters. + CRK_Reversed = 0x2, + }; + /// ImplicitConversionKind - The kind of implicit conversion used to /// convert an argument to a parameter's type. The enumerator values /// match with the table titled 'Conversions' in [over.ics.scs] and are listed @@ -757,7 +781,8 @@ class Sema; CXXConversionDecl *Surrogate; /// The conversion sequences used to convert the function arguments - /// to the function parameters. + /// to the function parameters. Note that these are indexed by argument, + /// so may not match the parameter order of Function. ConversionSequenceList Conversions; /// The FixIt hints which can be used to fix the Bad candidate. @@ -783,6 +808,9 @@ class Sema; /// True if the candidate was found using ADL. CallExpr::ADLCallKind IsADLCandidate : 1; + /// Whether this is a rewritten candidate, and if so, of what kind? + OverloadCandidateRewriteKind RewriteKind : 2; + /// FailureKind - The reason why this candidate is not viable. /// Actually an OverloadFailureKind. 
unsigned char FailureKind; @@ -826,10 +854,10 @@ class Sema; unsigned getNumParams() const { if (IsSurrogate) { - auto STy = Surrogate->getConversionType(); + QualType STy = Surrogate->getConversionType(); while (STy->isPointerType() || STy->isReferenceType()) STy = STy->getPointeeType(); - return STy->getAs()->getNumParams(); + return STy->castAs()->getNumParams(); } if (Function) return Function->getNumParams(); @@ -838,7 +866,8 @@ class Sema; private: friend class OverloadCandidateSet; - OverloadCandidate() : IsADLCandidate(CallExpr::NotADL) {} + OverloadCandidate() + : IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {} }; /// OverloadCandidateSet - A set of overload candidates, used in C++ @@ -867,9 +896,54 @@ class Sema; CSK_InitByConstructor, }; + /// Information about operator rewrites to consider when adding operator + /// functions to a candidate set. + struct OperatorRewriteInfo { + OperatorRewriteInfo() + : OriginalOperator(OO_None), AllowRewrittenCandidates(false) {} + OperatorRewriteInfo(OverloadedOperatorKind Op, bool AllowRewritten) + : OriginalOperator(Op), AllowRewrittenCandidates(AllowRewritten) {} + + /// The original operator as written in the source. + OverloadedOperatorKind OriginalOperator; + /// Whether we should include rewritten candidates in the overload set. + bool AllowRewrittenCandidates; + + /// Would use of this function result in a rewrite using a different + /// operator? + bool isRewrittenOperator(const FunctionDecl *FD) { + return OriginalOperator && + FD->getDeclName().getCXXOverloadedOperator() != OriginalOperator; + } + + bool isAcceptableCandidate(const FunctionDecl *FD) { + return AllowRewrittenCandidates || !isRewrittenOperator(FD); + } + + /// Determine the kind of rewrite that should be performed for this + /// candidate. 
+ OverloadCandidateRewriteKind + getRewriteKind(const FunctionDecl *FD, OverloadCandidateParamOrder PO) { + OverloadCandidateRewriteKind CRK = CRK_None; + if (isRewrittenOperator(FD)) + CRK = OverloadCandidateRewriteKind(CRK | CRK_DifferentOperator); + if (PO == OverloadCandidateParamOrder::Reversed) + CRK = OverloadCandidateRewriteKind(CRK | CRK_Reversed); + return CRK; + } + + /// Determine whether we should consider looking for and adding reversed + /// candidates for operator Op. + bool shouldAddReversed(OverloadedOperatorKind Op); + + /// Determine whether we should add a rewritten candidate for \p FD with + /// reversed parameter order. + bool shouldAddReversed(ASTContext &Ctx, const FunctionDecl *FD); + }; + private: SmallVector Candidates; - llvm::SmallPtrSet Functions; + llvm::SmallPtrSet Functions; // Allocator for ConversionSequenceLists. We store the first few of these // inline to avoid allocation for small sets. @@ -877,11 +951,12 @@ class Sema; SourceLocation Loc; CandidateSetKind Kind; + OperatorRewriteInfo RewriteInfo; constexpr static unsigned NumInlineBytes = 24 * sizeof(ImplicitConversionSequence); unsigned NumInlineBytesUsed = 0; - llvm::AlignedCharArray InlineSpace; + alignas(void *) char InlineSpace[NumInlineBytes]; // Address space of the object being constructed. 
LangAS DestAS = LangAS::Default; @@ -904,7 +979,7 @@ class Sema; unsigned NBytes = sizeof(T) * N; if (NBytes > NumInlineBytes - NumInlineBytesUsed) return SlabAllocator.Allocate(N); - char *FreeSpaceStart = InlineSpace.buffer + NumInlineBytesUsed; + char *FreeSpaceStart = InlineSpace + NumInlineBytesUsed; assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 && "Misaligned storage!"); @@ -915,19 +990,24 @@ class Sema; void destroyCandidates(); public: - OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK) - : Loc(Loc), Kind(CSK) {} + OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK, + OperatorRewriteInfo RewriteInfo = {}) + : Loc(Loc), Kind(CSK), RewriteInfo(RewriteInfo) {} OverloadCandidateSet(const OverloadCandidateSet &) = delete; OverloadCandidateSet &operator=(const OverloadCandidateSet &) = delete; ~OverloadCandidateSet() { destroyCandidates(); } SourceLocation getLocation() const { return Loc; } CandidateSetKind getKind() const { return Kind; } + OperatorRewriteInfo getRewriteInfo() const { return RewriteInfo; } /// Determine when this overload candidate will be new to the /// overload set. - bool isNewCandidate(Decl *F) { - return Functions.insert(F->getCanonicalDecl()).second; + bool isNewCandidate(Decl *F, OverloadCandidateParamOrder PO = + OverloadCandidateParamOrder::Normal) { + uintptr_t Key = reinterpret_cast(F->getCanonicalDecl()); + Key |= static_cast(PO); + return Functions.insert(Key).second; } /// Clear out all of the candidates. 
diff --git a/include/clang/Sema/ParsedAttr.h b/include/clang/Sema/ParsedAttr.h index d87d5da04ac..d9d8585970d 100644 --- a/include/clang/Sema/ParsedAttr.h +++ b/include/clang/Sema/ParsedAttr.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_SEMA_ATTRIBUTELIST_H #include "clang/Basic/AttrSubjectMatchRules.h" +#include "clang/Basic/AttributeCommonInfo.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TargetInfo.h" @@ -114,7 +115,8 @@ using ArgsVector = llvm::SmallVector; /// 4: __attribute__(( aligned(16) )). ParmName is unused, Args/Num used. /// class ParsedAttr final - : private llvm::TrailingObjects< + : public AttributeCommonInfo, + private llvm::TrailingObjects< ParsedAttr, ArgsUnion, detail::AvailabilityData, detail::TypeTagForDatatypeData, ParsedType, detail::PropertyData> { friend TrailingObjects; @@ -134,54 +136,15 @@ class ParsedAttr final return IsProperty; } -public: - /// The style used to specify an attribute. - enum Syntax { - /// __attribute__((...)) - AS_GNU, - - /// [[...]] - AS_CXX11, - - /// [[...]] - AS_C2x, - - /// __declspec(...) - AS_Declspec, - - /// [uuid("...")] class Foo - AS_Microsoft, - - /// __ptr16, alignas(...), etc. - AS_Keyword, - - /// #pragma ... - AS_Pragma, - - // Note TableGen depends on the order above. Do not add or change the order - // without adding related code to TableGen/ClangAttrEmitter.cpp. - /// Context-sensitive version of a keyword attribute. - AS_ContextSensitiveKeyword, - }; - private: - IdentifierInfo *AttrName; - IdentifierInfo *ScopeName; IdentifierInfo *MacroII = nullptr; SourceLocation MacroExpansionLoc; - SourceRange AttrRange; - SourceLocation ScopeLoc; SourceLocation EllipsisLoc; - unsigned AttrKind : 16; - /// The number of expression arguments this attribute has. /// The expressions themselves are stored after the object. unsigned NumArgs : 16; - /// Corresponds to the Syntax enum. - unsigned SyntaxUsed : 3; - /// True if already diagnosed as invalid. 
mutable unsigned Invalid : 1; @@ -239,14 +202,14 @@ private: IdentifierInfo *scopeName, SourceLocation scopeLoc, ArgsUnion *args, unsigned numArgs, Syntax syntaxUsed, SourceLocation ellipsisLoc) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), EllipsisLoc(ellipsisLoc), NumArgs(numArgs), - SyntaxUsed(syntaxUsed), Invalid(false), UsedAsTypeAttr(false), - IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), - HasParsedType(false), HasProcessingCache(false), - IsPragmaClangAttribute(false) { - if (numArgs) memcpy(getArgsBuffer(), args, numArgs * sizeof(ArgsUnion)); - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + EllipsisLoc(ellipsisLoc), NumArgs(numArgs), Invalid(false), + UsedAsTypeAttr(false), IsAvailability(false), + IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), + HasProcessingCache(false), IsPragmaClangAttribute(false) { + if (numArgs) + memcpy(getArgsBuffer(), args, numArgs * sizeof(ArgsUnion)); } /// Constructor for availability attributes. 
@@ -257,9 +220,9 @@ private: const AvailabilityChange &obsoleted, SourceLocation unavailable, const Expr *messageExpr, Syntax syntaxUsed, SourceLocation strict, const Expr *replacementExpr) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), NumArgs(1), SyntaxUsed(syntaxUsed), Invalid(false), - UsedAsTypeAttr(false), IsAvailability(true), + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + NumArgs(1), Invalid(false), UsedAsTypeAttr(false), IsAvailability(true), IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), HasProcessingCache(false), IsPragmaClangAttribute(false), UnavailableLoc(unavailable), MessageExpr(messageExpr) { @@ -267,7 +230,6 @@ private: memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion)); new (getAvailabilityData()) detail::AvailabilityData( introduced, deprecated, obsoleted, strict, replacementExpr); - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); } /// Constructor for objc_bridge_related attributes. 
@@ -275,16 +237,16 @@ private: IdentifierInfo *scopeName, SourceLocation scopeLoc, IdentifierLoc *Parm1, IdentifierLoc *Parm2, IdentifierLoc *Parm3, Syntax syntaxUsed) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), NumArgs(3), SyntaxUsed(syntaxUsed), Invalid(false), - UsedAsTypeAttr(false), IsAvailability(false), - IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(false), - HasProcessingCache(false), IsPragmaClangAttribute(false) { + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + NumArgs(3), Invalid(false), UsedAsTypeAttr(false), + IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), + HasParsedType(false), HasProcessingCache(false), + IsPragmaClangAttribute(false) { ArgsUnion *Args = getArgsBuffer(); Args[0] = Parm1; Args[1] = Parm2; Args[2] = Parm3; - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); } /// Constructor for type_tag_for_datatype attribute. @@ -292,31 +254,31 @@ private: IdentifierInfo *scopeName, SourceLocation scopeLoc, IdentifierLoc *ArgKind, ParsedType matchingCType, bool layoutCompatible, bool mustBeNull, Syntax syntaxUsed) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), NumArgs(1), SyntaxUsed(syntaxUsed), Invalid(false), - UsedAsTypeAttr(false), IsAvailability(false), - IsTypeTagForDatatype(true), IsProperty(false), HasParsedType(false), - HasProcessingCache(false), IsPragmaClangAttribute(false) { + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + NumArgs(1), Invalid(false), UsedAsTypeAttr(false), + IsAvailability(false), IsTypeTagForDatatype(true), IsProperty(false), + HasParsedType(false), HasProcessingCache(false), + IsPragmaClangAttribute(false) { ArgsUnion PVal(ArgKind); memcpy(getArgsBuffer(), &PVal, sizeof(ArgsUnion)); detail::TypeTagForDatatypeData &ExtraData = getTypeTagForDatatypeDataSlot(); new (&ExtraData.MatchingCType) 
ParsedType(matchingCType); ExtraData.LayoutCompatible = layoutCompatible; ExtraData.MustBeNull = mustBeNull; - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); } /// Constructor for attributes with a single type argument. ParsedAttr(IdentifierInfo *attrName, SourceRange attrRange, IdentifierInfo *scopeName, SourceLocation scopeLoc, ParsedType typeArg, Syntax syntaxUsed) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), NumArgs(0), SyntaxUsed(syntaxUsed), Invalid(false), - UsedAsTypeAttr(false), IsAvailability(false), - IsTypeTagForDatatype(false), IsProperty(false), HasParsedType(true), - HasProcessingCache(false), IsPragmaClangAttribute(false) { + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + NumArgs(0), Invalid(false), UsedAsTypeAttr(false), + IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(false), + HasParsedType(true), HasProcessingCache(false), + IsPragmaClangAttribute(false) { new (&getTypeBuffer()) ParsedType(typeArg); - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); } /// Constructor for microsoft __declspec(property) attribute. 
@@ -324,13 +286,13 @@ private: IdentifierInfo *scopeName, SourceLocation scopeLoc, IdentifierInfo *getterId, IdentifierInfo *setterId, Syntax syntaxUsed) - : AttrName(attrName), ScopeName(scopeName), AttrRange(attrRange), - ScopeLoc(scopeLoc), NumArgs(0), SyntaxUsed(syntaxUsed), Invalid(false), - UsedAsTypeAttr(false), IsAvailability(false), - IsTypeTagForDatatype(false), IsProperty(true), HasParsedType(false), - HasProcessingCache(false), IsPragmaClangAttribute(false) { + : AttributeCommonInfo(attrName, scopeName, attrRange, scopeLoc, + syntaxUsed), + NumArgs(0), Invalid(false), UsedAsTypeAttr(false), + IsAvailability(false), IsTypeTagForDatatype(false), IsProperty(true), + HasParsedType(false), HasProcessingCache(false), + IsPragmaClangAttribute(false) { new (&getPropertyDataBuffer()) detail::PropertyData(getterId, setterId); - AttrKind = getKind(getName(), getScopeName(), syntaxUsed); } /// Type tag information is stored immediately following the arguments, if @@ -372,27 +334,6 @@ public: void operator delete(void *) = delete; - enum Kind { - #define PARSED_ATTR(NAME) AT_##NAME, - #include "clang/Sema/AttrParsedAttrList.inc" - #undef PARSED_ATTR - IgnoredAttribute, - UnknownAttribute - }; - - IdentifierInfo *getName() const { return AttrName; } - SourceLocation getLoc() const { return AttrRange.getBegin(); } - SourceRange getRange() const { return AttrRange; } - - bool hasScope() const { return ScopeName; } - IdentifierInfo *getScopeName() const { return ScopeName; } - SourceLocation getScopeLoc() const { return ScopeLoc; } - - bool isGNUScope() const { - return ScopeName && - (ScopeName->isStr("gnu") || ScopeName->isStr("__gnu__")); - } - bool hasParsedType() const { return HasParsedType; } /// Is this the Microsoft __declspec(property) attribute? @@ -400,30 +341,6 @@ public: return IsProperty; } - bool isAlignasAttribute() const { - // FIXME: Use a better mechanism to determine this. 
- return getKind() == AT_Aligned && isKeywordAttribute(); - } - - bool isDeclspecAttribute() const { return SyntaxUsed == AS_Declspec; } - bool isMicrosoftAttribute() const { return SyntaxUsed == AS_Microsoft; } - - bool isCXX11Attribute() const { - return SyntaxUsed == AS_CXX11 || isAlignasAttribute(); - } - - bool isC2xAttribute() const { - return SyntaxUsed == AS_C2x; - } - - bool isKeywordAttribute() const { - return SyntaxUsed == AS_Keyword || SyntaxUsed == AS_ContextSensitiveKeyword; - } - - bool isContextSensitiveKeywordAttribute() const { - return SyntaxUsed == AS_ContextSensitiveKeyword; - } - bool isInvalid() const { return Invalid; } void setInvalid(bool b = true) const { Invalid = b; } @@ -450,10 +367,6 @@ public: bool isPackExpansion() const { return EllipsisLoc.isValid(); } SourceLocation getEllipsisLoc() const { return EllipsisLoc; } - Kind getKind() const { return Kind(AttrKind); } - static Kind getKind(const IdentifierInfo *Name, const IdentifierInfo *Scope, - Syntax SyntaxUsed); - /// getNumArgs - Return the number of actual arguments to this attribute. 
unsigned getNumArgs() const { return NumArgs; } @@ -480,54 +393,61 @@ public: } const AvailabilityChange &getAvailabilityIntroduced() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return getAvailabilityData()->Changes[detail::IntroducedSlot]; } const AvailabilityChange &getAvailabilityDeprecated() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return getAvailabilityData()->Changes[detail::DeprecatedSlot]; } const AvailabilityChange &getAvailabilityObsoleted() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return getAvailabilityData()->Changes[detail::ObsoletedSlot]; } SourceLocation getStrictLoc() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return getAvailabilityData()->StrictLoc; } SourceLocation getUnavailableLoc() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return UnavailableLoc; } const Expr * getMessageExpr() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return MessageExpr; } const Expr *getReplacementExpr() const { - assert(getKind() == AT_Availability && "Not an availability attribute"); + assert(getParsedKind() == AT_Availability && + "Not an availability attribute"); return getAvailabilityData()->Replacement; } const ParsedType &getMatchingCType() const { - assert(getKind() == AT_TypeTagForDatatype && + assert(getParsedKind() == 
AT_TypeTagForDatatype && "Not a type_tag_for_datatype attribute"); return getTypeTagForDatatypeDataSlot().MatchingCType; } bool getLayoutCompatible() const { - assert(getKind() == AT_TypeTagForDatatype && + assert(getParsedKind() == AT_TypeTagForDatatype && "Not a type_tag_for_datatype attribute"); return getTypeTagForDatatypeDataSlot().LayoutCompatible; } bool getMustBeNull() const { - assert(getKind() == AT_TypeTagForDatatype && + assert(getParsedKind() == AT_TypeTagForDatatype && "Not a type_tag_for_datatype attribute"); return getTypeTagForDatatypeDataSlot().MustBeNull; } @@ -570,11 +490,6 @@ public: return MacroExpansionLoc; } - /// Get an index into the attribute spelling list - /// defined in Attr.td. This index is used by an attribute - /// to pretty print itself. - unsigned getAttributeSpellingListIndex() const; - bool isTargetSpecificAttr() const; bool isTypeAttr() const; bool isStmtAttr() const; @@ -603,7 +518,7 @@ public: /// If this is an OpenCL addr space attribute returns its representation /// in LangAS, otherwise returns default addr space. 
LangAS asOpenCLLangAS() const { - switch (getKind()) { + switch (getParsedKind()) { case ParsedAttr::AT_OpenCLConstantAddressSpace: return LangAS::opencl_constant; case ParsedAttr::AT_OpenCLGlobalAddressSpace: @@ -618,6 +533,8 @@ public: return LangAS::Default; } } + + AttributeCommonInfo::Kind getKind() const { return getParsedKind(); } }; class AttributePool; @@ -889,8 +806,9 @@ public: } bool hasAttribute(ParsedAttr::Kind K) const { - return llvm::any_of( - AttrList, [K](const ParsedAttr *AL) { return AL->getKind() == K; }); + return llvm::any_of(AttrList, [K](const ParsedAttr *AL) { + return AL->getParsedKind() == K; + }); } private: @@ -1038,28 +956,28 @@ enum AttributeDeclKind { inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, const ParsedAttr &At) { - DB.AddTaggedVal(reinterpret_cast(At.getName()), + DB.AddTaggedVal(reinterpret_cast(At.getAttrName()), DiagnosticsEngine::ak_identifierinfo); return DB; } inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, const ParsedAttr &At) { - PD.AddTaggedVal(reinterpret_cast(At.getName()), + PD.AddTaggedVal(reinterpret_cast(At.getAttrName()), DiagnosticsEngine::ak_identifierinfo); return PD; } inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB, const ParsedAttr *At) { - DB.AddTaggedVal(reinterpret_cast(At->getName()), + DB.AddTaggedVal(reinterpret_cast(At->getAttrName()), DiagnosticsEngine::ak_identifierinfo); return DB; } inline const PartialDiagnostic &operator<<(const PartialDiagnostic &PD, const ParsedAttr *At) { - PD.AddTaggedVal(reinterpret_cast(At->getName()), + PD.AddTaggedVal(reinterpret_cast(At->getAttrName()), DiagnosticsEngine::ak_identifierinfo); return PD; } diff --git a/include/clang/Sema/ScopeInfo.h b/include/clang/Sema/ScopeInfo.h index ea2595113d5..4f7534f9ef1 100644 --- a/include/clang/Sema/ScopeInfo.h +++ b/include/clang/Sema/ScopeInfo.h @@ -756,13 +756,16 @@ public: unsigned short CapRegionKind; unsigned short OpenMPLevel; + unsigned short 
OpenMPCaptureLevel; CapturedRegionScopeInfo(DiagnosticsEngine &Diag, Scope *S, CapturedDecl *CD, RecordDecl *RD, ImplicitParamDecl *Context, - CapturedRegionKind K, unsigned OpenMPLevel) + CapturedRegionKind K, unsigned OpenMPLevel, + unsigned OpenMPCaptureLevel) : CapturingScopeInfo(Diag, ImpCap_CapturedRegion), TheCapturedDecl(CD), TheRecordDecl(RD), TheScope(S), - ContextParam(Context), CapRegionKind(K), OpenMPLevel(OpenMPLevel) { + ContextParam(Context), CapRegionKind(K), OpenMPLevel(OpenMPLevel), + OpenMPCaptureLevel(OpenMPCaptureLevel) { Kind = SK_CapturedRegion; } @@ -817,6 +820,9 @@ public: /// Whether the lambda contains an unexpanded parameter pack. bool ContainsUnexpandedParameterPack = false; + /// Packs introduced by this lambda, if any. + SmallVector LocalPacks; + /// If this is a generic lambda, use this as the depth of /// each 'auto' parameter, during initial AST construction. unsigned AutoTemplateParameterDepth = 0; diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index af762f74d74..a911c61a07f 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -57,6 +57,7 @@ #include #include #include +#include #include namespace llvm { @@ -158,6 +159,8 @@ namespace clang { class OMPClause; struct OMPVarListLocTy; struct OverloadCandidate; + enum class OverloadCandidateParamOrder : char; + enum OverloadCandidateRewriteKind : unsigned; class OverloadCandidateSet; class OverloadExpr; class ParenListExpr; @@ -405,13 +408,20 @@ public: /// Source location for newly created implicit MSInheritanceAttrs SourceLocation ImplicitMSInheritanceAttrLoc; + /// Holds TypoExprs that are created from `createDelayedTypo`. This is used by + /// `TransformTypos` in order to keep track of any TypoExprs that are created + /// recursively during typo correction and wipe them away if the correction + /// fails. 
+ llvm::SmallVector TypoExprs; + /// pragma clang section kind enum PragmaClangSectionKind { PCSK_Invalid = 0, PCSK_BSS = 1, PCSK_Data = 2, PCSK_Rodata = 3, - PCSK_Text = 4 + PCSK_Text = 4, + PCSK_Relro = 5 }; enum PragmaClangSectionAction { @@ -432,6 +442,7 @@ public: PragmaClangSection PragmaClangBSSSection; PragmaClangSection PragmaClangDataSection; PragmaClangSection PragmaClangRodataSection; + PragmaClangSection PragmaClangRelroSection; PragmaClangSection PragmaClangTextSection; enum PragmaMsStackAction { @@ -1039,13 +1050,6 @@ public: /// suffice, e.g., in a default function argument. Decl *ManglingContextDecl; - /// The context information used to mangle lambda expressions - /// and block literals within this context. - /// - /// This mangling information is allocated lazily, since most contexts - /// do not have lambda expressions or block literals. - std::unique_ptr MangleNumbering; - /// If we are processing a decltype type, a set of call expressions /// for which we have deferred checking the completeness of the return type. SmallVector DelayedDecltypeCalls; @@ -1056,6 +1060,11 @@ public: llvm::SmallPtrSet PossibleDerefs; + /// Expressions appearing as the LHS of a volatile assignment in this + /// context. We produce a warning for these when popping the context if + /// they are not discarded-value expressions nor unevaluated operands. + SmallVector VolatileAssignmentLHSs; + /// \brief Describes whether we are in an expression constext which we have /// to handle differently. enum ExpressionKind { @@ -1069,12 +1078,7 @@ public: ExpressionKind ExprContext) : Context(Context), ParentCleanup(ParentCleanup), NumCleanupObjects(NumCleanupObjects), NumTypos(0), - ManglingContextDecl(ManglingContextDecl), MangleNumbering(), - ExprContext(ExprContext) {} - - /// Retrieve the mangling numbering context, used to consistently - /// number constructs like lambdas for mangling. 
- MangleNumberingContext &getMangleNumberingContext(ASTContext &Ctx); + ManglingContextDecl(ManglingContextDecl), ExprContext(ExprContext) {} bool isUnevaluated() const { return Context == ExpressionEvaluationContext::Unevaluated || @@ -1093,15 +1097,12 @@ public: void WarnOnPendingNoDerefs(ExpressionEvaluationContextRecord &Rec); /// Compute the mangling number context for a lambda expression or - /// block literal. + /// block literal. Also return the extra mangling decl if any. /// /// \param DC - The DeclContext containing the lambda expression or /// block literal. - /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl - /// associated with the context, if relevant. - MangleNumberingContext *getCurrentMangleNumberContext( - const DeclContext *DC, - Decl *&ManglingContextDecl); + std::tuple + getCurrentMangleNumberContext(const DeclContext *DC); /// SpecialMemberOverloadResult - The overloading result for a special member @@ -1272,6 +1273,8 @@ public: void addImplicitTypedef(StringRef Name, QualType T); + bool WarnedStackExhausted = false; + public: Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, TranslationUnitKind TUKind = TU_Complete, @@ -1303,6 +1306,16 @@ public: void PrintStats() const; + /// Warn that the stack is nearly exhausted. + void warnStackExhausted(SourceLocation Loc); + + /// Run some code with "sufficient" stack space. (Currently, at least 256K is + /// guaranteed). Produces a warning if we're low on stack space and allocates + /// more in that case. Use this in code that may recurse deeply (for example, + /// in template instantiation) to avoid stack overflow. + void runWithSufficientStackSpace(SourceLocation Loc, + llvm::function_ref Fn); + /// Helper class that creates diagnostics with optional /// template instantiation stacks. 
/// @@ -1415,8 +1428,8 @@ public: void RecordParsingTemplateParameterDepth(unsigned Depth); void PushCapturedRegionScope(Scope *RegionScope, CapturedDecl *CD, - RecordDecl *RD, - CapturedRegionKind K); + RecordDecl *RD, CapturedRegionKind K, + unsigned OpenMPCaptureLevel = 0); /// Custom deleter to allow FunctionScopeInfos to be kept alive for a short /// time after they've been popped. @@ -1456,6 +1469,11 @@ public: /// Retrieve the current block, if any. sema::BlockScopeInfo *getCurBlock(); + /// Get the innermost lambda enclosing the current location, if any. This + /// looks through intervening non-lambda scopes such as local functions and + /// blocks. + sema::LambdaScopeInfo *getEnclosingLambda() const; + /// Retrieve the current lambda scope info, if any. /// \param IgnoreNonLambdaCapturingScope true if should find the top-most /// lambda scope info ignoring all inner capturing scopes that are not @@ -1499,6 +1517,8 @@ public: QualType BuildAddressSpaceAttr(QualType &T, Expr *AddrSpace, SourceLocation AttrLoc); + bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc); + bool CheckFunctionReturnType(QualType T, SourceLocation Loc); /// Build a function type. @@ -1621,7 +1641,7 @@ public: template void emit(const SemaDiagnosticBuilder &DB, - llvm::index_sequence) const { + std::index_sequence) const { // Apply all tuple elements to the builder in order. bool Dummy[] = {false, (DB << getPrintable(std::get(Args)))...}; (void)Dummy; @@ -1635,7 +1655,7 @@ public: void diagnose(Sema &S, SourceLocation Loc, QualType T) override { const SemaDiagnosticBuilder &DB = S.Diag(Loc, DiagID); - emit(DB, llvm::index_sequence_for()); + emit(DB, std::index_sequence_for()); DB << T; } }; @@ -1839,29 +1859,52 @@ public: /// Describes the result of the name lookup and resolution performed /// by \c ClassifyName(). enum NameClassificationKind { + /// This name is not a type or template in this context, but might be + /// something else. 
NC_Unknown, + /// Classification failed; an error has been produced. NC_Error, + /// The name has been typo-corrected to a keyword. NC_Keyword, + /// The name was classified as a type. NC_Type, - NC_Expression, - NC_NestedNameSpecifier, + /// The name was classified as a specific non-type, non-template + /// declaration. ActOnNameClassifiedAsNonType should be called to + /// convert the declaration to an expression. + NC_NonType, + /// The name was classified as an ADL-only function name. + /// ActOnNameClassifiedAsUndeclaredNonType should be called to convert the + /// result to an expression. + NC_UndeclaredNonType, + /// The name denotes a member of a dependent type that could not be + /// resolved. ActOnNameClassifiedAsDependentNonType should be called to + /// convert the result to an expression. + NC_DependentNonType, + /// The name was classified as a non-type, and an expression representing + /// that name has been formed. + NC_ContextIndependentExpr, + /// The name was classified as a template whose specializations are types. NC_TypeTemplate, + /// The name was classified as a variable template name. NC_VarTemplate, + /// The name was classified as a function template name. NC_FunctionTemplate, + /// The name was classified as an ADL-only function template name. 
NC_UndeclaredTemplate, }; class NameClassification { NameClassificationKind Kind; - ExprResult Expr; - TemplateName Template; - ParsedType Type; + union { + ExprResult Expr; + NamedDecl *NonTypeDecl; + TemplateName Template; + ParsedType Type; + }; explicit NameClassification(NameClassificationKind Kind) : Kind(Kind) {} public: - NameClassification(ExprResult Expr) : Kind(NC_Expression), Expr(Expr) {} - NameClassification(ParsedType Type) : Kind(NC_Type), Type(Type) {} NameClassification(const IdentifierInfo *Keyword) : Kind(NC_Keyword) {} @@ -1874,8 +1917,24 @@ public: return NameClassification(NC_Unknown); } - static NameClassification NestedNameSpecifier() { - return NameClassification(NC_NestedNameSpecifier); + static NameClassification ContextIndependentExpr(ExprResult E) { + NameClassification Result(NC_ContextIndependentExpr); + Result.Expr = E; + return Result; + } + + static NameClassification NonType(NamedDecl *D) { + NameClassification Result(NC_NonType); + Result.NonTypeDecl = D; + return Result; + } + + static NameClassification UndeclaredNonType() { + return NameClassification(NC_UndeclaredNonType); + } + + static NameClassification DependentNonType() { + return NameClassification(NC_DependentNonType); } static NameClassification TypeTemplate(TemplateName Name) { @@ -1904,14 +1963,19 @@ public: NameClassificationKind getKind() const { return Kind; } + ExprResult getExpression() const { + assert(Kind == NC_ContextIndependentExpr); + return Expr; + } + ParsedType getType() const { assert(Kind == NC_Type); return Type; } - ExprResult getExpression() const { - assert(Kind == NC_Expression); - return Expr; + NamedDecl *getNonTypeDecl() const { + assert(Kind == NC_NonType); + return NonTypeDecl; } TemplateName getTemplateName() const { @@ -1955,17 +2019,29 @@ public: /// \param NextToken The token following the identifier. Used to help /// disambiguate the name. 
/// - /// \param IsAddressOfOperand True if this name is the operand of a unary - /// address of ('&') expression, assuming it is classified as an - /// expression. - /// /// \param CCC The correction callback, if typo correction is desired. NameClassification ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name, SourceLocation NameLoc, const Token &NextToken, - bool IsAddressOfOperand, CorrectionCandidateCallback *CCC = nullptr); + /// Act on the result of classifying a name as an undeclared (ADL-only) + /// non-type declaration. + ExprResult ActOnNameClassifiedAsUndeclaredNonType(IdentifierInfo *Name, + SourceLocation NameLoc); + /// Act on the result of classifying a name as an undeclared member of a + /// dependent base class. + ExprResult ActOnNameClassifiedAsDependentNonType(const CXXScopeSpec &SS, + IdentifierInfo *Name, + SourceLocation NameLoc, + bool IsAddressOfOperand); + /// Act on the result of classifying a name as a specific non-type + /// declaration. + ExprResult ActOnNameClassifiedAsNonType(Scope *S, const CXXScopeSpec &SS, + NamedDecl *Found, + SourceLocation NameLoc, + const Token &NextToken); + /// Describes the detailed kind of a template name. Used in diagnostics. enum class TemplateNameKindForDiagnostics { ClassTemplate, @@ -2076,8 +2152,16 @@ public: bool &AddToScope); bool AddOverriddenMethods(CXXRecordDecl *DC, CXXMethodDecl *MD); - bool CheckConstexprFunctionDecl(const FunctionDecl *FD); - bool CheckConstexprFunctionBody(const FunctionDecl *FD, Stmt *Body); + enum class CheckConstexprKind { + /// Diagnose issues that are non-constant or that are extensions. + Diagnose, + /// Identify whether this function satisfies the formal rules for constexpr + /// functions in the current lanugage mode (with no extensions). 
+ CheckValid + }; + + bool CheckConstexprFunctionDefinition(const FunctionDecl *FD, + CheckConstexprKind Kind); void DiagnoseHiddenVirtualMethods(CXXMethodDecl *MD); void FindHiddenVirtualMethods(CXXMethodDecl *MD, @@ -2634,48 +2718,44 @@ public: }; /// Attribute merging methods. Return true if a new attribute was added. - AvailabilityAttr *mergeAvailabilityAttr( - NamedDecl *D, SourceRange Range, IdentifierInfo *Platform, bool Implicit, - VersionTuple Introduced, VersionTuple Deprecated, VersionTuple Obsoleted, - bool IsUnavailable, StringRef Message, bool IsStrict, - StringRef Replacement, AvailabilityMergeKind AMK, int Priority, - unsigned AttrSpellingListIndex); - TypeVisibilityAttr *mergeTypeVisibilityAttr(Decl *D, SourceRange Range, - TypeVisibilityAttr::VisibilityType Vis, - unsigned AttrSpellingListIndex); - VisibilityAttr *mergeVisibilityAttr(Decl *D, SourceRange Range, - VisibilityAttr::VisibilityType Vis, - unsigned AttrSpellingListIndex); - UuidAttr *mergeUuidAttr(Decl *D, SourceRange Range, - unsigned AttrSpellingListIndex, StringRef Uuid); - DLLImportAttr *mergeDLLImportAttr(Decl *D, SourceRange Range, - unsigned AttrSpellingListIndex); - DLLExportAttr *mergeDLLExportAttr(Decl *D, SourceRange Range, - unsigned AttrSpellingListIndex); + AvailabilityAttr * + mergeAvailabilityAttr(NamedDecl *D, const AttributeCommonInfo &CI, + IdentifierInfo *Platform, bool Implicit, + VersionTuple Introduced, VersionTuple Deprecated, + VersionTuple Obsoleted, bool IsUnavailable, + StringRef Message, bool IsStrict, StringRef Replacement, + AvailabilityMergeKind AMK, int Priority); + TypeVisibilityAttr * + mergeTypeVisibilityAttr(Decl *D, const AttributeCommonInfo &CI, + TypeVisibilityAttr::VisibilityType Vis); + VisibilityAttr *mergeVisibilityAttr(Decl *D, const AttributeCommonInfo &CI, + VisibilityAttr::VisibilityType Vis); + UuidAttr *mergeUuidAttr(Decl *D, const AttributeCommonInfo &CI, + StringRef Uuid); + DLLImportAttr *mergeDLLImportAttr(Decl *D, const 
AttributeCommonInfo &CI); + DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI); MSInheritanceAttr * - mergeMSInheritanceAttr(Decl *D, SourceRange Range, bool BestCase, - unsigned AttrSpellingListIndex, + mergeMSInheritanceAttr(Decl *D, const AttributeCommonInfo &CI, bool BestCase, MSInheritanceAttr::Spelling SemanticSpelling); - FormatAttr *mergeFormatAttr(Decl *D, SourceRange Range, + FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Format, int FormatIdx, - int FirstArg, unsigned AttrSpellingListIndex); - SectionAttr *mergeSectionAttr(Decl *D, SourceRange Range, StringRef Name, - unsigned AttrSpellingListIndex); - CodeSegAttr *mergeCodeSegAttr(Decl *D, SourceRange Range, StringRef Name, - unsigned AttrSpellingListIndex); - AlwaysInlineAttr *mergeAlwaysInlineAttr(Decl *D, SourceRange Range, - IdentifierInfo *Ident, - unsigned AttrSpellingListIndex); - MinSizeAttr *mergeMinSizeAttr(Decl *D, SourceRange Range, - unsigned AttrSpellingListIndex); + int FirstArg); + SectionAttr *mergeSectionAttr(Decl *D, const AttributeCommonInfo &CI, + StringRef Name); + CodeSegAttr *mergeCodeSegAttr(Decl *D, const AttributeCommonInfo &CI, + StringRef Name); + AlwaysInlineAttr *mergeAlwaysInlineAttr(Decl *D, + const AttributeCommonInfo &CI, + const IdentifierInfo *Ident); + MinSizeAttr *mergeMinSizeAttr(Decl *D, const AttributeCommonInfo &CI); NoSpeculativeLoadHardeningAttr * mergeNoSpeculativeLoadHardeningAttr(Decl *D, const NoSpeculativeLoadHardeningAttr &AL); SpeculativeLoadHardeningAttr * mergeSpeculativeLoadHardeningAttr(Decl *D, const SpeculativeLoadHardeningAttr &AL); - OptimizeNoneAttr *mergeOptimizeNoneAttr(Decl *D, SourceRange Range, - unsigned AttrSpellingListIndex); + OptimizeNoneAttr *mergeOptimizeNoneAttr(Decl *D, + const AttributeCommonInfo &CI); InternalLinkageAttr *mergeInternalLinkageAttr(Decl *D, const ParsedAttr &AL); InternalLinkageAttr *mergeInternalLinkageAttr(Decl *D, const InternalLinkageAttr &AL); @@ 
-2786,6 +2866,9 @@ public: Expr *Value, bool AllowNRVO = true); + bool CanPerformAggregateInitializationForOverloadResolution( + const InitializedEntity &Entity, InitListExpr *From); + bool CanPerformCopyInitialization(const InitializedEntity &Entity, ExprResult Init); ExprResult PerformCopyInitialization(const InitializedEntity &Entity, @@ -2938,7 +3021,8 @@ public: bool AllowExplicit = true, bool AllowExplicitConversion = false, ADLCallKind IsADLCandidate = ADLCallKind::NotADL, - ConversionSequenceList EarlyConversions = None); + ConversionSequenceList EarlyConversions = None, + OverloadCandidateParamOrder PO = {}); void AddFunctionCandidates(const UnresolvedSetImpl &Functions, ArrayRef Args, OverloadCandidateSet &CandidateSet, @@ -2951,7 +3035,8 @@ public: Expr::Classification ObjectClassification, ArrayRef Args, OverloadCandidateSet& CandidateSet, - bool SuppressUserConversion = false); + bool SuppressUserConversion = false, + OverloadCandidateParamOrder PO = {}); void AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, QualType ObjectType, @@ -2960,7 +3045,8 @@ public: OverloadCandidateSet& CandidateSet, bool SuppressUserConversions = false, bool PartialOverloading = false, - ConversionSequenceList EarlyConversions = None); + ConversionSequenceList EarlyConversions = None, + OverloadCandidateParamOrder PO = {}); void AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, @@ -2970,23 +3056,22 @@ public: ArrayRef Args, OverloadCandidateSet& CandidateSet, bool SuppressUserConversions = false, - bool PartialOverloading = false); + bool PartialOverloading = false, + OverloadCandidateParamOrder PO = {}); void AddTemplateOverloadCandidate( FunctionTemplateDecl *FunctionTemplate, DeclAccessPair FoundDecl, TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, OverloadCandidateSet &CandidateSet, bool SuppressUserConversions = false, bool 
PartialOverloading = false, bool AllowExplicit = true, - ADLCallKind IsADLCandidate = ADLCallKind::NotADL); - bool CheckNonDependentConversions(FunctionTemplateDecl *FunctionTemplate, - ArrayRef ParamTypes, - ArrayRef Args, - OverloadCandidateSet &CandidateSet, - ConversionSequenceList &Conversions, - bool SuppressUserConversions, - CXXRecordDecl *ActingContext = nullptr, - QualType ObjectType = QualType(), - Expr::Classification - ObjectClassification = {}); + ADLCallKind IsADLCandidate = ADLCallKind::NotADL, + OverloadCandidateParamOrder PO = {}); + bool CheckNonDependentConversions( + FunctionTemplateDecl *FunctionTemplate, ArrayRef ParamTypes, + ArrayRef Args, OverloadCandidateSet &CandidateSet, + ConversionSequenceList &Conversions, bool SuppressUserConversions, + CXXRecordDecl *ActingContext = nullptr, QualType ObjectType = QualType(), + Expr::Classification ObjectClassification = {}, + OverloadCandidateParamOrder PO = {}); void AddConversionCandidate( CXXConversionDecl *Conversion, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, Expr *From, QualType ToType, @@ -3003,10 +3088,14 @@ public: const FunctionProtoType *Proto, Expr *Object, ArrayRef Args, OverloadCandidateSet& CandidateSet); + void AddNonMemberOperatorCandidates( + const UnresolvedSetImpl &Functions, ArrayRef Args, + OverloadCandidateSet &CandidateSet, + TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr); void AddMemberOperatorCandidates(OverloadedOperatorKind Op, SourceLocation OpLoc, ArrayRef Args, - OverloadCandidateSet& CandidateSet, - SourceRange OpRange = SourceRange()); + OverloadCandidateSet &CandidateSet, + OverloadCandidateParamOrder PO = {}); void AddBuiltinCandidate(QualType *ParamTys, ArrayRef Args, OverloadCandidateSet& CandidateSet, bool IsAssignmentOperator = false, @@ -3022,9 +3111,10 @@ public: bool PartialOverloading = false); // Emit as a 'note' the specific overload candidate - void NoteOverloadCandidate(NamedDecl *Found, FunctionDecl *Fn, - QualType DestType = 
QualType(), - bool TakingAddress = false); + void NoteOverloadCandidate( + NamedDecl *Found, FunctionDecl *Fn, + OverloadCandidateRewriteKind RewriteKind = OverloadCandidateRewriteKind(), + QualType DestType = QualType(), bool TakingAddress = false); // Emit as a series of 'note's all template and non-templates identified by // the expression Expr @@ -3156,7 +3246,8 @@ public: BinaryOperatorKind Opc, const UnresolvedSetImpl &Fns, Expr *LHS, Expr *RHS, - bool RequiresADL = true); + bool RequiresADL = true, + bool AllowRewrittenCandidates = true); ExprResult CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, SourceLocation RLoc, @@ -3384,6 +3475,7 @@ public: LookupNameKind NameKind, RedeclarationKind Redecl = NotForRedeclaration); + bool LookupBuiltin(LookupResult &R); bool LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation = false); bool LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx, @@ -3426,6 +3518,19 @@ public: bool DiagnoseMissing); bool isKnownName(StringRef name); + /// Status of the function emission on the CUDA/HIP/OpenMP host/device attrs. + enum class FunctionEmissionStatus { + Emitted, + CUDADiscarded, // Discarded due to CUDA/HIP hostness + OMPDiscarded, // Discarded due to OpenMP hostness + TemplateDiscarded, // Discarded due to uninstantiated templates + Unknown, + }; + FunctionEmissionStatus getEmissionStatus(FunctionDecl *Decl); + + // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. 
+ bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + void ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc, ArrayRef Args, ADLResult &Functions); @@ -4007,7 +4112,8 @@ public: typedef std::pair CapturedParamNameType; void ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope, CapturedRegionKind Kind, - ArrayRef Params); + ArrayRef Params, + unsigned OpenMPCaptureLevel = 0); StmtResult ActOnCapturedRegionEnd(Stmt *S); void ActOnCapturedRegionError(); RecordDecl *CreateCapturedStmtRecordDecl(CapturedDecl *&CD, @@ -4215,6 +4321,9 @@ public: ExprResult TransformToPotentiallyEvaluated(Expr *E); ExprResult HandleExprEvaluationContextForTypeof(Expr *E); + ExprResult CheckUnevaluatedOperand(Expr *E); + void CheckUnusedVolatileAssignment(Expr *E); + ExprResult ActOnConstantExpression(ExprResult Res); // Functions for marking a declaration referenced. These functions also @@ -4349,6 +4458,10 @@ public: TemplateArgumentListInfo *ExplicitTemplateArgs = nullptr, ArrayRef Args = None, TypoExpr **Out = nullptr); + DeclResult LookupIvarInObjCMethod(LookupResult &Lookup, Scope *S, + IdentifierInfo *II); + ExprResult BuildIvarRefExpr(Scope *S, SourceLocation Loc, ObjCIvarDecl *IV); + ExprResult LookupInObjCMethod(LookupResult &LookUp, Scope *S, IdentifierInfo *II, bool AllowBuiltinCreation=false); @@ -4606,6 +4719,12 @@ public: MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig = nullptr, bool IsExecConfig = false); + enum class AtomicArgumentOrder { API, AST }; + ExprResult + BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, + SourceLocation RParenLoc, MultiExprArg Args, + AtomicExpr::AtomicOp Op, + AtomicArgumentOrder ArgOrder = AtomicArgumentOrder::API); ExprResult BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, SourceLocation LParenLoc, ArrayRef Arg, SourceLocation RParenLoc, @@ -4646,8 +4765,12 @@ public: MultiExprArg InitArgList, SourceLocation RBraceLoc); + ExprResult BuildInitList(SourceLocation LBraceLoc, + 
MultiExprArg InitArgList, + SourceLocation RBraceLoc); + ExprResult ActOnDesignatedInitializer(Designation &Desig, - SourceLocation Loc, + SourceLocation EqualOrColonLoc, bool GNUSyntax, ExprResult Init); @@ -5803,12 +5926,17 @@ public: LambdaCaptureDefault CaptureDefault); /// Start the definition of a lambda expression. - CXXMethodDecl * - startLambdaDefinition(CXXRecordDecl *Class, SourceRange IntroducerRange, - TypeSourceInfo *MethodType, SourceLocation EndLoc, - ArrayRef Params, - ConstexprSpecKind ConstexprKind, - Optional> Mangling = None); + CXXMethodDecl *startLambdaDefinition(CXXRecordDecl *Class, + SourceRange IntroducerRange, + TypeSourceInfo *MethodType, + SourceLocation EndLoc, + ArrayRef Params, + ConstexprSpecKind ConstexprKind); + + /// Number lambda for linkage purposes if necessary. + void handleLambdaNumbering( + CXXRecordDecl *Class, CXXMethodDecl *Method, + Optional> Mangling = None); /// Endow the lambda scope info with the relevant properties. void buildLambdaScope(sema::LambdaScopeInfo *LSI, @@ -5936,6 +6064,21 @@ public: CXXConversionDecl *Conv, Expr *Src); + /// Check whether the given expression is a valid constraint expression. + /// A diagnostic is emitted if it is not, and false is returned. + bool CheckConstraintExpression(Expr *CE); + + bool CalculateConstraintSatisfaction(ConceptDecl *NamedConcept, + MultiLevelTemplateArgumentList &MLTAL, + Expr *ConstraintExpr, + bool &IsSatisfied); + + /// Check that the associated constraints of a template declaration match the + /// associated constraints of an older declaration of which it is a + /// redeclaration. + bool CheckRedeclarationConstraintMatch(TemplateParameterList *Old, + TemplateParameterList *New); + // ParseObjCStringLiteral - Parse Objective-C string literals. 
ExprResult ParseObjCStringLiteral(SourceLocation *AtLocs, ArrayRef Strings); @@ -6150,6 +6293,17 @@ public: ClassTemplateSpecializationDecl *BaseTemplateSpec, SourceLocation BaseLoc); + /// Add gsl::Pointer attribute to std::container::iterator + /// \param ND The declaration that introduces the name + /// std::container::iterator. \param UnderlyingRecord The record named by ND. + void inferGslPointerAttribute(NamedDecl *ND, CXXRecordDecl *UnderlyingRecord); + + /// Add [[gsl::Owner]] and [[gsl::Pointer]] attributes for std:: types. + void inferGslOwnerPointerAttribute(CXXRecordDecl *Record); + + /// Add [[gsl::Pointer]] attributes for std:: types. + void inferGslPointerAttribute(TypedefNameDecl *TD); + void CheckCompletedCXXClass(CXXRecordDecl *Record); /// Check that the C++ class annoated with "trivial_abi" satisfies all the @@ -6596,9 +6750,9 @@ public: ExprResult CheckConceptTemplateId(const CXXScopeSpec &SS, - const DeclarationNameInfo &NameInfo, - ConceptDecl *Template, - SourceLocation TemplateLoc, + SourceLocation TemplateKWLoc, + SourceLocation ConceptNameLoc, NamedDecl *FoundDecl, + ConceptDecl *NamedConcept, const TemplateArgumentListInfo *TemplateArgs); void diagnoseMissingTemplateArguments(TemplateName Name, SourceLocation Loc); @@ -7517,6 +7671,18 @@ public: /// member). DefiningSynthesizedFunction, + // We are checking the constraints associated with a constrained entity or + // the constraint expression of a concept. This includes the checks that + // atomic constraints have the type 'bool' and that they can be constant + // evaluated. + ConstraintsCheck, + + // We are substituting template arguments into a constraint expression. + ConstraintSubstitution, + + /// We are rewriting a comparison operator in terms of an operator<=>. + RewritingOperatorAsSpaceship, + /// Added for Template instantiation observation. 
/// Memoization means we are _not_ instantiating a template because /// it is already instantiated (but we entered a context where we @@ -7777,6 +7943,23 @@ public: ArrayRef TemplateArgs, SourceRange InstantiationRange); + struct ConstraintsCheck {}; + /// \brief Note that we are checking the constraints associated with some + /// constrained entity (a concept declaration or a template with associated + /// constraints). + InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation, + ConstraintsCheck, TemplateDecl *Template, + ArrayRef TemplateArgs, + SourceRange InstantiationRange); + + struct ConstraintSubstitution {}; + /// \brief Note that we are checking a constraint expression associated + /// with a template declaration or as part of the satisfaction check of a + /// concept. + InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation, + ConstraintSubstitution, TemplateDecl *Template, + sema::TemplateDeductionInfo &DeductionInfo, + SourceRange InstantiationRange); /// Note that we have finished instantiating this template. void Clear(); @@ -8225,6 +8408,11 @@ public: LocalInstantiationScope *StartingScope, bool InstantiatingVarTemplate = false, VarTemplateSpecializationDecl *PrevVTSD = nullptr); + + VarDecl *getVarTemplateSpecialization( + VarTemplateDecl *VarTempl, const TemplateArgumentListInfo *TemplateArgs, + const DeclarationNameInfo &MemberNameInfo, SourceLocation TemplateKWLoc); + void InstantiateVariableInitializer( VarDecl *Var, VarDecl *OldVar, const MultiLevelTemplateArgumentList &TemplateArgs); @@ -8844,51 +9032,50 @@ public: void AddOptnoneAttributeIfNoConflicts(FunctionDecl *FD, SourceLocation Loc); /// AddAlignedAttr - Adds an aligned attribute to a particular declaration. 
- void AddAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E, - unsigned SpellingListIndex, bool IsPackExpansion); - void AddAlignedAttr(SourceRange AttrRange, Decl *D, TypeSourceInfo *T, - unsigned SpellingListIndex, bool IsPackExpansion); + void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, + bool IsPackExpansion); + void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, TypeSourceInfo *T, + bool IsPackExpansion); /// AddAssumeAlignedAttr - Adds an assume_aligned attribute to a particular /// declaration. - void AddAssumeAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E, Expr *OE, - unsigned SpellingListIndex); + void AddAssumeAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, + Expr *OE); /// AddAllocAlignAttr - Adds an alloc_align attribute to a particular /// declaration. - void AddAllocAlignAttr(SourceRange AttrRange, Decl *D, Expr *ParamExpr, - unsigned SpellingListIndex); + void AddAllocAlignAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *ParamExpr); /// AddAlignValueAttr - Adds an align_value attribute to a particular /// declaration. - void AddAlignValueAttr(SourceRange AttrRange, Decl *D, Expr *E, - unsigned SpellingListIndex); + void AddAlignValueAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E); /// AddLaunchBoundsAttr - Adds a launch_bounds attribute to a particular /// declaration. - void AddLaunchBoundsAttr(SourceRange AttrRange, Decl *D, Expr *MaxThreads, - Expr *MinBlocks, unsigned SpellingListIndex); + void AddLaunchBoundsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *MaxThreads, Expr *MinBlocks); /// AddModeAttr - Adds a mode attribute to a particular declaration. 
- void AddModeAttr(SourceRange AttrRange, Decl *D, IdentifierInfo *Name, - unsigned SpellingListIndex, bool InInstantiation = false); + void AddModeAttr(Decl *D, const AttributeCommonInfo &CI, IdentifierInfo *Name, + bool InInstantiation = false); - void AddParameterABIAttr(SourceRange AttrRange, Decl *D, - ParameterABI ABI, unsigned SpellingListIndex); + void AddParameterABIAttr(Decl *D, const AttributeCommonInfo &CI, + ParameterABI ABI); enum class RetainOwnershipKind {NS, CF, OS}; - void AddXConsumedAttr(Decl *D, SourceRange SR, unsigned SpellingIndex, + void AddXConsumedAttr(Decl *D, const AttributeCommonInfo &CI, RetainOwnershipKind K, bool IsTemplateInstantiation); /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size /// attribute to a particular declaration. - void addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, Expr *Min, - Expr *Max, unsigned SpellingListIndex); + void addAMDGPUFlatWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *Min, Expr *Max); /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a /// particular declaration. - void addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, Expr *Min, - Expr *Max, unsigned SpellingListIndex); + void addAMDGPUWavesPerEUAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *Min, Expr *Max); bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); @@ -9002,6 +9189,10 @@ private: void adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, unsigned Level) const; + /// Returns the number of scopes associated with the construct on the given + /// OpenMP level. + int getNumberOfConstructScopes(unsigned Level) const; + /// Push new OpenMP function region for non-capturing function. void pushOpenMPFunctionRegion(); @@ -9009,12 +9200,21 @@ private: void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI); /// Check whether we're allowed to call Callee from the current function. 
- void checkOpenMPDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + void checkOpenMPDeviceFunction(SourceLocation Loc, FunctionDecl *Callee, + bool CheckForDelayedContext = true); + + /// Check whether we're allowed to call Callee from the current function. + void checkOpenMPHostFunction(SourceLocation Loc, FunctionDecl *Callee, + bool CheckCaller = true); /// Check if the expression is allowed to be used in expressions for the /// OpenMP devices. void checkOpenMPDeviceExpr(const Expr *E); + /// Finishes analysis of the deferred functions calls that may be declared as + /// host/nohost during device/host compilation. + void finalizeOpenMPDelayedAnalysis(); + /// Checks if a type or a declaration is disabled due to the owning extension /// being disabled, and emits diagnostic messages if it is disabled. /// \param D type or declaration to be checked. @@ -9030,7 +9230,39 @@ private: MapT &Map, unsigned Selector = 0, SourceRange SrcRange = SourceRange()); + /// Marks all the functions that might be required for the currently active + /// OpenMP context. + void markOpenMPDeclareVariantFuncsReferenced(SourceLocation Loc, + FunctionDecl *Func, + bool MightBeOdrUse); + public: + /// Struct to store the context selectors info for declare variant directive. + struct OpenMPDeclareVariantCtsSelectorData { + OMPDeclareVariantAttr::CtxSelectorSetType CtxSet = + OMPDeclareVariantAttr::CtxSetUnknown; + OMPDeclareVariantAttr::CtxSelectorType Ctx = + OMPDeclareVariantAttr::CtxUnknown; + MutableArrayRef ImplVendors; + ExprResult CtxScore; + explicit OpenMPDeclareVariantCtsSelectorData() = default; + explicit OpenMPDeclareVariantCtsSelectorData( + OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, + OMPDeclareVariantAttr::CtxSelectorType Ctx, + MutableArrayRef ImplVendors, ExprResult CtxScore) + : CtxSet(CtxSet), Ctx(Ctx), ImplVendors(ImplVendors), + CtxScore(CtxScore) {} + }; + + /// Checks if the variant/multiversion functions are compatible. 
+ bool areMultiversionVariantFunctionsCompatible( + const FunctionDecl *OldFD, const FunctionDecl *NewFD, + const PartialDiagnostic &NoProtoDiagID, + const PartialDiagnosticAt &NoteCausedDiagIDAt, + const PartialDiagnosticAt &NoSupportDiagIDAt, + const PartialDiagnosticAt &DiffDiagIDAt, bool TemplatesSupported, + bool ConstexprSupported, bool CLinkageMayDiffer); + /// Function tries to capture lambda's captured variables in the OpenMP region /// before the original lambda is captured. void tryCaptureOpenMPLambdas(ValueDecl *V); @@ -9039,7 +9271,9 @@ public: /// reference. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. - bool isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level) const; + /// \param OpenMPCaptureLevel Capture level within an OpenMP construct. + bool isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, + unsigned OpenMPCaptureLevel) const; /// Check if the specified variable is used in one of the private /// clauses (private, firstprivate, lastprivate, reduction etc.) in OpenMP @@ -9053,6 +9287,10 @@ public: /// construct. void startOpenMPLoop(); + /// If the current region is a range loop-based region, mark the start of the + /// loop construct. + void startOpenMPCXXRangeFor(); + /// Check if the specified variable is used in 'private' clause. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. @@ -9159,11 +9397,16 @@ public: bool ActOnStartOpenMPDeclareTargetDirective(SourceLocation Loc); /// Called at the end of target region i.e. '#pragme omp end declare target'. void ActOnFinishOpenMPDeclareTargetDirective(); + /// Searches for the provided declaration name for OpenMP declare target + /// directive. + NamedDecl * + lookupOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec, + const DeclarationNameInfo &Id, + NamedDeclSetType &SameDirectiveDecls); /// Called on correct id-expression from the '#pragma omp declare target'. 
- void ActOnOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec, - const DeclarationNameInfo &Id, + void ActOnOpenMPDeclareTargetName(NamedDecl *ND, SourceLocation Loc, OMPDeclareTargetDeclAttr::MapTypeTy MT, - NamedDeclSetType &SameDirectiveDecls); + OMPDeclareTargetDeclAttr::DevTypeTy DT); /// Check declaration inside target region. void checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D, @@ -9348,6 +9591,21 @@ public: StmtResult ActOnOpenMPTaskLoopSimdDirective( ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp master taskloop' after parsing of the + /// associated statement. + StmtResult ActOnOpenMPMasterTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp master taskloop simd' after parsing of + /// the associated statement. + StmtResult ActOnOpenMPMasterTaskLoopSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); + /// Called on well-formed '\#pragma omp parallel master taskloop' after + /// parsing of the associated statement. + StmtResult ActOnOpenMPParallelMasterTaskLoopDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA); /// Called on well-formed '\#pragma omp distribute' after parsing /// of the associated statement. StmtResult @@ -9448,6 +9706,29 @@ public: ArrayRef Alignments, ArrayRef Linears, ArrayRef LinModifiers, ArrayRef Steps, SourceRange SR); + /// Checks '\#pragma omp declare variant' variant function and original + /// functions after parsing of the associated method/function. + /// \param DG Function declaration to which declare variant directive is + /// applied to. 
+ /// \param VariantRef Expression that references the variant function, which + /// must be used instead of the original one, specified in \p DG. + /// \returns None, if the function/variant function are not compatible with + /// the pragma, pair of original function/variant ref expression otherwise. + Optional> checkOpenMPDeclareVariantFunction( + DeclGroupPtrTy DG, Expr *VariantRef, SourceRange SR); + + /// Called on well-formed '\#pragma omp declare variant' after parsing of + /// the associated method/function. + /// \param FD Function declaration to which declare variant directive is + /// applied to. + /// \param VariantRef Expression that references the variant function, which + /// must be used instead of the original one, specified in \p DG. + /// \param Data Set of context-specific data for the specified context + /// selector. + void ActOnOpenMPDeclareVariantDirective( + FunctionDecl *FD, Expr *VariantRef, SourceRange SR, + const Sema::OpenMPDeclareVariantCtsSelectorData &Data); + OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, @@ -10051,6 +10332,7 @@ public: QualType CheckShiftOperands( // C99 6.5.7 ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign = false); + void CheckPtrComparisonWithNullChar(ExprResult &E, ExprResult &NullE); QualType CheckCompareOperands( // C99 6.5.8/9 ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc); @@ -10137,11 +10419,11 @@ public: Ref_Compatible }; - ReferenceCompareResult CompareReferenceRelationship(SourceLocation Loc, - QualType T1, QualType T2, - bool &DerivedToBase, - bool &ObjCConversion, - bool &ObjCLifetimeConversion); + ReferenceCompareResult + CompareReferenceRelationship(SourceLocation Loc, QualType T1, QualType T2, + bool &DerivedToBase, bool &ObjCConversion, + bool &ObjCLifetimeConversion, + bool &FunctionConversion); ExprResult checkUnknownAnyCast(SourceRange TypeRange, QualType CastType, 
Expr *CastExpr, CastKind &CastKind, @@ -10546,6 +10828,21 @@ public: /// // Otherwise, continue parsing as normal. DeviceDiagBuilder diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID); + /// Creates a DeviceDiagBuilder that emits the diagnostic if the current + /// context is "used as host code". + /// + /// - If CurContext is a `declare target` function or it is known that the + /// function is emitted for the host, emits the diagnostics immediately. + /// - If CurContext is a non-host function, just ignore it. + /// + /// Example usage: + /// + /// // Variable-length arrays are not allowed in NVPTX device code. + /// if (diagIfOpenMPHostCode(Loc, diag::err_vla_unsupported)) + /// return ExprError(); + /// // Otherwise, continue parsing as normal. + DeviceDiagBuilder diagIfOpenMPHostCode(SourceLocation Loc, unsigned DiagID); + DeviceDiagBuilder targetDiag(SourceLocation Loc, unsigned DiagID); enum CUDAFunctionTarget { @@ -10907,6 +11204,7 @@ private: bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckBPFBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckHexagonBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall); bool CheckHexagonBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); @@ -11165,6 +11463,7 @@ public: // Emitting members of dllexported classes is delayed until the class // (including field initializers) is fully parsed. 
SmallVector DelayedDllExportClasses; + SmallVector DelayedDllExportMemberFunctions; private: class SavePendingParsedClassStateRAII { diff --git a/include/clang/Sema/SemaInternal.h b/include/clang/Sema/SemaInternal.h index dfb34daa14d..cdaf7b70a92 100644 --- a/include/clang/Sema/SemaInternal.h +++ b/include/clang/Sema/SemaInternal.h @@ -97,7 +97,7 @@ public: bool EnteringContext) : Typo(TypoName.getName().getAsIdentifierInfo()), CurrentTCIndex(0), SavedTCIndex(0), SemaRef(SemaRef), S(S), - SS(SS ? llvm::make_unique(*SS) : nullptr), + SS(SS ? std::make_unique(*SS) : nullptr), CorrectionValidator(std::move(CCC)), MemberContext(MemberContext), Result(SemaRef, TypoName, LookupKind), Namespaces(SemaRef.Context, SemaRef.CurContext, SS), diff --git a/include/clang/Sema/TypoCorrection.h b/include/clang/Sema/TypoCorrection.h index b49a96c0b93..e0f8d152dbe 100644 --- a/include/clang/Sema/TypoCorrection.h +++ b/include/clang/Sema/TypoCorrection.h @@ -356,7 +356,7 @@ public: : CorrectionCandidateCallback(Typo, TypoNNS) {} std::unique_ptr clone() override { - return llvm::make_unique(*this); + return std::make_unique(*this); } }; @@ -369,7 +369,7 @@ public: return candidate.getCorrectionDeclAs(); } std::unique_ptr clone() override { - return llvm::make_unique(*this); + return std::make_unique(*this); } }; @@ -384,7 +384,7 @@ public: bool ValidateCandidate(const TypoCorrection &candidate) override; std::unique_ptr clone() override { - return llvm::make_unique(*this); + return std::make_unique(*this); } private: @@ -409,7 +409,7 @@ public: return false; } std::unique_ptr clone() override { - return llvm::make_unique(*this); + return std::make_unique(*this); } }; diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h index 0e1b9e0af9e..f3105727547 100644 --- a/include/clang/Serialization/ASTBitCodes.h +++ b/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that 
the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 7; + const unsigned VERSION_MAJOR = 8; /// AST file minor version number supported by this version of /// Clang. @@ -382,7 +382,10 @@ namespace serialization { /// inside the control block. enum InputFileRecordTypes { /// An input file. - INPUT_FILE = 1 + INPUT_FILE = 1, + + /// The input file content hash + INPUT_FILE_HASH }; /// Record types that occur within the AST block itself. @@ -1018,6 +1021,9 @@ namespace serialization { #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ PREDEF_TYPE_##Id##_ID, #include "clang/Basic/OpenCLExtensionTypes.def" + // \brief SVE types with auto numeration +#define SVE_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, +#include "clang/Basic/AArch64SVEACLETypes.def" }; /// The number of predefined type IDs that are reserved for @@ -1839,6 +1845,9 @@ namespace serialization { /// A CXXMemberCallExpr record. EXPR_CXX_MEMBER_CALL, + /// A CXXRewrittenBinaryOperator record. + EXPR_CXX_REWRITTEN_BINARY_OPERATOR, + /// A CXXConstructExpr record. 
EXPR_CXX_CONSTRUCT, @@ -1909,6 +1918,7 @@ namespace serialization { EXPR_FUNCTION_PARM_PACK, // FunctionParmPackExpr EXPR_MATERIALIZE_TEMPORARY, // MaterializeTemporaryExpr EXPR_CXX_FOLD, // CXXFoldExpr + EXPR_CONCEPT_SPECIALIZATION,// ConceptSpecializationExpr // CUDA EXPR_CUDA_KERNEL_CALL, // CUDAKernelCallExpr @@ -1958,6 +1968,9 @@ namespace serialization { STMT_OMP_CANCEL_DIRECTIVE, STMT_OMP_TASKLOOP_DIRECTIVE, STMT_OMP_TASKLOOP_SIMD_DIRECTIVE, + STMT_OMP_MASTER_TASKLOOP_DIRECTIVE, + STMT_OMP_MASTER_TASKLOOP_SIMD_DIRECTIVE, + STMT_OMP_PARALLEL_MASTER_TASKLOOP_DIRECTIVE, STMT_OMP_DISTRIBUTE_DIRECTIVE, STMT_OMP_TARGET_UPDATE_DIRECTIVE, STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE, diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h index 37bea48d884..7495c2b17aa 100644 --- a/include/clang/Serialization/ASTReader.h +++ b/include/clang/Serialization/ASTReader.h @@ -930,6 +930,9 @@ private: /// Whether validate system input files. bool ValidateSystemInputs; + /// Whether validate headers and module maps using hash based on contents. + bool ValidateASTInputFilesContent; + /// Whether we are allowed to use the global module index. 
bool UseGlobalIndex; @@ -1203,6 +1206,7 @@ private: struct InputFileInfo { std::string Filename; + uint64_t ContentHash; off_t StoredSize; time_t StoredTime; bool Overridden; @@ -1437,6 +1441,8 @@ private: void Error(StringRef Msg) const; void Error(unsigned DiagID, StringRef Arg1 = StringRef(), StringRef Arg2 = StringRef()) const; + void Error(unsigned DiagID, StringRef Arg1, StringRef Arg2, + unsigned Select) const; void Error(llvm::Error &&Err) const; public: @@ -1485,7 +1491,9 @@ public: StringRef isysroot = "", bool DisableValidation = false, bool AllowASTWithCompilerErrors = false, bool AllowConfigurationMismatch = false, - bool ValidateSystemInputs = false, bool UseGlobalIndex = true, + bool ValidateSystemInputs = false, + bool ValidateASTInputFilesContent = false, + bool UseGlobalIndex = true, std::unique_ptr ReadTimer = {}); ASTReader(const ASTReader &) = delete; ASTReader &operator=(const ASTReader &) = delete; @@ -1578,7 +1586,7 @@ public: /// Takes ownership of \p L. void addListener(std::unique_ptr L) { if (Listener) - L = llvm::make_unique(std::move(L), + L = std::make_unique(std::move(L), std::move(Listener)); Listener = std::move(L); } @@ -1594,7 +1602,7 @@ public: auto Old = Reader.takeListener(); if (Old) { Chained = true; - L = llvm::make_unique(std::move(L), + L = std::make_unique(std::move(L), std::move(Old)); } Reader.setListener(std::move(L)); diff --git a/include/clang/StaticAnalyzer/Checkers/Checkers.td b/include/clang/StaticAnalyzer/Checkers/Checkers.td index 2b29efba66a..4d52655045b 100644 --- a/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -504,6 +504,15 @@ def MoveChecker: Checker<"Move">, ]>, Documentation; +def VirtualCallModeling : Checker<"VirtualCallModeling">, + HelpText<"Auxiliary modeling for the virtual method call checkers">, + Documentation, + Hidden; + +def PureVirtualCallChecker : Checker<"PureVirtualCall">, + HelpText<"Check pure virtual function calls during 
construction/destruction">, + Dependencies<[VirtualCallModeling]>, + Documentation; } // end: "cplusplus" let ParentPackage = CplusplusOptIn in { @@ -552,14 +561,22 @@ def UninitializedObjectChecker: Checker<"UninitializedObject">, Documentation; def VirtualCallChecker : Checker<"VirtualCall">, - HelpText<"Check virtual function calls during construction or destruction">, + HelpText<"Check virtual function calls during construction/destruction">, CheckerOptions<[ CmdLineOption + InAlpha>, + CmdLineOption ]>, + Dependencies<[VirtualCallModeling]>, Documentation; } // end: "optin.cplusplus" @@ -636,6 +653,19 @@ let ParentPackage = DeadCode in { def DeadStoresChecker : Checker<"DeadStores">, HelpText<"Check for values stored to variables that are never read " "afterwards">, + CheckerOptions<[ + CmdLineOption, + CmdLineOption + ]>, Documentation; } // end DeadCode @@ -799,6 +829,13 @@ let ParentPackage = Taint in { def GenericTaintChecker : Checker<"TaintPropagation">, HelpText<"Generate taint information used by other checkers">, + CheckerOptions<[ + CmdLineOption, + ]>, Documentation; } // end "alpha.security.taint" diff --git a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 70bd476b6c4..d853fb74f9c 100644 --- a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -294,12 +294,20 @@ ANALYZER_OPTION(bool, DisplayCTUProgress, "display-ctu-progress", ANALYZER_OPTION(bool, ShouldTrackConditions, "track-conditions", "Whether to track conditions that are a control dependency of " "an already tracked variable.", - false) + true) ANALYZER_OPTION(bool, ShouldTrackConditionsDebug, "track-conditions-debug", "Whether to place an event at each tracked condition.", false) +ANALYZER_OPTION(bool, ShouldEmitFixItHintsAsRemarks, "fixits-as-remarks", + "Emit fix-it hints as remarks for testing purposes", + false) + 
+//===----------------------------------------------------------------------===// +// Unsigned analyzer options. +//===----------------------------------------------------------------------===// + ANALYZER_OPTION(unsigned, CTUImportThreshold, "ctu-import-threshold", "The maximal amount of translation units that is considered " "for import when inlining functions during CTU analysis. " @@ -308,10 +316,6 @@ ANALYZER_OPTION(unsigned, CTUImportThreshold, "ctu-import-threshold", "various translation units.", 100u) -//===----------------------------------------------------------------------===// -// Unsinged analyzer options. -//===----------------------------------------------------------------------===// - ANALYZER_OPTION( unsigned, AlwaysInlineSize, "ipa-always-inline-size", "The size of the functions (in basic blocks), which should be considered " @@ -380,12 +384,6 @@ ANALYZER_OPTION( "Value: \"constructors\", \"destructors\", \"methods\".", "destructors") -ANALYZER_OPTION_DEPENDS_ON_USER_MODE( - StringRef, IPAMode, "ipa", - "Controls the mode of inter-procedural analysis. Value: \"none\", " - "\"basic-inlining\", \"inlining\", \"dynamic\", \"dynamic-bifurcate\".", - /* SHALLOW_VAL */ "inlining", /* DEEP_VAL */ "dynamic-bifurcate") - ANALYZER_OPTION( StringRef, ExplorationStrategy, "exploration_strategy", "Value: \"dfs\", \"bfs\", \"unexplored_first\", " @@ -393,5 +391,17 @@ ANALYZER_OPTION( "\"bfs_block_dfs_contents\".", "unexplored_first_queue") +ANALYZER_OPTION( + StringRef, RawSilencedCheckersAndPackages, "silence-checkers", + "A semicolon separated list of checker and package names to silence. " + "Silenced checkers will not emit reports, but the modeling remain enabled.", + "") + +ANALYZER_OPTION_DEPENDS_ON_USER_MODE( + StringRef, IPAMode, "ipa", + "Controls the mode of inter-procedural analysis. 
Value: \"none\", " + "\"basic-inlining\", \"inlining\", \"dynamic\", \"dynamic-bifurcate\".", + /* SHALLOW_VAL */ "inlining", /* DEEP_VAL */ "dynamic-bifurcate") + #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE #undef ANALYZER_OPTION diff --git a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h index 9630a229bd3..ce16095e10c 100644 --- a/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ b/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -163,9 +163,16 @@ class AnalyzerOptions : public RefCountedBase { public: using ConfigTable = llvm::StringMap; + /// Retrieves the list of checkers generated from Checkers.td. This doesn't + /// contain statically linked but non-generated checkers and plugin checkers! static std::vector getRegisteredCheckers(bool IncludeExperimental = false); + /// Retrieves the list of packages generated from Checkers.td. This doesn't + /// contain statically linked but non-generated packages and plugin packages! + static std::vector + getRegisteredPackages(bool IncludeExperimental = false); + /// Convenience function for printing options or checkers and their /// description in a formatted manner. If \p MinLineWidth is set to 0, no line /// breaks are introduced for the description. @@ -188,9 +195,11 @@ public: std::pair EntryDescPair, size_t EntryWidth, size_t InitialPad, size_t MinLineWidth = 0); + /// Pairs of checker/package name and enable/disable. + std::vector> CheckersAndPackages; - /// Pair of checker name and enable/disable. - std::vector> CheckersControlList; + /// Vector of checker/package names which will not emit warnings. + std::vector SilencedCheckersAndPackages; /// A key-value table of use-specified configuration values. // TODO: This shouldn't be public. @@ -212,12 +221,12 @@ public: /// The maximum number of times the analyzer visits a block. unsigned maxBlockVisitOnPath; - /// Disable all analyzer checks. + /// Disable all analyzer checkers. 
/// - /// This flag allows one to disable analyzer checks on the code processed by + /// This flag allows one to disable analyzer checkers on the code processed by /// the given analysis consumer. Note, the code will get parsed and the /// command-line options will get checked. - unsigned DisableAllChecks : 1; + unsigned DisableAllCheckers : 1; unsigned ShowCheckerHelp : 1; unsigned ShowCheckerHelpAlpha : 1; @@ -269,13 +278,13 @@ public: // Create an array of all -analyzer-config command line options. Sort it in // the constructor. - std::vector AnalyzerConfigCmdFlags = { + std::vector AnalyzerConfigCmdFlags = { #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ SHALLOW_VAL, DEEP_VAL) \ ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, SHALLOW_VAL) #define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ - CMDFLAG, + llvm::StringLiteral(CMDFLAG), #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" #undef ANALYZER_OPTION @@ -292,7 +301,7 @@ public: } AnalyzerOptions() - : DisableAllChecks(false), ShowCheckerHelp(false), + : DisableAllCheckers(false), ShowCheckerHelp(false), ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false), ShowCheckerOptionList(false), ShowCheckerOptionAlphaList(false), ShowCheckerOptionDeveloperList(false), ShowEnabledCheckerList(false), @@ -310,7 +319,7 @@ public: /// If an option value is not provided, returns the given \p DefaultVal. /// @param [in] CheckerName The *full name* of the checker. One may retrieve /// this from the checker object's field \c Name, or through \c - /// CheckerManager::getCurrentCheckName within the checker's registry + /// CheckerManager::getCurrentCheckerName within the checker's registry /// function. /// Checker options are retrieved in the following format: /// `-analyzer-config CheckerName:OptionName=Value. @@ -330,7 +339,7 @@ public: /// If an option value is not provided, returns the given \p DefaultVal. /// @param [in] CheckerName The *full name* of the checker. 
One may retrieve /// this from the checker object's field \c Name, or through \c - /// CheckerManager::getCurrentCheckName within the checker's registry + /// CheckerManager::getCurrentCheckerName within the checker's registry /// function. /// Checker options are retrieved in the following format: /// `-analyzer-config CheckerName:OptionName=Value. @@ -350,7 +359,7 @@ public: /// If an option value is not provided, returns the given \p DefaultVal. /// @param [in] CheckerName The *full name* of the checker. One may retrieve /// this from the checker object's field \c Name, or through \c - /// CheckerManager::getCurrentCheckName within the checker's registry + /// CheckerManager::getCurrentCheckerName within the checker's registry /// function. /// Checker options are retrieved in the following format: /// `-analyzer-config CheckerName:OptionName=Value. @@ -404,6 +413,43 @@ inline UserModeKind AnalyzerOptions::getUserMode() const { return K.getValue(); } +inline std::vector +AnalyzerOptions::getRegisteredCheckers(bool IncludeExperimental) { + static constexpr llvm::StringLiteral StaticAnalyzerCheckerNames[] = { +#define GET_CHECKERS +#define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \ + llvm::StringLiteral(FULLNAME), +#include "clang/StaticAnalyzer/Checkers/Checkers.inc" +#undef CHECKER +#undef GET_CHECKERS + }; + std::vector Checkers; + for (StringRef CheckerName : StaticAnalyzerCheckerNames) { + if (!CheckerName.startswith("debug.") && + (IncludeExperimental || !CheckerName.startswith("alpha."))) + Checkers.push_back(CheckerName); + } + return Checkers; +} + +inline std::vector +AnalyzerOptions::getRegisteredPackages(bool IncludeExperimental) { + static constexpr llvm::StringLiteral StaticAnalyzerPackageNames[] = { +#define GET_PACKAGES +#define PACKAGE(FULLNAME) llvm::StringLiteral(FULLNAME), +#include "clang/StaticAnalyzer/Checkers/Checkers.inc" +#undef PACKAGE +#undef GET_PACKAGES + }; + std::vector Packages; + for (StringRef PackageName : 
StaticAnalyzerPackageNames) { + if (PackageName != "debug" && + (IncludeExperimental || PackageName != "alpha")) + Packages.push_back(PackageName); + } + return Packages; +} + } // namespace clang #endif // LLVM_CLANG_STATICANALYZER_CORE_ANALYZEROPTIONS_H diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h index d30ad19b20f..e94b544172a 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -14,10 +14,10 @@ #ifndef LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTER_H #define LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTER_H +#include "clang/Analysis/PathDiagnostic.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" -#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" @@ -70,59 +70,253 @@ class SValBuilder; using DiagnosticForConsumerMapTy = llvm::DenseMap>; +/// Interface for classes constructing Stack hints. +/// +/// If a PathDiagnosticEvent occurs in a different frame than the final +/// diagnostic the hints can be used to summarize the effect of the call. +class StackHintGenerator { +public: + virtual ~StackHintGenerator() = 0; + + /// Construct the Diagnostic message for the given ExplodedNode. + virtual std::string getMessage(const ExplodedNode *N) = 0; +}; + +/// Constructs a Stack hint for the given symbol. +/// +/// The class knows how to construct the stack hint message based on +/// traversing the CallExpr associated with the call and checking if the given +/// symbol is returned or is one of the arguments. +/// The hint can be customized by redefining 'getMessageForX()' methods. 
+class StackHintGeneratorForSymbol : public StackHintGenerator { +private: + SymbolRef Sym; + std::string Msg; + +public: + StackHintGeneratorForSymbol(SymbolRef S, StringRef M) : Sym(S), Msg(M) {} + ~StackHintGeneratorForSymbol() override = default; + + /// Search the call expression for the symbol Sym and dispatch the + /// 'getMessageForX()' methods to construct a specific message. + std::string getMessage(const ExplodedNode *N) override; + + /// Produces the message of the following form: + /// 'Msg via Nth parameter' + virtual std::string getMessageForArg(const Expr *ArgE, unsigned ArgIndex); + + virtual std::string getMessageForReturn(const CallExpr *CallExpr) { + return Msg; + } + + virtual std::string getMessageForSymbolNotFound() { + return Msg; + } +}; + /// This class provides an interface through which checkers can create /// individual bug reports. -class BugReport : public llvm::ilist_node { +class BugReport { public: - class NodeResolver { - virtual void anchor(); - - public: - virtual ~NodeResolver() = default; - - virtual const ExplodedNode* - getOriginalNode(const ExplodedNode *N) = 0; - }; - - using ranges_iterator = const SourceRange *; - using VisitorList = SmallVector, 8>; - using visitor_iterator = VisitorList::iterator; - using ExtraTextList = SmallVector; - using NoteList = SmallVector, 4>; + enum class Kind { Basic, PathSensitive }; protected: friend class BugReportEquivClass; friend class BugReporter; + Kind K; const BugType& BT; - const Decl *DeclWithIssue = nullptr; std::string ShortDescription; std::string Description; - PathDiagnosticLocation Location; - PathDiagnosticLocation UniqueingLocation; - const Decl *UniqueingDecl; - const ExplodedNode *ErrorNode = nullptr; SmallVector Ranges; - ExtraTextList ExtraText; - NoteList Notes; + SmallVector, 4> Notes; + SmallVector Fixits; - using Symbols = llvm::DenseSet; - using Regions = llvm::DenseSet; + BugReport(Kind kind, const BugType &bt, StringRef desc) + : K(kind), BT(bt), 
Description(desc) {} + + BugReport(Kind K, const BugType &BT, StringRef ShortDescription, + StringRef Description) + : K(K), BT(BT), ShortDescription(ShortDescription), + Description(Description) {} + +public: + virtual ~BugReport() = default; + + Kind getKind() const { return K; } + + const BugType& getBugType() const { return BT; } + + /// A verbose warning message that is appropriate for displaying next to + /// the source code that introduces the problem. The description should be + /// at least a full sentence starting with a capital letter. The period at + /// the end of the warning is traditionally omitted. If the description + /// consists of multiple sentences, periods between the sentences are + /// encouraged, but the period at the end of the description is still omitted. + StringRef getDescription() const { return Description; } + + /// A short general warning message that is appropriate for displaying in + /// the list of all reported bugs. It should describe what kind of bug is found + /// but does not need to try to go into details of that specific bug. + /// Grammatical conventions of getDescription() apply here as well. + StringRef getShortDescription(bool UseFallback = true) const { + if (ShortDescription.empty() && UseFallback) + return Description; + return ShortDescription; + } + + /// The primary location of the bug report that points at the undesirable + /// behavior in the code. UIs should attach the warning description to this + /// location. The warning description should describe the bad behavior + /// at this location. + virtual PathDiagnosticLocation getLocation() const = 0; + + /// The smallest declaration that contains the bug location. + /// This is purely cosmetic; the declaration can be displayed to the user + /// but it does not affect whether the report is emitted. + virtual const Decl *getDeclWithIssue() const = 0; + + /// Get the location on which the report should be uniqued. 
Two warnings are + /// considered to be equivalent whenever they have the same bug types, + /// descriptions, and uniqueing locations. Out of a class of equivalent + /// warnings only one gets displayed to the user. For most warnings the + /// uniqueing location coincides with their location, but sometimes + /// it makes sense to use different locations. For example, a leak + /// checker can place the warning at the location where the last reference + /// to the leaking resource is dropped but at the same time unique the warning + /// by where that resource is acquired (allocated). + virtual PathDiagnosticLocation getUniqueingLocation() const = 0; + + /// Get the declaration that corresponds to (usually contains) the uniqueing + /// location. This is not actively used for uniqueing, i.e. otherwise + /// identical reports that have different uniqueing decls will be considered + /// equivalent. + virtual const Decl *getUniqueingDecl() const = 0; + + /// Add new item to the list of additional notes that need to be attached to + /// this report. If the report is path-sensitive, these notes will not be + /// displayed as part of the execution path explanation, but will be displayed + /// separately. Use bug visitors if you need to add an extra path note. + void addNote(StringRef Msg, const PathDiagnosticLocation &Pos, + ArrayRef Ranges = {}) { + auto P = std::make_shared(Pos, Msg); + + for (const auto &R : Ranges) + P->addRange(R); + + Notes.push_back(std::move(P)); + } + + ArrayRef> getNotes() { + return Notes; + } + + /// Add a range to a bug report. + /// + /// Ranges are used to highlight regions of interest in the source code. + /// They should be at the same source code line as the BugReport location. + /// By default, the source range of the statement corresponding to the error + /// node will be used; add a single invalid range to specify absence of + /// ranges. 
+ void addRange(SourceRange R) { + assert((R.isValid() || Ranges.empty()) && "Invalid range can only be used " + "to specify that the report does not have a range."); + Ranges.push_back(R); + } + + /// Get the SourceRanges associated with the report. + virtual ArrayRef getRanges() const { + return Ranges; + } + + /// Add a fix-it hint to the bug report. + /// + /// Fix-it hints are the suggested edits to the code that would resolve + /// the problem explained by the bug report. Fix-it hints should be + /// as conservative as possible because it is not uncommon for the user + /// to blindly apply all fixits to their project. Note that it is very hard + /// to produce a good fix-it hint for most path-sensitive warnings. + void addFixItHint(const FixItHint &F) { + Fixits.push_back(F); + } + + llvm::ArrayRef getFixits() const { return Fixits; } + + /// Reports are uniqued to ensure that we do not emit multiple diagnostics + /// for each bug. + virtual void Profile(llvm::FoldingSetNodeID& hash) const = 0; +}; + +class BasicBugReport : public BugReport { + PathDiagnosticLocation Location; + const Decl *DeclWithIssue = nullptr; + +public: + BasicBugReport(const BugType &bt, StringRef desc, PathDiagnosticLocation l) + : BugReport(Kind::Basic, bt, desc), Location(l) {} + + static bool classof(const BugReport *R) { + return R->getKind() == Kind::Basic; + } + + PathDiagnosticLocation getLocation() const override { + assert(Location.isValid()); + return Location; + } + + const Decl *getDeclWithIssue() const override { + return DeclWithIssue; + } + + PathDiagnosticLocation getUniqueingLocation() const override { + return getLocation(); + } + + const Decl *getUniqueingDecl() const override { + return getDeclWithIssue(); + } + + /// Specifically set the Decl where an issue occurred. This isn't necessary + /// for BugReports that cover a path as it will be automatically inferred. 
+ void setDeclWithIssue(const Decl *declWithIssue) { + DeclWithIssue = declWithIssue; + } + + void Profile(llvm::FoldingSetNodeID& hash) const override; +}; + +class PathSensitiveBugReport : public BugReport { +public: + using VisitorList = SmallVector, 8>; + using visitor_iterator = VisitorList::iterator; + using visitor_range = llvm::iterator_range; + +protected: + /// The ExplodedGraph node against which the report was thrown. It corresponds + /// to the end of the execution path that demonstrates the bug. + const ExplodedNode *ErrorNode = nullptr; + + /// The range that corresponds to ErrorNode's program point. It is usually + /// highlighted in the report. + const SourceRange ErrorNodeRange; + + /// Profile to identify equivalent bug reports for error report coalescing. /// A (stack of) a set of symbols that are registered with this /// report as being "interesting", and thus used to help decide which /// diagnostics to include when constructing the final path diagnostic. /// The stack is largely used by BugReporter when generating PathDiagnostics /// for multiple PathDiagnosticConsumers. - SmallVector interestingSymbols; + llvm::DenseMap InterestingSymbols; /// A (stack of) set of regions that are registered with this report as being /// "interesting", and thus used to help decide which diagnostics /// to include when constructing the final path diagnostic. /// The stack is largely used by BugReporter when generating PathDiagnostics /// for multiple PathDiagnosticConsumers. - SmallVector interestingRegions; + llvm::DenseMap + InterestingRegions; /// A set of location contexts that correspoind to call sites which should be /// considered "interesting". @@ -156,66 +350,58 @@ protected: /// Conditions we're already tracking. llvm::SmallSet TrackedConditions; -private: - // Used internally by BugReporter. 
- Symbols &getInterestingSymbols(); - Regions &getInterestingRegions(); + /// Reports with different uniqueing locations are considered to be different + /// for the purposes of deduplication. + PathDiagnosticLocation UniqueingLocation; + const Decl *UniqueingDecl; - void lazyInitializeInterestingSets(); - void pushInterestingSymbolsAndRegions(); - void popInterestingSymbolsAndRegions(); + const Stmt *getStmt() const; + + /// If an event occurs in a different frame than the final diagnostic, + /// supply a message that will be used to construct an extra hint on the + /// returns from all the calls on the stack from this event to the final + /// diagnostic. + // FIXME: Allow shared_ptr keys in DenseMap? + std::map> + StackHints; public: - BugReport(const BugType& bt, StringRef desc, const ExplodedNode *errornode) - : BT(bt), Description(desc), ErrorNode(errornode) {} + PathSensitiveBugReport(const BugType &bt, StringRef desc, + const ExplodedNode *errorNode) + : BugReport(Kind::PathSensitive, bt, desc), ErrorNode(errorNode), + ErrorNodeRange(getStmt() ? getStmt()->getSourceRange() + : SourceRange()) {} - BugReport(const BugType& bt, StringRef shortDesc, StringRef desc, - const ExplodedNode *errornode) - : BT(bt), ShortDescription(shortDesc), Description(desc), - ErrorNode(errornode) {} + PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc, + const ExplodedNode *errorNode) + : BugReport(Kind::PathSensitive, bt, shortDesc, desc), + ErrorNode(errorNode), + ErrorNodeRange(getStmt() ? getStmt()->getSourceRange() + : SourceRange()) {} - BugReport(const BugType &bt, StringRef desc, PathDiagnosticLocation l) - : BT(bt), Description(desc), Location(l) {} - - /// Create a BugReport with a custom uniqueing location. + /// Create a PathSensitiveBugReport with a custom uniqueing location. 
/// /// The reports that have the same report location, description, bug type, and /// ranges are uniqued - only one of the equivalent reports will be presented /// to the user. This method allows to rest the location which should be used /// for uniquing reports. For example, memory leaks checker, could set this to /// the allocation site, rather then the location where the bug is reported. - BugReport(BugType& bt, StringRef desc, const ExplodedNode *errornode, - PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique) - : BT(bt), Description(desc), UniqueingLocation(LocationToUnique), - UniqueingDecl(DeclToUnique), ErrorNode(errornode) {} + PathSensitiveBugReport(BugType &bt, StringRef desc, + const ExplodedNode *errorNode, + PathDiagnosticLocation LocationToUnique, + const Decl *DeclToUnique) + : BugReport(Kind::PathSensitive, bt, desc), ErrorNode(errorNode), + ErrorNodeRange(getStmt() ? getStmt()->getSourceRange() : SourceRange()), + UniqueingLocation(LocationToUnique), UniqueingDecl(DeclToUnique) { + assert(errorNode); + } - virtual ~BugReport(); - - const BugType& getBugType() const { return BT; } - //BugType& getBugType() { return BT; } - - /// True when the report has an execution path associated with it. - /// - /// A report is said to be path-sensitive if it was thrown against a - /// particular exploded node in the path-sensitive analysis graph. - /// Path-sensitive reports have their intermediate path diagnostics - /// auto-generated, perhaps with the help of checker-defined visitors, - /// and may contain extra notes. - /// Path-insensitive reports consist only of a single warning message - /// in a specific location, and perhaps extra notes. - /// Path-sensitive checkers are allowed to throw path-insensitive reports. 
- bool isPathSensitive() const { return ErrorNode != nullptr; } + static bool classof(const BugReport *R) { + return R->getKind() == Kind::PathSensitive; + } const ExplodedNode *getErrorNode() const { return ErrorNode; } - StringRef getDescription() const { return Description; } - - StringRef getShortDescription(bool UseFallback = true) const { - if (ShortDescription.empty() && UseFallback) - return Description; - return ShortDescription; - } - /// Indicates whether or not any path pruning should take place /// when generating a PathDiagnostic from this BugReport. bool shouldPrunePath() const { return !DoNotPrunePath; } @@ -223,15 +409,54 @@ public: /// Disable all path pruning when generating a PathDiagnostic. void disablePathPruning() { DoNotPrunePath = true; } - void markInteresting(SymbolRef sym); - void markInteresting(const MemRegion *R); - void markInteresting(SVal V); + /// Get the location on which the report should be uniqued. + PathDiagnosticLocation getUniqueingLocation() const override { + return UniqueingLocation; + } + + /// Get the declaration containing the uniqueing location. + const Decl *getUniqueingDecl() const override { + return UniqueingDecl; + } + + const Decl *getDeclWithIssue() const override; + + ArrayRef getRanges() const override; + + PathDiagnosticLocation getLocation() const override; + + /// Marks a symbol as interesting. Different kinds of interestingness will + /// be processed differently by visitors (e.g. if the tracking kind is + /// condition, will append "will be used as a condition" to the message). + void markInteresting(SymbolRef sym, bugreporter::TrackingKind TKind = + bugreporter::TrackingKind::Thorough); + + /// Marks a region as interesting. Different kinds of interestingness will + /// be processed differently by visitors (e.g. if the tracking kind is + /// condition, will append "will be used as a condition" to the message). 
+ void markInteresting( + const MemRegion *R, + bugreporter::TrackingKind TKind = bugreporter::TrackingKind::Thorough); + + /// Marks a symbolic value as interesting. Different kinds of interestingness + /// will be processed differently by visitors (e.g. if the tracking kind is + /// condition, will append "will be used as a condition" to the message). + void markInteresting(SVal V, bugreporter::TrackingKind TKind = + bugreporter::TrackingKind::Thorough); void markInteresting(const LocationContext *LC); - bool isInteresting(SymbolRef sym); - bool isInteresting(const MemRegion *R); - bool isInteresting(SVal V); - bool isInteresting(const LocationContext *LC); + bool isInteresting(SymbolRef sym) const; + bool isInteresting(const MemRegion *R) const; + bool isInteresting(SVal V) const; + bool isInteresting(const LocationContext *LC) const; + + Optional + getInterestingnessKind(SymbolRef sym) const; + + Optional + getInterestingnessKind(const MemRegion *R) const; + + Optional getInterestingnessKind(SVal V) const; /// Returns whether or not this report should be considered valid. /// @@ -254,87 +479,10 @@ public: Invalidations.insert(std::make_pair(Tag, Data)); } - /// Return the canonical declaration, be it a method or class, where - /// this issue semantically occurred. - const Decl *getDeclWithIssue() const; - - /// Specifically set the Decl where an issue occurred. This isn't necessary - /// for BugReports that cover a path as it will be automatically inferred. - void setDeclWithIssue(const Decl *declWithIssue) { - DeclWithIssue = declWithIssue; - } - - /// Add new item to the list of additional notes that need to be attached to - /// this path-insensitive report. If you want to add extra notes to a - /// path-sensitive report, you need to use a BugReporterVisitor because it - /// allows you to specify where exactly in the auto-generated path diagnostic - /// the extra note should appear. 
- void addNote(StringRef Msg, const PathDiagnosticLocation &Pos, - ArrayRef Ranges) { - auto P = std::make_shared(Pos, Msg); - - for (const auto &R : Ranges) - P->addRange(R); - - Notes.push_back(std::move(P)); - } - - // FIXME: Instead of making an override, we could have default-initialized - // Ranges with {}, however it crashes the MSVC 2013 compiler. - void addNote(StringRef Msg, const PathDiagnosticLocation &Pos) { - std::vector Ranges; - addNote(Msg, Pos, Ranges); - } - - virtual const NoteList &getNotes() { - return Notes; - } - - /// This allows for addition of meta data to the diagnostic. - /// - /// Currently, only the HTMLDiagnosticClient knows how to display it. - void addExtraText(StringRef S) { - ExtraText.push_back(S); - } - - virtual const ExtraTextList &getExtraText() { - return ExtraText; - } - - /// Return the "definitive" location of the reported bug. - /// - /// While a bug can span an entire path, usually there is a specific - /// location that can be used to identify where the key issue occurred. - /// This location is used by clients rendering diagnostics. - virtual PathDiagnosticLocation getLocation(const SourceManager &SM) const; - - /// Get the location on which the report should be uniqued. - PathDiagnosticLocation getUniqueingLocation() const { - return UniqueingLocation; - } - - /// Get the declaration containing the uniqueing location. - const Decl *getUniqueingDecl() const { - return UniqueingDecl; - } - - const Stmt *getStmt() const; - - /// Add a range to a bug report. - /// - /// Ranges are used to highlight regions of interest in the source code. - /// They should be at the same source code line as the BugReport location. - /// By default, the source range of the statement corresponding to the error - /// node will be used; add a single invalid range to specify absence of - /// ranges. 
- void addRange(SourceRange R) { - assert((R.isValid() || Ranges.empty()) && "Invalid range can only be used " - "to specify that the report does not have a range."); - Ranges.push_back(R); - } - - /// Get the SourceRanges associated with the report. - virtual llvm::iterator_range getRanges(); + /// Profile to identify equivalent bug reports for error report coalescing. + /// Reports are uniqued to ensure that we do not emit multiple diagnostics + /// for each bug. + void Profile(llvm::FoldingSetNodeID &hash) const override; /// Add custom or predefined bug report visitors to this report. /// @@ -351,6 +499,7 @@ public: /// Iterators through the custom diagnostic visitors. visitor_iterator visitor_begin() { return Callbacks.begin(); } visitor_iterator visitor_end() { return Callbacks.end(); } + visitor_range visitors() { return {visitor_begin(), visitor_end()}; } /// Notes that the condition of the CFGBlock associated with \p Cond is /// being tracked. @@ -359,10 +508,25 @@ public: return TrackedConditions.insert(Cond).second; } - /// Profile to identify equivalent bug reports for error report coalescing. - /// Reports are uniqued to ensure that we do not emit multiple diagnostics - /// for each bug. - virtual void Profile(llvm::FoldingSetNodeID& hash) const; + void addCallStackHint(PathDiagnosticPieceRef Piece, + std::unique_ptr StackHint) { + StackHints[Piece] = std::move(StackHint); + } + + bool hasCallStackHint(PathDiagnosticPieceRef Piece) const { + return StackHints.count(Piece) > 0; + } + + /// Produce the hint for the given node. The node contains + /// information about the call for which the diagnostic can be generated. 
+ std::string + getCallStackMessage(PathDiagnosticPieceRef Piece, + const ExplodedNode *N) const { + auto I = StackHints.find(Piece); + if (I != StackHints.end()) + return I->second->getMessage(N); + return ""; + } }; //===----------------------------------------------------------------------===// @@ -373,29 +537,21 @@ class BugReportEquivClass : public llvm::FoldingSetNode { friend class BugReporter; /// List of *owned* BugReport objects. - llvm::ilist Reports; + llvm::SmallVector, 4> Reports; - void AddReport(std::unique_ptr R) { - Reports.push_back(R.release()); + void AddReport(std::unique_ptr &&R) { + Reports.push_back(std::move(R)); } public: BugReportEquivClass(std::unique_ptr R) { AddReport(std::move(R)); } - ~BugReportEquivClass(); + + ArrayRef> getReports() const { return Reports; } void Profile(llvm::FoldingSetNodeID& ID) const { assert(!Reports.empty()); - Reports.front().Profile(ID); + Reports.front()->Profile(ID); } - - using iterator = llvm::ilist::iterator; - using const_iterator = llvm::ilist::const_iterator; - - iterator begin() { return Reports.begin(); } - iterator end() { return Reports.end(); } - - const_iterator begin() const { return Reports.begin(); } - const_iterator end() const { return Reports.end(); } }; //===----------------------------------------------------------------------===// @@ -404,9 +560,8 @@ public: class BugReporterData { public: - virtual ~BugReporterData(); + virtual ~BugReporterData() = default; - virtual DiagnosticsEngine& getDiagnostic() = 0; virtual ArrayRef getPathDiagnosticConsumers() = 0; virtual ASTContext &getASTContext() = 0; virtual SourceManager &getSourceManager() = 0; @@ -419,60 +574,29 @@ public: /// /// The base class is used for generating path-insensitive class BugReporter { -public: - enum Kind { BaseBRKind, GRBugReporterKind }; - private: - using BugTypesTy = llvm::ImmutableSet; - - BugTypesTy::Factory F; - BugTypesTy BugTypes; - - const Kind kind; BugReporterData& D; /// Generate and flush the 
diagnostics for the given bug report. void FlushReport(BugReportEquivClass& EQ); - /// Generate the diagnostics for the given bug report. - std::unique_ptr - generateDiagnosticForConsumerMap(BugReport *exampleReport, - ArrayRef consumers, - ArrayRef bugReports); - /// The set of bug reports tracked by the BugReporter. llvm::FoldingSet EQClasses; /// A vector of BugReports for tracking the allocated pointers and cleanup. std::vector EQClassesVector; -protected: - BugReporter(BugReporterData& d, Kind k) - : BugTypes(F.getEmptySet()), kind(k), D(d) {} - public: - BugReporter(BugReporterData& d) - : BugTypes(F.getEmptySet()), kind(BaseBRKind), D(d) {} + BugReporter(BugReporterData &d) : D(d) {} virtual ~BugReporter(); /// Generate and flush diagnostics for all bug reports. void FlushReports(); - Kind getKind() const { return kind; } - - DiagnosticsEngine& getDiagnostic() { - return D.getDiagnostic(); - } - ArrayRef getPathDiagnosticConsumers() { return D.getPathDiagnosticConsumers(); } - /// Iterator over the set of BugTypes tracked by the BugReporter. - using iterator = BugTypesTy::iterator; - iterator begin() { return BugTypes.begin(); } - iterator end() { return BugTypes.end(); } - /// Iterator over the set of BugReports tracked by the BugReporter. 
using EQClasses_iterator = llvm::FoldingSet::iterator; EQClasses_iterator EQClasses_begin() { return EQClasses.begin(); } @@ -480,126 +604,116 @@ public: ASTContext &getContext() { return D.getASTContext(); } - SourceManager &getSourceManager() { return D.getSourceManager(); } + const SourceManager &getSourceManager() { return D.getSourceManager(); } - AnalyzerOptions &getAnalyzerOptions() { return D.getAnalyzerOptions(); } - - virtual std::unique_ptr - generatePathDiagnostics(ArrayRef consumers, - ArrayRef &bugReports) { - return {}; - } - - void Register(const BugType *BT); + const AnalyzerOptions &getAnalyzerOptions() { return D.getAnalyzerOptions(); } /// Add the given report to the set of reports tracked by BugReporter. /// /// The reports are usually generated by the checkers. Further, they are /// folded based on the profile value, which is done to coalesce similar /// reports. - void emitReport(std::unique_ptr R); + virtual void emitReport(std::unique_ptr R); void EmitBasicReport(const Decl *DeclWithIssue, const CheckerBase *Checker, StringRef BugName, StringRef BugCategory, StringRef BugStr, PathDiagnosticLocation Loc, - ArrayRef Ranges = None); + ArrayRef Ranges = None, + ArrayRef Fixits = None); - void EmitBasicReport(const Decl *DeclWithIssue, CheckName CheckName, + void EmitBasicReport(const Decl *DeclWithIssue, CheckerNameRef CheckerName, StringRef BugName, StringRef BugCategory, StringRef BugStr, PathDiagnosticLocation Loc, - ArrayRef Ranges = None); + ArrayRef Ranges = None, + ArrayRef Fixits = None); private: llvm::StringMap StrBugTypes; /// Returns a BugType that is associated with the given name and /// category. 
- BugType *getBugTypeForName(CheckName CheckName, StringRef name, + BugType *getBugTypeForName(CheckerNameRef CheckerName, StringRef name, StringRef category); + + virtual BugReport * + findReportInEquivalenceClass(BugReportEquivClass &eqClass, + SmallVectorImpl &bugReports) { + return eqClass.getReports()[0].get(); + } + +protected: + /// Generate the diagnostics for the given bug report. + virtual std::unique_ptr + generateDiagnosticForConsumerMap(BugReport *exampleReport, + ArrayRef consumers, + ArrayRef bugReports); }; /// GRBugReporter is used for generating path-sensitive reports. -class GRBugReporter : public BugReporter { +class PathSensitiveBugReporter final : public BugReporter { ExprEngine& Eng; -public: - GRBugReporter(BugReporterData& d, ExprEngine& eng) - : BugReporter(d, GRBugReporterKind), Eng(eng) {} + BugReport *findReportInEquivalenceClass( + BugReportEquivClass &eqClass, + SmallVectorImpl &bugReports) override; - ~GRBugReporter() override; + /// Generate the diagnostics for the given bug report. + std::unique_ptr + generateDiagnosticForConsumerMap(BugReport *exampleReport, + ArrayRef consumers, + ArrayRef bugReports) override; +public: + PathSensitiveBugReporter(BugReporterData& d, ExprEngine& eng) + : BugReporter(d), Eng(eng) {} /// getGraph - Get the exploded graph created by the analysis engine /// for the analyzed method or function. - ExplodedGraph &getGraph(); + const ExplodedGraph &getGraph() const; /// getStateManager - Return the state manager used by the analysis /// engine. - ProgramStateManager &getStateManager(); + ProgramStateManager &getStateManager() const; /// \p bugReports A set of bug reports within a *single* equivalence class /// /// \return A mapping from consumers to the corresponding diagnostics. /// Iterates through the bug reports within a single equivalence class, /// stops at a first non-invalidated report. 
- std::unique_ptr - generatePathDiagnostics(ArrayRef consumers, - ArrayRef &bugReports) override; + std::unique_ptr generatePathDiagnostics( + ArrayRef consumers, + ArrayRef &bugReports); - /// classof - Used by isa<>, cast<>, and dyn_cast<>. - static bool classof(const BugReporter* R) { - return R->getKind() == GRBugReporterKind; - } + void emitReport(std::unique_ptr R) override; }; -class NodeMapClosure : public BugReport::NodeResolver { - InterExplodedGraphMap &M; - -public: - NodeMapClosure(InterExplodedGraphMap &m) : M(m) {} - - const ExplodedNode *getOriginalNode(const ExplodedNode *N) override { - return M.lookup(N); - } -}; - class BugReporterContext { - GRBugReporter &BR; - NodeMapClosure NMC; + PathSensitiveBugReporter &BR; virtual void anchor(); public: - BugReporterContext(GRBugReporter &br, InterExplodedGraphMap &Backmap) - : BR(br), NMC(Backmap) {} + BugReporterContext(PathSensitiveBugReporter &br) : BR(br) {} virtual ~BugReporterContext() = default; - GRBugReporter& getBugReporter() { return BR; } + PathSensitiveBugReporter& getBugReporter() { return BR; } - ExplodedGraph &getGraph() { return BR.getGraph(); } - - ProgramStateManager& getStateManager() { + ProgramStateManager& getStateManager() const { return BR.getStateManager(); } - SValBuilder &getSValBuilder() { - return getStateManager().getSValBuilder(); - } - - ASTContext &getASTContext() { + ASTContext &getASTContext() const { return BR.getContext(); } - SourceManager& getSourceManager() { + const SourceManager& getSourceManager() const { return BR.getSourceManager(); } - AnalyzerOptions &getAnalyzerOptions() { + const AnalyzerOptions &getAnalyzerOptions() const { return BR.getAnalyzerOptions(); } - - NodeMapClosure& getNodeResolver() { return NMC; } }; @@ -648,7 +762,7 @@ public: public: const NoteTag *makeNoteTag(Callback &&Cb, bool IsPrunable = false) { - // We cannot use make_unique because we cannot access the private + // We cannot use std::make_unique because we cannot access the 
private // constructor from inside it. std::unique_ptr T(new NoteTag(std::move(Cb), IsPrunable)); Tags.push_back(std::move(T)); diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h index ef5d327d39d..de0ee5de81b 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h @@ -33,11 +33,12 @@ class Stmt; namespace ento { -class BugReport; +class PathSensitiveBugReport; class BugReporterContext; class ExplodedNode; class MemRegion; class PathDiagnosticPiece; +using PathDiagnosticPieceRef = std::shared_ptr; /// BugReporterVisitors are used to add custom diagnostics along a path. class BugReporterVisitor : public llvm::FoldingSetNode { @@ -57,32 +58,68 @@ public: /// /// The last parameter can be used to register a new visitor with the given /// BugReport while processing a node. - virtual std::shared_ptr - VisitNode(const ExplodedNode *Succ, - BugReporterContext &BRC, BugReport &BR) = 0; + virtual PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) = 0; /// Last function called on the visitor, no further calls to VisitNode /// would follow. virtual void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *EndPathNode, - BugReport &BR); + PathSensitiveBugReport &BR); /// Provide custom definition for the final diagnostic piece on the /// path - the piece, which is displayed before the path is expanded. /// /// NOTE that this function can be implemented on at most one used visitor, /// and otherwise it crahes at runtime. 
- virtual std::shared_ptr - getEndPath(BugReporterContext &BRC, const ExplodedNode *N, BugReport &BR); + virtual PathDiagnosticPieceRef getEndPath(BugReporterContext &BRC, + const ExplodedNode *N, + PathSensitiveBugReport &BR); virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0; /// Generates the default final diagnostic piece. - static std::shared_ptr - getDefaultEndPath(BugReporterContext &BRC, const ExplodedNode *N, - BugReport &BR); + static PathDiagnosticPieceRef + getDefaultEndPath(const BugReporterContext &BRC, const ExplodedNode *N, + const PathSensitiveBugReport &BR); }; +namespace bugreporter { + +/// Specifies the type of tracking for an expression. +enum class TrackingKind { + /// Default tracking kind -- specifies that as much information should be + /// gathered about the tracked expression value as possible. + Thorough, + /// Specifies that a more moderate tracking should be used for the expression + /// value. This will essentially make sure that functions relevant to the it + /// aren't pruned, but otherwise relies on the user reading the code or + /// following the arrows. + Condition +}; + +/// Attempts to add visitors to track expression value back to its point of +/// origin. +/// +/// \param N A node "downstream" from the evaluation of the statement. +/// \param E The expression value which we are tracking +/// \param R The bug report to which visitors should be attached. +/// \param EnableNullFPSuppression Whether we should employ false positive +/// suppression (inlined defensive checks, returned null). +/// +/// \return Whether or not the function was able to add visitors for this +/// statement. Note that returning \c true does not actually imply +/// that any visitors were added. 
+bool trackExpressionValue(const ExplodedNode *N, const Expr *E, + PathSensitiveBugReport &R, + TrackingKind TKind = TrackingKind::Thorough, + bool EnableNullFPSuppression = true); + +const Expr *getDerefExpr(const Stmt *S); + +} // namespace bugreporter + /// Finds last store into the given region, /// which is different from a given symbolic value. class FindLastStoreBRVisitor final : public BugReporterVisitor { @@ -94,21 +131,34 @@ class FindLastStoreBRVisitor final : public BugReporterVisitor { /// bug, we are going to employ false positive suppression. bool EnableNullFPSuppression; -public: - /// Creates a visitor for every VarDecl inside a Stmt and registers it with - /// the BugReport. - static void registerStatementVarDecls(BugReport &BR, const Stmt *S, - bool EnableNullFPSuppression); + using TrackingKind = bugreporter::TrackingKind; + TrackingKind TKind; + const StackFrameContext *OriginSFC; +public: + /// \param V We're searching for the store where \c R received this value. + /// \param R The region we're tracking. + /// \param TKind May limit the amount of notes added to the bug report. + /// \param OriginSFC Only adds notes when the last store happened in a + /// different stackframe to this one. Disregarded if the tracking kind + /// is thorough. + /// This is useful, because for non-tracked regions, notes about + /// changes to its value in a nested stackframe could be pruned, and + /// this visitor can prevent that without polluting the bugpath too + /// much. 
FindLastStoreBRVisitor(KnownSVal V, const MemRegion *R, - bool InEnableNullFPSuppression) - : R(R), V(V), EnableNullFPSuppression(InEnableNullFPSuppression) {} + bool InEnableNullFPSuppression, TrackingKind TKind, + const StackFrameContext *OriginSFC = nullptr) + : R(R), V(V), EnableNullFPSuppression(InEnableNullFPSuppression), + TKind(TKind), OriginSFC(OriginSFC) { + assert(R); + } void Profile(llvm::FoldingSetNodeID &ID) const override; - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; }; class TrackConstraintBRVisitor final : public BugReporterVisitor { @@ -132,9 +182,9 @@ public: /// to make all PathDiagnosticPieces created by this visitor. static const char *getTag(); - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; private: /// Checks if the constraint is valid in the current state. @@ -150,9 +200,9 @@ public: ID.AddPointer(&x); } - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; /// If the statement is a message send expression with nil receiver, returns /// the receiver expression. Returns NULL otherwise. @@ -162,8 +212,10 @@ public: /// Visitor that tries to report interesting diagnostics from conditions. class ConditionBRVisitor final : public BugReporterVisitor { // FIXME: constexpr initialization isn't supported by MSVC2013. 
- static const char *const GenericTrueMessage; - static const char *const GenericFalseMessage; + constexpr static llvm::StringLiteral GenericTrueMessage = + "Assuming the condition is true"; + constexpr static llvm::StringLiteral GenericFalseMessage = + "Assuming the condition is false"; public: void Profile(llvm::FoldingSetNodeID &ID) const override { @@ -175,41 +227,44 @@ public: /// to make all PathDiagnosticPieces created by this visitor. static const char *getTag(); - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; - std::shared_ptr VisitNodeImpl(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR); + PathDiagnosticPieceRef VisitNodeImpl(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR); - std::shared_ptr + PathDiagnosticPieceRef VisitTerminator(const Stmt *Term, const ExplodedNode *N, - const CFGBlock *srcBlk, const CFGBlock *dstBlk, BugReport &R, - BugReporterContext &BRC); + const CFGBlock *SrcBlk, const CFGBlock *DstBlk, + PathSensitiveBugReport &R, BugReporterContext &BRC); - std::shared_ptr - VisitTrueTest(const Expr *Cond, BugReporterContext &BRC, BugReport &R, - const ExplodedNode *N, bool TookTrue); + PathDiagnosticPieceRef VisitTrueTest(const Expr *Cond, + BugReporterContext &BRC, + PathSensitiveBugReport &R, + const ExplodedNode *N, bool TookTrue); - std::shared_ptr - VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, - BugReporterContext &BRC, BugReport &R, const ExplodedNode *N, - bool TookTrue, bool IsAssuming); + PathDiagnosticPieceRef VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, + BugReporterContext &BRC, + PathSensitiveBugReport &R, + const ExplodedNode *N, bool TookTrue, + bool IsAssuming); - std::shared_ptr + PathDiagnosticPieceRef VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr, - 
BugReporterContext &BRC, BugReport &R, const ExplodedNode *N, - bool TookTrue, bool IsAssuming); + BugReporterContext &BRC, PathSensitiveBugReport &R, + const ExplodedNode *N, bool TookTrue, bool IsAssuming); - std::shared_ptr - VisitTrueTest(const Expr *Cond, const MemberExpr *ME, BugReporterContext &BRC, - BugReport &R, const ExplodedNode *N, bool TookTrue, - bool IsAssuming); + PathDiagnosticPieceRef VisitTrueTest(const Expr *Cond, const MemberExpr *ME, + BugReporterContext &BRC, + PathSensitiveBugReport &R, + const ExplodedNode *N, bool TookTrue, + bool IsAssuming); - std::shared_ptr + PathDiagnosticPieceRef VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr, - BugReporterContext &BRC, BugReport &R, + BugReporterContext &BRC, PathSensitiveBugReport &R, const ExplodedNode *N, bool TookTrue); /// Tries to print the value of the given expression. @@ -228,7 +283,7 @@ public: const Expr *ParentEx, raw_ostream &Out, BugReporterContext &BRC, - BugReport &R, + PathSensitiveBugReport &R, const ExplodedNode *N, Optional &prunable, bool IsSameFieldName); @@ -251,14 +306,13 @@ public: ID.AddPointer(getTag()); } - std::shared_ptr VisitNode(const ExplodedNode *, - BugReporterContext &, - BugReport &) override { + PathDiagnosticPieceRef VisitNode(const ExplodedNode *, BugReporterContext &, + PathSensitiveBugReport &) override { return nullptr; } void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *N, - BugReport &BR) override; + PathSensitiveBugReport &BR) override; }; /// When a region containing undefined value or '0' value is passed @@ -279,9 +333,9 @@ public: ID.AddPointer(R); } - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; }; class SuppressInlineDefensiveChecksVisitor final : public BugReporterVisitor { @@ -308,9 +362,9 @@ public: /// to make all 
PathDiagnosticPieces created by this visitor. static const char *getTag(); - std::shared_ptr VisitNode(const ExplodedNode *Succ, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; }; /// The bug visitor will walk all the nodes in a path and collect all the @@ -326,12 +380,12 @@ public: void Profile(llvm::FoldingSetNodeID &ID) const override; - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &BR) override; void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *EndPathNode, - BugReport &BR) override; + PathSensitiveBugReport &BR) override; }; @@ -340,32 +394,11 @@ class TagVisitor : public BugReporterVisitor { public: void Profile(llvm::FoldingSetNodeID &ID) const override; - std::shared_ptr VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &R) override; + PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + PathSensitiveBugReport &R) override; }; -namespace bugreporter { - -/// Attempts to add visitors to track expression value back to its point of -/// origin. -/// -/// \param N A node "downstream" from the evaluation of the statement. -/// \param E The expression value which we are tracking -/// \param R The bug report to which visitors should be attached. -/// \param EnableNullFPSuppression Whether we should employ false positive -/// suppression (inlined defensive checks, returned null). -/// -/// \return Whether or not the function was able to add visitors for this -/// statement. Note that returning \c true does not actually imply -/// that any visitors were added. 
-bool trackExpressionValue(const ExplodedNode *N, const Expr *E, BugReport &R, - bool EnableNullFPSuppression = true); - -const Expr *getDerefExpr(const Stmt *S); - -} // namespace bugreporter - } // namespace ento } // namespace clang diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h index 324b5312e79..237053df7e4 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h @@ -28,8 +28,8 @@ class ExprEngine; class BugType { private: - const CheckName Check; - const std::string Name; + const CheckerNameRef CheckerName; + const std::string Description; const std::string Category; const CheckerBase *Checker; bool SuppressOnSink; @@ -37,28 +37,27 @@ private: virtual void anchor(); public: - BugType(CheckName Check, StringRef Name, StringRef Cat, - bool SuppressOnSink=false) - : Check(Check), Name(Name), Category(Cat), Checker(nullptr), - SuppressOnSink(SuppressOnSink) {} + BugType(CheckerNameRef CheckerName, StringRef Name, StringRef Cat, + bool SuppressOnSink = false) + : CheckerName(CheckerName), Description(Name), Category(Cat), + Checker(nullptr), SuppressOnSink(SuppressOnSink) {} BugType(const CheckerBase *Checker, StringRef Name, StringRef Cat, - bool SuppressOnSink=false) - : Check(Checker->getCheckName()), Name(Name), Category(Cat), - Checker(Checker), SuppressOnSink(SuppressOnSink) {} + bool SuppressOnSink = false) + : CheckerName(Checker->getCheckerName()), Description(Name), + Category(Cat), Checker(Checker), SuppressOnSink(SuppressOnSink) {} virtual ~BugType() = default; - StringRef getName() const { return Name; } + StringRef getDescription() const { return Description; } StringRef getCategory() const { return Category; } - StringRef getCheckName() const { - // FIXME: This is a workaround to ensure that the correct check name is used - // The check names are set after the constructors are run. 
+ StringRef getCheckerName() const { + // FIXME: This is a workaround to ensure that the correct checerk name is + // used. The checker names are set after the constructors are run. // In case the BugType object is initialized in the checker's ctor - // the Check field will be empty. To circumvent this problem we use + // the CheckerName field will be empty. To circumvent this problem we use // CheckerBase whenever it is possible. - StringRef CheckName = - Checker ? Checker->getCheckName().getName() : Check.getName(); - assert(!CheckName.empty() && "Check name is not set properly."); - return CheckName; + StringRef Ret = Checker ? Checker->getCheckerName() : CheckerName; + assert(!Ret.empty() && "Checker name is not set properly."); + return Ret; } /// isSuppressOnSink - Returns true if bug reports associated with this bug @@ -71,8 +70,9 @@ class BuiltinBug : public BugType { const std::string desc; void anchor() override; public: - BuiltinBug(class CheckName check, const char *name, const char *description) - : BugType(check, name, categories::LogicError), desc(description) {} + BuiltinBug(class CheckerNameRef checker, const char *name, + const char *description) + : BugType(checker, name, categories::LogicError), desc(description) {} BuiltinBug(const CheckerBase *checker, const char *name, const char *description) diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h b/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h index 85526eb49f0..22c1a7dd98c 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h @@ -18,6 +18,7 @@ namespace clang { extern const char * const MemoryRefCount; extern const char * const MemoryError; extern const char * const UnixAPI; + extern const char * const CXXObjectLifecycle; } } } diff --git a/include/clang/StaticAnalyzer/Core/Checker.h b/include/clang/StaticAnalyzer/Core/Checker.h index 
d0fe15f8b89..0c7acdbc3a9 100644 --- a/include/clang/StaticAnalyzer/Core/Checker.h +++ b/include/clang/StaticAnalyzer/Core/Checker.h @@ -490,12 +490,12 @@ public: } // end eval namespace class CheckerBase : public ProgramPointTag { - CheckName Name; + CheckerNameRef Name; friend class ::clang::ento::CheckerManager; public: StringRef getTagDescription() const override; - CheckName getCheckName() const; + CheckerNameRef getCheckerName() const; /// See CheckerManager::runCheckersForPrintState. virtual void printState(raw_ostream &Out, ProgramStateRef State, diff --git a/include/clang/StaticAnalyzer/Core/CheckerManager.h b/include/clang/StaticAnalyzer/Core/CheckerManager.h index 6cc4baa1687..38a9aaf72c2 100644 --- a/include/clang/StaticAnalyzer/Core/CheckerManager.h +++ b/include/clang/StaticAnalyzer/Core/CheckerManager.h @@ -90,21 +90,23 @@ enum PointerEscapeKind { PSK_EscapeOther }; -// This wrapper is used to ensure that only StringRefs originating from the -// CheckerRegistry are used as check names. We want to make sure all check -// name strings have a lifetime that keeps them alive at least until the path -// diagnostics have been processed. -class CheckName { +/// This wrapper is used to ensure that only StringRefs originating from the +/// CheckerRegistry are used as check names. We want to make sure all checker +/// name strings have a lifetime that keeps them alive at least until the path +/// diagnostics have been processed, since they are expected to be constexpr +/// string literals (most likely generated by TblGen). 
+class CheckerNameRef { friend class ::clang::ento::CheckerRegistry; StringRef Name; - explicit CheckName(StringRef Name) : Name(Name) {} + explicit CheckerNameRef(StringRef Name) : Name(Name) {} public: - CheckName() = default; + CheckerNameRef() = default; StringRef getName() const { return Name; } + operator StringRef() const { return Name; } }; enum class ObjCMessageVisitKind { @@ -117,7 +119,7 @@ class CheckerManager { ASTContext &Context; const LangOptions LangOpts; AnalyzerOptions &AOptions; - CheckName CurrentCheckName; + CheckerNameRef CurrentCheckerName; public: CheckerManager(ASTContext &Context, AnalyzerOptions &AOptions) @@ -125,8 +127,8 @@ public: ~CheckerManager(); - void setCurrentCheckName(CheckName name) { CurrentCheckName = name; } - CheckName getCurrentCheckName() const { return CurrentCheckName; } + void setCurrentCheckerName(CheckerNameRef name) { CurrentCheckerName = name; } + CheckerNameRef getCurrentCheckerName() const { return CurrentCheckerName; } bool hasPathSensitiveCheckers() const; @@ -162,7 +164,7 @@ public: assert(!ref && "Checker already registered, use getChecker!"); CHECKER *checker = new CHECKER(std::forward(Args)...); - checker->Name = CurrentCheckName; + checker->Name = CurrentCheckerName; CheckerDtors.push_back(CheckerDtor(checker, destruct)); CHECKER::_register(checker, *this); ref = checker; diff --git a/include/clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h b/include/clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h index ef6e7e0f45d..8601966c91e 100644 --- a/include/clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h +++ b/include/clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h @@ -20,17 +20,19 @@ namespace clang { class AnalyzerOptions; class Preprocessor; +namespace cross_tu { +class CrossTranslationUnitContext; +} namespace ento { class PathDiagnosticConsumer; typedef std::vector PathDiagnosticConsumers; -#define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATEFN)\ -void CREATEFN(AnalyzerOptions 
&AnalyzerOpts,\ - PathDiagnosticConsumers &C,\ - const std::string &Prefix,\ - const Preprocessor &PP); +#define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATEFN) \ + void CREATEFN(AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C, \ + const std::string &Prefix, const Preprocessor &PP, \ + const cross_tu::CrossTranslationUnitContext &CTU); #include "clang/StaticAnalyzer/Core/Analyses.def" } // end 'ento' namespace diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h b/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h index b0dda78a00a..d605a6a667f 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h @@ -15,9 +15,9 @@ #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_ANALYSISMANAGER_H #include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/PathDiagnostic.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" -#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" namespace clang { @@ -32,7 +32,6 @@ class AnalysisManager : public BugReporterData { AnalysisDeclContextManager AnaCtxMgr; ASTContext &Ctx; - DiagnosticsEngine &Diags; const LangOptions &LangOpts; PathDiagnosticConsumers PathConsumers; @@ -45,7 +44,7 @@ class AnalysisManager : public BugReporterData { public: AnalyzerOptions &options; - AnalysisManager(ASTContext &ctx, DiagnosticsEngine &diags, + AnalysisManager(ASTContext &ctx, const PathDiagnosticConsumers &Consumers, StoreManagerCreator storemgr, ConstraintManagerCreator constraintmgr, @@ -84,10 +83,6 @@ public: return getASTContext().getSourceManager(); } - DiagnosticsEngine &getDiagnostic() override { - return Diags; - } - const LangOptions &getLangOpts() const { return LangOpts; } diff --git 
a/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h index db84102983a..fc1cc913882 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h @@ -347,7 +347,7 @@ public: ProgramStateRef invalidateRegions(unsigned BlockCount, ProgramStateRef Orig = nullptr) const; - using FrameBindingTy = std::pair; + using FrameBindingTy = std::pair; using BindingsTy = SmallVectorImpl; /// Populates the given SmallVector with the bindings in the callee's stack @@ -386,11 +386,12 @@ public: /// during analysis if the call is inlined, but it may still be useful /// in intermediate calculations even if the call isn't inlined. /// May fail; returns null on failure. - const StackFrameContext *getCalleeStackFrame() const; + const StackFrameContext *getCalleeStackFrame(unsigned BlockCount) const; /// Returns memory location for a parameter variable within the callee stack /// frame. May fail; returns null on failure. - const VarRegion *getParameterLocation(unsigned Index) const; + const VarRegion *getParameterLocation(unsigned Index, + unsigned BlockCount) const; /// Returns true if on the current path, the argument was constructed by /// calling a C++ constructor over it. This is an internal detail of the @@ -1063,8 +1064,19 @@ class CallDescription { // e.g. "{a, b}" represent the qualified names, like "a::b". std::vector QualifiedName; Optional RequiredArgs; + Optional RequiredParams; int Flags; + // A constructor helper. + static Optional readRequiredParams(Optional RequiredArgs, + Optional RequiredParams) { + if (RequiredParams) + return RequiredParams; + if (RequiredArgs) + return static_cast(*RequiredArgs); + return None; + } + public: /// Constructs a CallDescription object. /// @@ -1077,14 +1089,17 @@ public: /// call. Omit this parameter to match every occurrence of call with a given /// name regardless the number of arguments. 
CallDescription(int Flags, ArrayRef QualifiedName, - Optional RequiredArgs = None) + Optional RequiredArgs = None, + Optional RequiredParams = None) : QualifiedName(QualifiedName), RequiredArgs(RequiredArgs), + RequiredParams(readRequiredParams(RequiredArgs, RequiredParams)), Flags(Flags) {} /// Construct a CallDescription with default flags. CallDescription(ArrayRef QualifiedName, - Optional RequiredArgs = None) - : CallDescription(0, QualifiedName, RequiredArgs) {} + Optional RequiredArgs = None, + Optional RequiredParams = None) + : CallDescription(0, QualifiedName, RequiredArgs, RequiredParams) {} /// Get the name of the function that this object matches. StringRef getFunctionName() const { return QualifiedName.back(); } diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h index 981133e6697..7f4df0d88de 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h @@ -103,7 +103,7 @@ public: return Eng.getBugReporter(); } - SourceManager &getSourceManager() { + const SourceManager &getSourceManager() { return getBugReporter().getSourceManager(); } @@ -234,7 +234,7 @@ public: } /// A shorthand version of getNoteTag that doesn't require you to accept - /// the BugReporterContext arguments when you don't need it. + /// the 'BugReporterContext' argument when you don't need it. /// /// @param Cb Callback only with 'BugReport &' parameter. /// @param IsPrunable Whether the note is prunable. It allows BugReporter @@ -247,6 +247,19 @@ public: IsPrunable); } + /// A shorthand version of getNoteTag that doesn't require you to accept + /// the arguments when you don't need it. + /// + /// @param Cb Callback without parameters. + /// @param IsPrunable Whether the note is prunable. 
It allows BugReporter + /// to omit the note from the report if it would make the displayed + /// bug path significantly shorter. + const NoteTag *getNoteTag(std::function &&Cb, + bool IsPrunable = false) { + return getNoteTag([Cb](BugReporterContext &, BugReport &) { return Cb(); }, + IsPrunable); + } + /// A shorthand version of getNoteTag that accepts a plain note. /// /// @param Note The note. diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h new file mode 100644 index 00000000000..f5a710c77a6 --- /dev/null +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h @@ -0,0 +1,55 @@ +//===- DynamicCastInfo.h - Runtime cast information -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICCASTINFO_H +#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICCASTINFO_H + +#include "clang/AST/Type.h" + +namespace clang { +namespace ento { + +class DynamicCastInfo { +public: + enum CastResult { Success, Failure }; + + DynamicCastInfo(QualType from, QualType to, CastResult resultKind) + : From(from), To(to), ResultKind(resultKind) {} + + QualType from() const { return From; } + QualType to() const { return To; } + + bool equals(QualType from, QualType to) const { + return From == from && To == to; + } + + bool succeeds() const { return ResultKind == CastResult::Success; } + bool fails() const { return ResultKind == CastResult::Failure; } + + bool operator==(const DynamicCastInfo &RHS) const { + return From == RHS.From && To == RHS.To; + } + bool operator<(const DynamicCastInfo &RHS) const { + return From < RHS.From && To < RHS.To; 
+ } + + void Profile(llvm::FoldingSetNodeID &ID) const { + ID.Add(From); + ID.Add(To); + ID.AddInteger(ResultKind); + } + +private: + QualType From, To; + CastResult ResultKind; +}; + +} // namespace ento +} // namespace clang + +#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICCASTINFO_H diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h new file mode 100644 index 00000000000..356401d7756 --- /dev/null +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h @@ -0,0 +1,73 @@ +//===- DynamicType.h - Dynamic type related APIs ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines APIs that track and query dynamic type information. This +// information can be used to devirtualize calls during the symbolic execution +// or do type checking. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPE_H +#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPE_H + +#include "clang/AST/Type.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/Optional.h" +#include + +namespace clang { +namespace ento { + +/// Get dynamic type information for the region \p MR. +DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR); + +/// Get raw dynamic type information for the region \p MR. +const DynamicTypeInfo *getRawDynamicTypeInfo(ProgramStateRef State, + const MemRegion *MR); + +/// Get dynamic cast information from \p CastFromTy to \p CastToTy of \p MR. +const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State, + const MemRegion *MR, + QualType CastFromTy, + QualType CastToTy); + +/// Set dynamic type information of the region; return the new state. +ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR, + DynamicTypeInfo NewTy); + +/// Set dynamic type information of the region; return the new state. +ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *MR, + QualType NewTy, bool CanBeSubClassed = true); + +/// Set dynamic type and cast information of the region; return the new state. +ProgramStateRef setDynamicTypeAndCastInfo(ProgramStateRef State, + const MemRegion *MR, + QualType CastFromTy, + QualType CastToTy, + bool IsCastSucceeds); + +/// Removes the dead type informations from \p State. 
+ProgramStateRef removeDeadTypes(ProgramStateRef State, SymbolReaper &SR); + +/// Removes the dead cast informations from \p State. +ProgramStateRef removeDeadCasts(ProgramStateRef State, SymbolReaper &SR); + +void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false); + +} // namespace ento +} // namespace clang + +#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPE_H diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h index 9bb1e213756..6262c4a1ce3 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h @@ -1,10 +1,11 @@ -//== DynamicTypeInfo.h - Runtime type information ----------------*- C++ -*--=// +//===- DynamicTypeInfo.h - Runtime type information -------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEINFO_H #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEINFO_H @@ -16,36 +17,37 @@ namespace ento { /// Stores the currently inferred strictest bound on the runtime type /// of a region in a given state along the analysis path. class DynamicTypeInfo { -private: - QualType T; - bool CanBeASubClass; - public: + DynamicTypeInfo() : DynTy(QualType()) {} - DynamicTypeInfo() : T(QualType()) {} - DynamicTypeInfo(QualType WithType, bool CanBeSub = true) - : T(WithType), CanBeASubClass(CanBeSub) {} + DynamicTypeInfo(QualType Ty, bool CanBeSub = true) + : DynTy(Ty), CanBeASubClass(CanBeSub) {} - /// Return false if no dynamic type info is available. 
- bool isValid() const { return !T.isNull(); } - - /// Returns the currently inferred upper bound on the runtime type. - QualType getType() const { return T; } - - /// Returns false if the type information is precise (the type T is + /// Returns false if the type information is precise (the type 'DynTy' is /// the only type in the lattice), true otherwise. bool canBeASubClass() const { return CanBeASubClass; } + /// Returns true if the dynamic type info is available. + bool isValid() const { return !DynTy.isNull(); } + + /// Returns the currently inferred upper bound on the runtime type. + QualType getType() const { return DynTy; } + + bool operator==(const DynamicTypeInfo &RHS) const { + return DynTy == RHS.DynTy && CanBeASubClass == RHS.CanBeASubClass; + } + void Profile(llvm::FoldingSetNodeID &ID) const { - ID.Add(T); - ID.AddInteger((unsigned)CanBeASubClass); - } - bool operator==(const DynamicTypeInfo &X) const { - return T == X.T && CanBeASubClass == X.CanBeASubClass; + ID.Add(DynTy); + ID.AddBoolean(CanBeASubClass); } + +private: + QualType DynTy; + bool CanBeASubClass; }; -} // end ento -} // end clang +} // namespace ento +} // namespace clang -#endif +#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEINFO_H diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h b/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h deleted file mode 100644 index a84b2487206..00000000000 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h +++ /dev/null @@ -1,63 +0,0 @@ -//===- DynamicTypeMap.h - Dynamic type map ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides APIs for tracking dynamic type information. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H - -#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "llvm/ADT/ImmutableMap.h" -#include "clang/AST/Type.h" - -namespace clang { -namespace ento { - -class MemRegion; - -/// The GDM component containing the dynamic type info. This is a map from a -/// symbol to its most likely type. -struct DynamicTypeMap {}; - -using DynamicTypeMapTy = llvm::ImmutableMap; - -template <> -struct ProgramStateTrait - : public ProgramStatePartialTrait { - static void *GDMIndex(); -}; - -/// Get dynamic type information for a region. -DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State, - const MemRegion *Reg); - -/// Set dynamic type information of the region; return the new state. -ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *Reg, - DynamicTypeInfo NewTy); - -/// Set dynamic type information of the region; return the new state. 
-inline ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, - const MemRegion *Reg, QualType NewTy, - bool CanBeSubClassed = true) { - return setDynamicTypeInfo(State, Reg, - DynamicTypeInfo(NewTy, CanBeSubClassed)); -} - -void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false); - -} // namespace ento -} // namespace clang - -#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_DYNAMICTYPEMAP_H diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h index 727d04cba27..e87772c04b9 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h @@ -131,10 +131,12 @@ class ExplodedNode : public llvm::FoldingSetNode { /// Succs - The successors of this node. NodeGroup Succs; + int64_t Id; + public: explicit ExplodedNode(const ProgramPoint &loc, ProgramStateRef state, - bool IsSink) - : Location(loc), State(std::move(state)), Succs(IsSink) { + int64_t Id, bool IsSink) + : Location(loc), State(std::move(state)), Succs(IsSink), Id(Id) { assert(isSink() == IsSink); } @@ -153,7 +155,11 @@ public: CFG &getCFG() const { return *getLocationContext()->getCFG(); } - ParentMap &getParentMap() const {return getLocationContext()->getParentMap();} + const CFGBlock *getCFGBlock() const; + + const ParentMap &getParentMap() const { + return getLocationContext()->getParentMap(); + } template T &getAnalysis() const { @@ -219,12 +225,20 @@ public: // Iterators over successor and predecessor vertices. 
using succ_iterator = ExplodedNode * const *; + using succ_range = llvm::iterator_range; + using const_succ_iterator = const ExplodedNode * const *; + using const_succ_range = llvm::iterator_range; + using pred_iterator = ExplodedNode * const *; + using pred_range = llvm::iterator_range; + using const_pred_iterator = const ExplodedNode * const *; + using const_pred_range = llvm::iterator_range; pred_iterator pred_begin() { return Preds.begin(); } pred_iterator pred_end() { return Preds.end(); } + pred_range preds() { return {Preds.begin(), Preds.end()}; } const_pred_iterator pred_begin() const { return const_cast(this)->pred_begin(); @@ -232,9 +246,11 @@ public: const_pred_iterator pred_end() const { return const_cast(this)->pred_end(); } + const_pred_range preds() const { return {Preds.begin(), Preds.end()}; } succ_iterator succ_begin() { return Succs.begin(); } succ_iterator succ_end() { return Succs.end(); } + succ_range succs() { return {Succs.begin(), Succs.end()}; } const_succ_iterator succ_begin() const { return const_cast(this)->succ_begin(); @@ -242,8 +258,9 @@ public: const_succ_iterator succ_end() const { return const_cast(this)->succ_end(); } + const_succ_range succs() const { return {Succs.begin(), Succs.end()}; } - int64_t getID(ExplodedGraph *G) const; + int64_t getID() const { return Id; } /// The node is trivial if it has only one successor, only one predecessor, /// it's predecessor has only one successor, @@ -252,6 +269,30 @@ public: /// Trivial nodes may be skipped while printing exploded graph. bool isTrivial() const; + /// If the node's program point corresponds to a statement, retrieve that + /// statement. Useful for figuring out where to put a warning or a note. + /// If the statement belongs to a body-farmed definition, + /// retrieve the call site for that definition. + const Stmt *getStmtForDiagnostics() const; + + /// Find the next statement that was executed on this node's execution path. 
+ /// Useful for explaining control flow that follows the current node. + /// If the statement belongs to a body-farmed definition, retrieve the + /// call site for that definition. + const Stmt *getNextStmtForDiagnostics() const; + + /// Find the statement that was executed immediately before this node. + /// Useful when the node corresponds to a CFG block entrance. + /// If the statement belongs to a body-farmed definition, retrieve the + /// call site for that definition. + const Stmt *getPreviousStmtForDiagnostics() const; + + /// Find the statement that was executed at or immediately before this node. + /// Useful when any nearby statement will do. + /// If the statement belongs to a body-farmed definition, retrieve the + /// call site for that definition. + const Stmt *getCurrentOrPreviousStmtForDiagnostics() const; + private: void replaceSuccessor(ExplodedNode *node) { Succs.replaceNode(node); } void replacePredecessor(ExplodedNode *node) { Preds.replaceNode(node); } @@ -285,7 +326,7 @@ protected: BumpVectorContext BVC; /// NumNodes - The number of nodes in the graph. - unsigned NumNodes = 0; + int64_t NumNodes = 0; /// A list of recently allocated nodes that can potentially be recycled. NodeVector ChangedNodes; @@ -319,10 +360,11 @@ public: /// ExplodedGraph for further processing. ExplodedNode *createUncachedNode(const ProgramPoint &L, ProgramStateRef State, + int64_t Id, bool IsSink = false); std::unique_ptr MakeEmptyGraph() const { - return llvm::make_unique(); + return std::make_unique(); } /// addRoot - Add an untyped node to the set of roots. diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index 2629d7121de..2d0967616ff 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -145,9 +145,9 @@ private: ObjCNoReturn ObjCNoRet; /// The BugReporter associated with this engine. 
It is important that - /// this object be placed at the very end of member variables so that its - /// destructor is called before the rest of the ExprEngine is destroyed. - GRBugReporter BR; + /// this object be placed at the very end of member variables so that its + /// destructor is called before the rest of the ExprEngine is destroyed. + PathSensitiveBugReporter BR; /// The functions which have been analyzed through inlining. This is owned by /// AnalysisConsumer. It can be null. @@ -530,7 +530,7 @@ public: void VisitCXXDestructor(QualType ObjectType, const MemRegion *Dest, const Stmt *S, bool IsBaseDtor, ExplodedNode *Pred, ExplodedNodeSet &Dst, - const EvalCallOptions &Options); + EvalCallOptions &Options); void VisitCXXNewAllocatorCall(const CXXNewExpr *CNE, ExplodedNode *Pred, @@ -666,7 +666,7 @@ public: const LocationContext *LCtx, ProgramStateRef State); - /// Evaluate a call, running pre- and post-call checks and allowing checkers + /// Evaluate a call, running pre- and post-call checkers and allowing checkers /// to be responsible for handling the evaluation of the call itself. void evalCall(ExplodedNodeSet &Dst, ExplodedNode *Pred, const CallEvent &Call); diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 071e35085a5..71cbbe28fc2 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -169,6 +169,7 @@ public: Kind getKind() const { return kind; } template const RegionTy* getAs() const; + template const RegionTy* castAs() const; virtual bool isBoundable() const { return false; } @@ -1231,6 +1232,11 @@ const RegionTy* MemRegion::getAs() const { return nullptr; } +template +const RegionTy* MemRegion::castAs() const { + return cast(this); +} + //===----------------------------------------------------------------------===// // MemRegionManager - Factory object for creating regions. 
//===----------------------------------------------------------------------===// diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h index d38058f9af5..07920790c80 100644 --- a/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ b/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -507,6 +507,10 @@ public: return *svalBuilder; } + const SValBuilder &getSValBuilder() const { + return *svalBuilder; + } + SymbolManager &getSymbolManager() { return svalBuilder->getSymbolManager(); } diff --git a/include/clang/Tooling/ASTDiff/ASTDiff.h b/include/clang/Tooling/ASTDiff/ASTDiff.h index d6cbc09dced..c1cc124e1e9 100644 --- a/include/clang/Tooling/ASTDiff/ASTDiff.h +++ b/include/clang/Tooling/ASTDiff/ASTDiff.h @@ -71,7 +71,7 @@ public: /// Constructs a tree from any AST node. template SyntaxTree(T *Node, ASTContext &AST) - : TreeImpl(llvm::make_unique(this, Node, AST)) {} + : TreeImpl(std::make_unique(this, Node, AST)) {} SyntaxTree(SyntaxTree &&Other) = default; ~SyntaxTree(); diff --git a/include/clang/Tooling/AllTUsExecution.h b/include/clang/Tooling/AllTUsExecution.h index e670f54234a..1e618b5ba2f 100644 --- a/include/clang/Tooling/AllTUsExecution.h +++ b/include/clang/Tooling/AllTUsExecution.h @@ -44,8 +44,6 @@ public: StringRef getExecutorName() const override { return ExecutorName; } - bool isSingleProcess() const override { return true; } - using ToolExecutor::execute; llvm::Error @@ -71,6 +69,7 @@ private: unsigned ThreadCount; }; +extern llvm::cl::opt ExecutorConcurrency; extern llvm::cl::opt Filter; } // end namespace tooling diff --git a/include/clang/Tooling/ArgumentsAdjusters.h b/include/clang/Tooling/ArgumentsAdjusters.h index bf088603432..c48a8725aae 100644 --- a/include/clang/Tooling/ArgumentsAdjusters.h +++ b/include/clang/Tooling/ArgumentsAdjusters.h @@ -43,6 +43,10 @@ ArgumentsAdjuster getClangSyntaxOnlyAdjuster(); /// arguments. 
ArgumentsAdjuster getClangStripOutputAdjuster(); +/// Gets an argument adjuster which removes command line arguments related to +/// diagnostic serialization. +ArgumentsAdjuster getClangStripSerializeDiagnosticAdjuster(); + /// Gets an argument adjuster which removes dependency-file /// related command line arguments. ArgumentsAdjuster getClangStripDependencyFileAdjuster(); diff --git a/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h new file mode 100644 index 00000000000..7d088134347 --- /dev/null +++ b/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -0,0 +1,188 @@ +//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H +#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H + +#include "clang/Basic/LLVM.h" +#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/VirtualFileSystem.h" +#include + +namespace clang { +namespace tooling { +namespace dependencies { + +/// An in-memory representation of a file system entity that is of interest to +/// the dependency scanning filesystem. +/// +/// It represents one of the following: +/// - an opened source file with minimized contents and a stat value. +/// - an opened source file with original contents and a stat value. +/// - a directory entry with its stat value. +/// - an error value to represent a file system error. 
+/// - a placeholder with an invalid stat indicating a not yet initialized entry. +class CachedFileSystemEntry { +public: + /// Default constructor creates an entry with an invalid stat. + CachedFileSystemEntry() : MaybeStat(llvm::vfs::Status()) {} + + CachedFileSystemEntry(std::error_code Error) : MaybeStat(std::move(Error)) {} + + /// Create an entry that represents an opened source file with minimized or + /// original contents. + /// + /// The filesystem opens the file even for `stat` calls to avoid the + /// issues with stat + open of minimized files that might lead to a + /// mismatching size of the file. If file is not minimized, the full file is + /// read and copied into memory to ensure that it's not memory mapped to avoid + /// running out of file descriptors. + static CachedFileSystemEntry createFileEntry(StringRef Filename, + llvm::vfs::FileSystem &FS, + bool Minimize = true); + + /// Create an entry that represents a directory on the filesystem. + static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status &&Stat); + + /// \returns True if the entry is valid. + bool isValid() const { return !MaybeStat || MaybeStat->isStatusKnown(); } + + /// \returns True if the current entry points to a directory. + bool isDirectory() const { return MaybeStat && MaybeStat->isDirectory(); } + + /// \returns The error or the file's contents. + llvm::ErrorOr getContents() const { + if (!MaybeStat) + return MaybeStat.getError(); + assert(!MaybeStat->isDirectory() && "not a file"); + assert(isValid() && "not initialized"); + return StringRef(Contents); + } + + /// \returns The error or the status of the entry. + llvm::ErrorOr getStatus() const { + assert(isValid() && "not initialized"); + return MaybeStat; + } + + /// \returns the name of the file. 
+ StringRef getName() const { + assert(isValid() && "not initialized"); + return MaybeStat->getName(); + } + + /// Return the mapping between location -> distance that is used to speed up + /// the block skipping in the preprocessor. + const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { + return PPSkippedRangeMapping; + } + + CachedFileSystemEntry(CachedFileSystemEntry &&) = default; + CachedFileSystemEntry &operator=(CachedFileSystemEntry &&) = default; + + CachedFileSystemEntry(const CachedFileSystemEntry &) = delete; + CachedFileSystemEntry &operator=(const CachedFileSystemEntry &) = delete; + +private: + llvm::ErrorOr MaybeStat; + // Store the contents in a small string to allow a + // move from the small string for the minimized contents. + // Note: small size of 1 allows us to store an empty string with an implicit + // null terminator without any allocations. + llvm::SmallString<1> Contents; + PreprocessorSkippedRangeMapping PPSkippedRangeMapping; +}; + +/// This class is a shared cache, that caches the 'stat' and 'open' calls to the +/// underlying real file system. +/// +/// It is sharded based on the hash of the key to reduce the lock contention for +/// the worker threads. +class DependencyScanningFilesystemSharedCache { +public: + struct SharedFileSystemEntry { + std::mutex ValueLock; + CachedFileSystemEntry Value; + }; + + DependencyScanningFilesystemSharedCache(); + + /// Returns a cache entry for the corresponding key. + /// + /// A new cache entry is created if the key is not in the cache. This is a + /// thread safe call. + SharedFileSystemEntry &get(StringRef Key); + +private: + struct CacheShard { + std::mutex CacheLock; + llvm::StringMap Cache; + }; + std::unique_ptr CacheShards; + unsigned NumShards; +}; + +/// A virtual file system optimized for the dependency discovery. 
+/// +/// It is primarily designed to work with source files whose contents were +/// preprocessed to remove any tokens that are unlikely to affect the dependency +/// computation. +/// +/// This is not a thread safe VFS. A single instance is meant to be used only in +/// one thread. Multiple instances are allowed to service multiple threads +/// running in parallel. +class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { +public: + DependencyScanningWorkerFilesystem( + DependencyScanningFilesystemSharedCache &SharedCache, + IntrusiveRefCntPtr FS, + ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) + : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), + PPSkipMappings(PPSkipMappings) {} + + llvm::ErrorOr status(const Twine &Path) override; + llvm::ErrorOr> + openFileForRead(const Twine &Path) override; + + /// The set of files that should not be minimized. + llvm::StringSet<> IgnoredFiles; + +private: + void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) { + bool IsInserted = Cache.try_emplace(Filename, Entry).second; + (void)IsInserted; + assert(IsInserted && "local cache is updated more than once"); + } + + const CachedFileSystemEntry *getCachedEntry(StringRef Filename) { + auto It = Cache.find(Filename); + return It == Cache.end() ? nullptr : It->getValue(); + } + + llvm::ErrorOr + getOrCreateFileSystemEntry(const StringRef Filename); + + DependencyScanningFilesystemSharedCache &SharedCache; + /// The local cache is used by the worker thread to cache file system queries + /// locally instead of querying the global cache every time. + llvm::StringMap Cache; + /// The optional mapping structure which records information about the + /// excluded conditional directive skip mappings that are used by the + /// currently active preprocessor. 
+ ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings; +}; + +} // end namespace dependencies +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H diff --git a/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/include/clang/Tooling/DependencyScanning/DependencyScanningService.h new file mode 100644 index 00000000000..fd8ed80b143 --- /dev/null +++ b/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -0,0 +1,65 @@ +//===- DependencyScanningService.h - clang-scan-deps service ===-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H +#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H + +#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h" + +namespace clang { +namespace tooling { +namespace dependencies { + +/// The mode in which the dependency scanner will operate to find the +/// dependencies. +enum class ScanningMode { + /// This mode is used to compute the dependencies by running the preprocessor + /// over + /// the unmodified source files. + CanonicalPreprocessing, + + /// This mode is used to compute the dependencies by running the preprocessor + /// over + /// the source files that have been minimized to contents that might affect + /// the dependencies. + MinimizedSourcePreprocessing +}; + +/// The dependency scanning service contains the shared state that is used by +/// the individual dependency scanning workers. 
+class DependencyScanningService { +public: + DependencyScanningService(ScanningMode Mode, bool ReuseFileManager = true, + bool SkipExcludedPPRanges = true); + + ScanningMode getMode() const { return Mode; } + + bool canReuseFileManager() const { return ReuseFileManager; } + + bool canSkipExcludedPPRanges() const { return SkipExcludedPPRanges; } + + DependencyScanningFilesystemSharedCache &getSharedCache() { + return SharedCache; + } + +private: + const ScanningMode Mode; + const bool ReuseFileManager; + /// Set to true to use the preprocessor optimization that skips excluded PP + /// ranges by bumping the buffer pointer in the lexer instead of lexing the + /// tokens in the range until reaching the corresponding directive. + const bool SkipExcludedPPRanges; + /// The global file system cache. + DependencyScanningFilesystemSharedCache SharedCache; +}; + +} // end namespace dependencies +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H diff --git a/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h new file mode 100644 index 00000000000..0c9efccb1d8 --- /dev/null +++ b/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h @@ -0,0 +1,48 @@ +//===- DependencyScanningTool.h - clang-scan-deps service ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H +#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H + +#include "clang/Tooling/DependencyScanning/DependencyScanningService.h" +#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" +#include "clang/Tooling/JSONCompilationDatabase.h" +#include + +namespace clang{ +namespace tooling{ +namespace dependencies{ + +/// The high-level implementation of the dependency discovery tool that runs on +/// an individual worker thread. +class DependencyScanningTool { +public: + /// Construct a dependency scanning tool. + /// + /// \param Compilations The reference to the compilation database that's + /// used by the clang tool. + DependencyScanningTool(DependencyScanningService &Service, const clang::tooling::CompilationDatabase &Compilations); + + /// Print out the dependency information into a string using the dependency + /// file format that is specified in the options (-MD is the default) and + /// return it. + /// + /// \returns A \c StringError with the diagnostic output if clang errors + /// occurred, dependency file contents otherwise. 
+ llvm::Expected getDependencyFile(const std::string &Input, StringRef CWD); + +private: + DependencyScanningWorker Worker; + const tooling::CompilationDatabase &Compilations; +}; + +} // end namespace dependencies +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_TOOL_H diff --git a/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h index 3ea261a30d0..45c9fb4f029 100644 --- a/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h +++ b/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h @@ -10,17 +10,35 @@ #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/FileManager.h" #include "clang/Basic/LLVM.h" #include "clang/Frontend/PCHContainerOperations.h" +#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include namespace clang { + +class DependencyOutputOptions; + namespace tooling { namespace dependencies { +class DependencyScanningService; +class DependencyScanningWorkerFilesystem; + +class DependencyConsumer { +public: + virtual ~DependencyConsumer() {} + + virtual void handleFileDependency(const DependencyOutputOptions &Opts, + StringRef Filename) = 0; + + // FIXME: Add support for reporting modular dependencies. +}; + /// An individual dependency scanning worker that is able to run on its own /// thread. /// @@ -29,26 +47,32 @@ namespace dependencies { /// using the regular processing run. 
class DependencyScanningWorker { public: - DependencyScanningWorker(); + DependencyScanningWorker(DependencyScanningService &Service); - /// Print out the dependency information into a string using the dependency - /// file format that is specified in the options (-MD is the default) and - /// return it. + /// Run the dependency scanning tool for a given clang driver invocation (as + /// specified for the given Input in the CDB), and report the discovered + /// dependencies to the provided consumer. /// /// \returns A \c StringError with the diagnostic output if clang errors - /// occurred, dependency file contents otherwise. - llvm::Expected getDependencyFile(const std::string &Input, - StringRef WorkingDirectory, - const CompilationDatabase &CDB); + /// occurred, success otherwise. + llvm::Error computeDependencies(const std::string &Input, + StringRef WorkingDirectory, + const CompilationDatabase &CDB, + DependencyConsumer &Consumer); private: IntrusiveRefCntPtr DiagOpts; std::shared_ptr PCHContainerOps; + std::unique_ptr PPSkipMappings; + llvm::IntrusiveRefCntPtr RealFS; /// The file system that is used by each worker when scanning for /// dependencies. This filesystem persists accross multiple compiler /// invocations. - llvm::IntrusiveRefCntPtr WorkerFS; + llvm::IntrusiveRefCntPtr DepFS; + /// The file manager that is reused accross multiple invocations by this + /// worker. If null, the file manager will not be reused. + llvm::IntrusiveRefCntPtr Files; }; } // end namespace dependencies diff --git a/include/clang/Tooling/Execution.h b/include/clang/Tooling/Execution.h index 74f0df5a5b9..ca6f22c5da3 100644 --- a/include/clang/Tooling/Execution.h +++ b/include/clang/Tooling/Execution.h @@ -115,13 +115,6 @@ public: /// Returns the name of a specific executor. virtual StringRef getExecutorName() const = 0; - /// Should return true iff executor runs all actions in a single process. 
- /// Clients can use this signal to find out if they can collect results - /// in-memory (e.g. to avoid serialization costs of using ToolResults). - /// The single-process executors can still run multiple threads, but all - /// executions are guaranteed to share the same memory. - virtual bool isSingleProcess() const = 0; - /// Executes each action with a corresponding arguments adjuster. virtual llvm::Error execute(llvm::ArrayRef< diff --git a/include/clang/Tooling/Inclusions/HeaderIncludes.h b/include/clang/Tooling/Inclusions/HeaderIncludes.h index ec6f0ea45ff..6e6d6d8fb02 100644 --- a/include/clang/Tooling/Inclusions/HeaderIncludes.h +++ b/include/clang/Tooling/Inclusions/HeaderIncludes.h @@ -32,6 +32,7 @@ public: /// 0. Otherwise, returns the priority of the matching category or INT_MAX. /// NOTE: this API is not thread-safe! int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const; + int getSortIncludePriority(StringRef IncludeName, bool CheckMainHeader) const; private: bool isMainHeader(StringRef IncludeName) const; diff --git a/include/clang/Tooling/Inclusions/IncludeStyle.h b/include/clang/Tooling/Inclusions/IncludeStyle.h index a0f236e6fc4..266763a5b1b 100644 --- a/include/clang/Tooling/Inclusions/IncludeStyle.h +++ b/include/clang/Tooling/Inclusions/IncludeStyle.h @@ -58,6 +58,8 @@ struct IncludeStyle { std::string Regex; /// The priority to assign to this category. int Priority; + /// The custom priority to sort before grouping. 
+ int SortPriority; bool operator==(const IncludeCategory &Other) const { return Regex == Other.Regex && Priority == Other.Priority; } diff --git a/lib/Tooling/Refactoring/Extract/SourceExtraction.h b/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h old mode 100644 new mode 100755 similarity index 88% rename from lib/Tooling/Refactoring/Extract/SourceExtraction.h rename to include/clang/Tooling/Refactoring/Extract/SourceExtraction.h index 545eb6c1a11..034a0aaaf6d --- a/lib/Tooling/Refactoring/Extract/SourceExtraction.h +++ b/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H -#define LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#ifndef LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#define LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H #include "clang/Basic/LLVM.h" @@ -48,4 +48,4 @@ private: } // end namespace tooling } // end namespace clang -#endif // LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H +#endif //LLVM_CLANG_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H diff --git a/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h b/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h index 41a448f035a..c0f995d85c1 100644 --- a/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h +++ b/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h @@ -98,7 +98,17 @@ public: TypeBeginLoc, TypeEndLoc)) return false; } - return visit(Loc.getType()->getAsCXXRecordDecl(), TypeBeginLoc, TypeEndLoc); + if (const Type *TP = Loc.getTypePtr()) { + if (TP->getTypeClass() == clang::Type::Record) + return visit(TP->getAsCXXRecordDecl(), TypeBeginLoc, TypeEndLoc); + } + return true; + } + + bool VisitTypedefTypeLoc(TypedefTypeLoc TL) { + const SourceLocation TypeEndLoc = + 
Lexer::getLocForEndOfToken(TL.getBeginLoc(), 0, SM, LangOpts); + return visit(TL.getTypedefNameDecl(), TL.getBeginLoc(), TypeEndLoc); } bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) { @@ -122,8 +132,7 @@ private: ND, SourceRange(BeginLoc, EndLoc)); } bool visit(const NamedDecl *ND, SourceLocation Loc) { - return visit(ND, Loc, - Loc.getLocWithOffset(ND->getNameAsString().length() - 1)); + return visit(ND, Loc, Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts)); } }; diff --git a/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h b/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h index cc6ae83202f..fb373fcf502 100644 --- a/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h +++ b/include/clang/Tooling/Refactoring/RefactoringActionRulesInternal.h @@ -47,7 +47,7 @@ template void invokeRuleAfterValidatingRequirements( RefactoringResultConsumer &Consumer, RefactoringRuleContext &Context, const std::tuple &Requirements, - llvm::index_sequence) { + std::index_sequence) { // Check if the requirements we're interested in can be evaluated. auto Values = std::make_tuple(std::get(Requirements).evaluate(Context)...); @@ -87,7 +87,7 @@ template void visitRefactoringOptions( RefactoringOptionVisitor &Visitor, const std::tuple &Requirements, - llvm::index_sequence) { + std::index_sequence) { visitRefactoringOptionsImpl(Visitor, std::get(Requirements)...); } @@ -131,7 +131,7 @@ createRefactoringActionRule(const RequirementTypes &... Requirements) { RefactoringRuleContext &Context) override { internal::invokeRuleAfterValidatingRequirements( Consumer, Context, Requirements, - llvm::index_sequence_for()); + std::index_sequence_for()); } bool hasSelectionRequirement() override { @@ -142,13 +142,13 @@ createRefactoringActionRule(const RequirementTypes &... 
Requirements) { void visitRefactoringOptions(RefactoringOptionVisitor &Visitor) override { internal::visitRefactoringOptions( Visitor, Requirements, - llvm::index_sequence_for()); + std::index_sequence_for()); } private: std::tuple Requirements; }; - return llvm::make_unique(std::make_tuple(Requirements...)); + return std::make_unique(std::make_tuple(Requirements...)); } } // end namespace tooling diff --git a/include/clang/Tooling/StandaloneExecution.h b/include/clang/Tooling/StandaloneExecution.h index 5fbc1e479c5..8db6229acf7 100644 --- a/include/clang/Tooling/StandaloneExecution.h +++ b/include/clang/Tooling/StandaloneExecution.h @@ -52,8 +52,6 @@ public: StringRef getExecutorName() const override { return ExecutorName; } - bool isSingleProcess() const override { return true; } - using ToolExecutor::execute; llvm::Error diff --git a/include/clang/Tooling/Syntax/Tokens.h b/include/clang/Tooling/Syntax/Tokens.h index 4640ccb2d30..301432d3888 100644 --- a/include/clang/Tooling/Syntax/Tokens.h +++ b/include/clang/Tooling/Syntax/Tokens.h @@ -236,6 +236,16 @@ public: /// #pragma, etc. llvm::ArrayRef spelledTokens(FileID FID) const; + /// Get all tokens that expand a macro in \p FID. For the following input + /// #define FOO B + /// #define FOO2(X) int X + /// FOO2(XY) + /// int B; + /// FOO; + /// macroExpansions() returns {"FOO2", "FOO"} (from line 3 and 5 + /// respecitvely). + std::vector macroExpansions(FileID FID) const; + const SourceManager &sourceManager() const { return *SourceMgr; } std::string dumpForTests() const; diff --git a/include/clang/Tooling/Tooling.h b/include/clang/Tooling/Tooling.h index 83fe43ac59e..19421f0a39f 100644 --- a/include/clang/Tooling/Tooling.h +++ b/include/clang/Tooling/Tooling.h @@ -99,9 +99,7 @@ public: DiagnosticConsumer *DiagConsumer) override; /// Returns a new clang::FrontendAction. - /// - /// The caller takes ownership of the returned action. 
- virtual FrontendAction *create() = 0; + virtual std::unique_ptr create() = 0; }; /// Returns a new FrontendActionFactory for a given type. @@ -156,7 +154,7 @@ inline std::unique_ptr newFrontendActionFactory( /// clang modules. /// /// \return - True if 'ToolAction' was successfully executed. -bool runToolOnCode(FrontendAction *ToolAction, const Twine &Code, +bool runToolOnCode(std::unique_ptr ToolAction, const Twine &Code, const Twine &FileName = "input.cc", std::shared_ptr PCHContainerOps = std::make_shared()); @@ -179,7 +177,7 @@ using FileContentMappings = std::vector>; /// /// \return - True if 'ToolAction' was successfully executed. bool runToolOnCodeWithArgs( - FrontendAction *ToolAction, const Twine &Code, + std::unique_ptr ToolAction, const Twine &Code, const std::vector &Args, const Twine &FileName = "input.cc", const Twine &ToolName = "clang-tool", std::shared_ptr PCHContainerOps = @@ -188,7 +186,7 @@ bool runToolOnCodeWithArgs( // Similar to the overload except this takes a VFS. bool runToolOnCodeWithArgs( - FrontendAction *ToolAction, const Twine &Code, + std::unique_ptr ToolAction, const Twine &Code, llvm::IntrusiveRefCntPtr VFS, const std::vector &Args, const Twine &FileName = "input.cc", const Twine &ToolName = "clang-tool", @@ -237,13 +235,13 @@ public: /// uses its binary name (CommandLine[0]) to locate its builtin headers. /// Callers have to ensure that they are installed in a compatible location /// (see clang driver implementation) or mapped in via mapVirtualFile. - /// \param FAction The action to be executed. Class takes ownership. + /// \param FAction The action to be executed. /// \param Files The FileManager used for the execution. Class does not take /// ownership. /// \param PCHContainerOps The PCHContainerOperations for loading and creating /// clang modules. 
- ToolInvocation(std::vector CommandLine, FrontendAction *FAction, - FileManager *Files, + ToolInvocation(std::vector CommandLine, + std::unique_ptr FAction, FileManager *Files, std::shared_ptr PCHContainerOps = std::make_shared()); @@ -314,12 +312,15 @@ public: /// clang modules. /// \param BaseFS VFS used for all underlying file accesses when running the /// tool. + /// \param Files The file manager to use for underlying file operations when + /// running the tool. ClangTool(const CompilationDatabase &Compilations, ArrayRef SourcePaths, std::shared_ptr PCHContainerOps = std::make_shared(), IntrusiveRefCntPtr BaseFS = - llvm::vfs::getRealFileSystem()); + llvm::vfs::getRealFileSystem(), + IntrusiveRefCntPtr Files = nullptr); ~ClangTool(); @@ -397,7 +398,9 @@ template std::unique_ptr newFrontendActionFactory() { class SimpleFrontendActionFactory : public FrontendActionFactory { public: - FrontendAction *create() override { return new T; } + std::unique_ptr create() override { + return std::make_unique(); + } }; return std::unique_ptr( @@ -413,8 +416,9 @@ inline std::unique_ptr newFrontendActionFactory( SourceFileCallbacks *Callbacks) : ConsumerFactory(ConsumerFactory), Callbacks(Callbacks) {} - FrontendAction *create() override { - return new ConsumerFactoryAdaptor(ConsumerFactory, Callbacks); + std::unique_ptr create() override { + return std::make_unique(ConsumerFactory, + Callbacks); } private: diff --git a/include/clang/Tooling/Transformer/MatchConsumer.h b/include/clang/Tooling/Transformer/MatchConsumer.h new file mode 100644 index 00000000000..0a1dbe13ea1 --- /dev/null +++ b/include/clang/Tooling/Transformer/MatchConsumer.h @@ -0,0 +1,62 @@ +//===--- MatchConsumer.h - MatchConsumer abstraction ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file This file defines the *MatchConsumer* abstraction: a computation over +/// match results, specifically the `ast_matchers::MatchFinder::MatchResult` +/// class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ + +#include "clang/AST/ASTTypeTraits.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" + +namespace clang { +namespace transformer { +/// A failable computation over nodes bound by AST matchers. +/// +/// The computation should report any errors though its return value (rather +/// than terminating the program) to enable usage in interactive scenarios like +/// clang-query. +/// +/// This is a central abstraction of the Transformer framework. +template +using MatchConsumer = + std::function(const ast_matchers::MatchFinder::MatchResult &)>; + +/// Creates an error that signals that a `MatchConsumer` expected a certain node +/// to be bound by AST matchers, but it was not actually bound. +inline llvm::Error notBoundError(llvm::StringRef Id) { + return llvm::make_error(llvm::errc::invalid_argument, + "Id not bound: " + Id); +} + +/// Chooses between the two consumers, based on whether \p ID is bound in the +/// match. +template +MatchConsumer ifBound(std::string ID, MatchConsumer TrueC, + MatchConsumer FalseC) { + return [=](const ast_matchers::MatchFinder::MatchResult &Result) { + auto &Map = Result.Nodes.getMap(); + return (Map.find(ID) != Map.end() ? TrueC : FalseC)(Result); + }; +} +} // namespace transformer + +namespace tooling { +// DEPRECATED: Temporary alias supporting client migration to the `transformer` +// namespace. 
+using transformer::ifBound; +} // namespace tooling +} // namespace clang +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_MATCH_CONSUMER_H_ diff --git a/include/clang/Tooling/Refactoring/RangeSelector.h b/include/clang/Tooling/Transformer/RangeSelector.h similarity index 80% rename from include/clang/Tooling/Refactoring/RangeSelector.h rename to include/clang/Tooling/Transformer/RangeSelector.h index b117e4d82ad..9f556d20632 100644 --- a/include/clang/Tooling/Refactoring/RangeSelector.h +++ b/include/clang/Tooling/Transformer/RangeSelector.h @@ -17,14 +17,14 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" #include "llvm/Support/Error.h" #include #include namespace clang { -namespace tooling { -using RangeSelector = std::function( - const ast_matchers::MatchFinder::MatchResult &)>; +namespace transformer { +using RangeSelector = MatchConsumer; inline RangeSelector charRange(CharSourceRange R) { return [R](const ast_matchers::MatchFinder::MatchResult &) @@ -79,10 +79,34 @@ RangeSelector statements(std::string ID); // (all source between the braces). RangeSelector initListElements(std::string ID); +/// Given an \IfStmt (bound to \p ID), selects the range of the else branch, +/// starting from the \c else keyword. +RangeSelector elseBranch(std::string ID); + /// Selects the range from which `S` was expanded (possibly along with other /// source), if `S` is an expansion, and `S` itself, otherwise. Corresponds to /// `SourceManager::getExpansionRange`. RangeSelector expansion(RangeSelector S); +} // namespace transformer + +namespace tooling { +// DEPRECATED: These are temporary aliases supporting client migration to the +// `transformer` namespace. 
+using RangeSelector = transformer::RangeSelector; + +using transformer::after; +using transformer::before; +using transformer::callArgs; +using transformer::charRange; +using transformer::elseBranch; +using transformer::expansion; +using transformer::initListElements; +using transformer::member; +using transformer::name; +using transformer::node; +using transformer::range; +using transformer::statement; +using transformer::statements; } // namespace tooling } // namespace clang diff --git a/include/clang/Tooling/Refactoring/Transformer.h b/include/clang/Tooling/Transformer/RewriteRule.h similarity index 82% rename from include/clang/Tooling/Refactoring/Transformer.h rename to include/clang/Tooling/Transformer/RewriteRule.h index 6d9c5a37cc1..6e99151c1c7 100644 --- a/include/clang/Tooling/Refactoring/Transformer.h +++ b/include/clang/Tooling/Transformer/RewriteRule.h @@ -1,4 +1,4 @@ -//===--- Transformer.h - Clang source-rewriting library ---------*- C++ -*-===// +//===--- RewriteRule.h - RewriteRule class ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,42 +7,30 @@ //===----------------------------------------------------------------------===// /// /// \file -/// Defines a library supporting the concise specification of clang-based -/// source-to-source transformations. +/// Defines the RewriteRule class and related functions for creating, +/// modifying and interpreting RewriteRules. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersInternal.h" #include "clang/Tooling/Refactoring/AtomicChange.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Error.h" -#include #include #include -#include #include namespace clang { -namespace tooling { - -// Note that \p TextGenerator is allowed to fail, e.g. when trying to access a -// matched node that was not bound. Allowing this to fail simplifies error -// handling for interactive tools like clang-query. -using TextGenerator = std::function( - const ast_matchers::MatchFinder::MatchResult &)>; - -/// Wraps a string as a TextGenerator. -inline TextGenerator text(std::string M) { - return [M](const ast_matchers::MatchFinder::MatchResult &) - -> Expected { return M; }; -} +namespace transformer { +using TextGenerator = MatchConsumer; // Description of a source-code edit, expressed in terms of an AST node. // Includes: an ID for the (bound) node, a selector for source related to the @@ -160,11 +148,9 @@ inline RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M, void addInclude(RewriteRule &Rule, llvm::StringRef Header, IncludeFormat Format = IncludeFormat::Quoted); -/// Applies the first rule whose pattern matches; other rules are ignored. -/// -/// N.B. All of the rules must use the same kind of matcher (that is, share a -/// base class in the AST hierarchy). 
However, this constraint is caused by an -/// implementation detail and should be lifted in the future. +/// Applies the first rule whose pattern matches; other rules are ignored. If +/// the matchers are independent then order doesn't matter. In that case, +/// `applyFirst` is simply joining the set of rules into one. // // `applyFirst` is like an `anyOf` matcher with an edit action attached to each // of its cases. Anywhere you'd use `anyOf(m1.bind("id1"), m2.bind("id2"))` and @@ -230,7 +216,9 @@ inline ASTEdit insertAfter(RangeSelector S, TextGenerator Replacement) { /// Removes the source selected by \p S. inline ASTEdit remove(RangeSelector S) { - return change(std::move(S), text("")); + return change(std::move(S), + [](const ast_matchers::MatchFinder::MatchResult &) + -> Expected { return ""; }); } /// The following three functions are a low-level part of the RewriteRule @@ -243,8 +231,25 @@ inline ASTEdit remove(RangeSelector S) { // public and well-supported and move them out of `detail`. namespace detail { /// Builds a single matcher for the rule, covering all of the rule's cases. +/// Only supports Rules whose cases' matchers share the same base "kind" +/// (`Stmt`, `Decl`, etc.) Deprecated: use `buildMatchers` instead, which +/// supports mixing matchers of different kinds. ast_matchers::internal::DynTypedMatcher buildMatcher(const RewriteRule &Rule); +/// Builds a set of matchers that cover the rule (one for each distinct node +/// matcher base kind: Stmt, Decl, etc.). Node-matchers for `QualType` and +/// `Type` are not permitted, since such nodes carry no source location +/// information and are therefore not relevant for rewriting. If any such +/// matchers are included, will return an empty vector. +std::vector +buildMatchers(const RewriteRule &Rule); + +/// Gets the beginning location of the source matched by a rewrite rule. If the +/// match occurs within a macro expansion, returns the beginning of the +/// expansion point. 
`Result` must come from the matching of a rewrite rule. +SourceLocation +getRuleMatchLoc(const ast_matchers::MatchFinder::MatchResult &Result); + /// Returns the \c Case of \c Rule that was selected in the match result. /// Assumes a matcher built with \c buildMatcher. const RewriteRule::Case & @@ -273,36 +278,32 @@ Expected> translateEdits(const ast_matchers::MatchFinder::MatchResult &Result, llvm::ArrayRef Edits); } // namespace detail +} // namespace transformer -/// Handles the matcher and callback registration for a single rewrite rule, as -/// defined by the arguments of the constructor. -class Transformer : public ast_matchers::MatchFinder::MatchCallback { -public: - using ChangeConsumer = - std::function Change)>; +namespace tooling { +// DEPRECATED: These are temporary aliases supporting client migration to the +// `transformer` namespace. +/// Wraps a string as a TextGenerator. +using TextGenerator = transformer::TextGenerator; - /// \param Consumer Receives each rewrite or error. Will not necessarily be - /// called for each match; for example, if the rewrite is not applicable - /// because of macros, but doesn't fail. Note that clients are responsible - /// for handling the case that independent \c AtomicChanges conflict with each - /// other. - Transformer(RewriteRule Rule, ChangeConsumer Consumer) - : Rule(std::move(Rule)), Consumer(std::move(Consumer)) {} +inline TextGenerator text(std::string M) { + return [M](const ast_matchers::MatchFinder::MatchResult &) + -> Expected { return M; }; +} - /// N.B. Passes `this` pointer to `MatchFinder`. So, this object should not - /// be moved after this call. - void registerMatchers(ast_matchers::MatchFinder *MatchFinder); - - /// Not called directly by users -- called by the framework, via base class - /// pointer. - void run(const ast_matchers::MatchFinder::MatchResult &Result) override; - -private: - RewriteRule Rule; - /// Receives each successful rewrites as an \c AtomicChange. 
- ChangeConsumer Consumer; -}; +using transformer::addInclude; +using transformer::applyFirst; +using transformer::change; +using transformer::insertAfter; +using transformer::insertBefore; +using transformer::makeRule; +using transformer::remove; +using transformer::RewriteRule; +using transformer::IncludeFormat; +namespace detail { +using namespace transformer::detail; +} // namespace detail } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_TRANSFORMER_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_REWRITE_RULE_H_ diff --git a/include/clang/Tooling/Refactoring/SourceCode.h b/include/clang/Tooling/Transformer/SourceCode.h similarity index 78% rename from include/clang/Tooling/Refactoring/SourceCode.h rename to include/clang/Tooling/Transformer/SourceCode.h index 498dbea96c7..bc9cc3d2a25 100644 --- a/include/clang/Tooling/Refactoring/SourceCode.h +++ b/include/clang/Tooling/Transformer/SourceCode.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H -#define LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceLocation.h" @@ -72,6 +72,19 @@ StringRef getExtendedText(const T &Node, tok::TokenKind Next, ASTContext &Context) { return getText(getExtendedRange(Node, Next, Context), Context); } + +// Attempts to resolve the given range to one that can be edited by a rewrite; +// generally, one that starts and ends within a particular file. It supports +// a limited set of cases involving source locations in macro expansions. 
+llvm::Optional +getRangeForEdit(const CharSourceRange &EditRange, const SourceManager &SM, + const LangOptions &LangOpts); + +inline llvm::Optional +getRangeForEdit(const CharSourceRange &EditRange, const ASTContext &Context) { + return getRangeForEdit(EditRange, Context.getSourceManager(), + Context.getLangOpts()); +} } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_H diff --git a/include/clang/Tooling/Transformer/SourceCodeBuilders.h b/include/clang/Tooling/Transformer/SourceCodeBuilders.h new file mode 100644 index 00000000000..6c79a7588f2 --- /dev/null +++ b/include/clang/Tooling/Transformer/SourceCodeBuilders.h @@ -0,0 +1,86 @@ +//===--- SourceCodeBuilders.h - Source-code building facilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file collects facilities for generating source code strings. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include + +namespace clang { +namespace tooling { + +/// \name Code analysis utilities. +/// @{ +/// Ignores implicit object-construction expressions in addition to the normal +/// implicit expressions that are ignored. +const Expr *reallyIgnoreImplicit(const Expr &E); + +/// Determines whether printing this expression in *any* expression requires +/// parentheses to preserve its meaning. This analyses is necessarily +/// conservative because it lacks information about the target context. 
+bool mayEverNeedParens(const Expr &E); + +/// Determines whether printing this expression to the left of a dot or arrow +/// operator requires a parentheses to preserve its meaning. Given that +/// dot/arrow are (effectively) the highest precedence, this is equivalent to +/// asking whether it ever needs parens. +inline bool needParensBeforeDotOrArrow(const Expr &E) { + return mayEverNeedParens(E); +} + +/// Determines whether printing this expression to the right of a unary operator +/// requires a parentheses to preserve its meaning. +bool needParensAfterUnaryOperator(const Expr &E); +/// @} + +/// \name Basic code-string generation utilities. +/// @{ + +/// Builds source for an expression, adding parens if needed for unambiguous +/// parsing. +llvm::Optional buildParens(const Expr &E, + const ASTContext &Context); + +/// Builds idiomatic source for the dereferencing of `E`: prefix with `*` but +/// simplify when it already begins with `&`. \returns empty string on failure. +llvm::Optional buildDereference(const Expr &E, + const ASTContext &Context); + +/// Builds idiomatic source for taking the address of `E`: prefix with `&` but +/// simplify when it already begins with `*`. \returns empty string on failure. 
+llvm::Optional buildAddressOf(const Expr &E, + const ASTContext &Context); + +/// Adds a dot to the end of the given expression, but adds parentheses when +/// needed by the syntax, and simplifies to `->` when possible, e.g.: +/// +/// `x` becomes `x.` +/// `*a` becomes `a->` +/// `a+b` becomes `(a+b).` +llvm::Optional buildDot(const Expr &E, const ASTContext &Context); + +/// Adds an arrow to the end of the given expression, but adds parentheses +/// when needed by the syntax, and simplifies to `.` when possible, e.g.: +/// +/// `x` becomes `x->` +/// `&a` becomes `a.` +/// `a+b` becomes `(a+b)->` +llvm::Optional buildArrow(const Expr &E, + const ASTContext &Context); +/// @} + +} // namespace tooling +} // namespace clang +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_SOURCE_CODE_BUILDERS_H_ diff --git a/include/clang/Tooling/Refactoring/Stencil.h b/include/clang/Tooling/Transformer/Stencil.h similarity index 58% rename from include/clang/Tooling/Refactoring/Stencil.h rename to include/clang/Tooling/Transformer/Stencil.h index e57a576e557..66d1388f971 100644 --- a/include/clang/Tooling/Refactoring/Stencil.h +++ b/include/clang/Tooling/Transformer/Stencil.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// /// -/// /file +/// \file /// This file defines the *Stencil* abstraction: a code-generating object, /// parameterized by named references to (bound) AST nodes. Given a match /// result, a stencil can be evaluated to a string of source code. 
@@ -17,21 +17,21 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ -#define LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/Tooling/Refactoring/RangeSelector.h" +#include "clang/Tooling/Transformer/MatchConsumer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include #include namespace clang { -namespace tooling { - +namespace transformer { /// A stencil is represented as a sequence of "parts" that can each individually /// generate a code string based on a match result. The different kinds of /// parts include (raw) text, references to bound nodes and assorted operations @@ -47,21 +47,18 @@ public: virtual llvm::Error eval(const ast_matchers::MatchFinder::MatchResult &Match, std::string *Result) const = 0; - virtual bool isEqual(const StencilPartInterface &other) const = 0; - - const void *typeId() const { return TypeId; } + /// Constructs a string representation of the StencilPart. StencilParts + /// generated by the `selection` and `run` functions do not have a unique + /// string representation. + virtual std::string toString() const = 0; protected: - StencilPartInterface(const void *DerivedId) : TypeId(DerivedId) {} + StencilPartInterface() = default; // Since this is an abstract class, copying/assigning only make sense for // derived classes implementing `clone()`. StencilPartInterface(const StencilPartInterface &) = default; StencilPartInterface &operator=(const StencilPartInterface &) = default; - - /// Unique identifier of the concrete type of this instance. Supports safe - /// downcasting. - const void *TypeId; }; /// A copyable facade for a std::unique_ptr. 
Copies result @@ -77,12 +74,10 @@ public: return Impl->eval(Match, Result); } - bool operator==(const StencilPart &Other) const { - if (Impl == Other.Impl) - return true; - if (Impl == nullptr || Other.Impl == nullptr) - return false; - return Impl->isEqual(*Other.Impl); + std::string toString() const { + if (Impl == nullptr) + return ""; + return Impl->toString(); } private: @@ -119,8 +114,17 @@ public: return eval(Result); } + /// Constructs a string representation of the Stencil. The string is not + /// guaranteed to be unique. + std::string toString() const { + std::vector PartStrings; + PartStrings.reserve(Parts.size()); + for (const auto &Part : Parts) + PartStrings.push_back(Part.toString()); + return llvm::join(PartStrings, ", "); + } + private: - friend bool operator==(const Stencil &A, const Stencil &B); static StencilPart wrap(llvm::StringRef Text); static StencilPart wrap(RangeSelector Selector); static StencilPart wrap(StencilPart Part) { return Part; } @@ -128,14 +132,10 @@ private: std::vector Parts; }; -inline bool operator==(const Stencil &A, const Stencil &B) { - return A.Parts == B.Parts; -} - -inline bool operator!=(const Stencil &A, const Stencil &B) { return !(A == B); } - +// // Functions for conveniently building stencils. -namespace stencil { +// + /// Convenience wrapper for Stencil::cat that can be imported with a using decl. template Stencil cat(Ts &&... Parts) { return Stencil::cat(std::forward(Parts)...); @@ -147,27 +147,75 @@ StencilPart text(llvm::StringRef Text); /// \returns the source corresponding to the selected range. StencilPart selection(RangeSelector Selector); -/// \returns the source corresponding to the identified node. -/// FIXME: Deprecated. Write `selection(node(Id))` instead. -inline StencilPart node(llvm::StringRef Id) { - return selection(tooling::node(Id)); +/// Generates the source of the expression bound to \p Id, wrapping it in +/// parentheses if it may parse differently depending on context. 
For example, a +/// binary operation is always wrapped, while a variable reference is never +/// wrapped. +StencilPart expression(llvm::StringRef Id); + +/// Constructs an idiomatic dereferencing of the expression bound to \p ExprId. +/// \p ExprId is wrapped in parentheses, if needed. +StencilPart deref(llvm::StringRef ExprId); + +/// Constructs an expression that idiomatically takes the address of the +/// expression bound to \p ExprId. \p ExprId is wrapped in parentheses, if +/// needed. +StencilPart addressOf(llvm::StringRef ExprId); + +/// Constructs a `MemberExpr` that accesses the named member (\p Member) of the +/// object bound to \p BaseId. The access is constructed idiomatically: if \p +/// BaseId is bound to `e` and \p Member identifies member `m`, then returns +/// `e->m`, when e is a pointer, `e2->m` when e = `*e2` and `e.m` otherwise. +/// Additionally, `e` is wrapped in parentheses, if needed. +StencilPart access(llvm::StringRef BaseId, StencilPart Member); +inline StencilPart access(llvm::StringRef BaseId, llvm::StringRef Member) { + return access(BaseId, text(Member)); } -/// Variant of \c node() that identifies the node as a statement, for purposes -/// of deciding whether to include any trailing semicolon. Only relevant for -/// Expr nodes, which, by default, are *not* considered as statements. -/// \returns the source corresponding to the identified node, considered as a -/// statement. -/// FIXME: Deprecated. Write `selection(statement(Id))` instead. -inline StencilPart sNode(llvm::StringRef Id) { - return selection(tooling::statement(Id)); +/// Chooses between the two stencil parts, based on whether \p ID is bound in +/// the match. +StencilPart ifBound(llvm::StringRef Id, StencilPart TruePart, + StencilPart FalsePart); + +/// Chooses between the two strings, based on whether \p ID is bound in the +/// match. 
+inline StencilPart ifBound(llvm::StringRef Id, llvm::StringRef TrueText, + llvm::StringRef FalseText) { + return ifBound(Id, text(TrueText), text(FalseText)); } +/// Wraps a MatchConsumer in a StencilPart, so that it can be used in a Stencil. +/// This supports user-defined extensions to the Stencil language. +StencilPart run(MatchConsumer C); + /// For debug use only; semantics are not guaranteed. /// /// \returns the string resulting from calling the node's print() method. StencilPart dPrint(llvm::StringRef Id); +} // namespace transformer + +namespace tooling { +// DEPRECATED: These are temporary aliases supporting client migration to the +// `transformer` namespace. +using Stencil = transformer::Stencil; +using StencilPart = transformer::StencilPart; +namespace stencil { +using transformer::access; +using transformer::addressOf; +using transformer::cat; +using transformer::deref; +using transformer::dPrint; +using transformer::expression; +using transformer::ifBound; +using transformer::run; +using transformer::selection; +using transformer::text; +/// \returns the source corresponding to the identified node. +/// FIXME: Deprecated. Write `selection(node(Id))` instead. +inline transformer::StencilPart node(llvm::StringRef Id) { + return selection(tooling::node(Id)); +} } // namespace stencil } // namespace tooling } // namespace clang -#endif // LLVM_CLANG_TOOLING_REFACTOR_STENCIL_H_ +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_STENCIL_H_ diff --git a/include/clang/Tooling/Transformer/Transformer.h b/include/clang/Tooling/Transformer/Transformer.h new file mode 100644 index 00000000000..31feacba5e2 --- /dev/null +++ b/include/clang/Tooling/Transformer/Transformer.h @@ -0,0 +1,52 @@ +//===--- Transformer.h - Transformer class ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ +#define LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Tooling/Refactoring/AtomicChange.h" +#include "clang/Tooling/Transformer/RewriteRule.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace clang { +namespace tooling { +/// Handles the matcher and callback registration for a single `RewriteRule`, as +/// defined by the arguments of the constructor. +class Transformer : public ast_matchers::MatchFinder::MatchCallback { +public: + using ChangeConsumer = + std::function Change)>; + + /// \param Consumer Receives each rewrite or error. Will not necessarily be + /// called for each match; for example, if the rewrite is not applicable + /// because of macros, but doesn't fail. Note that clients are responsible + /// for handling the case that independent \c AtomicChanges conflict with each + /// other. + Transformer(transformer::RewriteRule Rule, ChangeConsumer Consumer) + : Rule(std::move(Rule)), Consumer(std::move(Consumer)) {} + + /// N.B. Passes `this` pointer to `MatchFinder`. So, this object should not + /// be moved after this call. + void registerMatchers(ast_matchers::MatchFinder *MatchFinder); + + /// Not called directly by users -- called by the framework, via base class + /// pointer. + void run(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + transformer::RewriteRule Rule; + /// Receives each successful rewrites as an \c AtomicChange. 
+ ChangeConsumer Consumer; +}; +} // namespace tooling +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_TRANSFORMER_TRANSFORMER_H_ diff --git a/include/clang/module.modulemap b/include/clang/module.modulemap index 1f32ffe0c15..2cbe865bce8 100644 --- a/include/clang/module.modulemap +++ b/include/clang/module.modulemap @@ -18,9 +18,9 @@ module Clang_AST { umbrella "AST" textual header "AST/BuiltinTypes.def" + textual header "AST/CXXRecordDeclDefinitionBits.def" textual header "AST/OperationKinds.def" textual header "AST/TypeLocNodes.def" - textual header "AST/TypeNodes.def" module * { export * } } @@ -31,9 +31,11 @@ module Clang_Basic { requires cplusplus umbrella "Basic" + textual header "Basic/AArch64SVEACLETypes.def" textual header "Basic/BuiltinsAArch64.def" textual header "Basic/BuiltinsAMDGPU.def" textual header "Basic/BuiltinsARM.def" + textual header "Basic/BuiltinsBPF.def" textual header "Basic/Builtins.def" textual header "Basic/BuiltinsHexagon.def" textual header "Basic/BuiltinsLe64.def" @@ -100,7 +102,7 @@ module Clang_Frontend { requires cplusplus umbrella "Frontend" - textual header "Frontend/LangStandards.def" + textual header "Basic/LangStandards.def" module * { export * } } diff --git a/lib/ARCMigrate/ARCMT.cpp b/lib/ARCMigrate/ARCMT.cpp index 568e06f21fb..a9018c1c4bd 100644 --- a/lib/ARCMigrate/ARCMT.cpp +++ b/lib/ARCMigrate/ARCMT.cpp @@ -139,7 +139,7 @@ public: } // Non-ARC warnings are ignored. 
- Diags.setLastDiagnosticIgnored(); + Diags.setLastDiagnosticIgnored(true); } }; @@ -453,8 +453,8 @@ public: std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { CI.getPreprocessor().addPPCallbacks( - llvm::make_unique(ARCMTMacroLocs)); - return llvm::make_unique(); + std::make_unique(ARCMTMacroLocs)); + return std::make_unique(); } }; diff --git a/lib/ARCMigrate/FileRemapper.cpp b/lib/ARCMigrate/FileRemapper.cpp index 1a4862d09aa..a031fe22ac1 100644 --- a/lib/ARCMigrate/FileRemapper.cpp +++ b/lib/ARCMigrate/FileRemapper.cpp @@ -78,26 +78,26 @@ bool FileRemapper::initFromFile(StringRef filePath, DiagnosticsEngine &Diag, Diag); StringRef toFilename = lines[idx+2]; - const FileEntry *origFE = FileMgr->getFile(fromFilename); + llvm::ErrorOr origFE = FileMgr->getFile(fromFilename); if (!origFE) { if (ignoreIfFilesChanged) continue; return report("File does not exist: " + fromFilename, Diag); } - const FileEntry *newFE = FileMgr->getFile(toFilename); + llvm::ErrorOr newFE = FileMgr->getFile(toFilename); if (!newFE) { if (ignoreIfFilesChanged) continue; return report("File does not exist: " + toFilename, Diag); } - if ((uint64_t)origFE->getModificationTime() != timeModified) { + if ((uint64_t)(*origFE)->getModificationTime() != timeModified) { if (ignoreIfFilesChanged) continue; return report("File was modified: " + fromFilename, Diag); } - pairs.push_back(std::make_pair(origFE, newFE)); + pairs.push_back(std::make_pair(*origFE, *newFE)); } for (unsigned i = 0, e = pairs.size(); i != e; ++i) @@ -121,7 +121,7 @@ bool FileRemapper::flushToFile(StringRef outputPath, DiagnosticsEngine &Diag) { std::error_code EC; std::string infoFile = outputPath; - llvm::raw_fd_ostream infoOut(infoFile, EC, llvm::sys::fs::F_None); + llvm::raw_fd_ostream infoOut(infoFile, EC, llvm::sys::fs::OF_None); if (EC) return report(EC.message(), Diag); @@ -152,9 +152,11 @@ bool FileRemapper::flushToFile(StringRef outputPath, DiagnosticsEngine &Diag) { 
newOut.write(mem->getBufferStart(), mem->getBufferSize()); newOut.close(); - const FileEntry *newE = FileMgr->getFile(tempPath); - remap(origFE, newE); - infoOut << newE->getName() << '\n'; + auto newE = FileMgr->getFile(tempPath); + if (newE) { + remap(origFE, *newE); + infoOut << (*newE)->getName() << '\n'; + } } } @@ -175,7 +177,7 @@ bool FileRemapper::overwriteOriginal(DiagnosticsEngine &Diag, Diag); std::error_code EC; - llvm::raw_fd_ostream Out(origFE->getName(), EC, llvm::sys::fs::F_None); + llvm::raw_fd_ostream Out(origFE->getName(), EC, llvm::sys::fs::OF_None); if (EC) return report(EC.message(), Diag); @@ -224,7 +226,9 @@ void FileRemapper::remap(const FileEntry *file, const FileEntry *newfile) { } const FileEntry *FileRemapper::getOriginalFile(StringRef filePath) { - const FileEntry *file = FileMgr->getFile(filePath); + const FileEntry *file = nullptr; + if (auto fileOrErr = FileMgr->getFile(filePath)) + file = *fileOrErr; // If we are updating a file that overridden an original file, // actually update the original file. 
llvm::DenseMap::iterator diff --git a/lib/ARCMigrate/ObjCMT.cpp b/lib/ARCMigrate/ObjCMT.cpp index 7126a0873ea..4abb04fef5b 100644 --- a/lib/ARCMigrate/ObjCMT.cpp +++ b/lib/ARCMigrate/ObjCMT.cpp @@ -208,10 +208,10 @@ ObjCMigrateAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { CI.getPreprocessor().addPPCallbacks(std::unique_ptr(PPRec)); std::vector> Consumers; Consumers.push_back(WrapperFrontendAction::CreateASTConsumer(CI, InFile)); - Consumers.push_back(llvm::make_unique( + Consumers.push_back(std::make_unique( MigrateDir, ObjCMigAction, Remapper, CompInst->getFileManager(), PPRec, CompInst->getPreprocessor(), false, None)); - return llvm::make_unique(std::move(Consumers)); + return std::make_unique(std::move(Consumers)); } bool ObjCMigrateAction::BeginInvocation(CompilerInstance &CI) { @@ -1951,7 +1951,7 @@ void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) { if (IsOutputFile) { std::error_code EC; - llvm::raw_fd_ostream OS(MigrateDir, EC, llvm::sys::fs::F_None); + llvm::raw_fd_ostream OS(MigrateDir, EC, llvm::sys::fs::OF_None); if (EC) { DiagnosticsEngine &Diags = Ctx.getDiagnostics(); Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) @@ -2034,7 +2034,7 @@ MigrateSourceAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { CI.getPreprocessor().addPPCallbacks(std::unique_ptr(PPRec)); std::vector WhiteList = getWhiteListFilenames(CI.getFrontendOpts().ObjCMTWhiteListPath); - return llvm::make_unique( + return std::make_unique( CI.getFrontendOpts().OutputFile, ObjCMTAction, Remapper, CI.getFileManager(), PPRec, CI.getPreprocessor(), /*isOutputFile=*/true, WhiteList); @@ -2141,10 +2141,11 @@ private: StringRef Val = ValueString->getValue(ValueStorage); if (Key == "file") { - const FileEntry *FE = FileMgr.getFile(Val); - if (!FE) + auto FE = FileMgr.getFile(Val); + if (FE) + Entry.File = *FE; + else Ignore = true; - Entry.File = FE; } else if (Key == "offset") { if (Val.getAsInteger(10, Entry.Offset)) 
Ignore = true; diff --git a/lib/ARCMigrate/PlistReporter.cpp b/lib/ARCMigrate/PlistReporter.cpp index 6d7fcb053b4..d01563b2974 100644 --- a/lib/ARCMigrate/PlistReporter.cpp +++ b/lib/ARCMigrate/PlistReporter.cpp @@ -56,7 +56,7 @@ void arcmt::writeARCDiagsToPlist(const std::string &outPath, } std::error_code EC; - llvm::raw_fd_ostream o(outPath, EC, llvm::sys::fs::F_Text); + llvm::raw_fd_ostream o(outPath, EC, llvm::sys::fs::OF_Text); if (EC) { llvm::errs() << "error: could not create file: " << outPath << '\n'; return; diff --git a/lib/AST/APValue.cpp b/lib/AST/APValue.cpp index 1993bba9bd1..50f8d05dacb 100644 --- a/lib/AST/APValue.cpp +++ b/lib/AST/APValue.cpp @@ -42,6 +42,14 @@ APValue::LValueBase::LValueBase(const ValueDecl *P, unsigned I, unsigned V) APValue::LValueBase::LValueBase(const Expr *P, unsigned I, unsigned V) : Ptr(P), Local{I, V} {} +APValue::LValueBase APValue::LValueBase::getDynamicAlloc(DynamicAllocLValue LV, + QualType Type) { + LValueBase Base; + Base.Ptr = LV; + Base.DynamicAllocType = Type.getAsOpaquePtr(); + return Base; +} + APValue::LValueBase APValue::LValueBase::getTypeInfo(TypeInfoLValue LV, QualType TypeInfo) { LValueBase Base; @@ -51,11 +59,12 @@ APValue::LValueBase APValue::LValueBase::getTypeInfo(TypeInfoLValue LV, } unsigned APValue::LValueBase::getCallIndex() const { - return is() ? 0 : Local.CallIndex; + return (is() || is()) ? 0 + : Local.CallIndex; } unsigned APValue::LValueBase::getVersion() const { - return is() ? 0 : Local.Version; + return (is() || is()) ? 
0 : Local.Version; } QualType APValue::LValueBase::getTypeInfoType() const { @@ -63,6 +72,11 @@ QualType APValue::LValueBase::getTypeInfoType() const { return QualType::getFromOpaquePtr(TypeInfoType); } +QualType APValue::LValueBase::getDynamicAllocType() const { + assert(is() && "not a dynamic allocation lvalue"); + return QualType::getFromOpaquePtr(DynamicAllocType); +} + namespace clang { bool operator==(const APValue::LValueBase &LHS, const APValue::LValueBase &RHS) { @@ -111,7 +125,7 @@ llvm::DenseMapInfo::getTombstoneKey() { namespace clang { llvm::hash_code hash_value(const APValue::LValueBase &Base) { - if (Base.is()) + if (Base.is() || Base.is()) return llvm::hash_value(Base.getOpaqueValue()); return llvm::hash_combine(Base.getOpaqueValue(), Base.getCallIndex(), Base.getVersion()); @@ -479,7 +493,7 @@ void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx, return; case APValue::Vector: { Out << '{'; - QualType ElemTy = Ty->getAs()->getElementType(); + QualType ElemTy = Ty->castAs()->getElementType(); getVectorElt(0).printPretty(Out, Ctx, ElemTy); for (unsigned i = 1; i != getVectorLength(); ++i) { Out << ", "; @@ -528,13 +542,18 @@ void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx, S = CharUnits::One(); } Out << '&'; - } else if (!IsReference) + } else if (!IsReference) { Out << '&'; + } if (const ValueDecl *VD = Base.dyn_cast()) Out << *VD; else if (TypeInfoLValue TI = Base.dyn_cast()) { TI.print(Out, Ctx.getPrintingPolicy()); + } else if (DynamicAllocLValue DA = Base.dyn_cast()) { + Out << "{*new " + << Base.getDynamicAllocType().stream(Ctx.getPrintingPolicy()) << "#" + << DA.getIndex() << "}"; } else { assert(Base.get() != nullptr && "Expecting non-null Expr"); @@ -563,10 +582,17 @@ void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx, } else if (TypeInfoLValue TI = Base.dyn_cast()) { TI.print(Out, Ctx.getPrintingPolicy()); ElemTy = Base.getTypeInfoType(); + } else if (DynamicAllocLValue DA = Base.dyn_cast()) { + 
Out << "{*new " + << Base.getDynamicAllocType().stream(Ctx.getPrintingPolicy()) << "#" + << DA.getIndex() << "}"; + ElemTy = Base.getDynamicAllocType(); } else { const Expr *E = Base.get(); assert(E != nullptr && "Expecting non-null Expr"); E->printPretty(Out, nullptr, Ctx.getPrintingPolicy()); + // FIXME: This is wrong if E is a MaterializeTemporaryExpr with an lvalue + // adjustment. ElemTy = E->getType(); } @@ -626,7 +652,7 @@ void APValue::printPretty(raw_ostream &Out, const ASTContext &Ctx, } case APValue::Struct: { Out << '{'; - const RecordDecl *RD = Ty->getAs()->getDecl(); + const RecordDecl *RD = Ty->castAs()->getDecl(); bool First = true; if (unsigned N = getStructNumBases()) { const CXXRecordDecl *CD = cast(RD); diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 0d69eb90aba..cda51ec755a 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -12,6 +12,7 @@ #include "clang/AST/ASTContext.h" #include "CXXABI.h" +#include "Interp/Context.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/ASTTypeTraits.h" @@ -98,62 +99,60 @@ enum FloatingRank { Float16Rank, HalfRank, FloatRank, DoubleRank, LongDoubleRank, Float128Rank }; -RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { +/// \returns location that is relevant when searching for Doc comments related +/// to \p D. +static SourceLocation getDeclLocForCommentSearch(const Decl *D, + SourceManager &SourceMgr) { assert(D); - // If we already tried to load comments but there are none, - // we won't find anything. - if (CommentsLoaded && Comments.getComments().empty()) - return nullptr; - // User can not attach documentation to implicit declarations. if (D->isImplicit()) - return nullptr; + return {}; // User can not attach documentation to implicit instantiations. 
if (const auto *FD = dyn_cast(D)) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) - return nullptr; + return {}; } if (const auto *VD = dyn_cast(D)) { if (VD->isStaticDataMember() && VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) - return nullptr; + return {}; } if (const auto *CRD = dyn_cast(D)) { if (CRD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) - return nullptr; + return {}; } if (const auto *CTSD = dyn_cast(D)) { TemplateSpecializationKind TSK = CTSD->getSpecializationKind(); if (TSK == TSK_ImplicitInstantiation || TSK == TSK_Undeclared) - return nullptr; + return {}; } if (const auto *ED = dyn_cast(D)) { if (ED->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) - return nullptr; + return {}; } if (const auto *TD = dyn_cast(D)) { // When tag declaration (but not definition!) is part of the // decl-specifier-seq of some other declaration, it doesn't get comment if (TD->isEmbeddedInDeclarator() && !TD->isCompleteDefinition()) - return nullptr; + return {}; } // TODO: handle comments for function parameters properly. if (isa(D)) - return nullptr; + return {}; // TODO: we could look up template parameter documentation in the template // documentation. if (isa(D) || isa(D) || isa(D)) - return nullptr; + return {}; // Find declaration location. // For Objective-C declarations we generally don't expect to have multiple @@ -161,20 +160,19 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { // location". // For all other declarations multiple declarators are used quite frequently, // so we use the location of the identifier as the "declaration location". 
- SourceLocation DeclLoc; if (isa(D) || isa(D) || isa(D) || isa(D) || isa(D)) - DeclLoc = D->getBeginLoc(); + return D->getBeginLoc(); else { - DeclLoc = D->getLocation(); + const SourceLocation DeclLoc = D->getLocation(); if (DeclLoc.isMacroID()) { if (isa(D)) { // If location of the typedef name is in a macro, it is because being // declared via a macro. Try using declaration's starting location as // the "declaration location". - DeclLoc = D->getBeginLoc(); + return D->getBeginLoc(); } else if (const auto *TD = dyn_cast(D)) { // If location of the tag decl is inside a macro, but the spelling of // the tag name comes from a macro argument, it looks like a special @@ -183,102 +181,73 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { // attach the comment to the tag decl. if (SourceMgr.isMacroArgExpansion(DeclLoc) && TD->isCompleteDefinition()) - DeclLoc = SourceMgr.getExpansionLoc(DeclLoc); + return SourceMgr.getExpansionLoc(DeclLoc); } } + return DeclLoc; } + return {}; +} + +RawComment *ASTContext::getRawCommentForDeclNoCacheImpl( + const Decl *D, const SourceLocation RepresentativeLocForDecl, + const std::map &CommentsInTheFile) const { // If the declaration doesn't map directly to a location in a file, we // can't find the comment. - if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + if (RepresentativeLocForDecl.isInvalid() || + !RepresentativeLocForDecl.isFileID()) return nullptr; - if (!CommentsLoaded && ExternalSource) { - ExternalSource->ReadComments(); - -#ifndef NDEBUG - ArrayRef RawComments = Comments.getComments(); - assert(std::is_sorted(RawComments.begin(), RawComments.end(), - BeforeThanCompare(SourceMgr))); -#endif - - CommentsLoaded = true; - } - - ArrayRef RawComments = Comments.getComments(); // If there are no comments anywhere, we won't find anything. - if (RawComments.empty()) + if (CommentsInTheFile.empty()) return nullptr; - // Find the comment that occurs just after this declaration. 
- ArrayRef::iterator Comment; - { - // When searching for comments during parsing, the comment we are looking - // for is usually among the last two comments we parsed -- check them - // first. - RawComment CommentAtDeclLoc( - SourceMgr, SourceRange(DeclLoc), LangOpts.CommentOpts, false); - BeforeThanCompare Compare(SourceMgr); - ArrayRef::iterator MaybeBeforeDecl = RawComments.end() - 1; - bool Found = Compare(*MaybeBeforeDecl, &CommentAtDeclLoc); - if (!Found && RawComments.size() >= 2) { - MaybeBeforeDecl--; - Found = Compare(*MaybeBeforeDecl, &CommentAtDeclLoc); - } - - if (Found) { - Comment = MaybeBeforeDecl + 1; - assert(Comment == - llvm::lower_bound(RawComments, &CommentAtDeclLoc, Compare)); - } else { - // Slow path. - Comment = llvm::lower_bound(RawComments, &CommentAtDeclLoc, Compare); - } - } - // Decompose the location for the declaration and find the beginning of the // file buffer. - std::pair DeclLocDecomp = SourceMgr.getDecomposedLoc(DeclLoc); + const std::pair DeclLocDecomp = + SourceMgr.getDecomposedLoc(RepresentativeLocForDecl); + + // Slow path. + auto OffsetCommentBehindDecl = + CommentsInTheFile.lower_bound(DeclLocDecomp.second); // First check whether we have a trailing comment. - if (Comment != RawComments.end() && - ((*Comment)->isDocumentation() || LangOpts.CommentOpts.ParseAllComments) - && (*Comment)->isTrailingComment() && - (isa(D) || isa(D) || isa(D) || - isa(D) || isa(D))) { - std::pair CommentBeginDecomp - = SourceMgr.getDecomposedLoc((*Comment)->getSourceRange().getBegin()); - // Check that Doxygen trailing comment comes after the declaration, starts - // on the same line and in the same file as the declaration. 
- if (DeclLocDecomp.first == CommentBeginDecomp.first && - SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) - == SourceMgr.getLineNumber(CommentBeginDecomp.first, - CommentBeginDecomp.second)) { - (**Comment).setAttached(); - return *Comment; + if (OffsetCommentBehindDecl != CommentsInTheFile.end()) { + RawComment *CommentBehindDecl = OffsetCommentBehindDecl->second; + if ((CommentBehindDecl->isDocumentation() || + LangOpts.CommentOpts.ParseAllComments) && + CommentBehindDecl->isTrailingComment() && + (isa(D) || isa(D) || isa(D) || + isa(D) || isa(D))) { + + // Check that Doxygen trailing comment comes after the declaration, starts + // on the same line and in the same file as the declaration. + if (SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) == + Comments.getCommentBeginLine(CommentBehindDecl, DeclLocDecomp.first, + OffsetCommentBehindDecl->first)) { + return CommentBehindDecl; + } } } // The comment just after the declaration was not a trailing comment. // Let's look at the previous comment. - if (Comment == RawComments.begin()) + if (OffsetCommentBehindDecl == CommentsInTheFile.begin()) return nullptr; - --Comment; + + auto OffsetCommentBeforeDecl = --OffsetCommentBehindDecl; + RawComment *CommentBeforeDecl = OffsetCommentBeforeDecl->second; // Check that we actually have a non-member Doxygen comment. - if (!((*Comment)->isDocumentation() || + if (!(CommentBeforeDecl->isDocumentation() || LangOpts.CommentOpts.ParseAllComments) || - (*Comment)->isTrailingComment()) + CommentBeforeDecl->isTrailingComment()) return nullptr; // Decompose the end of the comment. - std::pair CommentEndDecomp - = SourceMgr.getDecomposedLoc((*Comment)->getSourceRange().getEnd()); - - // If the comment and the declaration aren't in the same file, then they - // aren't related. 
- if (DeclLocDecomp.first != CommentEndDecomp.first) - return nullptr; + const unsigned CommentEndOffset = + Comments.getCommentEndOffset(CommentBeforeDecl); // Get the corresponding buffer. bool Invalid = false; @@ -288,26 +257,49 @@ RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { return nullptr; // Extract text between the comment and declaration. - StringRef Text(Buffer + CommentEndDecomp.second, - DeclLocDecomp.second - CommentEndDecomp.second); + StringRef Text(Buffer + CommentEndOffset, + DeclLocDecomp.second - CommentEndOffset); // There should be no other declarations or preprocessor directives between // comment and declaration. if (Text.find_first_of(";{}#@") != StringRef::npos) return nullptr; - (**Comment).setAttached(); - return *Comment; + return CommentBeforeDecl; +} + +RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { + const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr); + + // If the declaration doesn't map directly to a location in a file, we + // can't find the comment. + if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + return nullptr; + + if (ExternalSource && !CommentsLoaded) { + ExternalSource->ReadComments(); + CommentsLoaded = true; + } + + if (Comments.empty()) + return nullptr; + + const FileID File = SourceMgr.getDecomposedLoc(DeclLoc).first; + const auto CommentsInThisFile = Comments.getCommentsInFile(File); + if (!CommentsInThisFile || CommentsInThisFile->empty()) + return nullptr; + + return getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile); } /// If we have a 'templated' declaration for a template, adjust 'D' to /// refer to the actual template. /// If we have an implicit instantiation, adjust 'D' to refer to template. 
-static const Decl *adjustDeclToTemplate(const Decl *D) { - if (const auto *FD = dyn_cast(D)) { +static const Decl &adjustDeclToTemplate(const Decl &D) { + if (const auto *FD = dyn_cast(&D)) { // Is this function declaration part of a function template? if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate()) - return FTD; + return *FTD; // Nothing to do if function is not an implicit instantiation. if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation) @@ -315,28 +307,28 @@ static const Decl *adjustDeclToTemplate(const Decl *D) { // Function is an implicit instantiation of a function template? if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate()) - return FTD; + return *FTD; // Function is instantiated from a member definition of a class template? if (const FunctionDecl *MemberDecl = FD->getInstantiatedFromMemberFunction()) - return MemberDecl; + return *MemberDecl; return D; } - if (const auto *VD = dyn_cast(D)) { + if (const auto *VD = dyn_cast(&D)) { // Static data member is instantiated from a member definition of a class // template? if (VD->isStaticDataMember()) if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember()) - return MemberDecl; + return *MemberDecl; return D; } - if (const auto *CRD = dyn_cast(D)) { + if (const auto *CRD = dyn_cast(&D)) { // Is this class declaration part of a class template? if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate()) - return CTD; + return *CTD; // Class is an implicit instantiation of a class template or partial // specialization? @@ -346,23 +338,23 @@ static const Decl *adjustDeclToTemplate(const Decl *D) { llvm::PointerUnion PU = CTSD->getSpecializedTemplateOrPartial(); - return PU.is() ? - static_cast(PU.get()) : - static_cast( - PU.get()); + return PU.is() + ? *static_cast(PU.get()) + : *static_cast( + PU.get()); } // Class is instantiated from a member definition of a class template? 
if (const MemberSpecializationInfo *Info = - CRD->getMemberSpecializationInfo()) - return Info->getInstantiatedFrom(); + CRD->getMemberSpecializationInfo()) + return *Info->getInstantiatedFrom(); return D; } - if (const auto *ED = dyn_cast(D)) { + if (const auto *ED = dyn_cast(&D)) { // Enum is instantiated from a member definition of a class template? if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum()) - return MemberDecl; + return *MemberDecl; return D; } @@ -373,72 +365,81 @@ static const Decl *adjustDeclToTemplate(const Decl *D) { const RawComment *ASTContext::getRawCommentForAnyRedecl( const Decl *D, const Decl **OriginalDecl) const { - D = adjustDeclToTemplate(D); + if (!D) { + if (OriginalDecl) + OriginalDecl = nullptr; + return nullptr; + } - // Check whether we have cached a comment for this declaration already. + D = &adjustDeclToTemplate(*D); + + // Any comment directly attached to D? { - llvm::DenseMap::iterator Pos = - RedeclComments.find(D); - if (Pos != RedeclComments.end()) { - const RawCommentAndCacheFlags &Raw = Pos->second; - if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) { - if (OriginalDecl) - *OriginalDecl = Raw.getOriginalDecl(); - return Raw.getRaw(); - } + auto DeclComment = DeclRawComments.find(D); + if (DeclComment != DeclRawComments.end()) { + if (OriginalDecl) + *OriginalDecl = D; + return DeclComment->second; } } - // Search for comments attached to declarations in the redeclaration chain. 
- const RawComment *RC = nullptr; - const Decl *OriginalDeclForRC = nullptr; - for (auto I : D->redecls()) { - llvm::DenseMap::iterator Pos = - RedeclComments.find(I); - if (Pos != RedeclComments.end()) { - const RawCommentAndCacheFlags &Raw = Pos->second; - if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) { - RC = Raw.getRaw(); - OriginalDeclForRC = Raw.getOriginalDecl(); - break; - } - } else { - RC = getRawCommentForDeclNoCache(I); - OriginalDeclForRC = I; - RawCommentAndCacheFlags Raw; - if (RC) { - // Call order swapped to work around ICE in VS2015 RTM (Release Win32) - // https://connect.microsoft.com/VisualStudio/feedback/details/1741530 - Raw.setKind(RawCommentAndCacheFlags::FromDecl); - Raw.setRaw(RC); - } else - Raw.setKind(RawCommentAndCacheFlags::NoCommentInDecl); - Raw.setOriginalDecl(I); - RedeclComments[I] = Raw; - if (RC) - break; + // Any comment attached to any redeclaration of D? + const Decl *CanonicalD = D->getCanonicalDecl(); + if (!CanonicalD) + return nullptr; + + { + auto RedeclComment = RedeclChainComments.find(CanonicalD); + if (RedeclComment != RedeclChainComments.end()) { + if (OriginalDecl) + *OriginalDecl = RedeclComment->second; + auto CommentAtRedecl = DeclRawComments.find(RedeclComment->second); + assert(CommentAtRedecl != DeclRawComments.end() && + "This decl is supposed to have comment attached."); + return CommentAtRedecl->second; } } - // If we found a comment, it should be a documentation comment. - assert(!RC || RC->isDocumentation() || LangOpts.CommentOpts.ParseAllComments); + // Any redeclarations of D that we haven't checked for comments yet? + // We can't use DenseMap::iterator directly since it'd get invalid. 
+ auto LastCheckedRedecl = [this, CanonicalD]() -> const Decl * { + auto LookupRes = CommentlessRedeclChains.find(CanonicalD); + if (LookupRes != CommentlessRedeclChains.end()) + return LookupRes->second; + return nullptr; + }(); + + for (const auto Redecl : D->redecls()) { + assert(Redecl); + // Skip all redeclarations that have been checked previously. + if (LastCheckedRedecl) { + if (LastCheckedRedecl == Redecl) { + LastCheckedRedecl = nullptr; + } + continue; + } + const RawComment *RedeclComment = getRawCommentForDeclNoCache(Redecl); + if (RedeclComment) { + cacheRawCommentForDecl(*Redecl, *RedeclComment); + if (OriginalDecl) + *OriginalDecl = Redecl; + return RedeclComment; + } + CommentlessRedeclChains[CanonicalD] = Redecl; + } if (OriginalDecl) - *OriginalDecl = OriginalDeclForRC; + *OriginalDecl = nullptr; + return nullptr; +} - // Update cache for every declaration in the redeclaration chain. - RawCommentAndCacheFlags Raw; - Raw.setRaw(RC); - Raw.setKind(RawCommentAndCacheFlags::FromRedecl); - Raw.setOriginalDecl(OriginalDeclForRC); - - for (auto I : D->redecls()) { - RawCommentAndCacheFlags &R = RedeclComments[I]; - if (R.getKind() == RawCommentAndCacheFlags::NoCommentInDecl) - R = Raw; - } - - return RC; +void ASTContext::cacheRawCommentForDecl(const Decl &OriginalD, + const RawComment &Comment) const { + assert(Comment.isDocumentation() || LangOpts.CommentOpts.ParseAllComments); + DeclRawComments.try_emplace(&OriginalD, &Comment); + const Decl *const CanonicalDecl = OriginalD.getCanonicalDecl(); + RedeclChainComments.try_emplace(CanonicalDecl, &OriginalD); + CommentlessRedeclChains.erase(CanonicalDecl); } static void addRedeclaredMethods(const ObjCMethodDecl *ObjCMethod, @@ -458,6 +459,52 @@ static void addRedeclaredMethods(const ObjCMethodDecl *ObjCMethod, } } +void ASTContext::attachCommentsToJustParsedDecls(ArrayRef Decls, + const Preprocessor *PP) { + if (Comments.empty() || Decls.empty()) + return; + + // See if there are any new comments that are 
not attached to a decl. + // The location doesn't have to be precise - we care only about the file. + const FileID File = + SourceMgr.getDecomposedLoc((*Decls.begin())->getLocation()).first; + auto CommentsInThisFile = Comments.getCommentsInFile(File); + if (!CommentsInThisFile || CommentsInThisFile->empty() || + CommentsInThisFile->rbegin()->second->isAttached()) + return; + + // There is at least one comment not attached to a decl. + // Maybe it should be attached to one of Decls? + // + // Note that this way we pick up not only comments that precede the + // declaration, but also comments that *follow* the declaration -- thanks to + // the lookahead in the lexer: we've consumed the semicolon and looked + // ahead through comments. + + for (const Decl *D : Decls) { + assert(D); + if (D->isInvalidDecl()) + continue; + + D = &adjustDeclToTemplate(*D); + + const SourceLocation DeclLoc = getDeclLocForCommentSearch(D, SourceMgr); + + if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + continue; + + if (DeclRawComments.count(D) > 0) + continue; + + if (RawComment *const DocComment = + getRawCommentForDeclNoCacheImpl(D, DeclLoc, *CommentsInThisFile)) { + cacheRawCommentForDecl(*D, *DocComment); + comments::FullComment *FC = DocComment->parse(*this, PP, D); + ParsedComments[D->getCanonicalDecl()] = FC; + } + } +} + comments::FullComment *ASTContext::cloneFullComment(comments::FullComment *FC, const Decl *D) const { auto *ThisDeclInfo = new (*this) comments::DeclInfo; @@ -481,9 +528,9 @@ comments::FullComment *ASTContext::getLocalCommentForDeclUncached(const Decl *D) comments::FullComment *ASTContext::getCommentForDecl( const Decl *D, const Preprocessor *PP) const { - if (D->isInvalidDecl()) + if (!D || D->isInvalidDecl()) return nullptr; - D = adjustDeclToTemplate(D); + D = &adjustDeclToTemplate(*D); const Decl *Canonical = D->getCanonicalDecl(); llvm::DenseMap::iterator Pos = @@ -498,7 +545,7 @@ comments::FullComment *ASTContext::getCommentForDecl( return Pos->second; } - 
const Decl *OriginalDecl; + const Decl *OriginalDecl = nullptr; const RawComment *RC = getRawCommentForAnyRedecl(D, &OriginalDecl); if (!RC) { @@ -577,7 +624,7 @@ comments::FullComment *ASTContext::getCommentForDecl( // should parse the comment in context of that other Decl. This is important // because comments can contain references to parameter names which can be // different across redeclarations. - if (D != OriginalDecl) + if (D != OriginalDecl && OriginalDecl) return getCommentForDecl(OriginalDecl, PP); comments::FullComment *FC = RC->parse(*this, PP, D); @@ -691,7 +738,7 @@ ASTContext::getCanonicalTemplateTemplateParmDecl( cast(*P))); } - assert(!TTP->getRequiresClause() && + assert(!TTP->getTemplateParameters()->getRequiresClause() && "Unexpected requires-clause on template template-parameter"); Expr *const CanonRequiresClause = nullptr; @@ -737,6 +784,13 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { llvm_unreachable("Invalid CXXABI type!"); } +interp::Context &ASTContext::getInterpContext() { + if (!InterpContext) { + InterpContext.reset(new interp::Context(*this)); + } + return *InterpContext.get(); +} + static const LangASMap *getAddressSpaceMap(const TargetInfo &T, const LangOptions &LOpts) { if (LOpts.FakeAddressSpaceMap) { @@ -775,7 +829,8 @@ static bool isAddrSpaceMapManglingEnabled(const TargetInfo &TI, ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents, SelectorTable &sels, Builtin::Context &builtins) - : FunctionProtoTypes(this_()), TemplateSpecializationTypes(this_()), + : ConstantArrayTypes(this_()), FunctionProtoTypes(this_()), + TemplateSpecializationTypes(this_()), DependentTemplateSpecializationTypes(this_()), SubstTemplateTemplateParmPacks(this_()), SourceMgr(SM), LangOpts(LOpts), SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)), @@ -923,7 +978,7 @@ void ASTContext::PrintStats() const { unsigned counts[] = { #define TYPE(Name, Parent) 0, #define ABSTRACT_TYPE(Name, 
Parent) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" 0 // Extra }; @@ -943,7 +998,7 @@ void ASTContext::PrintStats() const { TotalBytes += counts[Idx] * sizeof(Name##Type); \ ++Idx; #define ABSTRACT_TYPE(Name, Parent) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm::errs() << "Total bytes = " << TotalBytes << "\n"; @@ -1298,6 +1353,12 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, #include "clang/Basic/OpenCLExtensionTypes.def" } + if (Target.hasAArch64SVETypes()) { +#define SVE_TYPE(Name, Id, SingletonId) \ + InitBuiltinType(SingletonId, BuiltinType::Id); +#include "clang/Basic/AArch64SVEACLETypes.def" + } + // Builtin type for __objc_yes and __objc_no ObjCBuiltinBoolTy = (Target.useSignedCharForObjCBool() ? SignedCharTy : BoolTy); @@ -1515,10 +1576,9 @@ void ASTContext::addedLocalImportDecl(ImportDecl *Import) { /// getFloatTypeSemantics - Return the APFloat 'semantics' for the specified /// scalar floating point type. 
const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const { - const auto *BT = T->getAs(); - assert(BT && "Not a floating point type!"); - switch (BT->getKind()) { - default: llvm_unreachable("Not a floating point type!"); + switch (T->castAs()->getKind()) { + default: + llvm_unreachable("Not a floating point type!"); case BuiltinType::Float16: case BuiltinType::Half: return Target->getHalfFormat(); @@ -1749,7 +1809,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case Type::Class: \ assert(!T->isDependentType() && "should not see dependent types here"); \ return getTypeInfo(cast(T)->desugar().getTypePtr()); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Should not see dependent types"); case Type::FunctionNoProto: @@ -1968,6 +2028,25 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Width = Target->getPointerWidth(AS); Align = Target->getPointerAlign(AS); break; + // The SVE types are effectively target-specific. The length of an + // SVE_VECTOR_TYPE is only known at runtime, but it is always a multiple + // of 128 bits. There is one predicate bit for each vector byte, so the + // length of an SVE_PREDICATE_TYPE is always a multiple of 16 bits. + // + // Because the length is only known at runtime, we use a dummy value + // of 0 for the static length. The alignment values are those defined + // by the Procedure Call Standard for the Arm Architecture. +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, IsSigned, IsFP)\ + case BuiltinType::Id: \ + Width = 0; \ + Align = 128; \ + break; +#define SVE_PREDICATE_TYPE(Name, Id, SingletonId, ElKind) \ + case BuiltinType::Id: \ + Width = 0; \ + Align = 16; \ + break; +#include "clang/Basic/AArch64SVEACLETypes.def" } break; case Type::ObjCObjectPointer: @@ -2364,7 +2443,7 @@ structHasUniqueObjectRepresentations(const ASTContext &Context, // have tail padding, so just make sure there isn't an error. 
if (!isStructEmpty(Base.getType())) { llvm::Optional Size = structHasUniqueObjectRepresentations( - Context, Base.getType()->getAs()->getDecl()); + Context, Base.getType()->castAs()->getDecl()); if (!Size) return llvm::None; Bases.emplace_back(Base.getType(), Size.getValue()); @@ -2455,7 +2534,7 @@ bool ASTContext::hasUniqueObjectRepresentations(QualType Ty) const { } if (Ty->isRecordType()) { - const RecordDecl *Record = Ty->getAs()->getDecl(); + const RecordDecl *Record = Ty->castAs()->getDecl(); if (Record->isInvalidDecl()) return false; @@ -2790,7 +2869,7 @@ QualType ASTContext::getFunctionTypeWithExceptionSpec( // Anything else must be a function type. Rebuild it with the new exception // specification. - const auto *Proto = Orig->getAs(); + const auto *Proto = Orig->castAs(); return getFunctionType( Proto->getReturnType(), Proto->getParamTypes(), Proto->getExtProtoInfo().withExceptionSpec(ESI)); @@ -3087,31 +3166,38 @@ QualType ASTContext::getMemberPointerType(QualType T, const Type *Cls) const { /// array of the specified element type. QualType ASTContext::getConstantArrayType(QualType EltTy, const llvm::APInt &ArySizeIn, + const Expr *SizeExpr, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const { assert((EltTy->isDependentType() || EltTy->isIncompleteType() || EltTy->isConstantSizeType()) && "Constant array of VLAs is illegal!"); + // We only need the size as part of the type if it's instantiation-dependent. + if (SizeExpr && !SizeExpr->isInstantiationDependent()) + SizeExpr = nullptr; + // Convert the array size into a canonical width matching the pointer size for // the target. 
llvm::APInt ArySize(ArySizeIn); ArySize = ArySize.zextOrTrunc(Target->getMaxPointerWidth()); llvm::FoldingSetNodeID ID; - ConstantArrayType::Profile(ID, EltTy, ArySize, ASM, IndexTypeQuals); + ConstantArrayType::Profile(ID, *this, EltTy, ArySize, SizeExpr, ASM, + IndexTypeQuals); void *InsertPos = nullptr; if (ConstantArrayType *ATP = ConstantArrayTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(ATP, 0); - // If the element type isn't canonical or has qualifiers, this won't - // be a canonical type either, so fill in the canonical type field. + // If the element type isn't canonical or has qualifiers, or the array bound + // is instantiation-dependent, this won't be a canonical type either, so fill + // in the canonical type field. QualType Canon; - if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers()) { + if (!EltTy.isCanonical() || EltTy.hasLocalQualifiers() || SizeExpr) { SplitQualType canonSplit = getCanonicalType(EltTy).split(); - Canon = getConstantArrayType(QualType(canonSplit.Ty, 0), ArySize, + Canon = getConstantArrayType(QualType(canonSplit.Ty, 0), ArySize, nullptr, ASM, IndexTypeQuals); Canon = getQualifiedType(Canon, canonSplit.Quals); @@ -3121,8 +3207,11 @@ QualType ASTContext::getConstantArrayType(QualType EltTy, assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP; } - auto *New = new (*this,TypeAlignment) - ConstantArrayType(EltTy, Canon, ArySize, ASM, IndexTypeQuals); + void *Mem = Allocate( + ConstantArrayType::totalSizeToAlloc(SizeExpr ? 
1 : 0), + TypeAlignment); + auto *New = new (Mem) + ConstantArrayType(EltTy, Canon, ArySize, SizeExpr, ASM, IndexTypeQuals); ConstantArrayTypes.InsertNode(New, InsertPos); Types.push_back(New); return QualType(New, 0); @@ -3143,7 +3232,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("didn't desugar past all non-canonical types?"); // These types should never be variably-modified. @@ -3219,6 +3308,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { result = getConstantArrayType( getVariableArrayDecayedType(cat->getElementType()), cat->getSize(), + cat->getSizeExpr(), cat->getSizeModifier(), cat->getIndexTypeCVRQualifiers()); break; @@ -4624,8 +4714,7 @@ ASTContext::applyObjCProtocolQualifiers(QualType type, QualType ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl, - ArrayRef protocols, - QualType Canonical) const { + ArrayRef protocols) const { // Look in the folding set for an existing type. llvm::FoldingSetNodeID ID; ObjCTypeParamType::Profile(ID, Decl, protocols); @@ -4634,16 +4723,14 @@ ASTContext::getObjCTypeParamType(const ObjCTypeParamDecl *Decl, ObjCTypeParamTypes.FindNodeOrInsertPos(ID, InsertPos)) return QualType(TypeParam, 0); - if (Canonical.isNull()) { - // We canonicalize to the underlying type. - Canonical = getCanonicalType(Decl->getUnderlyingType()); - if (!protocols.empty()) { - // Apply the protocol qualifers. - bool hasError; - Canonical = getCanonicalType(applyObjCProtocolQualifiers( - Canonical, protocols, hasError, true /*allowOnPointerType*/)); - assert(!hasError && "Error when apply protocol qualifier to bound type"); - } + // We canonicalize to the underlying type. 
+ QualType Canonical = getCanonicalType(Decl->getUnderlyingType()); + if (!protocols.empty()) { + // Apply the protocol qualifers. + bool hasError; + Canonical = getCanonicalType(applyObjCProtocolQualifiers( + Canonical, protocols, hasError, true /*allowOnPointerType*/)); + assert(!hasError && "Error when apply protocol qualifier to bound type"); } unsigned size = sizeof(ObjCTypeParamType); @@ -5114,7 +5201,7 @@ QualType ASTContext::getUnqualifiedArrayType(QualType type, if (const auto *CAT = dyn_cast(AT)) { return getConstantArrayType(unqualElementType, CAT->getSize(), - CAT->getSizeModifier(), 0); + CAT->getSizeExpr(), CAT->getSizeModifier(), 0); } if (const auto *IAT = dyn_cast(AT)) { @@ -5487,6 +5574,7 @@ const ArrayType *ASTContext::getAsArrayType(QualType T) const { if (const auto *CAT = dyn_cast(ATy)) return cast(getConstantArrayType(NewEltTy, CAT->getSize(), + CAT->getSizeExpr(), CAT->getSizeModifier(), CAT->getIndexTypeCVRQualifiers())); if (const auto *IAT = dyn_cast(ATy)) @@ -5599,8 +5687,7 @@ static FloatingRank getFloatingRank(QualType T) { if (const auto *CT = T->getAs()) return getFloatingRank(CT->getElementType()); - assert(T->getAs() && "getFloatingRank(): not a floating type"); - switch (T->getAs()->getKind()) { + switch (T->castAs()->getKind()) { default: llvm_unreachable("getFloatingRank(): not a floating type"); case BuiltinType::Float16: return Float16Rank; case BuiltinType::Half: return HalfRank; @@ -5977,12 +6064,10 @@ QualType ASTContext::getObjCSuperType() const { } void ASTContext::setCFConstantStringType(QualType T) { - const auto *TD = T->getAs(); - assert(TD && "Invalid CFConstantStringType"); + const auto *TD = T->castAs(); CFConstantStringTypeDecl = cast(TD->getDecl()); const auto *TagType = - CFConstantStringTypeDecl->getUnderlyingType()->getAs(); - assert(TagType && "Invalid CFConstantStringType"); + CFConstantStringTypeDecl->getUnderlyingType()->castAs(); CFConstantStringTagDecl = TagType->getDecl(); } @@ -6238,14 +6323,14 @@ 
std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const { const BlockDecl *Decl = Expr->getBlockDecl(); QualType BlockTy = - Expr->getType()->getAs()->getPointeeType(); + Expr->getType()->castAs()->getPointeeType(); + QualType BlockReturnTy = BlockTy->castAs()->getReturnType(); // Encode result type. if (getLangOpts().EncodeExtendedBlockSig) - getObjCEncodingForMethodParameter( - Decl::OBJC_TQ_None, BlockTy->getAs()->getReturnType(), S, - true /*Extended*/); + getObjCEncodingForMethodParameter(Decl::OBJC_TQ_None, BlockReturnTy, S, + true /*Extended*/); else - getObjCEncodingForType(BlockTy->getAs()->getReturnType(), S); + getObjCEncodingForType(BlockReturnTy, S); // Compute size of all parameters. // Start with computing size of a pointer in number of bytes. // FIXME: There might(should) be a better way of doing this computation! @@ -6556,8 +6641,9 @@ void ASTContext::getObjCEncodingForPropertyType(QualType T, /*Field=*/nullptr); } -static char getObjCEncodingForPrimitiveKind(const ASTContext *C, - BuiltinType::Kind kind) { +static char getObjCEncodingForPrimitiveType(const ASTContext *C, + const BuiltinType *BT) { + BuiltinType::Kind kind = BT->getKind(); switch (kind) { case BuiltinType::Void: return 'v'; case BuiltinType::Bool: return 'B'; @@ -6617,6 +6703,17 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C, // FIXME: potentially need @encodes for these! 
return ' '; +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" + { + DiagnosticsEngine &Diags = C->getDiagnostics(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "cannot yet @encode type %0"); + Diags.Report(DiagID) << BT->getName(C->getPrintingPolicy()); + return ' '; + } + case BuiltinType::ObjCId: case BuiltinType::ObjCClass: case BuiltinType::ObjCSel: @@ -6653,7 +6750,7 @@ static char ObjCEncodingForEnumType(const ASTContext *C, const EnumType *ET) { // The encoding of a fixed enum type matches its fixed underlying type. const auto *BT = Enum->getIntegerType()->castAs(); - return getObjCEncodingForPrimitiveKind(C, BT->getKind()); + return getObjCEncodingForPrimitiveType(C, BT); } static void EncodeBitField(const ASTContext *Ctx, std::string& S, @@ -6693,7 +6790,7 @@ static void EncodeBitField(const ASTContext *Ctx, std::string& S, S += ObjCEncodingForEnumType(Ctx, ET); else { const auto *BT = T->castAs(); - S += getObjCEncodingForPrimitiveKind(Ctx, BT->getKind()); + S += getObjCEncodingForPrimitiveType(Ctx, BT); } } S += llvm::utostr(FD->getBitWidthValue(*Ctx)); @@ -6711,7 +6808,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, if (FD && FD->isBitField()) return EncodeBitField(this, S, T, FD); if (const auto *BT = dyn_cast(CT)) - S += getObjCEncodingForPrimitiveKind(this, BT->getKind()); + S += getObjCEncodingForPrimitiveType(this, BT); else S += ObjCEncodingForEnumType(this, cast(CT)); return; @@ -6760,8 +6857,8 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, } } else if (Options.IsOutermostType()) { QualType P = PointeeTy; - while (P->getAs()) - P = P->getAs()->getPointeeType(); + while (auto PT = P->getAs()) + P = PT->getPointeeType(); if (P.isConstQualified()) { isReadOnly = true; S += 'r'; @@ -7033,7 +7130,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, case Type::KIND: #define 
NON_CANONICAL_UNLESS_DEPENDENT_TYPE(KIND, BASE) \ case Type::KIND: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("@encode for dependent type!"); } llvm_unreachable("bad type kind!"); @@ -7384,7 +7481,7 @@ static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) { llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1); QualType VaListTagArrayType = Context->getConstantArrayType(VaListTagTypedefType, - Size, ArrayType::Normal, 0); + Size, nullptr, ArrayType::Normal, 0); return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list"); } @@ -7437,16 +7534,16 @@ CreateX86_64ABIBuiltinVaListDecl(const ASTContext *Context) { // typedef struct __va_list_tag __builtin_va_list[1]; llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1); - QualType VaListTagArrayType = - Context->getConstantArrayType(VaListTagType, Size, ArrayType::Normal, 0); + QualType VaListTagArrayType = Context->getConstantArrayType( + VaListTagType, Size, nullptr, ArrayType::Normal, 0); return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list"); } static TypedefDecl *CreatePNaClABIBuiltinVaListDecl(const ASTContext *Context) { // typedef int __builtin_va_list[4]; llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 4); - QualType IntArrayType = - Context->getConstantArrayType(Context->IntTy, Size, ArrayType::Normal, 0); + QualType IntArrayType = Context->getConstantArrayType( + Context->IntTy, Size, nullptr, ArrayType::Normal, 0); return Context->buildImplicitTypedef(IntArrayType, "__builtin_va_list"); } @@ -7540,8 +7637,8 @@ CreateSystemZBuiltinVaListDecl(const ASTContext *Context) { // typedef __va_list_tag __builtin_va_list[1]; llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1); - QualType VaListTagArrayType = - Context->getConstantArrayType(VaListTagType, Size, ArrayType::Normal, 0); + QualType VaListTagArrayType = Context->getConstantArrayType( + 
VaListTagType, Size, nullptr, ArrayType::Normal, 0); return Context->buildImplicitTypedef(VaListTagArrayType, "__builtin_va_list"); } @@ -7816,7 +7913,7 @@ Qualifiers::GC ASTContext::getObjCGCAttrKind(QualType Ty) const { if (Ty->isObjCObjectPointerType() || Ty->isBlockPointerType()) return Qualifiers::Strong; else if (Ty->isPointerType()) - return getObjCGCAttrKind(Ty->getAs()->getPointeeType()); + return getObjCGCAttrKind(Ty->castAs()->getPointeeType()); } else { // It's not valid to set GC attributes on anything that isn't a // pointer. @@ -7853,8 +7950,8 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec, // Treat Neon vector types and most AltiVec vector types as if they are the // equivalent GCC vector types. - const auto *First = FirstVec->getAs(); - const auto *Second = SecondVec->getAs(); + const auto *First = FirstVec->castAs(); + const auto *Second = SecondVec->castAs(); if (First->getNumElements() == Second->getNumElements() && hasSameType(First->getElementType(), Second->getElementType()) && First->getVectorKind() != VectorType::AltiVecPixel && @@ -7866,6 +7963,28 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec, return false; } +bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const { + while (true) { + // __strong id + if (const AttributedType *Attr = dyn_cast(Ty)) { + if (Attr->getAttrKind() == attr::ObjCOwnership) + return true; + + Ty = Attr->getModifiedType(); + + // X *__strong (...) + } else if (const ParenType *Paren = dyn_cast(Ty)) { + Ty = Paren->getInnerType(); + + // We do not want to look through typedefs, typeof(expr), + // typeof(type), or any other way that the type is somehow + // abstracted. + } else { + return false; + } + } +} + //===----------------------------------------------------------------------===// // ObjCQualifiedIdTypesAreCompatible - Compatibility testing for qualified id's. 
//===----------------------------------------------------------------------===// @@ -7885,15 +8004,11 @@ ASTContext::ProtocolCompatibleWithProtocol(ObjCProtocolDecl *lProto, /// ObjCQualifiedClassTypesAreCompatible - compare Class and /// Class. -bool ASTContext::ObjCQualifiedClassTypesAreCompatible(QualType lhs, - QualType rhs) { - const auto *lhsQID = lhs->getAs(); - const auto *rhsOPT = rhs->getAs(); - assert((lhsQID && rhsOPT) && "ObjCQualifiedClassTypesAreCompatible"); - - for (auto *lhsProto : lhsQID->quals()) { +bool ASTContext::ObjCQualifiedClassTypesAreCompatible( + const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs) { + for (auto *lhsProto : lhs->quals()) { bool match = false; - for (auto *rhsProto : rhsOPT->quals()) { + for (auto *rhsProto : rhs->quals()) { if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto)) { match = true; break; @@ -7907,26 +8022,24 @@ bool ASTContext::ObjCQualifiedClassTypesAreCompatible(QualType lhs, /// ObjCQualifiedIdTypesAreCompatible - We know that one of lhs/rhs is an /// ObjCQualifiedIDType. -bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, - bool compare) { - // Allow id and an 'id' or void* type in all cases. - if (lhs->isVoidPointerType() || - lhs->isObjCIdType() || lhs->isObjCClassType()) - return true; - else if (rhs->isVoidPointerType() || - rhs->isObjCIdType() || rhs->isObjCClassType()) +bool ASTContext::ObjCQualifiedIdTypesAreCompatible( + const ObjCObjectPointerType *lhs, const ObjCObjectPointerType *rhs, + bool compare) { + // Allow id and an 'id' in all cases. + if (lhs->isObjCIdType() || rhs->isObjCIdType()) return true; - if (const ObjCObjectPointerType *lhsQID = lhs->getAsObjCQualifiedIdType()) { - const auto *rhsOPT = rhs->getAs(); + // Don't allow id to convert to Class or Class in either direction. 
+ if (lhs->isObjCClassType() || lhs->isObjCQualifiedClassType() || + rhs->isObjCClassType() || rhs->isObjCQualifiedClassType()) + return false; - if (!rhsOPT) return false; - - if (rhsOPT->qual_empty()) { + if (lhs->isObjCQualifiedIdType()) { + if (rhs->qual_empty()) { // If the RHS is a unqualified interface pointer "NSString*", // make sure we check the class hierarchy. - if (ObjCInterfaceDecl *rhsID = rhsOPT->getInterfaceDecl()) { - for (auto *I : lhsQID->quals()) { + if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) { + for (auto *I : lhs->quals()) { // when comparing an id

on lhs with a static type on rhs, // see if static class implements all of id's protocols, directly or // through its super class and categories. @@ -7938,13 +8051,13 @@ bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, return true; } // Both the right and left sides have qualifiers. - for (auto *lhsProto : lhsQID->quals()) { + for (auto *lhsProto : lhs->quals()) { bool match = false; // when comparing an id

on lhs with a static type on rhs, // see if static class implements all of id's protocols, directly or // through its super class and categories. - for (auto *rhsProto : rhsOPT->quals()) { + for (auto *rhsProto : rhs->quals()) { if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) || (compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) { match = true; @@ -7953,8 +8066,8 @@ bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, } // If the RHS is a qualified interface pointer "NSString

*", // make sure we check the class hierarchy. - if (ObjCInterfaceDecl *rhsID = rhsOPT->getInterfaceDecl()) { - for (auto *I : lhsQID->quals()) { + if (ObjCInterfaceDecl *rhsID = rhs->getInterfaceDecl()) { + for (auto *I : lhs->quals()) { // when comparing an id

on lhs with a static type on rhs, // see if static class implements all of id's protocols, directly or // through its super class and categories. @@ -7971,13 +8084,11 @@ bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, return true; } - const ObjCObjectPointerType *rhsQID = rhs->getAsObjCQualifiedIdType(); - assert(rhsQID && "One of the LHS/RHS should be id"); + assert(rhs->isObjCQualifiedIdType() && "One of the LHS/RHS should be id"); - if (const ObjCObjectPointerType *lhsOPT = - lhs->getAsObjCInterfacePointerType()) { + if (lhs->getInterfaceType()) { // If both the right and left sides have qualifiers. - for (auto *lhsProto : lhsOPT->quals()) { + for (auto *lhsProto : lhs->quals()) { bool match = false; // when comparing an id

on rhs with a static type on lhs, @@ -7985,7 +8096,7 @@ bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, // through its super class and categories. // First, lhs protocols in the qualifier list must be found, direct // or indirect in rhs's qualifier list or it is a mismatch. - for (auto *rhsProto : rhsQID->quals()) { + for (auto *rhsProto : rhs->quals()) { if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) || (compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) { match = true; @@ -7998,17 +8109,17 @@ bool ASTContext::ObjCQualifiedIdTypesAreCompatible(QualType lhs, QualType rhs, // Static class's protocols, or its super class or category protocols // must be found, direct or indirect in rhs's qualifier list or it is a mismatch. - if (ObjCInterfaceDecl *lhsID = lhsOPT->getInterfaceDecl()) { + if (ObjCInterfaceDecl *lhsID = lhs->getInterfaceDecl()) { llvm::SmallPtrSet LHSInheritedProtocols; CollectInheritedProtocols(lhsID, LHSInheritedProtocols); // This is rather dubious but matches gcc's behavior. If lhs has // no type qualifier and its class has no static protocol(s) // assume that it is mismatch. - if (LHSInheritedProtocols.empty() && lhsOPT->qual_empty()) + if (LHSInheritedProtocols.empty() && lhs->qual_empty()) return false; for (auto *lhsProto : LHSInheritedProtocols) { bool match = false; - for (auto *rhsProto : rhsQID->quals()) { + for (auto *rhsProto : rhs->quals()) { if (ProtocolCompatibleWithProtocol(lhsProto, rhsProto) || (compare && ProtocolCompatibleWithProtocol(rhsProto, lhsProto))) { match = true; @@ -8032,9 +8143,8 @@ bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT, const ObjCObjectType* LHS = LHSOPT->getObjectType(); const ObjCObjectType* RHS = RHSOPT->getObjectType(); - // If either type represents the built-in 'id' or 'Class' types, return true. 
- if (LHS->isObjCUnqualifiedIdOrClass() || - RHS->isObjCUnqualifiedIdOrClass()) + // If either type represents the built-in 'id' type, return true. + if (LHS->isObjCUnqualifiedId() || RHS->isObjCUnqualifiedId()) return true; // Function object that propagates a successful result or handles @@ -8052,15 +8162,20 @@ bool ASTContext::canAssignObjCInterfaces(const ObjCObjectPointerType *LHSOPT, LHSOPT->stripObjCKindOfTypeAndQuals(*this)); }; + // Casts from or to id

are allowed when the other side has compatible + // protocols. if (LHS->isObjCQualifiedId() || RHS->isObjCQualifiedId()) { - return finish(ObjCQualifiedIdTypesAreCompatible(QualType(LHSOPT,0), - QualType(RHSOPT,0), - false)); + return finish(ObjCQualifiedIdTypesAreCompatible(LHSOPT, RHSOPT, false)); } + // Verify protocol compatibility for casts from Class to Class. if (LHS->isObjCQualifiedClass() && RHS->isObjCQualifiedClass()) { - return finish(ObjCQualifiedClassTypesAreCompatible(QualType(LHSOPT,0), - QualType(RHSOPT,0))); + return finish(ObjCQualifiedClassTypesAreCompatible(LHSOPT, RHSOPT)); + } + + // Casts from Class to Class, or vice-versa, are allowed. + if (LHS->isObjCClass() && RHS->isObjCClass()) { + return true; } // If we have 2 user-defined types, fall into that path. @@ -8108,9 +8223,9 @@ bool ASTContext::canAssignObjCInterfacesInBlockPointer( } if (LHSOPT->isObjCQualifiedIdType() || RHSOPT->isObjCQualifiedIdType()) - return finish(ObjCQualifiedIdTypesAreCompatible(QualType(LHSOPT,0), - QualType(RHSOPT,0), - false)); + return finish(ObjCQualifiedIdTypesAreCompatible( + (BlockReturnType ? LHSOPT : RHSOPT), + (BlockReturnType ? 
RHSOPT : LHSOPT), false)); const ObjCInterfaceType* LHS = LHSOPT->getInterfaceType(); const ObjCInterfaceType* RHS = RHSOPT->getInterfaceType(); @@ -8834,7 +8949,7 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical and dependent types shouldn't get here"); case Type::Auto: @@ -8854,8 +8969,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, case Type::Pointer: { // Merge two pointer types, while trying to preserve typedef info - QualType LHSPointee = LHS->getAs()->getPointeeType(); - QualType RHSPointee = RHS->getAs()->getPointeeType(); + QualType LHSPointee = LHS->castAs()->getPointeeType(); + QualType RHSPointee = RHS->castAs()->getPointeeType(); if (Unqualified) { LHSPointee = LHSPointee.getUnqualifiedType(); RHSPointee = RHSPointee.getUnqualifiedType(); @@ -8873,8 +8988,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, case Type::BlockPointer: { // Merge two block pointer types, while trying to preserve typedef info - QualType LHSPointee = LHS->getAs()->getPointeeType(); - QualType RHSPointee = RHS->getAs()->getPointeeType(); + QualType LHSPointee = LHS->castAs()->getPointeeType(); + QualType RHSPointee = RHS->castAs()->getPointeeType(); if (Unqualified) { LHSPointee = LHSPointee.getUnqualifiedType(); RHSPointee = RHSPointee.getUnqualifiedType(); @@ -8906,8 +9021,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, case Type::Atomic: { // Merge two pointer types, while trying to preserve typedef info - QualType LHSValue = LHS->getAs()->getValueType(); - QualType RHSValue = RHS->getAs()->getValueType(); + QualType LHSValue = LHS->castAs()->getValueType(); + QualType RHSValue = RHS->castAs()->getValueType(); if (Unqualified) { 
LHSValue = LHSValue.getUnqualifiedType(); RHSValue = RHSValue.getUnqualifiedType(); @@ -8975,10 +9090,14 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, return LHS; if (RCAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType)) return RHS; - if (LCAT) return getConstantArrayType(ResultType, LCAT->getSize(), - ArrayType::ArraySizeModifier(), 0); - if (RCAT) return getConstantArrayType(ResultType, RCAT->getSize(), - ArrayType::ArraySizeModifier(), 0); + if (LCAT) + return getConstantArrayType(ResultType, LCAT->getSize(), + LCAT->getSizeExpr(), + ArrayType::ArraySizeModifier(), 0); + if (RCAT) + return getConstantArrayType(ResultType, RCAT->getSize(), + RCAT->getSizeExpr(), + ArrayType::ArraySizeModifier(), 0); if (LVAT && getCanonicalType(LHSElem) == getCanonicalType(ResultType)) return LHS; if (RVAT && getCanonicalType(RHSElem) == getCanonicalType(ResultType)) @@ -9013,34 +9132,30 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, return {}; case Type::Vector: // FIXME: The merged type should be an ExtVector! - if (areCompatVectorTypes(LHSCan->getAs(), - RHSCan->getAs())) + if (areCompatVectorTypes(LHSCan->castAs(), + RHSCan->castAs())) return LHS; return {}; case Type::ObjCObject: { // Check if the types are assignment compatible. // FIXME: This should be type compatibility, e.g. whether // "LHS x; RHS x;" at global scope is legal. 
- const auto *LHSIface = LHS->getAs(); - const auto *RHSIface = RHS->getAs(); - if (canAssignObjCInterfaces(LHSIface, RHSIface)) + if (canAssignObjCInterfaces(LHS->castAs(), + RHS->castAs())) return LHS; - return {}; } case Type::ObjCObjectPointer: if (OfBlockPointer) { if (canAssignObjCInterfacesInBlockPointer( - LHS->getAs(), - RHS->getAs(), - BlockReturnType)) + LHS->castAs(), + RHS->castAs(), BlockReturnType)) return LHS; return {}; } - if (canAssignObjCInterfaces(LHS->getAs(), - RHS->getAs())) + if (canAssignObjCInterfaces(LHS->castAs(), + RHS->castAs())) return LHS; - return {}; case Type::Pipe: assert(LHS != RHS && @@ -9125,7 +9240,7 @@ QualType ASTContext::mergeObjCGCQualifiers(QualType LHS, QualType RHS) { if (ResReturnType == NewReturnType || ResReturnType == OldReturnType) { // id foo(); ... __strong id foo(); or: __strong id foo(); ... id foo(); // In either case, use OldReturnType to build the new function type. - const auto *F = LHS->getAs(); + const auto *F = LHS->castAs(); if (const auto *FPT = cast(F)) { FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); EPI.ExtInfo = getFunctionExtInfo(LHS); @@ -9166,8 +9281,8 @@ QualType ASTContext::mergeObjCGCQualifiers(QualType LHS, QualType RHS) { } if (LHSCan->isObjCObjectPointerType() && RHSCan->isObjCObjectPointerType()) { - QualType LHSBaseQT = LHS->getAs()->getPointeeType(); - QualType RHSBaseQT = RHS->getAs()->getPointeeType(); + QualType LHSBaseQT = LHS->castAs()->getPointeeType(); + QualType RHSBaseQT = RHS->castAs()->getPointeeType(); QualType ResQT = mergeObjCGCQualifiers(LHSBaseQT, RHSBaseQT); if (ResQT == LHSBaseQT) return LHS; @@ -9203,9 +9318,7 @@ QualType ASTContext::getCorrespondingUnsignedType(QualType T) const { if (const auto *ETy = T->getAs()) T = ETy->getDecl()->getIntegerType(); - const auto *BTy = T->getAs(); - assert(BTy && "Unexpected signed integer or fixed point type"); - switch (BTy->getKind()) { + switch (T->castAs()->getKind()) { case BuiltinType::Char_S: case 
BuiltinType::SChar: return UnsignedCharTy; @@ -9860,7 +9973,7 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { return !D->getDeclContext()->isDependentContext(); else if (isa(D)) return !D->getDeclContext()->isDependentContext(); - else if (isa(D)) + else if (isa(D) || isa(D)) return !D->getDeclContext()->isDependentContext(); else if (isa(D)) return true; @@ -9963,7 +10076,7 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { return false; // Variables that have destruction with side-effects are required. - if (VD->getType().isDestructedType()) + if (VD->needsDestruction(*this)) return true; // Variables that have initialization with side-effects are required. @@ -10035,7 +10148,7 @@ CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic, break; } } - return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown); + return Target->getDefaultCallingConv(); } bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const { @@ -10153,6 +10266,16 @@ ASTContext::getManglingNumberContext(const DeclContext *DC) { return *MCtx; } +MangleNumberingContext & +ASTContext::getManglingNumberContext(NeedExtraManglingDecl_t, const Decl *D) { + assert(LangOpts.CPlusPlus); // We don't need mangling numbers for plain C. + std::unique_ptr &MCtx = + ExtraMangleNumberingContexts[D]; + if (!MCtx) + MCtx = createMangleNumberingContext(); + return *MCtx; +} + std::unique_ptr ASTContext::createMangleNumberingContext() const { return ABI->createMangleNumberingContext(); @@ -10226,7 +10349,7 @@ QualType ASTContext::getStringLiteralArrayType(QualType EltTy, // Get an array type for the string, according to C99 6.4.5. This includes // the null terminator character. 
- return getConstantArrayType(EltTy, llvm::APInt(32, Length + 1), + return getConstantArrayType(EltTy, llvm::APInt(32, Length + 1), nullptr, ArrayType::Normal, /*IndexTypeQuals*/ 0); } @@ -10389,7 +10512,7 @@ ASTContext::getParents(const ast_type_traits::DynTypedNode &Node) { if (!Parents) // We build the parent map for the traversal scope (usually whole TU), as // hasAncestor can escape any subtree. - Parents = llvm::make_unique(*this); + Parents = std::make_unique(*this); return Parents->getParents(Node); } @@ -10446,8 +10569,7 @@ QualType ASTContext::getCorrespondingSaturatedType(QualType Ty) const { if (Ty->isSaturatedFixedPointType()) return Ty; - const auto &BT = Ty->getAs(); - switch (BT->getKind()) { + switch (Ty->castAs()->getKind()) { default: llvm_unreachable("Not a fixed point type!"); case BuiltinType::ShortAccum: @@ -10499,9 +10621,8 @@ clang::LazyGenerationalUpdatePtr< unsigned char ASTContext::getFixedPointScale(QualType Ty) const { assert(Ty->isFixedPointType()); - const auto *BT = Ty->getAs(); const TargetInfo &Target = getTargetInfo(); - switch (BT->getKind()) { + switch (Ty->castAs()->getKind()) { default: llvm_unreachable("Not a fixed point type!"); case BuiltinType::ShortAccum: @@ -10546,9 +10667,8 @@ unsigned char ASTContext::getFixedPointScale(QualType Ty) const { unsigned char ASTContext::getFixedPointIBits(QualType Ty) const { assert(Ty->isFixedPointType()); - const auto *BT = Ty->getAs(); const TargetInfo &Target = getTargetInfo(); - switch (BT->getKind()) { + switch (Ty->castAs()->getKind()) { default: llvm_unreachable("Not a fixed point type!"); case BuiltinType::ShortAccum: @@ -10613,9 +10733,8 @@ APFixedPoint ASTContext::getFixedPointMin(QualType Ty) const { QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const { assert(Ty->isUnsignedFixedPointType() && "Expected unsigned fixed point type"); - const auto *BTy = Ty->getAs(); - switch (BTy->getKind()) { + switch (Ty->castAs()->getKind()) { case 
BuiltinType::UShortAccum: return ShortAccumTy; case BuiltinType::UAccum: diff --git a/lib/AST/ASTDiagnostic.cpp b/lib/AST/ASTDiagnostic.cpp index 15df8658529..30985441031 100644 --- a/lib/AST/ASTDiagnostic.cpp +++ b/lib/AST/ASTDiagnostic.cpp @@ -154,7 +154,7 @@ Underlying = CTy->desugar(); \ } \ break; \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } // If it wasn't sugared, we're done. diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp index 9d5dd84161d..54acca7dc62 100644 --- a/lib/AST/ASTImporter.cpp +++ b/lib/AST/ASTImporter.cpp @@ -80,6 +80,7 @@ namespace clang { using ExpectedExpr = llvm::Expected; using ExpectedDecl = llvm::Expected; using ExpectedSLoc = llvm::Expected; + using ExpectedName = llvm::Expected; std::string ImportError::toString() const { // FIXME: Improve error texts. @@ -426,6 +427,9 @@ namespace clang { Error ImportFunctionDeclBody(FunctionDecl *FromFD, FunctionDecl *ToFD); + Error ImportDefaultArgOfParmVarDecl(const ParmVarDecl *FromParam, + ParmVarDecl *ToParam); + template bool hasSameVisibilityContext(T *Found, T *From); @@ -635,7 +639,8 @@ namespace clang { return ImportArrayChecked(InContainer.begin(), InContainer.end(), Obegin); } - void ImportOverrides(CXXMethodDecl *ToMethod, CXXMethodDecl *FromMethod); + Error ImportOverriddenMethods(CXXMethodDecl *ToMethod, + CXXMethodDecl *FromMethod); Expected FindFunctionTemplateSpecialization( FunctionDecl *FromFD); @@ -927,6 +932,27 @@ Expected ASTNodeImporter::import(const LambdaCapture &From) { EllipsisLoc); } +template +bool ASTNodeImporter::hasSameVisibilityContext(T *Found, T *From) { + if (From->hasExternalFormalLinkage()) + return Found->hasExternalFormalLinkage(); + if (Importer.GetFromTU(Found) != From->getTranslationUnitDecl()) + return false; + if (From->isInAnonymousNamespace()) + return Found->isInAnonymousNamespace(); + else + return !Found->isInAnonymousNamespace() && + !Found->hasExternalFormalLinkage(); +} + +template <> +bool 
ASTNodeImporter::hasSameVisibilityContext(TypedefNameDecl *Found, + TypedefNameDecl *From) { + if (From->isInAnonymousNamespace() && Found->isInAnonymousNamespace()) + return Importer.GetFromTU(Found) == From->getTranslationUnitDecl(); + return From->isInAnonymousNamespace() == Found->isInAnonymousNamespace(); +} + } // namespace clang //---------------------------------------------------------------------------- @@ -959,6 +985,10 @@ ExpectedType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) { case BuiltinType::Id: \ return Importer.getToContext().Id##Ty; #include "clang/Basic/OpenCLExtensionTypes.def" +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + return Importer.getToContext().SingletonId; +#include "clang/Basic/AArch64SVEACLETypes.def" #define SHARED_SINGLETON_TYPE(Expansion) #define BUILTIN_TYPE(Id, SingletonId) \ case BuiltinType::Id: return Importer.getToContext().SingletonId; @@ -1068,14 +1098,16 @@ ASTNodeImporter::VisitMemberPointerType(const MemberPointerType *T) { ExpectedType ASTNodeImporter::VisitConstantArrayType(const ConstantArrayType *T) { - ExpectedType ToElementTypeOrErr = import(T->getElementType()); - if (!ToElementTypeOrErr) - return ToElementTypeOrErr.takeError(); + QualType ToElementType; + const Expr *ToSizeExpr; + if (auto Imp = importSeq(T->getElementType(), T->getSizeExpr())) + std::tie(ToElementType, ToSizeExpr) = *Imp; + else + return Imp.takeError(); - return Importer.getToContext().getConstantArrayType(*ToElementTypeOrErr, - T->getSize(), - T->getSizeModifier(), - T->getIndexTypeCVRQualifiers()); + return Importer.getToContext().getConstantArrayType( + ToElementType, T->getSize(), ToSizeExpr, T->getSizeModifier(), + T->getIndexTypeCVRQualifiers()); } ExpectedType @@ -1645,7 +1677,6 @@ ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) { bool AccumulateChildErrors = isa(FromDC); Error ChildErrors = Error::success(); - llvm::SmallVector ImportedDecls; for (auto *From : 
FromDC->decls()) { ExpectedDecl ImportedOrErr = import(From); if (!ImportedOrErr) { @@ -1657,6 +1688,59 @@ ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) { } } + // We reorder declarations in RecordDecls because they may have another order + // in the "to" context than they have in the "from" context. This may happen + // e.g. when we import a class like this: + // struct declToImport { + // int a = c + b; + // int b = 1; + // int c = 2; + // }; + // During the import of `a` we import first the dependencies in sequence, + // thus the order would be `c`, `b`, `a`. We will get the normal order by + // first removing the already imported members and then adding them in the + // order as they appear in the "from" context. + // + // Keeping field order is vital because it determines structure layout. + // + // Here and below, we cannot call field_begin() method and its callers on + // ToDC if it has an external storage. Calling field_begin() will + // automatically load all the fields by calling + // LoadFieldsFromExternalStorage(). LoadFieldsFromExternalStorage() would + // call ASTImporter::Import(). This is because the ExternalASTSource + // interface in LLDB is implemented by the means of the ASTImporter. However, + // calling an import at this point would result in an uncontrolled import, we + // must avoid that. + const auto *FromRD = dyn_cast(FromDC); + if (!FromRD) + return ChildErrors; + + auto ToDCOrErr = Importer.ImportContext(FromDC); + if (!ToDCOrErr) { + consumeError(std::move(ChildErrors)); + return ToDCOrErr.takeError(); + } + + DeclContext *ToDC = *ToDCOrErr; + // Remove all declarations, which may be in wrong order in the + // lexical DeclContext and then add them in the proper order. + for (auto *D : FromRD->decls()) { + if (isa(D) || isa(D) || isa(D)) { + assert(D && "DC contains a null decl"); + Decl *ToD = Importer.GetAlreadyImportedOrNull(D); + // Remove only the decls which we successfully imported. 
+ if (ToD) { + assert(ToDC == ToD->getLexicalDeclContext() && ToDC->containsDecl(ToD)); + // Remove the decl from its wrong place in the linked list. + ToDC->removeDecl(ToD); + // Add the decl to the end of the linked list. + // This time it will be at the proper place because the enclosing for + // loop iterates in the original (good) order of the decls. + ToDC->addDeclInternal(ToD); + } + } + } + return ChildErrors; } @@ -1752,71 +1836,10 @@ Error ASTNodeImporter::ImportDefinition( struct CXXRecordDecl::DefinitionData &ToData = ToCXX->data(); struct CXXRecordDecl::DefinitionData &FromData = FromCXX->data(); - ToData.UserDeclaredConstructor = FromData.UserDeclaredConstructor; - ToData.UserDeclaredSpecialMembers = FromData.UserDeclaredSpecialMembers; - ToData.Aggregate = FromData.Aggregate; - ToData.PlainOldData = FromData.PlainOldData; - ToData.Empty = FromData.Empty; - ToData.Polymorphic = FromData.Polymorphic; - ToData.Abstract = FromData.Abstract; - ToData.IsStandardLayout = FromData.IsStandardLayout; - ToData.IsCXX11StandardLayout = FromData.IsCXX11StandardLayout; - ToData.HasBasesWithFields = FromData.HasBasesWithFields; - ToData.HasBasesWithNonStaticDataMembers = - FromData.HasBasesWithNonStaticDataMembers; - ToData.HasPrivateFields = FromData.HasPrivateFields; - ToData.HasProtectedFields = FromData.HasProtectedFields; - ToData.HasPublicFields = FromData.HasPublicFields; - ToData.HasMutableFields = FromData.HasMutableFields; - ToData.HasVariantMembers = FromData.HasVariantMembers; - ToData.HasOnlyCMembers = FromData.HasOnlyCMembers; - ToData.HasInClassInitializer = FromData.HasInClassInitializer; - ToData.HasUninitializedReferenceMember - = FromData.HasUninitializedReferenceMember; - ToData.HasUninitializedFields = FromData.HasUninitializedFields; - ToData.HasInheritedConstructor = FromData.HasInheritedConstructor; - ToData.HasInheritedAssignment = FromData.HasInheritedAssignment; - ToData.NeedOverloadResolutionForCopyConstructor - = 
FromData.NeedOverloadResolutionForCopyConstructor; - ToData.NeedOverloadResolutionForMoveConstructor - = FromData.NeedOverloadResolutionForMoveConstructor; - ToData.NeedOverloadResolutionForMoveAssignment - = FromData.NeedOverloadResolutionForMoveAssignment; - ToData.NeedOverloadResolutionForDestructor - = FromData.NeedOverloadResolutionForDestructor; - ToData.DefaultedCopyConstructorIsDeleted - = FromData.DefaultedCopyConstructorIsDeleted; - ToData.DefaultedMoveConstructorIsDeleted - = FromData.DefaultedMoveConstructorIsDeleted; - ToData.DefaultedMoveAssignmentIsDeleted - = FromData.DefaultedMoveAssignmentIsDeleted; - ToData.DefaultedDestructorIsDeleted = FromData.DefaultedDestructorIsDeleted; - ToData.HasTrivialSpecialMembers = FromData.HasTrivialSpecialMembers; - ToData.HasIrrelevantDestructor = FromData.HasIrrelevantDestructor; - ToData.HasConstexprNonCopyMoveConstructor - = FromData.HasConstexprNonCopyMoveConstructor; - ToData.HasDefaultedDefaultConstructor - = FromData.HasDefaultedDefaultConstructor; - ToData.DefaultedDefaultConstructorIsConstexpr - = FromData.DefaultedDefaultConstructorIsConstexpr; - ToData.HasConstexprDefaultConstructor - = FromData.HasConstexprDefaultConstructor; - ToData.HasNonLiteralTypeFieldsOrBases - = FromData.HasNonLiteralTypeFieldsOrBases; - // ComputedVisibleConversions not imported. 
- ToData.UserProvidedDefaultConstructor - = FromData.UserProvidedDefaultConstructor; - ToData.DeclaredSpecialMembers = FromData.DeclaredSpecialMembers; - ToData.ImplicitCopyConstructorCanHaveConstParamForVBase - = FromData.ImplicitCopyConstructorCanHaveConstParamForVBase; - ToData.ImplicitCopyConstructorCanHaveConstParamForNonVBase - = FromData.ImplicitCopyConstructorCanHaveConstParamForNonVBase; - ToData.ImplicitCopyAssignmentHasConstParam - = FromData.ImplicitCopyAssignmentHasConstParam; - ToData.HasDeclaredCopyConstructorWithConstParam - = FromData.HasDeclaredCopyConstructorWithConstParam; - ToData.HasDeclaredCopyAssignmentWithConstParam - = FromData.HasDeclaredCopyAssignmentWithConstParam; + + #define FIELD(Name, Width, Merge) \ + ToData.Name = FromData.Name; + #include "clang/AST/CXXRecordDeclDefinitionBits.def" // Copy over the data stored in RecordDeclBits ToCXX->setArgPassingRestrictions(FromCXX->getArgPassingRestrictions()); @@ -2188,11 +2211,13 @@ ExpectedDecl ASTNodeImporter::VisitNamespaceDecl(NamespaceDecl *D) { } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Namespace, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, Decl::IDNS_Namespace, ConflictingDecls.data(), + ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2281,6 +2306,9 @@ ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) { // If this typedef is not in block scope, determine whether we've // seen a typedef with the same name (that we can merge with) or any // other entity by that name (which name lookup could conflict with). + // Note: Repeated typedefs are not valid in C99: + // 'typedef int T; typedef int T;' is invalid + // We do not care about this now. 
if (!DC->isFunctionOrMethod()) { SmallVector ConflictingDecls; unsigned IDNS = Decl::IDNS_Ordinary; @@ -2289,6 +2317,9 @@ ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) { if (!FoundDecl->isInIdentifierNamespace(IDNS)) continue; if (auto *FoundTypedef = dyn_cast(FoundDecl)) { + if (!hasSameVisibilityContext(FoundTypedef, D)) + continue; + QualType FromUT = D->getUnderlyingType(); QualType FoundUT = FoundTypedef->getUnderlyingType(); if (Importer.IsStructurallyEquivalent(FromUT, FoundUT)) { @@ -2296,21 +2327,21 @@ ASTNodeImporter::VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias) { // already have a complete underlying type then return with that. if (!FromUT->isIncompleteType() && !FoundUT->isIncompleteType()) return Importer.MapImported(D, FoundTypedef); + // FIXME Handle redecl chain. When you do that make consistent changes + // in ASTImporterLookupTable too. + } else { + ConflictingDecls.push_back(FoundDecl); } - // FIXME Handle redecl chain. When you do that make consistent changes - // in ASTImporterLookupTable too. 
- break; } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2383,11 +2414,12 @@ ASTNodeImporter::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2491,17 +2523,18 @@ ExpectedDecl ASTNodeImporter::VisitEnumDecl(EnumDecl *D) { continue; if (IsStructuralMatch(D, FoundEnum)) return Importer.MapImported(D, FoundEnum); + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(SearchName, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + SearchName, DC, IDNS, ConflictingDecls.data(), + ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2546,10 +2579,10 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { } // Import the major distinguishing characteristics of this record. 
- DeclContext *DC, *LexicalDC; + DeclContext *DC = nullptr, *LexicalDC = nullptr; DeclarationName Name; SourceLocation Loc; - NamedDecl *ToD; + NamedDecl *ToD = nullptr; if (Error Err = ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc)) return std::move(Err); if (ToD) @@ -2568,7 +2601,7 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { // We may already have a record of the same name; try to find and match it. RecordDecl *PrevDecl = nullptr; - if (!DC->isFunctionOrMethod()) { + if (!DC->isFunctionOrMethod() && !D->isLambda()) { SmallVector ConflictingDecls; auto FoundDecls = Importer.findDeclsInToCtx(DC, SearchName); @@ -2626,17 +2659,18 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { PrevDecl = FoundRecord->getMostRecentDecl(); break; } - } - - ConflictingDecls.push_back(FoundDecl); + ConflictingDecls.push_back(FoundDecl); + } // kind is RecordDecl } // for if (!ConflictingDecls.empty() && SearchName) { - Name = Importer.HandleNameConflict(SearchName, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + SearchName, DC, IDNS, ConflictingDecls.data(), + ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2660,7 +2694,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { ExpectedDecl CDeclOrErr = import(DCXX->getLambdaContextDecl()); if (!CDeclOrErr) return CDeclOrErr.takeError(); - D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr); + D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr, + DCXX->hasKnownLambdaInternalLinkage()); } else if (DCXX->isInjectedClassName()) { // We have to be careful to do a similar dance to the one in // Sema::ActOnStartCXXMemberDeclarations @@ -2795,17 +2830,17 @@ ExpectedDecl ASTNodeImporter::VisitEnumConstantDecl(EnumConstantDecl *D) { if (auto 
*FoundEnumConstant = dyn_cast(FoundDecl)) { if (IsStructuralMatch(D, FoundEnumConstant)) return Importer.MapImported(D, FoundEnumConstant); + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -2956,19 +2991,6 @@ Error ASTNodeImporter::ImportFunctionDeclBody(FunctionDecl *FromFD, return Error::success(); } -template -bool ASTNodeImporter::hasSameVisibilityContext(T *Found, T *From) { - if (From->hasExternalFormalLinkage()) - return Found->hasExternalFormalLinkage(); - if (Importer.GetFromTU(Found) != From->getTranslationUnitDecl()) - return false; - if (From->isInAnonymousNamespace()) - return Found->isInAnonymousNamespace(); - else - return !Found->isInAnonymousNamespace() && - !Found->hasExternalFormalLinkage(); -} - ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { SmallVector Redecls = getCanonicalForwardRedeclChain(D); @@ -3043,17 +3065,17 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { << Name << D->getType() << FoundFunction->getType(); Importer.ToDiag(FoundFunction->getLocation(), diag::note_odr_value_here) << FoundFunction->getType(); + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); + if (NameOrErr) + Name = 
NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -3066,9 +3088,19 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { if (FoundByLookup) { if (isa(FoundByLookup)) { if (D->getLexicalDeclContext() == D->getDeclContext()) { - if (!D->doesThisDeclarationHaveABody()) + if (!D->doesThisDeclarationHaveABody()) { + if (FunctionTemplateDecl *DescribedD = + D->getDescribedFunctionTemplate()) { + // Handle a "templated" function together with its described + // template. This avoids need for a similar check at import of the + // described template. + assert(FoundByLookup->getDescribedFunctionTemplate() && + "Templated function mapped to non-templated?"); + Importer.MapImported(DescribedD, + FoundByLookup->getDescribedFunctionTemplate()); + } return Importer.MapImported(D, FoundByLookup); - else { + } else { // Let's continue and build up the redecl chain in this case. // FIXME Merge the functions into one decl. } @@ -3154,7 +3186,7 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { if (GetImportedOrCreateDecl( ToFunction, D, Importer.getToContext(), cast(DC), ToInnerLocStart, NameInfo, T, TInfo, D->isInlineSpecified(), - D->isImplicit())) + D->isImplicit(), D->getConstexprKind())) return ToFunction; CXXDestructorDecl *ToDtor = cast(ToFunction); @@ -3203,29 +3235,15 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { // decl and its redeclarations may be required. } - // Import Ctor initializers. - if (auto *FromConstructor = dyn_cast(D)) { - if (unsigned NumInitializers = FromConstructor->getNumCtorInitializers()) { - SmallVector CtorInitializers(NumInitializers); - // Import first, then allocate memory and copy if there was no error. 
- if (Error Err = ImportContainerChecked( - FromConstructor->inits(), CtorInitializers)) - return std::move(Err); - auto **Memory = - new (Importer.getToContext()) CXXCtorInitializer *[NumInitializers]; - std::copy(CtorInitializers.begin(), CtorInitializers.end(), Memory); - auto *ToCtor = cast(ToFunction); - ToCtor->setCtorInitializers(Memory); - ToCtor->setNumCtorInitializers(NumInitializers); - } - } - ToFunction->setQualifierInfo(ToQualifierLoc); ToFunction->setAccess(D->getAccess()); ToFunction->setLexicalDeclContext(LexicalDC); ToFunction->setVirtualAsWritten(D->isVirtualAsWritten()); ToFunction->setTrivial(D->isTrivial()); ToFunction->setPure(D->isPure()); + ToFunction->setDefaulted(D->isDefaulted()); + ToFunction->setExplicitlyDefaulted(D->isExplicitlyDefaulted()); + ToFunction->setDeletedAsWritten(D->isDeletedAsWritten()); ToFunction->setRangeEnd(ToEndLoc); // Set the parameters. @@ -3260,6 +3278,23 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { return ToFTOrErr.takeError(); } + // Import Ctor initializers. + if (auto *FromConstructor = dyn_cast(D)) { + if (unsigned NumInitializers = FromConstructor->getNumCtorInitializers()) { + SmallVector CtorInitializers(NumInitializers); + // Import first, then allocate memory and copy if there was no error. 
+ if (Error Err = ImportContainerChecked( + FromConstructor->inits(), CtorInitializers)) + return std::move(Err); + auto **Memory = + new (Importer.getToContext()) CXXCtorInitializer *[NumInitializers]; + std::copy(CtorInitializers.begin(), CtorInitializers.end(), Memory); + auto *ToCtor = cast(ToFunction); + ToCtor->setCtorInitializers(Memory); + ToCtor->setNumCtorInitializers(NumInitializers); + } + } + if (D->doesThisDeclarationHaveABody()) { Error Err = ImportFunctionDeclBody(D, ToFunction); @@ -3292,7 +3327,9 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) { } if (auto *FromCXXMethod = dyn_cast(D)) - ImportOverrides(cast(ToFunction), FromCXXMethod); + if (Error Err = ImportOverriddenMethods(cast(ToFunction), + FromCXXMethod)) + return std::move(Err); // Import the rest of the chain. I.e. import all subsequent declarations. for (++RedeclIt; RedeclIt != Redecls.end(); ++RedeclIt) { @@ -3686,17 +3723,17 @@ ExpectedDecl ASTNodeImporter::VisitVarDecl(VarDecl *D) { << Name << D->getType() << FoundVar->getType(); Importer.ToDiag(FoundVar->getLocation(), diag::note_odr_value_here) << FoundVar->getType(); + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, IDNS, - ConflictingDecls.data(), - ConflictingDecls.size()); - if (!Name) - return make_error(ImportError::NameConflict); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } } @@ -3772,6 +3809,28 @@ ExpectedDecl ASTNodeImporter::VisitImplicitParamDecl(ImplicitParamDecl *D) { return ToParm; } +Error ASTNodeImporter::ImportDefaultArgOfParmVarDecl( + const ParmVarDecl *FromParam, ParmVarDecl *ToParam) { + ToParam->setHasInheritedDefaultArg(FromParam->hasInheritedDefaultArg()); + ToParam->setKNRPromoted(FromParam->isKNRPromoted()); + 
+ if (FromParam->hasUninstantiatedDefaultArg()) { + if (auto ToDefArgOrErr = import(FromParam->getUninstantiatedDefaultArg())) + ToParam->setUninstantiatedDefaultArg(*ToDefArgOrErr); + else + return ToDefArgOrErr.takeError(); + } else if (FromParam->hasUnparsedDefaultArg()) { + ToParam->setUnparsedDefaultArg(); + } else if (FromParam->hasDefaultArg()) { + if (auto ToDefArgOrErr = import(FromParam->getDefaultArg())) + ToParam->setDefaultArg(*ToDefArgOrErr); + else + return ToDefArgOrErr.takeError(); + } + + return Error::success(); +} + ExpectedDecl ASTNodeImporter::VisitParmVarDecl(ParmVarDecl *D) { // Parameters are created in the translation unit's context, then moved // into the function declaration's context afterward. @@ -3798,23 +3857,11 @@ ExpectedDecl ASTNodeImporter::VisitParmVarDecl(ParmVarDecl *D) { /*DefaultArg*/ nullptr)) return ToParm; - // Set the default argument. - ToParm->setHasInheritedDefaultArg(D->hasInheritedDefaultArg()); - ToParm->setKNRPromoted(D->isKNRPromoted()); - - if (D->hasUninstantiatedDefaultArg()) { - if (auto ToDefArgOrErr = import(D->getUninstantiatedDefaultArg())) - ToParm->setUninstantiatedDefaultArg(*ToDefArgOrErr); - else - return ToDefArgOrErr.takeError(); - } else if (D->hasUnparsedDefaultArg()) { - ToParm->setUnparsedDefaultArg(); - } else if (D->hasDefaultArg()) { - if (auto ToDefArgOrErr = import(D->getDefaultArg())) - ToParm->setDefaultArg(*ToDefArgOrErr); - else - return ToDefArgOrErr.takeError(); - } + // Set the default argument. It should be no problem if it was already done. + // Do not import the default expression before GetImportedOrCreateDecl call + // to avoid possible infinite import loop because circular dependency. 
+ if (Error Err = ImportDefaultArgOfParmVarDecl(D, ToParm)) + return std::move(Err); if (D->isObjCMethodParameter()) { ToParm->setObjCMethodScopeInfo(D->getFunctionScopeIndex()); @@ -5016,25 +5063,27 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) { if (IsStructuralMatch(D, FoundTemplate)) { ClassTemplateDecl *TemplateWithDef = getTemplateDefinition(FoundTemplate); - if (D->isThisDeclarationADefinition() && TemplateWithDef) { + if (D->isThisDeclarationADefinition() && TemplateWithDef) return Importer.MapImported(D, TemplateWithDef); - } - FoundByLookup = FoundTemplate; - break; + if (!FoundByLookup) + FoundByLookup = FoundTemplate; + // Search in all matches because there may be multiple decl chains, + // see ASTTests test ImportExistingFriendClassTemplateDef. + continue; } + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary, - ConflictingDecls.data(), - ConflictingDecls.size()); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, Decl::IDNS_Ordinary, ConflictingDecls.data(), + ConflictingDecls.size()); + if (NameOrErr) + Name = NameOrErr.get(); + else + return NameOrErr.takeError(); } - - if (!Name) - return make_error(ImportError::NameConflict); } CXXRecordDecl *FromTemplated = D->getTemplatedDecl(); @@ -5307,22 +5356,20 @@ ExpectedDecl ASTNodeImporter::VisitVarTemplateDecl(VarTemplateDecl *D) { FoundTemplate->getTemplatedDecl()); return Importer.MapImported(D, FoundTemplate); } + ConflictingDecls.push_back(FoundDecl); } - - ConflictingDecls.push_back(FoundDecl); } if (!ConflictingDecls.empty()) { - Name = Importer.HandleNameConflict(Name, DC, Decl::IDNS_Ordinary, - ConflictingDecls.data(), - ConflictingDecls.size()); + ExpectedName NameOrErr = Importer.HandleNameConflict( + Name, DC, Decl::IDNS_Ordinary, ConflictingDecls.data(), + ConflictingDecls.size()); + if (NameOrErr) + Name = 
NameOrErr.get(); + else + return NameOrErr.takeError(); } - if (!Name) - // FIXME: Is it possible to get other error than name conflict? - // (Put this `if` into the previous `if`?) - return make_error(ImportError::NameConflict); - VarDecl *DTemplated = D->getTemplatedDecl(); // Import the type. @@ -5533,17 +5580,16 @@ ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { continue; if (auto *FoundTemplate = dyn_cast(FoundDecl)) { - if (FoundTemplate->hasExternalFormalLinkage() && - D->hasExternalFormalLinkage()) { - if (IsStructuralMatch(D, FoundTemplate)) { - FunctionTemplateDecl *TemplateWithDef = - getTemplateDefinition(FoundTemplate); - if (D->isThisDeclarationADefinition() && TemplateWithDef) { - return Importer.MapImported(D, TemplateWithDef); - } - FoundByLookup = FoundTemplate; - break; - } + if (!hasSameVisibilityContext(FoundTemplate, D)) + continue; + if (IsStructuralMatch(D, FoundTemplate)) { + FunctionTemplateDecl *TemplateWithDef = + getTemplateDefinition(FoundTemplate); + if (D->isThisDeclarationADefinition() && TemplateWithDef) + return Importer.MapImported(D, TemplateWithDef); + + FoundByLookup = FoundTemplate; + break; // TODO: handle conflicting names } } @@ -6868,8 +6914,23 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (!UsedContextOrErr) return UsedContextOrErr.takeError(); - return CXXDefaultArgExpr::Create( - Importer.getToContext(), *ToUsedLocOrErr, *ToParamOrErr, *UsedContextOrErr); + // Import the default arg if it was not imported yet. + // This is needed because it can happen that during the import of the + // default expression (from VisitParmVarDecl) the same ParmVarDecl is + // encountered here. The default argument for a ParmVarDecl is set in the + // ParmVarDecl only after it is imported (set in VisitParmVarDecl if not here, + // see VisitParmVarDecl). 
+ ParmVarDecl *ToParam = *ToParamOrErr; + if (!ToParam->getDefaultArg()) { + Optional FromParam = Importer.getImportedFromDecl(ToParam); + assert(FromParam && "ParmVarDecl was not imported?"); + + if (Error Err = ImportDefaultArgOfParmVarDecl(*FromParam, ToParam)) + return std::move(Err); + } + + return CXXDefaultArgExpr::Create(Importer.getToContext(), *ToUsedLocOrErr, + *ToParamOrErr, *UsedContextOrErr); } ExpectedStmt @@ -7701,15 +7762,18 @@ ExpectedStmt ASTNodeImporter::VisitCXXTypeidExpr(CXXTypeidExpr *E) { *ToTypeOrErr, *ToExprOperandOrErr, *ToSourceRangeOrErr); } -void ASTNodeImporter::ImportOverrides(CXXMethodDecl *ToMethod, - CXXMethodDecl *FromMethod) { +Error ASTNodeImporter::ImportOverriddenMethods(CXXMethodDecl *ToMethod, + CXXMethodDecl *FromMethod) { + Error ImportErrors = Error::success(); for (auto *FromOverriddenMethod : FromMethod->overridden_methods()) { if (auto ImportedOrErr = import(FromOverriddenMethod)) ToMethod->getCanonicalDecl()->addOverriddenMethod(cast( (*ImportedOrErr)->getCanonicalDecl())); else - consumeError(ImportedOrErr.takeError()); + ImportErrors = + joinErrors(std::move(ImportErrors), ImportedOrErr.takeError()); } + return ImportErrors; } ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager, @@ -7718,7 +7782,7 @@ ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager, std::shared_ptr SharedState) : SharedState(SharedState), ToContext(ToContext), FromContext(FromContext), ToFileManager(ToFileManager), FromFileManager(FromFileManager), - Minimal(MinimalImport) { + Minimal(MinimalImport), ODRHandling(ODRHandlingType::Conservative) { // Create a default state without the lookup table: LLDB case. if (!SharedState) { @@ -8390,13 +8454,13 @@ Expected ASTImporter::Import(FileID FromID, bool IsBuiltin) { // disk again // FIXME: We definitely want to re-use the existing MemoryBuffer, rather // than mmap the files several times. 
- const FileEntry *Entry = + auto Entry = ToFileManager.getFile(Cache->OrigEntry->getName()); // FIXME: The filename may be a virtual name that does probably not // point to a valid file and we get no Entry here. In this case try with // the memory buffer below. if (Entry) - ToID = ToSM.createFileID(Entry, *ToIncludeLoc, + ToID = ToSM.createFileID(*Entry, *ToIncludeLoc, FromSLoc.getFile().getFileCharacteristic()); } } @@ -8404,8 +8468,9 @@ Expected ASTImporter::Import(FileID FromID, bool IsBuiltin) { if (ToID.isInvalid() || IsBuiltin) { // FIXME: We want to re-use the existing MemoryBuffer! bool Invalid = true; - const llvm::MemoryBuffer *FromBuf = Cache->getBuffer( - FromContext.getDiagnostics(), FromSM, SourceLocation{}, &Invalid); + const llvm::MemoryBuffer *FromBuf = + Cache->getBuffer(FromContext.getDiagnostics(), + FromSM.getFileManager(), SourceLocation{}, &Invalid); if (!FromBuf || Invalid) // FIXME: Use a new error kind? return llvm::make_error(ImportError::Unknown); @@ -8421,6 +8486,10 @@ Expected ASTImporter::Import(FileID FromID, bool IsBuiltin) { assert(ToID.isValid() && "Unexpected invalid fileID was created."); ImportedFileIDs[FromID] = ToID; + + if (FileIDImportHandler) + FileIDImportHandler(ToID, FromID); + return ToID; } @@ -8640,12 +8709,17 @@ Expected ASTImporter::Import(Selector FromSel) { return ToContext.Selectors.getSelector(FromSel.getNumArgs(), Idents.data()); } -DeclarationName ASTImporter::HandleNameConflict(DeclarationName Name, - DeclContext *DC, - unsigned IDNS, - NamedDecl **Decls, - unsigned NumDecls) { - return Name; +Expected ASTImporter::HandleNameConflict(DeclarationName Name, + DeclContext *DC, + unsigned IDNS, + NamedDecl **Decls, + unsigned NumDecls) { + if (ODRHandling == ODRHandlingType::Conservative) + // Report error at any name conflict. + return make_error(ImportError::NameConflict); + else + // Allow to create the new Decl with the same name. 
+ return Name; } DiagnosticBuilder ASTImporter::ToDiag(SourceLocation Loc, unsigned DiagID) { diff --git a/lib/AST/ASTStructuralEquivalence.cpp b/lib/AST/ASTStructuralEquivalence.cpp index 912db3c130c..db48405055c 100644 --- a/lib/AST/ASTStructuralEquivalence.cpp +++ b/lib/AST/ASTStructuralEquivalence.cpp @@ -235,12 +235,21 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, const TemplateName &N1, const TemplateName &N2) { - if (N1.getKind() != N2.getKind()) + TemplateDecl *TemplateDeclN1 = N1.getAsTemplateDecl(); + TemplateDecl *TemplateDeclN2 = N2.getAsTemplateDecl(); + if (TemplateDeclN1 && TemplateDeclN2) { + if (!IsStructurallyEquivalent(Context, TemplateDeclN1, TemplateDeclN2)) + return false; + // If the kind is different we compare only the template decl. + if (N1.getKind() != N2.getKind()) + return true; + } else if (TemplateDeclN1 || TemplateDeclN2) return false; + else if (N1.getKind() != N2.getKind()) + return false; + + // Check for special case incompatibilities. 
switch (N1.getKind()) { - case TemplateName::Template: - return IsStructurallyEquivalent(Context, N1.getAsTemplateDecl(), - N2.getAsTemplateDecl()); case TemplateName::OverloadedTemplate: { OverloadedTemplateStorage *OS1 = N1.getAsOverloadedTemplate(), @@ -259,14 +268,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return TN1->getDeclName() == TN2->getDeclName(); } - case TemplateName::QualifiedTemplate: { - QualifiedTemplateName *QN1 = N1.getAsQualifiedTemplateName(), - *QN2 = N2.getAsQualifiedTemplateName(); - return IsStructurallyEquivalent(Context, QN1->getDecl(), QN2->getDecl()) && - IsStructurallyEquivalent(Context, QN1->getQualifier(), - QN2->getQualifier()); - } - case TemplateName::DependentTemplate: { DependentTemplateName *DN1 = N1.getAsDependentTemplateName(), *DN2 = N2.getAsDependentTemplateName(); @@ -281,15 +282,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return false; } - case TemplateName::SubstTemplateTemplateParm: { - SubstTemplateTemplateParmStorage *TS1 = N1.getAsSubstTemplateTemplateParm(), - *TS2 = N2.getAsSubstTemplateTemplateParm(); - return IsStructurallyEquivalent(Context, TS1->getParameter(), - TS2->getParameter()) && - IsStructurallyEquivalent(Context, TS1->getReplacement(), - TS2->getReplacement()); - } - case TemplateName::SubstTemplateTemplateParmPack: { SubstTemplateTemplateParmPackStorage *P1 = N1.getAsSubstTemplateTemplateParmPack(), @@ -299,8 +291,16 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, IsStructurallyEquivalent(Context, P1->getParameterPack(), P2->getParameterPack()); } + + case TemplateName::Template: + case TemplateName::QualifiedTemplate: + case TemplateName::SubstTemplateTemplateParm: + // It is sufficient to check value of getAsTemplateDecl. + break; + } - return false; + + return true; } /// Determine whether two template arguments are equivalent. 
@@ -1574,20 +1574,24 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, Decl *D1, Decl *D2) { // FIXME: Check for known structural equivalences via a callback of some sort. + D1 = D1->getCanonicalDecl(); + D2 = D2->getCanonicalDecl(); + std::pair P{D1, D2}; + // Check whether we already know that these two declarations are not // structurally equivalent. - if (Context.NonEquivalentDecls.count( - std::make_pair(D1->getCanonicalDecl(), D2->getCanonicalDecl()))) + if (Context.NonEquivalentDecls.count(P)) return false; - // Determine whether we've already produced a tentative equivalence for D1. - Decl *&EquivToD1 = Context.TentativeEquivalences[D1->getCanonicalDecl()]; - if (EquivToD1) - return EquivToD1 == D2->getCanonicalDecl(); + // Check if a check for these declarations is already pending. + // If yes D1 and D2 will be checked later (from DeclsToCheck), + // or these are already checked (and equivalent). + bool Inserted = Context.VisitedDecls.insert(P).second; + if (!Inserted) + return true; + + Context.DeclsToCheck.push(P); - // Produce a tentative equivalence D1 <-> D2, which will be checked later. - EquivToD1 = D2->getCanonicalDecl(); - Context.DeclsToCheck.push_back(D1->getCanonicalDecl()); return true; } @@ -1703,11 +1707,13 @@ bool StructuralEquivalenceContext::IsEquivalent(Decl *D1, Decl *D2) { // Ensure that the implementation functions (all static functions in this TU) // never call the public ASTStructuralEquivalence::IsEquivalent() functions, // because that will wreak havoc the internal state (DeclsToCheck and - // TentativeEquivalences members) and can cause faulty behaviour. For - // instance, some leaf declarations can be stated and cached as inequivalent - // as a side effect of one inequivalent element in the DeclsToCheck list. + // VisitedDecls members) and can cause faulty behaviour. + // In other words: Do not start a graph search from a new node with the + // internal data of another search in progress. 
+ // FIXME: Better encapsulation and separation of internal and public + // functionality. assert(DeclsToCheck.empty()); - assert(TentativeEquivalences.empty()); + assert(VisitedDecls.empty()); if (!::IsStructurallyEquivalent(*this, D1, D2)) return false; @@ -1717,7 +1723,7 @@ bool StructuralEquivalenceContext::IsEquivalent(Decl *D1, Decl *D2) { bool StructuralEquivalenceContext::IsEquivalent(QualType T1, QualType T2) { assert(DeclsToCheck.empty()); - assert(TentativeEquivalences.empty()); + assert(VisitedDecls.empty()); if (!::IsStructurallyEquivalent(*this, T1, T2)) return false; @@ -1876,11 +1882,11 @@ bool StructuralEquivalenceContext::CheckKindSpecificEquivalence( bool StructuralEquivalenceContext::Finish() { while (!DeclsToCheck.empty()) { // Check the next declaration. - Decl *D1 = DeclsToCheck.front(); - DeclsToCheck.pop_front(); + std::pair P = DeclsToCheck.front(); + DeclsToCheck.pop(); - Decl *D2 = TentativeEquivalences[D1]; - assert(D2 && "Unrecorded tentative equivalence?"); + Decl *D1 = P.first; + Decl *D2 = P.second; bool Equivalent = CheckCommonEquivalence(D1, D2) && CheckKindSpecificEquivalence(D1, D2); @@ -1888,8 +1894,8 @@ bool StructuralEquivalenceContext::Finish() { if (!Equivalent) { // Note that these two declarations are not equivalent (and we already // know about it). 
- NonEquivalentDecls.insert( - std::make_pair(D1->getCanonicalDecl(), D2->getCanonicalDecl())); + NonEquivalentDecls.insert(P); + return true; } } diff --git a/lib/AST/ASTTypeTraits.cpp b/lib/AST/ASTTypeTraits.cpp index ba1581bd3f6..6b7f6ec5108 100644 --- a/lib/AST/ASTTypeTraits.cpp +++ b/lib/AST/ASTTypeTraits.cpp @@ -15,6 +15,7 @@ #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/NestedNameSpecifier.h" namespace clang { namespace ast_type_traits { @@ -36,7 +37,7 @@ const ASTNodeKind::KindInfo ASTNodeKind::AllKindInfo[] = { #include "clang/AST/StmtNodes.inc" { NKI_None, "Type" }, #define TYPE(DERIVED, BASE) { NKI_##BASE, #DERIVED "Type" }, -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" { NKI_None, "OMPClause" }, #define OPENMP_CLAUSE(TextualSpelling, Class) {NKI_OMPClause, #Class}, #include "clang/Basic/OpenMPKinds.def" @@ -103,7 +104,7 @@ ASTNodeKind ASTNodeKind::getFromNode(const Type &T) { #define TYPE(Class, Base) \ case Type::Class: return ASTNodeKind(NKI_##Class##Type); #define ABSTRACT_TYPE(Class, Base) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } llvm_unreachable("invalid type kind"); } @@ -115,6 +116,8 @@ ASTNodeKind ASTNodeKind::getFromNode(const OMPClause &C) { #include "clang/Basic/OpenMPKinds.def" case OMPC_threadprivate: case OMPC_uniform: + case OMPC_device_type: + case OMPC_match: case OMPC_unknown: llvm_unreachable("unexpected OpenMP clause kind"); } @@ -129,9 +132,12 @@ void DynTypedNode::print(llvm::raw_ostream &OS, TN->print(OS, PP); else if (const NestedNameSpecifier *NNS = get()) NNS->print(OS, PP); - else if (const NestedNameSpecifierLoc *NNSL = get()) - NNSL->getNestedNameSpecifier()->print(OS, PP); - else if (const QualType *QT = get()) + else if (const NestedNameSpecifierLoc *NNSL = get()) { + if (const NestedNameSpecifier *NNS = NNSL->getNestedNameSpecifier()) + NNS->print(OS, PP); + else + OS << "(empty 
NestedNameSpecifierLoc)"; + } else if (const QualType *QT = get()) QT->print(OS, PP); else if (const TypeLoc *TL = get()) TL->getType().print(OS, PP); diff --git a/lib/AST/CXXInheritance.cpp b/lib/AST/CXXInheritance.cpp index ecf451b175a..a3a3794b2ed 100644 --- a/lib/AST/CXXInheritance.cpp +++ b/lib/AST/CXXInheritance.cpp @@ -44,7 +44,7 @@ void CXXBasePaths::ComputeDeclsFound() { Decls.insert(Path->Decls.front()); NumDeclsFound = Decls.size(); - DeclsFound = llvm::make_unique(NumDeclsFound); + DeclsFound = std::make_unique(NumDeclsFound); std::copy(Decls.begin(), Decls.end(), DeclsFound.get()); } diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp index 25339c7901e..23dc7ba9359 100644 --- a/lib/AST/Comment.cpp +++ b/lib/AST/Comment.cpp @@ -13,10 +13,25 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/CharInfo.h" #include "llvm/Support/ErrorHandling.h" +#include namespace clang { namespace comments { +// Check that no comment class has a non-trival destructor. They are allocated +// with a BumpPtrAllocator and therefore their destructor is not executed. +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + static_assert(std::is_trivially_destructible::value, \ + #CLASS " should be trivially destructible!"); +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT + +// DeclInfo is also allocated with a BumpPtrAllocator. 
+static_assert(std::is_trivially_destructible::value, + "DeclInfo should be trivially destructible!"); + const char *Comment::getCommentKindName() const { switch (getCommentKind()) { case NoCommentKind: return "NoCommentKind"; diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp index 19485f6018c..c1ea3eab075 100644 --- a/lib/AST/CommentLexer.cpp +++ b/lib/AST/CommentLexer.cpp @@ -850,17 +850,14 @@ again: } StringRef Lexer::getSpelling(const Token &Tok, - const SourceManager &SourceMgr, - bool *Invalid) const { + const SourceManager &SourceMgr) const { SourceLocation Loc = Tok.getLocation(); std::pair LocInfo = SourceMgr.getDecomposedLoc(Loc); bool InvalidTemp = false; StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp); - if (InvalidTemp) { - *Invalid = true; + if (InvalidTemp) return StringRef(); - } const char *Begin = File.data() + LocInfo.second; return StringRef(Begin, Tok.getLength()); diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp index c7f8aa7e16a..29983b0a16c 100644 --- a/lib/AST/CommentParser.cpp +++ b/lib/AST/CommentParser.cpp @@ -422,6 +422,12 @@ InlineCommandComment *Parser::parseInlineCommand() { IC = S.actOnInlineCommand(CommandTok.getLocation(), CommandTok.getEndLocation(), CommandTok.getCommandID()); + + Diag(CommandTok.getEndLocation().getLocWithOffset(1), + diag::warn_doc_inline_contents_no_argument) + << CommandTok.is(tok::at_command) + << Traits.getCommandInfo(CommandTok.getCommandID())->Name + << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation()); } Retokenizer.putBackLeftoverTokens(); diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp index 067b3ae4222..69d61dc5516 100644 --- a/lib/AST/CommentSema.cpp +++ b/lib/AST/CommentSema.cpp @@ -588,6 +588,8 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) { if (isObjCPropertyDecl()) return; if (isFunctionDecl() || isFunctionOrBlockPointerVarLikeDecl()) { + assert(!ThisDeclInfo->ReturnType.isNull() && + 
"should have a valid return type"); if (ThisDeclInfo->ReturnType->isVoidType()) { unsigned DiagKind; switch (ThisDeclInfo->CommentDecl->getKind()) { @@ -873,6 +875,12 @@ bool Sema::isFunctionOrBlockPointerVarLikeDecl() { // can be ignored. if (QT->getAs()) return false; + if (const auto *P = QT->getAs()) + if (P->getPointeeType()->getAs()) + return false; + if (const auto *P = QT->getAs()) + if (P->getPointeeType()->getAs()) + return false; return QT->isFunctionPointerType() || QT->isBlockPointerType(); } diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 21cf9da18a8..80235d8496d 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -1385,7 +1385,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D, case Decl::CXXRecord: { const auto *Record = cast(D); if (Record->isLambda()) { - if (!Record->getLambdaManglingNumber()) { + if (Record->hasKnownLambdaInternalLinkage() || + !Record->getLambdaManglingNumber()) { // This lambda has no mangling number, so it's internal. return getInternalLinkageFor(D); } @@ -1402,7 +1403,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D, // }; const CXXRecordDecl *OuterMostLambda = getOutermostEnclosingLambda(Record); - if (!OuterMostLambda->getLambdaManglingNumber()) + if (OuterMostLambda->hasKnownLambdaInternalLinkage() || + !OuterMostLambda->getLambdaManglingNumber()) return getInternalLinkageFor(D); return getLVForClosure( @@ -1558,6 +1560,24 @@ void NamedDecl::printQualifiedName(raw_ostream &OS) const { void NamedDecl::printQualifiedName(raw_ostream &OS, const PrintingPolicy &P) const { + if (getDeclContext()->isFunctionOrMethod()) { + // We do not print '(anonymous)' for function parameters without name. 
+ printName(OS); + return; + } + printNestedNameSpecifier(OS, P); + if (getDeclName() || isa(this)) + OS << *this; + else + OS << "(anonymous)"; +} + +void NamedDecl::printNestedNameSpecifier(raw_ostream &OS) const { + printNestedNameSpecifier(OS, getASTContext().getPrintingPolicy()); +} + +void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, + const PrintingPolicy &P) const { const DeclContext *Ctx = getDeclContext(); // For ObjC methods and properties, look through categories and use the @@ -1571,10 +1591,8 @@ void NamedDecl::printQualifiedName(raw_ostream &OS, Ctx = ID; } - if (Ctx->isFunctionOrMethod()) { - printName(OS); + if (Ctx->isFunctionOrMethod()) return; - } using ContextsTy = SmallVector; ContextsTy Contexts; @@ -1644,11 +1662,6 @@ void NamedDecl::printQualifiedName(raw_ostream &OS, } OS << "::"; } - - if (getDeclName() || isa(this)) - OS << *this; - else - OS << "(anonymous)"; } void NamedDecl::getNameForDiagnostic(raw_ostream &OS, @@ -2220,6 +2233,22 @@ Stmt **VarDecl::getInitAddress() { return Init.getAddrOfPtr1(); } +VarDecl *VarDecl::getInitializingDeclaration() { + VarDecl *Def = nullptr; + for (auto I : redecls()) { + if (I->hasInit()) + return I; + + if (I->isThisDeclarationADefinition()) { + if (isStaticDataMember()) + return I; + else + Def = I; + } + } + return Def; +} + bool VarDecl::isOutOfLine() const { if (Decl::isOutOfLine()) return true; @@ -2565,6 +2594,18 @@ bool VarDecl::isNoDestroy(const ASTContext &Ctx) const { !hasAttr())); } +QualType::DestructionKind +VarDecl::needsDestruction(const ASTContext &Ctx) const { + if (EvaluatedStmt *Eval = Init.dyn_cast()) + if (Eval->HasConstantDestruction) + return QualType::DK_none; + + if (isNoDestroy(Ctx)) + return QualType::DK_none; + + return getType().isDestructedType(); +} + MemberSpecializationInfo *VarDecl::getMemberSpecializationInfo() const { if (isStaticDataMember()) // FIXME: Remove ? 
@@ -2950,8 +2991,7 @@ bool FunctionDecl::isReplaceableGlobalAllocationFunction(bool *IsAligned) const Ty = Ty->getPointeeType(); if (Ty.getCVRQualifiers() != Qualifiers::Const) return false; - const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); - if (RD && isNamed(RD, "nothrow_t") && RD->isInStdNamespace()) + if (Ty->isNothrowT()) Consume(); } @@ -3235,6 +3275,9 @@ bool FunctionDecl::doesDeclarationForceExternallyVisibleDefinition() const { return true; } + if (Context.getLangOpts().CPlusPlus) + return false; + if (Context.getLangOpts().GNUInline || hasAttr()) { // With GNU inlining, a declaration with 'inline' but not 'extern', forces // an externally visible definition. @@ -3263,9 +3306,6 @@ bool FunctionDecl::doesDeclarationForceExternallyVisibleDefinition() const { return FoundBody; } - if (Context.getLangOpts().CPlusPlus) - return false; - // C99 6.7.4p6: // [...] If all of the file scope declarations for a function in a // translation unit include the inline function specifier without extern, @@ -3332,7 +3372,8 @@ SourceRange FunctionDecl::getExceptionSpecSourceRange() const { /// an externally visible symbol, but "extern inline" will not create an /// externally visible symbol. bool FunctionDecl::isInlineDefinitionExternallyVisible() const { - assert((doesThisDeclarationHaveABody() || willHaveBody()) && + assert((doesThisDeclarationHaveABody() || willHaveBody() || + hasAttr()) && "Must be a function definition"); assert(isInlined() && "Function must be inline"); ASTContext &Context = getASTContext(); @@ -3344,6 +3385,8 @@ bool FunctionDecl::isInlineDefinitionExternallyVisible() const { // If it's not the case that both 'inline' and 'extern' are // specified on the definition, then this inline definition is // externally visible. 
+ if (Context.getLangOpts().CPlusPlus) + return false; if (!(isInlineSpecified() && getStorageClass() == SC_Extern)) return true; diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp index fd80e1532eb..77a3a4c679a 100644 --- a/lib/AST/DeclBase.cpp +++ b/lib/AST/DeclBase.cpp @@ -12,6 +12,7 @@ #include "clang/AST/DeclBase.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/Attr.h" #include "clang/AST/AttrIterator.h" @@ -99,7 +100,7 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Ctx, // Ensure required alignment of the resulting object by adding extra // padding at the start if required. size_t ExtraAlign = - llvm::OffsetToAlignment(sizeof(Module *), alignof(Decl)); + llvm::offsetToAlignment(sizeof(Module *), llvm::Align(alignof(Decl))); auto *Buffer = reinterpret_cast( ::operator new(ExtraAlign + sizeof(Module *) + Size + Extra, Ctx)); Buffer += ExtraAlign; @@ -958,11 +959,11 @@ const FunctionType *Decl::getFunctionType(bool BlocksToo) const { return nullptr; if (Ty->isFunctionPointerType()) - Ty = Ty->getAs()->getPointeeType(); + Ty = Ty->castAs()->getPointeeType(); else if (Ty->isFunctionReferenceType()) - Ty = Ty->getAs()->getPointeeType(); + Ty = Ty->castAs()->getPointeeType(); else if (BlocksToo && Ty->isBlockPointerType()) - Ty = Ty->getAs()->getPointeeType(); + Ty = Ty->castAs()->getPointeeType(); return Ty->getAs(); } @@ -1043,6 +1044,12 @@ DeclContext *DeclContext::getLookupParent() { getLexicalParent()->getRedeclContext()->isRecord()) return getLexicalParent(); + // A lookup within the call operator of a lambda never looks in the lambda + // class; instead, skip to the context in which that closure type is + // declared. 
+ if (isLambdaCallOperator(this)) + return getParent()->getParent(); + return getParent(); } diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp index 59710a55498..12ec44fa027 100644 --- a/lib/AST/DeclCXX.cpp +++ b/lib/AST/DeclCXX.cpp @@ -95,14 +95,16 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) HasDefaultedDefaultConstructor(false), DefaultedDefaultConstructorIsConstexpr(true), HasConstexprDefaultConstructor(false), - HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), + DefaultedDestructorIsConstexpr(true), + HasNonLiteralTypeFieldsOrBases(false), UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0), ImplicitCopyConstructorCanHaveConstParamForVBase(true), ImplicitCopyConstructorCanHaveConstParamForNonVBase(true), ImplicitCopyAssignmentHasConstParam(true), HasDeclaredCopyConstructorWithConstParam(false), HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), - IsParsingBaseSpecifiers(false), HasODRHash(false), Definition(D) {} + IsParsingBaseSpecifiers(false), ComputedVisibleConversions(false), + HasODRHash(false), Definition(D) {} CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { return Bases.get(Definition->getASTContext().getExternalSource()); @@ -217,7 +219,7 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (BaseType->isDependentType()) continue; auto *BaseClassDecl = - cast(BaseType->getAs()->getDecl()); + cast(BaseType->castAs()->getDecl()); // C++2a [class]p7: // A standard-layout class is a class that: @@ -325,10 +327,12 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, data().IsStandardLayout = false; data().IsCXX11StandardLayout = false; - // C++11 [dcl.constexpr]p4: - // In the definition of a constexpr constructor [...] - // -- the class shall not have any virtual base classes + // C++20 [dcl.constexpr]p3: + // In the definition of a constexpr function [...] 
+ // -- if the function is a constructor or destructor, + // its class shall not have any virtual base classes data().DefaultedDefaultConstructorIsConstexpr = false; + data().DefaultedDestructorIsConstexpr = false; // C++1z [class.copy]p8: // The implicitly-declared copy constructor for a class X will have @@ -520,6 +524,19 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } + + // C++2a [dcl.constexpr]p4: + // The definition of a constexpr destructor [shall] satisfy the + // following requirement: + // -- for every subobject of class type or (possibly multi-dimensional) + // array thereof, that class type shall have a constexpr destructor + if (!Subobj->hasConstexprDestructor()) + data().DefaultedDestructorIsConstexpr = false; +} + +bool CXXRecordDecl::hasConstexprDestructor() const { + auto *Dtor = getDestructor(); + return Dtor ? Dtor->isConstexpr() : defaultedDestructorIsConstexpr(); } bool CXXRecordDecl::hasAnyDependentBases() const { @@ -1263,7 +1280,8 @@ void CXXRecordDecl::addedMember(Decl *D) { } else { // Base element type of field is a non-class type. 
if (!T->isLiteralType(Context) || - (!Field->hasInClassInitializer() && !isUnion())) + (!Field->hasInClassInitializer() && !isUnion() && + !Context.getLangOpts().CPlusPlus2a)) data().DefaultedDefaultConstructorIsConstexpr = false; // C++11 [class.copy]p23: @@ -1382,17 +1400,29 @@ static bool allLookupResultsAreTheSame(const DeclContext::lookup_result &R) { } #endif -CXXMethodDecl* CXXRecordDecl::getLambdaCallOperator() const { - if (!isLambda()) return nullptr; +static NamedDecl* getLambdaCallOperatorHelper(const CXXRecordDecl &RD) { + if (!RD.isLambda()) return nullptr; DeclarationName Name = - getASTContext().DeclarationNames.getCXXOperatorName(OO_Call); - DeclContext::lookup_result Calls = lookup(Name); + RD.getASTContext().DeclarationNames.getCXXOperatorName(OO_Call); + DeclContext::lookup_result Calls = RD.lookup(Name); assert(!Calls.empty() && "Missing lambda call operator!"); assert(allLookupResultsAreTheSame(Calls) && "More than one lambda call operator!"); + return Calls.front(); +} + +FunctionTemplateDecl* CXXRecordDecl::getDependentLambdaCallOperator() const { + NamedDecl *CallOp = getLambdaCallOperatorHelper(*this); + return dyn_cast_or_null(CallOp); +} + +CXXMethodDecl *CXXRecordDecl::getLambdaCallOperator() const { + NamedDecl *CallOp = getLambdaCallOperatorHelper(*this); + + if (CallOp == nullptr) + return nullptr; - NamedDecl *CallOp = Calls.front(); if (const auto *CallOpTmpl = dyn_cast(CallOp)) return cast(CallOpTmpl->getTemplatedDecl()); @@ -1880,7 +1910,7 @@ bool CXXRecordDecl::mayBeAbstract() const { for (const auto &B : bases()) { const auto *BaseDecl = - cast(B.getType()->getAs()->getDecl()); + cast(B.getType()->castAs()->getDecl()); if (BaseDecl->isAbstract()) return true; } @@ -2067,10 +2097,15 @@ CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base, if (DevirtualizedMethod->hasAttr()) return DevirtualizedMethod; - // Similarly, if the class itself is marked 'final' it can't be overridden - // and we can therefore 
devirtualize the member function call. + // Similarly, if the class itself or its destructor is marked 'final', + // the class can't be derived from and we can therefore devirtualize the + // member function call. if (BestDynamicDecl->hasAttr()) return DevirtualizedMethod; + if (const auto *dtor = BestDynamicDecl->getDestructor()) { + if (dtor->hasAttr()) + return DevirtualizedMethod; + } if (const auto *DRE = dyn_cast(Base)) { if (const auto *VD = dyn_cast(DRE->getDecl())) @@ -2532,7 +2567,7 @@ bool CXXConstructorDecl::isConvertingConstructor(bool AllowExplicit) const { return false; return (getNumParams() == 0 && - getType()->getAs()->isVariadic()) || + getType()->castAs()->isVariadic()) || (getNumParams() == 1) || (getNumParams() > 1 && (getParamDecl(1)->hasDefaultArg() || @@ -2565,20 +2600,19 @@ CXXDestructorDecl * CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) { return new (C, ID) CXXDestructorDecl(C, nullptr, SourceLocation(), DeclarationNameInfo(), - QualType(), nullptr, false, false); + QualType(), nullptr, false, false, CSK_unspecified); } -CXXDestructorDecl * -CXXDestructorDecl::Create(ASTContext &C, CXXRecordDecl *RD, - SourceLocation StartLoc, - const DeclarationNameInfo &NameInfo, - QualType T, TypeSourceInfo *TInfo, - bool isInline, bool isImplicitlyDeclared) { +CXXDestructorDecl *CXXDestructorDecl::Create( + ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, + const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo, + bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind) { assert(NameInfo.getName().getNameKind() == DeclarationName::CXXDestructorName && "Name must refer to a destructor"); - return new (C, RD) CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, - isInline, isImplicitlyDeclared); + return new (C, RD) + CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, isInline, + isImplicitlyDeclared, ConstexprKind); } void CXXDestructorDecl::setOperatorDelete(FunctionDecl *OD, Expr 
*ThisArg) { diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp index f5c69944034..608b0b44072 100644 --- a/lib/AST/DeclPrinter.cpp +++ b/lib/AST/DeclPrinter.cpp @@ -1001,12 +1001,19 @@ void DeclPrinter::VisitCXXRecordDecl(CXXRecordDecl *D) { void DeclPrinter::VisitLinkageSpecDecl(LinkageSpecDecl *D) { const char *l; - if (D->getLanguage() == LinkageSpecDecl::lang_c) + switch (D->getLanguage()) { + case LinkageSpecDecl::lang_c: l = "C"; - else { - assert(D->getLanguage() == LinkageSpecDecl::lang_cxx && - "unknown language in linkage specification"); + break; + case LinkageSpecDecl::lang_cxx_14: + l = "C++14"; + break; + case LinkageSpecDecl::lang_cxx_11: + l = "C++11"; + break; + case LinkageSpecDecl::lang_cxx: l = "C++"; + break; } Out << "extern \"" << l << "\" "; diff --git a/lib/AST/DeclTemplate.cpp b/lib/AST/DeclTemplate.cpp index 40c39c845db..7e013c6c54d 100644 --- a/lib/AST/DeclTemplate.cpp +++ b/lib/AST/DeclTemplate.cpp @@ -70,6 +70,8 @@ TemplateParameterList::TemplateParameterList(SourceLocation TemplateLoc, } if (RequiresClause) { *getTrailingObjects() = RequiresClause; + if (RequiresClause->containsUnexpandedParameterPack()) + ContainsUnexpandedParameterPack = true; } } @@ -136,6 +138,18 @@ static void AdoptTemplateParameterList(TemplateParameterList *Params, } } +void TemplateParameterList:: +getAssociatedConstraints(llvm::SmallVectorImpl &AC) const { + // TODO: Concepts: Collect immediately-introduced constraints. + if (HasRequiresClause) + AC.push_back(getRequiresClause()); +} + +bool TemplateParameterList::hasAssociatedConstraints() const { + // TODO: Concepts: Regard immediately-introduced constraints. 
+ return HasRequiresClause; +} + namespace clang { void *allocateDefaultArgStorageChain(const ASTContext &C) { @@ -144,6 +158,28 @@ void *allocateDefaultArgStorageChain(const ASTContext &C) { } // namespace clang +//===----------------------------------------------------------------------===// +// TemplateDecl Implementation +//===----------------------------------------------------------------------===// + +TemplateDecl::TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, + DeclarationName Name, TemplateParameterList *Params, + NamedDecl *Decl) + : NamedDecl(DK, DC, L, Name), TemplatedDecl(Decl), TemplateParams(Params) {} + +void TemplateDecl::anchor() {} + +void TemplateDecl:: +getAssociatedConstraints(llvm::SmallVectorImpl &AC) const { + // TODO: Concepts: Append function trailing requires clause. + TemplateParams->getAssociatedConstraints(AC); +} + +bool TemplateDecl::hasAssociatedConstraints() const { + // TODO: Concepts: Regard function trailing requires clause. + return TemplateParams->hasAssociatedConstraints(); +} + //===----------------------------------------------------------------------===// // RedeclarableTemplateDecl Implementation //===----------------------------------------------------------------------===// @@ -344,19 +380,10 @@ ClassTemplateDecl *ClassTemplateDecl::Create(ASTContext &C, SourceLocation L, DeclarationName Name, TemplateParameterList *Params, - NamedDecl *Decl, - Expr *AssociatedConstraints) { + NamedDecl *Decl) { AdoptTemplateParameterList(Params, cast(Decl)); - if (!AssociatedConstraints) { - return new (C, DC) ClassTemplateDecl(C, DC, L, Name, Params, Decl); - } - - auto *const CTDI = new (C) ConstrainedTemplateDeclInfo; - auto *const New = - new (C, DC) ClassTemplateDecl(CTDI, C, DC, L, Name, Params, Decl); - New->setAssociatedConstraints(AssociatedConstraints); - return New; + return new (C, DC) ClassTemplateDecl(C, DC, L, Name, Params, Decl); } ClassTemplateDecl *ClassTemplateDecl::CreateDeserialized(ASTContext &C, @@ 
-510,20 +537,24 @@ SourceRange TemplateTypeParmDecl::getSourceRange() const { if (hasDefaultArgument() && !defaultArgumentWasInherited()) return SourceRange(getBeginLoc(), getDefaultArgumentInfo()->getTypeLoc().getEndLoc()); - else - return TypeDecl::getSourceRange(); + // TypeDecl::getSourceRange returns a range containing name location, which is + // wrong for unnamed template parameters. e.g: + // it will return <[[typename>]] instead of <[[typename]]> + else if (getDeclName().isEmpty()) + return SourceRange(getBeginLoc()); + return TypeDecl::getSourceRange(); } unsigned TemplateTypeParmDecl::getDepth() const { - return getTypeForDecl()->getAs()->getDepth(); + return getTypeForDecl()->castAs()->getDepth(); } unsigned TemplateTypeParmDecl::getIndex() const { - return getTypeForDecl()->getAs()->getIndex(); + return getTypeForDecl()->castAs()->getIndex(); } bool TemplateTypeParmDecl::isParameterPack() const { - return getTypeForDecl()->getAs()->isParameterPack(); + return getTypeForDecl()->castAs()->isParameterPack(); } //===----------------------------------------------------------------------===// @@ -703,12 +734,6 @@ FunctionTemplateSpecializationInfo *FunctionTemplateSpecializationInfo::Create( FD, Template, TSK, TemplateArgs, ArgsAsWritten, POI, MSInfo); } -//===----------------------------------------------------------------------===// -// TemplateDecl Implementation -//===----------------------------------------------------------------------===// - -void TemplateDecl::anchor() {} - //===----------------------------------------------------------------------===// // ClassTemplateSpecializationDecl Implementation //===----------------------------------------------------------------------===// diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index 6ef77b8aee6..3438c3aadc6 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -85,8 +85,8 @@ const Expr *Expr::skipRValueSubobjectAdjustments( CE->getCastKind() == CK_UncheckedDerivedToBase) && 
E->getType()->isRecordType()) { E = CE->getSubExpr(); - CXXRecordDecl *Derived - = cast(E->getType()->getAs()->getDecl()); + auto *Derived = + cast(E->getType()->castAs()->getDecl()); Adjustments.push_back(SubobjectAdjustment(CE, Derived)); continue; } @@ -185,6 +185,12 @@ bool Expr::isKnownToHaveBooleanValue() const { return CO->getTrueExpr()->isKnownToHaveBooleanValue() && CO->getFalseExpr()->isKnownToHaveBooleanValue(); + if (isa(E)) + return true; + + if (const auto *OVE = dyn_cast(E)) + return OVE->getSourceExpr()->isKnownToHaveBooleanValue(); + return false; } @@ -2563,13 +2569,31 @@ bool Expr::isUnusedResultAWarning(const Expr *&WarnE, SourceLocation &Loc, case CXXTemporaryObjectExprClass: case CXXConstructExprClass: { if (const CXXRecordDecl *Type = getType()->getAsCXXRecordDecl()) { - if (Type->hasAttr()) { + const auto *WarnURAttr = Type->getAttr(); + if (Type->hasAttr() || + (WarnURAttr && WarnURAttr->IsCXX11NoDiscard())) { WarnE = this; Loc = getBeginLoc(); R1 = getSourceRange(); return true; } } + + const auto *CE = cast(this); + if (const CXXConstructorDecl *Ctor = CE->getConstructor()) { + const auto *WarnURAttr = Ctor->getAttr(); + if (WarnURAttr && WarnURAttr->IsCXX11NoDiscard()) { + WarnE = this; + Loc = getBeginLoc(); + R1 = getSourceRange(); + + if (unsigned NumArgs = CE->getNumArgs()) + R2 = SourceRange(CE->getArg(0)->getBeginLoc(), + CE->getArg(NumArgs - 1)->getEndLoc()); + return true; + } + } + return false; } @@ -3181,7 +3205,7 @@ bool Expr::isConstantInitializer(ASTContext &Ctx, bool IsForRef, if (ILE->getType()->isRecordType()) { unsigned ElementNo = 0; - RecordDecl *RD = ILE->getType()->getAs()->getDecl(); + RecordDecl *RD = ILE->getType()->castAs()->getDecl(); for (const auto *Field : RD->fields()) { // If this is a union, skip all the fields that aren't being initialized. 
if (RD->isUnion() && ILE->getInitializedFieldInUnion() != Field) @@ -3379,6 +3403,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case CXXUuidofExprClass: case OpaqueValueExprClass: case SourceLocExprClass: + case ConceptSpecializationExprClass: // These never have a side-effect. return false; @@ -3448,6 +3473,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case ArrayInitLoopExprClass: case ParenListExprClass: case CXXPseudoDestructorExprClass: + case CXXRewrittenBinaryOperatorClass: case CXXStdInitializerListExprClass: case SubstNonTypeTemplateParmExprClass: case MaterializeTemporaryExprClass: @@ -3897,6 +3923,112 @@ bool Expr::refersToGlobalRegisterVar() const { return false; } +bool Expr::isSameComparisonOperand(const Expr* E1, const Expr* E2) { + E1 = E1->IgnoreParens(); + E2 = E2->IgnoreParens(); + + if (E1->getStmtClass() != E2->getStmtClass()) + return false; + + switch (E1->getStmtClass()) { + default: + return false; + case CXXThisExprClass: + return true; + case DeclRefExprClass: { + // DeclRefExpr without an ImplicitCastExpr can happen for integral + // template parameters. + const auto *DRE1 = cast(E1); + const auto *DRE2 = cast(E2); + return DRE1->isRValue() && DRE2->isRValue() && + DRE1->getDecl() == DRE2->getDecl(); + } + case ImplicitCastExprClass: { + // Peel off implicit casts. + while (true) { + const auto *ICE1 = dyn_cast(E1); + const auto *ICE2 = dyn_cast(E2); + if (!ICE1 || !ICE2) + return false; + if (ICE1->getCastKind() != ICE2->getCastKind()) + return false; + E1 = ICE1->getSubExpr()->IgnoreParens(); + E2 = ICE2->getSubExpr()->IgnoreParens(); + // The final cast must be one of these types. 
+ if (ICE1->getCastKind() == CK_LValueToRValue || + ICE1->getCastKind() == CK_ArrayToPointerDecay || + ICE1->getCastKind() == CK_FunctionToPointerDecay) { + break; + } + } + + const auto *DRE1 = dyn_cast(E1); + const auto *DRE2 = dyn_cast(E2); + if (DRE1 && DRE2) + return declaresSameEntity(DRE1->getDecl(), DRE2->getDecl()); + + const auto *Ivar1 = dyn_cast(E1); + const auto *Ivar2 = dyn_cast(E2); + if (Ivar1 && Ivar2) { + return Ivar1->isFreeIvar() && Ivar2->isFreeIvar() && + declaresSameEntity(Ivar1->getDecl(), Ivar2->getDecl()); + } + + const auto *Array1 = dyn_cast(E1); + const auto *Array2 = dyn_cast(E2); + if (Array1 && Array2) { + if (!isSameComparisonOperand(Array1->getBase(), Array2->getBase())) + return false; + + auto Idx1 = Array1->getIdx(); + auto Idx2 = Array2->getIdx(); + const auto Integer1 = dyn_cast(Idx1); + const auto Integer2 = dyn_cast(Idx2); + if (Integer1 && Integer2) { + if (!llvm::APInt::isSameValue(Integer1->getValue(), + Integer2->getValue())) + return false; + } else { + if (!isSameComparisonOperand(Idx1, Idx2)) + return false; + } + + return true; + } + + // Walk the MemberExpr chain. + while (isa(E1) && isa(E2)) { + const auto *ME1 = cast(E1); + const auto *ME2 = cast(E2); + if (!declaresSameEntity(ME1->getMemberDecl(), ME2->getMemberDecl())) + return false; + if (const auto *D = dyn_cast(ME1->getMemberDecl())) + if (D->isStaticDataMember()) + return true; + E1 = ME1->getBase()->IgnoreParenImpCasts(); + E2 = ME2->getBase()->IgnoreParenImpCasts(); + } + + if (isa(E1) && isa(E2)) + return true; + + // A static member variable can end the MemberExpr chain with either + // a MemberExpr or a DeclRefExpr. 
+ auto getAnyDecl = [](const Expr *E) -> const ValueDecl * { + if (const auto *DRE = dyn_cast(E)) + return DRE->getDecl(); + if (const auto *ME = dyn_cast(E)) + return ME->getMemberDecl(); + return nullptr; + }; + + const ValueDecl *VD1 = getAnyDecl(E1); + const ValueDecl *VD2 = getAnyDecl(E2); + return declaresSameEntity(VD1, VD2); + } + } +} + /// isArrow - Return true if the base expression is a pointer to vector, /// return false if the base expression is a vector. bool ExtVectorElementExpr::isArrow() const { diff --git a/lib/AST/ExprCXX.cpp b/lib/AST/ExprCXX.cpp index b30f785ba8f..904928bdf28 100644 --- a/lib/AST/ExprCXX.cpp +++ b/lib/AST/ExprCXX.cpp @@ -58,6 +58,76 @@ bool CXXOperatorCallExpr::isInfixBinaryOp() const { } } +CXXRewrittenBinaryOperator::DecomposedForm +CXXRewrittenBinaryOperator::getDecomposedForm() const { + DecomposedForm Result = {}; + const Expr *E = getSemanticForm()->IgnoreImplicit(); + + // Remove an outer '!' if it exists (only happens for a '!=' rewrite). + bool SkippedNot = false; + if (auto *NotEq = dyn_cast(E)) { + assert(NotEq->getOpcode() == UO_LNot); + E = NotEq->getSubExpr()->IgnoreImplicit(); + SkippedNot = true; + } + + // Decompose the outer binary operator. + if (auto *BO = dyn_cast(E)) { + assert(!SkippedNot || BO->getOpcode() == BO_EQ); + Result.Opcode = SkippedNot ? BO_NE : BO->getOpcode(); + Result.LHS = BO->getLHS(); + Result.RHS = BO->getRHS(); + Result.InnerBinOp = BO; + } else if (auto *BO = dyn_cast(E)) { + assert(!SkippedNot || BO->getOperator() == OO_EqualEqual); + assert(BO->isInfixBinaryOp()); + switch (BO->getOperator()) { + case OO_Less: Result.Opcode = BO_LT; break; + case OO_LessEqual: Result.Opcode = BO_LE; break; + case OO_Greater: Result.Opcode = BO_GT; break; + case OO_GreaterEqual: Result.Opcode = BO_GE; break; + case OO_Spaceship: Result.Opcode = BO_Cmp; break; + case OO_EqualEqual: Result.Opcode = SkippedNot ? 
BO_NE : BO_EQ; break; + default: llvm_unreachable("unexpected binop in rewritten operator expr"); + } + Result.LHS = BO->getArg(0); + Result.RHS = BO->getArg(1); + Result.InnerBinOp = BO; + } else { + llvm_unreachable("unexpected rewritten operator form"); + } + + // Put the operands in the right order for == and !=, and canonicalize the + // <=> subexpression onto the LHS for all other forms. + if (isReversed()) + std::swap(Result.LHS, Result.RHS); + + // If this isn't a spaceship rewrite, we're done. + if (Result.Opcode == BO_EQ || Result.Opcode == BO_NE) + return Result; + + // Otherwise, we expect a <=> to now be on the LHS. + E = Result.LHS->IgnoreImplicit(); + if (auto *BO = dyn_cast(E)) { + assert(BO->getOpcode() == BO_Cmp); + Result.LHS = BO->getLHS(); + Result.RHS = BO->getRHS(); + Result.InnerBinOp = BO; + } else if (auto *BO = dyn_cast(E)) { + assert(BO->getOperator() == OO_Spaceship); + Result.LHS = BO->getArg(0); + Result.RHS = BO->getArg(1); + Result.InnerBinOp = BO; + } else { + llvm_unreachable("unexpected rewritten operator form"); + } + + // Put the comparison operands in the right order. 
+ if (isReversed()) + std::swap(Result.LHS, Result.RHS); + return Result; +} + bool CXXTypeidExpr::isPotentiallyEvaluated() const { if (isTypeOperand()) return false; @@ -124,6 +194,8 @@ CXXNewExpr::CXXNewExpr(bool IsGlobalNew, FunctionDecl *OperatorNew, if (ArraySize) { if (Expr *SizeExpr = *ArraySize) { + if (SizeExpr->isValueDependent()) + ExprBits.ValueDependent = true; if (SizeExpr->isInstantiationDependent()) ExprBits.InstantiationDependent = true; if (SizeExpr->containsUnexpandedParameterPack()) @@ -134,6 +206,8 @@ CXXNewExpr::CXXNewExpr(bool IsGlobalNew, FunctionDecl *OperatorNew, } if (Initializer) { + if (Initializer->isValueDependent()) + ExprBits.ValueDependent = true; if (Initializer->isInstantiationDependent()) ExprBits.InstantiationDependent = true; if (Initializer->containsUnexpandedParameterPack()) @@ -143,6 +217,8 @@ CXXNewExpr::CXXNewExpr(bool IsGlobalNew, FunctionDecl *OperatorNew, } for (unsigned I = 0; I != PlacementArgs.size(); ++I) { + if (PlacementArgs[I]->isValueDependent()) + ExprBits.ValueDependent = true; if (PlacementArgs[I]->isInstantiationDependent()) ExprBits.InstantiationDependent = true; if (PlacementArgs[I]->containsUnexpandedParameterPack()) @@ -245,7 +321,7 @@ QualType CXXDeleteExpr::getDestroyedType() const { if (ArgType->isDependentType() && !ArgType->isPointerType()) return QualType(); - return ArgType->getAs()->getPointeeType(); + return ArgType->castAs()->getPointeeType(); } // CXXPseudoDestructorExpr @@ -651,6 +727,13 @@ Expr *CXXMemberCallExpr::getImplicitObjectArgument() const { return nullptr; } +QualType CXXMemberCallExpr::getObjectType() const { + QualType Ty = getImplicitObjectArgument()->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + return Ty; +} + CXXMethodDecl *CXXMemberCallExpr::getMethodDecl() const { if (const auto *MemExpr = dyn_cast(getCallee()->IgnoreParens())) return cast(MemExpr->getMemberDecl()); @@ -1205,6 +1288,11 @@ CXXMethodDecl *LambdaExpr::getCallOperator() const { return 
Record->getLambdaCallOperator(); } +FunctionTemplateDecl *LambdaExpr::getDependentCallOperator() const { + CXXRecordDecl *Record = getLambdaClass(); + return Record->getDependentLambdaCallOperator(); +} + TemplateParameterList *LambdaExpr::getTemplateParameterList() const { CXXRecordDecl *Record = getLambdaClass(); return Record->getGenericLambdaTemplateParameterList(); @@ -1494,11 +1582,8 @@ CXXRecordDecl *UnresolvedMemberExpr::getNamingClass() { // Otherwise the naming class must have been the base class. else { QualType BaseType = getBaseType().getNonReferenceType(); - if (isArrow()) { - const auto *PT = BaseType->getAs(); - assert(PT && "base of arrow member access is not pointer"); - BaseType = PT->getPointeeType(); - } + if (isArrow()) + BaseType = BaseType->castAs()->getPointeeType(); Record = BaseType->getAsCXXRecordDecl(); assert(Record && "base of member expression does not name record"); @@ -1665,3 +1750,82 @@ CUDAKernelCallExpr *CUDAKernelCallExpr::CreateEmpty(const ASTContext &Ctx, alignof(CUDAKernelCallExpr)); return new (Mem) CUDAKernelCallExpr(NumArgs, Empty); } + +ConceptSpecializationExpr::ConceptSpecializationExpr(ASTContext &C, + NestedNameSpecifierLoc NNS, SourceLocation TemplateKWLoc, + SourceLocation ConceptNameLoc, NamedDecl *FoundDecl, + ConceptDecl *NamedConcept, const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef ConvertedArgs, Optional IsSatisfied) + : Expr(ConceptSpecializationExprClass, C.BoolTy, VK_RValue, OK_Ordinary, + /*TypeDependent=*/false, + // All the flags below are set in setTemplateArguments. + /*ValueDependent=*/!IsSatisfied.hasValue(), + /*InstantiationDependent=*/false, + /*ContainsUnexpandedParameterPacks=*/false), + NestedNameSpec(NNS), TemplateKWLoc(TemplateKWLoc), + ConceptNameLoc(ConceptNameLoc), FoundDecl(FoundDecl), + NamedConcept(NamedConcept, IsSatisfied ? 
*IsSatisfied : true), + NumTemplateArgs(ConvertedArgs.size()) { + + setTemplateArguments(ArgsAsWritten, ConvertedArgs); +} + +ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty, + unsigned NumTemplateArgs) + : Expr(ConceptSpecializationExprClass, Empty), + NumTemplateArgs(NumTemplateArgs) { } + +void ConceptSpecializationExpr::setTemplateArguments( + const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef Converted) { + assert(Converted.size() == NumTemplateArgs); + assert(!this->ArgsAsWritten && "setTemplateArguments can only be used once"); + this->ArgsAsWritten = ArgsAsWritten; + std::uninitialized_copy(Converted.begin(), Converted.end(), + getTrailingObjects()); + bool IsInstantiationDependent = false; + bool ContainsUnexpandedParameterPack = false; + for (const TemplateArgumentLoc& LocInfo : ArgsAsWritten->arguments()) { + if (LocInfo.getArgument().isInstantiationDependent()) + IsInstantiationDependent = true; + if (LocInfo.getArgument().containsUnexpandedParameterPack()) + ContainsUnexpandedParameterPack = true; + if (ContainsUnexpandedParameterPack && IsInstantiationDependent) + break; + } + + // Currently guaranteed by the fact concepts can only be at namespace-scope. 
+ assert(!NestedNameSpec || + (!NestedNameSpec.getNestedNameSpecifier()->isInstantiationDependent() && + !NestedNameSpec.getNestedNameSpecifier() + ->containsUnexpandedParameterPack())); + setInstantiationDependent(IsInstantiationDependent); + setContainsUnexpandedParameterPack(ContainsUnexpandedParameterPack); + assert((!isValueDependent() || isInstantiationDependent()) && + "should not be value-dependent"); +} + +ConceptSpecializationExpr * +ConceptSpecializationExpr::Create(ASTContext &C, NestedNameSpecifierLoc NNS, + SourceLocation TemplateKWLoc, + SourceLocation ConceptNameLoc, + NamedDecl *FoundDecl, + ConceptDecl *NamedConcept, + const ASTTemplateArgumentListInfo *ArgsAsWritten, + ArrayRef ConvertedArgs, + Optional IsSatisfied) { + void *Buffer = C.Allocate(totalSizeToAlloc( + ConvertedArgs.size())); + return new (Buffer) ConceptSpecializationExpr(C, NNS, TemplateKWLoc, + ConceptNameLoc, FoundDecl, + NamedConcept, ArgsAsWritten, + ConvertedArgs, IsSatisfied); +} + +ConceptSpecializationExpr * +ConceptSpecializationExpr::Create(ASTContext &C, EmptyShell Empty, + unsigned NumTemplateArgs) { + void *Buffer = C.Allocate(totalSizeToAlloc( + NumTemplateArgs)); + return new (Buffer) ConceptSpecializationExpr(Empty, NumTemplateArgs); +} diff --git a/lib/AST/ExprClassification.cpp b/lib/AST/ExprClassification.cpp index c61ee703aca..9dbf6fe9e0f 100644 --- a/lib/AST/ExprClassification.cpp +++ b/lib/AST/ExprClassification.cpp @@ -192,6 +192,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::NoInitExprClass: case Expr::DesignatedInitUpdateExprClass: case Expr::SourceLocExprClass: + case Expr::ConceptSpecializationExprClass: return Cl::CL_PRValue; case Expr::ConstantExprClass: @@ -306,6 +307,10 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::CUDAKernelCallExprClass: return ClassifyUnnamed(Ctx, cast(E)->getCallReturnType(Ctx)); + case Expr::CXXRewrittenBinaryOperatorClass: + return ClassifyInternal( + Ctx, 
cast(E)->getSemanticForm()); + // __builtin_choose_expr is equivalent to the chosen expression. case Expr::ChooseExprClass: return ClassifyInternal(Ctx, cast(E)->getChosenSubExpr()); diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp index f01b42e7ff7..42c746e6028 100644 --- a/lib/AST/ExprConstant.cpp +++ b/lib/AST/ExprConstant.cpp @@ -32,15 +32,21 @@ // //===----------------------------------------------------------------------===// +#include +#include +#include "Interp/Context.h" +#include "Interp/Frame.h" +#include "Interp/State.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTLambda.h" +#include "clang/AST/CXXInheritance.h" #include "clang/AST/CharUnits.h" #include "clang/AST/CurrentSourceLocExprScope.h" -#include "clang/AST/CXXInheritance.h" #include "clang/AST/Expr.h" #include "clang/AST/OSLog.h" +#include "clang/AST/OptionalDiagnostic.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/TypeLoc.h" @@ -51,8 +57,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" -#include -#include #define DEBUG_TYPE "exprconstant" @@ -62,12 +66,10 @@ using llvm::APSInt; using llvm::APFloat; using llvm::Optional; -static bool IsGlobalLValue(APValue::LValueBase B); - namespace { struct LValue; - struct CallStackFrame; - struct EvalInfo; + class CallStackFrame; + class EvalInfo; using SourceLocExprScopeGuard = CurrentSourceLocExprScope::SourceLocExprScopeGuard; @@ -94,6 +96,9 @@ namespace { if (B.is()) return B.getTypeInfoType(); + if (B.is()) + return B.getDynamicAllocType(); + const Expr *Base = B.get(); // For a materialized temporary, the type of the temporary we materialized @@ -130,6 +135,14 @@ namespace { return E.getAsBaseOrMember().getInt(); } + /// Given an expression, determine the type used to store the result of + /// evaluating that expression. 
+ static QualType getStorageType(const ASTContext &Ctx, const Expr *E) { + if (E->isRValue()) + return E->getType(); + return Ctx.getLValueReferenceType(E->getType()); + } + /// Given a CallExpr, try to get the alloc_size attribute. May return null. static const AllocSizeAttr *getAllocSizeAttr(const CallExpr *CE) { const FunctionDecl *Callee = CE->getDirectCallee(); @@ -222,12 +235,6 @@ namespace { return MostDerivedLength; } - // The order of this enum is important for diagnostics. - enum CheckSubobjectKind { - CSK_Base, CSK_Derived, CSK_Field, CSK_ArrayToPointer, CSK_ArrayIndex, - CSK_Real, CSK_Imag - }; - /// A path from a glvalue to a subobject of that glvalue. struct SubobjectDesignator { /// True if the subobject was named in a manner not supported by C++11. Such @@ -480,7 +487,8 @@ namespace { }; /// A stack frame in the constexpr call stack. - struct CallStackFrame { + class CallStackFrame : public interp::Frame { + public: EvalInfo &Info; /// Parent - The caller of this stack frame. @@ -573,7 +581,26 @@ namespace { return 0; } - APValue &createTemporary(const void *Key, bool IsLifetimeExtended); + /// Allocate storage for an object of type T in this stack frame. + /// Populates LV with a handle to the created object. Key identifies + /// the temporary within the stack frame, and must not be reused without + /// bumping the temporary version number. + template + APValue &createTemporary(const KeyT *Key, QualType T, + bool IsLifetimeExtended, LValue &LV); + + void describe(llvm::raw_ostream &OS) override; + + Frame *getCaller() const override { return Caller; } + SourceLocation getCallLocation() const override { return CallLoc; } + const FunctionDecl *getCallee() const override { return Callee; } + + bool isStdFunction() const { + for (const DeclContext *DC = Callee; DC; DC = DC->getParent()) + if (DC->isStdNamespace()) + return true; + return false; + } }; /// Temporarily override 'this'. 
@@ -591,71 +618,42 @@ namespace { CallStackFrame &Frame; const LValue *OldThis; }; +} - /// A partial diagnostic which we might know in advance that we are not going - /// to emit. - class OptionalDiagnostic { - PartialDiagnostic *Diag; - - public: - explicit OptionalDiagnostic(PartialDiagnostic *Diag = nullptr) - : Diag(Diag) {} - - template - OptionalDiagnostic &operator<<(const T &v) { - if (Diag) - *Diag << v; - return *this; - } - - OptionalDiagnostic &operator<<(const APSInt &I) { - if (Diag) { - SmallVector Buffer; - I.toString(Buffer); - *Diag << StringRef(Buffer.data(), Buffer.size()); - } - return *this; - } - - OptionalDiagnostic &operator<<(const APFloat &F) { - if (Diag) { - // FIXME: Force the precision of the source value down so we don't - // print digits which are usually useless (we don't really care here if - // we truncate a digit by accident in edge cases). Ideally, - // APFloat::toString would automatically print the shortest - // representation which rounds to the correct value, but it's a bit - // tricky to implement. - unsigned precision = - llvm::APFloat::semanticsPrecision(F.getSemantics()); - precision = (precision * 59 + 195) / 196; - SmallVector Buffer; - F.toString(Buffer, precision); - *Diag << StringRef(Buffer.data(), Buffer.size()); - } - return *this; - } - - OptionalDiagnostic &operator<<(const APFixedPoint &FX) { - if (Diag) { - SmallVector Buffer; - FX.toString(Buffer); - *Diag << StringRef(Buffer.data(), Buffer.size()); - } - return *this; - } - }; +static bool HandleDestruction(EvalInfo &Info, const Expr *E, + const LValue &This, QualType ThisType); +static bool HandleDestruction(EvalInfo &Info, SourceLocation Loc, + APValue::LValueBase LVBase, APValue &Value, + QualType T); +namespace { /// A cleanup, and a flag indicating whether it is lifetime-extended. 
class Cleanup { llvm::PointerIntPair Value; + APValue::LValueBase Base; + QualType T; public: - Cleanup(APValue *Val, bool IsLifetimeExtended) - : Value(Val, IsLifetimeExtended) {} + Cleanup(APValue *Val, APValue::LValueBase Base, QualType T, + bool IsLifetimeExtended) + : Value(Val, IsLifetimeExtended), Base(Base), T(T) {} bool isLifetimeExtended() const { return Value.getInt(); } - void endLifetime() { + bool endLifetime(EvalInfo &Info, bool RunDestructors) { + if (RunDestructors) { + SourceLocation Loc; + if (const ValueDecl *VD = Base.dyn_cast()) + Loc = VD->getLocation(); + else if (const Expr *E = Base.dyn_cast()) + Loc = E->getExprLoc(); + return HandleDestruction(Info, Loc, Base, *Value.getPointer(), T); + } *Value.getPointer() = APValue(); + return true; + } + + bool hasSideEffect() { + return T.isDestructedType(); } }; @@ -671,7 +669,13 @@ namespace { return llvm::hash_combine(Obj.Base, Obj.Path); } }; - enum class ConstructionPhase { None, Bases, AfterBases }; + enum class ConstructionPhase { + None, + Bases, + AfterBases, + Destroying, + DestroyingBases + }; } namespace llvm { @@ -693,6 +697,37 @@ template<> struct DenseMapInfo { } namespace { + /// A dynamically-allocated heap object. + struct DynAlloc { + /// The value of this heap-allocated object. + APValue Value; + /// The allocating expression; used for diagnostics. Either a CXXNewExpr + /// or a CallExpr (the latter is for direct calls to operator new inside + /// std::allocator::allocate). + const Expr *AllocExpr = nullptr; + + enum Kind { + New, + ArrayNew, + StdAllocator + }; + + /// Get the kind of the allocation. This must match between allocation + /// and deallocation. + Kind getKind() const { + if (auto *NE = dyn_cast(AllocExpr)) + return NE->isArray() ? 
ArrayNew : New; + assert(isa(AllocExpr)); + return StdAllocator; + } + }; + + struct DynAllocOrder { + bool operator()(DynamicAllocLValue L, DynamicAllocLValue R) const { + return L.getIndex() < R.getIndex(); + } + }; + /// EvalInfo - This is a private struct used by the evaluator to capture /// information about a subexpression as it is folded. It retains information /// about the AST context, but also maintains information about the folded @@ -707,7 +742,8 @@ namespace { /// rules. For example, the RHS of (0 && foo()) is not evaluated. We can /// evaluate the expression regardless of what the RHS is, but C only allows /// certain things in certain situations. - struct EvalInfo { + class EvalInfo : public interp::State { + public: ASTContext &Ctx; /// EvalStatus - Contains information about the evaluation. @@ -727,6 +763,13 @@ namespace { /// we will evaluate. unsigned StepsLeft; + /// Force the use of the experimental new constant interpreter, bailing out + /// with an error if a feature is not supported. + bool ForceNewConstInterp; + + /// Enable the experimental new constant interpreter. + bool EnableNewConstInterp; + /// BottomFrame - The frame in which evaluation started. This must be /// initialized after CurrentCall and CallStackDepth. CallStackFrame BottomFrame; @@ -739,6 +782,15 @@ namespace { /// evaluated, if any. APValue::LValueBase EvaluatingDecl; + enum class EvaluatingDeclKind { + None, + /// We're evaluating the construction of EvaluatingDecl. + Ctor, + /// We're evaluating the destruction of EvaluatingDecl. + Dtor, + }; + EvaluatingDeclKind IsEvaluatingDecl = EvaluatingDeclKind::None; + /// EvaluatingDeclValue - This is the value being constructed for the /// declaration whose initializer is being evaluated, if any. APValue *EvaluatingDeclValue; @@ -747,6 +799,14 @@ namespace { llvm::DenseMap ObjectsUnderConstruction; + /// Current heap allocations, along with the location where each was + /// allocated. 
We use std::map here because we need stable addresses + /// for the stored APValues. + std::map HeapAllocs; + + /// The number of heap allocations performed so far in this evaluation. + unsigned NumHeapAllocs = 0; + struct EvaluatingConstructorRAII { EvalInfo &EI; ObjectUnderConstruction Object; @@ -768,9 +828,29 @@ namespace { } }; + struct EvaluatingDestructorRAII { + EvalInfo &EI; + ObjectUnderConstruction Object; + bool DidInsert; + EvaluatingDestructorRAII(EvalInfo &EI, ObjectUnderConstruction Object) + : EI(EI), Object(Object) { + DidInsert = EI.ObjectsUnderConstruction + .insert({Object, ConstructionPhase::Destroying}) + .second; + } + void startedDestroyingBases() { + EI.ObjectsUnderConstruction[Object] = + ConstructionPhase::DestroyingBases; + } + ~EvaluatingDestructorRAII() { + if (DidInsert) + EI.ObjectsUnderConstruction.erase(Object); + } + }; + ConstructionPhase - isEvaluatingConstructor(APValue::LValueBase Base, - ArrayRef Path) { + isEvaluatingCtorDtor(APValue::LValueBase Base, + ArrayRef Path) { return ObjectsUnderConstruction.lookup({Base, Path}); } @@ -794,30 +874,21 @@ namespace { /// constant value. bool InConstantContext; + /// Whether we're checking that an expression is a potential constant + /// expression. If so, do not fail on constructs that could become constant + /// later on (such as a use of an undefined global). + bool CheckingPotentialConstantExpression = false; + + /// Whether we're checking for an expression that has undefined behavior. + /// If so, we will produce warnings if we encounter an operation that is + /// always undefined. + bool CheckingForUndefinedBehavior = false; + enum EvaluationMode { /// Evaluate as a constant expression. Stop if we find that the expression /// is not a constant expression. EM_ConstantExpression, - /// Evaluate as a potential constant expression. 
Keep going if we hit a - /// construct that we can't evaluate yet (because we don't yet know the - /// value of something) but stop if we hit something that could never be - /// a constant expression. - EM_PotentialConstantExpression, - - /// Fold the expression to a constant. Stop if we hit a side-effect that - /// we can't model. - EM_ConstantFold, - - /// Evaluate the expression looking for integer overflow and similar - /// issues. Don't worry about side-effects, and try to visit all - /// subexpressions. - EM_EvaluateForOverflow, - - /// Evaluate in any way we know how. Don't worry about side-effects that - /// can't be modeled. - EM_IgnoreSideEffects, - /// Evaluate as a constant expression. Stop if we find that the expression /// is not a constant expression. Some expressions can be retried in the /// optimizer if we don't constant fold them here, but in an unevaluated @@ -825,44 +896,51 @@ namespace { /// gets a chance to look at it. EM_ConstantExpressionUnevaluated, - /// Evaluate as a potential constant expression. Keep going if we hit a - /// construct that we can't evaluate yet (because we don't yet know the - /// value of something) but stop if we hit something that could never be - /// a constant expression. Some expressions can be retried in the - /// optimizer if we don't constant fold them here, but in an unevaluated - /// context we try to fold them immediately since the optimizer never - /// gets a chance to look at it. - EM_PotentialConstantExpressionUnevaluated, + /// Fold the expression to a constant. Stop if we hit a side-effect that + /// we can't model. + EM_ConstantFold, + + /// Evaluate in any way we know how. Don't worry about side-effects that + /// can't be modeled. + EM_IgnoreSideEffects, } EvalMode; /// Are we checking whether the expression is a potential constant /// expression? 
- bool checkingPotentialConstantExpression() const { - return EvalMode == EM_PotentialConstantExpression || - EvalMode == EM_PotentialConstantExpressionUnevaluated; + bool checkingPotentialConstantExpression() const override { + return CheckingPotentialConstantExpression; } /// Are we checking an expression for overflow? // FIXME: We should check for any kind of undefined or suspicious behavior // in such constructs, not just overflow. - bool checkingForOverflow() { return EvalMode == EM_EvaluateForOverflow; } - - EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) - : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), - CallStackDepth(0), NextCallIndex(1), - StepsLeft(getLangOpts().ConstexprStepLimit), - BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), - EvaluatingDecl((const ValueDecl *)nullptr), - EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), - HasFoldFailureDiagnostic(false), - InConstantContext(false), EvalMode(Mode) {} - - void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value) { - EvaluatingDecl = Base; - EvaluatingDeclValue = &Value; + bool checkingForUndefinedBehavior() const override { + return CheckingForUndefinedBehavior; } - const LangOptions &getLangOpts() const { return Ctx.getLangOpts(); } + EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) + : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), + CallStackDepth(0), NextCallIndex(1), + StepsLeft(getLangOpts().ConstexprStepLimit), + ForceNewConstInterp(getLangOpts().ForceNewConstInterp), + EnableNewConstInterp(ForceNewConstInterp || + getLangOpts().EnableNewConstInterp), + BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), + EvaluatingDecl((const ValueDecl *)nullptr), + EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), + HasFoldFailureDiagnostic(false), InConstantContext(false), + EvalMode(Mode) {} + + ~EvalInfo() { + discardCleanups(); + } + + void setEvaluatingDecl(APValue::LValueBase 
Base, APValue &Value, + EvaluatingDeclKind EDK = EvaluatingDeclKind::Ctor) { + EvaluatingDecl = Base; + IsEvaluatingDecl = EDK; + EvaluatingDeclValue = &Value; + } bool CheckCallLimit(SourceLocation Loc) { // Don't perform any constexpr calls (other than the call we're checking) @@ -906,133 +984,124 @@ namespace { return true; } - private: - /// Add a diagnostic to the diagnostics list. - PartialDiagnostic &addDiag(SourceLocation Loc, diag::kind DiagId) { - PartialDiagnostic PD(DiagId, Ctx.getDiagAllocator()); - EvalStatus.Diag->push_back(std::make_pair(Loc, PD)); - return EvalStatus.Diag->back().second; + APValue *createHeapAlloc(const Expr *E, QualType T, LValue &LV); + + Optional lookupDynamicAlloc(DynamicAllocLValue DA) { + Optional Result; + auto It = HeapAllocs.find(DA); + if (It != HeapAllocs.end()) + Result = &It->second; + return Result; } - /// Add notes containing a call stack to the current point of evaluation. - void addCallStack(unsigned Limit); + /// Information about a stack frame for std::allocator::[de]allocate. 
+ struct StdAllocatorCaller { + unsigned FrameIndex; + QualType ElemType; + explicit operator bool() const { return FrameIndex != 0; }; + }; + + StdAllocatorCaller getStdAllocatorCaller(StringRef FnName) const { + for (const CallStackFrame *Call = CurrentCall; Call != &BottomFrame; + Call = Call->Caller) { + const auto *MD = dyn_cast_or_null(Call->Callee); + if (!MD) + continue; + const IdentifierInfo *FnII = MD->getIdentifier(); + if (!FnII || !FnII->isStr(FnName)) + continue; + + const auto *CTSD = + dyn_cast(MD->getParent()); + if (!CTSD) + continue; + + const IdentifierInfo *ClassII = CTSD->getIdentifier(); + const TemplateArgumentList &TAL = CTSD->getTemplateArgs(); + if (CTSD->isInStdNamespace() && ClassII && + ClassII->isStr("allocator") && TAL.size() >= 1 && + TAL[0].getKind() == TemplateArgument::Type) + return {Call->Index, TAL[0].getAsType()}; + } + + return {}; + } + + void performLifetimeExtension() { + // Disable the cleanups for lifetime-extended temporaries. + CleanupStack.erase( + std::remove_if(CleanupStack.begin(), CleanupStack.end(), + [](Cleanup &C) { return C.isLifetimeExtended(); }), + CleanupStack.end()); + } + + /// Throw away any remaining cleanups at the end of evaluation. If any + /// cleanups would have had a side-effect, note that as an unmodeled + /// side-effect and return false. Otherwise, return true. + bool discardCleanups() { + for (Cleanup &C : CleanupStack) + if (C.hasSideEffect()) + if (!noteSideEffect()) + return false; + return true; + } private: - OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId, - unsigned ExtraNotes, bool IsCCEDiag) { + interp::Frame *getCurrentFrame() override { return CurrentCall; } + const interp::Frame *getBottomFrame() const override { return &BottomFrame; } - if (EvalStatus.Diag) { - // If we have a prior diagnostic, it will be noting that the expression - // isn't a constant expression. 
This diagnostic is more important, - // unless we require this evaluation to produce a constant expression. - // - // FIXME: We might want to show both diagnostics to the user in - // EM_ConstantFold mode. - if (!EvalStatus.Diag->empty()) { - switch (EvalMode) { - case EM_ConstantFold: - case EM_IgnoreSideEffects: - case EM_EvaluateForOverflow: - if (!HasFoldFailureDiagnostic) - break; - // We've already failed to fold something. Keep that diagnostic. - LLVM_FALLTHROUGH; - case EM_ConstantExpression: - case EM_PotentialConstantExpression: - case EM_ConstantExpressionUnevaluated: - case EM_PotentialConstantExpressionUnevaluated: - HasActiveDiagnostic = false; - return OptionalDiagnostic(); - } + bool hasActiveDiagnostic() override { return HasActiveDiagnostic; } + void setActiveDiagnostic(bool Flag) override { HasActiveDiagnostic = Flag; } + + void setFoldFailureDiagnostic(bool Flag) override { + HasFoldFailureDiagnostic = Flag; + } + + Expr::EvalStatus &getEvalStatus() const override { return EvalStatus; } + + ASTContext &getCtx() const override { return Ctx; } + + // If we have a prior diagnostic, it will be noting that the expression + // isn't a constant expression. This diagnostic is more important, + // unless we require this evaluation to produce a constant expression. + // + // FIXME: We might want to show both diagnostics to the user in + // EM_ConstantFold mode. + bool hasPriorDiagnostic() override { + if (!EvalStatus.Diag->empty()) { + switch (EvalMode) { + case EM_ConstantFold: + case EM_IgnoreSideEffects: + if (!HasFoldFailureDiagnostic) + break; + // We've already failed to fold something. Keep that diagnostic. 
+ LLVM_FALLTHROUGH; + case EM_ConstantExpression: + case EM_ConstantExpressionUnevaluated: + setActiveDiagnostic(false); + return true; } - - unsigned CallStackNotes = CallStackDepth - 1; - unsigned Limit = Ctx.getDiagnostics().getConstexprBacktraceLimit(); - if (Limit) - CallStackNotes = std::min(CallStackNotes, Limit + 1); - if (checkingPotentialConstantExpression()) - CallStackNotes = 0; - - HasActiveDiagnostic = true; - HasFoldFailureDiagnostic = !IsCCEDiag; - EvalStatus.Diag->clear(); - EvalStatus.Diag->reserve(1 + ExtraNotes + CallStackNotes); - addDiag(Loc, DiagId); - if (!checkingPotentialConstantExpression()) - addCallStack(Limit); - return OptionalDiagnostic(&(*EvalStatus.Diag)[0].second); } - HasActiveDiagnostic = false; - return OptionalDiagnostic(); + return false; } + + unsigned getCallStackDepth() override { return CallStackDepth; } + public: - // Diagnose that the evaluation could not be folded (FF => FoldFailure) - OptionalDiagnostic - FFDiag(SourceLocation Loc, - diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, - unsigned ExtraNotes = 0) { - return Diag(Loc, DiagId, ExtraNotes, false); - } - - OptionalDiagnostic FFDiag(const Expr *E, diag::kind DiagId - = diag::note_invalid_subexpr_in_const_expr, - unsigned ExtraNotes = 0) { - if (EvalStatus.Diag) - return Diag(E->getExprLoc(), DiagId, ExtraNotes, /*IsCCEDiag*/false); - HasActiveDiagnostic = false; - return OptionalDiagnostic(); - } - - /// Diagnose that the evaluation does not produce a C++11 core constant - /// expression. - /// - /// FIXME: Stop evaluating if we're in EM_ConstantExpression or - /// EM_PotentialConstantExpression mode and we produce one of these. - OptionalDiagnostic CCEDiag(SourceLocation Loc, diag::kind DiagId - = diag::note_invalid_subexpr_in_const_expr, - unsigned ExtraNotes = 0) { - // Don't override a previous diagnostic. Don't bother collecting - // diagnostics if we're evaluating for overflow. 
- if (!EvalStatus.Diag || !EvalStatus.Diag->empty()) { - HasActiveDiagnostic = false; - return OptionalDiagnostic(); - } - return Diag(Loc, DiagId, ExtraNotes, true); - } - OptionalDiagnostic CCEDiag(const Expr *E, diag::kind DiagId - = diag::note_invalid_subexpr_in_const_expr, - unsigned ExtraNotes = 0) { - return CCEDiag(E->getExprLoc(), DiagId, ExtraNotes); - } - /// Add a note to a prior diagnostic. - OptionalDiagnostic Note(SourceLocation Loc, diag::kind DiagId) { - if (!HasActiveDiagnostic) - return OptionalDiagnostic(); - return OptionalDiagnostic(&addDiag(Loc, DiagId)); - } - - /// Add a stack of notes to a prior diagnostic. - void addNotes(ArrayRef Diags) { - if (HasActiveDiagnostic) { - EvalStatus.Diag->insert(EvalStatus.Diag->end(), - Diags.begin(), Diags.end()); - } - } - /// Should we continue evaluation after encountering a side-effect that we /// couldn't model? bool keepEvaluatingAfterSideEffect() { switch (EvalMode) { - case EM_PotentialConstantExpression: - case EM_PotentialConstantExpressionUnevaluated: - case EM_EvaluateForOverflow: case EM_IgnoreSideEffects: return true; case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: - return false; + // By default, assume any side effect might be valid in some other + // evaluation of this expression from a different context. + return checkingPotentialConstantExpression() || + checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } @@ -1047,16 +1116,13 @@ namespace { /// Should we continue evaluation after encountering undefined behavior? 
bool keepEvaluatingAfterUndefinedBehavior() { switch (EvalMode) { - case EM_EvaluateForOverflow: case EM_IgnoreSideEffects: case EM_ConstantFold: return true; - case EM_PotentialConstantExpression: - case EM_PotentialConstantExpressionUnevaluated: case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: - return false; + return checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } @@ -1064,28 +1130,24 @@ namespace { /// Note that we hit something that was technically undefined behavior, but /// that we can evaluate past it (such as signed overflow or floating-point /// division by zero.) - bool noteUndefinedBehavior() { + bool noteUndefinedBehavior() override { EvalStatus.HasUndefinedBehavior = true; return keepEvaluatingAfterUndefinedBehavior(); } /// Should we continue evaluation as much as possible after encountering a /// construct which can't be reduced to a value? - bool keepEvaluatingAfterFailure() { + bool keepEvaluatingAfterFailure() const override { if (!StepsLeft) return false; switch (EvalMode) { - case EM_PotentialConstantExpression: - case EM_PotentialConstantExpressionUnevaluated: - case EM_EvaluateForOverflow: - return true; - case EM_ConstantExpression: case EM_ConstantExpressionUnevaluated: case EM_ConstantFold: case EM_IgnoreSideEffects: - return false; + return checkingPotentialConstantExpression() || + checkingForUndefinedBehavior(); } llvm_unreachable("Missed EvalMode case"); } @@ -1142,9 +1204,7 @@ namespace { Info.EvalStatus.Diag->empty() && !Info.EvalStatus.HasSideEffects), OldMode(Info.EvalMode) { - if (Enabled && - (Info.EvalMode == EvalInfo::EM_ConstantExpression || - Info.EvalMode == EvalInfo::EM_ConstantExpressionUnevaluated)) + if (Enabled) Info.EvalMode = EvalInfo::EM_ConstantFold; } void keepDiagnostics() { Enabled = false; } @@ -1163,8 +1223,7 @@ namespace { EvalInfo::EvaluationMode OldMode; explicit IgnoreSideEffectsRAII(EvalInfo &Info) : Info(Info), OldMode(Info.EvalMode) { - if 
(!Info.checkingPotentialConstantExpression()) - Info.EvalMode = EvalInfo::EM_IgnoreSideEffects; + Info.EvalMode = EvalInfo::EM_IgnoreSideEffects; } ~IgnoreSideEffectsRAII() { Info.EvalMode = OldMode; } @@ -1230,29 +1289,45 @@ namespace { // temporaries created in different iterations of a loop. Info.CurrentCall->pushTempVersion(); } + bool destroy(bool RunDestructors = true) { + bool OK = cleanup(Info, RunDestructors, OldStackSize); + OldStackSize = -1U; + return OK; + } ~ScopeRAII() { + if (OldStackSize != -1U) + destroy(false); // Body moved to a static method to encourage the compiler to inline away // instances of this class. - cleanup(Info, OldStackSize); Info.CurrentCall->popTempVersion(); } private: - static void cleanup(EvalInfo &Info, unsigned OldStackSize) { - unsigned NewEnd = OldStackSize; - for (unsigned I = OldStackSize, N = Info.CleanupStack.size(); - I != N; ++I) { - if (IsFullExpression && Info.CleanupStack[I].isLifetimeExtended()) { - // Full-expression cleanup of a lifetime-extended temporary: nothing - // to do, just move this cleanup to the right place in the stack. - std::swap(Info.CleanupStack[I], Info.CleanupStack[NewEnd]); - ++NewEnd; - } else { - // End the lifetime of the object. - Info.CleanupStack[I].endLifetime(); + static bool cleanup(EvalInfo &Info, bool RunDestructors, + unsigned OldStackSize) { + assert(OldStackSize <= Info.CleanupStack.size() && + "running cleanups out of order?"); + + // Run all cleanups for a block scope, and non-lifetime-extended cleanups + // for a full-expression scope. + bool Success = true; + for (unsigned I = Info.CleanupStack.size(); I > OldStackSize; --I) { + if (!(IsFullExpression && + Info.CleanupStack[I - 1].isLifetimeExtended())) { + if (!Info.CleanupStack[I - 1].endLifetime(Info, RunDestructors)) { + Success = false; + break; + } } } - Info.CleanupStack.erase(Info.CleanupStack.begin() + NewEnd, - Info.CleanupStack.end()); + + // Compact lifetime-extended cleanups. 
+ auto NewEnd = Info.CleanupStack.begin() + OldStackSize; + if (IsFullExpression) + NewEnd = + std::remove_if(NewEnd, Info.CleanupStack.end(), + [](Cleanup &C) { return !C.isLifetimeExtended(); }); + Info.CleanupStack.erase(NewEnd, Info.CleanupStack.end()); + return Success; } }; typedef ScopeRAII BlockScopeRAII; @@ -1312,74 +1387,14 @@ CallStackFrame::~CallStackFrame() { Info.CurrentCall = Caller; } -APValue &CallStackFrame::createTemporary(const void *Key, - bool IsLifetimeExtended) { - unsigned Version = Info.CurrentCall->getTempVersion(); - APValue &Result = Temporaries[MapKeyTy(Key, Version)]; - assert(Result.isAbsent() && "temporary created multiple times"); - Info.CleanupStack.push_back(Cleanup(&Result, IsLifetimeExtended)); - return Result; +static bool isRead(AccessKinds AK) { + return AK == AK_Read || AK == AK_ReadObjectRepresentation; } -static void describeCall(CallStackFrame *Frame, raw_ostream &Out); - -void EvalInfo::addCallStack(unsigned Limit) { - // Determine which calls to skip, if any. - unsigned ActiveCalls = CallStackDepth - 1; - unsigned SkipStart = ActiveCalls, SkipEnd = SkipStart; - if (Limit && Limit < ActiveCalls) { - SkipStart = Limit / 2 + Limit % 2; - SkipEnd = ActiveCalls - Limit / 2; - } - - // Walk the call stack and add the diagnostics. - unsigned CallIdx = 0; - for (CallStackFrame *Frame = CurrentCall; Frame != &BottomFrame; - Frame = Frame->Caller, ++CallIdx) { - // Skip this call? - if (CallIdx >= SkipStart && CallIdx < SkipEnd) { - if (CallIdx == SkipStart) { - // Note that we're skipping calls. - addDiag(Frame->CallLoc, diag::note_constexpr_calls_suppressed) - << unsigned(ActiveCalls - Limit); - } - continue; - } - - // Use a different note for an inheriting constructor, because from the - // user's perspective it's not really a function at all. 
- if (auto *CD = dyn_cast_or_null(Frame->Callee)) { - if (CD->isInheritingConstructor()) { - addDiag(Frame->CallLoc, diag::note_constexpr_inherited_ctor_call_here) - << CD->getParent(); - continue; - } - } - - SmallVector Buffer; - llvm::raw_svector_ostream Out(Buffer); - describeCall(Frame, Out); - addDiag(Frame->CallLoc, diag::note_constexpr_call_here) << Out.str(); - } -} - -/// Kinds of access we can perform on an object, for diagnostics. Note that -/// we consider a member function call to be a kind of access, even though -/// it is not formally an access of the object, because it has (largely) the -/// same set of semantic restrictions. -enum AccessKinds { - AK_Read, - AK_Assign, - AK_Increment, - AK_Decrement, - AK_MemberCall, - AK_DynamicCast, - AK_TypeId, -}; - static bool isModification(AccessKinds AK) { switch (AK) { case AK_Read: + case AK_ReadObjectRepresentation: case AK_MemberCall: case AK_DynamicCast: case AK_TypeId: @@ -1387,14 +1402,20 @@ static bool isModification(AccessKinds AK) { case AK_Assign: case AK_Increment: case AK_Decrement: + case AK_Construct: + case AK_Destroy: return true; } llvm_unreachable("unknown access kind"); } +static bool isAnyAccess(AccessKinds AK) { + return isRead(AK) || isModification(AK); +} + /// Is this an access per the C++ definition? 
static bool isFormalAccess(AccessKinds AK) { - return AK == AK_Read || isModification(AK); + return isAnyAccess(AK) && AK != AK_Construct && AK != AK_Destroy; } namespace { @@ -1490,9 +1511,10 @@ namespace { IsNullPtr = false; } - void setNull(QualType PointerTy, uint64_t TargetVal) { + void setNull(ASTContext &Ctx, QualType PointerTy) { Base = (Expr *)nullptr; - Offset = CharUnits::fromQuantity(TargetVal); + Offset = + CharUnits::fromQuantity(Ctx.getTargetNullPointerValue(PointerTy)); InvalidBase = false; Designator = SubobjectDesignator(PointerTy->getPointeeType()); IsNullPtr = true; @@ -1502,6 +1524,12 @@ namespace { set(B, true); } + std::string toString(ASTContext &Ctx, QualType T) const { + APValue Printable; + moveInto(Printable); + return Printable.getAsString(Ctx, T); + } + private: // Check that this LValue is not based on a null pointer. If it is, produce // a diagnostic and mark the designator as invalid. @@ -1724,15 +1752,6 @@ static bool EvaluateFixedPoint(const Expr *E, APFixedPoint &Result, // Misc utilities //===----------------------------------------------------------------------===// -/// A helper function to create a temporary and set an LValue. -template -static APValue &createTemporary(const KeyTy *Key, bool IsLifetimeExtended, - LValue &LV, CallStackFrame &Frame) { - LV.set({Key, Frame.Info.CurrentCall->Index, - Frame.Info.CurrentCall->getTempVersion()}); - return Frame.createTemporary(Key, IsLifetimeExtended); -} - /// Negate an APSInt in place, converting it to a signed form if necessary, and /// preserving its value (by extending by up to one bit as needed). 
static void negateAsSigned(APSInt &Int) { @@ -1743,37 +1762,74 @@ static void negateAsSigned(APSInt &Int) { Int = -Int; } +template +APValue &CallStackFrame::createTemporary(const KeyT *Key, QualType T, + bool IsLifetimeExtended, LValue &LV) { + unsigned Version = getTempVersion(); + APValue::LValueBase Base(Key, Index, Version); + LV.set(Base); + APValue &Result = Temporaries[MapKeyTy(Key, Version)]; + assert(Result.isAbsent() && "temporary created multiple times"); + + // If we're creating a temporary immediately in the operand of a speculative + // evaluation, don't register a cleanup to be run outside the speculative + // evaluation context, since we won't actually be able to initialize this + // object. + if (Index <= Info.SpeculativeEvaluationDepth) { + if (T.isDestructedType()) + Info.noteSideEffect(); + } else { + Info.CleanupStack.push_back(Cleanup(&Result, Base, T, IsLifetimeExtended)); + } + return Result; +} + +APValue *EvalInfo::createHeapAlloc(const Expr *E, QualType T, LValue &LV) { + if (NumHeapAllocs > DynamicAllocLValue::getMaxIndex()) { + FFDiag(E, diag::note_constexpr_heap_alloc_limit_exceeded); + return nullptr; + } + + DynamicAllocLValue DA(NumHeapAllocs++); + LV.set(APValue::LValueBase::getDynamicAlloc(DA, T)); + auto Result = HeapAllocs.emplace(std::piecewise_construct, + std::forward_as_tuple(DA), std::tuple<>()); + assert(Result.second && "reused a heap alloc index?"); + Result.first->second.AllocExpr = E; + return &Result.first->second.Value; +} + /// Produce a string describing the given constexpr call. 
-static void describeCall(CallStackFrame *Frame, raw_ostream &Out) { +void CallStackFrame::describe(raw_ostream &Out) { unsigned ArgIndex = 0; - bool IsMemberCall = isa(Frame->Callee) && - !isa(Frame->Callee) && - cast(Frame->Callee)->isInstance(); + bool IsMemberCall = isa(Callee) && + !isa(Callee) && + cast(Callee)->isInstance(); if (!IsMemberCall) - Out << *Frame->Callee << '('; + Out << *Callee << '('; - if (Frame->This && IsMemberCall) { + if (This && IsMemberCall) { APValue Val; - Frame->This->moveInto(Val); - Val.printPretty(Out, Frame->Info.Ctx, - Frame->This->Designator.MostDerivedType); + This->moveInto(Val); + Val.printPretty(Out, Info.Ctx, + This->Designator.MostDerivedType); // FIXME: Add parens around Val if needed. - Out << "->" << *Frame->Callee << '('; + Out << "->" << *Callee << '('; IsMemberCall = false; } - for (FunctionDecl::param_const_iterator I = Frame->Callee->param_begin(), - E = Frame->Callee->param_end(); I != E; ++I, ++ArgIndex) { + for (FunctionDecl::param_const_iterator I = Callee->param_begin(), + E = Callee->param_end(); I != E; ++I, ++ArgIndex) { if (ArgIndex > (unsigned)IsMemberCall) Out << ", "; const ParmVarDecl *Param = *I; - const APValue &Arg = Frame->Arguments[ArgIndex]; - Arg.printPretty(Out, Frame->Info.Ctx, Param->getType()); + const APValue &Arg = Arguments[ArgIndex]; + Arg.printPretty(Out, Info.Ctx, Param->getType()); if (ArgIndex == 0 && IsMemberCall) - Out << "->" << *Frame->Callee << '('; + Out << "->" << *Callee << '('; } Out << ')'; @@ -1813,7 +1869,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) { return isa(D); } - if (B.is()) + if (B.is() || B.is()) return true; const Expr *E = B.get(); @@ -1912,15 +1968,39 @@ static void NoteLValueLocation(EvalInfo &Info, APValue::LValueBase Base) { Info.Note(VD->getLocation(), diag::note_declared_at); else if (const Expr *E = Base.dyn_cast()) Info.Note(E->getExprLoc(), diag::note_constexpr_temporary_here); + else if (DynamicAllocLValue DA = Base.dyn_cast()) { + // 
FIXME: Produce a note for dangling pointers too. + if (Optional Alloc = Info.lookupDynamicAlloc(DA)) + Info.Note((*Alloc)->AllocExpr->getExprLoc(), + diag::note_constexpr_dynamic_alloc_here); + } // We have no information to show for a typeid(T) object. } +enum class CheckEvaluationResultKind { + ConstantExpression, + FullyInitialized, +}; + +/// Materialized temporaries that we've already checked to determine if they're +/// initializsed by a constant expression. +using CheckedTemporaries = + llvm::SmallPtrSet; + +static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, + EvalInfo &Info, SourceLocation DiagLoc, + QualType Type, const APValue &Value, + Expr::ConstExprUsage Usage, + SourceLocation SubobjectLoc, + CheckedTemporaries &CheckedTemps); + /// Check that this reference or pointer core constant expression is a valid /// value for an address or reference constant expression. Return true if we /// can fold this expression, whether or not it's a constant expression. static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, QualType Type, const LValue &LVal, - Expr::ConstExprUsage Usage) { + Expr::ConstExprUsage Usage, + CheckedTemporaries &CheckedTemps) { bool IsReferenceType = Type->isReferenceType(); APValue::LValueBase Base = LVal.getLValueBase(); @@ -1946,14 +2026,23 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, LVal.getLValueCallIndex() == 0) && "have call index for global lvalue"); + if (Base.is()) { + Info.FFDiag(Loc, diag::note_constexpr_dynamic_alloc) + << IsReferenceType << !Designator.Entries.empty(); + NoteLValueLocation(Info, Base); + return false; + } + if (const ValueDecl *VD = Base.dyn_cast()) { if (const VarDecl *Var = dyn_cast(VD)) { // Check if this is a thread-local variable. if (Var->getTLSKind()) + // FIXME: Diagnostic! return false; // A dllimport variable never acts like a constant. if (Usage == Expr::EvaluateForCodeGen && Var->hasAttr()) + // FIXME: Diagnostic! 
return false; } if (const auto *FD = dyn_cast(VD)) { @@ -1969,6 +2058,25 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc, // perform initialization with the address of the thunk. if (Info.getLangOpts().CPlusPlus && Usage == Expr::EvaluateForCodeGen && FD->hasAttr()) + // FIXME: Diagnostic! + return false; + } + } else if (const auto *MTE = dyn_cast_or_null( + Base.dyn_cast())) { + if (CheckedTemps.insert(MTE).second) { + QualType TempType = getType(Base); + if (TempType.isDestructedType()) { + Info.FFDiag(MTE->getExprLoc(), + diag::note_constexpr_unsupported_tempoarary_nontrivial_dtor) + << TempType; + return false; + } + + APValue *V = Info.Ctx.getMaterializedTemporaryValue(MTE, false); + assert(V && "evasluation result refers to uninitialised temporary"); + if (!CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, + Info, MTE->getExprLoc(), TempType, *V, + Usage, SourceLocation(), CheckedTemps)) return false; } } @@ -2043,14 +2151,12 @@ static bool CheckLiteralType(EvalInfo &Info, const Expr *E, return false; } -/// Check that this core constant expression value is a valid value for a -/// constant expression. If not, report an appropriate diagnostic. Does not -/// check that the expression is of literal type. 
-static bool -CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, - const APValue &Value, - Expr::ConstExprUsage Usage = Expr::EvaluateForCodeGen, - SourceLocation SubobjectLoc = SourceLocation()) { +static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, + EvalInfo &Info, SourceLocation DiagLoc, + QualType Type, const APValue &Value, + Expr::ConstExprUsage Usage, + SourceLocation SubobjectLoc, + CheckedTemporaries &CheckedTemps) { if (!Value.hasValue()) { Info.FFDiag(DiagLoc, diag::note_constexpr_uninitialized) << true << Type; @@ -2070,30 +2176,31 @@ CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, if (Value.isArray()) { QualType EltTy = Type->castAsArrayTypeUnsafe()->getElementType(); for (unsigned I = 0, N = Value.getArrayInitializedElts(); I != N; ++I) { - if (!CheckConstantExpression(Info, DiagLoc, EltTy, - Value.getArrayInitializedElt(I), Usage, - SubobjectLoc)) + if (!CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, + Value.getArrayInitializedElt(I), Usage, + SubobjectLoc, CheckedTemps)) return false; } if (!Value.hasArrayFiller()) return true; - return CheckConstantExpression(Info, DiagLoc, EltTy, Value.getArrayFiller(), - Usage, SubobjectLoc); + return CheckEvaluationResult(CERK, Info, DiagLoc, EltTy, + Value.getArrayFiller(), Usage, SubobjectLoc, + CheckedTemps); } if (Value.isUnion() && Value.getUnionField()) { - return CheckConstantExpression(Info, DiagLoc, - Value.getUnionField()->getType(), - Value.getUnionValue(), Usage, - Value.getUnionField()->getLocation()); + return CheckEvaluationResult( + CERK, Info, DiagLoc, Value.getUnionField()->getType(), + Value.getUnionValue(), Usage, Value.getUnionField()->getLocation(), + CheckedTemps); } if (Value.isStruct()) { RecordDecl *RD = Type->castAs()->getDecl(); if (const CXXRecordDecl *CD = dyn_cast(RD)) { unsigned BaseIndex = 0; for (const CXXBaseSpecifier &BS : CD->bases()) { - if (!CheckConstantExpression(Info, DiagLoc, BS.getType(), - 
Value.getStructBase(BaseIndex), Usage, - BS.getBeginLoc())) + if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), + Value.getStructBase(BaseIndex), Usage, + BS.getBeginLoc(), CheckedTemps)) return false; ++BaseIndex; } @@ -2102,26 +2209,66 @@ CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, if (I->isUnnamedBitfield()) continue; - if (!CheckConstantExpression(Info, DiagLoc, I->getType(), - Value.getStructField(I->getFieldIndex()), - Usage, I->getLocation())) + if (!CheckEvaluationResult(CERK, Info, DiagLoc, I->getType(), + Value.getStructField(I->getFieldIndex()), + Usage, I->getLocation(), CheckedTemps)) return false; } } - if (Value.isLValue()) { + if (Value.isLValue() && + CERK == CheckEvaluationResultKind::ConstantExpression) { LValue LVal; LVal.setFrom(Info.Ctx, Value); - return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal, Usage); + return CheckLValueConstantExpression(Info, DiagLoc, Type, LVal, Usage, + CheckedTemps); } - if (Value.isMemberPointer()) + if (Value.isMemberPointer() && + CERK == CheckEvaluationResultKind::ConstantExpression) return CheckMemberPointerConstantExpression(Info, DiagLoc, Type, Value, Usage); // Everything else is fine. return true; } +/// Check that this core constant expression value is a valid value for a +/// constant expression. If not, report an appropriate diagnostic. Does not +/// check that the expression is of literal type. +static bool +CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, QualType Type, + const APValue &Value, + Expr::ConstExprUsage Usage = Expr::EvaluateForCodeGen) { + CheckedTemporaries CheckedTemps; + return CheckEvaluationResult(CheckEvaluationResultKind::ConstantExpression, + Info, DiagLoc, Type, Value, Usage, + SourceLocation(), CheckedTemps); +} + +/// Check that this evaluated value is fully-initialized and can be loaded by +/// an lvalue-to-rvalue conversion. 
+static bool CheckFullyInitialized(EvalInfo &Info, SourceLocation DiagLoc, + QualType Type, const APValue &Value) { + CheckedTemporaries CheckedTemps; + return CheckEvaluationResult( + CheckEvaluationResultKind::FullyInitialized, Info, DiagLoc, Type, Value, + Expr::EvaluateForCodeGen, SourceLocation(), CheckedTemps); +} + +/// Enforce C++2a [expr.const]/4.17, which disallows new-expressions unless +/// "the allocated storage is deallocated within the evaluation". +static bool CheckMemoryLeaks(EvalInfo &Info) { + if (!Info.HeapAllocs.empty()) { + // We can still fold to a constant despite a compile-time memory leak, + // so long as the heap allocation isn't referenced in the result (we check + // that in CheckConstantExpression). + Info.CCEDiag(Info.HeapAllocs.begin()->second.AllocExpr, + diag::note_constexpr_memory_leak) + << unsigned(Info.HeapAllocs.size() - 1); + } + return true; +} + static bool EvalPointerValueAsBool(const APValue &Value, bool &Result) { // A null base expression indicates a null pointer. These are always // evaluatable, and they are false unless the offset is zero. @@ -2323,7 +2470,7 @@ static bool CheckedIntArithmetic(EvalInfo &Info, const Expr *E, APSInt Value(Op(LHS.extend(BitWidth), RHS.extend(BitWidth)), false); Result = Value.trunc(LHS.getBitWidth()); if (Result.extend(BitWidth) != Value) { - if (Info.checkingForOverflow()) + if (Info.checkingForUndefinedBehavior()) Info.Ctx.getDiagnostics().Report(E->getExprLoc(), diag::warn_integer_constant_overflow) << Result.toString(10) << E->getType(); @@ -2813,9 +2960,10 @@ static APSInt extractStringLiteralCharacter(EvalInfo &Info, const Expr *Lit, // FIXME: This is inefficient; we should probably introduce something similar // to the LLVM ConstantDataArray to make this cheaper. 
static void expandStringLiteral(EvalInfo &Info, const StringLiteral *S, - APValue &Result) { - const ConstantArrayType *CAT = - Info.Ctx.getAsConstantArrayType(S->getType()); + APValue &Result, + QualType AllocType = QualType()) { + const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType( + AllocType.isNull() ? S->getType() : AllocType); assert(CAT && "string literal isn't an array"); QualType CharType = CAT->getElementType(); assert(CharType->isIntegerType() && "unexpected character type"); @@ -2879,8 +3027,8 @@ static bool isReadByLvalueToRvalueConversion(QualType T) { /// Diagnose an attempt to read from any unreadable field within the specified /// type, which might be a class type. -static bool diagnoseUnreadableFields(EvalInfo &Info, const Expr *E, - QualType T) { +static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK, + QualType T) { CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); if (!RD) return false; @@ -2895,17 +3043,17 @@ static bool diagnoseUnreadableFields(EvalInfo &Info, const Expr *E, // FIXME: Add core issue number for the union case. if (Field->isMutable() && (RD->isUnion() || isReadByLvalueToRvalueConversion(Field->getType()))) { - Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1) << Field; + Info.FFDiag(E, diag::note_constexpr_access_mutable, 1) << AK << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return true; } - if (diagnoseUnreadableFields(Info, E, Field->getType())) + if (diagnoseMutableFields(Info, E, AK, Field->getType())) return true; } for (auto &BaseSpec : RD->bases()) - if (diagnoseUnreadableFields(Info, E, BaseSpec.getType())) + if (diagnoseMutableFields(Info, E, AK, BaseSpec.getType())) return true; // All mutable fields were empty, and thus not actually read. 
@@ -2913,7 +3061,8 @@ static bool diagnoseUnreadableFields(EvalInfo &Info, const Expr *E, } static bool lifetimeStartedInEvaluation(EvalInfo &Info, - APValue::LValueBase Base) { + APValue::LValueBase Base, + bool MutableSubobject = false) { // A temporary we created. if (Base.getCallIndex()) return true; @@ -2922,19 +3071,42 @@ static bool lifetimeStartedInEvaluation(EvalInfo &Info, if (!Evaluating) return false; - // The variable whose initializer we're evaluating. - if (auto *BaseD = Base.dyn_cast()) - if (declaresSameEntity(Evaluating, BaseD)) - return true; + auto *BaseD = Base.dyn_cast(); - // A temporary lifetime-extended by the variable whose initializer we're - // evaluating. - if (auto *BaseE = Base.dyn_cast()) - if (auto *BaseMTE = dyn_cast(BaseE)) - if (declaresSameEntity(BaseMTE->getExtendingDecl(), Evaluating)) - return true; + switch (Info.IsEvaluatingDecl) { + case EvalInfo::EvaluatingDeclKind::None: + return false; - return false; + case EvalInfo::EvaluatingDeclKind::Ctor: + // The variable whose initializer we're evaluating. + if (BaseD) + return declaresSameEntity(Evaluating, BaseD); + + // A temporary lifetime-extended by the variable whose initializer we're + // evaluating. + if (auto *BaseE = Base.dyn_cast()) + if (auto *BaseMTE = dyn_cast(BaseE)) + return declaresSameEntity(BaseMTE->getExtendingDecl(), Evaluating); + return false; + + case EvalInfo::EvaluatingDeclKind::Dtor: + // C++2a [expr.const]p6: + // [during constant destruction] the lifetime of a and its non-mutable + // subobjects (but not its mutable subobjects) [are] considered to start + // within e. + // + // FIXME: We can meaningfully extend this to cover non-const objects, but + // we will need special handling: we should be able to access only + // subobjects of such objects that are themselves declared const. 
+ if (!BaseD || + !(BaseD->getType().isConstQualified() || + BaseD->getType()->isReferenceType()) || + MutableSubobject) + return false; + return declaresSameEntity(Evaluating, BaseD); + } + + llvm_unreachable("unknown evaluating decl kind"); } namespace { @@ -2952,13 +3124,13 @@ struct CompleteObject { CompleteObject(APValue::LValueBase Base, APValue *Value, QualType Type) : Base(Base), Value(Value), Type(Type) {} - bool mayReadMutableMembers(EvalInfo &Info) const { + bool mayAccessMutableMembers(EvalInfo &Info, AccessKinds AK) const { // In C++14 onwards, it is permitted to read a mutable member whose // lifetime began within the evaluation. // FIXME: Should we also allow this in C++11? if (!Info.getLangOpts().CPlusPlus14) return false; - return lifetimeStartedInEvaluation(Info, Base); + return lifetimeStartedInEvaluation(Info, Base, /*MutableSubobject*/true); } explicit operator bool() const { return !Type.isNull(); } @@ -3006,19 +3178,22 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, // Walk the designator's path to find the subobject. for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) { // Reading an indeterminate value is undefined, but assigning over one is OK. - if (O->isAbsent() || (O->isIndeterminate() && handler.AccessKind != AK_Assign)) { + if ((O->isAbsent() && !(handler.AccessKind == AK_Construct && I == N)) || + (O->isIndeterminate() && handler.AccessKind != AK_Construct && + handler.AccessKind != AK_Assign && + handler.AccessKind != AK_ReadObjectRepresentation)) { if (!Info.checkingPotentialConstantExpression()) Info.FFDiag(E, diag::note_constexpr_access_uninit) << handler.AccessKind << O->isIndeterminate(); return handler.failed(); } - // C++ [class.ctor]p5: + // C++ [class.ctor]p5, C++ [class.dtor]p5: // const and volatile semantics are not applied on an object under - // construction. + // {con,de}struction. 
if ((ObjType.isConstQualified() || ObjType.isVolatileQualified()) && ObjType->isRecordType() && - Info.isEvaluatingConstructor( + Info.isEvaluatingCtorDtor( Obj.Base, llvm::makeArrayRef(Sub.Entries.begin(), Sub.Entries.begin() + I)) != ConstructionPhase::None) { @@ -3061,9 +3236,9 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, // things we need to check: if there are any mutable subobjects, we // cannot perform this read. (This only happens when performing a trivial // copy or assignment.) - if (ObjType->isRecordType() && handler.AccessKind == AK_Read && - !Obj.mayReadMutableMembers(Info) && - diagnoseUnreadableFields(Info, E, ObjType)) + if (ObjType->isRecordType() && + !Obj.mayAccessMutableMembers(Info, handler.AccessKind) && + diagnoseMutableFields(Info, E, handler.AccessKind, ObjType)) return handler.failed(); } @@ -3101,7 +3276,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, if (O->getArrayInitializedElts() > Index) O = &O->getArrayInitializedElt(Index); - else if (handler.AccessKind != AK_Read) { + else if (!isRead(handler.AccessKind)) { expandArray(*O, Index); O = &O->getArrayInitializedElt(Index); } else @@ -3131,10 +3306,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, : O->getComplexFloatReal(), ObjType); } } else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) { - if (Field->isMutable() && handler.AccessKind == AK_Read && - !Obj.mayReadMutableMembers(Info)) { - Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1) - << Field; + if (Field->isMutable() && + !Obj.mayAccessMutableMembers(Info, handler.AccessKind)) { + Info.FFDiag(E, diag::note_constexpr_access_mutable, 1) + << handler.AccessKind << Field; Info.Note(Field->getLocation(), diag::note_declared_at); return handler.failed(); } @@ -3145,9 +3320,18 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, const FieldDecl *UnionField = O->getUnionField(); if (!UnionField || 
UnionField->getCanonicalDecl() != Field->getCanonicalDecl()) { - Info.FFDiag(E, diag::note_constexpr_access_inactive_union_member) - << handler.AccessKind << Field << !UnionField << UnionField; - return handler.failed(); + if (I == N - 1 && handler.AccessKind == AK_Construct) { + // Placement new onto an inactive union member makes it active. + O->setUnion(Field, APValue()); + } else { + // FIXME: If O->getUnionValue() is absent, report that there's no + // active union member rather than reporting the prior active union + // member. We'll need to fix nullptr_t to not use APValue() as its + // representation first. + Info.FFDiag(E, diag::note_constexpr_access_inactive_union_member) + << handler.AccessKind << Field << !UnionField << UnionField; + return handler.failed(); + } } O = &O->getUnionValue(); } else @@ -3171,15 +3355,17 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, namespace { struct ExtractSubobjectHandler { EvalInfo &Info; + const Expr *E; APValue &Result; - - static const AccessKinds AccessKind = AK_Read; + const AccessKinds AccessKind; typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { Result = Subobj; - return true; + if (AccessKind == AK_ReadObjectRepresentation) + return true; + return CheckFullyInitialized(Info, E->getExprLoc(), SubobjType, Result); } bool found(APSInt &Value, QualType SubobjType) { Result = APValue(Value); @@ -3192,14 +3378,13 @@ struct ExtractSubobjectHandler { }; } // end anonymous namespace -const AccessKinds ExtractSubobjectHandler::AccessKind; - /// Extract the designated sub-object of an rvalue. 
static bool extractSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj, - const SubobjectDesignator &Sub, - APValue &Result) { - ExtractSubobjectHandler Handler = { Info, Result }; + const SubobjectDesignator &Sub, APValue &Result, + AccessKinds AK = AK_Read) { + assert(AK == AK_Read || AK == AK_ReadObjectRepresentation); + ExtractSubobjectHandler Handler = {Info, E, Result, AK}; return findSubobject(Info, E, Obj, Sub, Handler); } @@ -3345,13 +3530,13 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, } } - bool IsAccess = isFormalAccess(AK); + bool IsAccess = isAnyAccess(AK); // C++11 DR1311: An lvalue-to-rvalue conversion on a volatile-qualified type // is not a constant expression (even if the object is non-volatile). We also // apply this rule to C++98, in order to conform to the expected 'volatile' // semantics. - if (IsAccess && LValType.isVolatileQualified()) { + if (isFormalAccess(AK) && LValType.isVolatileQualified()) { if (Info.getLangOpts().CPlusPlus) Info.FFDiag(E, diag::note_constexpr_access_volatile_type) << AK << LValType; @@ -3386,8 +3571,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, // the variable we're reading must be const. if (!Frame) { if (Info.getLangOpts().CPlusPlus14 && - declaresSameEntity( - VD, Info.EvaluatingDecl.dyn_cast())) { + lifetimeStartedInEvaluation(Info, LVal.Base)) { // OK, we can read and modify an object if we're in the process of // evaluating its initializer, because its lifetime began in this // evaluation. 
@@ -3446,6 +3630,14 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, if (!evaluateVarDeclInit(Info, E, VD, Frame, BaseVal, &LVal)) return CompleteObject(); + } else if (DynamicAllocLValue DA = LVal.Base.dyn_cast()) { + Optional Alloc = Info.lookupDynamicAlloc(DA); + if (!Alloc) { + Info.FFDiag(E, diag::note_constexpr_access_deleted_object) << AK; + return CompleteObject(); + } + return CompleteObject(LVal.Base, &(*Alloc)->Value, + LVal.Base.getDynamicAllocType()); } else { const Expr *Base = LVal.Base.dyn_cast(); @@ -3469,11 +3661,14 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, // int x = ++r; // constexpr int k = r; // Therefore we use the C++14 rules in C++11 too. - const ValueDecl *VD = Info.EvaluatingDecl.dyn_cast(); - const ValueDecl *ED = MTE->getExtendingDecl(); + // + // Note that temporaries whose lifetimes began while evaluating a + // variable's constructor are not usable while evaluating the + // corresponding destructor, not even if they're of const-qualified + // types. if (!(BaseType.isConstQualified() && BaseType->isIntegralOrEnumerationType()) && - !(VD && VD->getCanonicalDecl() == ED->getCanonicalDecl())) { + !lifetimeStartedInEvaluation(Info, LVal.Base)) { if (!IsAccess) return CompleteObject(LVal.getLValueBase(), nullptr, BaseType); Info.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; @@ -3525,15 +3720,22 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E, /// case of a non-class type). /// \param LVal - The glvalue on which we are attempting to perform this action. /// \param RVal - The produced value will be placed here. 
-static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, - QualType Type, - const LValue &LVal, APValue &RVal) { +/// \param WantObjectRepresentation - If true, we're looking for the object +/// representation rather than the value, and in particular, +/// there is no requirement that the result be fully initialized. +static bool +handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, QualType Type, + const LValue &LVal, APValue &RVal, + bool WantObjectRepresentation = false) { if (LVal.Designator.Invalid) return false; // Check for special cases where there is no existing APValue to look at. const Expr *Base = LVal.Base.dyn_cast(); + AccessKinds AK = + WantObjectRepresentation ? AK_ReadObjectRepresentation : AK_Read; + if (Base && !LVal.getLValueCallIndex() && !Type.isVolatileQualified()) { if (const CompoundLiteralExpr *CLE = dyn_cast(Base)) { // In C99, a CompoundLiteralExpr is an lvalue, and we defer evaluating the @@ -3547,7 +3749,7 @@ static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, if (!Evaluate(Lit, Info, CLE->getInitializer())) return false; CompleteObject LitObj(LVal.Base, &Lit, Base->getType()); - return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal); + return extractSubobject(Info, Conv, LitObj, LVal.Designator, RVal, AK); } else if (isa(Base) || isa(Base)) { // Special-case character extraction so we don't have to construct an // APValue for the whole string. 
@@ -3562,7 +3764,7 @@ static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, } if (LVal.Designator.isOnePastTheEnd()) { if (Info.getLangOpts().CPlusPlus11) - Info.FFDiag(Conv, diag::note_constexpr_access_past_end) << AK_Read; + Info.FFDiag(Conv, diag::note_constexpr_access_past_end) << AK; else Info.FFDiag(Conv); return false; @@ -3573,8 +3775,8 @@ static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv, } } - CompleteObject Obj = findCompleteObject(Info, Conv, AK_Read, LVal, Type); - return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal); + CompleteObject Obj = findCompleteObject(Info, Conv, AK, LVal, Type); + return Obj && extractSubobject(Info, Conv, Obj, LVal.Designator, RVal, AK); } /// Perform an assignment of Val to LVal. Takes ownership of Val. @@ -3866,7 +4068,7 @@ static bool handleIncDec(EvalInfo &Info, const Expr *E, const LValue &LVal, /// Build an lvalue for the object argument of a member function call. static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object, LValue &This) { - if (Object->getType()->isPointerType()) + if (Object->getType()->isPointerType() && Object->isRValue()) return EvaluatePointer(Object, This, Info); if (Object->isGLValue()) @@ -4028,6 +4230,40 @@ static bool HandleBaseToDerivedCast(EvalInfo &Info, const CastExpr *E, return CastToDerivedClass(Info, E, Result, TargetType, NewEntriesSize); } +/// Get the value to use for a default-initialized object of type T. 
+static APValue getDefaultInitValue(QualType T) { + if (auto *RD = T->getAsCXXRecordDecl()) { + if (RD->isUnion()) + return APValue((const FieldDecl*)nullptr); + + APValue Struct(APValue::UninitStruct(), RD->getNumBases(), + std::distance(RD->field_begin(), RD->field_end())); + + unsigned Index = 0; + for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), + End = RD->bases_end(); I != End; ++I, ++Index) + Struct.getStructBase(Index) = getDefaultInitValue(I->getType()); + + for (const auto *I : RD->fields()) { + if (I->isUnnamedBitfield()) + continue; + Struct.getStructField(I->getFieldIndex()) = + getDefaultInitValue(I->getType()); + } + return Struct; + } + + if (auto *AT = + dyn_cast_or_null(T->getAsArrayTypeUnsafe())) { + APValue Array(APValue::UninitArray(), 0, AT->getSize().getZExtValue()); + if (Array.hasArrayFiller()) + Array.getArrayFiller() = getDefaultInitValue(AT->getElementType()); + return Array; + } + + return APValue::IndeterminateValue(); +} + namespace { enum EvalStmtResult { /// Evaluation failed. 
@@ -4051,14 +4287,13 @@ static bool EvaluateVarDecl(EvalInfo &Info, const VarDecl *VD) { return true; LValue Result; - APValue &Val = createTemporary(VD, true, Result, *Info.CurrentCall); + APValue &Val = + Info.CurrentCall->createTemporary(VD, VD->getType(), true, Result); const Expr *InitE = VD->getInit(); if (!InitE) { - Info.FFDiag(VD->getBeginLoc(), diag::note_constexpr_uninitialized) - << false << VD->getType(); - Val = APValue(); - return false; + Val = getDefaultInitValue(VD->getType()); + return true; } if (InitE->isValueDependent()) @@ -4095,7 +4330,9 @@ static bool EvaluateCond(EvalInfo &Info, const VarDecl *CondDecl, FullExpressionRAII Scope(Info); if (CondDecl && !EvaluateDecl(Info, CondDecl)) return false; - return EvaluateAsBooleanCondition(Cond, Result, Info); + if (!EvaluateAsBooleanCondition(Cond, Result, Info)) + return false; + return Scope.destroy(); } namespace { @@ -4131,7 +4368,12 @@ static EvalStmtResult EvaluateLoopBody(StmtResult &Result, EvalInfo &Info, const Stmt *Body, const SwitchCase *Case = nullptr) { BlockScopeRAII Scope(Info); - switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, Body, Case)) { + + EvalStmtResult ESR = EvaluateStmt(Result, Info, Body, Case); + if (ESR != ESR_Failed && ESR != ESR_CaseNotFound && !Scope.destroy()) + ESR = ESR_Failed; + + switch (ESR) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: @@ -4153,17 +4395,23 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, // Evaluate the switch condition. 
APSInt Value; { - FullExpressionRAII Scope(Info); if (const Stmt *Init = SS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + ESR = ESR_Failed; return ESR; + } } + + FullExpressionRAII CondScope(Info); if (SS->getConditionVariable() && !EvaluateDecl(Info, SS->getConditionVariable())) return ESR_Failed; if (!EvaluateInteger(SS->getCond(), Value, Info)) return ESR_Failed; + if (!CondScope.destroy()) + return ESR_Failed; } // Find the switch case corresponding to the value of the condition. @@ -4187,10 +4435,14 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info, } if (!Found) - return ESR_Succeeded; + return Scope.destroy() ? ESR_Succeeded : ESR_Failed; // Search the switch body for the switch case and evaluate it from there. - switch (EvalStmtResult ESR = EvaluateStmt(Result, Info, SS->getBody(), Found)) { + EvalStmtResult ESR = EvaluateStmt(Result, Info, SS->getBody(), Found); + if (ESR != ESR_Failed && ESR != ESR_CaseNotFound && !Scope.destroy()) + return ESR_Failed; + + switch (ESR) { case ESR_Break: return ESR_Succeeded; case ESR_Succeeded: @@ -4217,10 +4469,6 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, // If we're hunting down a 'case' or 'default' label, recurse through // substatements until we hit the label. if (Case) { - // FIXME: We don't start the lifetime of objects whose initialization we - // jump over. However, such objects must be of class type with a trivial - // default constructor that initialize all subobjects, so must be empty, - // so this almost never matters. switch (S->getStmtClass()) { case Stmt::CompoundStmtClass: // FIXME: Precompute which substatement of a compound statement we @@ -4246,10 +4494,35 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, // preceded by our switch label. 
BlockScopeRAII Scope(Info); + // Step into the init statement in case it brings an (uninitialized) + // variable into scope. + if (const Stmt *Init = IS->getInit()) { + EvalStmtResult ESR = EvaluateStmt(Result, Info, Init, Case); + if (ESR != ESR_CaseNotFound) { + assert(ESR != ESR_Succeeded); + return ESR; + } + } + + // Condition variable must be initialized if it exists. + // FIXME: We can skip evaluating the body if there's a condition + // variable, as there can't be any case labels within it. + // (The same is true for 'for' statements.) + EvalStmtResult ESR = EvaluateStmt(Result, Info, IS->getThen(), Case); - if (ESR != ESR_CaseNotFound || !IS->getElse()) + if (ESR == ESR_Failed) return ESR; - return EvaluateStmt(Result, Info, IS->getElse(), Case); + if (ESR != ESR_CaseNotFound) + return Scope.destroy() ? ESR : ESR_Failed; + if (!IS->getElse()) + return ESR_CaseNotFound; + + ESR = EvaluateStmt(Result, Info, IS->getElse(), Case); + if (ESR == ESR_Failed) + return ESR; + if (ESR != ESR_CaseNotFound) + return Scope.destroy() ? ESR : ESR_Failed; + return ESR_CaseNotFound; } case Stmt::WhileStmtClass: { @@ -4262,21 +4535,47 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); + BlockScopeRAII Scope(Info); + + // Step into the init statement in case it brings an (uninitialized) + // variable into scope. 
+ if (const Stmt *Init = FS->getInit()) { + EvalStmtResult ESR = EvaluateStmt(Result, Info, Init, Case); + if (ESR != ESR_CaseNotFound) { + assert(ESR != ESR_Succeeded); + return ESR; + } + } + EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody(), Case); if (ESR != ESR_Continue) return ESR; if (FS->getInc()) { FullExpressionRAII IncScope(Info); - if (!EvaluateIgnoredValue(Info, FS->getInc())) + if (!EvaluateIgnoredValue(Info, FS->getInc()) || !IncScope.destroy()) return ESR_Failed; } break; } - case Stmt::DeclStmtClass: - // FIXME: If the variable has initialization that can't be jumped over, - // bail out of any immediately-surrounding compound-statement too. + case Stmt::DeclStmtClass: { + // Start the lifetime of any uninitialized variables we encounter. They + // might be used by the selected branch of the switch. + const DeclStmt *DS = cast(S); + for (const auto *D : DS->decls()) { + if (const auto *VD = dyn_cast(D)) { + if (VD->hasLocalStorage() && !VD->getInit()) + if (!EvaluateVarDecl(Info, VD)) + return ESR_Failed; + // FIXME: If the variable has initialization that can't be jumped + // over, bail out of any immediately-surrounding compound-statement + // too. There can't be any case labels here. + } + } + return ESR_CaseNotFound; + } + default: return ESR_CaseNotFound; } @@ -4287,8 +4586,10 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, if (const Expr *E = dyn_cast(S)) { // Don't bother evaluating beyond an expression-statement which couldn't // be evaluated. + // FIXME: Do we need the FullExpressionRAII object here? + // VisitExprWithCleanups should create one when necessary. 
FullExpressionRAII Scope(Info); - if (!EvaluateIgnoredValue(Info, E)) + if (!EvaluateIgnoredValue(Info, E) || !Scope.destroy()) return ESR_Failed; return ESR_Succeeded; } @@ -4301,12 +4602,12 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, case Stmt::DeclStmtClass: { const DeclStmt *DS = cast(S); - for (const auto *DclIt : DS->decls()) { + for (const auto *D : DS->decls()) { // Each declaration initialization is its own full-expression. - // FIXME: This isn't quite right; if we're performing aggregate - // initialization, each braced subexpression is its own full-expression. FullExpressionRAII Scope(Info); - if (!EvaluateDecl(Info, DclIt) && !Info.noteFailure()) + if (!EvaluateDecl(Info, D) && !Info.noteFailure()) + return ESR_Failed; + if (!Scope.destroy()) return ESR_Failed; } return ESR_Succeeded; @@ -4320,7 +4621,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, ? EvaluateInPlace(Result.Value, Info, *Result.Slot, RetExpr) : Evaluate(Result.Value, Info, RetExpr))) return ESR_Failed; - return ESR_Returned; + return Scope.destroy() ? ESR_Returned : ESR_Failed; } case Stmt::CompoundStmtClass: { @@ -4331,10 +4632,15 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, EvalStmtResult ESR = EvaluateStmt(Result, Info, BI, Case); if (ESR == ESR_Succeeded) Case = nullptr; - else if (ESR != ESR_CaseNotFound) + else if (ESR != ESR_CaseNotFound) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } } - return Case ? ESR_CaseNotFound : ESR_Succeeded; + if (Case) + return ESR_CaseNotFound; + return Scope.destroy() ? 
ESR_Succeeded : ESR_Failed; } case Stmt::IfStmtClass: { @@ -4344,8 +4650,11 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, BlockScopeRAII Scope(Info); if (const Stmt *Init = IS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, Init); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } } bool Cond; if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond)) @@ -4353,10 +4662,13 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, if (const Stmt *SubStmt = Cond ? IS->getThen() : IS->getElse()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, SubStmt); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } } - return ESR_Succeeded; + return Scope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::WhileStmtClass: { @@ -4371,8 +4683,13 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, WS->getBody()); - if (ESR != ESR_Continue) + if (ESR != ESR_Continue) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } + if (!Scope.destroy()) + return ESR_Failed; } return ESR_Succeeded; } @@ -4387,7 +4704,8 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, Case = nullptr; FullExpressionRAII CondScope(Info); - if (!EvaluateAsBooleanCondition(DS->getCond(), Continue, Info)) + if (!EvaluateAsBooleanCondition(DS->getCond(), Continue, Info) || + !CondScope.destroy()) return ESR_Failed; } while (Continue); return ESR_Succeeded; @@ -4395,14 +4713,17 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, case Stmt::ForStmtClass: { const ForStmt *FS = cast(S); - BlockScopeRAII Scope(Info); + BlockScopeRAII ForScope(Info); if (FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, 
FS->getInit()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !ForScope.destroy()) + return ESR_Failed; return ESR; + } } while (true) { - BlockScopeRAII Scope(Info); + BlockScopeRAII IterScope(Info); bool Continue = true; if (FS->getCond() && !EvaluateCond(Info, FS->getConditionVariable(), FS->getCond(), Continue)) @@ -4411,16 +4732,22 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, break; EvalStmtResult ESR = EvaluateLoopBody(Result, Info, FS->getBody()); - if (ESR != ESR_Continue) + if (ESR != ESR_Continue) { + if (ESR != ESR_Failed && (!IterScope.destroy() || !ForScope.destroy())) + return ESR_Failed; return ESR; + } if (FS->getInc()) { FullExpressionRAII IncScope(Info); - if (!EvaluateIgnoredValue(Info, FS->getInc())) + if (!EvaluateIgnoredValue(Info, FS->getInc()) || !IncScope.destroy()) return ESR_Failed; } + + if (!IterScope.destroy()) + return ESR_Failed; } - return ESR_Succeeded; + return ForScope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::CXXForRangeStmtClass: { @@ -4430,22 +4757,34 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, // Evaluate the init-statement if present. if (FS->getInit()) { EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getInit()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } } // Initialize the __range variable. EvalStmtResult ESR = EvaluateStmt(Result, Info, FS->getRangeStmt()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } // Create the __begin and __end iterators. 
ESR = EvaluateStmt(Result, Info, FS->getBeginStmt()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } ESR = EvaluateStmt(Result, Info, FS->getEndStmt()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && !Scope.destroy()) + return ESR_Failed; return ESR; + } while (true) { // Condition: __begin != __end. @@ -4461,20 +4800,29 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info, // User's variable declaration, initialized by *__begin. BlockScopeRAII InnerScope(Info); ESR = EvaluateStmt(Result, Info, FS->getLoopVarStmt()); - if (ESR != ESR_Succeeded) + if (ESR != ESR_Succeeded) { + if (ESR != ESR_Failed && (!InnerScope.destroy() || !Scope.destroy())) + return ESR_Failed; return ESR; + } // Loop body. ESR = EvaluateLoopBody(Result, Info, FS->getBody()); - if (ESR != ESR_Continue) + if (ESR != ESR_Continue) { + if (ESR != ESR_Failed && (!InnerScope.destroy() || !Scope.destroy())) + return ESR_Failed; return ESR; + } // Increment: ++__begin if (!EvaluateIgnoredValue(Info, FS->getInc())) return ESR_Failed; + + if (!InnerScope.destroy()) + return ESR_Failed; } - return ESR_Succeeded; + return Scope.destroy() ? ESR_Succeeded : ESR_Failed; } case Stmt::SwitchStmtClass: @@ -4649,9 +4997,13 @@ static bool checkDynamicType(EvalInfo &Info, const Expr *E, const LValue &This, /// Check that the pointee of the 'this' pointer in a member function call is /// either within its lifetime or in its period of construction or destruction. -static bool checkNonVirtualMemberCallThisPointer(EvalInfo &Info, const Expr *E, - const LValue &This) { - return checkDynamicType(Info, E, This, AK_MemberCall, false); +static bool +checkNonVirtualMemberCallThisPointer(EvalInfo &Info, const Expr *E, + const LValue &This, + const CXXMethodDecl *NamedMember) { + return checkDynamicType( + Info, E, This, + isa(NamedMember) ? 
AK_Destroy : AK_MemberCall, false); } struct DynamicType { @@ -4699,16 +5051,19 @@ static Optional ComputeDynamicType(EvalInfo &Info, const Expr *E, ArrayRef Path = This.Designator.Entries; for (unsigned PathLength = This.Designator.MostDerivedPathLength; PathLength <= Path.size(); ++PathLength) { - switch (Info.isEvaluatingConstructor(This.getLValueBase(), - Path.slice(0, PathLength))) { + switch (Info.isEvaluatingCtorDtor(This.getLValueBase(), + Path.slice(0, PathLength))) { case ConstructionPhase::Bases: - // We're constructing a base class. This is not the dynamic type. + case ConstructionPhase::DestroyingBases: + // We're constructing or destroying a base class. This is not the dynamic + // type. break; case ConstructionPhase::None: case ConstructionPhase::AfterBases: - // We've finished constructing the base classes, so this is the dynamic - // type. + case ConstructionPhase::Destroying: + // We've finished constructing the base classes and not yet started + // destroying them again, so this is the dynamic type. return DynamicType{getBaseClassType(This.Designator, PathLength), PathLength}; } @@ -4725,8 +5080,9 @@ static Optional ComputeDynamicType(EvalInfo &Info, const Expr *E, static const CXXMethodDecl *HandleVirtualDispatch( EvalInfo &Info, const Expr *E, LValue &This, const CXXMethodDecl *Found, llvm::SmallVectorImpl &CovariantAdjustmentPath) { - Optional DynType = - ComputeDynamicType(Info, E, This, AK_MemberCall); + Optional DynType = ComputeDynamicType( + Info, E, This, + isa(Found) ? AK_Destroy : AK_MemberCall); if (!DynType) return nullptr; @@ -4862,8 +5218,7 @@ static bool HandleDynamicCast(EvalInfo &Info, const ExplicitCastExpr *E, if (!E->isGLValue()) { // The value of a failed cast to pointer type is the null pointer value // of the required result type. 
- auto TargetVal = Info.Ctx.getTargetNullPointerValue(E->getType()); - Ptr.setNull(E->getType(), TargetVal); + Ptr.setNull(Info.Ctx, E->getType()); return true; } @@ -4928,39 +5283,6 @@ struct StartLifetimeOfUnionMemberHandler { static const AccessKinds AccessKind = AK_Assign; - APValue getDefaultInitValue(QualType SubobjType) { - if (auto *RD = SubobjType->getAsCXXRecordDecl()) { - if (RD->isUnion()) - return APValue((const FieldDecl*)nullptr); - - APValue Struct(APValue::UninitStruct(), RD->getNumBases(), - std::distance(RD->field_begin(), RD->field_end())); - - unsigned Index = 0; - for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), - End = RD->bases_end(); I != End; ++I, ++Index) - Struct.getStructBase(Index) = getDefaultInitValue(I->getType()); - - for (const auto *I : RD->fields()) { - if (I->isUnnamedBitfield()) - continue; - Struct.getStructField(I->getFieldIndex()) = - getDefaultInitValue(I->getType()); - } - return Struct; - } - - if (auto *AT = dyn_cast_or_null( - SubobjType->getAsArrayTypeUnsafe())) { - APValue Array(APValue::UninitArray(), 0, AT->getSize().getZExtValue()); - if (Array.hasArrayFiller()) - Array.getArrayFiller() = getDefaultInitValue(AT->getElementType()); - return Array; - } - - return APValue::IndeterminateValue(); - } - typedef bool result_type; bool failed() { return false; } bool found(APValue &Subobj, QualType SubobjType) { @@ -4973,7 +5295,8 @@ struct StartLifetimeOfUnionMemberHandler { // * No variant members' lifetimes begin // * All scalar subobjects whose lifetimes begin have indeterminate values assert(SubobjType->isUnionType()); - if (!declaresSameEntity(Subobj.getUnionField(), Field)) + if (!declaresSameEntity(Subobj.getUnionField(), Field) || + !Subobj.getUnionValue().hasValue()) Subobj.setUnion(Field, getDefaultInitValue(Field->getType())); return true; } @@ -5005,7 +5328,9 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr, // -- If E is of the form A.B, S(E) contains the 
elements of S(A)... if (auto *ME = dyn_cast(E)) { auto *FD = dyn_cast(ME->getMemberDecl()); - if (!FD) + // Note that we can't implicitly start the lifetime of a reference, + // so we don't need to proceed any further if we reach one. + if (!FD || FD->getType()->isReferenceType()) break; // ... and also contains A.B if B names a union member @@ -5116,18 +5441,18 @@ static bool EvaluateArgs(ArrayRef Args, ArgVector &ArgValues, } } } - for (ArrayRef::iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - if (!Evaluate(ArgValues[I - Args.begin()], Info, *I)) { + for (unsigned Idx = 0; Idx < Args.size(); Idx++) { + if (!Evaluate(ArgValues[Idx], Info, Args[Idx])) { // If we're checking for a potential constant expression, evaluate all // initializers even if some of them fail. if (!Info.noteFailure()) return false; Success = false; } else if (!ForbiddenNullArgs.empty() && - ForbiddenNullArgs[I - Args.begin()] && - ArgValues[I - Args.begin()].isNullPointer()) { - Info.CCEDiag(*I, diag::note_non_null_attribute_failed); + ForbiddenNullArgs[Idx] && + ArgValues[Idx].isLValue() && + ArgValues[Idx].isNullPointer()) { + Info.CCEDiag(Args[Idx], diag::note_non_null_attribute_failed); if (!Info.noteFailure()) return false; Success = false; @@ -5166,8 +5491,8 @@ static bool HandleFunctionCall(SourceLocation CallLoc, LValue RHS; RHS.setFrom(Info.Ctx, ArgValues[0]); APValue RHSValue; - if (!handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(), - RHS, RHSValue)) + if (!handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(), RHS, + RHSValue, MD->getParent()->isUnion())) return false; if (Info.getLangOpts().CPlusPlus2a && MD->isTrivial() && !HandleUnionActiveMemberChange(Info, Args[0], *This)) @@ -5230,7 +5555,8 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, CXXConstructorDecl::init_const_iterator I = Definition->init_begin(); { FullExpressionRAII InitScope(Info); - if (!EvaluateInPlace(Result, Info, This, (*I)->getInit())) + if 
(!EvaluateInPlace(Result, Info, This, (*I)->getInit()) || + !InitScope.destroy()) return false; } return EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed; @@ -5251,7 +5577,7 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, RHS.setFrom(Info.Ctx, ArgValues[0]); return handleLValueToRValueConversion( Info, E, Definition->getParamDecl(0)->getType().getNonReferenceType(), - RHS, Result); + RHS, Result, Definition->getParent()->isUnion()); } // Reserve space for the struct members. @@ -5270,6 +5596,25 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, #ifndef NDEBUG CXXRecordDecl::base_class_const_iterator BaseIt = RD->bases_begin(); #endif + CXXRecordDecl::field_iterator FieldIt = RD->field_begin(); + auto SkipToField = [&](FieldDecl *FD, bool Indirect) { + // We might be initializing the same field again if this is an indirect + // field initialization. + if (FieldIt == RD->field_end() || + FieldIt->getFieldIndex() > FD->getFieldIndex()) { + assert(Indirect && "fields out of order?"); + return; + } + + // Default-initialize any fields with no explicit initializer. 
+ for (; !declaresSameEntity(*FieldIt, FD); ++FieldIt) { + assert(FieldIt != RD->field_end() && "missing field?"); + if (!FieldIt->isUnnamedBitfield()) + Result.getStructField(FieldIt->getFieldIndex()) = + getDefaultInitValue(FieldIt->getType()); + } + ++FieldIt; + }; for (const auto *I : Definition->inits()) { LValue Subobject = This; LValue SubobjectParent = This; @@ -5298,6 +5643,7 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, Result = APValue(FD); Value = &Result.getUnionValue(); } else { + SkipToField(FD, false); Value = &Result.getStructField(FD->getFieldIndex()); } } else if (IndirectFieldDecl *IFD = I->getIndirectMember()) { @@ -5317,8 +5663,10 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, if (CD->isUnion()) *Value = APValue(FD); else - *Value = APValue(APValue::UninitStruct(), CD->getNumBases(), - std::distance(CD->field_begin(), CD->field_end())); + // FIXME: This immediately starts the lifetime of all members of an + // anonymous struct. It would be preferable to strictly start member + // lifetime in initialization order. + *Value = getDefaultInitValue(Info.Ctx.getRecordType(CD)); } // Store Subobject as its parent before updating it for the last element // in the chain. @@ -5328,8 +5676,11 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, return false; if (CD->isUnion()) Value = &Value->getUnionValue(); - else + else { + if (C == IndirectFieldChain.front() && !RD->isUnion()) + SkipToField(FD, true); Value = &Value->getStructField(FD->getFieldIndex()); + } } } else { llvm_unreachable("unknown base initializer kind"); @@ -5358,8 +5709,18 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, EvalObj.finishedConstructingBases(); } + // Default-initialize any remaining fields. 
+ if (!RD->isUnion()) { + for (; FieldIt != RD->field_end(); ++FieldIt) { + if (!FieldIt->isUnnamedBitfield()) + Result.getStructField(FieldIt->getFieldIndex()) = + getDefaultInitValue(FieldIt->getType()); + } + } + return Success && - EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed; + EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed && + LifetimeExtendedScope.destroy(); } static bool HandleConstructorCall(const Expr *E, const LValue &This, @@ -5374,6 +5735,381 @@ static bool HandleConstructorCall(const Expr *E, const LValue &This, Info, Result); } +static bool HandleDestructionImpl(EvalInfo &Info, SourceLocation CallLoc, + const LValue &This, APValue &Value, + QualType T) { + // Objects can only be destroyed while they're within their lifetimes. + // FIXME: We have no representation for whether an object of type nullptr_t + // is in its lifetime; it usually doesn't matter. Perhaps we should model it + // as indeterminate instead? + if (Value.isAbsent() && !T->isNullPtrType()) { + APValue Printable; + This.moveInto(Printable); + Info.FFDiag(CallLoc, diag::note_constexpr_destroy_out_of_lifetime) + << Printable.getAsString(Info.Ctx, Info.Ctx.getLValueReferenceType(T)); + return false; + } + + // Invent an expression for location purposes. + // FIXME: We shouldn't need to do this. + OpaqueValueExpr LocE(CallLoc, Info.Ctx.IntTy, VK_RValue); + + // For arrays, destroy elements right-to-left. + if (const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(T)) { + uint64_t Size = CAT->getSize().getZExtValue(); + QualType ElemT = CAT->getElementType(); + + LValue ElemLV = This; + ElemLV.addArray(Info, &LocE, CAT); + if (!HandleLValueArrayAdjustment(Info, &LocE, ElemLV, ElemT, Size)) + return false; + + // Ensure that we have actual array elements available to destroy; the + // destructors might mutate the value, so we can't run them on the array + // filler. 
+ if (Size && Size > Value.getArrayInitializedElts()) + expandArray(Value, Value.getArraySize() - 1); + + for (; Size != 0; --Size) { + APValue &Elem = Value.getArrayInitializedElt(Size - 1); + if (!HandleLValueArrayAdjustment(Info, &LocE, ElemLV, ElemT, -1) || + !HandleDestructionImpl(Info, CallLoc, ElemLV, Elem, ElemT)) + return false; + } + + // End the lifetime of this array now. + Value = APValue(); + return true; + } + + const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); + if (!RD) { + if (T.isDestructedType()) { + Info.FFDiag(CallLoc, diag::note_constexpr_unsupported_destruction) << T; + return false; + } + + Value = APValue(); + return true; + } + + if (RD->getNumVBases()) { + Info.FFDiag(CallLoc, diag::note_constexpr_virtual_base) << RD; + return false; + } + + const CXXDestructorDecl *DD = RD->getDestructor(); + if (!DD && !RD->hasTrivialDestructor()) { + Info.FFDiag(CallLoc); + return false; + } + + if (!DD || DD->isTrivial() || + (RD->isAnonymousStructOrUnion() && RD->isUnion())) { + // A trivial destructor just ends the lifetime of the object. Check for + // this case before checking for a body, because we might not bother + // building a body for a trivial destructor. Note that it doesn't matter + // whether the destructor is constexpr in this case; all trivial + // destructors are constexpr. + // + // If an anonymous union would be destroyed, some enclosing destructor must + // have been explicitly defined, and the anonymous union destruction should + // have no effect. + Value = APValue(); + return true; + } + + if (!Info.CheckCallLimit(CallLoc)) + return false; + + const FunctionDecl *Definition = nullptr; + const Stmt *Body = DD->getBody(Definition); + + if (!CheckConstexprFunction(Info, CallLoc, DD, Definition, Body)) + return false; + + CallStackFrame Frame(Info, CallLoc, Definition, &This, nullptr); + + // We're now in the period of destruction of this object. 
+ unsigned BasesLeft = RD->getNumBases(); + EvalInfo::EvaluatingDestructorRAII EvalObj( + Info, + ObjectUnderConstruction{This.getLValueBase(), This.Designator.Entries}); + if (!EvalObj.DidInsert) { + // C++2a [class.dtor]p19: + // the behavior is undefined if the destructor is invoked for an object + // whose lifetime has ended + // (Note that formally the lifetime ends when the period of destruction + // begins, even though certain uses of the object remain valid until the + // period of destruction ends.) + Info.FFDiag(CallLoc, diag::note_constexpr_double_destroy); + return false; + } + + // FIXME: Creating an APValue just to hold a nonexistent return value is + // wasteful. + APValue RetVal; + StmtResult Ret = {RetVal, nullptr}; + if (EvaluateStmt(Ret, Info, Definition->getBody()) == ESR_Failed) + return false; + + // A union destructor does not implicitly destroy its members. + if (RD->isUnion()) + return true; + + const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); + + // We don't have a good way to iterate fields in reverse, so collect all the + // fields first and then walk them backwards. + SmallVector Fields(RD->field_begin(), RD->field_end()); + for (const FieldDecl *FD : llvm::reverse(Fields)) { + if (FD->isUnnamedBitfield()) + continue; + + LValue Subobject = This; + if (!HandleLValueMember(Info, &LocE, Subobject, FD, &Layout)) + return false; + + APValue *SubobjectValue = &Value.getStructField(FD->getFieldIndex()); + if (!HandleDestructionImpl(Info, CallLoc, Subobject, *SubobjectValue, + FD->getType())) + return false; + } + + if (BasesLeft != 0) + EvalObj.startedDestroyingBases(); + + // Destroy base classes in reverse order. 
+ for (const CXXBaseSpecifier &Base : llvm::reverse(RD->bases())) { + --BasesLeft; + + QualType BaseType = Base.getType(); + LValue Subobject = This; + if (!HandleLValueDirectBase(Info, &LocE, Subobject, RD, + BaseType->getAsCXXRecordDecl(), &Layout)) + return false; + + APValue *SubobjectValue = &Value.getStructBase(BasesLeft); + if (!HandleDestructionImpl(Info, CallLoc, Subobject, *SubobjectValue, + BaseType)) + return false; + } + assert(BasesLeft == 0 && "NumBases was wrong?"); + + // The period of destruction ends now. The object is gone. + Value = APValue(); + return true; +} + +namespace { +struct DestroyObjectHandler { + EvalInfo &Info; + const Expr *E; + const LValue &This; + const AccessKinds AccessKind; + + typedef bool result_type; + bool failed() { return false; } + bool found(APValue &Subobj, QualType SubobjType) { + return HandleDestructionImpl(Info, E->getExprLoc(), This, Subobj, + SubobjType); + } + bool found(APSInt &Value, QualType SubobjType) { + Info.FFDiag(E, diag::note_constexpr_destroy_complex_elem); + return false; + } + bool found(APFloat &Value, QualType SubobjType) { + Info.FFDiag(E, diag::note_constexpr_destroy_complex_elem); + return false; + } +}; +} + +/// Perform a destructor or pseudo-destructor call on the given object, which +/// might in general not be a complete object. +static bool HandleDestruction(EvalInfo &Info, const Expr *E, + const LValue &This, QualType ThisType) { + CompleteObject Obj = findCompleteObject(Info, E, AK_Destroy, This, ThisType); + DestroyObjectHandler Handler = {Info, E, This, AK_Destroy}; + return Obj && findSubobject(Info, E, Obj, This.Designator, Handler); +} + +/// Destroy and end the lifetime of the given complete object. +static bool HandleDestruction(EvalInfo &Info, SourceLocation Loc, + APValue::LValueBase LVBase, APValue &Value, + QualType T) { + // If we've had an unmodeled side-effect, we can't rely on mutable state + // (such as the object we're about to destroy) being correct. 
+ if (Info.EvalStatus.HasSideEffects) + return false; + + LValue LV; + LV.set({LVBase}); + return HandleDestructionImpl(Info, Loc, LV, Value, T); +} + +/// Perform a call to 'perator new' or to `__builtin_operator_new'. +static bool HandleOperatorNewCall(EvalInfo &Info, const CallExpr *E, + LValue &Result) { + if (Info.checkingPotentialConstantExpression() || + Info.SpeculativeEvaluationDepth) + return false; + + // This is permitted only within a call to std::allocator::allocate. + auto Caller = Info.getStdAllocatorCaller("allocate"); + if (!Caller) { + Info.FFDiag(E->getExprLoc(), Info.getLangOpts().CPlusPlus2a + ? diag::note_constexpr_new_untyped + : diag::note_constexpr_new); + return false; + } + + QualType ElemType = Caller.ElemType; + if (ElemType->isIncompleteType() || ElemType->isFunctionType()) { + Info.FFDiag(E->getExprLoc(), + diag::note_constexpr_new_not_complete_object_type) + << (ElemType->isIncompleteType() ? 0 : 1) << ElemType; + return false; + } + + APSInt ByteSize; + if (!EvaluateInteger(E->getArg(0), ByteSize, Info)) + return false; + bool IsNothrow = false; + for (unsigned I = 1, N = E->getNumArgs(); I != N; ++I) { + EvaluateIgnoredValue(Info, E->getArg(I)); + IsNothrow |= E->getType()->isNothrowT(); + } + + CharUnits ElemSize; + if (!HandleSizeof(Info, E->getExprLoc(), ElemType, ElemSize)) + return false; + APInt Size, Remainder; + APInt ElemSizeAP(ByteSize.getBitWidth(), ElemSize.getQuantity()); + APInt::udivrem(ByteSize, ElemSizeAP, Size, Remainder); + if (Remainder != 0) { + // This likely indicates a bug in the implementation of 'std::allocator'. 
+ Info.FFDiag(E->getExprLoc(), diag::note_constexpr_operator_new_bad_size) + << ByteSize << APSInt(ElemSizeAP, true) << ElemType; + return false; + } + + if (ByteSize.getActiveBits() > ConstantArrayType::getMaxSizeBits(Info.Ctx)) { + if (IsNothrow) { + Result.setNull(Info.Ctx, E->getType()); + return true; + } + + Info.FFDiag(E, diag::note_constexpr_new_too_large) << APSInt(Size, true); + return false; + } + + QualType AllocType = Info.Ctx.getConstantArrayType(ElemType, Size, nullptr, + ArrayType::Normal, 0); + APValue *Val = Info.createHeapAlloc(E, AllocType, Result); + *Val = APValue(APValue::UninitArray(), 0, Size.getZExtValue()); + Result.addArray(Info, E, cast(AllocType)); + return true; +} + +static bool hasVirtualDestructor(QualType T) { + if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) + if (CXXDestructorDecl *DD = RD->getDestructor()) + return DD->isVirtual(); + return false; +} + +static const FunctionDecl *getVirtualOperatorDelete(QualType T) { + if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) + if (CXXDestructorDecl *DD = RD->getDestructor()) + return DD->isVirtual() ? DD->getOperatorDelete() : nullptr; + return nullptr; +} + +/// Check that the given object is a suitable pointer to a heap allocation that +/// still exists and is of the right kind for the purpose of a deletion. +/// +/// On success, returns the heap allocation to deallocate. On failure, produces +/// a diagnostic and returns None. 
+static Optional CheckDeleteKind(EvalInfo &Info, const Expr *E, + const LValue &Pointer, + DynAlloc::Kind DeallocKind) { + auto PointerAsString = [&] { + return Pointer.toString(Info.Ctx, Info.Ctx.VoidPtrTy); + }; + + DynamicAllocLValue DA = Pointer.Base.dyn_cast(); + if (!DA) { + Info.FFDiag(E, diag::note_constexpr_delete_not_heap_alloc) + << PointerAsString(); + if (Pointer.Base) + NoteLValueLocation(Info, Pointer.Base); + return None; + } + + Optional Alloc = Info.lookupDynamicAlloc(DA); + if (!Alloc) { + Info.FFDiag(E, diag::note_constexpr_double_delete); + return None; + } + + QualType AllocType = Pointer.Base.getDynamicAllocType(); + if (DeallocKind != (*Alloc)->getKind()) { + Info.FFDiag(E, diag::note_constexpr_new_delete_mismatch) + << DeallocKind << (*Alloc)->getKind() << AllocType; + NoteLValueLocation(Info, Pointer.Base); + return None; + } + + bool Subobject = false; + if (DeallocKind == DynAlloc::New) { + Subobject = Pointer.Designator.MostDerivedPathLength != 0 || + Pointer.Designator.isOnePastTheEnd(); + } else { + Subobject = Pointer.Designator.Entries.size() != 1 || + Pointer.Designator.Entries[0].getAsArrayIndex() != 0; + } + if (Subobject) { + Info.FFDiag(E, diag::note_constexpr_delete_subobject) + << PointerAsString() << Pointer.Designator.isOnePastTheEnd(); + return None; + } + + return Alloc; +} + +// Perform a call to 'operator delete' or '__builtin_operator_delete'. +bool HandleOperatorDeleteCall(EvalInfo &Info, const CallExpr *E) { + if (Info.checkingPotentialConstantExpression() || + Info.SpeculativeEvaluationDepth) + return false; + + // This is permitted only within a call to std::allocator::deallocate. 
+ if (!Info.getStdAllocatorCaller("deallocate")) { + Info.FFDiag(E->getExprLoc()); + return true; + } + + LValue Pointer; + if (!EvaluatePointer(E->getArg(0), Pointer, Info)) + return false; + for (unsigned I = 1, N = E->getNumArgs(); I != N; ++I) + EvaluateIgnoredValue(Info, E->getArg(I)); + + if (Pointer.Designator.Invalid) + return false; + + // Deleting a null pointer has no effect. + if (Pointer.isNullPointer()) + return true; + + if (!CheckDeleteKind(Info, E, Pointer, DynAlloc::StdAllocator)) + return false; + + Info.HeapAllocs.erase(Pointer.Base.get()); + return true; +} + //===----------------------------------------------------------------------===// // Generic Evaluation //===----------------------------------------------------------------------===// @@ -5706,9 +6442,8 @@ class BufferToAPValueConverter { QualType RepresentationType = Ty->getDecl()->getIntegerType(); assert(!RepresentationType.isNull() && "enum forward decl should be caught by Sema"); - const BuiltinType *AsBuiltin = - RepresentationType.getCanonicalType()->getAs(); - assert(AsBuiltin && "non-integral enum underlying type?"); + const auto *AsBuiltin = + RepresentationType.getCanonicalType()->castAs(); // Recurse into the underlying type. Treat std::byte transparently as // unsigned char. 
return visit(AsBuiltin, Offset, /*EnumTy=*/Ty); @@ -5752,7 +6487,7 @@ class BufferToAPValueConverter { #define NON_CANONICAL_UNLESS_DEPENDENT(Class, Base) \ case Type::Class: \ llvm_unreachable("either dependent or not canonical!"); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } llvm_unreachable("Unhandled Type::TypeClass"); } @@ -5843,9 +6578,9 @@ static bool handleLValueToRValueBitCast(EvalInfo &Info, APValue &DestValue, LValue SourceLValue; APValue SourceRValue; SourceLValue.setFrom(Info.Ctx, SourceValue); - if (!handleLValueToRValueConversion(Info, BCE, - BCE->getSubExpr()->getType().withConst(), - SourceLValue, SourceRValue)) + if (!handleLValueToRValueConversion( + Info, BCE, BCE->getSubExpr()->getType().withConst(), SourceLValue, + SourceRValue, /*WantObjectRepresentation=*/true)) return false; // Read out SourceValue into a char buffer. @@ -5984,10 +6719,16 @@ public: return StmtVisitorTy::Visit(E->getExpr()); } - // We cannot create any objects for which cleanups are required, so there is - // nothing to do here; all cleanups must come from unevaluated subexpressions. - bool VisitExprWithCleanups(const ExprWithCleanups *E) - { return StmtVisitorTy::Visit(E->getSubExpr()); } + bool VisitExprWithCleanups(const ExprWithCleanups *E) { + FullExpressionRAII Scope(Info); + return StmtVisitorTy::Visit(E->getSubExpr()) && Scope.destroy(); + } + + // Temporaries are registered when created, so we don't care about + // CXXBindTemporaryExpr. 
+ bool VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *E) { + return StmtVisitorTy::Visit(E->getSubExpr()); + } bool VisitCXXReinterpretCastExpr(const CXXReinterpretCastExpr *E) { CCEDiag(E, diag::note_constexpr_invalid_cast) << 0; @@ -6024,10 +6765,18 @@ public: } } + bool VisitCXXRewrittenBinaryOperator(const CXXRewrittenBinaryOperator *E) { + return StmtVisitorTy::Visit(E->getSemanticForm()); + } + bool VisitBinaryConditionalOperator(const BinaryConditionalOperator *E) { // Evaluate and cache the common expression. We treat it as a temporary, // even though it's not quite the same thing. - if (!Evaluate(Info.CurrentCall->createTemporary(E->getOpaqueValue(), false), + LValue CommonLV; + if (!Evaluate(Info.CurrentCall->createTemporary( + E->getOpaqueValue(), + getStorageType(Info.Ctx, E->getOpaqueValue()), false, + CommonLV), Info, E->getCommon())) return false; @@ -6047,6 +6796,8 @@ public: // Always assume __builtin_constant_p(...) ? ... : ... is a potential // constant expression; we can't check whether it's potentially foldable. + // FIXME: We should instead treat __builtin_constant_p as non-constant if + // it would return 'false' in this mode. if (Info.checkingPotentialConstantExpression() && IsBcpCall) return false; @@ -6104,11 +6855,21 @@ public: HasQualifier = ME->hasQualifier(); } else if (const BinaryOperator *BE = dyn_cast(Callee)) { // Indirect bound member calls ('.*' or '->*'). - Member = dyn_cast_or_null( - HandleMemberPointerAccess(Info, BE, ThisVal, false)); + const ValueDecl *D = + HandleMemberPointerAccess(Info, BE, ThisVal, false); + if (!D) + return false; + Member = dyn_cast(D); if (!Member) return Error(Callee); This = &ThisVal; + } else if (const auto *PDE = dyn_cast(Callee)) { + if (!Info.getLangOpts().CPlusPlus2a) + Info.CCEDiag(PDE, diag::note_constexpr_pseudo_destructor); + // FIXME: If pseudo-destructor calls ever start ending the lifetime of + // their callee, we should start calling HandleDestruction here. 
+ // For now, we just evaluate the object argument and discard it. + return EvaluateObjectArgument(Info, PDE->getBase(), ThisVal); } else return Error(Callee); FD = Member; @@ -6177,6 +6938,17 @@ public: FD = cast(CorrespondingCallOpSpecialization); } else FD = LambdaCallOp; + } else if (FD->isReplaceableGlobalAllocationFunction()) { + if (FD->getDeclName().getCXXOverloadedOperator() == OO_New || + FD->getDeclName().getCXXOverloadedOperator() == OO_Array_New) { + LValue Ptr; + if (!HandleOperatorNewCall(Info, E, Ptr)) + return false; + Ptr.moveInto(Result); + return true; + } else { + return HandleOperatorDeleteCall(Info, E); + } } } else return Error(E); @@ -6192,11 +6964,20 @@ public: return false; } else { // Check that the 'this' pointer points to an object of the right type. - if (!checkNonVirtualMemberCallThisPointer(Info, E, *This)) + // FIXME: If this is an assignment operator call, we may need to change + // the active union member before we check this. + if (!checkNonVirtualMemberCallThisPointer(Info, E, *This, NamedMember)) return false; } } + // Destructor calls are different enough that they have their own codepath. + if (auto *DD = dyn_cast(FD)) { + assert(This && "no 'this' pointer for destructor call"); + return HandleDestruction(Info, E, *This, + Info.Ctx.getRecordType(DD->getParent())); + } + const FunctionDecl *Definition = nullptr; Stmt *Body = FD->getBody(Definition); @@ -6329,14 +7110,14 @@ public: bool VisitStmtExpr(const StmtExpr *E) { // We will have checked the full-expressions inside the statement expression // when they were completed, and don't need to check them again now. 
- if (Info.checkingForOverflow()) + if (Info.checkingForUndefinedBehavior()) return Error(E); - BlockScopeRAII Scope(Info); const CompoundStmt *CS = E->getSubStmt(); if (CS->body_empty()) return true; + BlockScopeRAII Scope(Info); for (CompoundStmt::const_body_iterator BI = CS->body_begin(), BE = CS->body_end(); /**/; ++BI) { @@ -6347,7 +7128,7 @@ public: diag::note_constexpr_stmt_expr_unsupported); return false; } - return this->Visit(FinalExpr); + return this->Visit(FinalExpr) && Scope.destroy(); } APValue ReturnValue; @@ -6440,7 +7221,7 @@ public: const ValueDecl *MD = E->getMemberDecl(); if (const FieldDecl *FD = dyn_cast(E->getMemberDecl())) { - assert(BaseTy->getAs()->getDecl()->getCanonicalDecl() == + assert(BaseTy->castAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); (void)BaseTy; if (!HandleLValueMember(this->Info, E, Result, FD)) @@ -6696,16 +7477,14 @@ bool LValueExprEvaluator::VisitMaterializeTemporaryExpr( *Value = APValue(); Result.set(E); } else { - Value = &createTemporary(E, E->getStorageDuration() == SD_Automatic, Result, - *Info.CurrentCall); + Value = &Info.CurrentCall->createTemporary( + E, E->getType(), E->getStorageDuration() == SD_Automatic, Result); } QualType Type = Inner->getType(); // Materialize the temporary itself. 
- if (!EvaluateInPlace(*Value, Info, Result, Inner) || - (E->getStorageDuration() == SD_Static && - !CheckConstantExpression(Info, E->getExprLoc(), Type, *Value))) { + if (!EvaluateInPlace(*Value, Info, Result, Inner)) { *Value = APValue(); return false; } @@ -7035,8 +7814,7 @@ public: return true; } bool ZeroInitialization(const Expr *E) { - auto TargetVal = Info.Ctx.getTargetNullPointerValue(E->getType()); - Result.setNull(E->getType(), TargetVal); + Result.setNull(Info.Ctx, E->getType()); return true; } @@ -7097,6 +7875,8 @@ public: return true; } + bool VisitCXXNewExpr(const CXXNewExpr *E); + bool VisitSourceLocExpr(const SourceLocExpr *E) { assert(E->isStringType() && "SourceLocExpr isn't a pointer type?"); APValue LValResult = E->EvaluateInContext( @@ -7161,12 +7941,22 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) { // permitted in constant expressions in C++11. Bitcasts from cv void* are // also static_casts, but we disallow them as a resolution to DR1312. if (!E->getType()->isVoidPointerType()) { - Result.Designator.setInvalid(); - if (SubExpr->getType()->isVoidPointerType()) - CCEDiag(E, diag::note_constexpr_invalid_cast) - << 3 << SubExpr->getType(); - else - CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; + if (!Result.InvalidBase && !Result.Designator.Invalid && + !Result.IsNullPtr && + Info.Ctx.hasSameUnqualifiedType(Result.Designator.getType(Info.Ctx), + E->getType()->getPointeeType()) && + Info.getStdAllocatorCaller("allocate")) { + // Inside a call to std::allocator::allocate and friends, we permit + // casting from void* back to cv1 T* for a pointer that points to a + // cv2 T. 
+ } else { + Result.Designator.setInvalid(); + if (SubExpr->getType()->isVoidPointerType()) + CCEDiag(E, diag::note_constexpr_invalid_cast) + << 3 << SubExpr->getType(); + else + CCEDiag(E, diag::note_constexpr_invalid_cast) << 2; + } } if (E->getCastKind() == CK_AddressSpaceConversion && Result.IsNullPtr) ZeroInitialization(E); @@ -7229,8 +8019,8 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) { if (!evaluateLValue(SubExpr, Result)) return false; } else { - APValue &Value = createTemporary(SubExpr, false, Result, - *Info.CurrentCall); + APValue &Value = Info.CurrentCall->createTemporary( + SubExpr, SubExpr->getType(), false, Result); if (!EvaluateInPlace(Value, Info, Result, SubExpr)) return false; } @@ -7403,6 +8193,8 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return true; } + case Builtin::BI__builtin_operator_new: + return HandleOperatorNewCall(Info, E, Result); case Builtin::BI__builtin_launder: return evaluatePointer(E->getArg(0), Result); case Builtin::BIstrchr: @@ -7638,6 +8430,8 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, while (true) { APValue Val; + // FIXME: Set WantObjectRepresentation to true if we're copying a + // char-like type? if (!handleLValueToRValueConversion(Info, E, T, Src, Val) || !handleAssignment(Info, E, Dest, T, Val)) return false; @@ -7652,10 +8446,208 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } default: - return visitNonBuiltinCallExpr(E); + break; } + + return visitNonBuiltinCallExpr(E); } +static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, + APValue &Result, const InitListExpr *ILE, + QualType AllocType); + +bool PointerExprEvaluator::VisitCXXNewExpr(const CXXNewExpr *E) { + if (!Info.getLangOpts().CPlusPlus2a) + Info.CCEDiag(E, diag::note_constexpr_new); + + // We cannot speculatively evaluate a delete expression. 
+ if (Info.SpeculativeEvaluationDepth) + return false; + + FunctionDecl *OperatorNew = E->getOperatorNew(); + + bool IsNothrow = false; + bool IsPlacement = false; + if (OperatorNew->isReservedGlobalPlacementOperator() && + Info.CurrentCall->isStdFunction() && !E->isArray()) { + // FIXME Support array placement new. + assert(E->getNumPlacementArgs() == 1); + if (!EvaluatePointer(E->getPlacementArg(0), Result, Info)) + return false; + if (Result.Designator.Invalid) + return false; + IsPlacement = true; + } else if (!OperatorNew->isReplaceableGlobalAllocationFunction()) { + Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) + << isa(OperatorNew) << OperatorNew; + return false; + } else if (E->getNumPlacementArgs()) { + // The only new-placement list we support is of the form (std::nothrow). + // + // FIXME: There is no restriction on this, but it's not clear that any + // other form makes any sense. We get here for cases such as: + // + // new (std::align_val_t{N}) X(int) + // + // (which should presumably be valid only if N is a multiple of + // alignof(int), and in any case can't be deallocated unless N is + // alignof(X) and X has new-extended alignment). 
+ if (E->getNumPlacementArgs() != 1 || + !E->getPlacementArg(0)->getType()->isNothrowT()) + return Error(E, diag::note_constexpr_new_placement); + + LValue Nothrow; + if (!EvaluateLValue(E->getPlacementArg(0), Nothrow, Info)) + return false; + IsNothrow = true; + } + + const Expr *Init = E->getInitializer(); + const InitListExpr *ResizedArrayILE = nullptr; + + QualType AllocType = E->getAllocatedType(); + if (Optional ArraySize = E->getArraySize()) { + const Expr *Stripped = *ArraySize; + for (; auto *ICE = dyn_cast(Stripped); + Stripped = ICE->getSubExpr()) + if (ICE->getCastKind() != CK_NoOp && + ICE->getCastKind() != CK_IntegralCast) + break; + + llvm::APSInt ArrayBound; + if (!EvaluateInteger(Stripped, ArrayBound, Info)) + return false; + + // C++ [expr.new]p9: + // The expression is erroneous if: + // -- [...] its value before converting to size_t [or] applying the + // second standard conversion sequence is less than zero + if (ArrayBound.isSigned() && ArrayBound.isNegative()) { + if (IsNothrow) + return ZeroInitialization(E); + + Info.FFDiag(*ArraySize, diag::note_constexpr_new_negative) + << ArrayBound << (*ArraySize)->getSourceRange(); + return false; + } + + // -- its value is such that the size of the allocated object would + // exceed the implementation-defined limit + if (ConstantArrayType::getNumAddressingBits(Info.Ctx, AllocType, + ArrayBound) > + ConstantArrayType::getMaxSizeBits(Info.Ctx)) { + if (IsNothrow) + return ZeroInitialization(E); + + Info.FFDiag(*ArraySize, diag::note_constexpr_new_too_large) + << ArrayBound << (*ArraySize)->getSourceRange(); + return false; + } + + // -- the new-initializer is a braced-init-list and the number of + // array elements for which initializers are provided [...] 
+ // exceeds the number of elements to initialize + if (Init) { + auto *CAT = Info.Ctx.getAsConstantArrayType(Init->getType()); + assert(CAT && "unexpected type for array initializer"); + + unsigned Bits = + std::max(CAT->getSize().getBitWidth(), ArrayBound.getBitWidth()); + llvm::APInt InitBound = CAT->getSize().zextOrSelf(Bits); + llvm::APInt AllocBound = ArrayBound.zextOrSelf(Bits); + if (InitBound.ugt(AllocBound)) { + if (IsNothrow) + return ZeroInitialization(E); + + Info.FFDiag(*ArraySize, diag::note_constexpr_new_too_small) + << AllocBound.toString(10, /*Signed=*/false) + << InitBound.toString(10, /*Signed=*/false) + << (*ArraySize)->getSourceRange(); + return false; + } + + // If the sizes differ, we must have an initializer list, and we need + // special handling for this case when we initialize. + if (InitBound != AllocBound) + ResizedArrayILE = cast(Init); + } + + AllocType = Info.Ctx.getConstantArrayType(AllocType, ArrayBound, nullptr, + ArrayType::Normal, 0); + } else { + assert(!AllocType->isArrayType() && + "array allocation with non-array new"); + } + + APValue *Val; + if (IsPlacement) { + AccessKinds AK = AK_Construct; + struct FindObjectHandler { + EvalInfo &Info; + const Expr *E; + QualType AllocType; + const AccessKinds AccessKind; + APValue *Value; + + typedef bool result_type; + bool failed() { return false; } + bool found(APValue &Subobj, QualType SubobjType) { + // FIXME: Reject the cases where [basic.life]p8 would not permit the + // old name of the object to be used to name the new object. 
+ if (!Info.Ctx.hasSameUnqualifiedType(SubobjType, AllocType)) { + Info.FFDiag(E, diag::note_constexpr_placement_new_wrong_type) << + SubobjType << AllocType; + return false; + } + Value = &Subobj; + return true; + } + bool found(APSInt &Value, QualType SubobjType) { + Info.FFDiag(E, diag::note_constexpr_construct_complex_elem); + return false; + } + bool found(APFloat &Value, QualType SubobjType) { + Info.FFDiag(E, diag::note_constexpr_construct_complex_elem); + return false; + } + } Handler = {Info, E, AllocType, AK, nullptr}; + + CompleteObject Obj = findCompleteObject(Info, E, AK, Result, AllocType); + if (!Obj || !findSubobject(Info, E, Obj, Result.Designator, Handler)) + return false; + + Val = Handler.Value; + + // [basic.life]p1: + // The lifetime of an object o of type T ends when [...] the storage + // which the object occupies is [...] reused by an object that is not + // nested within o (6.6.2). + *Val = APValue(); + } else { + // Perform the allocation and obtain a pointer to the resulting object. + Val = Info.createHeapAlloc(E, AllocType, Result); + if (!Val) + return false; + } + + if (ResizedArrayILE) { + if (!EvaluateArrayNewInitList(Info, Result, *Val, ResizedArrayILE, + AllocType)) + return false; + } else if (Init) { + if (!EvaluateInPlace(*Val, Info, Result, Init)) + return false; + } else { + *Val = getDefaultInitValue(AllocType); + } + + // Array new returns a pointer to the first element, not a pointer to the + // array. 
+ if (auto *AT = AllocType->getAsArrayTypeUnsafe()) + Result.addArray(Info, E, cast(AT)); + + return true; +} //===----------------------------------------------------------------------===// // Member Pointer Evaluation //===----------------------------------------------------------------------===// @@ -7779,7 +8771,6 @@ namespace { bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T); bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E); - bool VisitBinCmp(const BinaryOperator *E); }; } @@ -8013,15 +9004,11 @@ bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E, if (Result.hasValue()) return true; - // We can get here in two different ways: - // 1) We're performing value-initialization, and should zero-initialize - // the object, or - // 2) We're performing default-initialization of an object with a trivial - // constexpr default constructor, in which case we should start the - // lifetimes of all the base subobjects (there can be no data member - // subobjects in this case) per [basic.life]p1. - // Either way, ZeroInitialization is appropriate. - return ZeroInitialization(E, T); + if (ZeroInit) + return ZeroInitialization(E, T); + + Result = getDefaultInitValue(T); + return true; } const FunctionDecl *Definition = nullptr; @@ -8121,9 +9108,8 @@ bool RecordExprEvaluator::VisitCXXStdInitializerListExpr( bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { const CXXRecordDecl *ClosureClass = E->getLambdaClass(); - if (ClosureClass->isInvalidDecl()) return false; - - if (Info.checkingPotentialConstantExpression()) return true; + if (ClosureClass->isInvalidDecl()) + return false; const size_t NumFields = std::distance(ClosureClass->field_begin(), ClosureClass->field_end()); @@ -8183,7 +9169,8 @@ public: /// Visit an expression which constructs the value of this temporary. 
bool VisitConstructExpr(const Expr *E) { - APValue &Value = createTemporary(E, false, Result, *Info.CurrentCall); + APValue &Value = + Info.CurrentCall->createTemporary(E, E->getType(), false, Result); return EvaluateInPlace(Value, Info, Result, E); } @@ -8383,7 +9370,7 @@ VectorExprEvaluator::VisitInitListExpr(const InitListExpr *E) { bool VectorExprEvaluator::ZeroInitialization(const Expr *E) { - const VectorType *VT = E->getType()->getAs(); + const auto *VT = E->getType()->castAs(); QualType EltTy = VT->getElementType(); APValue ZeroElement; if (EltTy->isIntegerType()) @@ -8441,14 +9428,16 @@ namespace { bool VisitCallExpr(const CallExpr *E) { return handleCallExpr(E, Result, &This); } - bool VisitInitListExpr(const InitListExpr *E); + bool VisitInitListExpr(const InitListExpr *E, + QualType AllocType = QualType()); bool VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E); bool VisitCXXConstructExpr(const CXXConstructExpr *E, const LValue &Subobject, APValue *Value, QualType Type); - bool VisitStringLiteral(const StringLiteral *E) { - expandStringLiteral(Info, E, Result); + bool VisitStringLiteral(const StringLiteral *E, + QualType AllocType = QualType()) { + expandStringLiteral(Info, E, Result, AllocType); return true; } }; @@ -8460,6 +9449,15 @@ static bool EvaluateArray(const Expr *E, const LValue &This, return ArrayExprEvaluator(Info, This, Result).Visit(E); } +static bool EvaluateArrayNewInitList(EvalInfo &Info, LValue &This, + APValue &Result, const InitListExpr *ILE, + QualType AllocType) { + assert(ILE->isRValue() && ILE->getType()->isArrayType() && + "not an array rvalue"); + return ArrayExprEvaluator(Info, This, Result) + .VisitInitListExpr(ILE, AllocType); +} + // Return true iff the given array filler may depend on the element index. 
static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) { // For now, just whitelist non-class value-initialization and initialization @@ -8476,15 +9474,23 @@ static bool MaybeElementDependentArrayFiller(const Expr *FillerExpr) { return true; } -bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) { - const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType()); +bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E, + QualType AllocType) { + const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType( + AllocType.isNull() ? E->getType() : AllocType); if (!CAT) return Error(E); // C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...] // an appropriately-typed string literal enclosed in braces. - if (E->isStringLiteralInit()) - return Visit(E->getInit(0)); + if (E->isStringLiteralInit()) { + auto *SL = dyn_cast(E->getInit(0)->IgnoreParens()); + // FIXME: Support ObjCEncodeExpr here once we support it in + // ArrayExprEvaluator generally. 
+ if (!SL) + return Error(E); + return VisitStringLiteral(SL, AllocType); + } bool Success = true; @@ -8543,8 +9549,12 @@ bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) { } bool ArrayExprEvaluator::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) { + LValue CommonLV; if (E->getCommonExpr() && - !Evaluate(Info.CurrentCall->createTemporary(E->getCommonExpr(), false), + !Evaluate(Info.CurrentCall->createTemporary( + E->getCommonExpr(), + getStorageType(Info.Ctx, E->getCommonExpr()), false, + CommonLV), Info, E->getCommonExpr()->getSourceExpr())) return false; @@ -8762,6 +9772,7 @@ public: bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E); bool VisitSizeOfPackExpr(const SizeOfPackExpr *E); bool VisitSourceLocExpr(const SourceLocExpr *E); + bool VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E); // FIXME: Missing: array subscript of vector, member of vector }; @@ -8944,7 +9955,7 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { #define DEPENDENT_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_TYPE(ID, BASE) case Type::ID: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" case Type::Auto: case Type::DeducedTemplateSpecialization: llvm_unreachable("unexpected non-canonical or dependent type"); @@ -9008,6 +10019,9 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" return GCCTypeClass::None; case BuiltinType::Dependent: @@ -9161,6 +10175,8 @@ static QualType getObjectType(APValue::LValueBase B) { return E->getType(); } else if (B.is()) { return B.getTypeInfoType(); + } else if (B.is()) { + return B.getDynamicAllocType(); } return QualType(); @@ -9499,14 +10515,11 @@ bool 
IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, // size of the referenced object. switch (Info.EvalMode) { case EvalInfo::EM_ConstantExpression: - case EvalInfo::EM_PotentialConstantExpression: case EvalInfo::EM_ConstantFold: - case EvalInfo::EM_EvaluateForOverflow: case EvalInfo::EM_IgnoreSideEffects: // Leave it to IR generation. return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: - case EvalInfo::EM_PotentialConstantExpressionUnevaluated: // Reduce it to a constant now. return Success((Type & 2) ? 0 : -1, E); } @@ -10834,7 +11847,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) { Info.CCEDiag(E, diag::note_constexpr_pointer_subtraction_not_same_array); QualType Type = E->getLHS()->getType(); - QualType ElementType = Type->getAs()->getPointeeType(); + QualType ElementType = Type->castAs()->getPointeeType(); CharUnits ElementSize; if (!HandleSizeof(Info, E->getExprLoc(), ElementType, ElementSize)) @@ -11242,6 +12255,12 @@ bool IntExprEvaluator::VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) { return Success(E->getValue(), E); } +bool IntExprEvaluator::VisitConceptSpecializationExpr( + const ConceptSpecializationExpr *E) { + return Success(E->isSatisfied(), E); +} + + bool FixedPointExprEvaluator::VisitUnaryOperator(const UnaryOperator *E) { switch (E->getOpcode()) { default: @@ -11731,9 +12750,9 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { if (!Visit(E->getSubExpr())) return false; - QualType To = E->getType()->getAs()->getElementType(); + QualType To = E->getType()->castAs()->getElementType(); QualType From - = E->getSubExpr()->getType()->getAs()->getElementType(); + = E->getSubExpr()->getType()->castAs()->getElementType(); return HandleFloatToFloatCast(Info, E, From, To, Result.FloatReal) && HandleFloatToFloatCast(Info, E, From, To, Result.FloatImag); @@ -11743,9 +12762,9 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { if (!Visit(E->getSubExpr())) return false; - QualType To = 
E->getType()->getAs()->getElementType(); + QualType To = E->getType()->castAs()->getElementType(); QualType From - = E->getSubExpr()->getType()->getAs()->getElementType(); + = E->getSubExpr()->getType()->castAs()->getElementType(); Result.makeComplexInt(); return HandleFloatToIntCast(Info, E, From, Result.FloatReal, To, Result.IntReal) && @@ -11767,9 +12786,9 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { if (!Visit(E->getSubExpr())) return false; - QualType To = E->getType()->getAs()->getElementType(); + QualType To = E->getType()->castAs()->getElementType(); QualType From - = E->getSubExpr()->getType()->getAs()->getElementType(); + = E->getSubExpr()->getType()->castAs()->getElementType(); Result.IntReal = HandleIntToIntCast(Info, E, To, From, Result.IntReal); Result.IntImag = HandleIntToIntCast(Info, E, To, From, Result.IntImag); @@ -12143,17 +13162,98 @@ public: bool VisitCallExpr(const CallExpr *E) { switch (E->getBuiltinCallee()) { - default: - return ExprEvaluatorBaseTy::VisitCallExpr(E); case Builtin::BI__assume: case Builtin::BI__builtin_assume: // The argument is not evaluated! return true; + + case Builtin::BI__builtin_operator_delete: + return HandleOperatorDeleteCall(Info, E); + + default: + break; } + + return ExprEvaluatorBaseTy::VisitCallExpr(E); } + + bool VisitCXXDeleteExpr(const CXXDeleteExpr *E); }; } // end anonymous namespace +bool VoidExprEvaluator::VisitCXXDeleteExpr(const CXXDeleteExpr *E) { + // We cannot speculatively evaluate a delete expression. 
+ if (Info.SpeculativeEvaluationDepth) + return false; + + FunctionDecl *OperatorDelete = E->getOperatorDelete(); + if (!OperatorDelete->isReplaceableGlobalAllocationFunction()) { + Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) + << isa(OperatorDelete) << OperatorDelete; + return false; + } + + const Expr *Arg = E->getArgument(); + + LValue Pointer; + if (!EvaluatePointer(Arg, Pointer, Info)) + return false; + if (Pointer.Designator.Invalid) + return false; + + // Deleting a null pointer has no effect. + if (Pointer.isNullPointer()) { + // This is the only case where we need to produce an extension warning: + // the only other way we can succeed is if we find a dynamic allocation, + // and we will have warned when we allocated it in that case. + if (!Info.getLangOpts().CPlusPlus2a) + Info.CCEDiag(E, diag::note_constexpr_new); + return true; + } + + Optional Alloc = CheckDeleteKind( + Info, E, Pointer, E->isArrayForm() ? DynAlloc::ArrayNew : DynAlloc::New); + if (!Alloc) + return false; + QualType AllocType = Pointer.Base.getDynamicAllocType(); + + // For the non-array case, the designator must be empty if the static type + // does not have a virtual destructor. + if (!E->isArrayForm() && Pointer.Designator.Entries.size() != 0 && + !hasVirtualDestructor(Arg->getType()->getPointeeType())) { + Info.FFDiag(E, diag::note_constexpr_delete_base_nonvirt_dtor) + << Arg->getType()->getPointeeType() << AllocType; + return false; + } + + // For a class type with a virtual destructor, the selected operator delete + // is the one looked up when building the destructor. 
+ if (!E->isArrayForm() && !E->isGlobalDelete()) { + const FunctionDecl *VirtualDelete = getVirtualOperatorDelete(AllocType); + if (VirtualDelete && + !VirtualDelete->isReplaceableGlobalAllocationFunction()) { + Info.FFDiag(E, diag::note_constexpr_new_non_replaceable) + << isa(VirtualDelete) << VirtualDelete; + return false; + } + } + + if (!HandleDestruction(Info, E->getExprLoc(), Pointer.getLValueBase(), + (*Alloc)->Value, AllocType)) + return false; + + if (!Info.HeapAllocs.erase(Pointer.Base.dyn_cast())) { + // The element was already erased. This means the destructor call also + // deleted the object. + // FIXME: This probably results in undefined behavior before we get this + // far, and should be diagnosed elsewhere first. + Info.FFDiag(E, diag::note_constexpr_double_delete); + return false; + } + + return true; +} + static bool EvaluateVoid(const Expr *E, EvalInfo &Info) { assert(E->isRValue() && E->getType()->isVoidType()); return VoidExprEvaluator(Info).Visit(E); @@ -12203,13 +13303,14 @@ static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) { return true; } else if (T->isArrayType()) { LValue LV; - APValue &Value = createTemporary(E, false, LV, *Info.CurrentCall); + APValue &Value = + Info.CurrentCall->createTemporary(E, T, false, LV); if (!EvaluateArray(E, LV, Value, Info)) return false; Result = Value; } else if (T->isRecordType()) { LValue LV; - APValue &Value = createTemporary(E, false, LV, *Info.CurrentCall); + APValue &Value = Info.CurrentCall->createTemporary(E, T, false, LV); if (!EvaluateRecord(E, LV, Value, Info)) return false; Result = Value; @@ -12223,7 +13324,7 @@ static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) { QualType Unqual = T.getAtomicUnqualifiedType(); if (Unqual->isArrayType() || Unqual->isRecordType()) { LValue LV; - APValue &Value = createTemporary(E, false, LV, *Info.CurrentCall); + APValue &Value = Info.CurrentCall->createTemporary(E, Unqual, false, LV); if (!EvaluateAtomic(E, &LV, Value, 
Info)) return false; } else { @@ -12273,6 +13374,18 @@ static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, /// EvaluateAsRValue - Try to evaluate this expression, performing an implicit /// lvalue-to-rvalue cast if it is an lvalue. static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { + if (Info.EnableNewConstInterp) { + auto &InterpCtx = Info.Ctx.getInterpContext(); + switch (InterpCtx.evaluateAsRValue(Info, E, Result)) { + case interp::InterpResult::Success: + return true; + case interp::InterpResult::Fail: + return false; + case interp::InterpResult::Bail: + break; + } + } + if (E->getType().isNull()) return false; @@ -12290,7 +13403,8 @@ static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { } // Check this core constant expression is a constant expression. - return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result); + return CheckConstantExpression(Info, E->getExprLoc(), E->getType(), Result) && + CheckMemoryLeaks(Info); } static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, @@ -12439,10 +13553,12 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold); Info.InConstantContext = InConstantContext; LValue LV; - if (!EvaluateLValue(this, LV, Info) || Result.HasSideEffects || + CheckedTemporaries CheckedTemps; + if (!EvaluateLValue(this, LV, Info) || !Info.discardCleanups() || + Result.HasSideEffects || !CheckLValueConstantExpression(Info, getExprLoc(), Ctx.getLValueReferenceType(getType()), LV, - Expr::EvaluateForCodeGen)) + Expr::EvaluateForCodeGen, CheckedTemps)) return false; LV.moveInto(Result.Val); @@ -12458,11 +13574,15 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, ConstExprUsage Usage, EvalInfo Info(Ctx, Result, EM); Info.InConstantContext = true; - if (!::Evaluate(Result.Val, Info, this)) + if (!::Evaluate(Result.Val, Info, this) || Result.HasSideEffects) 
return false; - return CheckConstantExpression(Info, getExprLoc(), getType(), Result.Val, - Usage); + if (!Info.discardCleanups()) + llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + + return CheckConstantExpression(Info, getExprLoc(), getStorageType(Ctx, this), + Result.Val, Usage) && + CheckMemoryLeaks(Info); } bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, @@ -12480,11 +13600,29 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, Expr::EvalStatus EStatus; EStatus.Diag = &Notes; - EvalInfo InitInfo(Ctx, EStatus, VD->isConstexpr() + EvalInfo Info(Ctx, EStatus, VD->isConstexpr() ? EvalInfo::EM_ConstantExpression : EvalInfo::EM_ConstantFold); - InitInfo.setEvaluatingDecl(VD, Value); - InitInfo.InConstantContext = true; + Info.setEvaluatingDecl(VD, Value); + Info.InConstantContext = true; + + SourceLocation DeclLoc = VD->getLocation(); + QualType DeclTy = VD->getType(); + + if (Info.EnableNewConstInterp) { + auto &InterpCtx = const_cast(Ctx).getInterpContext(); + switch (InterpCtx.evaluateAsInitializer(Info, VD, Value)) { + case interp::InterpResult::Fail: + // Bail out if an error was encountered. + return false; + case interp::InterpResult::Success: + // Evaluation succeeded and value was set. + return CheckConstantExpression(Info, DeclLoc, DeclTy, Value); + case interp::InterpResult::Bail: + // Evaluate the value again for the tree evaluator to use. + break; + } + } LValue LVal; LVal.set(VD); @@ -12494,20 +13632,62 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, // zero-initialized before any other initialization takes place. // This behavior is not present in C. 
if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() && - !VD->getType()->isReferenceType()) { - ImplicitValueInitExpr VIE(VD->getType()); - if (!EvaluateInPlace(Value, InitInfo, LVal, &VIE, + !DeclTy->isReferenceType()) { + ImplicitValueInitExpr VIE(DeclTy); + if (!EvaluateInPlace(Value, Info, LVal, &VIE, /*AllowNonLiteralTypes=*/true)) return false; } - if (!EvaluateInPlace(Value, InitInfo, LVal, this, + if (!EvaluateInPlace(Value, Info, LVal, this, /*AllowNonLiteralTypes=*/true) || EStatus.HasSideEffects) return false; - return CheckConstantExpression(InitInfo, VD->getLocation(), VD->getType(), - Value); + // At this point, any lifetime-extended temporaries are completely + // initialized. + Info.performLifetimeExtension(); + + if (!Info.discardCleanups()) + llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + + return CheckConstantExpression(Info, DeclLoc, DeclTy, Value) && + CheckMemoryLeaks(Info); +} + +bool VarDecl::evaluateDestruction( + SmallVectorImpl &Notes) const { + assert(getEvaluatedValue() && !getEvaluatedValue()->isAbsent() && + "cannot evaluate destruction of non-constant-initialized variable"); + + Expr::EvalStatus EStatus; + EStatus.Diag = &Notes; + + // Make a copy of the value for the destructor to mutate. + APValue DestroyedValue = *getEvaluatedValue(); + + EvalInfo Info(getASTContext(), EStatus, EvalInfo::EM_ConstantExpression); + Info.setEvaluatingDecl(this, DestroyedValue, + EvalInfo::EvaluatingDeclKind::Dtor); + Info.InConstantContext = true; + + SourceLocation DeclLoc = getLocation(); + QualType DeclTy = getType(); + + LValue LVal; + LVal.set(this); + + // FIXME: Consider storing whether this variable has constant destruction in + // the EvaluatedStmt so that CodeGen can query it. 
+ if (!HandleDestruction(Info, DeclLoc, LVal.Base, DestroyedValue, DeclTy) || + EStatus.HasSideEffects) + return false; + + if (!Info.discardCleanups()) + llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + + ensureEvaluatedStmt()->HasConstantDestruction = true; + return true; } /// isEvaluatable - Call EvaluateAsRValue to see if this expression can be @@ -12546,8 +13726,9 @@ APSInt Expr::EvaluateKnownConstIntCheckOverflow( EvalResult EVResult; EVResult.Diag = Diag; - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_EvaluateForOverflow); + EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); Info.InConstantContext = true; + Info.CheckingForUndefinedBehavior = true; bool Result = ::EvaluateAsRValue(Info, this, EVResult.Val); (void)Result; @@ -12564,7 +13745,8 @@ void Expr::EvaluateForOverflow(const ASTContext &Ctx) const { bool IsConst; EvalResult EVResult; if (!FastEvaluateAsRValue(this, EVResult, Ctx, IsConst)) { - EvalInfo Info(Ctx, EVResult, EvalInfo::EM_EvaluateForOverflow); + EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); + Info.CheckingForUndefinedBehavior = true; (void)::EvaluateAsRValue(Info, this, EVResult.Val); } } @@ -12752,6 +13934,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { case Expr::CXXBoolLiteralExprClass: case Expr::CXXScalarValueInitExprClass: case Expr::TypeTraitExprClass: + case Expr::ConceptSpecializationExprClass: case Expr::ArrayTypeTraitExprClass: case Expr::ExpressionTraitExprClass: case Expr::CXXNoexceptExprClass: @@ -12766,6 +13949,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { return CheckEvalInICE(E, Ctx); return ICEDiag(IK_NotICE, E->getBeginLoc()); } + case Expr::CXXRewrittenBinaryOperatorClass: + return CheckICE(cast(E)->getSemanticForm(), + Ctx); case Expr::DeclRefExprClass: { if (isa(cast(E)->getDecl())) return NoDiag(); @@ -13111,7 +14297,11 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, EvalInfo Info(Ctx, Status, 
EvalInfo::EM_ConstantExpression); APValue Scratch; - bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch); + bool IsConstExpr = + ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch) && + // FIXME: We don't produce a diagnostic for this, but the callers that + // call us on arbitrary full-expressions should generally not care. + Info.discardCleanups() && !Status.HasSideEffects; if (!Diags.empty()) { IsConstExpr = false; @@ -13163,7 +14353,8 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, // Build fake call to Callee. CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr, ArgValues.data()); - return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects; + return Evaluate(Value, Info, this) && Info.discardCleanups() && + !Info.EvalStatus.HasSideEffects; } bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, @@ -13178,9 +14369,21 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, Expr::EvalStatus Status; Status.Diag = &Diags; - EvalInfo Info(FD->getASTContext(), Status, - EvalInfo::EM_PotentialConstantExpression); + EvalInfo Info(FD->getASTContext(), Status, EvalInfo::EM_ConstantExpression); Info.InConstantContext = true; + Info.CheckingPotentialConstantExpression = true; + + // The constexpr VM attempts to compile all methods to bytecode here. + if (Info.EnableNewConstInterp) { + auto &InterpCtx = Info.Ctx.getInterpContext(); + switch (InterpCtx.isPotentialConstantExpr(Info, FD)) { + case interp::InterpResult::Success: + case interp::InterpResult::Fail: + return Diags.empty(); + case interp::InterpResult::Bail: + break; + } + } const CXXMethodDecl *MD = dyn_cast(FD); const CXXRecordDecl *RD = MD ? 
MD->getParent()->getCanonicalDecl() : nullptr; @@ -13219,8 +14422,9 @@ bool Expr::isPotentialConstantExprUnevaluated(Expr *E, Status.Diag = &Diags; EvalInfo Info(FD->getASTContext(), Status, - EvalInfo::EM_PotentialConstantExpressionUnevaluated); + EvalInfo::EM_ConstantExpressionUnevaluated); Info.InConstantContext = true; + Info.CheckingPotentialConstantExpression = true; // Fabricate a call stack frame to give the arguments a plausible cover story. ArrayRef Args; diff --git a/lib/AST/ExternalASTMerger.cpp b/lib/AST/ExternalASTMerger.cpp index 61e657da7c9..f678c2dd3b5 100644 --- a/lib/AST/ExternalASTMerger.cpp +++ b/lib/AST/ExternalASTMerger.cpp @@ -101,21 +101,103 @@ private: ExternalASTMerger &Parent; ASTImporter Reverse; const ExternalASTMerger::OriginMap &FromOrigins; - + /// @see ExternalASTMerger::ImporterSource::Temporary + bool TemporarySource; + /// Map of imported declarations back to the declarations they originated + /// from. + llvm::DenseMap ToOrigin; + /// @see ExternalASTMerger::ImporterSource::Merger + ExternalASTMerger *SourceMerger; llvm::raw_ostream &logs() { return Parent.logs(); } public: LazyASTImporter(ExternalASTMerger &_Parent, ASTContext &ToContext, - FileManager &ToFileManager, ASTContext &FromContext, - FileManager &FromFileManager, - const ExternalASTMerger::OriginMap &_FromOrigins) - : ASTImporter(ToContext, ToFileManager, FromContext, FromFileManager, - /*MinimalImport=*/true), - Parent(_Parent), Reverse(FromContext, FromFileManager, ToContext, - ToFileManager, /*MinimalImport=*/true), FromOrigins(_FromOrigins) {} + FileManager &ToFileManager, + const ExternalASTMerger::ImporterSource &S, + std::shared_ptr SharedState) + : ASTImporter(ToContext, ToFileManager, S.getASTContext(), + S.getFileManager(), + /*MinimalImport=*/true, SharedState), + Parent(_Parent), + Reverse(S.getASTContext(), S.getFileManager(), ToContext, ToFileManager, + /*MinimalImport=*/true), + FromOrigins(S.getOriginMap()), TemporarySource(S.isTemporary()), + 
SourceMerger(S.getMerger()) {} + + llvm::Expected ImportImpl(Decl *FromD) override { + if (!TemporarySource || !SourceMerger) + return ASTImporter::ImportImpl(FromD); + + // If we get here, then this source is importing from a temporary ASTContext + // that also has another ExternalASTMerger attached. It could be + // possible that the current ExternalASTMerger and the temporary ASTContext + // share a common ImporterSource, which means that the temporary + // AST could contain declarations that were imported from a source + // that this ExternalASTMerger can access directly. Instead of importing + // such declarations from the temporary ASTContext, they should instead + // be directly imported by this ExternalASTMerger from the original + // source. This way the ExternalASTMerger can safely do a minimal import + // without creating incomplete declarations originated from a temporary + // ASTContext. If we would try to complete such declarations later on, we + // would fail to do so as their temporary AST could be deleted (which means + // that the missing parts of the minimally imported declaration in that + // ASTContext were also deleted). + // + // The following code tracks back any declaration that needs to be + // imported from the temporary ASTContext to a persistent ASTContext. + // Then the ExternalASTMerger tries to import from the persistent + // ASTContext directly by using the associated ASTImporter. If that + // succeeds, this ASTImporter just maps the declarations imported by + // the other (persistent) ASTImporter to this (temporary) ASTImporter. + // The steps can be visualized like this: + // + // Target AST <--- 3. Indirect import --- Persistent AST + // ^ of persistent decl ^ + // | | + // 1. Current import 2. Tracking back to persistent decl + // 4. Map persistent decl | + // & pretend we imported. 
| + // | | + // Temporary AST -------------------------------' + + // First, ask the ExternalASTMerger of the source where the temporary + // declaration originated from. + Decl *Persistent = SourceMerger->FindOriginalDecl(FromD); + // FromD isn't from a persistent AST, so just do a normal import. + if (!Persistent) + return ASTImporter::ImportImpl(FromD); + // Now ask the current ExternalASTMerger to try import the persistent + // declaration into the target. + ASTContext &PersistentCtx = Persistent->getASTContext(); + ASTImporter &OtherImporter = Parent.ImporterForOrigin(PersistentCtx); + // Check that we never end up in the current Importer again. + assert((&PersistentCtx != &getFromContext()) && (&OtherImporter != this) && + "Delegated to same Importer?"); + auto DeclOrErr = OtherImporter.Import(Persistent); + // Errors when importing the persistent decl are treated as if we + // had errors with importing the temporary decl. + if (!DeclOrErr) + return DeclOrErr.takeError(); + Decl *D = *DeclOrErr; + // Tell the current ASTImporter that this has already been imported + // to prevent any further queries for the temporary decl. + MapImported(FromD, D); + return D; + } + + /// Implements the ASTImporter interface for tracking back a declaration + /// to its original declaration it came from. + Decl *GetOriginalDecl(Decl *To) override { + auto It = ToOrigin.find(To); + if (It != ToOrigin.end()) + return It->second; + return nullptr; + } /// Whenever a DeclContext is imported, ensure that ExternalASTSource's origin /// map is kept up to date. Also set the appropriate flags. 
void Imported(Decl *From, Decl *To) override { + ToOrigin[To] = From; + if (auto *ToDC = dyn_cast(To)) { const bool LoggingEnabled = Parent.LoggingEnabled(); if (LoggingEnabled) @@ -314,28 +396,40 @@ void ExternalASTMerger::RecordOriginImpl(const DeclContext *ToDC, DCOrigin Origi ExternalASTMerger::ExternalASTMerger(const ImporterTarget &Target, llvm::ArrayRef Sources) : LogStream(&llvm::nulls()), Target(Target) { + SharedState = std::make_shared( + *Target.AST.getTranslationUnitDecl()); AddSources(Sources); } +Decl *ExternalASTMerger::FindOriginalDecl(Decl *D) { + assert(&D->getASTContext() == &Target.AST); + for (const auto &I : Importers) + if (auto Result = I->GetOriginalDecl(D)) + return Result; + return nullptr; +} + void ExternalASTMerger::AddSources(llvm::ArrayRef Sources) { for (const ImporterSource &S : Sources) { - assert(&S.AST != &Target.AST); - Importers.push_back(llvm::make_unique( - *this, Target.AST, Target.FM, S.AST, S.FM, S.OM)); + assert(&S.getASTContext() != &Target.AST); + // Check that the associated merger actually imports into the source AST. 
+ assert(!S.getMerger() || &S.getMerger()->Target.AST == &S.getASTContext()); + Importers.push_back(std::make_unique( + *this, Target.AST, Target.FM, S, SharedState)); } } void ExternalASTMerger::RemoveSources(llvm::ArrayRef Sources) { if (LoggingEnabled()) for (const ImporterSource &S : Sources) - logs() << "(ExternalASTMerger*)" << (void*)this - << " removing source (ASTContext*)" << (void*)&S.AST + logs() << "(ExternalASTMerger*)" << (void *)this + << " removing source (ASTContext*)" << (void *)&S.getASTContext() << "\n"; Importers.erase( std::remove_if(Importers.begin(), Importers.end(), [&Sources](std::unique_ptr &Importer) -> bool { for (const ImporterSource &S : Sources) { - if (&Importer->getFromContext() == &S.AST) + if (&Importer->getFromContext() == &S.getASTContext()) return true; } return false; @@ -345,7 +439,7 @@ void ExternalASTMerger::RemoveSources(llvm::ArrayRef Sources) { std::pair Origin = *OI; bool Erase = false; for (const ImporterSource &S : Sources) { - if (&S.AST == Origin.second.AST) { + if (&S.getASTContext() == Origin.second.AST) { Erase = true; break; } diff --git a/lib/AST/FormatString.cpp b/lib/AST/FormatString.cpp index 578d5bc5673..fcc0b3b11e2 100644 --- a/lib/AST/FormatString.cpp +++ b/lib/AST/FormatString.cpp @@ -359,6 +359,7 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { case BuiltinType::SChar: case BuiltinType::UChar: case BuiltinType::Char_U: + case BuiltinType::Bool: return Match; } return NoMatch; @@ -386,6 +387,9 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { case BuiltinType::SChar: case BuiltinType::Char_U: case BuiltinType::UChar: + case BuiltinType::Bool: + if (T == C.UnsignedShortTy || T == C.ShortTy) + return NoMatchTypeConfusion; return T == C.UnsignedCharTy || T == C.SignedCharTy ? 
Match : NoMatch; case BuiltinType::Short: diff --git a/lib/AST/FormatStringParsing.h b/lib/AST/FormatStringParsing.h index 9da829adcb4..764e5d46394 100644 --- a/lib/AST/FormatStringParsing.h +++ b/lib/AST/FormatStringParsing.h @@ -1,3 +1,16 @@ +//===----- FormatStringParsing.h - Format String Parsing --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides some shared functions between printf and scanf format string +// parsing code. +// +//===----------------------------------------------------------------------===// + #ifndef LLVM_CLANG_LIB_ANALYSIS_FORMATSTRINGPARSING_H #define LLVM_CLANG_LIB_ANALYSIS_FORMATSTRINGPARSING_H diff --git a/lib/AST/InheritViz.cpp b/lib/AST/InheritViz.cpp index 4b3d5bee563..2ed0ce1c79c 100644 --- a/lib/AST/InheritViz.cpp +++ b/lib/AST/InheritViz.cpp @@ -90,8 +90,8 @@ void InheritanceHierarchyWriter::WriteNode(QualType Type, bool FromVirtual) { Out << " \"];\n"; // Display the base classes. - const CXXRecordDecl *Decl - = static_cast(Type->getAs()->getDecl()); + const auto *Decl = + static_cast(Type->castAs()->getDecl()); for (const auto &Base : Decl->bases()) { QualType CanonBaseType = Context.getCanonicalType(Base.getType()); diff --git a/lib/AST/Interp/Block.cpp b/lib/AST/Interp/Block.cpp new file mode 100644 index 00000000000..5fc93eb39f4 --- /dev/null +++ b/lib/AST/Interp/Block.cpp @@ -0,0 +1,87 @@ +//===--- Block.cpp - Allocated blocks for the interpreter -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the classes describing allocated blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Block.h"
+#include "Pointer.h"
+
+using namespace clang;
+using namespace clang::interp;
+
+// Each non-static block keeps an intrusive doubly-linked list of the Pointers
+// that refer to it; for static blocks the list helpers below do nothing.
+void Block::addPointer(Pointer *P) {
+  if (IsStatic)
+    return;
+  if (Pointers)
+    Pointers->Prev = P;
+  P->Next = Pointers;
+  P->Prev = nullptr;
+  Pointers = P;
+}
+
+void Block::removePointer(Pointer *P) {
+  if (IsStatic)
+    return;
+  if (Pointers == P)
+    Pointers = P->Next;
+  if (P->Prev)
+    P->Prev->Next = P->Next;
+  if (P->Next)
+    P->Next->Prev = P->Prev;
+}
+
+void Block::cleanup() {
+  if (Pointers == nullptr && IsDead)
+    (reinterpret_cast<DeadBlock *>(this + 1) - 1)->free();
+}
+
+void Block::movePointer(Pointer *From, Pointer *To) {
+  if (IsStatic)
+    return;
+  To->Prev = From->Prev;
+  if (To->Prev)
+    To->Prev->Next = To;
+  To->Next = From->Next;
+  if (To->Next)
+    To->Next->Prev = To;
+  if (Pointers == From)
+    Pointers = To;
+
+  From->Prev = nullptr;
+  From->Next = nullptr;
+}
+
+DeadBlock::DeadBlock(DeadBlock *&Root, Block *Blk)
+    : Root(Root), B(Blk->Desc, Blk->IsExtern, Blk->IsStatic, /*isDead=*/true) {
+  // Add the block to the front of the chain of dead blocks.
+  if (Root)
+    Root->Prev = this;
+
+  Next = Root;
+  Prev = nullptr;
+  Root = this;
+
+  // Transfer pointers: they now refer to the copy embedded in this DeadBlock.
+  B.Pointers = Blk->Pointers;
+  for (Pointer *P = Blk->Pointers; P; P = P->Next)
+    P->Pointee = &B;
+}
+
+void DeadBlock::free() {
+  if (Prev)
+    Prev->Next = Next;
+  if (Next)
+    Next->Prev = Prev;
+  if (Root == this)
+    Root = Next;
+  ::free(this);
+}
diff --git a/lib/AST/Interp/Block.h b/lib/AST/Interp/Block.h
new file mode 100644
index 00000000000..97fb9a3ca09
--- /dev/null
+++ b/lib/AST/Interp/Block.h
@@ -0,0 +1,143 @@
+//===--- Block.h - Allocated blocks for the interpreter ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the classes describing allocated blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_INTERP_BLOCK_H
+#define LLVM_CLANG_AST_INTERP_BLOCK_H
+
+#include "Descriptor.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ComparisonCategories.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace interp {
+class Block;
+class DeadBlock;
+class Context;
+class InterpState;
+class Pointer;
+class Function;
+enum PrimType : unsigned;
+
+/// A memory block, either on the stack or in the heap.
+///
+/// The storage described by the block immediately follows it in memory.
+class Block {
+public:
+  // Creates a new block.
+  Block(const llvm::Optional<unsigned> &DeclID, Descriptor *Desc,
+        bool IsStatic = false, bool IsExtern = false)
+      : DeclID(DeclID), IsStatic(IsStatic), IsExtern(IsExtern), Desc(Desc) {}
+
+  // Creates a new block whose declaration ID is set to (unsigned)-1.
+  Block(Descriptor *Desc, bool IsStatic = false, bool IsExtern = false)
+      : DeclID((unsigned)-1), IsStatic(IsStatic), IsExtern(IsExtern),
+        Desc(Desc) {}
+
+  /// Returns the block's descriptor.
+  Descriptor *getDescriptor() const { return Desc; }
+  /// Checks if the block has any live pointers.
+  bool hasPointers() const { return Pointers; }
+  /// Checks if the block is extern.
+  bool isExtern() const { return IsExtern; }
+  /// Checks if the block has static storage duration.
+  bool isStatic() const { return IsStatic; }
+  /// Checks if the block is temporary.
+  bool isTemporary() const { return Desc->IsTemporary; }
+  /// Returns the size of the block.
+  InterpSize getSize() const { return Desc->getAllocSize(); }
+  /// Returns the declaration ID.
+  llvm::Optional<unsigned> getDeclID() const { return DeclID; }
+
+  /// Returns a pointer to the stored data.
+  char *data() { return reinterpret_cast<char *>(this + 1); }
+
+  /// Returns a view over the data.
+  template <typename T>
+  T &deref() { return *reinterpret_cast<T *>(data()); }
+
+  /// Zeroes the storage, then invokes the descriptor's constructor, if any.
+  void invokeCtor() {
+    std::memset(data(), 0, getSize());
+    if (Desc->CtorFn)
+      Desc->CtorFn(this, data(), Desc->IsConst, Desc->IsMutable,
+                   /*isActive=*/true, Desc);
+  }
+
+protected:
+  friend class Pointer;
+  friend class DeadBlock;
+  friend class InterpState;
+
+  /// Creates the block embedded in a DeadBlock. Note that IsExtern precedes
+  /// IsStatic here, unlike in the public constructors.
+  Block(Descriptor *Desc, bool IsExtern, bool IsStatic, bool IsDead)
+      : IsStatic(IsStatic), IsExtern(IsExtern), IsDead(IsDead), Desc(Desc) {}
+
+  // Deletes a dead block once no pointers refer to it any more.
+  void cleanup();
+
+  // Pointer chain management.
+  void addPointer(Pointer *P);
+  void removePointer(Pointer *P);
+  void movePointer(Pointer *From, Pointer *To);
+
+  /// Start of the chain of pointers.
+  Pointer *Pointers = nullptr;
+  /// Unique identifier of the declaration.
+  llvm::Optional<unsigned> DeclID;
+  /// Flag indicating if the block has static storage duration.
+  bool IsStatic = false;
+  /// Flag indicating if the block is an extern.
+  bool IsExtern = false;
+  /// Flag indicating if the block is dead.
+  bool IsDead = false;
+  /// Pointer to the stack slot descriptor.
+  Descriptor *Desc;
+};
+
+/// Descriptor for a dead block.
+///
+/// Dead blocks are chained in a doubly-linked list to deallocate them
+/// whenever pointers become dead.
+class DeadBlock {
+public:
+  /// Copies the block.
+  DeadBlock(DeadBlock *&Root, Block *Blk);
+
+  /// Returns a pointer to the stored data.
+  char *data() { return B.data(); }
+
+private:
+  friend class Block;
+  friend class InterpState;
+
+  void free();
+
+  /// Root pointer of the list.
+  DeadBlock *&Root;
+  /// Previous block in the list.
+  DeadBlock *Prev;
+  /// Next block in the list.
+  DeadBlock *Next;
+
+  /// Actual block storing data and tracking pointers.
+  Block B;
+};
+
+} // namespace interp
+} // namespace clang
+
+#endif
diff --git a/lib/AST/Interp/Boolean.h b/lib/AST/Interp/Boolean.h
new file mode 100644
index 00000000000..3e6c8b5da9f
--- /dev/null
+++ b/lib/AST/Interp/Boolean.h
@@ -0,0 +1,152 @@
+//===--- Boolean.h - Wrapper for boolean types for the VM -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_INTERP_BOOLEAN_H
+#define LLVM_CLANG_AST_INTERP_BOOLEAN_H
+
+#include <cstddef>
+#include <cstdint>
+#include "Integral.h"
+#include "clang/AST/APValue.h"
+#include "clang/AST/ComparisonCategories.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace interp {
+
+/// Wrapper around boolean types.
+class Boolean {
+private:
+  /// Underlying boolean.
+  bool V;
+
+  /// Construct a wrapper from a boolean.
+  explicit Boolean(bool V) : V(V) {}
+
+public:
+  /// Zero-initializes a boolean.
+  Boolean() : V(false) {}
+
+  bool operator<(Boolean RHS) const { return V < RHS.V; }
+  bool operator>(Boolean RHS) const { return V > RHS.V; }
+  bool operator<=(Boolean RHS) const { return V <= RHS.V; }
+  bool operator>=(Boolean RHS) const { return V >= RHS.V; }
+  bool operator==(Boolean RHS) const { return V == RHS.V; }
+  bool operator!=(Boolean RHS) const { return V != RHS.V; }
+
+  bool operator>(unsigned RHS) const { return static_cast<unsigned>(V) > RHS; }
+
+  /// Unary minus returns the value unchanged; complement always yields true
+  /// (presumably mirroring integer promotion followed by conversion to bool).
+  Boolean operator-() const { return Boolean(V); }
+  Boolean operator~() const { return Boolean(true); }
+
+  explicit operator unsigned() const { return V; }
+  explicit operator int64_t() const { return V; }
+  explicit operator uint64_t() const { return V; }
+
+  APSInt toAPSInt() const {
+    return APSInt(APInt(1, static_cast<uint64_t>(V), false), true);
+  }
+  APSInt toAPSInt(unsigned NumBits) const {
+    return APSInt(toAPSInt().zextOrTrunc(NumBits), true);
+  }
+  APValue toAPValue() const { return APValue(toAPSInt()); }
+
+  Boolean toUnsigned() const { return *this; }
+
+  constexpr static unsigned bitWidth() { return 1; }
+  bool isZero() const { return !V; }
+  bool isMin() const { return isZero(); }
+
+  constexpr static bool isMinusOne() { return false; }
+
+  constexpr static bool isSigned() { return false; }
+
+  constexpr static bool isNegative() { return false; }
+  constexpr static bool isPositive() { return !isNegative(); }
+
+  ComparisonCategoryResult compare(const Boolean &RHS) const {
+    return Compare(V, RHS.V);
+  }
+
+  unsigned countLeadingZeros() const { return V ? 0 : 1; }
+
+  Boolean truncate(unsigned TruncBits) const { return *this; }
+
+  void print(llvm::raw_ostream &OS) const { OS << (V ? "true" : "false"); }
+
+  static Boolean min(unsigned NumBits) { return Boolean(false); }
+  static Boolean max(unsigned NumBits) { return Boolean(true); }
+
+  template <typename T>
+  static typename std::enable_if<std::is_integral<T>::value, Boolean>::type
+  from(T Value) {
+    return Boolean(Value != 0);
+  }
+
+  template <unsigned SrcBits, bool SrcSign>
+  static typename std::enable_if<SrcBits != 0, Boolean>::type from(
+      Integral<SrcBits, SrcSign> Value) {
+    return Boolean(!Value.isZero());
+  }
+
+  template <bool SrcSign>
+  static Boolean from(Integral<0, SrcSign> Value) {
+    return Boolean(!Value.isZero());
+  }
+
+  static Boolean zero() { return from(false); }
+
+  template <typename T>
+  static Boolean from(T Value, unsigned NumBits) {
+    return Boolean(Value);
+  }
+
+  static bool inRange(int64_t Value, unsigned NumBits) {
+    return Value == 0 || Value == 1;
+  }
+
+  /// The arithmetic helpers below store their result in *R and return false
+  /// (presumably meaning "no overflow" -- confirm against Integral).
+  static bool increment(Boolean A, Boolean *R) {
+    *R = Boolean(true);
+    return false;
+  }
+
+  static bool decrement(Boolean A, Boolean *R) {
+    llvm_unreachable("Cannot decrement booleans");
+  }
+
+  static bool add(Boolean A, Boolean B, unsigned OpBits, Boolean *R) {
+    *R = Boolean(A.V || B.V);
+    return false;
+  }
+
+  static bool sub(Boolean A, Boolean B, unsigned OpBits, Boolean *R) {
+    *R = Boolean(A.V ^ B.V);
+    return false;
+  }
+
+  static bool mul(Boolean A, Boolean B, unsigned OpBits, Boolean *R) {
+    *R = Boolean(A.V && B.V);
+    return false;
+  }
+};
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Boolean &B) {
+  B.print(OS);
+  return OS;
+}
+
+} // namespace interp
+} // namespace clang
+
+#endif
diff --git a/lib/AST/Interp/ByteCodeEmitter.cpp b/lib/AST/Interp/ByteCodeEmitter.cpp
new file mode 100644
index 00000000000..7a4569820a1
--- /dev/null
+++ b/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -0,0 +1,175 @@
+//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ByteCodeEmitter.h"
+#include "Context.h"
+#include "Opcode.h"
+#include "Program.h"
+#include "clang/AST/DeclCXX.h"
+
+using namespace clang;
+using namespace clang::interp;
+
+using APSInt = llvm::APSInt;
+using Error = llvm::Error;
+
+Expected<Function *> ByteCodeEmitter::compileFunc(const FunctionDecl *F) {
+  // Do not try to compile undefined functions.
+  if (!F->isDefined(F) || (!F->hasBody() && F->willHaveBody()))
+    return nullptr;
+
+  // Set up argument indices.
+  unsigned ParamOffset = 0;
+  SmallVector<PrimType, 8> ParamTypes;
+  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;
+
+  // If the return is not a primitive, a pointer to the storage where the value
+  // is initialized in is passed as the first argument.
+  QualType Ty = F->getReturnType();
+  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
+    ParamTypes.push_back(PT_Ptr);
+    ParamOffset += align(primSize(PT_Ptr));
+  }
+
+  // Assign descriptors to all parameters.
+  // Composite objects are lowered to pointers.
+  for (const ParmVarDecl *PD : F->parameters()) {
+    PrimType Ty;
+    if (llvm::Optional<PrimType> T = Ctx.classify(PD->getType())) {
+      Ty = *T;
+    } else {
+      Ty = PT_Ptr;
+    }
+
+    Descriptor *Desc = P.createDescriptor(PD, Ty);
+    ParamDescriptors.insert({ParamOffset, {Ty, Desc}});
+    Params.insert({PD, ParamOffset});
+    ParamOffset += align(primSize(Ty));
+    ParamTypes.push_back(Ty);
+  }
+
+  // Create a handle over the emitted code.
+  Function *Func = P.createFunction(F, ParamOffset, std::move(ParamTypes),
+                                    std::move(ParamDescriptors));
+  // Compile the function body.
+  if (!F->isConstexpr() || !visitFunc(F)) {
+    // Compilation failed: report the bail location if one was recorded,
+    // otherwise return the dummy function without any code attached.
+    if (BailLocation)
+      return llvm::make_error<ByteCodeGenError>(*BailLocation);
+    return Func;
+  }
+
+  // Create scopes from descriptors.
+  llvm::SmallVector<Scope, 2> Scopes;
+  for (auto &DS : Descriptors)
+    Scopes.emplace_back(std::move(DS));
+
+  // Set the function's code.
+  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
+                std::move(Scopes));
+  return Func;
+}
+
+/// Reserves room for a Block header and D's storage; returns the data offset.
+Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
+  NextLocalOffset += sizeof(Block);
+  unsigned Location = NextLocalOffset;
+  NextLocalOffset += align(D->getAllocSize());
+  return {Location, D};
+}
+
+void ByteCodeEmitter::emitLabel(LabelTy Label) {
+  const size_t Target = Code.size();
+  LabelOffsets.insert({Label, Target});
+  auto It = LabelRelocs.find(Label);
+  if (It != LabelRelocs.end()) {
+    for (unsigned Reloc : It->second) {
+      using namespace llvm::support;
+
+      /// Rewrite the operand of all jumps to this label.
+      void *Location = Code.data() + Reloc - sizeof(int32_t);
+      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
+      endian::write<int32_t, endianness::native, 1>(Location, Offset);
+    }
+    LabelRelocs.erase(It);
+  }
+}
+
+/// Returns the jump offset to Label, or 0 after recording a relocation.
+int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
+  // Compute the PC offset which the jump is relative to.
+  const int64_t Position = Code.size() + sizeof(Opcode) + sizeof(int32_t);
+
+  // If target is known, compute jump offset.
+  auto It = LabelOffsets.find(Label);
+  if (It != LabelOffsets.end())
+    return It->second - Position;
+
+  // Otherwise, record relocation and return dummy offset.
+  LabelRelocs[Label].push_back(Position);
+  return 0;
+}
+
+bool ByteCodeEmitter::bail(const SourceLocation &Loc) {
+  if (!BailLocation)
+    BailLocation = Loc;
+  return false;
+}
+
+template <typename... Tys>
+bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
+  bool Success = true;
+
+  /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
+  auto emit = [this, &Success](const char *Data, size_t Size) {
+    if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+      Success = false;
+      return;
+    }
+    Code.insert(Code.end(), Data, Data + Size);
+  };
+
+  /// The opcode is followed by arguments. The source info is
+  /// attached to the address after the opcode.
+  emit(reinterpret_cast<const char *>(&Op), sizeof(Opcode));
+  if (SI)
+    SrcMap.emplace_back(Code.size(), SI);
+
+  /// The initializer list forces the expression to be evaluated
+  /// for each argument in the variadic template, in order.
+  (void)std::initializer_list<int>{
+      (emit(reinterpret_cast<const char *>(&Args), sizeof(Args)), 0)...};
+
+  return Success;
+}
+
+bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
+  return emitJt(getOffset(Label), SourceInfo{});
+}
+
+bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
+  return emitJf(getOffset(Label), SourceInfo{});
+}
+
+bool ByteCodeEmitter::jump(const LabelTy &Label) {
+  return emitJmp(getOffset(Label), SourceInfo{});
+}
+
+bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
+  emitLabel(Label);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Opcode emitters
+//===----------------------------------------------------------------------===//
+
+#define GET_LINK_IMPL
+#include "Opcodes.inc"
+#undef GET_LINK_IMPL
diff --git a/lib/AST/Interp/ByteCodeEmitter.h b/lib/AST/Interp/ByteCodeEmitter.h
new file mode 100644
index 00000000000..03452a350c9
--- /dev/null
+++ b/lib/AST/Interp/ByteCodeEmitter.h
@@ -0,0 +1,112 @@
+//===--- ByteCodeEmitter.h - Instruction emitter for the VM ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the instruction emitters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_INTERP_LINKEMITTER_H
+#define LLVM_CLANG_AST_INTERP_LINKEMITTER_H
+
+#include "ByteCodeGenError.h"
+#include "Context.h"
+#include "InterpStack.h"
+#include "InterpState.h"
+#include "PrimType.h"
+#include "Program.h"
+#include "Source.h"
+#include "llvm/Support/Error.h"
+
+namespace clang {
+namespace interp {
+class Context;
+class SourceInfo;
+enum Opcode : uint32_t;
+
+/// An emitter which links the program to bytecode for later use.
+class ByteCodeEmitter {
+protected:
+  using LabelTy = uint32_t;
+  using AddrTy = uintptr_t;
+  using Local = Scope::Local;
+
+public:
+  /// Compiles the function into the module.
+  llvm::Expected<Function *> compileFunc(const FunctionDecl *F);
+
+protected:
+  ByteCodeEmitter(Context &Ctx, Program &P) : Ctx(Ctx), P(P) {}
+
+  virtual ~ByteCodeEmitter() {}
+
+  /// Define a label at the current end of the code.
+  void emitLabel(LabelTy Label);
+  /// Create a fresh label identifier.
+  LabelTy getLabel() { return ++NextLabel; }
+
+  /// Methods implemented by the compiler.
+  virtual bool visitFunc(const FunctionDecl *E) = 0;
+  virtual bool visitExpr(const Expr *E) = 0;
+  virtual bool visitDecl(const VarDecl *E) = 0;
+
+  /// Bails out if a given node cannot be compiled; always returns false.
+  bool bail(const Stmt *S) { return bail(S->getBeginLoc()); }
+  bool bail(const Decl *D) { return bail(D->getBeginLoc()); }
+  bool bail(const SourceLocation &Loc);
+
+  /// Emits jumps.
+  bool jumpTrue(const LabelTy &Label);
+  bool jumpFalse(const LabelTy &Label);
+  bool jump(const LabelTy &Label);
+  bool fallthrough(const LabelTy &Label);
+
+  /// Callback for local registration.
+  Local createLocal(Descriptor *D);
+
+  /// Parameter indices.
+  llvm::DenseMap<const ParmVarDecl *, unsigned> Params;
+  /// Local descriptors.
+  llvm::SmallVector<SmallVector<Local, 8>, 2> Descriptors;
+
+private:
+  /// Current compilation context.
+  Context &Ctx;
+  /// Program to link to.
+  Program &P;
+  /// Index of the next available label.
+  LabelTy NextLabel = 0;
+  /// Offset of the next local variable.
+  unsigned NextLocalOffset = 0;
+  /// Location of the first failure, if any.
+  llvm::Optional<SourceLocation> BailLocation;
+  /// Label information for linker.
+  llvm::DenseMap<LabelTy, unsigned> LabelOffsets;
+  /// Location of label relocations.
+  llvm::DenseMap<LabelTy, llvm::SmallVector<unsigned, 5>> LabelRelocs;
+  /// Program code.
+  std::vector<char> Code;
+  /// Opcode to expression mapping.
+  SourceMap SrcMap;
+
+  /// Returns the offset for a jump or records a relocation.
+  int32_t getOffset(LabelTy Label);
+
+  /// Emits an opcode followed by its arguments; returns false on overflow.
+  template <typename... Tys>
+  bool emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI);
+
+protected:
+#define GET_LINK_PROTO
+#include "Opcodes.inc"
+#undef GET_LINK_PROTO
+};
+
+} // namespace interp
+} // namespace clang
+
+#endif
diff --git a/lib/AST/Interp/ByteCodeExprGen.cpp b/lib/AST/Interp/ByteCodeExprGen.cpp
new file mode 100644
index 00000000000..5c8cb427426
--- /dev/null
+++ b/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -0,0 +1,580 @@
+//===--- ByteCodeExprGen.cpp - Code generator for expressions ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ByteCodeExprGen.h"
+#include "ByteCodeEmitter.h"
+#include "ByteCodeGenError.h"
+#include "Context.h"
+#include "Function.h"
+#include "PrimType.h"
+#include "Program.h"
+#include "State.h"
+
+using namespace clang;
+using namespace clang::interp;
+
+using APSInt = llvm::APSInt;
+template <typename T> using Expected = llvm::Expected<T>;
+template <typename T> using Optional = llvm::Optional<T>;
+
+namespace clang {
+namespace interp {
+
+/// Scope used to handle temporaries in toplevel variable declarations.
+template class DeclScope final : public LocalScope { +public: + DeclScope(ByteCodeExprGen *Ctx, const VarDecl *VD) + : LocalScope(Ctx), Scope(Ctx->P, VD) {} + + void addExtended(const Scope::Local &Local) override { + return this->addLocal(Local); + } + +private: + Program::DeclScope Scope; +}; + +/// Scope used to handle initialization methods. +template class OptionScope { +public: + using InitFnRef = typename ByteCodeExprGen::InitFnRef; + using ChainedInitFnRef = std::function; + + /// Root constructor, compiling or discarding primitives. + OptionScope(ByteCodeExprGen *Ctx, bool NewDiscardResult) + : Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult), + OldInitFn(std::move(Ctx->InitFn)) { + Ctx->DiscardResult = NewDiscardResult; + Ctx->InitFn = llvm::Optional{}; + } + + /// Root constructor, setting up compilation state. + OptionScope(ByteCodeExprGen *Ctx, InitFnRef NewInitFn) + : Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult), + OldInitFn(std::move(Ctx->InitFn)) { + Ctx->DiscardResult = true; + Ctx->InitFn = NewInitFn; + } + + /// Extends the chain of initialisation pointers. + OptionScope(ByteCodeExprGen *Ctx, ChainedInitFnRef NewInitFn) + : Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult), + OldInitFn(std::move(Ctx->InitFn)) { + assert(OldInitFn && "missing initializer"); + Ctx->InitFn = [this, NewInitFn] { return NewInitFn(*OldInitFn); }; + } + + ~OptionScope() { + Ctx->DiscardResult = OldDiscardResult; + Ctx->InitFn = std::move(OldInitFn); + } + +private: + /// Parent context. + ByteCodeExprGen *Ctx; + /// Old discard flag to restore. + bool OldDiscardResult; + /// Old pointer emitter to restore. + llvm::Optional OldInitFn; +}; + +} // namespace interp +} // namespace clang + +template +bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { + auto *SubExpr = CE->getSubExpr(); + switch (CE->getCastKind()) { + + case CK_LValueToRValue: { + return dereference( + CE->getSubExpr(), DerefKind::Read, + [](PrimType) { + // Value loaded - nothing to do here. 
+ return true; + }, + [this, CE](PrimType T) { + // Pointer on stack - dereference it. + if (!this->emitLoadPop(T, CE)) + return false; + return DiscardResult ? this->emitPop(T, CE) : true; + }); + } + + case CK_ArrayToPointerDecay: + case CK_AtomicToNonAtomic: + case CK_ConstructorConversion: + case CK_FunctionToPointerDecay: + case CK_NonAtomicToAtomic: + case CK_NoOp: + case CK_UserDefinedConversion: + return this->Visit(SubExpr); + + case CK_ToVoid: + return discard(SubExpr); + + default: { + // TODO: implement other casts. + return this->bail(CE); + } + } +} + +template +bool ByteCodeExprGen::VisitIntegerLiteral(const IntegerLiteral *LE) { + if (DiscardResult) + return true; + + auto Val = LE->getValue(); + QualType LitTy = LE->getType(); + if (Optional T = classify(LitTy)) + return emitConst(*T, getIntWidth(LitTy), LE->getValue(), LE); + return this->bail(LE); +} + +template +bool ByteCodeExprGen::VisitParenExpr(const ParenExpr *PE) { + return this->Visit(PE->getSubExpr()); +} + +template +bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { + const Expr *LHS = BO->getLHS(); + const Expr *RHS = BO->getRHS(); + + // Deal with operations which have composite or void types. + switch (BO->getOpcode()) { + case BO_Comma: + if (!discard(LHS)) + return false; + if (!this->Visit(RHS)) + return false; + return true; + default: + break; + } + + // Typecheck the args. + Optional LT = classify(LHS->getType()); + Optional RT = classify(RHS->getType()); + if (!LT || !RT) { + return this->bail(BO); + } + + if (Optional T = classify(BO->getType())) { + if (!visit(LHS)) + return false; + if (!visit(RHS)) + return false; + + auto Discard = [this, T, BO](bool Result) { + if (!Result) + return false; + return DiscardResult ? 
this->emitPop(*T, BO) : true; + }; + + switch (BO->getOpcode()) { + case BO_EQ: + return Discard(this->emitEQ(*LT, BO)); + case BO_NE: + return Discard(this->emitNE(*LT, BO)); + case BO_LT: + return Discard(this->emitLT(*LT, BO)); + case BO_LE: + return Discard(this->emitLE(*LT, BO)); + case BO_GT: + return Discard(this->emitGT(*LT, BO)); + case BO_GE: + return Discard(this->emitGE(*LT, BO)); + case BO_Sub: + return Discard(this->emitSub(*T, BO)); + case BO_Add: + return Discard(this->emitAdd(*T, BO)); + case BO_Mul: + return Discard(this->emitMul(*T, BO)); + default: + return this->bail(BO); + } + } + + return this->bail(BO); +} + +template +bool ByteCodeExprGen::discard(const Expr *E) { + OptionScope Scope(this, /*discardResult=*/true); + return this->Visit(E); +} + +template +bool ByteCodeExprGen::visit(const Expr *E) { + OptionScope Scope(this, /*discardResult=*/false); + return this->Visit(E); +} + +template +bool ByteCodeExprGen::visitBool(const Expr *E) { + if (Optional T = classify(E->getType())) { + return visit(E); + } else { + return this->bail(E); + } +} + +template +bool ByteCodeExprGen::visitZeroInitializer(PrimType T, const Expr *E) { + switch (T) { + case PT_Bool: + return this->emitZeroBool(E); + case PT_Sint8: + return this->emitZeroSint8(E); + case PT_Uint8: + return this->emitZeroUint8(E); + case PT_Sint16: + return this->emitZeroSint16(E); + case PT_Uint16: + return this->emitZeroUint16(E); + case PT_Sint32: + return this->emitZeroSint32(E); + case PT_Uint32: + return this->emitZeroUint32(E); + case PT_Sint64: + return this->emitZeroSint64(E); + case PT_Uint64: + return this->emitZeroUint64(E); + case PT_Ptr: + return this->emitNullPtr(E); + } + llvm_unreachable("unknown primitive type"); +} + +template +bool ByteCodeExprGen::dereference( + const Expr *LV, DerefKind AK, llvm::function_ref Direct, + llvm::function_ref Indirect) { + if (Optional T = classify(LV->getType())) { + if (!LV->refersToBitField()) { + // Only primitive, non bit-field 
types can be dereferenced directly. + if (auto *DE = dyn_cast(LV)) { + if (!DE->getDecl()->getType()->isReferenceType()) { + if (auto *PD = dyn_cast(DE->getDecl())) + return dereferenceParam(LV, *T, PD, AK, Direct, Indirect); + if (auto *VD = dyn_cast(DE->getDecl())) + return dereferenceVar(LV, *T, VD, AK, Direct, Indirect); + } + } + } + + if (!visit(LV)) + return false; + return Indirect(*T); + } + + return false; +} + +template +bool ByteCodeExprGen::dereferenceParam( + const Expr *LV, PrimType T, const ParmVarDecl *PD, DerefKind AK, + llvm::function_ref Direct, + llvm::function_ref Indirect) { + auto It = this->Params.find(PD); + if (It != this->Params.end()) { + unsigned Idx = It->second; + switch (AK) { + case DerefKind::Read: + return DiscardResult ? true : this->emitGetParam(T, Idx, LV); + + case DerefKind::Write: + if (!Direct(T)) + return false; + if (!this->emitSetParam(T, Idx, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrParam(Idx, LV); + + case DerefKind::ReadWrite: + if (!this->emitGetParam(T, Idx, LV)) + return false; + if (!Direct(T)) + return false; + if (!this->emitSetParam(T, Idx, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrParam(Idx, LV); + } + return true; + } + + // If the param is a pointer, we can dereference a dummy value. + if (!DiscardResult && T == PT_Ptr && AK == DerefKind::Read) { + if (auto Idx = P.getOrCreateDummy(PD)) + return this->emitGetPtrGlobal(*Idx, PD); + return false; + } + + // Value cannot be produced - try to emit pointer and do stuff with it. + return visit(LV) && Indirect(T); +} + +template +bool ByteCodeExprGen::dereferenceVar( + const Expr *LV, PrimType T, const VarDecl *VD, DerefKind AK, + llvm::function_ref Direct, + llvm::function_ref Indirect) { + auto It = Locals.find(VD); + if (It != Locals.end()) { + const auto &L = It->second; + switch (AK) { + case DerefKind::Read: + if (!this->emitGetLocal(T, L.Offset, LV)) + return false; + return DiscardResult ? 
this->emitPop(T, LV) : true; + + case DerefKind::Write: + if (!Direct(T)) + return false; + if (!this->emitSetLocal(T, L.Offset, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrLocal(L.Offset, LV); + + case DerefKind::ReadWrite: + if (!this->emitGetLocal(T, L.Offset, LV)) + return false; + if (!Direct(T)) + return false; + if (!this->emitSetLocal(T, L.Offset, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrLocal(L.Offset, LV); + } + } else if (auto Idx = getGlobalIdx(VD)) { + switch (AK) { + case DerefKind::Read: + if (!this->emitGetGlobal(T, *Idx, LV)) + return false; + return DiscardResult ? this->emitPop(T, LV) : true; + + case DerefKind::Write: + if (!Direct(T)) + return false; + if (!this->emitSetGlobal(T, *Idx, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrGlobal(*Idx, LV); + + case DerefKind::ReadWrite: + if (!this->emitGetGlobal(T, *Idx, LV)) + return false; + if (!Direct(T)) + return false; + if (!this->emitSetGlobal(T, *Idx, LV)) + return false; + return DiscardResult ? true : this->emitGetPtrGlobal(*Idx, LV); + } + } + + // If the declaration is a constant value, emit it here even + // though the declaration was not evaluated in the current scope. + // The access mode can only be read in this case. + if (!DiscardResult && AK == DerefKind::Read) { + if (VD->hasLocalStorage() && VD->hasInit() && !VD->isConstexpr()) { + QualType VT = VD->getType(); + if (VT.isConstQualified() && VT->isFundamentalType()) + return this->Visit(VD->getInit()); + } + } + + // Value cannot be produced - try to emit pointer. 
+ return visit(LV) && Indirect(T); +} + +template +bool ByteCodeExprGen::emitConst(PrimType T, unsigned NumBits, + const APInt &Value, const Expr *E) { + switch (T) { + case PT_Sint8: + return this->emitConstSint8(Value.getSExtValue(), E); + case PT_Uint8: + return this->emitConstUint8(Value.getZExtValue(), E); + case PT_Sint16: + return this->emitConstSint16(Value.getSExtValue(), E); + case PT_Uint16: + return this->emitConstUint16(Value.getZExtValue(), E); + case PT_Sint32: + return this->emitConstSint32(Value.getSExtValue(), E); + case PT_Uint32: + return this->emitConstUint32(Value.getZExtValue(), E); + case PT_Sint64: + return this->emitConstSint64(Value.getSExtValue(), E); + case PT_Uint64: + return this->emitConstUint64(Value.getZExtValue(), E); + case PT_Bool: + return this->emitConstBool(Value.getBoolValue(), E); + case PT_Ptr: + llvm_unreachable("Invalid integral type"); + break; + } + llvm_unreachable("unknown primitive type"); +} + +template +unsigned ByteCodeExprGen::allocateLocalPrimitive(DeclTy &&Src, + PrimType Ty, + bool IsConst, + bool IsExtended) { + Descriptor *D = P.createDescriptor(Src, Ty, IsConst, Src.is()); + Scope::Local Local = this->createLocal(D); + if (auto *VD = dyn_cast_or_null(Src.dyn_cast())) + Locals.insert({VD, Local}); + VarScope->add(Local, IsExtended); + return Local.Offset; +} + +template +llvm::Optional +ByteCodeExprGen::allocateLocal(DeclTy &&Src, bool IsExtended) { + QualType Ty; + + const ValueDecl *Key = nullptr; + bool IsTemporary = false; + if (auto *VD = dyn_cast_or_null(Src.dyn_cast())) { + Key = VD; + Ty = VD->getType(); + } + if (auto *E = Src.dyn_cast()) { + IsTemporary = true; + Ty = E->getType(); + } + + Descriptor *D = P.createDescriptor(Src, Ty.getTypePtr(), + Ty.isConstQualified(), IsTemporary); + if (!D) + return {}; + + Scope::Local Local = this->createLocal(D); + if (Key) + Locals.insert({Key, Local}); + VarScope->add(Local, IsExtended); + return Local.Offset; +} + +template +bool 
ByteCodeExprGen::visitInitializer( + const Expr *Init, InitFnRef InitFn) { + OptionScope Scope(this, InitFn); + return this->Visit(Init); +} + +template +bool ByteCodeExprGen::getPtrVarDecl(const VarDecl *VD, const Expr *E) { + // Generate a pointer to the local, loading refs. + if (Optional Idx = getGlobalIdx(VD)) { + if (VD->getType()->isReferenceType()) + return this->emitGetGlobalPtr(*Idx, E); + else + return this->emitGetPtrGlobal(*Idx, E); + } + return this->bail(VD); +} + +template +llvm::Optional +ByteCodeExprGen::getGlobalIdx(const VarDecl *VD) { + if (VD->isConstexpr()) { + // Constexpr decl - it must have already been defined. + return P.getGlobal(VD); + } + if (!VD->hasLocalStorage()) { + // Not constexpr, but a global var - can have pointer taken. + Program::DeclScope Scope(P, VD); + return P.getOrCreateGlobal(VD); + } + return {}; +} + +template +const RecordType *ByteCodeExprGen::getRecordTy(QualType Ty) { + if (auto *PT = dyn_cast(Ty)) + return PT->getPointeeType()->getAs(); + else + return Ty->getAs(); +} + +template +Record *ByteCodeExprGen::getRecord(QualType Ty) { + if (auto *RecordTy = getRecordTy(Ty)) { + return getRecord(RecordTy->getDecl()); + } + return nullptr; +} + +template +Record *ByteCodeExprGen::getRecord(const RecordDecl *RD) { + return P.getOrCreateRecord(RD); +} + +template +bool ByteCodeExprGen::visitExpr(const Expr *Exp) { + ExprScope RootScope(this); + if (!visit(Exp)) + return false; + + if (Optional T = classify(Exp)) + return this->emitRet(*T, Exp); + else + return this->emitRetValue(Exp); +} + +template +bool ByteCodeExprGen::visitDecl(const VarDecl *VD) { + const Expr *Init = VD->getInit(); + + if (Optional I = P.createGlobal(VD)) { + if (Optional T = classify(VD->getType())) { + { + // Primitive declarations - compute the value and set it. + DeclScope LocalScope(this, VD); + if (!visit(Init)) + return false; + } + + // If the declaration is global, save the value for later use. 
+ if (!this->emitDup(*T, VD)) + return false; + if (!this->emitInitGlobal(*T, *I, VD)) + return false; + return this->emitRet(*T, VD); + } else { + { + // Composite declarations - allocate storage and initialize it. + DeclScope LocalScope(this, VD); + if (!visitGlobalInitializer(Init, *I)) + return false; + } + + // Return a pointer to the global. + if (!this->emitGetPtrGlobal(*I, VD)) + return false; + return this->emitRetValue(VD); + } + } + + return this->bail(VD); +} + +template +void ByteCodeExprGen::emitCleanup() { + for (VariableScope *C = VarScope; C; C = C->getParent()) + C->emitDestruction(); +} + +namespace clang { +namespace interp { + +template class ByteCodeExprGen; +template class ByteCodeExprGen; + +} // namespace interp +} // namespace clang diff --git a/lib/AST/Interp/ByteCodeExprGen.h b/lib/AST/Interp/ByteCodeExprGen.h new file mode 100644 index 00000000000..1d0e34fc991 --- /dev/null +++ b/lib/AST/Interp/ByteCodeExprGen.h @@ -0,0 +1,331 @@ +//===--- ByteCodeExprGen.h - Code generator for expressions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the constexpr bytecode compiler. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_BYTECODEEXPRGEN_H +#define LLVM_CLANG_AST_INTERP_BYTECODEEXPRGEN_H + +#include "ByteCodeEmitter.h" +#include "EvalEmitter.h" +#include "Pointer.h" +#include "PrimType.h" +#include "Record.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/Optional.h" + +namespace clang { +class QualType; + +namespace interp { +class Function; +class State; + +template class LocalScope; +template class RecordScope; +template class VariableScope; +template class DeclScope; +template class OptionScope; + +/// Compilation context for expressions. +template +class ByteCodeExprGen : public ConstStmtVisitor, bool>, + public Emitter { +protected: + // Emitters for opcodes of various arities. + using NullaryFn = bool (ByteCodeExprGen::*)(const SourceInfo &); + using UnaryFn = bool (ByteCodeExprGen::*)(PrimType, const SourceInfo &); + using BinaryFn = bool (ByteCodeExprGen::*)(PrimType, PrimType, + const SourceInfo &); + + // Aliases for types defined in the emitter. + using LabelTy = typename Emitter::LabelTy; + using AddrTy = typename Emitter::AddrTy; + + // Reference to a function generating the pointer of an initialized object. + using InitFnRef = std::function; + + /// Current compilation context. + Context &Ctx; + /// Program to link to. + Program &P; + +public: + /// Initializes the compiler and the backend emitter. + template + ByteCodeExprGen(Context &Ctx, Program &P, Tys &&... Args) + : Emitter(Ctx, P, Args...), Ctx(Ctx), P(P) {} + + // Expression visitors - result returned on stack.
+ bool VisitCastExpr(const CastExpr *E); + bool VisitIntegerLiteral(const IntegerLiteral *E); + bool VisitParenExpr(const ParenExpr *E); + bool VisitBinaryOperator(const BinaryOperator *E); + +protected: + bool visitExpr(const Expr *E) override; + bool visitDecl(const VarDecl *VD) override; + +protected: + /// Emits scope cleanup instructions. + void emitCleanup(); + + /// Returns a record type from a record or pointer type. + const RecordType *getRecordTy(QualType Ty); + + /// Returns a record from a record or pointer type. + Record *getRecord(QualType Ty); + Record *getRecord(const RecordDecl *RD); + + /// Returns the size in bits of an integer. + unsigned getIntWidth(QualType Ty) { + auto &ASTContext = Ctx.getASTContext(); + return ASTContext.getIntWidth(Ty); + } + + /// Returns the value of CHAR_BIT. + unsigned getCharBit() const { + auto &ASTContext = Ctx.getASTContext(); + return ASTContext.getTargetInfo().getCharWidth(); + } + + /// Classifies a type. + llvm::Optional classify(const Expr *E) const { + return E->isGLValue() ? PT_Ptr : classify(E->getType()); + } + llvm::Optional classify(QualType Ty) const { + return Ctx.classify(Ty); + } + + /// Checks if a pointer needs adjustment. + bool needsAdjust(QualType Ty) const { + return true; + } + + /// Classifies a known primitive type. + PrimType classifyPrim(QualType Ty) const { + if (auto T = classify(Ty)) { + return *T; + } + llvm_unreachable("not a primitive type"); + } + + /// Evaluates an expression for side effects and discards the result. + bool discard(const Expr *E); + /// Evaluates an expression and places result on stack. + bool visit(const Expr *E); + /// Compiles an initializer; GenPtr emits the pointer to the object. + bool visitInitializer(const Expr *E, InitFnRef GenPtr); + + /// Visits an expression and converts it to a boolean. + bool visitBool(const Expr *E); + + /// Visits an initializer for a local.
+ bool visitLocalInitializer(const Expr *Init, unsigned I) { + return visitInitializer(Init, [this, I, Init] { + return this->emitGetPtrLocal(I, Init); + }); + } + + /// Visits an initializer for a global. + bool visitGlobalInitializer(const Expr *Init, unsigned I) { + return visitInitializer(Init, [this, I, Init] { + return this->emitGetPtrGlobal(I, Init); + }); + } + + /// Visits a delegated initializer. + bool visitThisInitializer(const Expr *I) { + return visitInitializer(I, [this, I] { return this->emitThis(I); }); + } + + /// Creates a local primitive value. + unsigned allocateLocalPrimitive(DeclTy &&Decl, PrimType Ty, bool IsMutable, + bool IsExtended = false); + + /// Allocates a space storing a local given its type. + llvm::Optional allocateLocal(DeclTy &&Decl, + bool IsExtended = false); + +private: + friend class VariableScope; + friend class LocalScope; + friend class RecordScope; + friend class DeclScope; + friend class OptionScope; + + /// Emits a zero initializer. + bool visitZeroInitializer(PrimType T, const Expr *E); + + enum class DerefKind { + /// Value is read and pushed to stack. + Read, + /// Direct method generates a value which is written. Returns pointer. + Write, + /// Direct method receives the value, pushes mutated value. Returns pointer. + ReadWrite, + }; + + /// Method to directly load a value. If the value can be fetched directly, + /// the direct handler is called. Otherwise, a pointer is left on the stack + /// and the indirect handler is expected to operate on that. + bool dereference(const Expr *LV, DerefKind AK, + llvm::function_ref Direct, + llvm::function_ref Indirect); + bool dereferenceParam(const Expr *LV, PrimType T, const ParmVarDecl *PD, + DerefKind AK, + llvm::function_ref Direct, + llvm::function_ref Indirect); + bool dereferenceVar(const Expr *LV, PrimType T, const VarDecl *PD, + DerefKind AK, llvm::function_ref Direct, + llvm::function_ref Indirect); + + /// Emits an APInt constant. 
+ bool emitConst(PrimType T, unsigned NumBits, const llvm::APInt &Value, + const Expr *E); + + /// Emits an integer constant. + template bool emitConst(const Expr *E, T Value) { + QualType Ty = E->getType(); + unsigned NumBits = getIntWidth(Ty); + APInt WrappedValue(NumBits, Value, std::is_signed::value); + return emitConst(*Ctx.classify(Ty), NumBits, WrappedValue, E); + } + + /// Returns a pointer to a variable declaration. + bool getPtrVarDecl(const VarDecl *VD, const Expr *E); + + /// Returns the index of a global. + llvm::Optional getGlobalIdx(const VarDecl *VD); + + /// Emits the initialized pointer. + bool emitInitFn() { + assert(InitFn && "missing initializer"); + return (*InitFn)(); + } + +protected: + /// Variable to storage mapping. + llvm::DenseMap Locals; + + /// OpaqueValueExpr to location mapping. + llvm::DenseMap OpaqueExprs; + + /// Current scope. + VariableScope *VarScope = nullptr; + + /// Current argument index. + llvm::Optional ArrayIndex; + + /// Flag indicating if return value is to be discarded. + bool DiscardResult = false; + + /// Expression being initialized. + llvm::Optional InitFn = {}; +}; + +extern template class ByteCodeExprGen; +extern template class ByteCodeExprGen; + +/// Scope chain managing the variable lifetimes. +template class VariableScope { +public: + virtual ~VariableScope() { Ctx->VarScope = this->Parent; } + + void add(const Scope::Local &Local, bool IsExtended) { + if (IsExtended) + this->addExtended(Local); + else + this->addLocal(Local); + } + + virtual void addLocal(const Scope::Local &Local) { + if (this->Parent) + this->Parent->addLocal(Local); + } + + virtual void addExtended(const Scope::Local &Local) { + if (this->Parent) + this->Parent->addExtended(Local); + } + + virtual void emitDestruction() {} + + VariableScope *getParent() { return Parent; } + +protected: + VariableScope(ByteCodeExprGen *Ctx) + : Ctx(Ctx), Parent(Ctx->VarScope) { + Ctx->VarScope = this; + } + + /// ByteCodeExprGen instance. 
+ ByteCodeExprGen *Ctx; + /// Link to the parent scope. + VariableScope *Parent; +}; + +/// Scope for local variables. +/// +/// When the scope is destroyed, instructions are emitted to tear down +/// all variables declared in this scope. +template class LocalScope : public VariableScope { +public: + LocalScope(ByteCodeExprGen *Ctx) : VariableScope(Ctx) {} + + ~LocalScope() override { this->emitDestruction(); } + + void addLocal(const Scope::Local &Local) override { + if (!Idx.hasValue()) { + Idx = this->Ctx->Descriptors.size(); + this->Ctx->Descriptors.emplace_back(); + } + + this->Ctx->Descriptors[*Idx].emplace_back(Local); + } + + void emitDestruction() override { + if (!Idx.hasValue()) + return; + this->Ctx->emitDestroy(*Idx, SourceInfo{}); + } + +protected: + /// Index of this scope in Ctx->Descriptors, set on the first addLocal. + Optional Idx; +}; + +/// Scope for storage declared in a compound statement. +template class BlockScope final : public LocalScope { +public: + BlockScope(ByteCodeExprGen *Ctx) : LocalScope(Ctx) {} + + void addExtended(const Scope::Local &Local) override { + llvm_unreachable("Cannot create temporaries in full scopes"); + } +}; + +/// Expression scope which tracks potentially lifetime extended +/// temporaries which are hoisted to the parent scope on exit. +template class ExprScope final : public LocalScope { +public: + ExprScope(ByteCodeExprGen *Ctx) : LocalScope(Ctx) {} + + void addExtended(const Scope::Local &Local) override { + this->Parent->addLocal(Local); + } +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/ByteCodeGenError.cpp b/lib/AST/Interp/ByteCodeGenError.cpp new file mode 100644 index 00000000000..5fd3d77c384 --- /dev/null +++ b/lib/AST/Interp/ByteCodeGenError.cpp @@ -0,0 +1,14 @@ +//===--- ByteCodeGenError.cpp - Byte code generation error ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ByteCodeGenError.h" + +using namespace clang; +using namespace clang::interp; + +char ByteCodeGenError::ID; diff --git a/lib/AST/Interp/ByteCodeGenError.h b/lib/AST/Interp/ByteCodeGenError.h new file mode 100644 index 00000000000..a4fa4917705 --- /dev/null +++ b/lib/AST/Interp/ByteCodeGenError.h @@ -0,0 +1,46 @@ +//===--- ByteCodeGenError.h - Byte code generation error --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_BYTECODEGENERROR_H +#define LLVM_CLANG_AST_INTERP_BYTECODEGENERROR_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/Support/Error.h" + +namespace clang { +namespace interp { + +/// Error reported by the bytecode compiler for an unimplemented feature. +struct ByteCodeGenError : public llvm::ErrorInfo { +public: + ByteCodeGenError(SourceLocation Loc) : Loc(Loc) {} + ByteCodeGenError(const Stmt *S) : ByteCodeGenError(S->getBeginLoc()) {} + ByteCodeGenError(const Decl *D) : ByteCodeGenError(D->getBeginLoc()) {} + + void log(raw_ostream &OS) const override { OS << "unimplemented feature"; } + + const SourceLocation &getLoc() const { return Loc; } + + static char ID; + +private: + // Start of the item where the error occurred. + SourceLocation Loc; + + // Users are not expected to use error_code.
+ std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/ByteCodeStmtGen.cpp b/lib/AST/Interp/ByteCodeStmtGen.cpp new file mode 100644 index 00000000000..c71301598bd --- /dev/null +++ b/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -0,0 +1,265 @@ +//===--- ByteCodeStmtGen.cpp - Code generator for statements ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ByteCodeStmtGen.h" +#include "ByteCodeEmitter.h" +#include "ByteCodeGenError.h" +#include "Context.h" +#include "Function.h" +#include "PrimType.h" +#include "Program.h" +#include "State.h" + +using namespace clang; +using namespace clang::interp; + +template using Expected = llvm::Expected; +template using Optional = llvm::Optional; + +namespace clang { +namespace interp { + +/// Scope managing label targets. +template class LabelScope { +public: + virtual ~LabelScope() { } + +protected: + LabelScope(ByteCodeStmtGen *Ctx) : Ctx(Ctx) {} + /// ByteCodeStmtGen instance. + ByteCodeStmtGen *Ctx; +}; + +/// Sets the context for break/continue statements.
+template class LoopScope final : public LabelScope { +public: + using LabelTy = typename ByteCodeStmtGen::LabelTy; + using OptLabelTy = typename ByteCodeStmtGen::OptLabelTy; + + LoopScope(ByteCodeStmtGen *Ctx, LabelTy BreakLabel, + LabelTy ContinueLabel) + : LabelScope(Ctx), OldBreakLabel(Ctx->BreakLabel), + OldContinueLabel(Ctx->ContinueLabel) { + this->Ctx->BreakLabel = BreakLabel; + this->Ctx->ContinueLabel = ContinueLabel; + } + + ~LoopScope() { + this->Ctx->BreakLabel = OldBreakLabel; + this->Ctx->ContinueLabel = OldContinueLabel; + } + +private: + OptLabelTy OldBreakLabel; + OptLabelTy OldContinueLabel; +}; + +// Sets the context for a switch scope, mapping labels. +template class SwitchScope final : public LabelScope { +public: + using LabelTy = typename ByteCodeStmtGen::LabelTy; + using OptLabelTy = typename ByteCodeStmtGen::OptLabelTy; + using CaseMap = typename ByteCodeStmtGen::CaseMap; + + SwitchScope(ByteCodeStmtGen *Ctx, CaseMap &&CaseLabels, + LabelTy BreakLabel, OptLabelTy DefaultLabel) + : LabelScope(Ctx), OldBreakLabel(Ctx->BreakLabel), + OldDefaultLabel(this->Ctx->DefaultLabel), + OldCaseLabels(std::move(this->Ctx->CaseLabels)) { + this->Ctx->BreakLabel = BreakLabel; + this->Ctx->DefaultLabel = DefaultLabel; + this->Ctx->CaseLabels = std::move(CaseLabels); + } + + ~SwitchScope() { + this->Ctx->BreakLabel = OldBreakLabel; + this->Ctx->DefaultLabel = OldDefaultLabel; + this->Ctx->CaseLabels = std::move(OldCaseLabels); + } + +private: + OptLabelTy OldBreakLabel; + OptLabelTy OldDefaultLabel; + CaseMap OldCaseLabels; +}; + +} // namespace interp +} // namespace clang + +template +bool ByteCodeStmtGen::visitFunc(const FunctionDecl *F) { + // Classify the return type. + ReturnType = this->classify(F->getReturnType()); + + // Set up fields and context if a constructor. 
+ if (auto *MD = dyn_cast(F)) + return this->bail(MD); + + if (auto *Body = F->getBody()) + if (!visitStmt(Body)) + return false; + + // Emit a guard return to protect against a code path missing one. + if (F->getReturnType()->isVoidType()) + return this->emitRetVoid(SourceInfo{}); + else + return this->emitNoRet(SourceInfo{}); +} + +template +bool ByteCodeStmtGen::visitStmt(const Stmt *S) { + switch (S->getStmtClass()) { + case Stmt::CompoundStmtClass: + return visitCompoundStmt(cast(S)); + case Stmt::DeclStmtClass: + return visitDeclStmt(cast(S)); + case Stmt::ReturnStmtClass: + return visitReturnStmt(cast(S)); + case Stmt::IfStmtClass: + return visitIfStmt(cast(S)); + case Stmt::NullStmtClass: + return true; + default: { + if (auto *Exp = dyn_cast(S)) + return this->discard(Exp); + return this->bail(S); + } + } +} + +template +bool ByteCodeStmtGen::visitCompoundStmt( + const CompoundStmt *CompoundStmt) { + BlockScope Scope(this); + for (auto *InnerStmt : CompoundStmt->body()) + if (!visitStmt(InnerStmt)) + return false; + return true; +} + +template +bool ByteCodeStmtGen::visitDeclStmt(const DeclStmt *DS) { + for (auto *D : DS->decls()) { + // Variable declarator. + if (auto *VD = dyn_cast(D)) { + if (!visitVarDecl(VD)) + return false; + continue; + } + + // Decomposition declarator. + if (auto *DD = dyn_cast(D)) { + return this->bail(DD); + } + } + + return true; +} + +template +bool ByteCodeStmtGen::visitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RE = RS->getRetValue()) { + ExprScope RetScope(this); + if (ReturnType) { + // Primitive types are simply returned. + if (!this->visit(RE)) + return false; + this->emitCleanup(); + return this->emitRet(*ReturnType, RS); + } else { + // RVO - construct the value in the return location. 
+ auto ReturnLocation = [this, RE] { return this->emitGetParamPtr(0, RE); }; + if (!this->visitInitializer(RE, ReturnLocation)) + return false; + this->emitCleanup(); + return this->emitRetVoid(RS); + } + } else { + this->emitCleanup(); + if (!this->emitRetVoid(RS)) + return false; + return true; + } +} + +template +bool ByteCodeStmtGen::visitIfStmt(const IfStmt *IS) { + BlockScope IfScope(this); + if (auto *CondInit = IS->getInit()) + if (!visitStmt(IS->getInit())) + return false; + + if (const DeclStmt *CondDecl = IS->getConditionVariableDeclStmt()) + if (!visitDeclStmt(CondDecl)) + return false; + + if (!this->visitBool(IS->getCond())) + return false; + + if (const Stmt *Else = IS->getElse()) { + LabelTy LabelElse = this->getLabel(); + LabelTy LabelEnd = this->getLabel(); + if (!this->jumpFalse(LabelElse)) + return false; + if (!visitStmt(IS->getThen())) + return false; + if (!this->jump(LabelEnd)) + return false; + this->emitLabel(LabelElse); + if (!visitStmt(Else)) + return false; + this->emitLabel(LabelEnd); + } else { + LabelTy LabelEnd = this->getLabel(); + if (!this->jumpFalse(LabelEnd)) + return false; + if (!visitStmt(IS->getThen())) + return false; + this->emitLabel(LabelEnd); + } + + return true; +} + +template +bool ByteCodeStmtGen::visitVarDecl(const VarDecl *VD) { + auto DT = VD->getType(); + + if (!VD->hasLocalStorage()) { + // No code generation required. + return true; + } + + // Integers, pointers, primitives. + if (Optional T = this->classify(DT)) { + auto Off = this->allocateLocalPrimitive(VD, *T, DT.isConstQualified()); + // Compile the initialiser in its own scope. + { + ExprScope Scope(this); + if (!this->visit(VD->getInit())) + return false; + } + // Set the value. + return this->emitSetLocal(*T, Off, VD); + } else { + // Composite types - allocate storage and initialize it. 
+ if (auto Off = this->allocateLocal(VD)) { + return this->visitLocalInitializer(VD->getInit(), *Off); + } else { + return this->bail(VD); + } + } +} + +namespace clang { +namespace interp { + +template class ByteCodeStmtGen; + +} // namespace interp +} // namespace clang diff --git a/lib/AST/Interp/ByteCodeStmtGen.h b/lib/AST/Interp/ByteCodeStmtGen.h new file mode 100644 index 00000000000..d9c0b64ed4b --- /dev/null +++ b/lib/AST/Interp/ByteCodeStmtGen.h @@ -0,0 +1,89 @@ +//===--- ByteCodeStmtGen.h - Code generator for statements ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the constexpr bytecode compiler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_BYTECODESTMTGEN_H +#define LLVM_CLANG_AST_INTERP_BYTECODESTMTGEN_H + +#include "ByteCodeEmitter.h" +#include "ByteCodeExprGen.h" +#include "EvalEmitter.h" +#include "Pointer.h" +#include "PrimType.h" +#include "Record.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/Optional.h" + +namespace clang { +class QualType; + +namespace interp { +class Function; +class State; + +template class LoopScope; +template class SwitchScope; +template class LabelScope; + +/// Compilation context for statements. +template +class ByteCodeStmtGen : public ByteCodeExprGen { + using LabelTy = typename Emitter::LabelTy; + using AddrTy = typename Emitter::AddrTy; + using OptLabelTy = llvm::Optional; + using CaseMap = llvm::DenseMap; + +public: + template + ByteCodeStmtGen(Tys&&... Args) + : ByteCodeExprGen(std::forward(Args)...)
{} + +protected: + bool visitFunc(const FunctionDecl *F) override; + +private: + friend class LabelScope; + friend class LoopScope; + friend class SwitchScope; + + // Statement visitors. + bool visitStmt(const Stmt *S); + bool visitCompoundStmt(const CompoundStmt *S); + bool visitDeclStmt(const DeclStmt *DS); + bool visitReturnStmt(const ReturnStmt *RS); + bool visitIfStmt(const IfStmt *IS); + + /// Compiles a variable declaration. + bool visitVarDecl(const VarDecl *VD); + +private: + /// Type of the expression returned by the function. + llvm::Optional ReturnType; + + /// Switch case mapping. + CaseMap CaseLabels; + + /// Point to break to. + OptLabelTy BreakLabel; + /// Point to continue to. + OptLabelTy ContinueLabel; + /// Default case label. + OptLabelTy DefaultLabel; +}; + +extern template class ByteCodeExprGen; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Context.cpp b/lib/AST/Interp/Context.cpp new file mode 100644 index 00000000000..4f8f7b96e7c --- /dev/null +++ b/lib/AST/Interp/Context.cpp @@ -0,0 +1,148 @@ +//===--- Context.cpp - Context for the constexpr VM -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Context.h" +#include "ByteCodeEmitter.h" +#include "ByteCodeExprGen.h" +#include "ByteCodeStmtGen.h" +#include "EvalEmitter.h" +#include "Interp.h" +#include "InterpFrame.h" +#include "InterpStack.h" +#include "PrimType.h" +#include "Program.h" +#include "clang/AST/Expr.h" + +using namespace clang; +using namespace clang::interp; + +Context::Context(ASTContext &Ctx) + : Ctx(Ctx), ForceInterp(getLangOpts().ForceNewConstInterp), + P(new Program(*this)) {} + +Context::~Context() {} + +InterpResult Context::isPotentialConstantExpr(State &Parent, + const FunctionDecl *FD) { + Function *Func = P->getFunction(FD); + if (!Func) { + if (auto R = ByteCodeStmtGen(*this, *P).compileFunc(FD)) { + Func = *R; + } else if (ForceInterp) { + handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { + Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); + }); + return InterpResult::Fail; + } else { + consumeError(R.takeError()); + return InterpResult::Bail; + } + } + + if (!Func->isConstexpr()) + return InterpResult::Fail; + + APValue Dummy; + return Run(Parent, Func, Dummy); +} + +InterpResult Context::evaluateAsRValue(State &Parent, const Expr *E, + APValue &Result) { + ByteCodeExprGen C(*this, *P, Parent, Stk, Result); + return Check(Parent, C.interpretExpr(E)); +} + +InterpResult Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, + APValue &Result) { + ByteCodeExprGen C(*this, *P, Parent, Stk, Result); + return Check(Parent, C.interpretDecl(VD)); +} + +const LangOptions &Context::getLangOpts() const { return Ctx.getLangOpts(); } + +llvm::Optional Context::classify(QualType T) { + if (T->isReferenceType() || T->isPointerType()) { + return PT_Ptr; + } + + if (T->isBooleanType()) + return PT_Bool; + + if (T->isSignedIntegerOrEnumerationType()) { + switch (Ctx.getIntWidth(T)) { + case 
64: + return PT_Sint64; + case 32: + return PT_Sint32; + case 16: + return PT_Sint16; + case 8: + return PT_Sint8; + default: + return {}; + } + } + + if (T->isUnsignedIntegerOrEnumerationType()) { + switch (Ctx.getIntWidth(T)) { + case 64: + return PT_Uint64; + case 32: + return PT_Uint32; + case 16: + return PT_Uint16; + case 8: + return PT_Uint8; + default: + return {}; + } + } + + if (T->isNullPtrType()) + return PT_Ptr; + + if (auto *AT = dyn_cast(T)) + return classify(AT->getValueType()); + + return {}; +} + +unsigned Context::getCharBit() const { + return Ctx.getTargetInfo().getCharWidth(); +} + +InterpResult Context::Run(State &Parent, Function *Func, APValue &Result) { + InterpResult Flag; + { + InterpState State(Parent, *P, Stk, *this); + State.Current = new InterpFrame(State, Func, nullptr, {}, {}); + if (Interpret(State, Result)) { + Flag = InterpResult::Success; + } else { + Flag = InterpResult::Fail; + } + } + + if (Flag != InterpResult::Success) + Stk.clear(); + return Flag; +} + +InterpResult Context::Check(State &Parent, llvm::Expected &&R) { + if (R) { + return *R ? InterpResult::Success : InterpResult::Fail; + } else if (ForceInterp) { + handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { + Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); + }); + return InterpResult::Fail; + } else { + consumeError(R.takeError()); + return InterpResult::Bail; + } +} diff --git a/lib/AST/Interp/Context.h b/lib/AST/Interp/Context.h new file mode 100644 index 00000000000..96368b6e5f0 --- /dev/null +++ b/lib/AST/Interp/Context.h @@ -0,0 +1,100 @@ +//===--- Context.h - Context for the constexpr VM ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the constexpr execution context. +// +// The execution context manages cached bytecode and the global context. +// It invokes the compiler and interpreter, propagating errors. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_CONTEXT_H +#define LLVM_CLANG_AST_INTERP_CONTEXT_H + +#include "Context.h" +#include "InterpStack.h" +#include "clang/AST/APValue.h" +#include "llvm/ADT/PointerIntPair.h" + +namespace clang { +class ASTContext; +class LangOptions; +class Stmt; +class FunctionDecl; +class VarDecl; + +namespace interp { +class Function; +class Program; +class State; +enum PrimType : unsigned; + +/// Wrapper around interpreter termination results. +enum class InterpResult { + /// Interpreter successfully computed a value. + Success, + /// Interpreter encountered an error and quit. + Fail, + /// Interpreter encountered an unimplemented feature, AST fallback. + Bail, +}; + +/// Holds all information required to evaluate constexpr code in a module. +class Context { +public: + /// Initialises the constexpr VM. + Context(ASTContext &Ctx); + + /// Cleans up the constexpr VM. + ~Context(); + + /// Checks if a function is a potential constant expression. + InterpResult isPotentialConstantExpr(State &Parent, + const FunctionDecl *FnDecl); + + /// Evaluates a toplevel expression as an rvalue. + InterpResult evaluateAsRValue(State &Parent, const Expr *E, APValue &Result); + + /// Evaluates a toplevel initializer. + InterpResult evaluateAsInitializer(State &Parent, const VarDecl *VD, + APValue &Result); + + /// Returns the AST context. + ASTContext &getASTContext() const { return Ctx; } + /// Returns the language options. + const LangOptions &getLangOpts() const; + /// Returns the interpreter stack. 
+ InterpStack &getStack() { return Stk; } + /// Returns the width of char on the target, in bits. + unsigned getCharBit() const; + + /// Classifies a type, yielding its primitive type if it has one. + llvm::Optional classify(QualType T); + +private: + /// Runs a function. + InterpResult Run(State &Parent, Function *Func, APValue &Result); + + /// Checks a result from the interpreter. + InterpResult Check(State &Parent, llvm::Expected &&R); + +private: + /// Current compilation context. + ASTContext &Ctx; + /// Flag to indicate if the use of the interpreter is mandatory. + bool ForceInterp; + /// Interpreter stack, shared across invocations. + InterpStack Stk; + /// Constexpr program. + std::unique_ptr P; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Descriptor.cpp b/lib/AST/Interp/Descriptor.cpp new file mode 100644 index 00000000000..5c1a8a9cf30 --- /dev/null +++ b/lib/AST/Interp/Descriptor.cpp @@ -0,0 +1,292 @@ +//===--- Descriptor.cpp - Types for the constexpr VM ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Descriptor.h" +#include "Pointer.h" +#include "PrimType.h" +#include "Record.h" + +using namespace clang; +using namespace clang::interp; + +template +static void ctorTy(Block *, char *Ptr, bool, bool, bool, Descriptor *) { + new (Ptr) T(); +} + +template static void dtorTy(Block *, char *Ptr, Descriptor *) { + reinterpret_cast(Ptr)->~T(); +} + +template +static void moveTy(Block *, char *Src, char *Dst, Descriptor *) { + auto *SrcPtr = reinterpret_cast(Src); + auto *DstPtr = reinterpret_cast(Dst); + new (DstPtr) T(std::move(*SrcPtr)); +} + +template +static void ctorArrayTy(Block *, char *Ptr, bool, bool, bool, Descriptor *D) { + for (unsigned I = 0, NE = D->getNumElems(); I < NE; ++I) { + new (&reinterpret_cast(Ptr)[I]) T(); + } +} + +template +static void dtorArrayTy(Block *, char *Ptr, Descriptor *D) { + for (unsigned I = 0, NE = D->getNumElems(); I < NE; ++I) { + reinterpret_cast(Ptr)[I].~T(); + } +} + +template +static void moveArrayTy(Block *, char *Src, char *Dst, Descriptor *D) { + for (unsigned I = 0, NE = D->getNumElems(); I < NE; ++I) { + auto *SrcPtr = &reinterpret_cast(Src)[I]; + auto *DstPtr = &reinterpret_cast(Dst)[I]; + new (DstPtr) T(std::move(*SrcPtr)); + } +} + +static void ctorArrayDesc(Block *B, char *Ptr, bool IsConst, bool IsMutable, + bool IsActive, Descriptor *D) { + const unsigned NumElems = D->getNumElems(); + const unsigned ElemSize = + D->ElemDesc->getAllocSize() + sizeof(InlineDescriptor); + + unsigned ElemOffset = 0; + for (unsigned I = 0; I < NumElems; ++I, ElemOffset += ElemSize) { + auto *ElemPtr = Ptr + ElemOffset; + auto *Desc = reinterpret_cast(ElemPtr); + auto *ElemLoc = reinterpret_cast(Desc + 1); + auto *SD = D->ElemDesc; + + Desc->Offset = ElemOffset + sizeof(InlineDescriptor); + Desc->Desc = SD; + Desc->IsInitialized = true; + Desc->IsBase = false; + Desc->IsActive = 
IsActive; + Desc->IsConst = IsConst || D->IsConst; + Desc->IsMutable = IsMutable || D->IsMutable; + if (auto Fn = D->ElemDesc->CtorFn) + Fn(B, ElemLoc, Desc->IsConst, Desc->IsMutable, IsActive, D->ElemDesc); + } +} + +static void dtorArrayDesc(Block *B, char *Ptr, Descriptor *D) { + const unsigned NumElems = D->getNumElems(); + const unsigned ElemSize = + D->ElemDesc->getAllocSize() + sizeof(InlineDescriptor); + + unsigned ElemOffset = 0; + for (unsigned I = 0; I < NumElems; ++I, ElemOffset += ElemSize) { + auto *ElemPtr = Ptr + ElemOffset; + auto *Desc = reinterpret_cast(ElemPtr); + auto *ElemLoc = reinterpret_cast(Desc + 1); + if (auto Fn = D->ElemDesc->DtorFn) + Fn(B, ElemLoc, D->ElemDesc); + } +} + +static void moveArrayDesc(Block *B, char *Src, char *Dst, Descriptor *D) { + const unsigned NumElems = D->getNumElems(); + const unsigned ElemSize = + D->ElemDesc->getAllocSize() + sizeof(InlineDescriptor); + + unsigned ElemOffset = 0; + for (unsigned I = 0; I < NumElems; ++I, ElemOffset += ElemSize) { + auto *SrcPtr = Src + ElemOffset; + auto *DstPtr = Dst + ElemOffset; + + auto *SrcDesc = reinterpret_cast(SrcPtr); + auto *SrcElemLoc = reinterpret_cast(SrcDesc + 1); + auto *DstDesc = reinterpret_cast(DstPtr); + auto *DstElemLoc = reinterpret_cast(DstDesc + 1); + + *DstDesc = *SrcDesc; + if (auto Fn = D->ElemDesc->MoveFn) + Fn(B, SrcElemLoc, DstElemLoc, D->ElemDesc); + } +} + +static void ctorRecord(Block *B, char *Ptr, bool IsConst, bool IsMutable, + bool IsActive, Descriptor *D) { + const bool IsUnion = D->ElemRecord->isUnion(); + auto CtorSub = [=](unsigned SubOff, Descriptor *F, bool IsBase) { + auto *Desc = reinterpret_cast(Ptr + SubOff) - 1; + Desc->Offset = SubOff; + Desc->Desc = F; + Desc->IsInitialized = (B->isStatic() || F->IsArray) && !IsBase; + Desc->IsBase = IsBase; + Desc->IsActive = IsActive && !IsUnion; + Desc->IsConst = IsConst || F->IsConst; + Desc->IsMutable = IsMutable || F->IsMutable; + if (auto Fn = F->CtorFn) + Fn(B, Ptr + SubOff, 
Desc->IsConst, Desc->IsMutable, Desc->IsActive, F); + }; + for (const auto &B : D->ElemRecord->bases()) + CtorSub(B.Offset, B.Desc, /*isBase=*/true); + for (const auto &F : D->ElemRecord->fields()) + CtorSub(F.Offset, F.Desc, /*isBase=*/false); + for (const auto &V : D->ElemRecord->virtual_bases()) + CtorSub(V.Offset, V.Desc, /*isBase=*/true); +} + +static void dtorRecord(Block *B, char *Ptr, Descriptor *D) { + auto DtorSub = [=](unsigned SubOff, Descriptor *F) { + if (auto Fn = F->DtorFn) + Fn(B, Ptr + SubOff, F); + }; + for (const auto &F : D->ElemRecord->bases()) + DtorSub(F.Offset, F.Desc); + for (const auto &F : D->ElemRecord->fields()) + DtorSub(F.Offset, F.Desc); + for (const auto &F : D->ElemRecord->virtual_bases()) + DtorSub(F.Offset, F.Desc); +} + +static void moveRecord(Block *B, char *Src, char *Dst, Descriptor *D) { + for (const auto &F : D->ElemRecord->fields()) { + auto FieldOff = F.Offset; + auto FieldDesc = F.Desc; + + *(reinterpret_cast(Dst + FieldOff) - 1) = FieldDesc; + if (auto Fn = FieldDesc->MoveFn) + Fn(B, Src + FieldOff, Dst + FieldOff, FieldDesc); + } +} + +static BlockCtorFn getCtorPrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return ctorTy, return nullptr); +} + +static BlockDtorFn getDtorPrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return dtorTy, return nullptr); +} + +static BlockMoveFn getMovePrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return moveTy, return nullptr); +} + +static BlockCtorFn getCtorArrayPrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return ctorArrayTy, return nullptr); +} + +static BlockDtorFn getDtorArrayPrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return dtorArrayTy, return nullptr); +} + +static BlockMoveFn getMoveArrayPrim(PrimType Type) { + COMPOSITE_TYPE_SWITCH(Type, return moveArrayTy, return nullptr); +} + +Descriptor::Descriptor(const DeclTy &D, PrimType Type, bool IsConst, + bool IsTemporary, bool IsMutable) + : Source(D), ElemSize(primSize(Type)), Size(ElemSize), 
AllocSize(Size), + IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), + CtorFn(getCtorPrim(Type)), DtorFn(getDtorPrim(Type)), + MoveFn(getMovePrim(Type)) { + assert(Source && "Missing source"); +} + +Descriptor::Descriptor(const DeclTy &D, PrimType Type, size_t NumElems, + bool IsConst, bool IsTemporary, bool IsMutable) + : Source(D), ElemSize(primSize(Type)), Size(ElemSize * NumElems), + AllocSize(align(Size) + sizeof(InitMap *)), IsConst(IsConst), + IsMutable(IsMutable), IsTemporary(IsTemporary), IsArray(true), + CtorFn(getCtorArrayPrim(Type)), DtorFn(getDtorArrayPrim(Type)), + MoveFn(getMoveArrayPrim(Type)) { + assert(Source && "Missing source"); +} + +Descriptor::Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, + UnknownSize) + : Source(D), ElemSize(primSize(Type)), Size(UnknownSizeMark), + AllocSize(alignof(void *)), IsConst(true), IsMutable(false), + IsTemporary(IsTemporary), IsArray(true), CtorFn(getCtorArrayPrim(Type)), + DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) { + assert(Source && "Missing source"); +} + +Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, unsigned NumElems, + bool IsConst, bool IsTemporary, bool IsMutable) + : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), + Size(ElemSize * NumElems), + AllocSize(std::max(alignof(void *), Size)), ElemDesc(Elem), + IsConst(IsConst), IsMutable(IsMutable), IsTemporary(IsTemporary), + IsArray(true), CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc), + MoveFn(moveArrayDesc) { + assert(Source && "Missing source"); +} + +Descriptor::Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, + UnknownSize) + : Source(D), ElemSize(Elem->getAllocSize() + sizeof(InlineDescriptor)), + Size(UnknownSizeMark), AllocSize(alignof(void *)), ElemDesc(Elem), + IsConst(true), IsMutable(false), IsTemporary(IsTemporary), IsArray(true), + CtorFn(ctorArrayDesc), DtorFn(dtorArrayDesc), MoveFn(moveArrayDesc) { + assert(Source && "Missing source"); +} + 
+/// Builds the descriptor of a record; storage is at least pointer-aligned in size. +Descriptor::Descriptor(const DeclTy &D, Record *R, bool IsConst, + bool IsTemporary, bool IsMutable) + : Source(D), ElemSize(std::max(alignof(void *), R->getFullSize())), + Size(ElemSize), AllocSize(Size), ElemRecord(R), IsConst(IsConst), + IsMutable(IsMutable), IsTemporary(IsTemporary), CtorFn(ctorRecord), + DtorFn(dtorRecord), MoveFn(moveRecord) { + assert(Source && "Missing source"); +} + +/// Returns the type of the expression or value declaration behind the block. +QualType Descriptor::getType() const { + if (auto *E = asExpr()) + return E->getType(); + if (auto *D = asValueDecl()) + return D->getType(); + llvm_unreachable("Invalid descriptor type"); +} + +/// Returns the source location of the declaration or expression behind the block. +SourceLocation Descriptor::getLocation() const { + if (auto *D = Source.dyn_cast()) + return D->getLocation(); + if (auto *E = Source.dyn_cast()) + return E->getExprLoc(); + llvm_unreachable("Invalid descriptor type"); +} + +/// Creates a map tracking N elements, none of which is initialized yet. +InitMap::InitMap(unsigned N) : UninitFields(N) { + // The bit storage comes from malloc (see allocate), so every word has to be + // cleared here - including the last one when N is not a multiple of + // PER_FIELD. Rounding down would leave that word with indeterminate bits. + const size_t NumFields = (N + PER_FIELD - 1) / PER_FIELD; + for (size_t I = 0; I < NumFields; ++I) { + data()[I] = 0; + } +} + +/// Returns the bit storage, which trails the (aligned) InitMap header. +InitMap::T *InitMap::data() { + auto *Start = reinterpret_cast(this) + align(sizeof(InitMap)); + return reinterpret_cast(Start); +} + +/// Marks element I as initialized. Returns true once no element is left +/// uninitialized. Marking the same element twice counts only once. +bool InitMap::initialize(unsigned I) { + unsigned Bucket = I / PER_FIELD; + // Build the mask in the word type T: a mask narrower than T would lose the + // upper bit positions and the element would never be recorded. + const T Mask = T(1) << (I % PER_FIELD); + if (!(data()[Bucket] & Mask)) { + data()[Bucket] |= Mask; + UninitFields -= 1; + } + return UninitFields == 0; +} + +/// Checks whether element I was marked as initialized. +bool InitMap::isInitialized(unsigned I) { + unsigned Bucket = I / PER_FIELD; + const T Mask = T(1) << (I % PER_FIELD); + return (data()[Bucket] & Mask) != 0; +} + +/// Allocates a header followed by enough words of type T to hold N bits. +InitMap *InitMap::allocate(unsigned N) { + const size_t NumFields = ((N + PER_FIELD - 1) / PER_FIELD); + // PER_FIELD is a bit count; the byte size of one word is sizeof(T). + const size_t Size = align(sizeof(InitMap)) + NumFields * sizeof(T); + return new (malloc(Size)) InitMap(N); +} diff --git a/lib/AST/Interp/Descriptor.h b/lib/AST/Interp/Descriptor.h new file mode 100644 index 00000000000..b260b760097 --- /dev/null +++ b/lib/AST/Interp/Descriptor.h @@ -0,0 +1,220 @@ +//===--- Descriptor.h - Types for the constexpr VM --------------*- C++ -*-===// +// +// Part of the
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines descriptors which characterise allocations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_DESCRIPTOR_H +#define LLVM_CLANG_AST_INTERP_DESCRIPTOR_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" + +namespace clang { +namespace interp { +class Block; +class Record; +struct Descriptor; +enum PrimType : unsigned; + +using DeclTy = llvm::PointerUnion; + +/// Invoked whenever a block is created. The constructor method fills in the +/// inline descriptors of all fields and array elements. It also initializes +/// all the fields which contain non-trivial types. +using BlockCtorFn = void (*)(Block *Storage, char *FieldPtr, bool IsConst, + bool IsMutable, bool IsActive, + Descriptor *FieldDesc); + +/// Invoked when a block is destroyed. Invokes the destructors of all +/// non-trivial nested fields of arrays and records. +using BlockDtorFn = void (*)(Block *Storage, char *FieldPtr, + Descriptor *FieldDesc); + +/// Invoked when a block with pointers referencing it goes out of scope. Such +/// blocks are persisted: the move function copies all inline descriptors and +/// non-trivial fields, as existing pointers might need to reference those +/// descriptors. Data is not copied since it cannot be legally read. +using BlockMoveFn = void (*)(Block *Storage, char *SrcFieldPtr, + char *DstFieldPtr, Descriptor *FieldDesc); + +/// Object size as used by the interpreter. +using InterpSize = unsigned; + +/// Describes a memory block created by an allocation site. +struct Descriptor { +private: + /// Original declaration, used to emit the error message. + const DeclTy Source; + /// Size of an element, in host bytes. 
+ const InterpSize ElemSize; + /// Size of the storage, in host bytes. + const InterpSize Size; + /// Size of the allocation (storage + metadata), in host bytes. + const InterpSize AllocSize; + + /// Value to denote arrays of unknown size. + static constexpr unsigned UnknownSizeMark = (unsigned)-1; + +public: + /// Tag type selecting the constructors for arrays of unknown size. + struct UnknownSize {}; + + /// Pointer to the record, if block contains records. + Record *const ElemRecord = nullptr; + /// Descriptor of the array element. + Descriptor *const ElemDesc = nullptr; + /// Flag indicating if the block is const. + const bool IsConst = false; + /// Flag indicating if a field is mutable. + const bool IsMutable = false; + /// Flag indicating if the block is a temporary. + const bool IsTemporary = false; + /// Flag indicating if the block is an array. + const bool IsArray = false; + + /// Storage management callbacks; each may be null. + const BlockCtorFn CtorFn = nullptr; + const BlockDtorFn DtorFn = nullptr; + const BlockMoveFn MoveFn = nullptr; + + /// Allocates a descriptor for a primitive. + Descriptor(const DeclTy &D, PrimType Type, bool IsConst, bool IsTemporary, + bool IsMutable); + + /// Allocates a descriptor for an array of primitives. + Descriptor(const DeclTy &D, PrimType Type, size_t NumElems, bool IsConst, + bool IsTemporary, bool IsMutable); + + /// Allocates a descriptor for an array of primitives of unknown size. + Descriptor(const DeclTy &D, PrimType Type, bool IsTemporary, UnknownSize); + + /// Allocates a descriptor for an array of composites. + Descriptor(const DeclTy &D, Descriptor *Elem, unsigned NumElems, bool IsConst, + bool IsTemporary, bool IsMutable); + + /// Allocates a descriptor for an array of composites of unknown size. + Descriptor(const DeclTy &D, Descriptor *Elem, bool IsTemporary, UnknownSize); + + /// Allocates a descriptor for a record.
+ Descriptor(const DeclTy &D, Record *R, bool IsConst, bool IsTemporary, + bool IsMutable); + + QualType getType() const; + SourceLocation getLocation() const; + + const Decl *asDecl() const { return Source.dyn_cast(); } + const Expr *asExpr() const { return Source.dyn_cast(); } + + const ValueDecl *asValueDecl() const { + return dyn_cast_or_null(asDecl()); + } + + const FieldDecl *asFieldDecl() const { + return dyn_cast_or_null(asDecl()); + } + + const RecordDecl *asRecordDecl() const { + return dyn_cast_or_null(asDecl()); + } + + /// Returns the size of the object without metadata. + unsigned getSize() const { + assert(!isUnknownSizeArray() && "Array of unknown size"); + return Size; + } + + /// Returns the allocated size, including metadata. + unsigned getAllocSize() const { return AllocSize; } + /// returns the size of an element when the structure is viewed as an array. + unsigned getElemSize() const { return ElemSize; } + + /// Returns the number of elements stored in the block. + unsigned getNumElems() const { + return Size == UnknownSizeMark ? 0 : (getSize() / getElemSize()); + } + + /// Checks if the descriptor is of an array of primitives. + bool isPrimitiveArray() const { return IsArray && !ElemDesc; } + /// Checks if the descriptor is of an array of zero size. + bool isZeroSizeArray() const { return Size == 0; } + /// Checks if the descriptor is of an array of unknown size. + bool isUnknownSizeArray() const { return Size == UnknownSizeMark; } + + /// Checks if the descriptor is of a primitive. + bool isPrimitive() const { return !IsArray && !ElemRecord; } + + /// Checks if the descriptor is of an array. + bool isArray() const { return IsArray; } +}; + +/// Inline descriptor embedded in structures and arrays. +/// +/// Such descriptors precede all composite array elements and structure fields. +/// If the base of a pointer is not zero, the base points to the end of this +/// structure. 
The offset field is used to traverse the pointer chain up +/// to the root structure which allocated the object. +struct InlineDescriptor { + /// Offset inside the structure/array. + unsigned Offset; + + /// Flag indicating if the storage is constant or not. + /// Relevant for primitive fields. + unsigned IsConst : 1; + /// For primitive fields, it indicates if the field was initialized. + /// Primitive fields in static storage are always initialized. + /// Arrays are always initialized, even though their elements might not be. + /// Base classes are initialized after the constructor is invoked. + unsigned IsInitialized : 1; + /// Flag indicating if the field is an embedded base class. + unsigned IsBase : 1; + /// Flag indicating if the field is the active member of a union. + unsigned IsActive : 1; + /// Flag indicating if the field is mutable (if in a record). + unsigned IsMutable : 1; + + /// Descriptor of the field or array element that follows this header. + Descriptor *Desc; +}; + +/// Bitfield tracking the initialisation status of elements of primitive arrays. +/// A pointer to this is embedded at the end of all primitive arrays. +/// If the map was not yet created and nothing was initialized, the pointer to +/// this structure is 0. If the object was fully initialized, the pointer is -1. +struct InitMap { +private: + /// Type packing bits. + using T = uint64_t; + /// Bits stored in a single field. + static constexpr uint64_t PER_FIELD = sizeof(T) * CHAR_BIT; + + /// Initializes the map with no fields set. + InitMap(unsigned N); + + /// Returns a pointer to storage. + T *data(); + +public: + /// Initializes an element. Returns true when the object is fully initialized. + bool initialize(unsigned I); + + /// Checks if an element was initialized. + bool isInitialized(unsigned I); + + /// Allocates a map holding N elements. + static InitMap *allocate(unsigned N); + +private: + /// Number of fields not yet initialized.
+ unsigned UninitFields; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Disasm.cpp b/lib/AST/Interp/Disasm.cpp new file mode 100644 index 00000000000..e77a825eb1f --- /dev/null +++ b/lib/AST/Interp/Disasm.cpp @@ -0,0 +1,69 @@ +//===--- Disasm.cpp - Disassembler for bytecode functions -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Dump method for Function which disassembles the bytecode. +// +//===----------------------------------------------------------------------===// + +#include "Function.h" +#include "Opcode.h" +#include "PrimType.h" +#include "Program.h" +#include "clang/AST/DeclCXX.h" +#include "llvm/Support/Compiler.h" + +using namespace clang; +using namespace clang::interp; + +LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); } + +LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const { + if (F) { + if (auto *Cons = dyn_cast(F)) { + const std::string &Name = Cons->getParent()->getNameAsString(); + OS << Name << "::" << Name << ":\n"; + } else { + OS << F->getNameAsString() << ":\n"; + } + } else { + OS << "<>\n"; + } + + OS << "frame size: " << getFrameSize() << "\n"; + OS << "arg size: " << getArgSize() << "\n"; + OS << "rvo: " << hasRVO() << "\n"; + + auto PrintName = [&OS](const char *Name) { + OS << Name; + for (long I = 0, N = strlen(Name); I < 30 - N; ++I) { + OS << ' '; + } + }; + + for (CodePtr Start = getCodeBegin(), PC = Start; PC != getCodeEnd();) { + size_t Addr = PC - Start; + auto Op = PC.read(); + OS << llvm::format("%8d", Addr) << " "; + switch (Op) { +#define GET_DISASM +#include "Opcodes.inc" +#undef GET_DISASM + } + } +} + +LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); } + 
+/// Disassembles every named and anonymous function of the program into OS. +LLVM_DUMP_METHOD void Program::dump(llvm::raw_ostream &OS) const { + // Forward the caller's stream: the argument-less Function::dump() always + // writes to llvm::errs(), which would silently ignore OS. + for (auto &Func : Funcs) { + Func.second->dump(OS); + } + for (auto &Anon : AnonFuncs) { + Anon->dump(OS); + } +} diff --git a/lib/AST/Interp/EvalEmitter.cpp b/lib/AST/Interp/EvalEmitter.cpp new file mode 100644 index 00000000000..22e8695b921 --- /dev/null +++ b/lib/AST/Interp/EvalEmitter.cpp @@ -0,0 +1,253 @@ +//===--- EvalEmitter.cpp - Instruction emitter for the VM -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "EvalEmitter.h" +#include "Context.h" +#include "Interp.h" +#include "Opcode.h" +#include "Program.h" +#include "clang/AST/DeclCXX.h" + +using namespace clang; +using namespace clang::interp; + +using APSInt = llvm::APSInt; +template using Expected = llvm::Expected; + +EvalEmitter::EvalEmitter(Context &Ctx, Program &P, State &Parent, + InterpStack &Stk, APValue &Result) + : Ctx(Ctx), P(P), S(Parent, P, Stk, Ctx, this), Result(Result) { + // Create a dummy frame for the interpreter which does not have locals. + S.Current = new InterpFrame(S, nullptr, nullptr, CodePtr(), Pointer()); +} + +llvm::Expected EvalEmitter::interpretExpr(const Expr *E) { + if (this->visitExpr(E)) + return true; + if (BailLocation) + return llvm::make_error(*BailLocation); + return false; +} + +llvm::Expected EvalEmitter::interpretDecl(const VarDecl *VD) { + if (this->visitDecl(VD)) + return true; + if (BailLocation) + return llvm::make_error(*BailLocation); + return false; +} + +void EvalEmitter::emitLabel(LabelTy Label) { + CurrentLabel = Label; +} + +EvalEmitter::LabelTy EvalEmitter::getLabel() { return NextLabel++; } + +Scope::Local EvalEmitter::createLocal(Descriptor *D) { + // Allocate memory for a local.
+ auto Memory = std::make_unique(sizeof(Block) + D->getAllocSize()); + auto *B = new (Memory.get()) Block(D, /*isStatic=*/false); + B->invokeCtor(); + + // Register the local. + unsigned Off = Locals.size(); + Locals.insert({Off, std::move(Memory)}); + return {Off, D}; +} + +bool EvalEmitter::bail(const SourceLocation &Loc) { + if (!BailLocation) + BailLocation = Loc; + return false; +} + +bool EvalEmitter::jumpTrue(const LabelTy &Label) { + if (isActive()) { + if (S.Stk.pop()) + ActiveLabel = Label; + } + return true; +} + +bool EvalEmitter::jumpFalse(const LabelTy &Label) { + if (isActive()) { + if (!S.Stk.pop()) + ActiveLabel = Label; + } + return true; +} + +bool EvalEmitter::jump(const LabelTy &Label) { + if (isActive()) + CurrentLabel = ActiveLabel = Label; + return true; +} + +bool EvalEmitter::fallthrough(const LabelTy &Label) { + if (isActive()) + ActiveLabel = Label; + CurrentLabel = Label; + return true; +} + +template bool EvalEmitter::emitRet(const SourceInfo &Info) { + if (!isActive()) + return true; + using T = typename PrimConv::T; + return ReturnValue(S.Stk.pop(), Result); +} + +bool EvalEmitter::emitRetVoid(const SourceInfo &Info) { return true; } + +bool EvalEmitter::emitRetValue(const SourceInfo &Info) { + // Method to recursively traverse composites. 
+ std::function Composite; + Composite = [this, &Composite](QualType Ty, const Pointer &Ptr, APValue &R) { + // Atomic types are converted like their underlying value type. + if (auto *AT = Ty->getAs()) + Ty = AT->getValueType(); + + if (auto *RT = Ty->getAs()) { + auto *Record = Ptr.getRecord(); + assert(Record && "Missing record descriptor"); + + bool Ok = true; + if (RT->getDecl()->isUnion()) { + // A union is returned as its active member together with that + // member's value; both stay empty when no member is active. + const FieldDecl *ActiveField = nullptr; + APValue Value; + for (auto &F : Record->fields()) { + const Pointer &FP = Ptr.atField(F.Offset); + QualType FieldTy = F.Decl->getType(); + if (FP.isActive()) { + // Remember which member the value belongs to; without this the + // result would always name a null field. + ActiveField = F.Decl; + if (llvm::Optional T = Ctx.classify(FieldTy)) { + TYPE_SWITCH(*T, Ok &= ReturnValue(FP.deref(), Value)); + } else { + Ok &= Composite(FieldTy, FP, Value); + } + break; + } + } + R = APValue(ActiveField, Value); + } else { + unsigned NF = Record->getNumFields(); + unsigned NB = Record->getNumBases(); + unsigned NV = Ptr.isBaseClass() ? 0 : Record->getNumVirtualBases(); + + R = APValue(APValue::UninitStruct(), NB, NF); + + // Fields: primitives are read directly, composites recurse. + for (unsigned I = 0; I < NF; ++I) { + const Record::Field *FD = Record->getField(I); + QualType FieldTy = FD->Decl->getType(); + const Pointer &FP = Ptr.atField(FD->Offset); + APValue &Value = R.getStructField(I); + + if (llvm::Optional T = Ctx.classify(FieldTy)) { + TYPE_SWITCH(*T, Ok &= ReturnValue(FP.deref(), Value)); + } else { + Ok &= Composite(FieldTy, FP, Value); + } + } + + for (unsigned I = 0; I < NB; ++I) { + const Record::Base *BD = Record->getBase(I); + QualType BaseTy = Ctx.getASTContext().getRecordType(BD->Decl); + const Pointer &BP = Ptr.atField(BD->Offset); + Ok &= Composite(BaseTy, BP, R.getStructBase(I)); + } + + // NOTE(review): R was created with NB base slots only, yet the virtual + // bases are stored at index NB + I - confirm this cannot run past the + // reserved bases when NV is non-zero. + for (unsigned I = 0; I < NV; ++I) { + const Record::Base *VD = Record->getVirtualBase(I); + QualType VirtBaseTy = Ctx.getASTContext().getRecordType(VD->Decl); + const Pointer &VP = Ptr.atField(VD->Offset); + Ok &= Composite(VirtBaseTy, VP, R.getStructBase(NB + I)); + } + } + return Ok; + } + if (auto *AT = Ty->getAsArrayTypeUnsafe()) { + const size_t NumElems = Ptr.getNumElems(); + QualType
ElemTy = AT->getElementType(); + R = APValue(APValue::UninitArray{}, NumElems, NumElems); + + bool Ok = true; + for (unsigned I = 0; I < NumElems; ++I) { + APValue &Slot = R.getArrayInitializedElt(I); + const Pointer &EP = Ptr.atIndex(I); + if (llvm::Optional T = Ctx.classify(ElemTy)) { + TYPE_SWITCH(*T, Ok &= ReturnValue(EP.deref(), Slot)); + } else { + Ok &= Composite(ElemTy, EP.narrow(), Slot); + } + } + return Ok; + } + llvm_unreachable("invalid value to return"); + }; + + // Return the composite type. + const auto &Ptr = S.Stk.pop(); + return Composite(Ptr.getType(), Ptr, Result); +} + +bool EvalEmitter::emitGetPtrLocal(uint32_t I, const SourceInfo &Info) { + if (!isActive()) + return true; + + auto It = Locals.find(I); + assert(It != Locals.end() && "Missing local variable"); + S.Stk.push(reinterpret_cast(It->second.get())); + return true; +} + +template +bool EvalEmitter::emitGetLocal(uint32_t I, const SourceInfo &Info) { + if (!isActive()) + return true; + + using T = typename PrimConv::T; + + auto It = Locals.find(I); + assert(It != Locals.end() && "Missing local variable"); + auto *B = reinterpret_cast(It->second.get()); + S.Stk.push(*reinterpret_cast(B + 1)); + return true; +} + +template +bool EvalEmitter::emitSetLocal(uint32_t I, const SourceInfo &Info) { + if (!isActive()) + return true; + + using T = typename PrimConv::T; + + auto It = Locals.find(I); + assert(It != Locals.end() && "Missing local variable"); + auto *B = reinterpret_cast(It->second.get()); + *reinterpret_cast(B + 1) = S.Stk.pop(); + return true; +} + +bool EvalEmitter::emitDestroy(uint32_t I, const SourceInfo &Info) { + if (!isActive()) + return true; + + for (auto &Local : Descriptors[I]) { + auto It = Locals.find(Local.Offset); + assert(It != Locals.end() && "Missing local variable"); + S.deallocate(reinterpret_cast(It->second.get())); + } + + return true; +} + +//===----------------------------------------------------------------------===// +// Opcode evaluators 
+//===----------------------------------------------------------------------===// + +#define GET_EVAL_IMPL +#include "Opcodes.inc" +#undef GET_EVAL_IMPL diff --git a/lib/AST/Interp/EvalEmitter.h b/lib/AST/Interp/EvalEmitter.h new file mode 100644 index 00000000000..eec2ff8ee75 --- /dev/null +++ b/lib/AST/Interp/EvalEmitter.h @@ -0,0 +1,129 @@ +//===--- EvalEmitter.h - Instruction emitter for the VM ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the instruction emitters. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_EVALEMITTER_H +#define LLVM_CLANG_AST_INTERP_EVALEMITTER_H + +#include "ByteCodeGenError.h" +#include "Context.h" +#include "InterpStack.h" +#include "InterpState.h" +#include "PrimType.h" +#include "Program.h" +#include "Source.h" +#include "llvm/Support/Error.h" + +namespace clang { +class FunctionDecl; +namespace interp { +class Context; +class Function; +class InterpState; +class Program; +class SourceInfo; +enum Opcode : uint32_t; + +/// An emitter which evaluates opcodes as they are emitted. +class EvalEmitter : public SourceMapper { +public: + using LabelTy = uint32_t; + using AddrTy = uintptr_t; + using Local = Scope::Local; + + llvm::Expected interpretExpr(const Expr *E); + llvm::Expected interpretDecl(const VarDecl *VD); + +protected: + EvalEmitter(Context &Ctx, Program &P, State &Parent, InterpStack &Stk, + APValue &Result); + + virtual ~EvalEmitter() {} + + /// Define a label. + void emitLabel(LabelTy Label); + /// Create a label. + LabelTy getLabel(); + + /// Methods implemented by the compiler. 
+ virtual bool visitExpr(const Expr *E) = 0; + virtual bool visitDecl(const VarDecl *VD) = 0; + + bool bail(const Stmt *S) { return bail(S->getBeginLoc()); } + bool bail(const Decl *D) { return bail(D->getBeginLoc()); } + bool bail(const SourceLocation &Loc); + + /// Emits jumps. + bool jumpTrue(const LabelTy &Label); + bool jumpFalse(const LabelTy &Label); + bool jump(const LabelTy &Label); + bool fallthrough(const LabelTy &Label); + + /// Callback for registering a local. + Local createLocal(Descriptor *D); + + /// Returns the source location of the current opcode. + SourceInfo getSource(Function *F, CodePtr PC) const override { + return F ? F->getSource(PC) : CurrentSource; + } + + /// Parameter indices. + llvm::DenseMap Params; + /// Local descriptors. + llvm::SmallVector, 2> Descriptors; + +private: + /// Current compilation context. + Context &Ctx; + /// Current program. + Program &P; + /// Callee evaluation state. + InterpState S; + /// Location to write the result to. + APValue &Result; + + /// Temporaries which require storage. + llvm::DenseMap> Locals; + + // The emitter always tracks the current instruction and sets OpPC to a token + // value which is mapped to the location of the opcode being evaluated. + CodePtr OpPC; + /// Location of a failure. + llvm::Optional BailLocation; + /// Location of the current instruction. + SourceInfo CurrentSource; + + /// Next label ID to generate - first label is 1. + LabelTy NextLabel = 1; + /// Label being executed - 0 is the entry label. + LabelTy CurrentLabel = 0; + /// Active block which should be executed. + LabelTy ActiveLabel = 0; + + /// Since expressions can only jump forward, predicated execution is + /// used to deal with if-else statements. + bool isActive() { return CurrentLabel == ActiveLabel; } + + /// Helper to invoke a method. + bool ExecuteCall(Function *F, Pointer &&This, const SourceInfo &Info); + /// Helper to emit a diagnostic on a missing method. 
+ bool ExecuteNoCall(const FunctionDecl *F, const SourceInfo &Info); + +protected: +#define GET_EVAL_PROTO +#include "Opcodes.inc" +#undef GET_EVAL_PROTO +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Frame.cpp b/lib/AST/Interp/Frame.cpp new file mode 100644 index 00000000000..16134aa1db3 --- /dev/null +++ b/lib/AST/Interp/Frame.cpp @@ -0,0 +1,14 @@ +//===--- Frame.cpp - Call frame for the VM and AST Walker -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Frame.h" + +using namespace clang; +using namespace clang::interp; + +Frame::~Frame() {} diff --git a/lib/AST/Interp/Frame.h b/lib/AST/Interp/Frame.h new file mode 100644 index 00000000000..b9a0ea9412f --- /dev/null +++ b/lib/AST/Interp/Frame.h @@ -0,0 +1,45 @@ +//===--- Frame.h - Call frame for the VM and AST Walker ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the base class of interpreter and evaluator stack frames. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_FRAME_H +#define LLVM_CLANG_AST_INTERP_FRAME_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +class FunctionDecl; + +namespace interp { + +/// Base class for stack frames, shared between VM and walker. +class Frame { +public: + virtual ~Frame(); + + /// Generates a human-readable description of the call site. 
+ virtual void describe(llvm::raw_ostream &OS) = 0; + + /// Returns a pointer to the caller frame. + virtual Frame *getCaller() const = 0; + + /// Returns the location of the call site. + virtual SourceLocation getCallLocation() const = 0; + + /// Returns the called function's declaration. + virtual const FunctionDecl *getCallee() const = 0; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Function.cpp b/lib/AST/Interp/Function.cpp new file mode 100644 index 00000000000..0ed13a92aa3 --- /dev/null +++ b/lib/AST/Interp/Function.cpp @@ -0,0 +1,48 @@ +//===--- Function.h - Bytecode function for the VM --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Function.h" +#include "Program.h" +#include "Opcode.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" + +using namespace clang; +using namespace clang::interp; + +Function::Function(Program &P, const FunctionDecl *F, unsigned ArgSize, + llvm::SmallVector &&ParamTypes, + llvm::DenseMap &&Params) + : P(P), Loc(F->getBeginLoc()), F(F), ArgSize(ArgSize), + ParamTypes(std::move(ParamTypes)), Params(std::move(Params)) {} + +CodePtr Function::getCodeBegin() const { return Code.data(); } + +CodePtr Function::getCodeEnd() const { return Code.data() + Code.size(); } + +Function::ParamDescriptor Function::getParamDescriptor(unsigned Offset) const { + auto It = Params.find(Offset); + assert(It != Params.end() && "Invalid parameter offset"); + return It->second; +} + +SourceInfo Function::getSource(CodePtr PC) const { + unsigned Offset = PC - getCodeBegin(); + using Elem = std::pair; + auto It = std::lower_bound(SrcMap.begin(), SrcMap.end(), Elem{Offset, {}}, + [](Elem A, Elem B) { return A.first < 
B.first; }); + if (It == SrcMap.end() || It->first != Offset) + llvm::report_fatal_error("missing source location"); + return It->second; +} + +bool Function::isVirtual() const { + if (auto *M = dyn_cast(F)) + return M->isVirtual(); + return false; +} diff --git a/lib/AST/Interp/Function.h b/lib/AST/Interp/Function.h new file mode 100644 index 00000000000..28531f04b6e --- /dev/null +++ b/lib/AST/Interp/Function.h @@ -0,0 +1,163 @@ +//===--- Function.h - Bytecode function for the VM --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the Function class which holds all bytecode function-specific data. +// +// The scope class which describes local variables is also defined here. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_FUNCTION_H +#define LLVM_CLANG_AST_INTERP_FUNCTION_H + +#include "Pointer.h" +#include "Source.h" +#include "clang/AST/Decl.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace interp { +class Program; +class ByteCodeEmitter; +enum PrimType : uint32_t; + +/// Describes a scope block. +/// +/// The block gathers all the descriptors of the locals defined in this block. +class Scope { +public: + /// Information about a local's storage. + struct Local { + /// Offset of the local in frame. + unsigned Offset; + /// Descriptor of the local. + Descriptor *Desc; + }; + + using LocalVectorTy = llvm::SmallVector; + + Scope(LocalVectorTy &&Descriptors) : Descriptors(std::move(Descriptors)) {} + + llvm::iterator_range locals() { + return llvm::make_range(Descriptors.begin(), Descriptors.end()); + } + +private: + /// Object descriptors in this block. 
+ LocalVectorTy Descriptors; +}; + +/// Bytecode function. +/// +/// Contains links to the bytecode of the function, as well as metadata +/// describing all arguments and stack-local variables. +class Function { +public: + using ParamDescriptor = std::pair; + + /// Returns the size of the function's local stack. + unsigned getFrameSize() const { return FrameSize; } + /// Returns the size of the argument stackx + unsigned getArgSize() const { return ArgSize; } + + /// Returns a pointer to the start of the code. + CodePtr getCodeBegin() const; + /// Returns a pointer to the end of the code. + CodePtr getCodeEnd() const; + + /// Returns the original FunctionDecl. + const FunctionDecl *getDecl() const { return F; } + + /// Returns the lcoation. + SourceLocation getLoc() const { return Loc; } + + /// Returns a parameter descriptor. + ParamDescriptor getParamDescriptor(unsigned Offset) const; + + /// Checks if the first argument is a RVO pointer. + bool hasRVO() const { return ParamTypes.size() != Params.size(); } + + /// Range over the scope blocks. + llvm::iterator_range::iterator> scopes() { + return llvm::make_range(Scopes.begin(), Scopes.end()); + } + + /// Range over argument types. + using arg_reverse_iterator = SmallVectorImpl::reverse_iterator; + llvm::iterator_range args_reverse() { + return llvm::make_range(ParamTypes.rbegin(), ParamTypes.rend()); + } + + /// Returns a specific scope. + Scope &getScope(unsigned Idx) { return Scopes[Idx]; } + + /// Returns the source information at a given PC. + SourceInfo getSource(CodePtr PC) const; + + /// Checks if the function is valid to call in constexpr. + bool isConstexpr() const { return IsValid; } + + /// Checks if the function is virtual. + bool isVirtual() const; + + /// Checks if the function is a constructor. + bool isConstructor() const { return isa(F); } + +private: + /// Construct a function representing an actual function. 
+ Function(Program &P, const FunctionDecl *F, unsigned ArgSize, + llvm::SmallVector &&ParamTypes, + llvm::DenseMap &&Params); + + /// Sets the code of a function. + void setCode(unsigned NewFrameSize, std::vector &&NewCode, SourceMap &&NewSrcMap, + llvm::SmallVector &&NewScopes) { + FrameSize = NewFrameSize; + Code = std::move(NewCode); + SrcMap = std::move(NewSrcMap); + Scopes = std::move(NewScopes); + IsValid = true; + } + +private: + friend class Program; + friend class ByteCodeEmitter; + + /// Program reference. + Program &P; + /// Location of the executed code. + SourceLocation Loc; + /// Declaration this function was compiled from. + const FunctionDecl *F; + /// Local area size: storage + metadata. + unsigned FrameSize; + /// Size of the argument stack. + unsigned ArgSize; + /// Program code. + std::vector Code; + /// Opcode-to-expression mapping. + SourceMap SrcMap; + /// List of block descriptors. + llvm::SmallVector Scopes; + /// List of argument types. + llvm::SmallVector ParamTypes; + /// Map from byte offset to parameter descriptor. + llvm::DenseMap Params; + /// Flag to indicate if the function is valid. + bool IsValid = false; + +public: + /// Dumps the disassembled bytecode to \c llvm::errs(). + void dump() const; + void dump(llvm::raw_ostream &OS) const; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Integral.h b/lib/AST/Interp/Integral.h new file mode 100644 index 00000000000..7cc788070de --- /dev/null +++ b/lib/AST/Interp/Integral.h @@ -0,0 +1,269 @@ +//===--- Integral.h - Wrapper for numeric types for the VM ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the VM types and helpers operating on types. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_INTEGRAL_H +#define LLVM_CLANG_AST_INTERP_INTEGRAL_H + +#include "clang/AST/ComparisonCategories.h" +#include "clang/AST/APValue.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace clang { +namespace interp { + +using APInt = llvm::APInt; +using APSInt = llvm::APSInt; + +/// Helper to compare two comparable types. +template +ComparisonCategoryResult Compare(const T &X, const T &Y) { + if (X < Y) + return ComparisonCategoryResult::Less; + if (X > Y) + return ComparisonCategoryResult::Greater; + return ComparisonCategoryResult::Equal; +} + +// Helper structure to select the representation. +template struct Repr; +template <> struct Repr<8, false> { using Type = uint8_t; }; +template <> struct Repr<16, false> { using Type = uint16_t; }; +template <> struct Repr<32, false> { using Type = uint32_t; }; +template <> struct Repr<64, false> { using Type = uint64_t; }; +template <> struct Repr<8, true> { using Type = int8_t; }; +template <> struct Repr<16, true> { using Type = int16_t; }; +template <> struct Repr<32, true> { using Type = int32_t; }; +template <> struct Repr<64, true> { using Type = int64_t; }; + +/// Wrapper around numeric types. +/// +/// These wrappers are required to shared an interface between APSint and +/// builtin primitive numeral types, while optimising for storage and +/// allowing methods operating on primitive type to compile to fast code. +template class Integral { +private: + template friend class Integral; + + // The primitive representing the integral. + using T = typename Repr::Type; + T V; + + /// Primitive representing limits. + static const auto Min = std::numeric_limits::min(); + static const auto Max = std::numeric_limits::max(); + + /// Construct an integral from anything that is convertible to storage. 
+ template explicit Integral(T V) : V(V) {} + +public: + /// Zero-initializes an integral. + Integral() : V(0) {} + + /// Constructs an integral from another integral. + template + explicit Integral(Integral V) : V(V.V) {} + + /// Construct an integral from a value based on signedness. + explicit Integral(const APSInt &V) + : V(V.isSigned() ? V.getSExtValue() : V.getZExtValue()) {} + + bool operator<(Integral RHS) const { return V < RHS.V; } + bool operator>(Integral RHS) const { return V > RHS.V; } + bool operator<=(Integral RHS) const { return V <= RHS.V; } + bool operator>=(Integral RHS) const { return V >= RHS.V; } + bool operator==(Integral RHS) const { return V == RHS.V; } + bool operator!=(Integral RHS) const { return V != RHS.V; } + + bool operator>(unsigned RHS) const { + return V >= 0 && static_cast(V) > RHS; + } + + Integral operator-() const { return Integral(-V); } + Integral operator~() const { return Integral(~V); } + + template + explicit operator Integral() const { + return Integral(V); + } + + explicit operator unsigned() const { return V; } + explicit operator int64_t() const { return V; } + explicit operator uint64_t() const { return V; } + + APSInt toAPSInt() const { + return APSInt(APInt(Bits, static_cast(V), Signed), !Signed); + } + APSInt toAPSInt(unsigned NumBits) const { + if (Signed) + return APSInt(toAPSInt().sextOrTrunc(NumBits), !Signed); + else + return APSInt(toAPSInt().zextOrTrunc(NumBits), !Signed); + } + APValue toAPValue() const { return APValue(toAPSInt()); } + + Integral toUnsigned() const { + return Integral(*this); + } + + constexpr static unsigned bitWidth() { return Bits; } + + bool isZero() const { return !V; } + + bool isMin() const { return *this == min(bitWidth()); } + + bool isMinusOne() const { return Signed && V == T(-1); } + + constexpr static bool isSigned() { return Signed; } + + bool isNegative() const { return V < T(0); } + bool isPositive() const { return !isNegative(); } + + ComparisonCategoryResult 
compare(const Integral &RHS) const { + return Compare(V, RHS.V); + } + + unsigned countLeadingZeros() const { return llvm::countLeadingZeros(V); } + + Integral truncate(unsigned TruncBits) const { + if (TruncBits >= Bits) + return *this; + const T BitMask = (T(1) << T(TruncBits)) - 1; + const T SignBit = T(1) << (TruncBits - 1); + const T ExtMask = ~BitMask; + return Integral((V & BitMask) | (Signed && (V & SignBit) ? ExtMask : 0)); + } + + void print(llvm::raw_ostream &OS) const { OS << V; } + + static Integral min(unsigned NumBits) { + return Integral(Min); + } + static Integral max(unsigned NumBits) { + return Integral(Max); + } + + template + static typename std::enable_if::value, Integral>::type + from(T Value) { + return Integral(Value); + } + + template + static typename std::enable_if::type + from(Integral Value) { + return Integral(Value.V); + } + + template static Integral from(Integral<0, SrcSign> Value) { + if (SrcSign) + return Integral(Value.V.getSExtValue()); + else + return Integral(Value.V.getZExtValue()); + } + + static Integral zero() { return from(0); } + + template static Integral from(T Value, unsigned NumBits) { + return Integral(Value); + } + + static bool inRange(int64_t Value, unsigned NumBits) { + return CheckRange(Value); + } + + static bool increment(Integral A, Integral *R) { + return add(A, Integral(T(1)), A.bitWidth(), R); + } + + static bool decrement(Integral A, Integral *R) { + return sub(A, Integral(T(1)), A.bitWidth(), R); + } + + static bool add(Integral A, Integral B, unsigned OpBits, Integral *R) { + return CheckAddUB(A.V, B.V, R->V); + } + + static bool sub(Integral A, Integral B, unsigned OpBits, Integral *R) { + return CheckSubUB(A.V, B.V, R->V); + } + + static bool mul(Integral A, Integral B, unsigned OpBits, Integral *R) { + return CheckMulUB(A.V, B.V, R->V); + } + +private: + template + static typename std::enable_if::value, bool>::type + CheckAddUB(T A, T B, T &R) { + return llvm::AddOverflow(A, B, R); + } + + template 
+ static typename std::enable_if::value, bool>::type + CheckAddUB(T A, T B, T &R) { + R = A + B; + return false; + } + + template + static typename std::enable_if::value, bool>::type + CheckSubUB(T A, T B, T &R) { + return llvm::SubOverflow(A, B, R); + } + + template + static typename std::enable_if::value, bool>::type + CheckSubUB(T A, T B, T &R) { + R = A - B; + return false; + } + + template + static typename std::enable_if::value, bool>::type + CheckMulUB(T A, T B, T &R) { + return llvm::MulOverflow(A, B, R); + } + + template + static typename std::enable_if::value, bool>::type + CheckMulUB(T A, T B, T &R) { + R = A * B; + return false; + } + + template + static typename std::enable_if::value, bool>::type + CheckRange(int64_t V) { + return Min <= V && V <= Max; + } + + template + static typename std::enable_if::value, bool>::type + CheckRange(int64_t V) { + return V >= 0 && static_cast(V) <= Max; + } +}; + +template +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Integral I) { + I.print(OS); + return OS; +} + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Interp.cpp b/lib/AST/Interp/Interp.cpp new file mode 100644 index 00000000000..1a8109cedf7 --- /dev/null +++ b/lib/AST/Interp/Interp.cpp @@ -0,0 +1,417 @@ +//===--- InterpState.cpp - Interpreter for the constexpr VM -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Interp.h" +#include +#include +#include "Function.h" +#include "InterpFrame.h" +#include "InterpStack.h" +#include "Opcode.h" +#include "PrimType.h" +#include "Program.h" +#include "State.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "llvm/ADT/APSInt.h" + +using namespace clang; +using namespace clang::interp; + +//===----------------------------------------------------------------------===// +// Ret +//===----------------------------------------------------------------------===// + +template ::T> +static bool Ret(InterpState &S, CodePtr &PC, APValue &Result) { + S.CallStackDepth--; + const T &Ret = S.Stk.pop(); + + assert(S.Current->getFrameOffset() == S.Stk.size() && "Invalid frame"); + if (!S.checkingPotentialConstantExpression()) + S.Current->popArgs(); + + if (InterpFrame *Caller = S.Current->Caller) { + PC = S.Current->getRetPC(); + delete S.Current; + S.Current = Caller; + S.Stk.push(Ret); + } else { + delete S.Current; + S.Current = nullptr; + if (!ReturnValue(Ret, Result)) + return false; + } + return true; +} + +static bool RetVoid(InterpState &S, CodePtr &PC, APValue &Result) { + S.CallStackDepth--; + + assert(S.Current->getFrameOffset() == S.Stk.size() && "Invalid frame"); + if (!S.checkingPotentialConstantExpression()) + S.Current->popArgs(); + + if (InterpFrame *Caller = S.Current->Caller) { + PC = S.Current->getRetPC(); + delete S.Current; + S.Current = Caller; + } else { + delete S.Current; + S.Current = nullptr; + } + return true; +} + +static bool RetValue(InterpState &S, CodePtr &Pt, APValue &Result) { + llvm::report_fatal_error("Interpreter cannot return values"); +} + +//===----------------------------------------------------------------------===// 
+// Jmp, Jt, Jf +//===----------------------------------------------------------------------===// + +static bool Jmp(InterpState &S, CodePtr &PC, int32_t Offset) { + PC += Offset; + return true; +} + +static bool Jt(InterpState &S, CodePtr &PC, int32_t Offset) { + if (S.Stk.pop()) { + PC += Offset; + } + return true; +} + +static bool Jf(InterpState &S, CodePtr &PC, int32_t Offset) { + if (!S.Stk.pop()) { + PC += Offset; + } + return true; +} + +static bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { + if (Ptr.isInitialized()) + return true; + if (!S.checkingPotentialConstantExpression()) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_access_uninit) << AK << false; + } + return false; +} + +static bool CheckActive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { + if (Ptr.isActive()) + return true; + + // Get the inactive field descriptor. + const FieldDecl *InactiveField = Ptr.getField(); + + // Walk up the pointer chain to find the union which is not active. + Pointer U = Ptr.getBase(); + while (!U.isActive()) { + U = U.getBase(); + } + + // Find the active field of the union. 
+ Record *R = U.getRecord(); + assert(R && R->isUnion() && "Not a union"); + const FieldDecl *ActiveField = nullptr; + for (unsigned I = 0, N = R->getNumFields(); I < N; ++I) { + const Pointer &Field = U.atField(R->getField(I)->Offset); + if (Field.isActive()) { + ActiveField = Field.getField(); + break; + } + } + + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_access_inactive_union_member) + << AK << InactiveField << !ActiveField << ActiveField; + return false; +} + +static bool CheckTemporary(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { + if (auto ID = Ptr.getDeclID()) { + if (!Ptr.isStaticTemporary()) + return true; + + if (Ptr.getDeclDesc()->getType().isConstQualified()) + return true; + + if (S.P.getCurrentDecl() == ID) + return true; + + const SourceInfo &E = S.Current->getSource(OpPC); + S.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK; + S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); + return false; + } + return true; +} + +static bool CheckGlobal(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (auto ID = Ptr.getDeclID()) { + if (!Ptr.isStatic()) + return true; + + if (S.P.getCurrentDecl() == ID) + return true; + + S.FFDiag(S.Current->getLocation(OpPC), diag::note_constexpr_modify_global); + return false; + } + return true; +} + +namespace clang { +namespace interp { + +bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!Ptr.isExtern()) + return true; + + if (!S.checkingPotentialConstantExpression()) { + auto *VD = Ptr.getDeclDesc()->asValueDecl(); + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_ltor_non_constexpr, 1) << VD; + S.Note(VD->getLocation(), diag::note_declared_at); + } + return false; +} + +bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!Ptr.isUnknownSizeArray()) + return true; + const SourceInfo &E = S.Current->getSource(OpPC); + S.FFDiag(E, 
diag::note_constexpr_unsized_array_indexed); + return false; +} + +bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { + const auto &Src = S.Current->getSource(OpPC); + if (Ptr.isZero()) { + + if (Ptr.isField()) + S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; + else + S.FFDiag(Src, diag::note_constexpr_access_null) << AK; + + return false; + } + + if (!Ptr.isLive()) { + bool IsTemp = Ptr.isTemporary(); + + S.FFDiag(Src, diag::note_constexpr_lifetime_ended, 1) << AK << !IsTemp; + + if (IsTemp) + S.Note(Ptr.getDeclLoc(), diag::note_constexpr_temporary_here); + else + S.Note(Ptr.getDeclLoc(), diag::note_declared_at); + + return false; + } + + return true; +} + +bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + CheckSubobjectKind CSK) { + if (!Ptr.isZero()) + return true; + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_null_subobject) << CSK; + return false; +} + +bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { + if (!Ptr.isOnePastEnd()) + return true; + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_access_past_end) << AK; + return false; +} + +bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + CheckSubobjectKind CSK) { + if (!Ptr.isElementPastEnd()) + return true; + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_past_end_subobject) << CSK; + return false; +} + +bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + assert(Ptr.isLive() && "Pointer is not live"); + if (!Ptr.isConst()) { + return true; + } + + const QualType Ty = Ptr.getType(); + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_modify_const_type) << Ty; + return false; +} + +bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + assert(Ptr.isLive() && "Pointer is not live"); + if 
(!Ptr.isMutable()) { + return true; + } + + const SourceInfo &Loc = S.Current->getSource(OpPC); + const FieldDecl *Field = Ptr.getField(); + S.FFDiag(Loc, diag::note_constexpr_access_mutable, 1) << AK_Read << Field; + S.Note(Field->getLocation(), diag::note_declared_at); + return false; +} + +bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!CheckLive(S, OpPC, Ptr, AK_Read)) + return false; + if (!CheckExtern(S, OpPC, Ptr)) + return false; + if (!CheckRange(S, OpPC, Ptr, AK_Read)) + return false; + if (!CheckInitialized(S, OpPC, Ptr, AK_Read)) + return false; + if (!CheckActive(S, OpPC, Ptr, AK_Read)) + return false; + if (!CheckTemporary(S, OpPC, Ptr, AK_Read)) + return false; + if (!CheckMutable(S, OpPC, Ptr)) + return false; + return true; +} + +bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!CheckLive(S, OpPC, Ptr, AK_Assign)) + return false; + if (!CheckExtern(S, OpPC, Ptr)) + return false; + if (!CheckRange(S, OpPC, Ptr, AK_Assign)) + return false; + if (!CheckGlobal(S, OpPC, Ptr)) + return false; + if (!CheckConst(S, OpPC, Ptr)) + return false; + return true; +} + +bool CheckInvoke(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!CheckLive(S, OpPC, Ptr, AK_MemberCall)) + return false; + if (!CheckExtern(S, OpPC, Ptr)) + return false; + if (!CheckRange(S, OpPC, Ptr, AK_MemberCall)) + return false; + return true; +} + +bool CheckInit(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (!CheckLive(S, OpPC, Ptr, AK_Assign)) + return false; + if (!CheckRange(S, OpPC, Ptr, AK_Assign)) + return false; + return true; +} + +bool CheckCallable(InterpState &S, CodePtr OpPC, Function *F) { + const SourceLocation &Loc = S.Current->getLocation(OpPC); + + if (F->isVirtual()) { + if (!S.getLangOpts().CPlusPlus2a) { + S.CCEDiag(Loc, diag::note_constexpr_virtual_call); + return false; + } + } + + if (!F->isConstexpr()) { + if (S.getLangOpts().CPlusPlus11) { + const FunctionDecl *DiagDecl = F->getDecl(); + + // 
If this function is not constexpr because it is an inherited + // non-constexpr constructor, diagnose that directly. + auto *CD = dyn_cast(DiagDecl); + if (CD && CD->isInheritingConstructor()) { + auto *Inherited = CD->getInheritedConstructor().getConstructor(); + if (!Inherited->isConstexpr()) + DiagDecl = CD = Inherited; + } + + // FIXME: If DiagDecl is an implicitly-declared special member function + // or an inheriting constructor, we should be much more explicit about why + // it's not constexpr. + if (CD && CD->isInheritingConstructor()) + S.FFDiag(Loc, diag::note_constexpr_invalid_inhctor, 1) + << CD->getInheritedConstructor().getConstructor()->getParent(); + else + S.FFDiag(Loc, diag::note_constexpr_invalid_function, 1) + << DiagDecl->isConstexpr() << (bool)CD << DiagDecl; + S.Note(DiagDecl->getLocation(), diag::note_declared_at); + } else { + S.FFDiag(Loc, diag::note_invalid_subexpr_in_const_expr); + } + return false; + } + + return true; +} + +bool CheckThis(InterpState &S, CodePtr OpPC, const Pointer &This) { + if (!This.isZero()) + return true; + + const SourceInfo &Loc = S.Current->getSource(OpPC); + + bool IsImplicit = false; + if (auto *E = dyn_cast_or_null(Loc.asExpr())) + IsImplicit = E->isImplicit(); + + if (S.getLangOpts().CPlusPlus11) + S.FFDiag(Loc, diag::note_constexpr_this) << IsImplicit; + else + S.FFDiag(Loc); + + return false; +} + +bool CheckPure(InterpState &S, CodePtr OpPC, const CXXMethodDecl *MD) { + if (!MD->isPure()) + return true; + const SourceInfo &E = S.Current->getSource(OpPC); + S.FFDiag(E, diag::note_constexpr_pure_virtual_call, 1) << MD; + S.Note(MD->getLocation(), diag::note_declared_at); + return false; +} +bool Interpret(InterpState &S, APValue &Result) { + CodePtr PC = S.Current->getPC(); + + for (;;) { + auto Op = PC.read(); + CodePtr OpPC = PC; + + switch (Op) { +#define GET_INTERP +#include "Opcodes.inc" +#undef GET_INTERP + } + } +} + +} // namespace interp +} // namespace clang diff --git a/lib/AST/Interp/Interp.h 
b/lib/AST/Interp/Interp.h new file mode 100644 index 00000000000..8934efa13b9 --- /dev/null +++ b/lib/AST/Interp/Interp.h @@ -0,0 +1,960 @@ +//===--- Interp.h - Interpreter for the constexpr VM ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of the interpreter state and entry point. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_INTERP_H +#define LLVM_CLANG_AST_INTERP_INTERP_H + +#include +#include +#include "Function.h" +#include "InterpFrame.h" +#include "InterpStack.h" +#include "InterpState.h" +#include "Opcode.h" +#include "PrimType.h" +#include "Program.h" +#include "State.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/Expr.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/Support/Endian.h" + +namespace clang { +namespace interp { + +using APInt = llvm::APInt; +using APSInt = llvm::APSInt; + +/// Converts a value to an APValue. +template bool ReturnValue(const T &V, APValue &R) { + R = V.toAPValue(); + return true; +} + +/// Checks if the variable has externally defined storage. +bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if the array is offsetable. +bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a pointer is live and accessible. +bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK); +/// Checks if a pointer is null. +bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + CheckSubobjectKind CSK); + +/// Checks if a pointer is in range.
+bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK); + +/// Checks if a field from which a pointer is going to be derived is valid. +bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + CheckSubobjectKind CSK); + +/// Checks if a pointer points to const storage. +bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a pointer points to a mutable field. +bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a value can be loaded from a block. +bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a value can be stored in a block. +bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a method can be invoked on an object. +bool CheckInvoke(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a value can be initialized. +bool CheckInit(InterpState &S, CodePtr OpPC, const Pointer &Ptr); + +/// Checks if a method can be called. +bool CheckCallable(InterpState &S, CodePtr OpPC, Function *F); + +/// Checks the 'this' pointer. +bool CheckThis(InterpState &S, CodePtr OpPC, const Pointer &This); + +/// Checks if a method is pure virtual. +bool CheckPure(InterpState &S, CodePtr OpPC, const CXXMethodDecl *MD); + +template inline bool IsTrue(const T &V) { return !V.isZero(); } + +//===----------------------------------------------------------------------===// +// Add, Sub, Mul +//===----------------------------------------------------------------------===// + +template class OpAP> +bool AddSubMulHelper(InterpState &S, CodePtr OpPC, unsigned Bits, const T &LHS, + const T &RHS) { + // Fast path - add the numbers with fixed width. + T Result; + if (!OpFW(LHS, RHS, Bits, &Result)) { + S.Stk.push(Result); + return true; + } + + // If for some reason evaluation continues, use the truncated results. + S.Stk.push(Result); + + // Slow path - compute the result using another bit of precision. 
+ APSInt Value = OpAP()(LHS.toAPSInt(Bits), RHS.toAPSInt(Bits)); + + // Report undefined behaviour, stopping if required. + const Expr *E = S.Current->getExpr(OpPC); + QualType Type = E->getType(); + if (S.checkingForUndefinedBehavior()) { + auto Trunc = Value.trunc(Result.bitWidth()).toString(10); + auto Loc = E->getExprLoc(); + S.report(Loc, diag::warn_integer_constant_overflow) << Trunc << Type; + return true; + } else { + S.CCEDiag(E, diag::note_constexpr_overflow) << Value << Type; + return S.noteUndefinedBehavior(); + } +} + +template ::T> +bool Add(InterpState &S, CodePtr OpPC) { + const T &RHS = S.Stk.pop(); + const T &LHS = S.Stk.pop(); + const unsigned Bits = RHS.bitWidth() + 1; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); +} + +template ::T> +bool Sub(InterpState &S, CodePtr OpPC) { + const T &RHS = S.Stk.pop(); + const T &LHS = S.Stk.pop(); + const unsigned Bits = RHS.bitWidth() + 1; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); +} + +template ::T> +bool Mul(InterpState &S, CodePtr OpPC) { + const T &RHS = S.Stk.pop(); + const T &LHS = S.Stk.pop(); + const unsigned Bits = RHS.bitWidth() * 2; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); +} + +//===----------------------------------------------------------------------===// +// EQ, NE, GT, GE, LT, LE +//===----------------------------------------------------------------------===// + +using CompareFn = llvm::function_ref; + +template +bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { + using BoolT = PrimConv::T; + const T &RHS = S.Stk.pop(); + const T &LHS = S.Stk.pop(); + S.Stk.push(BoolT::from(Fn(LHS.compare(RHS)))); + return true; +} + +template +bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { + return CmpHelper(S, OpPC, Fn); +} + +template <> +inline bool CmpHelper(InterpState &S, CodePtr OpPC, CompareFn Fn) { + using BoolT = PrimConv::T; + const Pointer &RHS = S.Stk.pop(); + const Pointer &LHS = S.Stk.pop(); + + if (!Pointer::hasSameBase(LHS, RHS)) { + const 
SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_invalid_subexpr_in_const_expr); + return false; + } else { + unsigned VL = LHS.getByteOffset(); + unsigned VR = RHS.getByteOffset(); + S.Stk.push(BoolT::from(Fn(Compare(VL, VR)))); + return true; + } +} + +template <> +inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { + using BoolT = PrimConv::T; + const Pointer &RHS = S.Stk.pop(); + const Pointer &LHS = S.Stk.pop(); + + if (LHS.isZero() || RHS.isZero()) { + if (LHS.isZero() && RHS.isZero()) + S.Stk.push(BoolT::from(Fn(ComparisonCategoryResult::Equal))); + else + S.Stk.push(BoolT::from(Fn(ComparisonCategoryResult::Nonequal))); + return true; + } + + if (!Pointer::hasSameBase(LHS, RHS)) { + S.Stk.push(BoolT::from(Fn(ComparisonCategoryResult::Unordered))); + return true; + } else { + unsigned VL = LHS.getByteOffset(); + unsigned VR = RHS.getByteOffset(); + S.Stk.push(BoolT::from(Fn(Compare(VL, VR)))); + return true; + } +} + +template ::T> +bool EQ(InterpState &S, CodePtr OpPC) { + return CmpHelperEQ(S, OpPC, [](ComparisonCategoryResult R) { + return R == ComparisonCategoryResult::Equal; + }); +} + +template ::T> +bool NE(InterpState &S, CodePtr OpPC) { + return CmpHelperEQ(S, OpPC, [](ComparisonCategoryResult R) { + return R != ComparisonCategoryResult::Equal; + }); +} + +template ::T> +bool LT(InterpState &S, CodePtr OpPC) { + return CmpHelper(S, OpPC, [](ComparisonCategoryResult R) { + return R == ComparisonCategoryResult::Less; + }); +} + +template ::T> +bool LE(InterpState &S, CodePtr OpPC) { + return CmpHelper(S, OpPC, [](ComparisonCategoryResult R) { + return R == ComparisonCategoryResult::Less || + R == ComparisonCategoryResult::Equal; + }); +} + +template ::T> +bool GT(InterpState &S, CodePtr OpPC) { + return CmpHelper(S, OpPC, [](ComparisonCategoryResult R) { + return R == ComparisonCategoryResult::Greater; + }); +} + +template ::T> +bool GE(InterpState &S, CodePtr OpPC) { + return CmpHelper(S, OpPC, 
[](ComparisonCategoryResult R) { + return R == ComparisonCategoryResult::Greater || + R == ComparisonCategoryResult::Equal; + }); +} + +//===----------------------------------------------------------------------===// +// InRange +//===----------------------------------------------------------------------===// + +template ::T> +bool InRange(InterpState &S, CodePtr OpPC) { + const T RHS = S.Stk.pop(); + const T LHS = S.Stk.pop(); + const T Value = S.Stk.pop(); + + S.Stk.push(LHS <= Value && Value <= RHS); + return true; +} + +//===----------------------------------------------------------------------===// +// Dup, Pop, Test +//===----------------------------------------------------------------------===// + +template ::T> +bool Dup(InterpState &S, CodePtr OpPC) { + S.Stk.push(S.Stk.peek()); + return true; +} + +template ::T> +bool Pop(InterpState &S, CodePtr OpPC) { + S.Stk.pop(); + return true; +} + +//===----------------------------------------------------------------------===// +// Const +//===----------------------------------------------------------------------===// + +template ::T> +bool Const(InterpState &S, CodePtr OpPC, const T &Arg) { + S.Stk.push(Arg); + return true; +} + +//===----------------------------------------------------------------------===// +// Get/Set Local/Param/Global/This +//===----------------------------------------------------------------------===// + +template ::T> +bool GetLocal(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Stk.push(S.Current->getLocal(I)); + return true; +} + +template ::T> +bool SetLocal(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Current->setLocal(I, S.Stk.pop()); + return true; +} + +template ::T> +bool GetParam(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) { + return false; + } + S.Stk.push(S.Current->getParam(I)); + return true; +} + +template ::T> +bool SetParam(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Current->setParam(I, S.Stk.pop()); + return true; +} + 
+template ::T> +bool GetField(InterpState &S, CodePtr OpPC, uint32_t I) { + const Pointer &Obj = S.Stk.peek(); + if (!CheckNull(S, OpPC, Obj, CSK_Field)) + return false; + if (!CheckRange(S, OpPC, Obj, CSK_Field)) + return false; + const Pointer &Field = Obj.atField(I); + if (!CheckLoad(S, OpPC, Field)) + return false; + S.Stk.push(Field.deref()); + return true; +} + +template ::T> +bool SetField(InterpState &S, CodePtr OpPC, uint32_t I) { + const T &Value = S.Stk.pop(); + const Pointer &Obj = S.Stk.peek(); + if (!CheckNull(S, OpPC, Obj, CSK_Field)) + return false; + if (!CheckRange(S, OpPC, Obj, CSK_Field)) + return false; + const Pointer &Field = Obj.atField(I); + if (!CheckStore(S, OpPC, Field)) + return false; + Field.deref() = Value; + return true; +} + +template ::T> +bool GetFieldPop(InterpState &S, CodePtr OpPC, uint32_t I) { + const Pointer &Obj = S.Stk.pop(); + if (!CheckNull(S, OpPC, Obj, CSK_Field)) + return false; + if (!CheckRange(S, OpPC, Obj, CSK_Field)) + return false; + const Pointer &Field = Obj.atField(I); + if (!CheckLoad(S, OpPC, Field)) + return false; + S.Stk.push(Field.deref()); + return true; +} + +template ::T> +bool GetThisField(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + const Pointer &Field = This.atField(I); + if (!CheckLoad(S, OpPC, Field)) + return false; + S.Stk.push(Field.deref()); + return true; +} + +template ::T> +bool SetThisField(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) + return false; + const T &Value = S.Stk.pop(); + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + const Pointer &Field = This.atField(I); + if (!CheckStore(S, OpPC, Field)) + return false; + Field.deref() = Value; + return true; +} + +template ::T> +bool GetGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { 
+ auto *B = S.P.getGlobal(I); + if (B->isExtern()) + return false; + S.Stk.push(B->deref()); + return true; +} + +template ::T> +bool SetGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { + // TODO: emit warning. + return false; +} + +template ::T> +bool InitGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { + S.P.getGlobal(I)->deref() = S.Stk.pop(); + return true; +} + +template ::T> +bool InitThisField(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + const Pointer &Field = This.atField(I); + Field.deref() = S.Stk.pop(); + Field.initialize(); + return true; +} + +template ::T> +bool InitThisBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + const Pointer &Field = This.atField(F->Offset); + const auto &Value = S.Stk.pop(); + Field.deref() = Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.initialize(); + return true; +} + +template ::T> +bool InitThisFieldActive(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + const Pointer &Field = This.atField(I); + Field.deref() = S.Stk.pop(); + Field.activate(); + Field.initialize(); + return true; +} + +template ::T> +bool InitField(InterpState &S, CodePtr OpPC, uint32_t I) { + const T &Value = S.Stk.pop(); + const Pointer &Field = S.Stk.pop().atField(I); + Field.deref() = Value; + Field.activate(); + Field.initialize(); + return true; +} + +template ::T> +bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { + const T &Value = S.Stk.pop(); + const Pointer &Field = S.Stk.pop().atField(F->Offset); + Field.deref() = 
Value.truncate(F->Decl->getBitWidthValue(S.getCtx())); + Field.activate(); + Field.initialize(); + return true; +} + +template ::T> +bool InitFieldActive(InterpState &S, CodePtr OpPC, uint32_t I) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop(); + const Pointer &Field = Ptr.atField(I); + Field.deref() = Value; + Field.activate(); + Field.initialize(); + return true; +} + +//===----------------------------------------------------------------------===// +// GetPtr Local/Param/Global/Field/This +//===----------------------------------------------------------------------===// + +inline bool GetPtrLocal(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Stk.push(S.Current->getLocalPointer(I)); + return true; +} + +inline bool GetPtrParam(InterpState &S, CodePtr OpPC, uint32_t I) { + if (S.checkingPotentialConstantExpression()) { + return false; + } + S.Stk.push(S.Current->getParamPointer(I)); + return true; +} + +inline bool GetPtrGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Stk.push(S.P.getPtrGlobal(I)); + return true; +} + +inline bool GetPtrField(InterpState &S, CodePtr OpPC, uint32_t Off) { + const Pointer &Ptr = S.Stk.pop(); + if (!CheckNull(S, OpPC, Ptr, CSK_Field)) + return false; + if (!CheckExtern(S, OpPC, Ptr)) + return false; + if (!CheckRange(S, OpPC, Ptr, CSK_Field)) + return false; + S.Stk.push(Ptr.atField(Off)); + return true; +} + +inline bool GetPtrThisField(InterpState &S, CodePtr OpPC, uint32_t Off) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + S.Stk.push(This.atField(Off)); + return true; +} + +inline bool GetPtrActiveField(InterpState &S, CodePtr OpPC, uint32_t Off) { + const Pointer &Ptr = S.Stk.pop(); + if (!CheckNull(S, OpPC, Ptr, CSK_Field)) + return false; + if (!CheckRange(S, OpPC, Ptr, CSK_Field)) + return false; + Pointer Field = Ptr.atField(Off); + Ptr.deactivate(); + Field.activate(); + 
S.Stk.push(std::move(Field)); + return true; +} + +inline bool GetPtrActiveThisField(InterpState &S, CodePtr OpPC, uint32_t Off) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + Pointer Field = This.atField(Off); + This.deactivate(); + Field.activate(); + S.Stk.push(std::move(Field)); + return true; +} + +inline bool GetPtrBase(InterpState &S, CodePtr OpPC, uint32_t Off) { + const Pointer &Ptr = S.Stk.pop(); + if (!CheckNull(S, OpPC, Ptr, CSK_Base)) + return false; + S.Stk.push(Ptr.atField(Off)); + return true; +} + +inline bool GetPtrThisBase(InterpState &S, CodePtr OpPC, uint32_t Off) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + S.Stk.push(This.atField(Off)); + return true; +} + +inline bool VirtBaseHelper(InterpState &S, CodePtr OpPC, const RecordDecl *Decl, + const Pointer &Ptr) { + Pointer Base = Ptr; + while (Base.isBaseClass()) + Base = Base.getBase(); + + auto *Field = Base.getRecord()->getVirtualBase(Decl); + S.Stk.push(Base.atField(Field->Offset)); + return true; +} + +inline bool GetPtrVirtBase(InterpState &S, CodePtr OpPC, const RecordDecl *D) { + const Pointer &Ptr = S.Stk.pop(); + if (!CheckNull(S, OpPC, Ptr, CSK_Base)) + return false; + return VirtBaseHelper(S, OpPC, D, Ptr); +} + +inline bool GetPtrThisVirtBase(InterpState &S, CodePtr OpPC, + const RecordDecl *D) { + if (S.checkingPotentialConstantExpression()) + return false; + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + return VirtBaseHelper(S, OpPC, D, S.Current->getThis()); +} + +//===----------------------------------------------------------------------===// +// Load, Store, Init +//===----------------------------------------------------------------------===// + +template ::T> +bool Load(InterpState &S, CodePtr OpPC) { 
+ const Pointer &Ptr = S.Stk.peek(); + if (!CheckLoad(S, OpPC, Ptr)) + return false; + S.Stk.push(Ptr.deref()); + return true; +} + +template ::T> +bool LoadPop(InterpState &S, CodePtr OpPC) { + const Pointer &Ptr = S.Stk.pop(); + if (!CheckLoad(S, OpPC, Ptr)) + return false; + S.Stk.push(Ptr.deref()); + return true; +} + +template ::T> +bool Store(InterpState &S, CodePtr OpPC) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.peek(); + if (!CheckStore(S, OpPC, Ptr)) + return false; + Ptr.deref() = Value; + return true; +} + +template ::T> +bool StorePop(InterpState &S, CodePtr OpPC) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop(); + if (!CheckStore(S, OpPC, Ptr)) + return false; + Ptr.deref() = Value; + return true; +} + +template ::T> +bool StoreBitField(InterpState &S, CodePtr OpPC) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.peek(); + if (!CheckStore(S, OpPC, Ptr)) + return false; + if (auto *FD = Ptr.getField()) { + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + } else { + Ptr.deref() = Value; + } + return true; +} + +template ::T> +bool StoreBitFieldPop(InterpState &S, CodePtr OpPC) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop(); + if (!CheckStore(S, OpPC, Ptr)) + return false; + if (auto *FD = Ptr.getField()) { + Ptr.deref() = Value.truncate(FD->getBitWidthValue(S.getCtx())); + } else { + Ptr.deref() = Value; + } + return true; +} + +template ::T> +bool InitPop(InterpState &S, CodePtr OpPC) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop(); + if (!CheckInit(S, OpPC, Ptr)) + return false; + Ptr.initialize(); + new (&Ptr.deref()) T(Value); + return true; +} + +template ::T> +bool InitElem(InterpState &S, CodePtr OpPC, uint32_t Idx) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.peek().atIndex(Idx); + if (!CheckInit(S, OpPC, Ptr)) + return false; + Ptr.initialize(); + new (&Ptr.deref()) T(Value); + return true; +} + +template ::T> 
+bool InitElemPop(InterpState &S, CodePtr OpPC, uint32_t Idx) { + const T &Value = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop().atIndex(Idx); + if (!CheckInit(S, OpPC, Ptr)) + return false; + Ptr.initialize(); + new (&Ptr.deref()) T(Value); + return true; +} + +//===----------------------------------------------------------------------===// +// AddOffset, SubOffset +//===----------------------------------------------------------------------===// + +template bool OffsetHelper(InterpState &S, CodePtr OpPC) { + // Fetch the pointer and the offset. + const T &Offset = S.Stk.pop(); + const Pointer &Ptr = S.Stk.pop(); + if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) + return false; + if (!CheckRange(S, OpPC, Ptr, CSK_ArrayToPointer)) + return false; + + // Get a version of the index comparable to the type. + T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); + // A zero offset does not change the pointer, but in the case of an array + // it has to be adjusted to point to the first element instead of the array. + if (Offset.isZero()) { + S.Stk.push(Index.isZero() ? Ptr.atIndex(0) : Ptr); + return true; + } + // Arrays of unknown bounds cannot have pointers into them. + if (!CheckArray(S, OpPC, Ptr)) + return false; + + // Compute the largest index into the array. + unsigned MaxIndex = Ptr.getNumElems(); + + // Helper to report an invalid offset, computed as APSInt. + auto InvalidOffset = [&]() { + const unsigned Bits = Offset.bitWidth(); + APSInt APOffset(Offset.toAPSInt().extend(Bits + 2), false); + APSInt APIndex(Index.toAPSInt().extend(Bits + 2), false); + APSInt NewIndex = Add ? (APIndex + APOffset) : (APIndex - APOffset); + S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_array_index) + << NewIndex + << /*array*/ static_cast(!Ptr.inArray()) + << static_cast(MaxIndex); + return false; + }; + + // If the new offset would be negative, bail out. 
+ if (Add && Offset.isNegative() && (Offset.isMin() || -Offset > Index)) + return InvalidOffset(); + if (!Add && Offset.isPositive() && Index < Offset) + return InvalidOffset(); + + // If the new offset would be out of bounds, bail out. + unsigned MaxOffset = MaxIndex - Ptr.getIndex(); + if (Add && Offset.isPositive() && Offset > MaxOffset) + return InvalidOffset(); + if (!Add && Offset.isNegative() && (Offset.isMin() || -Offset > MaxOffset)) + return InvalidOffset(); + + // Offset is valid - compute it using signed 64-bit arithmetic. + int64_t WideIndex = static_cast(Index); + int64_t WideOffset = static_cast(Offset); + int64_t Result = Add ? (WideIndex + WideOffset) : (WideIndex - WideOffset); + S.Stk.push(Ptr.atIndex(static_cast(Result))); + return true; +} + +template ::T> +bool AddOffset(InterpState &S, CodePtr OpPC) { + return OffsetHelper(S, OpPC); +} + +template ::T> +bool SubOffset(InterpState &S, CodePtr OpPC) { + return OffsetHelper(S, OpPC); +} + + +//===----------------------------------------------------------------------===// +// Destroy +//===----------------------------------------------------------------------===// + +inline bool Destroy(InterpState &S, CodePtr OpPC, uint32_t I) { + S.Current->destroy(I); + return true; +} + +//===----------------------------------------------------------------------===// +// Cast, CastFP +//===----------------------------------------------------------------------===// + +template bool Cast(InterpState &S, CodePtr OpPC) { + using T = typename PrimConv::T; + using U = typename PrimConv::T; + S.Stk.push(U::from(S.Stk.pop())); + return true; +} + +//===----------------------------------------------------------------------===// +// Zero, Nullptr +//===----------------------------------------------------------------------===// + +template ::T> +bool Zero(InterpState &S, CodePtr OpPC) { + S.Stk.push(T::zero()); + return true; +} + +template ::T> +inline bool Null(InterpState &S, CodePtr OpPC) { + S.Stk.push(); + return true; +} +
+//===----------------------------------------------------------------------===// +// This, ImplicitThis +//===----------------------------------------------------------------------===// + +inline bool This(InterpState &S, CodePtr OpPC) { + // Cannot read 'this' in this mode. + if (S.checkingPotentialConstantExpression()) { + return false; + } + + const Pointer &This = S.Current->getThis(); + if (!CheckThis(S, OpPC, This)) + return false; + + S.Stk.push(This); + return true; +} + +//===----------------------------------------------------------------------===// +// Shr, Shl +//===----------------------------------------------------------------------===// + +template ::T> +unsigned Trunc(InterpState &S, CodePtr OpPC, unsigned Bits, const T &V) { + // C++11 [expr.shift]p1: Shift width must be less than the bit width of + // the shifted type. + if (Bits > 1 && V >= T::from(Bits, V.bitWidth())) { + const Expr *E = S.Current->getExpr(OpPC); + const APSInt Val = V.toAPSInt(); + QualType Ty = E->getType(); + S.CCEDiag(E, diag::note_constexpr_large_shift) << Val << Ty << Bits; + return Bits; + } else { + return static_cast(V); + } +} + +template ::T> +inline bool ShiftRight(InterpState &S, CodePtr OpPC, const T &V, unsigned RHS) { + if (RHS >= V.bitWidth()) { + S.Stk.push(T::from(0, V.bitWidth())); + } else { + S.Stk.push(T::from(V >> RHS, V.bitWidth())); + } + return true; +} + +template ::T> +inline bool ShiftLeft(InterpState &S, CodePtr OpPC, const T &V, unsigned RHS) { + if (V.isSigned() && !S.getLangOpts().CPlusPlus2a) { + // C++11 [expr.shift]p2: A signed left shift must have a non-negative + // operand, and must not overflow the corresponding unsigned type. + // C++2a [expr.shift]p2: E1 << E2 is the unique value congruent to + // E1 x 2^E2 modulo 2^N.
+ if (V.isNegative()) { + const Expr *E = S.Current->getExpr(OpPC); + S.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << V.toAPSInt(); + } else if (V.countLeadingZeros() < RHS) { + S.CCEDiag(S.Current->getExpr(OpPC), diag::note_constexpr_lshift_discards); + } + } + + if (V.bitWidth() == 1) { + S.Stk.push(V); + } else if (RHS >= V.bitWidth()) { + S.Stk.push(T::from(0, V.bitWidth())); + } else { + S.Stk.push(T::from(V.toUnsigned() << RHS, V.bitWidth())); + } + return true; +} + +template +inline bool Shr(InterpState &S, CodePtr OpPC) { + const auto &RHS = S.Stk.pop::T>(); + const auto &LHS = S.Stk.pop::T>(); + const unsigned Bits = LHS.bitWidth(); + + if (RHS.isSigned() && RHS.isNegative()) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt(); + return ShiftLeft(S, OpPC, LHS, Trunc(S, OpPC, Bits, -RHS)); + } else { + return ShiftRight(S, OpPC, LHS, Trunc(S, OpPC, Bits, RHS)); + } +} + +template +inline bool Shl(InterpState &S, CodePtr OpPC) { + const auto &RHS = S.Stk.pop::T>(); + const auto &LHS = S.Stk.pop::T>(); + const unsigned Bits = LHS.bitWidth(); + + if (RHS.isSigned() && RHS.isNegative()) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt(); + return ShiftRight(S, OpPC, LHS, Trunc(S, OpPC, Bits, -RHS)); + } else { + return ShiftLeft(S, OpPC, LHS, Trunc(S, OpPC, Bits, RHS)); + } +} + +//===----------------------------------------------------------------------===// +// NoRet +//===----------------------------------------------------------------------===// + +inline bool NoRet(InterpState &S, CodePtr OpPC) { + SourceLocation EndLoc = S.Current->getCallee()->getEndLoc(); + S.FFDiag(EndLoc, diag::note_constexpr_no_return); + return false; +} + +//===----------------------------------------------------------------------===// +// NarrowPtr, ExpandPtr 
+//===----------------------------------------------------------------------===// + +inline bool NarrowPtr(InterpState &S, CodePtr OpPC) { + const Pointer &Ptr = S.Stk.pop(); + S.Stk.push(Ptr.narrow()); + return true; +} + +inline bool ExpandPtr(InterpState &S, CodePtr OpPC) { + const Pointer &Ptr = S.Stk.pop(); + S.Stk.push(Ptr.expand()); + return true; +} + +/// Interpreter entry point. +bool Interpret(InterpState &S, APValue &Result); + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/InterpFrame.cpp b/lib/AST/Interp/InterpFrame.cpp new file mode 100644 index 00000000000..9d01bf0333f --- /dev/null +++ b/lib/AST/Interp/InterpFrame.cpp @@ -0,0 +1,193 @@ +//===--- InterpFrame.cpp - Call Frame implementation for the VM -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InterpFrame.h" +#include "Function.h" +#include "Interp.h" +#include "InterpStack.h" +#include "PrimType.h" +#include "Program.h" +#include "clang/AST/DeclCXX.h" + +using namespace clang; +using namespace clang::interp; + +InterpFrame::InterpFrame(InterpState &S, Function *Func, InterpFrame *Caller, + CodePtr RetPC, Pointer &&This) + : Caller(Caller), S(S), Func(Func), This(std::move(This)), RetPC(RetPC), + ArgSize(Func ? 
Func->getArgSize() : 0), + Args(static_cast(S.Stk.top())), FrameOffset(S.Stk.size()) { + if (Func) { + if (unsigned FrameSize = Func->getFrameSize()) { + Locals = std::make_unique(FrameSize); + for (auto &Scope : Func->scopes()) { + for (auto &Local : Scope.locals()) { + Block *B = new (localBlock(Local.Offset)) Block(Local.Desc); + B->invokeCtor(); + } + } + } + } +} + +InterpFrame::~InterpFrame() { + if (Func && Func->isConstructor() && This.isBaseClass()) + This.initialize(); + for (auto &Param : Params) + S.deallocate(reinterpret_cast(Param.second.get())); +} + +void InterpFrame::destroy(unsigned Idx) { + for (auto &Local : Func->getScope(Idx).locals()) { + S.deallocate(reinterpret_cast(localBlock(Local.Offset))); + } +} + +void InterpFrame::popArgs() { + for (PrimType Ty : Func->args_reverse()) + TYPE_SWITCH(Ty, S.Stk.discard()); +} + +template +static void print(llvm::raw_ostream &OS, const T &V, ASTContext &, QualType) { + OS << V; +} + +template <> +void print(llvm::raw_ostream &OS, const Pointer &P, ASTContext &Ctx, + QualType Ty) { + if (P.isZero()) { + OS << "nullptr"; + return; + } + + auto printDesc = [&OS, &Ctx](Descriptor *Desc) { + if (auto *D = Desc->asDecl()) { + // Subfields or named values. + if (auto *VD = dyn_cast(D)) { + OS << *VD; + return; + } + // Base classes. + if (isa(D)) { + return; + } + } + // Temporary expression. + if (auto *E = Desc->asExpr()) { + E->printPretty(OS, nullptr, Ctx.getPrintingPolicy()); + return; + } + llvm_unreachable("Invalid descriptor type"); + }; + + if (!Ty->isReferenceType()) + OS << "&"; + llvm::SmallVector Levels; + for (Pointer F = P; !F.isRoot(); ) { + Levels.push_back(F); + F = F.isArrayElement() ? 
F.getArray().expand() : F.getBase(); + } + + printDesc(P.getDeclDesc()); + for (auto It = Levels.rbegin(); It != Levels.rend(); ++It) { + if (It->inArray()) { + OS << "[" << It->expand().getIndex() << "]"; + continue; + } + if (auto Index = It->getIndex()) { + OS << " + " << Index; + continue; + } + OS << "."; + printDesc(It->getFieldDesc()); + } +} + +void InterpFrame::describe(llvm::raw_ostream &OS) { + const FunctionDecl *F = getCallee(); + auto *M = dyn_cast(F); + if (M && M->isInstance() && !isa(F)) { + print(OS, This, S.getCtx(), S.getCtx().getRecordType(M->getParent())); + OS << "->"; + } + OS << *F << "("; + unsigned Off = Func->hasRVO() ? primSize(PT_Ptr) : 0; + for (unsigned I = 0, N = F->getNumParams(); I < N; ++I) { + QualType Ty = F->getParamDecl(I)->getType(); + + PrimType PrimTy; + if (llvm::Optional T = S.Ctx.classify(Ty)) { + PrimTy = *T; + } else { + PrimTy = PT_Ptr; + } + + TYPE_SWITCH(PrimTy, print(OS, stackRef(Off), S.getCtx(), Ty)); + Off += align(primSize(PrimTy)); + if (I + 1 != N) + OS << ", "; + } + OS << ")"; +} + +Frame *InterpFrame::getCaller() const { + if (Caller->Caller) + return Caller; + return S.getSplitFrame(); +} + +SourceLocation InterpFrame::getCallLocation() const { + if (!Caller->Func) + return S.getLocation(nullptr, {}); + return S.getLocation(Caller->Func, RetPC - sizeof(uintptr_t)); +} + +const FunctionDecl *InterpFrame::getCallee() const { + return Func->getDecl(); +} + +Pointer InterpFrame::getLocalPointer(unsigned Offset) { + assert(Offset < Func->getFrameSize() && "Invalid local offset."); + return Pointer( + reinterpret_cast(Locals.get() + Offset - sizeof(Block))); +} + +Pointer InterpFrame::getParamPointer(unsigned Off) { + // Return the block if it was created previously. + auto Pt = Params.find(Off); + if (Pt != Params.end()) { + return Pointer(reinterpret_cast(Pt->second.get())); + } + + // Allocate memory to store the parameter and the block metadata. 
+ const auto &Desc = Func->getParamDescriptor(Off); + size_t BlockSize = sizeof(Block) + Desc.second->getAllocSize(); + auto Memory = std::make_unique(BlockSize); + auto *B = new (Memory.get()) Block(Desc.second); + + // Copy the initial value. + TYPE_SWITCH(Desc.first, new (B->data()) T(stackRef(Off))); + + // Record the param. + Params.insert({Off, std::move(Memory)}); + return Pointer(B); +} + +SourceInfo InterpFrame::getSource(CodePtr PC) const { + return S.getSource(Func, PC); +} + +const Expr *InterpFrame::getExpr(CodePtr PC) const { + return S.getExpr(Func, PC); +} + +SourceLocation InterpFrame::getLocation(CodePtr PC) const { + return S.getLocation(Func, PC); +} + diff --git a/lib/AST/Interp/InterpFrame.h b/lib/AST/Interp/InterpFrame.h new file mode 100644 index 00000000000..b8391b0bcf9 --- /dev/null +++ b/lib/AST/Interp/InterpFrame.h @@ -0,0 +1,153 @@ +//===--- InterpFrame.h - Call Frame implementation for the VM ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the class storing information about stack frames in the interpreter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_INTERPFRAME_H +#define LLVM_CLANG_AST_INTERP_INTERPFRAME_H + +#include "Frame.h" +#include "Pointer.h" +#include "Program.h" +#include "State.h" +#include +#include + +namespace clang { +namespace interp { +class Function; +class InterpState; + +/// Frame storing local variables. +class InterpFrame final : public Frame { +public: + /// The frame of the previous function. + InterpFrame *Caller; + + /// Creates a new frame for a method call. 
+ InterpFrame(InterpState &S, Function *Func, InterpFrame *Caller, + CodePtr RetPC, Pointer &&This); + + /// Destroys the frame, killing all live pointers to stack slots. + ~InterpFrame(); + + /// Invokes the destructors for a scope. + void destroy(unsigned Idx); + + /// Pops the arguments off the stack. + void popArgs(); + + /// Describes the frame with arguments for diagnostic purposes. + void describe(llvm::raw_ostream &OS); + + /// Returns the parent frame object. + Frame *getCaller() const; + + /// Returns the location of the call to the frame. + SourceLocation getCallLocation() const; + + /// Returns the declaration of the callee. + const FunctionDecl *getCallee() const; + + /// Returns the current function. + Function *getFunction() const { return Func; } + + /// Returns the offset on the stack at which the frame starts. + size_t getFrameOffset() const { return FrameOffset; } + + /// Returns the value of a local variable. + template const T &getLocal(unsigned Offset) { + return localRef(Offset); + } + + /// Mutates a local variable. + template void setLocal(unsigned Offset, const T &Value) { + localRef(Offset) = Value; + } + + /// Returns a pointer to a local variable. + Pointer getLocalPointer(unsigned Offset); + + /// Returns the value of an argument. + template const T &getParam(unsigned Offset) { + auto Pt = Params.find(Offset); + if (Pt == Params.end()) { + return stackRef(Offset); + } else { + return Pointer(reinterpret_cast(Pt->second.get())).deref(); + } + } + + /// Mutates a local copy of a parameter. + template void setParam(unsigned Offset, const T &Value) { + getParamPointer(Offset).deref() = Value; + } + + /// Returns a pointer to an argument - lazily creates a block. + Pointer getParamPointer(unsigned Offset); + + /// Returns the 'this' pointer. + const Pointer &getThis() const { return This; } + + /// Checks if the frame is a root frame - return should quit the interpreter.
+ bool isRoot() const { return !Func; } + + /// Returns the PC of the frame's code start. + CodePtr getPC() const { return Func->getCodeBegin(); } + + /// Returns the return address of the frame. + CodePtr getRetPC() const { return RetPC; } + + /// Map a location to a source. + virtual SourceInfo getSource(CodePtr PC) const; + const Expr *getExpr(CodePtr PC) const; + SourceLocation getLocation(CodePtr PC) const; + +private: + /// Returns an original argument from the stack. + template const T &stackRef(unsigned Offset) { + return *reinterpret_cast(Args - ArgSize + Offset); + } + + /// Returns a reference to a local. + template T &localRef(unsigned Offset) { + return *reinterpret_cast(Locals.get() + Offset); + } + + /// Returns a pointer to a local's block. + void *localBlock(unsigned Offset) { + return Locals.get() + Offset - sizeof(Block); + } + +private: + /// Reference to the interpreter state. + InterpState &S; + /// Reference to the function being executed. + Function *Func; + /// Current object pointer for methods. + Pointer This; + /// Return address. + CodePtr RetPC; + /// The size of all the arguments. + const unsigned ArgSize; + /// Pointer to the arguments in the callee's frame. + char *Args = nullptr; + /// Fixed, initial storage for known local variables. + std::unique_ptr Locals; + /// Offset on the stack at entry. + const size_t FrameOffset; + /// Mapping from arg offsets to their argument blocks. + llvm::DenseMap> Params; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/InterpStack.cpp b/lib/AST/Interp/InterpStack.cpp new file mode 100644 index 00000000000..5c803f3d942 --- /dev/null +++ b/lib/AST/Interp/InterpStack.cpp @@ -0,0 +1,78 @@ +//===--- InterpStack.cpp - Stack implementation for the VM ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include "InterpStack.h" + +using namespace clang; +using namespace clang::interp; + +InterpStack::~InterpStack() { + clear(); +} + +void InterpStack::clear() { + if (Chunk && Chunk->Next) + free(Chunk->Next); + if (Chunk) + free(Chunk); + Chunk = nullptr; + StackSize = 0; +} + +void *InterpStack::grow(size_t Size) { + assert(Size < ChunkSize - sizeof(StackChunk) && "Object too large"); + + if (!Chunk || sizeof(StackChunk) + Chunk->size() + Size > ChunkSize) { + if (Chunk && Chunk->Next) { + Chunk = Chunk->Next; + } else { + StackChunk *Next = new (malloc(ChunkSize)) StackChunk(Chunk); + if (Chunk) + Chunk->Next = Next; + Chunk = Next; + } + } + + auto *Object = reinterpret_cast(Chunk->End); + Chunk->End += Size; + StackSize += Size; + return Object; +} + +void *InterpStack::peek(size_t Size) { + assert(Chunk && "Stack is empty!"); + + StackChunk *Ptr = Chunk; + while (Size > Ptr->size()) { + Size -= Ptr->size(); + Ptr = Ptr->Prev; + assert(Ptr && "Offset too large"); + } + + return reinterpret_cast(Ptr->End - Size); +} + +void InterpStack::shrink(size_t Size) { + assert(Chunk && "Chunk is empty!"); + + while (Size > Chunk->size()) { + Size -= Chunk->size(); + if (Chunk->Next) { + free(Chunk->Next); + Chunk->Next = nullptr; + } + Chunk->End = Chunk->start(); + Chunk = Chunk->Prev; + assert(Chunk && "Offset too large"); + } + + Chunk->End -= Size; + StackSize -= Size; +} diff --git a/lib/AST/Interp/InterpStack.h b/lib/AST/Interp/InterpStack.h new file mode 100644 index 00000000000..127adb6b8eb --- /dev/null +++ b/lib/AST/Interp/InterpStack.h @@ -0,0 +1,113 @@ +//===--- InterpStack.h - Stack implementation for the VM --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the upwards-growing stack used by the interpreter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_INTERPSTACK_H +#define LLVM_CLANG_AST_INTERP_INTERPSTACK_H + +#include + +namespace clang { +namespace interp { + +/// Stack frame storing temporaries and parameters. +class InterpStack final { +public: + InterpStack() {} + + /// Destroys the stack, freeing up storage. + ~InterpStack(); + + /// Constructs a value in place on the top of the stack. + template void push(Tys &&... Args) { + new (grow(aligned_size())) T(std::forward(Args)...); + } + + /// Returns the value from the top of the stack and removes it. + template T pop() { + auto *Ptr = &peek(); + auto Value = std::move(*Ptr); + Ptr->~T(); + shrink(aligned_size()); + return Value; + } + + /// Discards the top value from the stack. + template void discard() { + auto *Ptr = &peek(); + Ptr->~T(); + shrink(aligned_size()); + } + + /// Returns a reference to the value on the top of the stack. + template T &peek() { + return *reinterpret_cast(peek(aligned_size())); + } + + /// Returns a pointer to the top object. + void *top() { return Chunk ? peek(0) : nullptr; } + + /// Returns the size of the stack in bytes. + size_t size() const { return StackSize; } + + /// Clears the stack without calling any destructors. + void clear(); + +private: + /// All stack slots are aligned to the native pointer alignment for storage. + /// The size of an object is rounded up to a pointer alignment multiple. + template constexpr size_t aligned_size() const { + constexpr size_t PtrAlign = alignof(void *); + return ((sizeof(T) + PtrAlign - 1) / PtrAlign) * PtrAlign; + } + + /// Grows the stack to accommodate a value and returns a pointer to it.
+ void *grow(size_t Size); + /// Returns a pointer from the top of the stack. + void *peek(size_t Size); + /// Shrinks the stack. + void shrink(size_t Size); + + /// Allocates stack space in 1 MiB chunks. + static constexpr size_t ChunkSize = 1024 * 1024; + + /// Metadata for each stack chunk. + /// + /// The stack is composed of a linked list of chunks. Whenever an allocation + /// is out of bounds, a new chunk is linked. When a chunk becomes empty, + /// it is not immediately freed: a chunk is deallocated only when the + /// predecessor becomes empty. + struct StackChunk { + StackChunk *Next; + StackChunk *Prev; + char *End; + + StackChunk(StackChunk *Prev = nullptr) + : Next(nullptr), Prev(Prev), End(reinterpret_cast(this + 1)) {} + + /// Returns the size of the chunk, minus the header. + size_t size() { return End - start(); } + + /// Returns a pointer to the start of the data region. + char *start() { return reinterpret_cast(this + 1); } + }; + static_assert(sizeof(StackChunk) < ChunkSize, "Invalid chunk size"); + + /// Current (topmost in-use) chunk of the stack. + StackChunk *Chunk = nullptr; + /// Total size of the stack. + size_t StackSize = 0; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/InterpState.cpp b/lib/AST/Interp/InterpState.cpp new file mode 100644 index 00000000000..25684f3c093 --- /dev/null +++ b/lib/AST/Interp/InterpState.cpp @@ -0,0 +1,74 @@ +//===--- InterpState.cpp - Interpreter for the constexpr VM -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InterpState.h" +#include +#include "Function.h" +#include "InterpFrame.h" +#include "InterpStack.h" +#include "Opcode.h" +#include "PrimType.h" +#include "Program.h" +#include "State.h" + +using namespace clang; +using namespace clang::interp; + +using APSInt = llvm::APSInt; + +InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, + Context &Ctx, SourceMapper *M) + : Parent(Parent), M(M), P(P), Stk(Stk), Ctx(Ctx), Current(nullptr), + CallStackDepth(Parent.getCallStackDepth() + 1) {} + +InterpState::~InterpState() { + while (Current) { + InterpFrame *Next = Current->Caller; + delete Current; + Current = Next; + } + + while (DeadBlocks) { + DeadBlock *Next = DeadBlocks->Next; + free(DeadBlocks); + DeadBlocks = Next; + } +} + +Frame *InterpState::getCurrentFrame() { + if (Current && Current->Caller) { + return Current; + } else { + return Parent.getCurrentFrame(); + } +} + +bool InterpState::reportOverflow(const Expr *E, const llvm::APSInt &Value) { + QualType Type = E->getType(); + CCEDiag(E, diag::note_constexpr_overflow) << Value << Type; + return noteUndefinedBehavior(); +} + +void InterpState::deallocate(Block *B) { + Descriptor *Desc = B->getDescriptor(); + if (B->hasPointers()) { + size_t Size = B->getSize(); + + // Allocate a new block, transferring over pointers. + char *Memory = reinterpret_cast(malloc(sizeof(DeadBlock) + Size)); + auto *D = new (Memory) DeadBlock(DeadBlocks, B); + + // Move data from one block to another. + if (Desc->MoveFn) + Desc->MoveFn(B, B->data(), D->data(), Desc); + } else { + // Free storage, if necessary. 
+ if (Desc->DtorFn) + Desc->DtorFn(B, B->data(), Desc); + } +} diff --git a/lib/AST/Interp/InterpState.h b/lib/AST/Interp/InterpState.h new file mode 100644 index 00000000000..c2209bbcbb9 --- /dev/null +++ b/lib/AST/Interp/InterpState.h @@ -0,0 +1,112 @@ +//===--- InterpState.h - Interpreter state for the constexpr VM -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definition of the interpreter state and entry point. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_INTERPSTATE_H +#define LLVM_CLANG_AST_INTERP_INTERPSTATE_H + +#include "Context.h" +#include "Function.h" +#include "InterpStack.h" +#include "State.h" +#include "clang/AST/APValue.h" +#include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OptionalDiagnostic.h" + +namespace clang { +namespace interp { +class Context; +class Function; +class InterpStack; +class InterpFrame; +class SourceMapper; + +/// Interpreter context. +class InterpState final : public State, public SourceMapper { +public: + InterpState(State &Parent, Program &P, InterpStack &Stk, Context &Ctx, + SourceMapper *M = nullptr); + + ~InterpState(); + + // Stack frame accessors. + Frame *getSplitFrame() { return Parent.getCurrentFrame(); } + Frame *getCurrentFrame() override; + unsigned getCallStackDepth() override { return CallStackDepth; } + const Frame *getBottomFrame() const override { + return Parent.getBottomFrame(); + } + + // Access objects from the walker context.
+ Expr::EvalStatus &getEvalStatus() const override { + return Parent.getEvalStatus(); + } + ASTContext &getCtx() const override { return Parent.getCtx(); } + + // Forward status checks and updates to the walker. + bool checkingForUndefinedBehavior() const override { + return Parent.checkingForUndefinedBehavior(); + } + bool keepEvaluatingAfterFailure() const override { + return Parent.keepEvaluatingAfterFailure(); + } + bool checkingPotentialConstantExpression() const override { + return Parent.checkingPotentialConstantExpression(); + } + bool noteUndefinedBehavior() override { + return Parent.noteUndefinedBehavior(); + } + bool hasActiveDiagnostic() override { return Parent.hasActiveDiagnostic(); } + void setActiveDiagnostic(bool Flag) override { + Parent.setActiveDiagnostic(Flag); + } + void setFoldFailureDiagnostic(bool Flag) override { + Parent.setFoldFailureDiagnostic(Flag); + } + bool hasPriorDiagnostic() override { return Parent.hasPriorDiagnostic(); } + + /// Reports overflow and returns true if evaluation should continue. + bool reportOverflow(const Expr *E, const llvm::APSInt &Value); + + /// Deallocates a block. + void deallocate(Block *B); + + /// Delegates source mapping to the mapper. + SourceInfo getSource(Function *F, CodePtr PC) const override { + return M ? M->getSource(F, PC) : F->getSource(PC); + } + +private: + /// AST Walker state. + State &Parent; + /// Dead block chain. + DeadBlock *DeadBlocks = nullptr; + /// Reference to the offset-source mapping. + SourceMapper *M; + +public: + /// Reference to the module containing all bytecode. + Program &P; + /// Temporary stack. + InterpStack &Stk; + /// Interpreter Context. + Context &Ctx; + /// The current frame. + InterpFrame *Current = nullptr; + /// Call stack depth.
+ unsigned CallStackDepth; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Opcode.h b/lib/AST/Interp/Opcode.h new file mode 100644 index 00000000000..d2daa1ea52a --- /dev/null +++ b/lib/AST/Interp/Opcode.h @@ -0,0 +1,30 @@ +//===--- Opcode.h - Opcodes for the constexpr VM ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines all opcodes executed by the VM and emitted by the compiler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_OPCODE_H +#define LLVM_CLANG_AST_INTERP_OPCODE_H + +#include + +namespace clang { +namespace interp { + +enum Opcode : uint32_t { +#define GET_OPCODE_NAMES +#include "Opcodes.inc" +#undef GET_OPCODE_NAMES +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Opcodes.td b/lib/AST/Interp/Opcodes.td new file mode 100644 index 00000000000..4aba5f5cd83 --- /dev/null +++ b/lib/AST/Interp/Opcodes.td @@ -0,0 +1,422 @@ +//===--- Opcodes.td - Opcode definitions for the constexpr VM ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper file used to generate opcodes, the interpreter and the disassembler. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// Types evaluated by the interpreter.
+//===----------------------------------------------------------------------===// + +class Type; +def Bool : Type; +def Sint8 : Type; +def Uint8 : Type; +def Sint16 : Type; +def Uint16 : Type; +def Sint32 : Type; +def Uint32 : Type; +def Sint64 : Type; +def Uint64 : Type; +def Ptr : Type; + +//===----------------------------------------------------------------------===// +// Types transferred to the interpreter. +//===----------------------------------------------------------------------===// + +class ArgType { string Name = ?; } +def ArgSint8 : ArgType { let Name = "int8_t"; } +def ArgUint8 : ArgType { let Name = "uint8_t"; } +def ArgSint16 : ArgType { let Name = "int16_t"; } +def ArgUint16 : ArgType { let Name = "uint16_t"; } +def ArgSint32 : ArgType { let Name = "int32_t"; } +def ArgUint32 : ArgType { let Name = "uint32_t"; } +def ArgSint64 : ArgType { let Name = "int64_t"; } +def ArgUint64 : ArgType { let Name = "uint64_t"; } +def ArgBool : ArgType { let Name = "bool"; } + +def ArgFunction : ArgType { let Name = "Function *"; } +def ArgRecord : ArgType { let Name = "Record *"; } + +def ArgSema : ArgType { let Name = "const fltSemantics *"; } + +def ArgExpr : ArgType { let Name = "const Expr *"; } +def ArgFloatingLiteral : ArgType { let Name = "const FloatingLiteral *"; } +def ArgCXXMethodDecl : ArgType { let Name = "const CXXMethodDecl *"; } +def ArgFunctionDecl : ArgType { let Name = "const FunctionDecl *"; } +def ArgRecordDecl : ArgType { let Name = "const RecordDecl *"; } +def ArgCXXRecordDecl : ArgType { let Name = "const CXXRecordDecl *"; } +def ArgValueDecl : ArgType { let Name = "const ValueDecl *"; } +def ArgRecordField : ArgType { let Name = "const Record::Field *"; } + +//===----------------------------------------------------------------------===// +// Classes of types instructions operate on.
+//===----------------------------------------------------------------------===// + +class TypeClass { + list Types; +} + +def AluTypeClass : TypeClass { + let Types = [Sint8, Uint8, Sint16, Uint16, Sint32, + Uint32, Sint64, Uint64, Bool]; +} + +def PtrTypeClass : TypeClass { + let Types = [Ptr]; +} + +def AllTypeClass : TypeClass { + let Types = !listconcat(AluTypeClass.Types, PtrTypeClass.Types); +} + +def ComparableTypeClass : TypeClass { + let Types = !listconcat(AluTypeClass.Types, [Ptr]); +} + +class SingletonTypeClass : TypeClass { + let Types = [Ty]; +} + +//===----------------------------------------------------------------------===// +// Record describing all opcodes. +//===----------------------------------------------------------------------===// + +class Opcode { + list Types = []; + list Args = []; + string Name = ""; + bit CanReturn = 0; + bit ChangesPC = 0; + bit HasCustomLink = 0; + bit HasCustomEval = 0; + bit HasGroup = 0; +} + +class AluOpcode : Opcode { + let Types = [AluTypeClass]; + let HasGroup = 1; +} + +//===----------------------------------------------------------------------===// +// Jump opcodes +//===----------------------------------------------------------------------===// + +class JumpOpcode : Opcode { + let Args = [ArgSint32]; + let ChangesPC = 1; + let HasCustomEval = 1; +} + +// [] -> [] +def Jmp : JumpOpcode; +// [Bool] -> [], jumps if true. +def Jt : JumpOpcode; +// [Bool] -> [], jumps if false. 
+def Jf : JumpOpcode; + +//===----------------------------------------------------------------------===// +// Returns +//===----------------------------------------------------------------------===// + +// [Value] -> [] +def Ret : Opcode { + let Types = [AllTypeClass]; + let ChangesPC = 1; + let CanReturn = 1; + let HasGroup = 1; + let HasCustomEval = 1; +} +// [] -> [] +def RetVoid : Opcode { + let CanReturn = 1; + let ChangesPC = 1; + let HasCustomEval = 1; +} +// [Value] -> [] +def RetValue : Opcode { + let CanReturn = 1; + let ChangesPC = 1; + let HasCustomEval = 1; +} +// [] -> EXIT +def NoRet : Opcode {} + +//===----------------------------------------------------------------------===// +// Frame management +//===----------------------------------------------------------------------===// + +// [] -> [] +def Destroy : Opcode { + let Args = [ArgUint32]; + let HasCustomEval = 1; +} + +//===----------------------------------------------------------------------===// +// Constants +//===----------------------------------------------------------------------===// + +class ConstOpcode : Opcode { + let Types = [SingletonTypeClass]; + let Args = [ArgTy]; + let Name = "Const"; +} + +// [] -> [Integer] +def ConstSint8 : ConstOpcode; +def ConstUint8 : ConstOpcode; +def ConstSint16 : ConstOpcode; +def ConstUint16 : ConstOpcode; +def ConstSint32 : ConstOpcode; +def ConstUint32 : ConstOpcode; +def ConstSint64 : ConstOpcode; +def ConstUint64 : ConstOpcode; +def ConstBool : ConstOpcode; + +// [] -> [Integer] +def Zero : Opcode { + let Types = [AluTypeClass]; +} + +// [] -> [Pointer] +def Null : Opcode { + let Types = [PtrTypeClass]; +} + +//===----------------------------------------------------------------------===// +// Pointer generation +//===----------------------------------------------------------------------===// + +// [] -> [Pointer] +def GetPtrLocal : Opcode { + // Offset of local. 
+ let Args = [ArgUint32]; + bit HasCustomEval = 1; +} +// [] -> [Pointer] +def GetPtrParam : Opcode { + // Offset of parameter. + let Args = [ArgUint32]; +} +// [] -> [Pointer] +def GetPtrGlobal : Opcode { + // Index of global. + let Args = [ArgUint32]; +} +// [Pointer] -> [Pointer] +def GetPtrField : Opcode { + // Offset of field. + let Args = [ArgUint32]; +} +// [Pointer] -> [Pointer] +def GetPtrActiveField : Opcode { + // Offset of field. + let Args = [ArgUint32]; +} +// [] -> [Pointer] +def GetPtrActiveThisField : Opcode { + // Offset of field. + let Args = [ArgUint32]; +} +// [] -> [Pointer] +def GetPtrThisField : Opcode { + // Offset of field. + let Args = [ArgUint32]; +} +// [Pointer] -> [Pointer] +def GetPtrBase : Opcode { + // Offset of field, which is a base. + let Args = [ArgUint32]; +} +// [Pointer] -> [Pointer] +def GetPtrVirtBase : Opcode { + // RecordDecl of base class. + let Args = [ArgRecordDecl]; +} +// [] -> [Pointer] +def GetPtrThisBase : Opcode { + // Offset of field, which is a base. + let Args = [ArgUint32]; +} +// [] -> [Pointer] +def GetPtrThisVirtBase : Opcode { + // RecordDecl of base class. 
+ let Args = [ArgRecordDecl]; +} +// [] -> [Pointer] +def This : Opcode; + +// [Pointer] -> [Pointer] +def NarrowPtr : Opcode; +// [Pointer] -> [Pointer] +def ExpandPtr : Opcode; + +//===----------------------------------------------------------------------===// +// Direct field accessors +//===----------------------------------------------------------------------===// + +class AccessOpcode : Opcode { + let Types = [AllTypeClass]; + let Args = [ArgUint32]; + let HasGroup = 1; +} + +class BitFieldOpcode : Opcode { + let Types = [AluTypeClass]; + let Args = [ArgRecordField]; + let HasGroup = 1; +} + +// [] -> [Pointer] +def GetLocal : AccessOpcode { let HasCustomEval = 1; } +// [] -> [Pointer] +def SetLocal : AccessOpcode { let HasCustomEval = 1; } + +// [] -> [Value] +def GetGlobal : AccessOpcode; +// [Value] -> [] +def InitGlobal : AccessOpcode; +// [Value] -> [] +def SetGlobal : AccessOpcode; + +// [] -> [Value] +def GetParam : AccessOpcode; +// [Value] -> [] +def SetParam : AccessOpcode; + +// [Pointer] -> [Pointer, Value] +def GetField : AccessOpcode; +// [Pointer] -> [Value] +def GetFieldPop : AccessOpcode; +// [] -> [Value] +def GetThisField : AccessOpcode; + +// [Pointer, Value] -> [Pointer] +def SetField : AccessOpcode; +// [Value] -> [] +def SetThisField : AccessOpcode; + +// [Value] -> [] +def InitThisField : AccessOpcode; +// [Value] -> [] +def InitThisFieldActive : AccessOpcode; +// [Value] -> [] +def InitThisBitField : BitFieldOpcode; +// [Pointer, Value] -> [] +def InitField : AccessOpcode; +// [Pointer, Value] -> [] +def InitBitField : BitFieldOpcode; +// [Pointer, Value] -> [] +def InitFieldActive : AccessOpcode; + +//===----------------------------------------------------------------------===// +// Pointer access +//===----------------------------------------------------------------------===// + +class LoadOpcode : Opcode { + let Types = [AllTypeClass]; + let HasGroup = 1; +} + +// [Pointer] -> [Pointer, Value] +def Load : LoadOpcode {} +// 
[Pointer] -> [Value] +def LoadPop : LoadOpcode {} + +class StoreOpcode : Opcode { + let Types = [AllTypeClass]; + let HasGroup = 1; +} + +class StoreBitFieldOpcode : Opcode { + let Types = [AluTypeClass]; + let HasGroup = 1; +} + +// [Pointer, Value] -> [Pointer] +def Store : StoreOpcode {} +// [Pointer, Value] -> [] +def StorePop : StoreOpcode {} + +// [Pointer, Value] -> [Pointer] +def StoreBitField : StoreBitFieldOpcode {} +// [Pointer, Value] -> [] +def StoreBitFieldPop : StoreBitFieldOpcode {} + +// [Pointer, Value] -> [] +def InitPop : StoreOpcode {} +// [Pointer, Value] -> [Pointer] +def InitElem : Opcode { + let Types = [AllTypeClass]; + let Args = [ArgUint32]; + let HasGroup = 1; +} +// [Pointer, Value] -> [] +def InitElemPop : Opcode { + let Types = [AllTypeClass]; + let Args = [ArgUint32]; + let HasGroup = 1; +} + +//===----------------------------------------------------------------------===// +// Pointer arithmetic. +//===----------------------------------------------------------------------===// + +// [Pointer, Integral] -> [Pointer] +def AddOffset : AluOpcode; +// [Pointer, Integral] -> [Pointer] +def SubOffset : AluOpcode; + +//===----------------------------------------------------------------------===// +// Binary operators. +//===----------------------------------------------------------------------===// + +// [Real, Real] -> [Real] +def Sub : AluOpcode; +def Add : AluOpcode; +def Mul : AluOpcode; + +//===----------------------------------------------------------------------===// +// Comparison opcodes. 
+//===----------------------------------------------------------------------===// + +class EqualityOpcode : Opcode { + let Types = [AllTypeClass]; + let HasGroup = 1; +} + +def EQ : EqualityOpcode; +def NE : EqualityOpcode; + +class ComparisonOpcode : Opcode { + let Types = [ComparableTypeClass]; + let HasGroup = 1; +} + +def LT : ComparisonOpcode; +def LE : ComparisonOpcode; +def GT : ComparisonOpcode; +def GE : ComparisonOpcode; + +//===----------------------------------------------------------------------===// +// Stack management. +//===----------------------------------------------------------------------===// + +// [Value] -> [] +def Pop : Opcode { + let Types = [AllTypeClass]; + let HasGroup = 1; +} + +// [Value] -> [Value, Value] +def Dup : Opcode { + let Types = [AllTypeClass]; + let HasGroup = 1; +} diff --git a/lib/AST/Interp/Pointer.cpp b/lib/AST/Interp/Pointer.cpp new file mode 100644 index 00000000000..1a10723aaca --- /dev/null +++ b/lib/AST/Interp/Pointer.cpp @@ -0,0 +1,193 @@ +//===--- Pointer.cpp - Types for the constexpr VM ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Pointer.h" +#include "Block.h" +#include "Function.h" +#include "PrimType.h" + +using namespace clang; +using namespace clang::interp; + +Pointer::Pointer(Block *Pointee) : Pointer(Pointee, 0, 0) {} + +Pointer::Pointer(const Pointer &P) : Pointer(P.Pointee, P.Base, P.Offset) {} + +Pointer::Pointer(Pointer &&P) + : Pointee(P.Pointee), Base(P.Base), Offset(P.Offset) { + if (Pointee) + Pointee->movePointer(&P, this); +} + +Pointer::Pointer(Block *Pointee, unsigned Base, unsigned Offset) + : Pointee(Pointee), Base(Base), Offset(Offset) { + assert((Base == RootPtrMark || Base % alignof(void *) == 0) && "wrong base"); + if (Pointee) + Pointee->addPointer(this); +} + +Pointer::~Pointer() { + if (Pointee) { + Pointee->removePointer(this); + Pointee->cleanup(); + } +} + +void Pointer::operator=(const Pointer &P) { + Block *Old = Pointee; + + if (Pointee) + Pointee->removePointer(this); + + Offset = P.Offset; + Base = P.Base; + + Pointee = P.Pointee; + if (Pointee) + Pointee->addPointer(this); + + if (Old) + Old->cleanup(); +} + +void Pointer::operator=(Pointer &&P) { + Block *Old = Pointee; + + if (Pointee) + Pointee->removePointer(this); + + Offset = P.Offset; + Base = P.Base; + + Pointee = P.Pointee; + if (Pointee) + Pointee->movePointer(&P, this); + + if (Old) + Old->cleanup(); +} + +APValue Pointer::toAPValue() const { + APValue::LValueBase Base; + llvm::SmallVector Path; + CharUnits Offset; + bool IsNullPtr; + bool IsOnePastEnd; + + if (isZero()) { + Base = static_cast(nullptr); + IsNullPtr = true; + IsOnePastEnd = false; + Offset = CharUnits::Zero(); + } else { + // Build the lvalue base from the block. 
+ Descriptor *Desc = getDeclDesc(); + if (auto *VD = Desc->asValueDecl()) + Base = VD; + else if (auto *E = Desc->asExpr()) + Base = E; + else + llvm_unreachable("Invalid allocation type"); + + // Not a null pointer. + IsNullPtr = false; + + if (isUnknownSizeArray()) { + IsOnePastEnd = false; + Offset = CharUnits::Zero(); + } else { + // TODO: compute the offset into the object. + Offset = CharUnits::Zero(); + + // Build the path into the object. + Pointer Ptr = *this; + while (Ptr.isField()) { + if (Ptr.isArrayElement()) { + Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getIndex())); + Ptr = Ptr.getArray(); + } else { + // TODO: figure out if base is virtual + bool IsVirtual = false; + + // Create a path entry for the field. + Descriptor *Desc = Ptr.getFieldDesc(); + if (auto *BaseOrMember = Desc->asDecl()) { + Path.push_back(APValue::LValuePathEntry({BaseOrMember, IsVirtual})); + Ptr = Ptr.getBase(); + continue; + } + llvm_unreachable("Invalid field type"); + } + } + + IsOnePastEnd = isOnePastEnd(); + } + } + + return APValue(Base, Offset, Path, IsOnePastEnd, IsNullPtr); +} + +bool Pointer::isInitialized() const { + assert(Pointee && "Cannot check if null pointer was initialized"); + Descriptor *Desc = getFieldDesc(); + if (Desc->isPrimitiveArray()) { + if (Pointee->IsStatic) + return true; + // Primitive array field are stored in a bitset. + InitMap *Map = getInitMap(); + if (!Map) + return false; + if (Map == (InitMap *)-1) + return true; + return Map->isInitialized(getIndex()); + } else { + // Field has its bit in an inline descriptor. + return Base == 0 || getInlineDesc()->IsInitialized; + } +} + +void Pointer::initialize() const { + assert(Pointee && "Cannot initialize null pointer"); + Descriptor *Desc = getFieldDesc(); + if (Desc->isPrimitiveArray()) { + if (!Pointee->IsStatic) { + // Primitive array initializer. 
+ InitMap *&Map = getInitMap(); + if (Map == (InitMap *)-1) + return; + if (Map == nullptr) + Map = InitMap::allocate(Desc->getNumElems()); + if (Map->initialize(getIndex())) { + free(Map); + Map = (InitMap *)-1; + } + } + } else { + // Field has its bit in an inline descriptor. + assert(Base != 0 && "Only composite fields can be initialised"); + getInlineDesc()->IsInitialized = true; + } +} + +void Pointer::activate() const { + // Field has its bit in an inline descriptor. + assert(Base != 0 && "Only composite fields can be initialised"); + getInlineDesc()->IsActive = true; +} + +void Pointer::deactivate() const { + // TODO: this only appears in constructors, so nothing to deactivate. +} + +bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) { + return A.Pointee == B.Pointee; +} + +bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) { + return A.Base == B.Base && A.getFieldDesc()->IsArray; +} diff --git a/lib/AST/Interp/Pointer.h b/lib/AST/Interp/Pointer.h new file mode 100644 index 00000000000..b8fa98e24fa --- /dev/null +++ b/lib/AST/Interp/Pointer.h @@ -0,0 +1,353 @@ +//===--- Pointer.h - Types for the constexpr VM -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the classes responsible for pointer tracking. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_POINTER_H +#define LLVM_CLANG_AST_INTERP_POINTER_H + +#include "Block.h" +#include "Descriptor.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ComparisonCategories.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace interp { +class Block; +class DeadBlock; +class Context; +class InterpState; +class Pointer; +class Function; +enum PrimType : unsigned; + +/// A pointer to a memory block, live or dead. +/// +/// This object can be allocated into interpreter stack frames. If pointing to +/// a live block, it is a link in the chain of pointers pointing to the block. +class Pointer { +private: + static constexpr unsigned PastEndMark = (unsigned)-1; + static constexpr unsigned RootPtrMark = (unsigned)-1; + +public: + Pointer() {} + Pointer(Block *B); + Pointer(const Pointer &P); + Pointer(Pointer &&P); + ~Pointer(); + + void operator=(const Pointer &P); + void operator=(Pointer &&P); + + /// Converts the pointer to an APValue. + APValue toAPValue() const; + + /// Offsets a pointer inside an array. + Pointer atIndex(unsigned Idx) const { + if (Base == RootPtrMark) + return Pointer(Pointee, RootPtrMark, getDeclDesc()->getSize()); + unsigned Off = Idx * elemSize(); + if (getFieldDesc()->ElemDesc) + Off += sizeof(InlineDescriptor); + else + Off += sizeof(InitMap *); + return Pointer(Pointee, Base, Base + Off); + } + + /// Creates a pointer to a field. + Pointer atField(unsigned Off) const { + unsigned Field = Offset + Off; + return Pointer(Pointee, Field, Field); + } + + /// Restricts the scope of an array element pointer. + Pointer narrow() const { + // Null pointers cannot be narrowed. + if (isZero() || isUnknownSizeArray()) + return *this; + + // Pointer to an array of base types - enter block. 
+ if (Base == RootPtrMark) + return Pointer(Pointee, 0, Offset == 0 ? Offset : PastEndMark); + + // Pointer is one past end - magic offset marks that. + if (isOnePastEnd()) + return Pointer(Pointee, Base, PastEndMark); + + // Primitive arrays are a bit special since they do not have inline + // descriptors. If Offset != Base, then the pointer already points to + // an element and there is nothing to do. Otherwise, the pointer is + // adjusted to the first element of the array. + if (inPrimitiveArray()) { + if (Offset != Base) + return *this; + return Pointer(Pointee, Base, Offset + sizeof(InitMap *)); + } + + // Pointer is to a field or array element - enter it. + if (Offset != Base) + return Pointer(Pointee, Offset, Offset); + + // Enter the first element of an array. + if (!getFieldDesc()->isArray()) + return *this; + + const unsigned NewBase = Base + sizeof(InlineDescriptor); + return Pointer(Pointee, NewBase, NewBase); + } + + /// Expands a pointer to the containing array, undoing narrowing. + Pointer expand() const { + if (isElementPastEnd()) { + // Revert to an outer one-past-end pointer. + unsigned Adjust; + if (inPrimitiveArray()) + Adjust = sizeof(InitMap *); + else + Adjust = sizeof(InlineDescriptor); + return Pointer(Pointee, Base, Base + getSize() + Adjust); + } + + // Do not step out of array elements. + if (Base != Offset) + return *this; + + // If at base, point to an array of base types. + if (Base == 0) + return Pointer(Pointee, RootPtrMark, 0); + + // Step into the containing array, if inside one. + unsigned Next = Base - getInlineDesc()->Offset; + Descriptor *Desc = Next == 0 ? getDeclDesc() : getDescriptor(Next)->Desc; + if (!Desc->IsArray) + return *this; + return Pointer(Pointee, Next, Offset); + } + + /// Checks if the pointer is null. + bool isZero() const { return Pointee == nullptr; } + /// Checks if the pointer is live. + bool isLive() const { return Pointee && !Pointee->IsDead; } + /// Checks if the item is a field in an object. 
+ bool isField() const { return Base != 0 && Base != RootPtrMark; } + + /// Accessor for information about the declaration site. + Descriptor *getDeclDesc() const { return Pointee->Desc; } + SourceLocation getDeclLoc() const { return getDeclDesc()->getLocation(); } + + /// Returns a pointer to the object of which this pointer is a field. + Pointer getBase() const { + if (Base == RootPtrMark) { + assert(Offset == PastEndMark && "cannot get base of a block"); + return Pointer(Pointee, Base, 0); + } + assert(Offset == Base && "not an inner field"); + unsigned NewBase = Base - getInlineDesc()->Offset; + return Pointer(Pointee, NewBase, NewBase); + } + /// Returns the parent array. + Pointer getArray() const { + if (Base == RootPtrMark) { + assert(Offset != 0 && Offset != PastEndMark && "not an array element"); + return Pointer(Pointee, Base, 0); + } + assert(Offset != Base && "not an array element"); + return Pointer(Pointee, Base, Base); + } + + /// Accessors for information about the innermost field. + Descriptor *getFieldDesc() const { + if (Base == 0 || Base == RootPtrMark) + return getDeclDesc(); + return getInlineDesc()->Desc; + } + + /// Returns the type of the innermost field. + QualType getType() const { return getFieldDesc()->getType(); } + + /// Returns the element size of the innermost field. + size_t elemSize() const { + if (Base == RootPtrMark) + return getDeclDesc()->getSize(); + return getFieldDesc()->getElemSize(); + } + /// Returns the total size of the innermost field. + size_t getSize() const { return getFieldDesc()->getSize(); } + + /// Returns the offset into an array. + unsigned getOffset() const { + assert(Offset != PastEndMark && "invalid offset"); + if (Base == RootPtrMark) + return Offset; + + unsigned Adjust = 0; + if (Offset != Base) { + if (getFieldDesc()->ElemDesc) + Adjust = sizeof(InlineDescriptor); + else + Adjust = sizeof(InitMap *); + } + return Offset - Base - Adjust; + } + + /// Checks if the innermost field is an array. 
+ bool inArray() const { return getFieldDesc()->IsArray; } + /// Checks if the structure is a primitive array. + bool inPrimitiveArray() const { return getFieldDesc()->isPrimitiveArray(); } + /// Checks if the structure is an array of unknown size. + bool isUnknownSizeArray() const { + return getFieldDesc()->isUnknownSizeArray(); + } + /// Checks if the pointer points to an array. + bool isArrayElement() const { return Base != Offset; } + /// Pointer points directly to a block. + bool isRoot() const { + return (Base == 0 || Base == RootPtrMark) && Offset == 0; + } + + /// Returns the record descriptor of a class. + Record *getRecord() const { return getFieldDesc()->ElemRecord; } + /// Returns the field information. + const FieldDecl *getField() const { return getFieldDesc()->asFieldDecl(); } + + /// Checks if the object is a union. + bool isUnion() const; + + /// Checks if the storage is extern. + bool isExtern() const { return Pointee->isExtern(); } + /// Checks if the storage is static. + bool isStatic() const { return Pointee->isStatic(); } + /// Checks if the storage is temporary. + bool isTemporary() const { return Pointee->isTemporary(); } + /// Checks if the storage is a static temporary. + bool isStaticTemporary() const { return isStatic() && isTemporary(); } + + /// Checks if the field is mutable. + bool isMutable() const { return Base != 0 && getInlineDesc()->IsMutable; } + /// Checks if an object was initialized. + bool isInitialized() const; + /// Checks if the object is active. + bool isActive() const { return Base == 0 || getInlineDesc()->IsActive; } + /// Checks if a structure is a base class. + bool isBaseClass() const { return isField() && getInlineDesc()->IsBase; } + + /// Checks if an object or a subfield is mutable. + bool isConst() const { + return Base == 0 ? getDeclDesc()->IsConst : getInlineDesc()->IsConst; + } + + /// Returns the declaration ID. 
+ llvm::Optional getDeclID() const { return Pointee->getDeclID(); } + + /// Returns the byte offset from the start. + unsigned getByteOffset() const { + return Offset; + } + + /// Returns the number of elements. + unsigned getNumElems() const { return getSize() / elemSize(); } + + /// Returns the index into an array. + int64_t getIndex() const { + if (isElementPastEnd()) + return 1; + if (auto ElemSize = elemSize()) + return getOffset() / ElemSize; + return 0; + } + + /// Checks if the index is one past end. + bool isOnePastEnd() const { + return isElementPastEnd() || getSize() == getOffset(); + } + + /// Checks if the pointer is an out-of-bounds element pointer. + bool isElementPastEnd() const { return Offset == PastEndMark; } + + /// Dereferences the pointer, if it's live. + template T &deref() const { + assert(isLive() && "Invalid pointer"); + return *reinterpret_cast(Pointee->data() + Offset); + } + + /// Dereferences a primitive element. + template T &elem(unsigned I) const { + return reinterpret_cast(Pointee->data())[I]; + } + + /// Initializes a field. + void initialize() const; + /// Activats a field. + void activate() const; + /// Deactivates an entire strurcutre. + void deactivate() const; + + /// Checks if two pointers are comparable. + static bool hasSameBase(const Pointer &A, const Pointer &B); + /// Checks if two pointers can be subtracted. + static bool hasSameArray(const Pointer &A, const Pointer &B); + + /// Prints the pointer. + void print(llvm::raw_ostream &OS) const { + OS << "{" << Base << ", " << Offset << ", "; + if (Pointee) + OS << Pointee->getSize(); + else + OS << "nullptr"; + OS << "}"; + } + +private: + friend class Block; + friend class DeadBlock; + + Pointer(Block *Pointee, unsigned Base, unsigned Offset); + + /// Returns the embedded descriptor preceding a field. + InlineDescriptor *getInlineDesc() const { return getDescriptor(Base); } + + /// Returns a descriptor at a given offset. 
+ InlineDescriptor *getDescriptor(unsigned Offset) const { + assert(Offset != 0 && "Not a nested pointer"); + return reinterpret_cast(Pointee->data() + Offset) - 1; + } + + /// Returns a reference to the pointer which stores the initialization map. + InitMap *&getInitMap() const { + return *reinterpret_cast(Pointee->data() + Base); + } + + /// The block the pointer is pointing to. + Block *Pointee = nullptr; + /// Start of the current subfield. + unsigned Base = 0; + /// Offset into the block. + unsigned Offset = 0; + + /// Previous link in the pointer chain. + Pointer *Prev = nullptr; + /// Next link in the pointer chain. + Pointer *Next = nullptr; +}; + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P) { + P.print(OS); + return OS; +} + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/PrimType.cpp b/lib/AST/Interp/PrimType.cpp new file mode 100644 index 00000000000..082bfaf3c20 --- /dev/null +++ b/lib/AST/Interp/PrimType.cpp @@ -0,0 +1,23 @@ +//===--- Type.cpp - Types for the constexpr VM ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PrimType.h" + +using namespace clang; +using namespace clang::interp; + +namespace clang { +namespace interp { + +size_t primSize(PrimType Type) { + TYPE_SWITCH(Type, return sizeof(T)); + llvm_unreachable("not a primitive type"); +} + +} // namespace interp +} // namespace clang diff --git a/lib/AST/Interp/PrimType.h b/lib/AST/Interp/PrimType.h new file mode 100644 index 00000000000..f5f4f8e5c32 --- /dev/null +++ b/lib/AST/Interp/PrimType.h @@ -0,0 +1,115 @@ +//===--- PrimType.h - Types for the constexpr VM --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the VM types and helpers operating on types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_TYPE_H +#define LLVM_CLANG_AST_INTERP_TYPE_H + +#include +#include +#include +#include "Boolean.h" +#include "Integral.h" +#include "Pointer.h" + +namespace clang { +namespace interp { + +/// Enumeration of the primitive types of the VM. +enum PrimType : unsigned { + PT_Sint8, + PT_Uint8, + PT_Sint16, + PT_Uint16, + PT_Sint32, + PT_Uint32, + PT_Sint64, + PT_Uint64, + PT_Bool, + PT_Ptr, +}; + +/// Mapping from primitive types to their representation. 
+template struct PrimConv; +template <> struct PrimConv { using T = Integral<8, true>; }; +template <> struct PrimConv { using T = Integral<8, false>; }; +template <> struct PrimConv { using T = Integral<16, true>; }; +template <> struct PrimConv { using T = Integral<16, false>; }; +template <> struct PrimConv { using T = Integral<32, true>; }; +template <> struct PrimConv { using T = Integral<32, false>; }; +template <> struct PrimConv { using T = Integral<64, true>; }; +template <> struct PrimConv { using T = Integral<64, false>; }; +template <> struct PrimConv { using T = Boolean; }; +template <> struct PrimConv { using T = Pointer; }; + +/// Returns the size of a primitive type in bytes. +size_t primSize(PrimType Type); + +/// Aligns a size to the pointer alignment. +constexpr size_t align(size_t Size) { + return ((Size + alignof(void *) - 1) / alignof(void *)) * alignof(void *); +} + +inline bool isPrimitiveIntegral(PrimType Type) { + switch (Type) { + case PT_Bool: + case PT_Sint8: + case PT_Uint8: + case PT_Sint16: + case PT_Uint16: + case PT_Sint32: + case PT_Uint32: + case PT_Sint64: + case PT_Uint64: + return true; + default: + return false; + } +} + +} // namespace interp +} // namespace clang + +/// Helper macro to simplify type switches. +/// The macro implicitly exposes a type T in the scope of the inner block. 
+#define TYPE_SWITCH_CASE(Name, B) \ + case Name: { using T = PrimConv::T; do {B;} while(0); break; } +#define TYPE_SWITCH(Expr, B) \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + TYPE_SWITCH_CASE(PT_Bool, B) \ + TYPE_SWITCH_CASE(PT_Ptr, B) \ + } +#define COMPOSITE_TYPE_SWITCH(Expr, B, D) \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Ptr, B) \ + default: do { D; } while(0); break; \ + } +#define INT_TYPE_SWITCH(Expr, B) \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Sint8, B) \ + TYPE_SWITCH_CASE(PT_Uint8, B) \ + TYPE_SWITCH_CASE(PT_Sint16, B) \ + TYPE_SWITCH_CASE(PT_Uint16, B) \ + TYPE_SWITCH_CASE(PT_Sint32, B) \ + TYPE_SWITCH_CASE(PT_Uint32, B) \ + TYPE_SWITCH_CASE(PT_Sint64, B) \ + TYPE_SWITCH_CASE(PT_Uint64, B) \ + default: llvm_unreachable("not an integer"); \ + } +#endif diff --git a/lib/AST/Interp/Program.cpp b/lib/AST/Interp/Program.cpp new file mode 100644 index 00000000000..fcbab0ea817 --- /dev/null +++ b/lib/AST/Interp/Program.cpp @@ -0,0 +1,364 @@ +//===--- Program.cpp - Bytecode for the constexpr VM ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Program.h" +#include "ByteCodeStmtGen.h" +#include "Context.h" +#include "Function.h" +#include "Opcode.h" +#include "PrimType.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" + +using namespace clang; +using namespace clang::interp; + +unsigned Program::createGlobalString(const StringLiteral *S) { + const size_t CharWidth = S->getCharByteWidth(); + const size_t BitWidth = CharWidth * Ctx.getCharBit(); + + PrimType CharType; + switch (CharWidth) { + case 1: + CharType = PT_Sint8; + break; + case 2: + CharType = PT_Uint16; + break; + case 4: + CharType = PT_Uint32; + break; + default: + llvm_unreachable("unsupported character width"); + } + + // Create a descriptor for the string. + Descriptor *Desc = allocateDescriptor(S, CharType, S->getLength() + 1, + /*isConst=*/true, + /*isTemporary=*/false, + /*isMutable=*/false); + + // Allocate storage for the string. + // The byte length does not include the null terminator. + unsigned I = Globals.size(); + unsigned Sz = Desc->getAllocSize(); + auto *G = new (Allocator, Sz) Global(Desc, /*isStatic=*/true, + /*isExtern=*/false); + Globals.push_back(G); + + // Construct the string in storage. + const Pointer Ptr(G->block()); + for (unsigned I = 0, N = S->getLength(); I <= N; ++I) { + Pointer Field = Ptr.atIndex(I).narrow(); + const uint32_t CodePoint = I == N ? 
0 : S->getCodeUnit(I); + switch (CharType) { + case PT_Sint8: { + using T = PrimConv::T; + Field.deref() = T::from(CodePoint, BitWidth); + break; + } + case PT_Uint16: { + using T = PrimConv::T; + Field.deref() = T::from(CodePoint, BitWidth); + break; + } + case PT_Uint32: { + using T = PrimConv::T; + Field.deref() = T::from(CodePoint, BitWidth); + break; + } + default: + llvm_unreachable("unsupported character type"); + } + } + return I; +} + +Pointer Program::getPtrGlobal(unsigned Idx) { + assert(Idx < Globals.size()); + return Pointer(Globals[Idx]->block()); +} + +llvm::Optional Program::getGlobal(const ValueDecl *VD) { + auto It = GlobalIndices.find(VD); + if (It != GlobalIndices.end()) + return It->second; + + // Find any previous declarations which were aleady evaluated. + llvm::Optional Index; + for (const Decl *P = VD; P; P = P->getPreviousDecl()) { + auto It = GlobalIndices.find(P); + if (It != GlobalIndices.end()) { + Index = It->second; + break; + } + } + + // Map the decl to the existing index. + if (Index) { + GlobalIndices[VD] = *Index; + return {}; + } + + return Index; +} + +llvm::Optional Program::getOrCreateGlobal(const ValueDecl *VD) { + if (auto Idx = getGlobal(VD)) + return Idx; + + if (auto Idx = createGlobal(VD)) { + GlobalIndices[VD] = *Idx; + return Idx; + } + return {}; +} + +llvm::Optional Program::getOrCreateDummy(const ParmVarDecl *PD) { + auto &ASTCtx = Ctx.getASTContext(); + + // Create a pointer to an incomplete array of the specified elements. + QualType ElemTy = PD->getType()->castAs()->getPointeeType(); + QualType Ty = ASTCtx.getIncompleteArrayType(ElemTy, ArrayType::Normal, 0); + + // Dedup blocks since they are immutable and pointers cannot be compared. 
+ auto It = DummyParams.find(PD); + if (It != DummyParams.end()) + return It->second; + + if (auto Idx = createGlobal(PD, Ty, /*isStatic=*/true, /*isExtern=*/true)) { + DummyParams[PD] = *Idx; + return Idx; + } + return {}; +} + +llvm::Optional Program::createGlobal(const ValueDecl *VD) { + bool IsStatic, IsExtern; + if (auto *Var = dyn_cast(VD)) { + IsStatic = !Var->hasLocalStorage(); + IsExtern = !Var->getAnyInitializer(); + } else { + IsStatic = false; + IsExtern = true; + } + if (auto Idx = createGlobal(VD, VD->getType(), IsStatic, IsExtern)) { + for (const Decl *P = VD; P; P = P->getPreviousDecl()) + GlobalIndices[P] = *Idx; + return *Idx; + } + return {}; +} + +llvm::Optional Program::createGlobal(const Expr *E) { + return createGlobal(E, E->getType(), /*isStatic=*/true, /*isExtern=*/false); +} + +llvm::Optional Program::createGlobal(const DeclTy &D, QualType Ty, + bool IsStatic, bool IsExtern) { + // Create a descriptor for the global. + Descriptor *Desc; + const bool IsConst = Ty.isConstQualified(); + const bool IsTemporary = D.dyn_cast(); + if (auto T = Ctx.classify(Ty)) { + Desc = createDescriptor(D, *T, IsConst, IsTemporary); + } else { + Desc = createDescriptor(D, Ty.getTypePtr(), IsConst, IsTemporary); + } + if (!Desc) + return {}; + + // Allocate a block for storage. + unsigned I = Globals.size(); + + auto *G = new (Allocator, Desc->getAllocSize()) + Global(getCurrentDecl(), Desc, IsStatic, IsExtern); + G->block()->invokeCtor(); + + Globals.push_back(G); + + return I; +} + +Function *Program::getFunction(const FunctionDecl *F) { + F = F->getDefinition(); + auto It = Funcs.find(F); + return It == Funcs.end() ? nullptr : It->second.get(); +} + +llvm::Expected Program::getOrCreateFunction(const FunctionDecl *F) { + if (Function *Func = getFunction(F)) { + return Func; + } + + // Try to compile the function if it wasn't compiled yet. 
+ if (const FunctionDecl *FD = F->getDefinition()) + return ByteCodeStmtGen(Ctx, *this).compileFunc(FD); + + // A relocation which traps if not resolved. + return nullptr; +} + +Record *Program::getOrCreateRecord(const RecordDecl *RD) { + // Use the actual definition as a key. + RD = RD->getDefinition(); + if (!RD) + return nullptr; + + // Deduplicate records. + auto It = Records.find(RD); + if (It != Records.end()) { + return It->second; + } + + // Number of bytes required by fields and base classes. + unsigned Size = 0; + // Number of bytes required by virtual base. + unsigned VirtSize = 0; + + // Helper to get a base descriptor. + auto GetBaseDesc = [this](const RecordDecl *BD, Record *BR) -> Descriptor * { + if (!BR) + return nullptr; + return allocateDescriptor(BD, BR, /*isConst=*/false, + /*isTemporary=*/false, + /*isMutable=*/false); + }; + + // Reserve space for base classes. + Record::BaseList Bases; + Record::VirtualBaseList VirtBases; + if (auto *CD = dyn_cast(RD)) { + for (const CXXBaseSpecifier &Spec : CD->bases()) { + if (Spec.isVirtual()) + continue; + + const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); + Record *BR = getOrCreateRecord(BD); + if (Descriptor *Desc = GetBaseDesc(BD, BR)) { + Size += align(sizeof(InlineDescriptor)); + Bases.push_back({BD, Size, Desc, BR}); + Size += align(BR->getSize()); + continue; + } + return nullptr; + } + + for (const CXXBaseSpecifier &Spec : CD->vbases()) { + const RecordDecl *BD = Spec.getType()->castAs()->getDecl(); + Record *BR = getOrCreateRecord(BD); + + if (Descriptor *Desc = GetBaseDesc(BD, BR)) { + VirtSize += align(sizeof(InlineDescriptor)); + VirtBases.push_back({BD, VirtSize, Desc, BR}); + VirtSize += align(BR->getSize()); + continue; + } + return nullptr; + } + } + + // Reserve space for fields. + Record::FieldList Fields; + for (const FieldDecl *FD : RD->fields()) { + // Reserve space for the field's descriptor and the offset. 
+ Size += align(sizeof(InlineDescriptor)); + + // Classify the field and add its metadata. + QualType FT = FD->getType(); + const bool IsConst = FT.isConstQualified(); + const bool IsMutable = FD->isMutable(); + Descriptor *Desc; + if (llvm::Optional T = Ctx.classify(FT)) { + Desc = createDescriptor(FD, *T, IsConst, /*isTemporary=*/false, + IsMutable); + } else { + Desc = createDescriptor(FD, FT.getTypePtr(), IsConst, + /*isTemporary=*/false, IsMutable); + } + if (!Desc) + return nullptr; + Fields.push_back({FD, Size, Desc}); + Size += align(Desc->getAllocSize()); + } + + Record *R = new (Allocator) Record(RD, std::move(Bases), std::move(Fields), + std::move(VirtBases), VirtSize, Size); + Records.insert({RD, R}); + return R; +} + +Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, + bool IsConst, bool IsTemporary, + bool IsMutable) { + // Classes and structures. + if (auto *RT = Ty->getAs()) { + if (auto *Record = getOrCreateRecord(RT->getDecl())) + return allocateDescriptor(D, Record, IsConst, IsTemporary, IsMutable); + } + + // Arrays. + if (auto ArrayType = Ty->getAsArrayTypeUnsafe()) { + QualType ElemTy = ArrayType->getElementType(); + // Array of well-known bounds. + if (auto CAT = dyn_cast(ArrayType)) { + size_t NumElems = CAT->getSize().getZExtValue(); + if (llvm::Optional T = Ctx.classify(ElemTy)) { + // Arrays of primitives. + unsigned ElemSize = primSize(*T); + if (std::numeric_limits::max() / ElemSize <= NumElems) { + return {}; + } + return allocateDescriptor(D, *T, NumElems, IsConst, IsTemporary, + IsMutable); + } else { + // Arrays of composites. In this case, the array is a list of pointers, + // followed by the actual elements. 
+ Descriptor *Desc = + createDescriptor(D, ElemTy.getTypePtr(), IsConst, IsTemporary); + if (!Desc) + return nullptr; + InterpSize ElemSize = Desc->getAllocSize() + sizeof(InlineDescriptor); + if (std::numeric_limits::max() / ElemSize <= NumElems) + return {}; + return allocateDescriptor(D, Desc, NumElems, IsConst, IsTemporary, + IsMutable); + } + } + + // Array of unknown bounds - cannot be accessed and pointer arithmetic + // is forbidden on pointers to such objects. + if (isa(ArrayType)) { + if (llvm::Optional T = Ctx.classify(ElemTy)) { + return allocateDescriptor(D, *T, IsTemporary, + Descriptor::UnknownSize{}); + } else { + Descriptor *Desc = + createDescriptor(D, ElemTy.getTypePtr(), IsConst, IsTemporary); + if (!Desc) + return nullptr; + return allocateDescriptor(D, Desc, IsTemporary, + Descriptor::UnknownSize{}); + } + } + } + + // Atomic types. + if (auto *AT = Ty->getAs()) { + const Type *InnerTy = AT->getValueType().getTypePtr(); + return createDescriptor(D, InnerTy, IsConst, IsTemporary, IsMutable); + } + + // Complex types - represented as arrays of elements. + if (auto *CT = Ty->getAs()) { + PrimType ElemTy = *Ctx.classify(CT->getElementType()); + return allocateDescriptor(D, ElemTy, 2, IsConst, IsTemporary, IsMutable); + } + + return nullptr; +} diff --git a/lib/AST/Interp/Program.h b/lib/AST/Interp/Program.h new file mode 100644 index 00000000000..5f0012db9b3 --- /dev/null +++ b/lib/AST/Interp/Program.h @@ -0,0 +1,220 @@ +//===--- Program.h - Bytecode for the constexpr VM --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a program which organises and links multiple bytecode functions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_PROGRAM_H +#define LLVM_CLANG_AST_INTERP_PROGRAM_H + +#include +#include +#include "Function.h" +#include "Pointer.h" +#include "PrimType.h" +#include "Record.h" +#include "Source.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +class RecordDecl; +class Expr; +class FunctionDecl; +class Stmt; +class StringLiteral; +class VarDecl; + +namespace interp { +class Context; +class State; +class Record; +class Scope; + +/// The program contains and links the bytecode for all functions. +class Program { +public: + Program(Context &Ctx) : Ctx(Ctx) {} + + /// Emits a string literal among global data. + unsigned createGlobalString(const StringLiteral *S); + + /// Returns a pointer to a global. + Pointer getPtrGlobal(unsigned Idx); + + /// Returns the value of a global. + Block *getGlobal(unsigned Idx) { + assert(Idx < Globals.size()); + return Globals[Idx]->block(); + } + + /// Finds a global's index. + llvm::Optional getGlobal(const ValueDecl *VD); + + /// Returns or creates a global an creates an index to it. + llvm::Optional getOrCreateGlobal(const ValueDecl *VD); + + /// Returns or creates a dummy value for parameters. + llvm::Optional getOrCreateDummy(const ParmVarDecl *PD); + + /// Creates a global and returns its index. + llvm::Optional createGlobal(const ValueDecl *VD); + + /// Creates a global from a lifetime-extended temporary. + llvm::Optional createGlobal(const Expr *E); + + /// Creates a new function from a code range. + template + Function *createFunction(const FunctionDecl *Def, Ts &&... Args) { + auto *Func = new Function(*this, Def, std::forward(Args)...); + Funcs.insert({Def, std::unique_ptr(Func)}); + return Func; + } + /// Creates an anonymous function. + template + Function *createFunction(Ts &&... 
Args) { + auto *Func = new Function(*this, std::forward(Args)...); + AnonFuncs.emplace_back(Func); + return Func; + } + + /// Returns a function. + Function *getFunction(const FunctionDecl *F); + + /// Returns a pointer to a function if it exists and can be compiled. + /// If a function couldn't be compiled, an error is returned. + /// If a function was not yet defined, a null pointer is returned. + llvm::Expected getOrCreateFunction(const FunctionDecl *F); + + /// Returns a record or creates one if it does not exist. + Record *getOrCreateRecord(const RecordDecl *RD); + + /// Creates a descriptor for a primitive type. + Descriptor *createDescriptor(const DeclTy &D, PrimType Type, + bool IsConst = false, + bool IsTemporary = false, + bool IsMutable = false) { + return allocateDescriptor(D, Type, IsConst, IsTemporary, IsMutable); + } + + /// Creates a descriptor for a composite type. + Descriptor *createDescriptor(const DeclTy &D, const Type *Ty, + bool IsConst = false, bool IsTemporary = false, + bool IsMutable = false); + + /// Context to manage declaration lifetimes. + class DeclScope { + public: + DeclScope(Program &P, const VarDecl *VD) : P(P) { P.startDeclaration(VD); } + ~DeclScope() { P.endDeclaration(); } + + private: + Program &P; + }; + + /// Returns the current declaration ID. + llvm::Optional getCurrentDecl() const { + if (CurrentDeclaration == NoDeclaration) + return llvm::Optional{}; + return LastDeclaration; + } + +private: + friend class DeclScope; + + llvm::Optional createGlobal(const DeclTy &D, QualType Ty, + bool IsStatic, bool IsExtern); + + /// Reference to the VM context. + Context &Ctx; + /// Mapping from decls to cached bytecode functions. + llvm::DenseMap> Funcs; + /// List of anonymous functions. + std::vector> AnonFuncs; + + /// Function relocation locations. + llvm::DenseMap> Relocs; + + /// Custom allocator for global storage. + using PoolAllocTy = llvm::BumpPtrAllocatorImpl; + + /// Descriptor + storage for a global object. 
+ /// + /// Global objects never go out of scope, thus they do not track pointers. + class Global { + public: + /// Create a global descriptor for string literals. + template + Global(Tys... Args) : B(std::forward(Args)...) {} + + /// Allocates the global in the pool, reserving storage for data. + void *operator new(size_t Meta, PoolAllocTy &Alloc, size_t Data) { + return Alloc.Allocate(Meta + Data, alignof(void *)); + } + + /// Return a pointer to the data. + char *data() { return B.data(); } + /// Return a pointer to the block. + Block *block() { return &B; } + + private: + /// Required metadata - does not actually track pointers. + Block B; + }; + + /// Allocator for globals. + PoolAllocTy Allocator; + + /// Global objects. + std::vector Globals; + /// Cached global indices. + llvm::DenseMap GlobalIndices; + + /// Mapping from decls to record metadata. + llvm::DenseMap Records; + + /// Dummy parameter to generate pointers from. + llvm::DenseMap DummyParams; + + /// Creates a new descriptor. + template + Descriptor *allocateDescriptor(Ts &&... Args) { + return new (Allocator) Descriptor(std::forward(Args)...); + } + + /// No declaration ID. + static constexpr unsigned NoDeclaration = (unsigned)-1; + /// Last declaration ID. + unsigned LastDeclaration = 0; + /// Current declaration ID. + unsigned CurrentDeclaration = NoDeclaration; + + /// Starts evaluating a declaration. + void startDeclaration(const VarDecl *Decl) { + LastDeclaration += 1; + CurrentDeclaration = LastDeclaration; + } + + /// Ends a global declaration. + void endDeclaration() { + CurrentDeclaration = NoDeclaration; + } + +public: + /// Dumps the disassembled bytecode to \c llvm::errs(). 
+ void dump() const; + void dump(llvm::raw_ostream &OS) const; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Record.cpp b/lib/AST/Interp/Record.cpp new file mode 100644 index 00000000000..f440c470505 --- /dev/null +++ b/lib/AST/Interp/Record.cpp @@ -0,0 +1,46 @@ +//===--- Record.cpp - struct and class metadata for the VM ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Record.h" + +using namespace clang; +using namespace clang::interp; + +Record::Record(const RecordDecl *Decl, BaseList &&SrcBases, + FieldList &&SrcFields, VirtualBaseList &&SrcVirtualBases, + unsigned VirtualSize, unsigned BaseSize) + : Decl(Decl), Bases(std::move(SrcBases)), Fields(std::move(SrcFields)), + BaseSize(BaseSize), VirtualSize(VirtualSize) { + for (Base &V : SrcVirtualBases) + VirtualBases.push_back({ V.Decl, V.Offset + BaseSize, V.Desc, V.R }); + + for (Base &B : Bases) + BaseMap[B.Decl] = &B; + for (Field &F : Fields) + FieldMap[F.Decl] = &F; + for (Base &V : VirtualBases) + VirtualBaseMap[V.Decl] = &V; +} + +const Record::Field *Record::getField(const FieldDecl *FD) const { + auto It = FieldMap.find(FD); + assert(It != FieldMap.end() && "Missing field"); + return It->second; +} + +const Record::Base *Record::getBase(const RecordDecl *FD) const { + auto It = BaseMap.find(FD); + assert(It != BaseMap.end() && "Missing base"); + return It->second; +} + +const Record::Base *Record::getVirtualBase(const RecordDecl *FD) const { + auto It = VirtualBaseMap.find(FD); + assert(It != VirtualBaseMap.end() && "Missing virtual base"); + return It->second; +} diff --git a/lib/AST/Interp/Record.h b/lib/AST/Interp/Record.h new file mode 100644 index 00000000000..9cdee900375 --- 
/dev/null +++ b/lib/AST/Interp/Record.h @@ -0,0 +1,121 @@ +//===--- Record.h - struct and class metadata for the VM --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A record is part of a program to describe the layout and methods of a struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_RECORD_H +#define LLVM_CLANG_AST_INTERP_RECORD_H + +#include "Pointer.h" + +namespace clang { +namespace interp { +class Program; + +/// Structure/Class descriptor. +class Record { +public: + /// Describes a record field. + struct Field { + const FieldDecl *Decl; + unsigned Offset; + Descriptor *Desc; + }; + + /// Describes a base class. + struct Base { + const RecordDecl *Decl; + unsigned Offset; + Descriptor *Desc; + Record *R; + }; + + /// Mapping from identifiers to field descriptors. + using FieldList = llvm::SmallVector; + /// Mapping from identifiers to base classes. + using BaseList = llvm::SmallVector; + /// List of virtual base classes. + using VirtualBaseList = llvm::SmallVector; + +public: + /// Returns the underlying declaration. + const RecordDecl *getDecl() const { return Decl; } + /// Checks if the record is a union. + bool isUnion() const { return getDecl()->isUnion(); } + /// Returns the size of the record. + unsigned getSize() const { return BaseSize; } + /// Returns the full size of the record, including virtual bases. + unsigned getFullSize() const { return BaseSize + VirtualSize; } + /// Returns a field. + const Field *getField(const FieldDecl *FD) const; + /// Returns a base descriptor. + const Base *getBase(const RecordDecl *FD) const; + /// Returns a virtual base descriptor. 
+ const Base *getVirtualBase(const RecordDecl *RD) const; + + using const_field_iter = FieldList::const_iterator; + llvm::iterator_range fields() const { + return llvm::make_range(Fields.begin(), Fields.end()); + } + + unsigned getNumFields() { return Fields.size(); } + Field *getField(unsigned I) { return &Fields[I]; } + + using const_base_iter = BaseList::const_iterator; + llvm::iterator_range bases() const { + return llvm::make_range(Bases.begin(), Bases.end()); + } + + unsigned getNumBases() { return Bases.size(); } + Base *getBase(unsigned I) { return &Bases[I]; } + + using const_virtual_iter = VirtualBaseList::const_iterator; + llvm::iterator_range virtual_bases() const { + return llvm::make_range(VirtualBases.begin(), VirtualBases.end()); + } + + unsigned getNumVirtualBases() { return VirtualBases.size(); } + Base *getVirtualBase(unsigned I) { return &VirtualBases[I]; } + +private: + /// Constructor used by Program to create record descriptors. + Record(const RecordDecl *, BaseList &&Bases, FieldList &&Fields, + VirtualBaseList &&VirtualBases, unsigned VirtualSize, + unsigned BaseSize); + +private: + friend class Program; + + /// Original declaration. + const RecordDecl *Decl; + /// List of all base classes. + BaseList Bases; + /// List of all the fields in the record. + FieldList Fields; + /// List of all virtual bases. + VirtualBaseList VirtualBases; + + /// Mapping from declarations to bases. + llvm::DenseMap BaseMap; + /// Mapping from field identifiers to descriptors. + llvm::DenseMap FieldMap; + /// Mapping from declarations to virtual bases. + llvm::DenseMap VirtualBaseMap; + /// Mapping from + /// Size of the structure. + unsigned BaseSize; + /// Size of all virtual bases. 
+ unsigned VirtualSize; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/Source.cpp b/lib/AST/Interp/Source.cpp new file mode 100644 index 00000000000..4bec8781263 --- /dev/null +++ b/lib/AST/Interp/Source.cpp @@ -0,0 +1,39 @@ +//===--- Source.cpp - Source expression tracking ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Source.h" +#include "clang/AST/Expr.h" + +using namespace clang; +using namespace clang::interp; + +SourceLocation SourceInfo::getLoc() const { + if (const Expr *E = asExpr()) + return E->getExprLoc(); + if (const Stmt *S = asStmt()) + return S->getBeginLoc(); + if (const Decl *D = asDecl()) + return D->getBeginLoc(); + return SourceLocation(); +} + +const Expr *SourceInfo::asExpr() const { + if (auto *S = Source.dyn_cast()) + return dyn_cast(S); + return nullptr; +} + +const Expr *SourceMapper::getExpr(Function *F, CodePtr PC) const { + if (const Expr *E = getSource(F, PC).asExpr()) + return E; + llvm::report_fatal_error("missing source expression"); +} + +SourceLocation SourceMapper::getLocation(Function *F, CodePtr PC) const { + return getSource(F, PC).getLoc(); +} diff --git a/lib/AST/Interp/Source.h b/lib/AST/Interp/Source.h new file mode 100644 index 00000000000..e591c3399d7 --- /dev/null +++ b/lib/AST/Interp/Source.h @@ -0,0 +1,118 @@ +//===--- Source.h - Source location provider for the VM --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines code pointers and the mapping from opcodes to their source. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_SOURCE_H +#define LLVM_CLANG_AST_INTERP_SOURCE_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "llvm/Support/Endian.h" + +namespace clang { +namespace interp { +class Function; + +/// Pointer into the code segment. +class CodePtr { +public: + CodePtr() : Ptr(nullptr) {} + + CodePtr &operator+=(int32_t Offset) { + Ptr += Offset; + return *this; + } + + int32_t operator-(const CodePtr &RHS) const { + assert(Ptr != nullptr && RHS.Ptr != nullptr && "Invalid code pointer"); + return Ptr - RHS.Ptr; + } + + CodePtr operator-(size_t RHS) const { + assert(Ptr != nullptr && "Invalid code pointer"); + return CodePtr(Ptr - RHS); + } + + bool operator!=(const CodePtr &RHS) const { return Ptr != RHS.Ptr; } + + /// Reads data and advances the pointer. + template T read() { + T Value = ReadHelper(Ptr); + Ptr += sizeof(T); + return Value; + } + +private: + /// Constructor used by Function to generate pointers. + CodePtr(const char *Ptr) : Ptr(Ptr) {} + + /// Helper to decode a value or a pointer. + template + static typename std::enable_if::value, T>::type + ReadHelper(const char *Ptr) { + using namespace llvm::support; + return endian::read(Ptr); + } + + template + static typename std::enable_if::value, T>::type + ReadHelper(const char *Ptr) { + using namespace llvm::support; + auto Punned = endian::read(Ptr); + return reinterpret_cast(Punned); + } + +private: + friend class Function; + + /// Pointer into the code owned by a function. + const char *Ptr; +}; + +/// Describes the statement/declaration an opcode was generated from. 
+class SourceInfo { +public: + SourceInfo() {} + SourceInfo(const Stmt *E) : Source(E) {} + SourceInfo(const Decl *D) : Source(D) {} + + SourceLocation getLoc() const; + + const Stmt *asStmt() const { return Source.dyn_cast(); } + const Decl *asDecl() const { return Source.dyn_cast(); } + const Expr *asExpr() const; + + operator bool() const { return !Source.isNull(); } + +private: + llvm::PointerUnion Source; +}; + +using SourceMap = std::vector>; + +/// Interface for classes which map locations to sources. +class SourceMapper { +public: + virtual ~SourceMapper() {} + + /// Returns source information for a given PC in a function. + virtual SourceInfo getSource(Function *F, CodePtr PC) const = 0; + + /// Returns the expression if an opcode belongs to one, null otherwise. + const Expr *getExpr(Function *F, CodePtr PC) const; + /// Returns the location from which an opcode originates. + SourceLocation getLocation(Function *F, CodePtr PC) const; +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/Interp/State.cpp b/lib/AST/Interp/State.cpp new file mode 100644 index 00000000000..692cc2e8d69 --- /dev/null +++ b/lib/AST/Interp/State.cpp @@ -0,0 +1,158 @@ +//===--- State.cpp - State chain for the VM and AST Walker ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "State.h" +#include "Frame.h" +#include "Program.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CXXInheritance.h" + +using namespace clang; +using namespace clang::interp; + +State::~State() {} + +OptionalDiagnostic State::FFDiag(SourceLocation Loc, diag::kind DiagId, + unsigned ExtraNotes) { + return diag(Loc, DiagId, ExtraNotes, false); +} + +OptionalDiagnostic State::FFDiag(const Expr *E, diag::kind DiagId, + unsigned ExtraNotes) { + if (getEvalStatus().Diag) + return diag(E->getExprLoc(), DiagId, ExtraNotes, false); + setActiveDiagnostic(false); + return OptionalDiagnostic(); +} + +OptionalDiagnostic State::FFDiag(const SourceInfo &SI, diag::kind DiagId, + unsigned ExtraNotes) { + if (getEvalStatus().Diag) + return diag(SI.getLoc(), DiagId, ExtraNotes, false); + setActiveDiagnostic(false); + return OptionalDiagnostic(); +} + +OptionalDiagnostic State::CCEDiag(SourceLocation Loc, diag::kind DiagId, + unsigned ExtraNotes) { + // Don't override a previous diagnostic. Don't bother collecting + // diagnostics if we're evaluating for overflow. 
+ if (!getEvalStatus().Diag || !getEvalStatus().Diag->empty()) { + setActiveDiagnostic(false); + return OptionalDiagnostic(); + } + return diag(Loc, DiagId, ExtraNotes, true); +} + +OptionalDiagnostic State::CCEDiag(const Expr *E, diag::kind DiagId, + unsigned ExtraNotes) { + return CCEDiag(E->getExprLoc(), DiagId, ExtraNotes); +} + +OptionalDiagnostic State::CCEDiag(const SourceInfo &SI, diag::kind DiagId, + unsigned ExtraNotes) { + return CCEDiag(SI.getLoc(), DiagId, ExtraNotes); +} + +OptionalDiagnostic State::Note(SourceLocation Loc, diag::kind DiagId) { + if (!hasActiveDiagnostic()) + return OptionalDiagnostic(); + return OptionalDiagnostic(&addDiag(Loc, DiagId)); +} + +void State::addNotes(ArrayRef Diags) { + if (hasActiveDiagnostic()) { + getEvalStatus().Diag->insert(getEvalStatus().Diag->end(), Diags.begin(), + Diags.end()); + } +} + +DiagnosticBuilder State::report(SourceLocation Loc, diag::kind DiagId) { + return getCtx().getDiagnostics().Report(Loc, DiagId); +} + +/// Add a diagnostic to the diagnostics list. 
+PartialDiagnostic &State::addDiag(SourceLocation Loc, diag::kind DiagId) { + PartialDiagnostic PD(DiagId, getCtx().getDiagAllocator()); + getEvalStatus().Diag->push_back(std::make_pair(Loc, PD)); + return getEvalStatus().Diag->back().second; +} + +OptionalDiagnostic State::diag(SourceLocation Loc, diag::kind DiagId, + unsigned ExtraNotes, bool IsCCEDiag) { + Expr::EvalStatus &EvalStatus = getEvalStatus(); + if (EvalStatus.Diag) { + if (hasPriorDiagnostic()) { + return OptionalDiagnostic(); + } + + unsigned CallStackNotes = getCallStackDepth() - 1; + unsigned Limit = getCtx().getDiagnostics().getConstexprBacktraceLimit(); + if (Limit) + CallStackNotes = std::min(CallStackNotes, Limit + 1); + if (checkingPotentialConstantExpression()) + CallStackNotes = 0; + + setActiveDiagnostic(true); + setFoldFailureDiagnostic(!IsCCEDiag); + EvalStatus.Diag->clear(); + EvalStatus.Diag->reserve(1 + ExtraNotes + CallStackNotes); + addDiag(Loc, DiagId); + if (!checkingPotentialConstantExpression()) { + addCallStack(Limit); + } + return OptionalDiagnostic(&(*EvalStatus.Diag)[0].second); + } + setActiveDiagnostic(false); + return OptionalDiagnostic(); +} + +const LangOptions &State::getLangOpts() const { return getCtx().getLangOpts(); } + +void State::addCallStack(unsigned Limit) { + // Determine which calls to skip, if any. + unsigned ActiveCalls = getCallStackDepth() - 1; + unsigned SkipStart = ActiveCalls, SkipEnd = SkipStart; + if (Limit && Limit < ActiveCalls) { + SkipStart = Limit / 2 + Limit % 2; + SkipEnd = ActiveCalls - Limit / 2; + } + + // Walk the call stack and add the diagnostics. + unsigned CallIdx = 0; + Frame *Top = getCurrentFrame(); + const Frame *Bottom = getBottomFrame(); + for (Frame *F = Top; F != Bottom; F = F->getCaller(), ++CallIdx) { + SourceLocation CallLocation = F->getCallLocation(); + + // Skip this call? + if (CallIdx >= SkipStart && CallIdx < SkipEnd) { + if (CallIdx == SkipStart) { + // Note that we're skipping calls. 
+ addDiag(CallLocation, diag::note_constexpr_calls_suppressed) + << unsigned(ActiveCalls - Limit); + } + continue; + } + + // Use a different note for an inheriting constructor, because from the + // user's perspective it's not really a function at all. + if (auto *CD = dyn_cast_or_null(F->getCallee())) { + if (CD->isInheritingConstructor()) { + addDiag(CallLocation, diag::note_constexpr_inherited_ctor_call_here) + << CD->getParent(); + continue; + } + } + + SmallVector Buffer; + llvm::raw_svector_ostream Out(Buffer); + F->describe(Out); + addDiag(CallLocation, diag::note_constexpr_call_here) << Out.str(); + } +} diff --git a/lib/AST/Interp/State.h b/lib/AST/Interp/State.h new file mode 100644 index 00000000000..d9a645a3eb3 --- /dev/null +++ b/lib/AST/Interp/State.h @@ -0,0 +1,133 @@ +//===--- State.h - State chain for the VM and AST Walker --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the base class of the interpreter and evaluator state. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_INTERP_STATE_H +#define LLVM_CLANG_AST_INTERP_STATE_H + +#include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/Expr.h" +#include "clang/AST/OptionalDiagnostic.h" + +namespace clang { + +/// Kinds of access we can perform on an object, for diagnostics. Note that +/// we consider a member function call to be a kind of access, even though +/// it is not formally an access of the object, because it has (largely) the +/// same set of semantic restrictions. 
+enum AccessKinds { + AK_Read, + AK_ReadObjectRepresentation, + AK_Assign, + AK_Increment, + AK_Decrement, + AK_MemberCall, + AK_DynamicCast, + AK_TypeId, + AK_Construct, + AK_Destroy, +}; + +// The order of this enum is important for diagnostics. +enum CheckSubobjectKind { + CSK_Base, + CSK_Derived, + CSK_Field, + CSK_ArrayToPointer, + CSK_ArrayIndex, + CSK_Real, + CSK_Imag +}; + +namespace interp { +class Frame; +class SourceInfo; + +/// Interface for the VM to interact with the AST walker's context. +class State { +public: + virtual ~State(); + + virtual bool checkingForUndefinedBehavior() const = 0; + virtual bool checkingPotentialConstantExpression() const = 0; + virtual bool noteUndefinedBehavior() = 0; + virtual bool keepEvaluatingAfterFailure() const = 0; + virtual Frame *getCurrentFrame() = 0; + virtual const Frame *getBottomFrame() const = 0; + virtual bool hasActiveDiagnostic() = 0; + virtual void setActiveDiagnostic(bool Flag) = 0; + virtual void setFoldFailureDiagnostic(bool Flag) = 0; + virtual Expr::EvalStatus &getEvalStatus() const = 0; + virtual ASTContext &getCtx() const = 0; + virtual bool hasPriorDiagnostic() = 0; + virtual unsigned getCallStackDepth() = 0; + +public: + // Diagnose that the evaluation could not be folded (FF => FoldFailure) + OptionalDiagnostic + FFDiag(SourceLocation Loc, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + OptionalDiagnostic + FFDiag(const Expr *E, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + OptionalDiagnostic + FFDiag(const SourceInfo &SI, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + /// Diagnose that the evaluation does not produce a C++11 core constant + /// expression. + /// + /// FIXME: Stop evaluating if we're in EM_ConstantExpression or + /// EM_PotentialConstantExpression mode and we produce one of these. 
+ OptionalDiagnostic + CCEDiag(SourceLocation Loc, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + OptionalDiagnostic + CCEDiag(const Expr *E, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + OptionalDiagnostic + CCEDiag(const SourceInfo &SI, + diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr, + unsigned ExtraNotes = 0); + + /// Add a note to a prior diagnostic. + OptionalDiagnostic Note(SourceLocation Loc, diag::kind DiagId); + + /// Add a stack of notes to a prior diagnostic. + void addNotes(ArrayRef Diags); + + /// Directly reports a diagnostic message. + DiagnosticBuilder report(SourceLocation Loc, diag::kind DiagId); + + const LangOptions &getLangOpts() const; + +private: + void addCallStack(unsigned Limit); + + PartialDiagnostic &addDiag(SourceLocation Loc, diag::kind DiagId); + + OptionalDiagnostic diag(SourceLocation Loc, diag::kind DiagId, + unsigned ExtraNotes, bool IsCCEDiag); +}; + +} // namespace interp +} // namespace clang + +#endif diff --git a/lib/AST/ItaniumCXXABI.cpp b/lib/AST/ItaniumCXXABI.cpp index 727a905d08a..069add8464a 100644 --- a/lib/AST/ItaniumCXXABI.cpp +++ b/lib/AST/ItaniumCXXABI.cpp @@ -19,10 +19,12 @@ #include "CXXABI.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/Mangle.h" #include "clang/AST/MangleNumberingContext.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/Type.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/iterator.h" using namespace clang; @@ -73,10 +75,33 @@ struct DecompositionDeclName { } namespace llvm { +template bool isDenseMapKeyEmpty(T V) { + return llvm::DenseMapInfo::isEqual( + V, llvm::DenseMapInfo::getEmptyKey()); +} +template bool isDenseMapKeyTombstone(T V) { + return llvm::DenseMapInfo::isEqual( + V, llvm::DenseMapInfo::getTombstoneKey()); +} + +template +Optional areDenseMapKeysEqualSpecialValues(T LHS, T 
RHS) { + bool LHSEmpty = isDenseMapKeyEmpty(LHS); + bool RHSEmpty = isDenseMapKeyEmpty(RHS); + if (LHSEmpty || RHSEmpty) + return LHSEmpty && RHSEmpty; + + bool LHSTombstone = isDenseMapKeyTombstone(LHS); + bool RHSTombstone = isDenseMapKeyTombstone(RHS); + if (LHSTombstone || RHSTombstone) + return LHSTombstone && RHSTombstone; + + return None; +} + template<> struct DenseMapInfo { using ArrayInfo = llvm::DenseMapInfo>; - using IdentInfo = llvm::DenseMapInfo; static DecompositionDeclName getEmptyKey() { return {ArrayInfo::getEmptyKey()}; } @@ -88,10 +113,10 @@ struct DenseMapInfo { return llvm::hash_combine_range(Key.begin(), Key.end()); } static bool isEqual(DecompositionDeclName LHS, DecompositionDeclName RHS) { - if (ArrayInfo::isEqual(LHS.Bindings, ArrayInfo::getEmptyKey())) - return ArrayInfo::isEqual(RHS.Bindings, ArrayInfo::getEmptyKey()); - if (ArrayInfo::isEqual(LHS.Bindings, ArrayInfo::getTombstoneKey())) - return ArrayInfo::isEqual(RHS.Bindings, ArrayInfo::getTombstoneKey()); + if (Optional Result = areDenseMapKeysEqualSpecialValues( + LHS.Bindings, RHS.Bindings)) + return *Result; + return LHS.Bindings.size() == RHS.Bindings.size() && std::equal(LHS.begin(), LHS.end(), RHS.begin()); } @@ -103,29 +128,32 @@ namespace { /// Keeps track of the mangled names of lambda expressions and block /// literals within a particular context. 
class ItaniumNumberingContext : public MangleNumberingContext { - llvm::DenseMap ManglingNumbers; + ItaniumMangleContext *Mangler; + llvm::StringMap LambdaManglingNumbers; + unsigned BlockManglingNumber = 0; llvm::DenseMap VarManglingNumbers; llvm::DenseMap TagManglingNumbers; llvm::DenseMap DecompsitionDeclManglingNumbers; public: - unsigned getManglingNumber(const CXXMethodDecl *CallOperator) override { - const FunctionProtoType *Proto = - CallOperator->getType()->getAs(); - ASTContext &Context = CallOperator->getASTContext(); + ItaniumNumberingContext(ItaniumMangleContext *Mangler) : Mangler(Mangler) {} - FunctionProtoType::ExtProtoInfo EPI; - EPI.Variadic = Proto->isVariadic(); - QualType Key = - Context.getFunctionType(Context.VoidTy, Proto->getParamTypes(), EPI); - Key = Context.getCanonicalType(Key); - return ++ManglingNumbers[Key->castAs()]; + unsigned getManglingNumber(const CXXMethodDecl *CallOperator) override { + const CXXRecordDecl *Lambda = CallOperator->getParent(); + assert(Lambda->isLambda()); + + // Computation of the <lambda-sig> is non-trivial and subtle. Rather than + // duplicating it here, just mangle the <lambda-sig> directly. 
+ llvm::SmallString<128> LambdaSig; + llvm::raw_svector_ostream Out(LambdaSig); + Mangler->mangleLambdaSig(Lambda, Out); + + return ++LambdaManglingNumbers[LambdaSig]; } unsigned getManglingNumber(const BlockDecl *BD) override { - const Type *Ty = nullptr; - return ++ManglingNumbers[Ty]; + return ++BlockManglingNumber; } unsigned getStaticLocalNumber(const VarDecl *VD) override { @@ -154,10 +182,13 @@ public: }; class ItaniumCXXABI : public CXXABI { +private: + std::unique_ptr Mangler; protected: ASTContext &Context; public: - ItaniumCXXABI(ASTContext &Ctx) : Context(Ctx) { } + ItaniumCXXABI(ASTContext &Ctx) + : Mangler(Ctx.createMangleContext()), Context(Ctx) {} MemberPointerInfo getMemberPointerInfo(const MemberPointerType *MPT) const override { @@ -177,7 +208,7 @@ public: if (!isVariadic && T.isWindowsGNUEnvironment() && T.getArch() == llvm::Triple::x86) return CC_X86ThisCall; - return CC_C; + return Context.getTargetInfo().getDefaultCallingConv(); } // We cheat and just check that the class has a vtable pointer, and that it's @@ -218,7 +249,8 @@ public: std::unique_ptr createMangleNumberingContext() const override { - return llvm::make_unique(); + return std::make_unique( + cast(Mangler.get())); } }; } diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp index 6c813f09a4b..c55a9013757 100644 --- a/lib/AST/ItaniumMangle.cpp +++ b/lib/AST/ItaniumMangle.cpp @@ -170,6 +170,8 @@ public: void mangleStringLiteral(const StringLiteral *, raw_ostream &) override; + void mangleLambdaSig(const CXXRecordDecl *Lambda, raw_ostream &) override; + bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { // Lambda closure types are already numbered. 
if (isLambda(ND)) @@ -424,6 +426,7 @@ public: void mangleName(const NamedDecl *ND); void mangleType(QualType T); void mangleNameOrStandardSubstitution(const NamedDecl *ND); + void mangleLambdaSig(const CXXRecordDecl *Lambda); private: @@ -513,7 +516,7 @@ private: #define ABSTRACT_TYPE(CLASS, PARENT) #define NON_CANONICAL_TYPE(CLASS, PARENT) #define TYPE(CLASS, PARENT) void mangleType(const CLASS##Type *T); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" void mangleType(const TagType*); void mangleType(TemplateName); @@ -550,7 +553,7 @@ private: void mangleTemplateArgs(const TemplateArgumentList &AL); void mangleTemplateArg(TemplateArgument A); - void mangleTemplateParameter(unsigned Index); + void mangleTemplateParameter(unsigned Depth, unsigned Index); void mangleFunctionParam(const ParmVarDecl *parm); @@ -965,8 +968,8 @@ void CXXNameMangler::mangleUnscopedTemplateName( if (const auto *TTP = dyn_cast(ND)) { assert(!AdditionalAbiTags && "template template param cannot have abi tags"); - mangleTemplateParameter(TTP->getIndex()); - } else if (isa(ND)) { + mangleTemplateParameter(TTP->getDepth(), TTP->getIndex()); + } else if (isa(ND) || isa(ND)) { mangleUnscopedName(ND, AdditionalAbiTags); } else { mangleUnscopedName(ND->getTemplatedDecl(), AdditionalAbiTags); @@ -1321,7 +1324,7 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, if (const VarDecl *VD = dyn_cast(ND)) { // We must have an anonymous union or struct declaration. 
- const RecordDecl *RD = VD->getType()->getAs()->getDecl(); + const RecordDecl *RD = VD->getType()->castAs()->getDecl(); // Itanium C++ ABI 5.1.2: // @@ -1685,17 +1688,45 @@ void CXXNameMangler::mangleUnqualifiedBlock(const BlockDecl *Block) { // ::= Ty # template type parameter // ::= Tn # template non-type parameter // ::= Tt * E # template template parameter +// ::= Tp # template parameter pack void CXXNameMangler::mangleTemplateParamDecl(const NamedDecl *Decl) { - if (isa(Decl)) { + if (auto *Ty = dyn_cast(Decl)) { + if (Ty->isParameterPack()) + Out << "Tp"; Out << "Ty"; } else if (auto *Tn = dyn_cast(Decl)) { - Out << "Tn"; - mangleType(Tn->getType()); + if (Tn->isExpandedParameterPack()) { + for (unsigned I = 0, N = Tn->getNumExpansionTypes(); I != N; ++I) { + Out << "Tn"; + mangleType(Tn->getExpansionType(I)); + } + } else { + QualType T = Tn->getType(); + if (Tn->isParameterPack()) { + Out << "Tp"; + if (auto *PackExpansion = T->getAs()) + T = PackExpansion->getPattern(); + } + Out << "Tn"; + mangleType(T); + } } else if (auto *Tt = dyn_cast(Decl)) { - Out << "Tt"; - for (auto *Param : *Tt->getTemplateParameters()) - mangleTemplateParamDecl(Param); - Out << "E"; + if (Tt->isExpandedParameterPack()) { + for (unsigned I = 0, N = Tt->getNumExpansionTemplateParameters(); I != N; + ++I) { + Out << "Tt"; + for (auto *Param : *Tt->getExpansionTemplateParameters(I)) + mangleTemplateParamDecl(Param); + Out << "E"; + } + } else { + if (Tt->isParameterPack()) + Out << "Tp"; + Out << "Tt"; + for (auto *Param : *Tt->getTemplateParameters()) + mangleTemplateParamDecl(Param); + Out << "E"; + } } } @@ -1726,12 +1757,7 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) { } Out << "Ul"; - for (auto *D : Lambda->getLambdaExplicitTemplateParameters()) - mangleTemplateParamDecl(D); - const FunctionProtoType *Proto = Lambda->getLambdaTypeInfo()->getType()-> - getAs(); - mangleBareFunctionType(Proto, /*MangleReturnType=*/false, - Lambda->getLambdaStaticInvoker()); 
+ mangleLambdaSig(Lambda); Out << "E"; // The number is omitted for the first closure type with a given @@ -1746,6 +1772,15 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) { Out << '_'; } +void CXXNameMangler::mangleLambdaSig(const CXXRecordDecl *Lambda) { + for (auto *D : Lambda->getLambdaExplicitTemplateParameters()) + mangleTemplateParamDecl(D); + const FunctionProtoType *Proto = Lambda->getLambdaTypeInfo()->getType()-> + getAs(); + mangleBareFunctionType(Proto, /*MangleReturnType=*/false, + Lambda->getLambdaStaticInvoker()); +} + void CXXNameMangler::manglePrefix(NestedNameSpecifier *qualifier) { switch (qualifier->getKind()) { case NestedNameSpecifier::Global: @@ -1852,10 +1887,10 @@ void CXXNameMangler::mangleTemplatePrefix(const TemplateDecl *ND, // ::= if (const auto *TTP = dyn_cast(ND)) { - mangleTemplateParameter(TTP->getIndex()); + mangleTemplateParameter(TTP->getDepth(), TTP->getIndex()); } else { manglePrefix(getEffectiveDeclContext(ND), NoFunction); - if (isa(ND)) + if (isa(ND) || isa(ND)) mangleUnqualifiedName(ND, nullptr); else mangleUnqualifiedName(ND->getTemplatedDecl(), nullptr); @@ -1885,8 +1920,8 @@ void CXXNameMangler::mangleType(TemplateName TN) { goto HaveDecl; HaveDecl: - if (isa(TD)) - mangleTemplateParameter(cast(TD)->getIndex()); + if (auto *TTP = dyn_cast(TD)) + mangleTemplateParameter(TTP->getDepth(), TTP->getIndex()); else mangleName(TD); break; @@ -2464,7 +2499,7 @@ void CXXNameMangler::mangleType(QualType T) { case Type::CLASS: \ mangleType(static_cast(ty)); \ break; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } } @@ -2671,6 +2706,15 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { Out << type_name.size() << type_name; \ break; #include "clang/Basic/OpenCLExtensionTypes.def" + // The SVE types are effectively target-specific. The mangling scheme + // is defined in the appendices to the Procedure Call Standard for the + // Arm Architecture. 
+#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: \ + type_name = Name; \ + Out << 'u' << type_name.size() << type_name; \ + break; +#include "clang/Basic/AArch64SVEACLETypes.def" } } @@ -2955,7 +2999,7 @@ void CXXNameMangler::mangleType(const MemberPointerType *T) { // ::= void CXXNameMangler::mangleType(const TemplateTypeParmType *T) { - mangleTemplateParameter(T->getIndex()); + mangleTemplateParameter(T->getDepth(), T->getIndex()); } // ::= @@ -3526,7 +3570,7 @@ void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) { case Decl::NonTypeTemplateParm: const NonTypeTemplateParmDecl *PD = cast(D); - mangleTemplateParameter(PD->getIndex()); + mangleTemplateParameter(PD->getDepth(), PD->getIndex()); break; } } @@ -4046,6 +4090,17 @@ recurse: break; } + case Expr::CXXRewrittenBinaryOperatorClass: { + // The mangled form represents the original syntax. + CXXRewrittenBinaryOperator::DecomposedForm Decomposed = + cast(E)->getDecomposedForm(); + mangleOperatorName(BinaryOperator::getOverloadedOperator(Decomposed.Opcode), + /*Arity=*/2); + mangleExpression(Decomposed.LHS); + mangleExpression(Decomposed.RHS); + break; + } + case Expr::ConditionalOperatorClass: { const ConditionalOperator *CO = cast(E); mangleOperatorName(OO_Conditional, /*Arity=*/3); @@ -4123,6 +4178,18 @@ recurse: mangleExpression(cast(E)->getSubExpr(), Arity); break; + + case Expr::ConceptSpecializationExprClass: { + // ::= L E # external name + Out << "L_Z"; + auto *CSE = cast(E); + mangleTemplateName(CSE->getNamedConcept(), + CSE->getTemplateArguments().data(), + CSE->getTemplateArguments().size()); + Out << 'E'; + break; + } + case Expr::DeclRefExprClass: mangleDeclRefExpr(cast(E)->getDecl()); break; @@ -4229,8 +4296,11 @@ recurse: } case Expr::GNUNullExprClass: - // FIXME: should this really be mangled the same as nullptr? - // fallthrough + // Mangle as if an integer literal 0. 
+ Out << 'L'; + mangleType(E->getType()); + Out << "0E"; + break; case Expr::CXXNullPtrLiteralExprClass: { Out << "LDnE"; @@ -4255,13 +4325,13 @@ recurse: Out << "sZ"; const NamedDecl *Pack = SPE->getPack(); if (const TemplateTypeParmDecl *TTP = dyn_cast(Pack)) - mangleTemplateParameter(TTP->getIndex()); + mangleTemplateParameter(TTP->getDepth(), TTP->getIndex()); else if (const NonTypeTemplateParmDecl *NTTP = dyn_cast(Pack)) - mangleTemplateParameter(NTTP->getIndex()); + mangleTemplateParameter(NTTP->getDepth(), NTTP->getIndex()); else if (const TemplateTemplateParmDecl *TempTP = dyn_cast(Pack)) - mangleTemplateParameter(TempTP->getIndex()); + mangleTemplateParameter(TempTP->getDepth(), TempTP->getIndex()); else mangleFunctionParam(cast(Pack)); break; @@ -4548,13 +4618,21 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A) { } } -void CXXNameMangler::mangleTemplateParameter(unsigned Index) { +void CXXNameMangler::mangleTemplateParameter(unsigned Depth, unsigned Index) { // ::= T_ # first template parameter // ::= T _ - if (Index == 0) - Out << "T_"; - else - Out << 'T' << (Index - 1) << '_'; + // ::= TL __ + // ::= TL _ + // _ + // + // The latter two manglings are from a proposal here: + // https://github.com/itanium-cxx-abi/cxx-abi/issues/31#issuecomment-528122117 + Out << 'T'; + if (Depth != 0) + Out << 'L' << (Depth - 1) << '_'; + if (Index != 0) + Out << (Index - 1); + Out << '_'; } void CXXNameMangler::mangleSeqID(unsigned SeqID) { @@ -5071,6 +5149,12 @@ void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, raw_os llvm_unreachable("Can't mangle string literals"); } +void ItaniumMangleContextImpl::mangleLambdaSig(const CXXRecordDecl *Lambda, + raw_ostream &Out) { + CXXNameMangler Mangler(*this, Out); + Mangler.mangleLambdaSig(Lambda); +} + ItaniumMangleContext * ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) { return new ItaniumMangleContextImpl(Context, Diags); diff --git a/lib/AST/JSONNodeDumper.cpp 
b/lib/AST/JSONNodeDumper.cpp index 04b933b0fb3..f60d761c996 100644 --- a/lib/AST/JSONNodeDumper.cpp +++ b/lib/AST/JSONNodeDumper.cpp @@ -66,6 +66,10 @@ void JSONNodeDumper::Visit(const Stmt *S) { void JSONNodeDumper::Visit(const Type *T) { JOS.attribute("id", createPointerRepresentation(T)); + + if (!T) + return; + JOS.attribute("kind", (llvm::Twine(T->getTypeClassName()) + "Type").str()); JOS.attribute("type", createQualType(QualType(T, 0), /*Desugar*/ false)); attributeOnlyIfTrue("isDependent", T->isDependentType()); @@ -107,9 +111,14 @@ void JSONNodeDumper::Visit(const Decl *D) { if (const auto *ND = dyn_cast(D)) attributeOnlyIfTrue("isHidden", ND->isHidden()); - if (D->getLexicalDeclContext() != D->getDeclContext()) - JOS.attribute("parentDeclContext", - createPointerRepresentation(D->getDeclContext())); + if (D->getLexicalDeclContext() != D->getDeclContext()) { + // Because of multiple inheritance, a DeclContext pointer does not produce + // the same pointer representation as a Decl pointer that references the + // same AST Node. + const auto *ParentDeclContextDecl = dyn_cast(D->getDeclContext()); + JOS.attribute("parentDeclContextId", + createPointerRepresentation(ParentDeclContextDecl)); + } addPreviousDeclaration(D); InnerDeclVisitor::Visit(D); @@ -171,12 +180,30 @@ void JSONNodeDumper::Visit(const GenericSelectionExpr::ConstAssociation &A) { attributeOnlyIfTrue("selected", A.isSelected()); } +void JSONNodeDumper::writeIncludeStack(PresumedLoc Loc, bool JustFirst) { + if (Loc.isInvalid()) + return; + + JOS.attributeBegin("includedFrom"); + JOS.objectBegin(); + + if (!JustFirst) { + // Walk the stack recursively, then print out the presumed location. 
+ writeIncludeStack(SM.getPresumedLoc(Loc.getIncludeLoc())); + } + + JOS.attribute("file", Loc.getFilename()); + JOS.objectEnd(); + JOS.attributeEnd(); +} + void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc, bool IsSpelling) { PresumedLoc Presumed = SM.getPresumedLoc(Loc); unsigned ActualLine = IsSpelling ? SM.getSpellingLineNumber(Loc) : SM.getExpansionLineNumber(Loc); if (Presumed.isValid()) { + JOS.attribute("offset", SM.getDecomposedLoc(Loc).second); if (LastLocFilename != Presumed.getFilename()) { JOS.attribute("file", Presumed.getFilename()); JOS.attribute("line", ActualLine); @@ -193,6 +220,12 @@ void JSONNodeDumper::writeBareSourceLocation(SourceLocation Loc, LastLocFilename = Presumed.getFilename(); LastLocPresumedLine = PresumedLine; LastLocLine = ActualLine; + + // Orthogonal to the file, line, and column de-duplication is whether the + // given location was a result of an include. If so, print where the + // include location came from. + writeIncludeStack(SM.getPresumedLoc(Presumed.getIncludeLoc()), + /*JustFirst*/ true); } } @@ -238,6 +271,8 @@ llvm::json::Object JSONNodeDumper::createQualType(QualType QT, bool Desugar) { SplitQualType DSQT = QT.getSplitDesugaredType(); if (DSQT != SQT) Ret["desugaredQualType"] = QualType::getAsString(DSQT, PrintPolicy); + if (const auto *TT = QT->getAs()) + Ret["typeAliasDeclId"] = createPointerRepresentation(TT->getDecl()); } return Ret; } @@ -275,7 +310,7 @@ llvm::json::Array JSONNodeDumper::createCastPath(const CastExpr *C) { for (auto I = C->path_begin(), E = C->path_end(); I != E; ++I) { const CXXBaseSpecifier *Base = *I; const auto *RD = - cast(Base->getType()->getAs()->getDecl()); + cast(Base->getType()->castAs()->getDecl()); llvm::json::Object Val{{"name", RD->getName()}}; if (Base->isVirtual()) @@ -839,6 +874,12 @@ void JSONNodeDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *LSD) { switch (LSD->getLanguage()) { case LinkageSpecDecl::lang_c: Lang = "C"; break; case LinkageSpecDecl::lang_cxx: 
Lang = "C++"; break; + case LinkageSpecDecl::lang_cxx_11: + Lang = "C++11"; + break; + case LinkageSpecDecl::lang_cxx_14: + Lang = "C++14"; + break; } JOS.attribute("language", Lang); attributeOnlyIfTrue("hasBraces", LSD->hasBraces()); diff --git a/lib/AST/Mangle.cpp b/lib/AST/Mangle.cpp index 625282368a4..32d466cb571 100644 --- a/lib/AST/Mangle.cpp +++ b/lib/AST/Mangle.cpp @@ -122,15 +122,21 @@ void MangleContext::mangleName(const NamedDecl *D, raw_ostream &Out) { if (const AsmLabelAttr *ALA = D->getAttr()) { // If we have an asm name, then we use it as the mangling. + // If the label isn't literal, or if this is an alias for an LLVM intrinsic, + // do not add a "\01" prefix. + if (!ALA->getIsLiteralLabel() || ALA->getLabel().startswith("llvm.")) { + Out << ALA->getLabel(); + return; + } + // Adding the prefix can cause problems when one file has a "foo" and // another has a "\01foo". That is known to happen on ELF with the // tricks normally used for producing aliases (PR9177). Fortunately the // llvm mangler on ELF is a nop, so we can just avoid adding the \01 - // marker. We also avoid adding the marker if this is an alias for an - // LLVM intrinsic. + // marker. 
char GlobalPrefix = getASTContext().getTargetInfo().getDataLayout().getGlobalPrefix(); - if (GlobalPrefix && !ALA->getLabel().startswith("llvm.")) + if (GlobalPrefix) Out << '\01'; // LLVM IR Marker for __asm("foo") Out << ALA->getLabel(); @@ -380,7 +386,7 @@ public: auto hasDefaultCXXMethodCC = [](ASTContext &C, const CXXMethodDecl *MD) { auto DefaultCC = C.getDefaultCallingConvention(/*IsVariadic=*/false, /*IsCXXMethod=*/true); - auto CC = MD->getType()->getAs()->getCallConv(); + auto CC = MD->getType()->castAs()->getCallConv(); return CC == DefaultCC; }; @@ -470,7 +476,7 @@ private: }; ASTNameGenerator::ASTNameGenerator(ASTContext &Ctx) - : Impl(llvm::make_unique(Ctx)) {} + : Impl(std::make_unique(Ctx)) {} ASTNameGenerator::~ASTNameGenerator() {} diff --git a/lib/AST/MicrosoftCXXABI.cpp b/lib/AST/MicrosoftCXXABI.cpp index 4dc4156df9c..074abba3d45 100644 --- a/lib/AST/MicrosoftCXXABI.cpp +++ b/lib/AST/MicrosoftCXXABI.cpp @@ -82,7 +82,7 @@ public: if (!isVariadic && Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) return CC_X86ThisCall; - return CC_C; + return Context.getTargetInfo().getDefaultCallingConv(); } bool isNearlyEmpty(const CXXRecordDecl *RD) const override { @@ -132,7 +132,7 @@ public: std::unique_ptr createMangleNumberingContext() const override { - return llvm::make_unique(); + return std::make_unique(); } }; } diff --git a/lib/AST/MicrosoftMangle.cpp b/lib/AST/MicrosoftMangle.cpp index 5e9358e24fc..f871a1b9990 100644 --- a/lib/AST/MicrosoftMangle.cpp +++ b/lib/AST/MicrosoftMangle.cpp @@ -27,11 +27,11 @@ #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/JamCRC.h" -#include "llvm/Support/xxhash.h" +#include "llvm/Support/CRC.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/xxhash.h" using namespace clang; @@ -364,7 +364,7 @@ private: #define TYPE(CLASS, PARENT) 
void mangleType(const CLASS##Type *T, \ Qualifiers Quals, \ SourceRange Range); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" #undef ABSTRACT_TYPE #undef NON_CANONICAL_TYPE #undef TYPE @@ -615,6 +615,8 @@ void MicrosoftCXXNameMangler::mangleMemberDataPointer(const CXXRecordDecl *RD, case MSInheritanceAttr::Keyword_multiple_inheritance: Code = '0'; break; case MSInheritanceAttr::Keyword_virtual_inheritance: Code = 'F'; break; case MSInheritanceAttr::Keyword_unspecified_inheritance: Code = 'G'; break; + case MSInheritanceAttr::SpellingNotCalculated: + llvm_unreachable("not reachable"); } Out << '$' << Code; @@ -646,6 +648,8 @@ MicrosoftCXXNameMangler::mangleMemberFunctionPointer(const CXXRecordDecl *RD, case MSInheritanceAttr::Keyword_multiple_inheritance: Code = 'H'; break; case MSInheritanceAttr::Keyword_virtual_inheritance: Code = 'I'; break; case MSInheritanceAttr::Keyword_unspecified_inheritance: Code = 'J'; break; + case MSInheritanceAttr::SpellingNotCalculated: + llvm_unreachable("not reachable"); } // If non-virtual, mangle the name. If virtual, mangle as a virtual memptr @@ -842,7 +846,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, TemplateArgStringStorage.save(TemplateMangling.str()); } } else { - Out << Found->second; // Outputs a StringRef. + Out << Found->second << '@'; // Outputs a StringRef. } } else { Out << Found->second; // Outputs a back reference (an int). @@ -868,16 +872,11 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, } if (const DecompositionDecl *DD = dyn_cast(ND)) { - // FIXME: Invented mangling for decomposition declarations: - // [X,Y,Z] - // where X,Y,Z are the names of the bindings. 
- llvm::SmallString<128> Name("["); - for (auto *BD : DD->bindings()) { - if (Name.size() > 1) - Name += ','; - Name += BD->getDeclName().getAsIdentifierInfo()->getName(); - } - Name += ']'; + // Decomposition declarations are considered anonymous, and get + // numbered with a $S prefix. + llvm::SmallString<64> Name("$S"); + // Get a unique id for the anonymous struct. + Name += llvm::utostr(Context.getAnonymousStructId(DD) + 1); mangleSourceName(Name); break; } @@ -1942,7 +1941,7 @@ void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range, case Type::CLASS: \ mangleType(cast(ty), Quals, Range); \ break; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" #undef ABSTRACT_TYPE #undef NON_CANONICAL_TYPE #undef TYPE @@ -2109,6 +2108,9 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, mangleArtificialTagType(TTK_Struct, "_Half", {"__clang"}); break; +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: diff --git a/lib/AST/NSAPI.cpp b/lib/AST/NSAPI.cpp index 5104dc59d62..ae6ff04f512 100644 --- a/lib/AST/NSAPI.cpp +++ b/lib/AST/NSAPI.cpp @@ -75,17 +75,6 @@ Selector NSAPI::getNSStringSelector(NSStringMethodKind MK) const { return NSStringSelectors[MK]; } -Optional -NSAPI::getNSStringMethodKind(Selector Sel) const { - for (unsigned i = 0; i != NumNSStringMethods; ++i) { - NSStringMethodKind MK = NSStringMethodKind(i); - if (Sel == getNSStringSelector(MK)) - return MK; - } - - return None; -} - Selector NSAPI::getNSArraySelector(NSArrayMethodKind MK) const { if (NSArraySelectors[MK].isNull()) { Selector Sel; @@ -482,6 +471,9 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include 
"clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::BoundMember: case BuiltinType::Dependent: case BuiltinType::Overload: diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp index 9d8a7ebc302..fe1334469d4 100644 --- a/lib/AST/OpenMPClause.cpp +++ b/lib/AST/OpenMPClause.cpp @@ -43,6 +43,8 @@ OMPClause::child_range OMPClause::used_children() { #include "clang/Basic/OpenMPKinds.def" case OMPC_threadprivate: case OMPC_uniform: + case OMPC_device_type: + case OMPC_match: case OMPC_unknown: break; } @@ -82,9 +84,16 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { return static_cast(C); case OMPC_device: return static_cast(C); + case OMPC_grainsize: + return static_cast(C); + case OMPC_num_tasks: + return static_cast(C); + case OMPC_final: + return static_cast(C); + case OMPC_priority: + return static_cast(C); case OMPC_default: case OMPC_proc_bind: - case OMPC_final: case OMPC_safelen: case OMPC_simdlen: case OMPC_allocator: @@ -110,10 +119,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_threads: case OMPC_simd: case OMPC_map: - case OMPC_priority: - case OMPC_grainsize: case OMPC_nogroup: - case OMPC_num_tasks: case OMPC_hint: case OMPC_defaultmap: case OMPC_unknown: @@ -127,6 +133,8 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_reverse_offload: case OMPC_dynamic_allocators: case OMPC_atomic_default_mem_order: + case OMPC_device_type: + case OMPC_match: break; } @@ -203,6 +211,8 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_reverse_offload: case OMPC_dynamic_allocators: case OMPC_atomic_default_mem_order: + case OMPC_device_type: + case OMPC_match: break; } @@ -228,6 +238,30 @@ OMPClause::child_range OMPIfClause::used_children() { return child_range(&Condition, &Condition + 1); } +OMPClause::child_range OMPGrainsizeClause::used_children() { + if (Stmt **C = 
getAddrOfExprAsWritten(getPreInitStmt())) + return child_range(C, C + 1); + return child_range(&Grainsize, &Grainsize + 1); +} + +OMPClause::child_range OMPNumTasksClause::used_children() { + if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) + return child_range(C, C + 1); + return child_range(&NumTasks, &NumTasks + 1); +} + +OMPClause::child_range OMPFinalClause::used_children() { + if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) + return child_range(C, C + 1); + return child_range(&Condition, &Condition + 1); +} + +OMPClause::child_range OMPPriorityClause::used_children() { + if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) + return child_range(C, C + 1); + return child_range(&Priority, &Priority + 1); +} + OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num, unsigned NumLoops, SourceLocation StartLoc, @@ -429,15 +463,23 @@ void OMPLinearClause::setFinals(ArrayRef FL) { std::copy(FL.begin(), FL.end(), getUpdates().end()); } +void OMPLinearClause::setUsedExprs(ArrayRef UE) { + assert( + UE.size() == varlist_size() + 1 && + "Number of used expressions is not the same as the preallocated buffer"); + std::copy(UE.begin(), UE.end(), getFinals().end() + 2); +} + OMPLinearClause *OMPLinearClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef VL, ArrayRef PL, ArrayRef IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate) { - // Allocate space for 4 lists (Vars, Inits, Updates, Finals) and 2 expressions - // (Step and CalcStep). - void *Mem = C.Allocate(totalSizeToAlloc(5 * VL.size() + 2)); + // Allocate space for 5 lists (Vars, Inits, Updates, Finals), 2 expressions + // (Step and CalcStep), list of used expression + step. 
+ void *Mem = + C.Allocate(totalSizeToAlloc(5 * VL.size() + 2 + VL.size() + 1)); OMPLinearClause *Clause = new (Mem) OMPLinearClause( StartLoc, LParenLoc, Modifier, ModifierLoc, ColonLoc, EndLoc, VL.size()); Clause->setVarRefs(VL); @@ -449,6 +491,8 @@ OMPLinearClause *OMPLinearClause::Create( nullptr); std::fill(Clause->getUpdates().end(), Clause->getUpdates().end() + VL.size(), nullptr); + std::fill(Clause->getUsedExprs().begin(), Clause->getUsedExprs().end(), + nullptr); Clause->setStep(Step); Clause->setCalcStep(CalcStep); Clause->setPreInitStmt(PreInit); @@ -458,12 +502,19 @@ OMPLinearClause *OMPLinearClause::Create( OMPLinearClause *OMPLinearClause::CreateEmpty(const ASTContext &C, unsigned NumVars) { - // Allocate space for 4 lists (Vars, Inits, Updates, Finals) and 2 expressions - // (Step and CalcStep). - void *Mem = C.Allocate(totalSizeToAlloc(5 * NumVars + 2)); + // Allocate space for 5 lists (Vars, Inits, Updates, Finals), 2 expressions + // (Step and CalcStep), list of used expression + step. + void *Mem = C.Allocate(totalSizeToAlloc(5 * NumVars + 2 + NumVars +1)); return new (Mem) OMPLinearClause(NumVars); } +OMPClause::child_range OMPLinearClause::used_children() { + // Range includes only non-nullptr elements. 
+ return child_range( + reinterpret_cast(getUsedExprs().begin()), + reinterpret_cast(llvm::find(getUsedExprs(), nullptr))); +} + OMPAlignedClause * OMPAlignedClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc, diff --git a/lib/AST/PrintfFormatString.cpp b/lib/AST/PrintfFormatString.cpp index a1207aae5aa..bae60d46440 100644 --- a/lib/AST/PrintfFormatString.cpp +++ b/lib/AST/PrintfFormatString.cpp @@ -463,6 +463,23 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I, return false; } +bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers( + const char *Begin, const char *End, const LangOptions &LO, + const TargetInfo &Target) { + unsigned ArgIndex = 0; + // Keep looking for a formatting specifier until we have exhausted the string. + FormatStringHandler H; + while (Begin != End) { + const PrintfSpecifierResult &FSR = + ParsePrintfSpecifier(H, Begin, End, ArgIndex, LO, Target, false, false); + if (FSR.shouldStop()) + break; + if (FSR.hasValue()) + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // Methods on PrintfSpecifier. 
//===----------------------------------------------------------------------===// @@ -769,6 +786,9 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ case BuiltinType::Id: #include "clang/Basic/OpenCLExtensionTypes.def" +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" #define SIGNED_TYPE(Id, SingletonId) #define UNSIGNED_TYPE(Id, SingletonId) #define FLOATING_TYPE(Id, SingletonId) diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp index df53b7fa100..83e8a0b942a 100644 --- a/lib/AST/RawCommentList.cpp +++ b/lib/AST/RawCommentList.cpp @@ -275,27 +275,25 @@ void RawCommentList::addComment(const RawComment &RC, if (RC.isInvalid()) return; - // Check if the comments are not in source order. - while (!Comments.empty() && - !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getBeginLoc(), - RC.getBeginLoc())) { - // If they are, just pop a few last comments that don't fit. - // This happens if an \#include directive contains comments. - Comments.pop_back(); - } - // Ordinary comments are not interesting for us. if (RC.isOrdinary() && !CommentOpts.ParseAllComments) return; + std::pair Loc = + SourceMgr.getDecomposedLoc(RC.getBeginLoc()); + + const FileID CommentFile = Loc.first; + const unsigned CommentOffset = Loc.second; + // If this is the first Doxygen comment, save it (because there isn't // anything to merge it with). - if (Comments.empty()) { - Comments.push_back(new (Allocator) RawComment(RC)); + if (OrderedComments[CommentFile].empty()) { + OrderedComments[CommentFile][CommentOffset] = + new (Allocator) RawComment(RC); return; } - const RawComment &C1 = *Comments.back(); + const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second; const RawComment &C2 = RC; // Merge comments only if there is only whitespace between them. 
@@ -318,21 +316,43 @@ void RawCommentList::addComment(const RawComment &RC, onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(), /*MaxNewlinesAllowed=*/1)) { SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc()); - *Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true); + *OrderedComments[CommentFile].rbegin()->second = + RawComment(SourceMgr, MergedRange, CommentOpts, true); } else { - Comments.push_back(new (Allocator) RawComment(RC)); + OrderedComments[CommentFile][CommentOffset] = + new (Allocator) RawComment(RC); } } -void RawCommentList::addDeserializedComments(ArrayRef DeserializedComments) { - std::vector MergedComments; - MergedComments.reserve(Comments.size() + DeserializedComments.size()); +const std::map * +RawCommentList::getCommentsInFile(FileID File) const { + auto CommentsInFile = OrderedComments.find(File); + if (CommentsInFile == OrderedComments.end()) + return nullptr; - std::merge(Comments.begin(), Comments.end(), - DeserializedComments.begin(), DeserializedComments.end(), - std::back_inserter(MergedComments), - BeforeThanCompare(SourceMgr)); - std::swap(Comments, MergedComments); + return &CommentsInFile->second; +} + +bool RawCommentList::empty() const { return OrderedComments.empty(); } + +unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File, + unsigned Offset) const { + auto Cached = CommentBeginLine.find(C); + if (Cached != CommentBeginLine.end()) + return Cached->second; + const unsigned Line = SourceMgr.getLineNumber(File, Offset); + CommentBeginLine[C] = Line; + return Line; +} + +unsigned RawCommentList::getCommentEndOffset(RawComment *C) const { + auto Cached = CommentEndOffset.find(C); + if (Cached != CommentEndOffset.end()) + return Cached->second; + const unsigned Offset = + SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second; + CommentEndOffset[C] = Offset; + return Offset; } std::string RawComment::getFormattedText(const SourceManager &SourceMgr, diff --git 
a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index 0a4d403106b..80a1451ac78 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -41,6 +41,7 @@ #include #include #include +#include using namespace clang; @@ -83,6 +84,16 @@ const char *Stmt::getStmtClassName() const { #CLASS " should not be polymorphic!"); #include "clang/AST/StmtNodes.inc" +// Check that no statement / expression class has a non-trival destructor. +// Statements and expressions are allocated with the BumpPtrAllocator from +// ASTContext and therefore their destructor is not executed. +#define STMT(CLASS, PARENT) \ + static_assert(std::is_trivially_destructible::value, \ + #CLASS " should be trivially destructible!"); +// FIXME: InitListExpr is not trivially destructible due to its ASTVector. +#define INITLISTEXPR(CLASS, PARENT) +#include "clang/AST/StmtNodes.inc" + void Stmt::PrintStats() { // Ensure the table is primed. getStmtInfoTableEntry(Stmt::NullStmtClass); diff --git a/lib/AST/StmtOpenMP.cpp b/lib/AST/StmtOpenMP.cpp index 4e829897ceb..da1364ebffc 100644 --- a/lib/AST/StmtOpenMP.cpp +++ b/lib/AST/StmtOpenMP.cpp @@ -72,6 +72,25 @@ void OMPLoopDirective::setFinals(ArrayRef A) { std::copy(A.begin(), A.end(), getFinals().begin()); } +void OMPLoopDirective::setDependentCounters(ArrayRef A) { + assert( + A.size() == getCollapsedNumber() && + "Number of dependent counters is not the same as the collapsed number"); + llvm::copy(A, getDependentCounters().begin()); +} + +void OMPLoopDirective::setDependentInits(ArrayRef A) { + assert(A.size() == getCollapsedNumber() && + "Number of dependent inits is not the same as the collapsed number"); + llvm::copy(A, getDependentInits().begin()); +} + +void OMPLoopDirective::setFinalsConditions(ArrayRef A) { + assert(A.size() == getCollapsedNumber() && + "Number of finals conditions is not the same as the collapsed number"); + llvm::copy(A, getFinalsConditions().begin()); +} + OMPParallelDirective *OMPParallelDirective::Create( const ASTContext &C, SourceLocation 
StartLoc, SourceLocation EndLoc, ArrayRef Clauses, Stmt *AssociatedStmt, bool HasCancel) { @@ -122,6 +141,9 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -170,6 +192,9 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setHasCancel(HasCancel); return Dir; @@ -220,6 +245,9 @@ OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -383,6 +411,9 @@ OMPParallelForDirective *OMPParallelForDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setHasCancel(HasCancel); return Dir; @@ -432,6 +463,9 @@ OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); 
Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -772,6 +806,9 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setHasCancel(HasCancel); return Dir; @@ -914,6 +951,9 @@ OMPTaskLoopDirective *OMPTaskLoopDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -963,6 +1003,9 @@ OMPTaskLoopSimdDirective *OMPTaskLoopSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -978,6 +1021,167 @@ OMPTaskLoopSimdDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, return new (Mem) OMPTaskLoopSimdDirective(CollapsedNum, NumClauses); } +OMPMasterTaskLoopDirective *OMPMasterTaskLoopDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + unsigned Size = + llvm::alignTo(sizeof(OMPMasterTaskLoopDirective), alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_master_taskloop)); + OMPMasterTaskLoopDirective *Dir = new (Mem) OMPMasterTaskLoopDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + 
Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPMasterTaskLoopDirective * +OMPMasterTaskLoopDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, EmptyShell) { + unsigned Size = + llvm::alignTo(sizeof(OMPMasterTaskLoopDirective), alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_master_taskloop)); + return new (Mem) OMPMasterTaskLoopDirective(CollapsedNum, NumClauses); +} + +OMPMasterTaskLoopSimdDirective *OMPMasterTaskLoopSimdDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + unsigned Size = llvm::alignTo(sizeof(OMPMasterTaskLoopSimdDirective), + alignof(OMPClause *)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_master_taskloop_simd)); + auto *Dir = new (Mem) 
OMPMasterTaskLoopSimdDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPMasterTaskLoopSimdDirective * +OMPMasterTaskLoopSimdDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, EmptyShell) { + unsigned Size = llvm::alignTo(sizeof(OMPMasterTaskLoopSimdDirective), + alignof(OMPClause *)); + void *Mem = + C.Allocate(Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_master_taskloop_simd)); + return new (Mem) OMPMasterTaskLoopSimdDirective(CollapsedNum, NumClauses); +} + +OMPParallelMasterTaskLoopDirective *OMPParallelMasterTaskLoopDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterTaskLoopDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + 
sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_parallel_master_taskloop)); + auto *Dir = new (Mem) OMPParallelMasterTaskLoopDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPParallelMasterTaskLoopDirective * +OMPParallelMasterTaskLoopDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell) { + unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterTaskLoopDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_parallel_master_taskloop)); + return new (Mem) OMPParallelMasterTaskLoopDirective(CollapsedNum, NumClauses); +} + OMPDistributeDirective *OMPDistributeDirective::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, @@ -1011,6 +1215,9 
@@ OMPDistributeDirective *OMPDistributeDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1089,6 +1296,9 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1157,6 +1367,9 @@ OMPDistributeParallelForSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1219,6 +1432,9 @@ OMPDistributeSimdDirective *OMPDistributeSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1271,6 +1487,9 @@ OMPTargetParallelForSimdDirective *OMPTargetParallelForSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + 
Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1315,6 +1534,9 @@ OMPTargetSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1363,6 +1585,9 @@ OMPTeamsDistributeDirective *OMPTeamsDistributeDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1414,6 +1639,9 @@ OMPTeamsDistributeSimdDirective *OMPTeamsDistributeSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1471,6 +1699,9 @@ OMPTeamsDistributeParallelForSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1540,6 +1771,9 @@ OMPTeamsDistributeParallelForDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + 
Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1628,6 +1862,9 @@ OMPTargetTeamsDistributeDirective *OMPTargetTeamsDistributeDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); return Dir; } @@ -1688,6 +1925,9 @@ OMPTargetTeamsDistributeParallelForDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1761,6 +2001,9 @@ OMPTargetTeamsDistributeParallelForSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); @@ -1826,6 +2069,9 @@ OMPTargetTeamsDistributeSimdDirective::Create( Dir->setInits(Exprs.Inits); Dir->setUpdates(Exprs.Updates); Dir->setFinals(Exprs.Finals); + Dir->setDependentCounters(Exprs.DependentCounters); + Dir->setDependentInits(Exprs.DependentInits); + Dir->setFinalsConditions(Exprs.FinalsConditions); 
Dir->setPreInits(Exprs.PreInits); return Dir; } diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index 46802d765e1..7759ff6c138 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -823,6 +823,24 @@ void StmtPrinter::VisitOMPTaskLoopSimdDirective( PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPMasterTaskLoopDirective( + OMPMasterTaskLoopDirective *Node) { + Indent() << "#pragma omp master taskloop"; + PrintOMPExecutableDirective(Node); +} + +void StmtPrinter::VisitOMPMasterTaskLoopSimdDirective( + OMPMasterTaskLoopSimdDirective *Node) { + Indent() << "#pragma omp master taskloop simd"; + PrintOMPExecutableDirective(Node); +} + +void StmtPrinter::VisitOMPParallelMasterTaskLoopDirective( + OMPParallelMasterTaskLoopDirective *Node) { + Indent() << "#pragma omp parallel master taskloop"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPDistributeDirective(OMPDistributeDirective *Node) { Indent() << "#pragma omp distribute"; PrintOMPExecutableDirective(Node); @@ -1102,7 +1120,7 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) { OS << Node->getValue().toString(10, isSigned); // Emit suffixes. Integer literals are always a builtin integer type. 
- switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for integer literal!"); case BuiltinType::Char_S: case BuiltinType::Char_U: OS << "i8"; break; @@ -1123,7 +1141,7 @@ void StmtPrinter::VisitFixedPointLiteral(FixedPointLiteral *Node) { return; OS << Node->getValueAsString(/*Radix=*/10); - switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for fixed point literal!"); case BuiltinType::ShortFract: OS << "hr"; break; case BuiltinType::ShortAccum: OS << "hk"; break; @@ -1152,7 +1170,7 @@ static void PrintFloatingLiteral(raw_ostream &OS, FloatingLiteral *Node, return; // Emit suffixes. Float literals are always a builtin float type. - switch (Node->getType()->getAs()->getKind()) { + switch (Node->getType()->castAs()->getKind()) { default: llvm_unreachable("Unexpected type for float literal!"); case BuiltinType::Half: break; // FIXME: suffix? case BuiltinType::Double: break; // no suffix. 
@@ -1679,6 +1697,15 @@ void StmtPrinter::VisitCUDAKernelCallExpr(CUDAKernelCallExpr *Node) { OS << ")"; } +void StmtPrinter::VisitCXXRewrittenBinaryOperator( + CXXRewrittenBinaryOperator *Node) { + CXXRewrittenBinaryOperator::DecomposedForm Decomposed = + Node->getDecomposedForm(); + PrintExpr(const_cast(Decomposed.LHS)); + OS << ' ' << BinaryOperator::getOpcodeStr(Decomposed.Opcode) << ' '; + PrintExpr(const_cast(Decomposed.RHS)); +} + void StmtPrinter::VisitCXXNamedCastExpr(CXXNamedCastExpr *Node) { OS << Node->getCastName() << '<'; Node->getTypeAsWritten().print(OS, Policy); @@ -1952,7 +1979,7 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) { if (Node->isMutable()) OS << " mutable"; - auto *Proto = Method->getType()->getAs(); + auto *Proto = Method->getType()->castAs(); Proto->printExceptionSpecification(OS, Policy); // FIXME: Attributes @@ -2219,6 +2246,17 @@ void StmtPrinter::VisitCXXFoldExpr(CXXFoldExpr *E) { OS << ")"; } +void StmtPrinter::VisitConceptSpecializationExpr(ConceptSpecializationExpr *E) { + NestedNameSpecifierLoc NNS = E->getNestedNameSpecifierLoc(); + if (NNS) + NNS.getNestedNameSpecifier()->print(OS, Policy); + if (E->getTemplateKWLoc().isValid()) + OS << "template "; + OS << E->getFoundDecl()->getName(); + printTemplateArgumentList(OS, E->getTemplateArgsAsWritten()->arguments(), + Policy); +} + // C++ Coroutines TS void StmtPrinter::VisitCoroutineBodyStmt(CoroutineBodyStmt *S) { diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index f92c3dc60ba..d1e85653893 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -440,6 +440,7 @@ void OMPClauseProfiler::VisitOMPIfClause(const OMPIfClause *C) { } void OMPClauseProfiler::VisitOMPFinalClause(const OMPFinalClause *C) { + VistOMPClauseWithPreInit(C); if (C->getCondition()) Profiler->VisitStmt(C->getCondition()); } @@ -736,14 +737,17 @@ void OMPClauseProfiler::VisitOMPThreadLimitClause( Profiler->VisitStmt(C->getThreadLimit()); } void 
OMPClauseProfiler::VisitOMPPriorityClause(const OMPPriorityClause *C) { + VistOMPClauseWithPreInit(C); if (C->getPriority()) Profiler->VisitStmt(C->getPriority()); } void OMPClauseProfiler::VisitOMPGrainsizeClause(const OMPGrainsizeClause *C) { + VistOMPClauseWithPreInit(C); if (C->getGrainsize()) Profiler->VisitStmt(C->getGrainsize()); } void OMPClauseProfiler::VisitOMPNumTasksClause(const OMPNumTasksClause *C) { + VistOMPClauseWithPreInit(C); if (C->getNumTasks()) Profiler->VisitStmt(C->getNumTasks()); } @@ -918,6 +922,21 @@ void StmtProfiler::VisitOMPTaskLoopSimdDirective( VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective *S) { + VisitOMPLoopDirective(S); +} + +void StmtProfiler::VisitOMPMasterTaskLoopSimdDirective( + const OMPMasterTaskLoopSimdDirective *S) { + VisitOMPLoopDirective(S); +} + +void StmtProfiler::VisitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitOMPDistributeDirective( const OMPDistributeDirective *S) { VisitOMPLoopDirective(S); @@ -1297,6 +1316,14 @@ void StmtProfiler::VisitAtomicExpr(const AtomicExpr *S) { ID.AddInteger(S->getOp()); } +void StmtProfiler::VisitConceptSpecializationExpr( + const ConceptSpecializationExpr *S) { + VisitExpr(S); + VisitDecl(S->getFoundDecl()); + VisitTemplateArguments(S->getTemplateArgsAsWritten()->getTemplateArgs(), + S->getTemplateArgsAsWritten()->NumTemplateArgs); +} + static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S, UnaryOperatorKind &UnaryOp, BinaryOperatorKind &BinaryOp) { @@ -1530,6 +1557,16 @@ void StmtProfiler::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *S) { ID.AddInteger(S->getOperator()); } +void StmtProfiler::VisitCXXRewrittenBinaryOperator( + const CXXRewrittenBinaryOperator *S) { + // If a rewritten operator were ever to be type-dependent, we should profile + // it following its syntactic operator. 
+ assert(!S->isTypeDependent() && + "resolved rewritten operator should never be type-dependent"); + ID.AddBoolean(S->isReversed()); + VisitExpr(S->getSemanticForm()); +} + #if defined(_MSC_VER) && !defined(__clang__) #if _MSC_VER == 1911 #pragma optimize("", on) diff --git a/lib/AST/TemplateBase.cpp b/lib/AST/TemplateBase.cpp index cb4cbd2f76a..db16c2a06b6 100644 --- a/lib/AST/TemplateBase.cpp +++ b/lib/AST/TemplateBase.cpp @@ -370,7 +370,7 @@ TemplateArgument TemplateArgument::getPackExpansionPattern() const { switch (getKind()) { case Type: - return getAsType()->getAs()->getPattern(); + return getAsType()->castAs()->getPattern(); case Expression: return cast(getAsExpr())->getPattern(); diff --git a/lib/AST/TextNodeDumper.cpp b/lib/AST/TextNodeDumper.cpp index cba9091b106..63a6510324f 100644 --- a/lib/AST/TextNodeDumper.cpp +++ b/lib/AST/TextNodeDumper.cpp @@ -223,7 +223,6 @@ void TextNodeDumper::Visit(const Decl *D) { return; } - Context = &D->getASTContext(); { ColorScope Color(OS, ShowColors, DeclKindNameColor); OS << D->getDeclKindName() << "Decl"; @@ -637,8 +636,8 @@ static void dumpBasePath(raw_ostream &OS, const CastExpr *Node) { if (!First) OS << " -> "; - const CXXRecordDecl *RD = - cast(Base->getType()->getAs()->getDecl()); + const auto *RD = + cast(Base->getType()->castAs()->getDecl()); if (Base->isVirtual()) OS << "virtual "; @@ -688,7 +687,7 @@ void TextNodeDumper::VisitConstantExpr(const ConstantExpr *Node) { if (Node->getResultAPValueKind() != APValue::None) { ColorScope Color(OS, ShowColors, ValueColor); OS << " "; - Node->getAPValueResult().printPretty(OS, *Context, Node->getType()); + Node->getAPValueResult().dump(OS); } } @@ -1385,6 +1384,8 @@ void TextNodeDumper::VisitVarDecl(const VarDecl *D) { break; } } + if (D->needsDestruction(D->getASTContext())) + OS << " destroyed"; if (D->isParameterPack()) OS << " pack"; } @@ -1537,6 +1538,7 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) { FLAG(isGenericLambda, generic); 
FLAG(isLambda, lambda); + FLAG(isAnonymousStructOrUnion, is_anonymous); FLAG(canPassInRegisters, pass_in_registers); FLAG(isEmpty, empty); FLAG(isAggregate, aggregate); @@ -1641,6 +1643,7 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) { FLAG(hasTrivialDestructor, trivial); FLAG(hasNonTrivialDestructor, non_trivial); FLAG(hasUserDeclaredDestructor, user_declared); + FLAG(hasConstexprDestructor, constexpr); FLAG(needsImplicitDestructor, needs_implicit); FLAG(needsOverloadResolutionForDestructor, needs_overload_resolution); if (!D->needsOverloadResolutionForDestructor()) @@ -1766,6 +1769,12 @@ void TextNodeDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *D) { case LinkageSpecDecl::lang_cxx: OS << " C++"; break; + case LinkageSpecDecl::lang_cxx_11: + OS << " C++11"; + break; + case LinkageSpecDecl::lang_cxx_14: + OS << " C++14"; + break; } } diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp index ed75a0b5bcd..4d54ea1061e 100644 --- a/lib/AST/Type.cpp +++ b/lib/AST/Type.cpp @@ -50,6 +50,7 @@ #include #include #include +#include using namespace clang; @@ -74,11 +75,11 @@ const IdentifierInfo* QualType::getBaseTypeIdentifier() const { if (ty->isPointerType() || ty->isReferenceType()) return ty->getPointeeType().getBaseTypeIdentifier(); else if (ty->isRecordType()) - ND = ty->getAs()->getDecl(); + ND = ty->castAs()->getDecl(); else if (ty->isEnumeralType()) - ND = ty->getAs()->getDecl(); + ND = ty->castAs()->getDecl(); else if (ty->getTypeClass() == Type::Typedef) - ND = ty->getAs()->getDecl(); + ND = ty->castAs()->getDecl(); else if (ty->isArrayType()) return ty->castAsArrayTypeUnsafe()-> getElementType().getBaseTypeIdentifier(); @@ -108,6 +109,33 @@ bool QualType::isConstant(QualType T, const ASTContext &Ctx) { return T.getAddressSpace() == LangAS::opencl_constant; } +// C++ [temp.dep.type]p1: +// A type is dependent if it is... 
+// - an array type constructed from any dependent type or whose +// size is specified by a constant expression that is +// value-dependent, +ArrayType::ArrayType(TypeClass tc, QualType et, QualType can, + ArraySizeModifier sm, unsigned tq, const Expr *sz) + // Note, we need to check for DependentSizedArrayType explicitly here + // because we use a DependentSizedArrayType with no size expression as the + // type of a dependent array of unknown bound with a dependent braced + // initializer: + // + // template int arr[] = {N...}; + : Type(tc, can, + et->isDependentType() || (sz && sz->isValueDependent()) || + tc == DependentSizedArray, + et->isInstantiationDependentType() || + (sz && sz->isInstantiationDependent()) || + tc == DependentSizedArray, + (tc == VariableArray || et->isVariablyModifiedType()), + et->containsUnexpandedParameterPack() || + (sz && sz->containsUnexpandedParameterPack())), + ElementType(et) { + ArrayTypeBits.IndexTypeQuals = tq; + ArrayTypeBits.SizeModifier = sm; +} + unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context, QualType ElementType, const llvm::APInt &NumElements) { @@ -155,14 +183,26 @@ unsigned ConstantArrayType::getMaxSizeBits(const ASTContext &Context) { return Bits; } +void ConstantArrayType::Profile(llvm::FoldingSetNodeID &ID, + const ASTContext &Context, QualType ET, + const llvm::APInt &ArraySize, + const Expr *SizeExpr, ArraySizeModifier SizeMod, + unsigned TypeQuals) { + ID.AddPointer(ET.getAsOpaquePtr()); + ID.AddInteger(ArraySize.getZExtValue()); + ID.AddInteger(SizeMod); + ID.AddInteger(TypeQuals); + ID.AddBoolean(SizeExpr != 0); + if (SizeExpr) + SizeExpr->Profile(ID, Context, true); +} + DependentSizedArrayType::DependentSizedArrayType(const ASTContext &Context, QualType et, QualType can, Expr *e, ArraySizeModifier sm, unsigned tq, SourceRange brackets) - : ArrayType(DependentSizedArray, et, can, sm, tq, - (et->containsUnexpandedParameterPack() || - (e && e->containsUnexpandedParameterPack()))), + : 
ArrayType(DependentSizedArray, et, can, sm, tq, e), Context(Context), SizeExpr((Stmt*) e), Brackets(brackets) {} void DependentSizedArrayType::Profile(llvm::FoldingSetNodeID &ID, @@ -297,7 +337,19 @@ QualType QualType::getSingleStepDesugaredTypeImpl(QualType type, #define TYPE(CLASS, BASE) \ static_assert(!std::is_polymorphic::value, \ #CLASS "Type should not be polymorphic!"); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" + +// Check that no type class has a non-trival destructor. Types are +// allocated with the BumpPtrAllocator from ASTContext and therefore +// their destructor is not executed. +// +// FIXME: ConstantArrayType is not trivially destructible because of its +// APInt member. It should be replaced in favor of ASTContext allocation. +#define TYPE(CLASS, BASE) \ + static_assert(std::is_trivially_destructible::value || \ + std::is_same::value, \ + #CLASS "Type should be trivially destructible!"); +#include "clang/AST/TypeNodes.inc" QualType Type::getLocallyUnqualifiedSingleStepDesugaredType() const { switch (getTypeClass()) { @@ -308,7 +360,7 @@ QualType Type::getLocallyUnqualifiedSingleStepDesugaredType() const { if (!ty->isSugared()) return QualType(ty, 0); \ return ty->desugar(); \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } llvm_unreachable("bad type kind!"); } @@ -329,7 +381,7 @@ SplitQualType QualType::getSplitDesugaredType(QualType T) { Cur = Ty->desugar(); \ break; \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } } } @@ -357,7 +409,7 @@ SplitQualType QualType::getSplitUnqualifiedTypeImpl(QualType type) { next = ty->desugar(); \ break; \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } // Otherwise, split the underlying type. 
If that yields qualifiers, @@ -396,7 +448,7 @@ template static const T *getAsSugar(const Type *Cur) { Cur = Ty->desugar().getTypePtr(); \ break; \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } } } @@ -429,7 +481,7 @@ const Type *Type::getUnqualifiedDesugaredType() const { Cur = Ty->desugar().getTypePtr(); \ break; \ } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } } } @@ -611,6 +663,10 @@ ObjCTypeParamType::ObjCTypeParamType(const ObjCTypeParamDecl *D, initialize(protocols); } +QualType ObjCTypeParamType::desugar() const { + return getDecl()->getUnderlyingType(); +} + ObjCObjectType::ObjCObjectType(QualType Canonical, QualType Base, ArrayRef typeArgs, ArrayRef protocols, @@ -753,7 +809,7 @@ public: #define TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) \ QualType Visit##Class##Type(const Class##Type *T) { return QualType(T, 0); } -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" #define TRIVIAL_TYPE_CLASS(Class) \ QualType Visit##Class##Type(const Class##Type *T) { return QualType(T, 0); } @@ -847,7 +903,7 @@ public: if (elementType.getAsOpaquePtr() == T->getElementType().getAsOpaquePtr()) return QualType(T, 0); - return Ctx.getConstantArrayType(elementType, T->getSize(), + return Ctx.getConstantArrayType(elementType, T->getSize(), T->getSizeExpr(), T->getSizeModifier(), T->getIndexTypeCVRQualifiers()); } @@ -2477,6 +2533,15 @@ bool QualType::isCXX11PODType(const ASTContext &Context) const { return false; } +bool Type::isNothrowT() const { + if (const auto *RD = getAsCXXRecordDecl()) { + IdentifierInfo *II = RD->getIdentifier(); + if (II && II->isStr("nothrow_t") && RD->isInStdNamespace()) + return true; + } + return false; +} + bool Type::isAlignValT() const { if (const auto *ET = getAs()) { IdentifierInfo *II = ET->getDecl()->getIdentifier(); @@ -2690,7 +2755,7 @@ const char *Type::getTypeClassName() const { switch (TypeBits.TC) { #define ABSTRACT_TYPE(Derived, Base) #define 
TYPE(Derived, Base) case Derived: return #Derived; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } llvm_unreachable("Invalid type class."); @@ -2841,6 +2906,10 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { case Id: \ return #ExtType; #include "clang/Basic/OpenCLExtensionTypes.def" +#define SVE_TYPE(Name, Id, SingletonId) \ + case Id: \ + return Name; +#include "clang/Basic/AArch64SVEACLETypes.def" } llvm_unreachable("Invalid builtin type."); @@ -3556,14 +3625,15 @@ static CachedProperties computeCachedProperties(const Type *T) { switch (T->getTypeClass()) { #define TYPE(Class,Base) #define NON_CANONICAL_TYPE(Class,Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("didn't expect a non-canonical type here"); #define TYPE(Class,Base) #define DEPENDENT_TYPE(Class,Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class,Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // Treat instantiation-dependent types as external. + if (!T->isInstantiationDependentType()) T->dump(); assert(T->isInstantiationDependentType()); return CachedProperties(ExternalLinkage, false); @@ -3659,13 +3729,13 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { switch (T->getTypeClass()) { #define TYPE(Class,Base) #define NON_CANONICAL_TYPE(Class,Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("didn't expect a non-canonical type here"); #define TYPE(Class,Base) #define DEPENDENT_TYPE(Class,Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class,Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // Treat instantiation-dependent types as external. 
assert(T->isInstantiationDependentType()); return LinkageInfo::external(); @@ -3774,7 +3844,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::Class: \ llvm_unreachable("non-canonical type"); #define TYPE(Class, Parent) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" // Pointer types. case Type::Pointer: @@ -3842,6 +3912,9 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: case BuiltinType::OMPArraySection: diff --git a/lib/AST/TypeLoc.cpp b/lib/AST/TypeLoc.cpp index abe4c4eb25e..e4788f32b26 100644 --- a/lib/AST/TypeLoc.cpp +++ b/lib/AST/TypeLoc.cpp @@ -391,6 +391,9 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::BuiltinFn: case BuiltinType::OMPArraySection: return TST_unspecified; diff --git a/lib/AST/TypePrinter.cpp b/lib/AST/TypePrinter.cpp index 8d5c37299e5..dacbf9a96d8 100644 --- a/lib/AST/TypePrinter.cpp +++ b/lib/AST/TypePrinter.cpp @@ -125,7 +125,7 @@ namespace { #define TYPE(CLASS, PARENT) \ void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" private: void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); @@ -321,7 +321,7 @@ void TypePrinter::printBefore(const Type *T,Qualifiers Quals, raw_ostream &OS) { #define TYPE(CLASS, PARENT) case Type::CLASS: \ print##CLASS##Before(cast(T), OS); \ break; -#include "clang/AST/TypeNodes.def" +#include 
"clang/AST/TypeNodes.inc" } if (hasAfterQuals) { @@ -347,7 +347,7 @@ void TypePrinter::printAfter(const Type *T, Qualifiers Quals, raw_ostream &OS) { #define TYPE(CLASS, PARENT) case Type::CLASS: \ print##CLASS##After(cast(T), OS); \ break; -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" } } @@ -1204,7 +1204,8 @@ void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) { // arguments. if (const auto *Spec = dyn_cast(D)) { ArrayRef Args; - if (TypeSourceInfo *TAW = Spec->getTypeAsWritten()) { + TypeSourceInfo *TAW = Spec->getTypeAsWritten(); + if (!Policy.PrintCanonicalTypes && TAW) { const TemplateSpecializationType *TST = cast(TAW->getType()); Args = TST->template_arguments(); @@ -1537,7 +1538,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, QualType t = T->getEquivalentType(); while (!t->isFunctionType()) t = t->getPointeeType(); - OS << (t->getAs()->getCallConv() == CC_AAPCS ? + OS << (t->castAs()->getCallConv() == CC_AAPCS ? "\"aapcs\"" : "\"aapcs-vfp\""); OS << ')'; break; diff --git a/lib/AST/VTTBuilder.cpp b/lib/AST/VTTBuilder.cpp index 53d0ef09f14..d58e8751778 100644 --- a/lib/AST/VTTBuilder.cpp +++ b/lib/AST/VTTBuilder.cpp @@ -64,8 +64,8 @@ void VTTBuilder::LayoutSecondaryVTTs(BaseSubobject Base) { if (I.isVirtual()) continue; - const CXXRecordDecl *BaseDecl = - cast(I.getType()->getAs()->getDecl()); + const auto *BaseDecl = + cast(I.getType()->castAs()->getDecl()); const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); CharUnits BaseOffset = Base.getBaseOffset() + @@ -90,8 +90,8 @@ VTTBuilder::LayoutSecondaryVirtualPointers(BaseSubobject Base, return; for (const auto &I : RD->bases()) { - const CXXRecordDecl *BaseDecl = - cast(I.getType()->getAs()->getDecl()); + const auto *BaseDecl = + cast(I.getType()->castAs()->getDecl()); // Itanium C++ ABI 2.6.2: // Secondary virtual pointers are present for all bases with either @@ -154,8 +154,8 @@ VTTBuilder::LayoutSecondaryVirtualPointers(BaseSubobject Base, void 
VTTBuilder::LayoutVirtualVTTs(const CXXRecordDecl *RD, VisitedVirtualBasesSetTy &VBases) { for (const auto &I : RD->bases()) { - const CXXRecordDecl *BaseDecl = - cast(I.getType()->getAs()->getDecl()); + const auto *BaseDecl = + cast(I.getType()->castAs()->getDecl()); // Check if this is a virtual base. if (I.isVirtual()) { diff --git a/lib/AST/VTableBuilder.cpp b/lib/AST/VTableBuilder.cpp index 0c699571555..5688042dadd 100644 --- a/lib/AST/VTableBuilder.cpp +++ b/lib/AST/VTableBuilder.cpp @@ -2268,7 +2268,7 @@ CreateVTableLayout(const ItaniumVTableBuilder &Builder) { SmallVector VTableThunks(Builder.vtable_thunks_begin(), Builder.vtable_thunks_end()); - return llvm::make_unique( + return std::make_unique( Builder.VTableIndices, Builder.vtable_components(), VTableThunks, Builder.getAddressPoints()); } @@ -3253,7 +3253,7 @@ void MicrosoftVTableContext::computeVTablePaths(bool ForVBTables, // Base case: this subobject has its own vptr. if (ForVBTables ? Layout.hasOwnVBPtr() : Layout.hasOwnVFPtr()) - Paths.push_back(llvm::make_unique(RD)); + Paths.push_back(std::make_unique(RD)); // Recursive case: get all the vbtables from our bases and remove anything // that shares a virtual base. @@ -3276,7 +3276,7 @@ void MicrosoftVTableContext::computeVTablePaths(bool ForVBTables, continue; // Copy the path and adjust it as necessary. - auto P = llvm::make_unique(*BaseInfo); + auto P = std::make_unique(*BaseInfo); // We mangle Base into the path if the path would've been ambiguous and it // wasn't already extended with Base. 
@@ -3562,7 +3562,7 @@ void MicrosoftVTableContext::computeVTableRelatedInformation( const VTableLayout::AddressPointsMapTy EmptyAddressPointsMap; { - auto VFPtrs = llvm::make_unique(); + auto VFPtrs = std::make_unique(); computeVTablePaths(/*ForVBTables=*/false, RD, *VFPtrs); computeFullPathsForVFTables(Context, RD, *VFPtrs); VFPtrLocations[RD] = std::move(VFPtrs); @@ -3576,7 +3576,7 @@ void MicrosoftVTableContext::computeVTableRelatedInformation( assert(VFTableLayouts.count(id) == 0); SmallVector VTableThunks( Builder.vtable_thunks_begin(), Builder.vtable_thunks_end()); - VFTableLayouts[id] = llvm::make_unique( + VFTableLayouts[id] = std::make_unique( ArrayRef{0}, Builder.vtable_components(), VTableThunks, EmptyAddressPointsMap); Thunks.insert(Builder.thunks_begin(), Builder.thunks_end()); @@ -3668,7 +3668,7 @@ const VirtualBaseInfo &MicrosoftVTableContext::computeVBTableRelatedInformation( std::unique_ptr &Entry = VBaseInfo[RD]; if (Entry) return *Entry; - Entry = llvm::make_unique(); + Entry = std::make_unique(); VBI = Entry.get(); } diff --git a/lib/ASTMatchers/ASTMatchFinder.cpp b/lib/ASTMatchers/ASTMatchFinder.cpp index f407e0875ac..c51fd630e64 100644 --- a/lib/ASTMatchers/ASTMatchFinder.cpp +++ b/lib/ASTMatchers/ASTMatchFinder.cpp @@ -374,6 +374,12 @@ public: return true; } + bool VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) { + const ObjCInterfaceDecl *InterfaceDecl = CAD->getClassInterface(); + CompatibleAliases[InterfaceDecl].insert(CAD); + return true; + } + bool TraverseDecl(Decl *DeclNode); bool TraverseStmt(Stmt *StmtNode, DataRecursionQueue *Queue = nullptr); bool TraverseType(QualType TypeNode); @@ -430,7 +436,13 @@ public: bool classIsDerivedFrom(const CXXRecordDecl *Declaration, const Matcher &Base, - BoundNodesTreeBuilder *Builder) override; + BoundNodesTreeBuilder *Builder, + bool Directly) override; + + bool objcClassIsDerivedFrom(const ObjCInterfaceDecl *Declaration, + const Matcher &Base, + BoundNodesTreeBuilder *Builder, + bool 
Directly) override; // Implements ASTMatchFinder::matchesChildOf. bool matchesChildOf(const ast_type_traits::DynTypedNode &Node, @@ -762,6 +774,23 @@ private: return false; } + bool + objcClassHasMatchingCompatibilityAlias(const ObjCInterfaceDecl *InterfaceDecl, + const Matcher &Matcher, + BoundNodesTreeBuilder *Builder) { + auto Aliases = CompatibleAliases.find(InterfaceDecl); + if (Aliases == CompatibleAliases.end()) + return false; + for (const ObjCCompatibleAliasDecl *Alias : Aliases->second) { + BoundNodesTreeBuilder Result(*Builder); + if (Matcher.matches(*Alias, this, &Result)) { + *Builder = std::move(Result); + return true; + } + } + return false; + } + /// Bucket to record map. /// /// Used to get the appropriate bucket for each matcher. @@ -786,6 +815,11 @@ private: // Maps a canonical type to its TypedefDecls. llvm::DenseMap > TypeAliases; + // Maps an Objective-C interface to its ObjCCompatibleAliasDecls. + llvm::DenseMap> + CompatibleAliases; + // Maps (matcher, node) -> the match result for memoization. typedef std::map MemoizationMap; MemoizationMap ResultCache; @@ -812,12 +846,13 @@ getAsCXXRecordDeclOrPrimaryTemplate(const Type *TypeNode) { return nullptr; } -// Returns true if the given class is directly or indirectly derived +// Returns true if the given C++ class is directly or indirectly derived // from a base type with the given name. A class is not considered to be // derived from itself. 
bool MatchASTVisitor::classIsDerivedFrom(const CXXRecordDecl *Declaration, const Matcher &Base, - BoundNodesTreeBuilder *Builder) { + BoundNodesTreeBuilder *Builder, + bool Directly) { if (!Declaration->hasDefinition()) return false; for (const auto &It : Declaration->bases()) { @@ -842,12 +877,40 @@ bool MatchASTVisitor::classIsDerivedFrom(const CXXRecordDecl *Declaration, *Builder = std::move(Result); return true; } - if (classIsDerivedFrom(ClassDecl, Base, Builder)) + if (!Directly && classIsDerivedFrom(ClassDecl, Base, Builder, Directly)) return true; } return false; } +// Returns true if the given Objective-C class is directly or indirectly +// derived from a matching base class. A class is not considered to be derived +// from itself. +bool MatchASTVisitor::objcClassIsDerivedFrom( + const ObjCInterfaceDecl *Declaration, const Matcher &Base, + BoundNodesTreeBuilder *Builder, bool Directly) { + // Check if any of the superclasses of the class match. + for (const ObjCInterfaceDecl *ClassDecl = Declaration->getSuperClass(); + ClassDecl != nullptr; ClassDecl = ClassDecl->getSuperClass()) { + // Check if there are any matching compatibility aliases. + if (objcClassHasMatchingCompatibilityAlias(ClassDecl, Base, Builder)) + return true; + + // Check if there are any matching type aliases. 
+ const Type *TypeNode = ClassDecl->getTypeForDecl(); + if (typeHasMatchingAlias(TypeNode, Base, Builder)) + return true; + + if (Base.matches(*ClassDecl, this, Builder)) + return true; + + if (Directly) + return false; + } + + return false; +} + bool MatchASTVisitor::TraverseDecl(Decl *DeclNode) { if (!DeclNode) { return true; @@ -1015,7 +1078,7 @@ bool MatchFinder::addDynamicMatcher(const internal::DynTypedMatcher &NodeMatch, } std::unique_ptr MatchFinder::newASTConsumer() { - return llvm::make_unique(this, ParsingDone); + return std::make_unique(this, ParsingDone); } void MatchFinder::match(const clang::ast_type_traits::DynTypedNode &Node, diff --git a/lib/ASTMatchers/Dynamic/Marshallers.h b/lib/ASTMatchers/Dynamic/Marshallers.h index fac2fc98e09..9f46108d184 100644 --- a/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/lib/ASTMatchers/Dynamic/Marshallers.h @@ -729,7 +729,7 @@ std::unique_ptr makeMatcherAutoMarshall(ReturnType (*Func)(), StringRef MatcherName) { std::vector RetTypes; BuildReturnTypeVector::build(RetTypes); - return llvm::make_unique( + return std::make_unique( matcherMarshall0, reinterpret_cast(Func), MatcherName, RetTypes, None); } @@ -741,7 +741,7 @@ makeMatcherAutoMarshall(ReturnType (*Func)(ArgType1), StringRef MatcherName) { std::vector RetTypes; BuildReturnTypeVector::build(RetTypes); ArgKind AK = ArgTypeTraits::getKind(); - return llvm::make_unique( + return std::make_unique( matcherMarshall1, reinterpret_cast(Func), MatcherName, RetTypes, AK); } @@ -755,7 +755,7 @@ makeMatcherAutoMarshall(ReturnType (*Func)(ArgType1, ArgType2), BuildReturnTypeVector::build(RetTypes); ArgKind AKs[] = { ArgTypeTraits::getKind(), ArgTypeTraits::getKind() }; - return llvm::make_unique( + return std::make_unique( matcherMarshall2, reinterpret_cast(Func), MatcherName, RetTypes, AKs); } @@ -766,7 +766,7 @@ template makeMatcherAutoMarshall( ast_matchers::internal::VariadicFunction VarFunc, StringRef MatcherName) { - return llvm::make_unique(VarFunc, MatcherName); + 
return std::make_unique(VarFunc, MatcherName); } /// Overload for VariadicDynCastAllOfMatchers. @@ -778,7 +778,7 @@ std::unique_ptr makeMatcherAutoMarshall( ast_matchers::internal::VariadicDynCastAllOfMatcher VarFunc, StringRef MatcherName) { - return llvm::make_unique(VarFunc, MatcherName); + return std::make_unique(VarFunc, MatcherName); } /// Argument adaptative overload. @@ -791,7 +791,7 @@ std::unique_ptr makeMatcherAutoMarshall( std::vector> Overloads; AdaptativeOverloadCollector(MatcherName, Overloads); - return llvm::make_unique(Overloads); + return std::make_unique(Overloads); } template